From 47ea7417b0744324424405fc1207e266053237a9 Mon Sep 17 00:00:00 2001
From: James Hilliard <james.hilliard1@gmail.com>
Date: Sun, 31 Jul 2022 17:26:49 -0600
Subject: libbpf: Skip empty sections in bpf_object__init_global_data_maps

The GNU assembler generates an empty .bss section. This is a well
established behavior in GAS that happens in all supported targets.

The LLVM assembler doesn't generate an empty .bss section.

bpftool chokes on the empty .bss section.

Additionally in bpf_object__elf_collect the sec_desc->data is not
initialized when a section is not recognized. In this case, this
happens with .comment.

So we must check that sec_desc->data is initialized before checking
if the size is 0.

Signed-off-by: James Hilliard <james.hilliard1@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Link: https://lore.kernel.org/bpf/20220731232649.4668-1-james.hilliard1@gmail.com
---
 tools/lib/bpf/libbpf.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 50d41815f431..77e3797cf75a 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -1642,6 +1642,10 @@ static int bpf_object__init_global_data_maps(struct bpf_object *obj)
 	for (sec_idx = 1; sec_idx < obj->efile.sec_cnt; sec_idx++) {
 		sec_desc = &obj->efile.secs[sec_idx];
 
+		/* Skip recognized sections with size 0. */
+		if (sec_desc->data && sec_desc->data->d_size == 0)
+			continue;
+
 		switch (sec_desc->sec_type) {
 		case SEC_DATA:
 			sec_name = elf_sec_name(obj, elf_sec_by_idx(obj, sec_idx));
-- 
cgit v1.2.3


From 3045f42a64324d339125a8a1a1763bb9e1e08300 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Sun, 31 Jul 2022 19:51:09 -0700
Subject: libbpf: Initialize err in probe_map_create

GCC-11 warns about the possibly uninitialized err variable in
probe_map_create:

libbpf_probes.c: In function 'probe_map_create':
libbpf_probes.c:361:38: error: 'err' may be used uninitialized in this function [-Werror=maybe-uninitialized]
  361 |                 return fd < 0 && err == exp_err ? 1 : 0;
      |                                  ~~~~^~~~~~~~~~

Fixes: 878d8def0603 ("libbpf: Rework feature-probing APIs")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Link: https://lore.kernel.org/bpf/20220801025109.1206633-1-f.fainelli@gmail.com
---
 tools/lib/bpf/libbpf_probes.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c
index 0b5398786bf3..6d495656f554 100644
--- a/tools/lib/bpf/libbpf_probes.c
+++ b/tools/lib/bpf/libbpf_probes.c
@@ -193,7 +193,7 @@ static int probe_map_create(enum bpf_map_type map_type)
 	LIBBPF_OPTS(bpf_map_create_opts, opts);
 	int key_size, value_size, max_entries;
 	__u32 btf_key_type_id = 0, btf_value_type_id = 0;
-	int fd = -1, btf_fd = -1, fd_inner = -1, exp_err = 0, err;
+	int fd = -1, btf_fd = -1, fd_inner = -1, exp_err = 0, err = 0;
 
 	key_size	= sizeof(__u32);
 	value_size	= sizeof(__u32);
-- 
cgit v1.2.3


From d55dfe587bc0670f90564a962615723fe7749ab1 Mon Sep 17 00:00:00 2001
From: Manu Bretelle <chantr4@gmail.com>
Date: Mon, 1 Aug 2022 06:24:09 -0700
Subject: bpftool: Remove BPF_OBJ_NAME_LEN restriction when looking up bpf
 program by name

bpftool was limiting the length of names to BPF_OBJ_NAME_LEN in
prog_parse_fds().

Since commit b662000aff84 ("bpftool: Adding support for BTF program names")
we can get the full program name from BTF.

This patch removes the restriction of name length when running `bpftool
prog show name ${name}`.

Test:
Tested against some internal program names that were longer than
`BPF_OBJ_NAME_LEN`. Here is a redacted example of what was run to test:

    # previous behaviour
    $ sudo bpftool prog show name some_long_program_name
    Error: can't parse name
    # with the patch
    $ sudo ./bpftool prog show name some_long_program_name
    123456789: tracing  name some_long_program_name  tag taghexa  gpl ....
    ...
    ...
    ...
    # too long
    $ sudo ./bpftool prog show name $(python3 -c 'print("A"*128)')
    Error: can't parse name
    # not too long but no match
    $ sudo ./bpftool prog show name $(python3 -c 'print("A"*127)')

Signed-off-by: Manu Bretelle <chantr4@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Tested-by: Jiri Olsa <jolsa@kernel.org>
Reviewed-by: Quentin Monnet <quentin@isovalent.com>
Link: https://lore.kernel.org/bpf/20220801132409.4147849-1-chantr4@gmail.com
---
 tools/bpf/bpftool/common.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index 067e9ea59e3b..8727765add88 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -722,6 +722,7 @@ print_all_levels(__maybe_unused enum libbpf_print_level level,
 
 static int prog_fd_by_nametag(void *nametag, int **fds, bool tag)
 {
+	char prog_name[MAX_PROG_FULL_NAME];
 	unsigned int id = 0;
 	int fd, nb_fds = 0;
 	void *tmp;
@@ -754,12 +755,20 @@ static int prog_fd_by_nametag(void *nametag, int **fds, bool tag)
 			goto err_close_fd;
 		}
 
-		if ((tag && memcmp(nametag, info.tag, BPF_TAG_SIZE)) ||
-		    (!tag && strncmp(nametag, info.name, BPF_OBJ_NAME_LEN))) {
+		if (tag && memcmp(nametag, info.tag, BPF_TAG_SIZE)) {
 			close(fd);
 			continue;
 		}
 
+		if (!tag) {
+			get_prog_full_name(&info, fd, prog_name,
+					   sizeof(prog_name));
+			if (strncmp(nametag, prog_name, sizeof(prog_name))) {
+				close(fd);
+				continue;
+			}
+		}
+
 		if (nb_fds > 0) {
 			tmp = realloc(*fds, (nb_fds + 1) * sizeof(int));
 			if (!tmp) {
@@ -820,7 +829,7 @@ int prog_parse_fds(int *argc, char ***argv, int **fds)
 		NEXT_ARGP();
 
 		name = **argv;
-		if (strlen(name) > BPF_OBJ_NAME_LEN - 1) {
+		if (strlen(name) > MAX_PROG_FULL_NAME - 1) {
 			p_err("can't parse name");
 			return -1;
 		}
-- 
cgit v1.2.3


From d25f40ff68aa61c838947bb9adee6c6b36e77453 Mon Sep 17 00:00:00 2001
From: James Hilliard <james.hilliard1@gmail.com>
Date: Wed, 3 Aug 2022 09:14:03 -0600
Subject: libbpf: Ensure functions with always_inline attribute are inline
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

GCC expects the always_inline attribute to only be set on inline
functions, as such we should make all functions with this attribute
use the __always_inline macro which makes the function inline and
sets the attribute.

Fixes errors like:
/home/buildroot/bpf-next/tools/testing/selftests/bpf/tools/include/bpf/bpf_tracing.h:439:1: error: ‘always_inline’ function might not be inlinable [-Werror=attributes]
  439 | ____##name(unsigned long long *ctx, ##args)
      | ^~~~

Signed-off-by: James Hilliard <james.hilliard1@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Link: https://lore.kernel.org/bpf/20220803151403.793024-1-james.hilliard1@gmail.com
---
 tools/lib/bpf/bpf_tracing.h | 14 +++++++-------
 tools/lib/bpf/usdt.bpf.h    |  4 ++--
 2 files changed, 9 insertions(+), 9 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h
index 43ca3aff2292..5fdb93da423b 100644
--- a/tools/lib/bpf/bpf_tracing.h
+++ b/tools/lib/bpf/bpf_tracing.h
@@ -426,7 +426,7 @@ struct pt_regs;
  */
 #define BPF_PROG(name, args...)						    \
 name(unsigned long long *ctx);						    \
-static __attribute__((always_inline)) typeof(name(0))			    \
+static __always_inline typeof(name(0))					    \
 ____##name(unsigned long long *ctx, ##args);				    \
 typeof(name(0)) name(unsigned long long *ctx)				    \
 {									    \
@@ -435,7 +435,7 @@ typeof(name(0)) name(unsigned long long *ctx)				    \
 	return ____##name(___bpf_ctx_cast(args));			    \
 	_Pragma("GCC diagnostic pop")					    \
 }									    \
-static __attribute__((always_inline)) typeof(name(0))			    \
+static __always_inline typeof(name(0))					    \
 ____##name(unsigned long long *ctx, ##args)
 
 struct pt_regs;
@@ -460,7 +460,7 @@ struct pt_regs;
  */
 #define BPF_KPROBE(name, args...)					    \
 name(struct pt_regs *ctx);						    \
-static __attribute__((always_inline)) typeof(name(0))			    \
+static __always_inline typeof(name(0))					    \
 ____##name(struct pt_regs *ctx, ##args);				    \
 typeof(name(0)) name(struct pt_regs *ctx)				    \
 {									    \
@@ -469,7 +469,7 @@ typeof(name(0)) name(struct pt_regs *ctx)				    \
 	return ____##name(___bpf_kprobe_args(args));			    \
 	_Pragma("GCC diagnostic pop")					    \
 }									    \
-static __attribute__((always_inline)) typeof(name(0))			    \
+static __always_inline typeof(name(0))					    \
 ____##name(struct pt_regs *ctx, ##args)
 
 #define ___bpf_kretprobe_args0()       ctx
@@ -484,7 +484,7 @@ ____##name(struct pt_regs *ctx, ##args)
  */
 #define BPF_KRETPROBE(name, args...)					    \
 name(struct pt_regs *ctx);						    \
-static __attribute__((always_inline)) typeof(name(0))			    \
+static __always_inline typeof(name(0))					    \
 ____##name(struct pt_regs *ctx, ##args);				    \
 typeof(name(0)) name(struct pt_regs *ctx)				    \
 {									    \
@@ -540,7 +540,7 @@ static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args)
 #define BPF_KSYSCALL(name, args...)					    \
 name(struct pt_regs *ctx);						    \
 extern _Bool LINUX_HAS_SYSCALL_WRAPPER __kconfig;			    \
-static __attribute__((always_inline)) typeof(name(0))			    \
+static __always_inline typeof(name(0))					    \
 ____##name(struct pt_regs *ctx, ##args);				    \
 typeof(name(0)) name(struct pt_regs *ctx)				    \
 {									    \
@@ -555,7 +555,7 @@ typeof(name(0)) name(struct pt_regs *ctx)				    \
 		return ____##name(___bpf_syscall_args(args));		    \
 	_Pragma("GCC diagnostic pop")					    \
 }									    \
-static __attribute__((always_inline)) typeof(name(0))			    \
+static __always_inline typeof(name(0))					    \
 ____##name(struct pt_regs *ctx, ##args)
 
 #define BPF_KPROBE_SYSCALL BPF_KSYSCALL
diff --git a/tools/lib/bpf/usdt.bpf.h b/tools/lib/bpf/usdt.bpf.h
index 4f2adc0bd6ca..fdfd235e52c4 100644
--- a/tools/lib/bpf/usdt.bpf.h
+++ b/tools/lib/bpf/usdt.bpf.h
@@ -232,7 +232,7 @@ long bpf_usdt_cookie(struct pt_regs *ctx)
  */
 #define BPF_USDT(name, args...)						    \
 name(struct pt_regs *ctx);						    \
-static __attribute__((always_inline)) typeof(name(0))			    \
+static __always_inline typeof(name(0))					    \
 ____##name(struct pt_regs *ctx, ##args);				    \
 typeof(name(0)) name(struct pt_regs *ctx)				    \
 {									    \
@@ -241,7 +241,7 @@ typeof(name(0)) name(struct pt_regs *ctx)				    \
         return ____##name(___bpf_usdt_args(args));			    \
         _Pragma("GCC diagnostic pop")					    \
 }									    \
-static __attribute__((always_inline)) typeof(name(0))			    \
+static __always_inline typeof(name(0))					    \
 ____##name(struct pt_regs *ctx, ##args)
 
 #endif /* __USDT_BPF_H__ */
-- 
cgit v1.2.3


From 5653f55ebd767b4ef47414ee7f852517993eda6f Mon Sep 17 00:00:00 2001
From: Joanne Koong <joannelkoong@gmail.com>
Date: Fri, 5 Aug 2022 10:14:05 -0700
Subject: selftests/bpf: Clean up sys_nanosleep uses

This patch cleans up a few things:

  * dynptr_fail.c:
    There is no sys_nanosleep tracepoint. dynptr_fail only tests
    that the prog load fails, so just SEC("?raw_tp") suffices here.

  * test_bpf_cookie:
    There is no sys_nanosleep kprobe. The prog is loaded in
    userspace through bpf_program__attach_kprobe_opts passing in
    SYS_NANOSLEEP_KPROBE_NAME, so just SEC("k{ret}probe") suffices here.

  * test_helper_restricted:
    There is no sys_nanosleep kprobe. test_helper_restricted only tests
    that the prog load fails, so just SEC("?kprobe") suffices here.

There are no functional changes.

Suggested-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20220805171405.2272103-1-joannelkoong@gmail.com
---
 tools/testing/selftests/bpf/progs/dynptr_fail.c    | 56 +++++++++++-----------
 .../testing/selftests/bpf/progs/test_bpf_cookie.c  |  4 +-
 .../selftests/bpf/progs/test_helper_restricted.c   |  4 +-
 3 files changed, 32 insertions(+), 32 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c
index 0a26c243e6e9..b5e0a87f0a36 100644
--- a/tools/testing/selftests/bpf/progs/dynptr_fail.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c
@@ -65,7 +65,7 @@ static int get_map_val_dynptr(struct bpf_dynptr *ptr)
 /* Every bpf_ringbuf_reserve_dynptr call must have a corresponding
  * bpf_ringbuf_submit/discard_dynptr call
  */
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp")
 int ringbuf_missing_release1(void *ctx)
 {
 	struct bpf_dynptr ptr;
@@ -77,7 +77,7 @@ int ringbuf_missing_release1(void *ctx)
 	return 0;
 }
 
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp")
 int ringbuf_missing_release2(void *ctx)
 {
 	struct bpf_dynptr ptr1, ptr2;
@@ -112,7 +112,7 @@ static int missing_release_callback_fn(__u32 index, void *data)
 }
 
 /* Any dynptr initialized within a callback must have bpf_dynptr_put called */
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp")
 int ringbuf_missing_release_callback(void *ctx)
 {
 	bpf_loop(10, missing_release_callback_fn, NULL, 0);
@@ -120,7 +120,7 @@ int ringbuf_missing_release_callback(void *ctx)
 }
 
 /* Can't call bpf_ringbuf_submit/discard_dynptr on a non-initialized dynptr */
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp")
 int ringbuf_release_uninit_dynptr(void *ctx)
 {
 	struct bpf_dynptr ptr;
@@ -132,7 +132,7 @@ int ringbuf_release_uninit_dynptr(void *ctx)
 }
 
 /* A dynptr can't be used after it has been invalidated */
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp")
 int use_after_invalid(void *ctx)
 {
 	struct bpf_dynptr ptr;
@@ -151,7 +151,7 @@ int use_after_invalid(void *ctx)
 }
 
 /* Can't call non-dynptr ringbuf APIs on a dynptr ringbuf sample */
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp")
 int ringbuf_invalid_api(void *ctx)
 {
 	struct bpf_dynptr ptr;
@@ -173,7 +173,7 @@ done:
 }
 
 /* Can't add a dynptr to a map */
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp")
 int add_dynptr_to_map1(void *ctx)
 {
 	struct bpf_dynptr ptr;
@@ -190,7 +190,7 @@ int add_dynptr_to_map1(void *ctx)
 }
 
 /* Can't add a struct with an embedded dynptr to a map */
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp")
 int add_dynptr_to_map2(void *ctx)
 {
 	struct test_info x;
@@ -207,7 +207,7 @@ int add_dynptr_to_map2(void *ctx)
 }
 
 /* A data slice can't be accessed out of bounds */
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp")
 int data_slice_out_of_bounds_ringbuf(void *ctx)
 {
 	struct bpf_dynptr ptr;
@@ -227,7 +227,7 @@ done:
 	return 0;
 }
 
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp")
 int data_slice_out_of_bounds_map_value(void *ctx)
 {
 	__u32 key = 0, map_val;
@@ -247,7 +247,7 @@ int data_slice_out_of_bounds_map_value(void *ctx)
 }
 
 /* A data slice can't be used after it has been released */
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp")
 int data_slice_use_after_release(void *ctx)
 {
 	struct bpf_dynptr ptr;
@@ -273,7 +273,7 @@ done:
 }
 
 /* A data slice must be first checked for NULL */
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp")
 int data_slice_missing_null_check1(void *ctx)
 {
 	struct bpf_dynptr ptr;
@@ -293,7 +293,7 @@ int data_slice_missing_null_check1(void *ctx)
 }
 
 /* A data slice can't be dereferenced if it wasn't checked for null */
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp")
 int data_slice_missing_null_check2(void *ctx)
 {
 	struct bpf_dynptr ptr;
@@ -315,7 +315,7 @@ done:
 /* Can't pass in a dynptr as an arg to a helper function that doesn't take in a
  * dynptr argument
  */
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp")
 int invalid_helper1(void *ctx)
 {
 	struct bpf_dynptr ptr;
@@ -329,7 +329,7 @@ int invalid_helper1(void *ctx)
 }
 
 /* A dynptr can't be passed into a helper function at a non-zero offset */
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp")
 int invalid_helper2(void *ctx)
 {
 	struct bpf_dynptr ptr;
@@ -344,7 +344,7 @@ int invalid_helper2(void *ctx)
 }
 
 /* A bpf_dynptr is invalidated if it's been written into */
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp")
 int invalid_write1(void *ctx)
 {
 	struct bpf_dynptr ptr;
@@ -365,7 +365,7 @@ int invalid_write1(void *ctx)
  * A bpf_dynptr can't be used as a dynptr if it has been written into at a fixed
  * offset
  */
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp")
 int invalid_write2(void *ctx)
 {
 	struct bpf_dynptr ptr;
@@ -388,7 +388,7 @@ int invalid_write2(void *ctx)
  * A bpf_dynptr can't be used as a dynptr if it has been written into at a
  * non-const offset
  */
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp")
 int invalid_write3(void *ctx)
 {
 	struct bpf_dynptr ptr;
@@ -419,7 +419,7 @@ static int invalid_write4_callback(__u32 index, void *data)
 /* If the dynptr is written into in a callback function, it should
  * be invalidated as a dynptr
  */
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp")
 int invalid_write4(void *ctx)
 {
 	struct bpf_dynptr ptr;
@@ -436,7 +436,7 @@ int invalid_write4(void *ctx)
 
 /* A globally-defined bpf_dynptr can't be used (it must reside as a stack frame) */
 struct bpf_dynptr global_dynptr;
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp")
 int global(void *ctx)
 {
 	/* this should fail */
@@ -448,7 +448,7 @@ int global(void *ctx)
 }
 
 /* A direct read should fail */
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp")
 int invalid_read1(void *ctx)
 {
 	struct bpf_dynptr ptr;
@@ -464,7 +464,7 @@ int invalid_read1(void *ctx)
 }
 
 /* A direct read at an offset should fail */
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp")
 int invalid_read2(void *ctx)
 {
 	struct bpf_dynptr ptr;
@@ -479,7 +479,7 @@ int invalid_read2(void *ctx)
 }
 
 /* A direct read at an offset into the lower stack slot should fail */
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp")
 int invalid_read3(void *ctx)
 {
 	struct bpf_dynptr ptr1, ptr2;
@@ -505,7 +505,7 @@ static int invalid_read4_callback(__u32 index, void *data)
 }
 
 /* A direct read within a callback function should fail */
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp")
 int invalid_read4(void *ctx)
 {
 	struct bpf_dynptr ptr;
@@ -520,7 +520,7 @@ int invalid_read4(void *ctx)
 }
 
 /* Initializing a dynptr on an offset should fail */
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp")
 int invalid_offset(void *ctx)
 {
 	struct bpf_dynptr ptr;
@@ -534,7 +534,7 @@ int invalid_offset(void *ctx)
 }
 
 /* Can't release a dynptr twice */
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp")
 int release_twice(void *ctx)
 {
 	struct bpf_dynptr ptr;
@@ -560,7 +560,7 @@ static int release_twice_callback_fn(__u32 index, void *data)
 /* Test that releasing a dynptr twice, where one of the releases happens
  * within a calback function, fails
  */
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp")
 int release_twice_callback(void *ctx)
 {
 	struct bpf_dynptr ptr;
@@ -575,7 +575,7 @@ int release_twice_callback(void *ctx)
 }
 
 /* Reject unsupported local mem types for dynptr_from_mem API */
-SEC("?raw_tp/sys_nanosleep")
+SEC("?raw_tp")
 int dynptr_from_mem_invalid_api(void *ctx)
 {
 	struct bpf_dynptr ptr;
diff --git a/tools/testing/selftests/bpf/progs/test_bpf_cookie.c b/tools/testing/selftests/bpf/progs/test_bpf_cookie.c
index 22d0ac8709b4..5a3a80f751c4 100644
--- a/tools/testing/selftests/bpf/progs/test_bpf_cookie.c
+++ b/tools/testing/selftests/bpf/progs/test_bpf_cookie.c
@@ -28,14 +28,14 @@ static void update(void *ctx, __u64 *res)
 	*res |= bpf_get_attach_cookie(ctx);
 }
 
-SEC("kprobe/sys_nanosleep")
+SEC("kprobe")
 int handle_kprobe(struct pt_regs *ctx)
 {
 	update(ctx, &kprobe_res);
 	return 0;
 }
 
-SEC("kretprobe/sys_nanosleep")
+SEC("kretprobe")
 int handle_kretprobe(struct pt_regs *ctx)
 {
 	update(ctx, &kretprobe_res);
diff --git a/tools/testing/selftests/bpf/progs/test_helper_restricted.c b/tools/testing/selftests/bpf/progs/test_helper_restricted.c
index 20ef9d433b97..5715c569ec03 100644
--- a/tools/testing/selftests/bpf/progs/test_helper_restricted.c
+++ b/tools/testing/selftests/bpf/progs/test_helper_restricted.c
@@ -72,7 +72,7 @@ int tp_timer(void *ctx)
 	return 0;
 }
 
-SEC("?kprobe/sys_nanosleep")
+SEC("?kprobe")
 int kprobe_timer(void *ctx)
 {
 	timer_work();
@@ -104,7 +104,7 @@ int tp_spin_lock(void *ctx)
 	return 0;
 }
 
-SEC("?kprobe/sys_nanosleep")
+SEC("?kprobe")
 int kprobe_spin_lock(void *ctx)
 {
 	spin_lock_work();
-- 
cgit v1.2.3


From e19db6762c18ab1ddf7a3ef4d0023780c24dc1e8 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Wed, 3 Aug 2022 14:42:02 -0700
Subject: libbpf: Reject legacy 'maps' ELF section

Add an explicit error message if a BPF object file is still using legacy
BPF map definitions in SEC("maps"). Before this change, if a BPF object
file was still using legacy map definitions, the user would see a
somewhat confusing pair of messages:

  libbpf: elf: skipping unrecognized data section(4) maps
  libbpf: prog 'handler': bad map relo against 'server_map' in section 'maps'

Now libbpf will be explicit about rejecting "maps" ELF section:

  libbpf: elf: legacy map definitions in 'maps' section are not supported by libbpf v1.0+

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20220803214202.23750-1-andrii@kernel.org
---
 tools/lib/bpf/libbpf.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 77e3797cf75a..d3d94704583f 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -591,7 +591,6 @@ struct elf_state {
 	size_t strtabidx;
 	struct elf_sec_desc *secs;
 	int sec_cnt;
-	int maps_shndx;
 	int btf_maps_shndx;
 	__u32 btf_maps_sec_btf_id;
 	int text_shndx;
@@ -1272,7 +1271,6 @@ static struct bpf_object *bpf_object__new(const char *path,
 	 */
 	obj->efile.obj_buf = obj_buf;
 	obj->efile.obj_buf_sz = obj_buf_sz;
-	obj->efile.maps_shndx = -1;
 	obj->efile.btf_maps_shndx = -1;
 	obj->efile.st_ops_shndx = -1;
 	obj->kconfig_map_idx = -1;
@@ -3363,7 +3361,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
 			if (err)
 				return err;
 		} else if (strcmp(name, "maps") == 0) {
-			obj->efile.maps_shndx = idx;
+			pr_warn("elf: legacy map definitions in 'maps' section are not supported by libbpf v1.0+\n");
+			return -ENOTSUP;
 		} else if (strcmp(name, MAPS_ELF_SEC) == 0) {
 			obj->efile.btf_maps_shndx = idx;
 		} else if (strcmp(name, BTF_ELF_SEC) == 0) {
@@ -3895,8 +3894,7 @@ static bool bpf_object__shndx_is_data(const struct bpf_object *obj,
 static bool bpf_object__shndx_is_maps(const struct bpf_object *obj,
 				      int shndx)
 {
-	return shndx == obj->efile.maps_shndx ||
-	       shndx == obj->efile.btf_maps_shndx;
+	return shndx == obj->efile.btf_maps_shndx;
 }
 
 static enum libbpf_map_type
-- 
cgit v1.2.3


From 9e32084ef1c33a87a736d6ce3fcb95b60dac9aa1 Mon Sep 17 00:00:00 2001
From: Hengqi Chen <hengqi.chen@gmail.com>
Date: Sat, 6 Aug 2022 18:20:21 +0800
Subject: libbpf: Do not require executable permission for shared libraries

Currently, resolve_full_path() requires executable permission for both
programs and shared libraries. This causes failures on distros like Debian
since the shared libraries are not installed executable and Linux does not
require shared libraries to have executable permissions. Let's remove the
executable permission check for shared libraries.

Reported-by: Goro Fuji <goro@fastly.com>
Signed-off-by: Hengqi Chen <hengqi.chen@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220806102021.3867130-1-hengqi.chen@gmail.com
---
 tools/lib/bpf/libbpf.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index d3d94704583f..f7364ea82ac1 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -10664,15 +10664,17 @@ static const char *arch_specific_lib_paths(void)
 static int resolve_full_path(const char *file, char *result, size_t result_sz)
 {
 	const char *search_paths[3] = {};
-	int i;
+	int i, perm;
 
 	if (str_has_sfx(file, ".so") || strstr(file, ".so.")) {
 		search_paths[0] = getenv("LD_LIBRARY_PATH");
 		search_paths[1] = "/usr/lib64:/usr/lib";
 		search_paths[2] = arch_specific_lib_paths();
+		perm = R_OK;
 	} else {
 		search_paths[0] = getenv("PATH");
 		search_paths[1] = "/usr/bin:/usr/sbin";
+		perm = R_OK | X_OK;
 	}
 
 	for (i = 0; i < ARRAY_SIZE(search_paths); i++) {
@@ -10691,8 +10693,8 @@ static int resolve_full_path(const char *file, char *result, size_t result_sz)
 			if (!seg_len)
 				continue;
 			snprintf(result, result_sz, "%.*s/%s", seg_len, s, file);
-			/* ensure it is an executable file/link */
-			if (access(result, R_OK | X_OK) < 0)
+			/* ensure it has required permissions */
+			if (access(result, perm) < 0)
 				continue;
 			pr_debug("resolved '%s' to '%s'\n", file, result);
 			return 0;
-- 
cgit v1.2.3


From ca34ce29fc4b0e929cc6aada40829d17ab50fee4 Mon Sep 17 00:00:00 2001
From: Dave Marchevsky <davemarchevsky@fb.com>
Date: Mon, 8 Aug 2022 09:47:23 -0700
Subject: bpf: Improve docstring for BPF_F_USER_BUILD_ID flag

Most tools which use bpf_get_stack or bpf_get_stackid symbolicate the
stack - meaning the stack of addresses in the target process' address
space is transformed into meaningful symbol names. The
BPF_F_USER_BUILD_ID flag eases this process by finding the build_id of
the file-backed vma which the address falls in and translating the
address to an offset within the backing file.

To be more specific, the offset is a "file offset" from the beginning of
the backing file. The symbols in ET_DYN ELF objects have a st_value
which is also described as an "offset" - but an offset in the process
address space, relative to the base address of the object.

It's necessary to translate between the "file offset" and "virtual
address offset" during symbolication before they can be directly
compared. Failure to do so can lead to confusing bugs, so this patch
clarifies language in the documentation in an attempt to keep this from
happening.

Signed-off-by: Dave Marchevsky <davemarchevsky@fb.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220808164723.3107500-1-davemarchevsky@fb.com
---
 include/uapi/linux/bpf.h       | 14 ++++++++++++--
 tools/include/uapi/linux/bpf.h | 14 ++++++++++++--
 2 files changed, 24 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 7bf9ba1329be..534e33fb1029 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3008,8 +3008,18 @@ union bpf_attr {
  * 		**BPF_F_USER_STACK**
  * 			Collect a user space stack instead of a kernel stack.
  * 		**BPF_F_USER_BUILD_ID**
- * 			Collect buildid+offset instead of ips for user stack,
- * 			only valid if **BPF_F_USER_STACK** is also specified.
+ * 			Collect (build_id, file_offset) instead of ips for user
+ * 			stack, only valid if **BPF_F_USER_STACK** is also
+ * 			specified.
+ *
+ * 			*file_offset* is an offset relative to the beginning
+ * 			of the executable or shared object file backing the vma
+ * 			which the *ip* falls in. It is *not* an offset relative
+ * 			to that object's base address. Accordingly, it must be
+ * 			adjusted by adding (sh_addr - sh_offset), where
+ * 			sh_{addr,offset} correspond to the executable section
+ * 			containing *file_offset* in the object, for comparisons
+ * 			to symbols' st_value to be valid.
  *
  * 		**bpf_get_stack**\ () can collect up to
  * 		**PERF_MAX_STACK_DEPTH** both kernel and user frames, subject
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 59a217ca2dfd..f58d58e1d547 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3008,8 +3008,18 @@ union bpf_attr {
  * 		**BPF_F_USER_STACK**
  * 			Collect a user space stack instead of a kernel stack.
  * 		**BPF_F_USER_BUILD_ID**
- * 			Collect buildid+offset instead of ips for user stack,
- * 			only valid if **BPF_F_USER_STACK** is also specified.
+ * 			Collect (build_id, file_offset) instead of ips for user
+ * 			stack, only valid if **BPF_F_USER_STACK** is also
+ * 			specified.
+ *
+ * 			*file_offset* is an offset relative to the beginning
+ * 			of the executable or shared object file backing the vma
+ * 			which the *ip* falls in. It is *not* an offset relative
+ * 			to that object's base address. Accordingly, it must be
+ * 			adjusted by adding (sh_addr - sh_offset), where
+ * 			sh_{addr,offset} correspond to the executable section
+ * 			containing *file_offset* in the object, for comparisons
+ * 			to symbols' st_value to be valid.
  *
  * 		**bpf_get_stack**\ () can collect up to
  * 		**PERF_MAX_STACK_DEPTH** both kernel and user frames, subject
-- 
cgit v1.2.3


From c8996c98f703b09afe77a1d247dae691c9849dc1 Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Tue, 9 Aug 2022 08:08:02 +0200
Subject: bpf: Add BPF-helper for accessing CLOCK_TAI

Commit 3dc6ffae2da2 ("timekeeping: Introduce fast accessor to clock tai")
introduced a fast and NMI-safe accessor for CLOCK_TAI. Especially in time
sensitive networks (TSN), where all nodes are synchronized by Precision Time
Protocol (PTP), it's helpful to have the possibility to generate timestamps
based on CLOCK_TAI instead of CLOCK_MONOTONIC. With a BPF helper for TAI in
place, it becomes very convenient to correlate activity across different
machines in the network.

Use cases for such a BPF helper include functionalities such as Tx launch
time (e.g. ETF and TAPRIO Qdiscs) and timestamping.

Note: CLOCK_TAI is nothing new per se, only the NMI-safe variant of it is.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
[Kurt: Wrote changelog and renamed helper]
Signed-off-by: Kurt Kanzenbach <kurt@linutronix.de>
Link: https://lore.kernel.org/r/20220809060803.5773-2-kurt@linutronix.de
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h            |  1 +
 include/uapi/linux/bpf.h       | 13 +++++++++++++
 kernel/bpf/core.c              |  1 +
 kernel/bpf/helpers.c           | 14 ++++++++++++++
 tools/include/uapi/linux/bpf.h | 13 +++++++++++++
 5 files changed, 42 insertions(+)

(limited to 'tools')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 20c26aed7896..a627a02cf8ab 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2349,6 +2349,7 @@ extern const struct bpf_func_proto bpf_get_numa_node_id_proto;
 extern const struct bpf_func_proto bpf_tail_call_proto;
 extern const struct bpf_func_proto bpf_ktime_get_ns_proto;
 extern const struct bpf_func_proto bpf_ktime_get_boot_ns_proto;
+extern const struct bpf_func_proto bpf_ktime_get_tai_ns_proto;
 extern const struct bpf_func_proto bpf_get_current_pid_tgid_proto;
 extern const struct bpf_func_proto bpf_get_current_uid_gid_proto;
 extern const struct bpf_func_proto bpf_get_current_comm_proto;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 534e33fb1029..7d1e2794d83e 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5341,6 +5341,18 @@ union bpf_attr {
  *		**-EACCES** if the SYN cookie is not valid.
  *
  *		**-EPROTONOSUPPORT** if CONFIG_IPV6 is not builtin.
+ *
+ * u64 bpf_ktime_get_tai_ns(void)
+ *	Description
+ *		A nonsettable system-wide clock derived from wall-clock time but
+ *		ignoring leap seconds.  This clock does not experience
+ *		discontinuities and backwards jumps caused by NTP inserting leap
+ *		seconds as CLOCK_REALTIME does.
+ *
+ *		See: **clock_gettime**\ (**CLOCK_TAI**)
+ *	Return
+ *		Current *ktime*.
+ *
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5551,6 +5563,7 @@ union bpf_attr {
 	FN(tcp_raw_gen_syncookie_ipv6),	\
 	FN(tcp_raw_check_syncookie_ipv4),	\
 	FN(tcp_raw_check_syncookie_ipv6),	\
+	FN(ktime_get_tai_ns),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index c1e10d088dbb..639437f36928 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2623,6 +2623,7 @@ const struct bpf_func_proto bpf_get_numa_node_id_proto __weak;
 const struct bpf_func_proto bpf_ktime_get_ns_proto __weak;
 const struct bpf_func_proto bpf_ktime_get_boot_ns_proto __weak;
 const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto __weak;
+const struct bpf_func_proto bpf_ktime_get_tai_ns_proto __weak;
 
 const struct bpf_func_proto bpf_get_current_pid_tgid_proto __weak;
 const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak;
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 1f961f9982d2..a95eb9fb01ff 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -198,6 +198,18 @@ const struct bpf_func_proto bpf_ktime_get_coarse_ns_proto = {
 	.ret_type	= RET_INTEGER,
 };
 
+BPF_CALL_0(bpf_ktime_get_tai_ns)
+{
+	/* NMI safe access to clock tai */
+	return ktime_get_tai_fast_ns();
+}
+
+const struct bpf_func_proto bpf_ktime_get_tai_ns_proto = {
+	.func		= bpf_ktime_get_tai_ns,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+};
+
 BPF_CALL_0(bpf_get_current_pid_tgid)
 {
 	struct task_struct *task = current;
@@ -1617,6 +1629,8 @@ bpf_base_func_proto(enum bpf_func_id func_id)
 		return &bpf_ktime_get_ns_proto;
 	case BPF_FUNC_ktime_get_boot_ns:
 		return &bpf_ktime_get_boot_ns_proto;
+	case BPF_FUNC_ktime_get_tai_ns:
+		return &bpf_ktime_get_tai_ns_proto;
 	case BPF_FUNC_ringbuf_output:
 		return &bpf_ringbuf_output_proto;
 	case BPF_FUNC_ringbuf_reserve:
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index f58d58e1d547..e174ad28aeb7 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -5341,6 +5341,18 @@ union bpf_attr {
  *		**-EACCES** if the SYN cookie is not valid.
  *
  *		**-EPROTONOSUPPORT** if CONFIG_IPV6 is not builtin.
+ *
+ * u64 bpf_ktime_get_tai_ns(void)
+ *	Description
+ *		A nonsettable system-wide clock derived from wall-clock time but
+ *		ignoring leap seconds.  This clock does not experience
+ *		discontinuities and backwards jumps caused by NTP inserting leap
+ *		seconds as CLOCK_REALTIME does.
+ *
+ *		See: **clock_gettime**\ (**CLOCK_TAI**)
+ *	Return
+ *		Current *ktime*.
+ *
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5551,6 +5563,7 @@ union bpf_attr {
 	FN(tcp_raw_gen_syncookie_ipv6),	\
 	FN(tcp_raw_check_syncookie_ipv4),	\
 	FN(tcp_raw_check_syncookie_ipv6),	\
+	FN(ktime_get_tai_ns),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3


From 64e15820b987cc8e5864a8b907dfc17861e6ab5a Mon Sep 17 00:00:00 2001
From: Kurt Kanzenbach <kurt@linutronix.de>
Date: Tue, 9 Aug 2022 08:08:03 +0200
Subject: selftests/bpf: Add BPF-helper test for CLOCK_TAI access

Add BPF-helper test case for CLOCK_TAI access. The added test verifies that:

 * Timestamps are generated
 * Timestamps are moving forward
 * Timestamps are reasonable

Signed-off-by: Kurt Kanzenbach <kurt@linutronix.de>
Link: https://lore.kernel.org/r/20220809060803.5773-3-kurt@linutronix.de
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/prog_tests/time_tai.c | 74 +++++++++++++++++++++++
 tools/testing/selftests/bpf/progs/test_time_tai.c | 24 ++++++++
 2 files changed, 98 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/time_tai.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_time_tai.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/time_tai.c b/tools/testing/selftests/bpf/prog_tests/time_tai.c
new file mode 100644
index 000000000000..a31119823666
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/time_tai.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2022 Linutronix GmbH */
+
+#include <test_progs.h>
+#include <network_helpers.h>
+
+#include "test_time_tai.skel.h"
+
+#include <time.h>
+#include <stdint.h>
+
+#define TAI_THRESHOLD	1000000000ULL /* 1s */
+#define NSEC_PER_SEC	1000000000ULL
+
+static __u64 ts_to_ns(const struct timespec *ts)
+{
+	return ts->tv_sec * NSEC_PER_SEC + ts->tv_nsec;
+}
+
+void test_time_tai(void)
+{
+	struct __sk_buff skb = {
+		.cb[0] = 0,
+		.cb[1] = 0,
+		.tstamp = 0,
+	};
+	LIBBPF_OPTS(bpf_test_run_opts, topts,
+		.data_in = &pkt_v4,
+		.data_size_in = sizeof(pkt_v4),
+		.ctx_in = &skb,
+		.ctx_size_in = sizeof(skb),
+		.ctx_out = &skb,
+		.ctx_size_out = sizeof(skb),
+	);
+	struct test_time_tai *skel;
+	struct timespec now_tai;
+	__u64 ts1, ts2, now;
+	int ret, prog_fd;
+
+	/* Open and load */
+	skel = test_time_tai__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "tai_open"))
+		return;
+
+	/* Run test program */
+	prog_fd = bpf_program__fd(skel->progs.time_tai);
+	ret = bpf_prog_test_run_opts(prog_fd, &topts);
+	ASSERT_OK(ret, "test_run");
+
+	/* Retrieve generated TAI timestamps */
+	ts1 = skb.tstamp;
+	ts2 = skb.cb[0] | ((__u64)skb.cb[1] << 32);
+
+	/* TAI != 0 */
+	ASSERT_NEQ(ts1, 0, "tai_ts1");
+	ASSERT_NEQ(ts2, 0, "tai_ts2");
+
+	/* TAI is moving forward only */
+	ASSERT_GT(ts2, ts1, "tai_forward");
+
+	/* Check for future */
+	ret = clock_gettime(CLOCK_TAI, &now_tai);
+	ASSERT_EQ(ret, 0, "tai_gettime");
+	now = ts_to_ns(&now_tai);
+
+	ASSERT_TRUE(now > ts1, "tai_future_ts1");
+	ASSERT_TRUE(now > ts2, "tai_future_ts2");
+
+	/* Check for reasonable range */
+	ASSERT_TRUE(now - ts1 < TAI_THRESHOLD, "tai_range_ts1");
+	ASSERT_TRUE(now - ts2 < TAI_THRESHOLD, "tai_range_ts2");
+
+	test_time_tai__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_time_tai.c b/tools/testing/selftests/bpf/progs/test_time_tai.c
new file mode 100644
index 000000000000..7ea0863f3ddb
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_time_tai.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2022 Linutronix GmbH */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("tc")
+int time_tai(struct __sk_buff *skb)
+{
+	__u64 ts1, ts2;
+
+	/* Get TAI timestamps */
+	ts1 = bpf_ktime_get_tai_ns();
+	ts2 = bpf_ktime_get_tai_ns();
+
+	/* Save TAI timestamps (Note: skb->hwtstamp is read-only) */
+	skb->tstamp = ts1;
+	skb->cb[0] = ts2 & 0xffffffff;
+	skb->cb[1] = ts2 >> 32;
+
+	return 0;
+}
-- 
cgit v1.2.3


From d020b2360b350b9f91b1769f9c84fe2d22f643db Mon Sep 17 00:00:00 2001
From: Daniel Xu <dxu@dxuuu.xyz>
Date: Tue, 9 Aug 2022 11:11:09 -0600
Subject: selftests/bpf: Fix vmtest.sh -h to not require root
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Set the exit trap only after argument parsing is done. This way argument
parse failure or `-h` will not require sudo.

Reasoning is that it's confusing that a help message would require root
access.

Signed-off-by: Daniel Xu <dxu@dxuuu.xyz>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Daniel Müller <deso@posteo.net>
Link: https://lore.kernel.org/bpf/6a802aa37758e5a7e6aa5de294634f5518005e2b.1660064925.git.dxu@dxuuu.xyz
---
 tools/testing/selftests/bpf/vmtest.sh | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/vmtest.sh b/tools/testing/selftests/bpf/vmtest.sh
index b86ae4a2e5c5..976ef7585b33 100755
--- a/tools/testing/selftests/bpf/vmtest.sh
+++ b/tools/testing/selftests/bpf/vmtest.sh
@@ -307,6 +307,20 @@ update_kconfig()
 	fi
 }
 
+catch()
+{
+	local exit_code=$1
+	local exit_status_file="${OUTPUT_DIR}/${EXIT_STATUS_FILE}"
+	# This is just a cleanup and the directory may
+	# have already been unmounted. So, don't let this
+	# clobber the error code we intend to return.
+	unmount_image || true
+	if [[ -f "${exit_status_file}" ]]; then
+		exit_code="$(cat ${exit_status_file})"
+	fi
+	exit ${exit_code}
+}
+
 main()
 {
 	local script_dir="$(cd -P -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P)"
@@ -353,6 +367,8 @@ main()
 	done
 	shift $((OPTIND -1))
 
+	trap 'catch "$?"' EXIT
+
 	if [[ $# -eq 0  && "${debug_shell}" == "no" ]]; then
 		echo "No command specified, will run ${DEFAULT_COMMAND} in the vm"
 	else
@@ -409,20 +425,4 @@ main()
 	fi
 }
 
-catch()
-{
-	local exit_code=$1
-	local exit_status_file="${OUTPUT_DIR}/${EXIT_STATUS_FILE}"
-	# This is just a cleanup and the directory may
-	# have already been unmounted. So, don't let this
-	# clobber the error code we intend to return.
-	unmount_image || true
-	if [[ -f "${exit_status_file}" ]]; then
-		exit_code="$(cat ${exit_status_file})"
-	fi
-	exit ${exit_code}
-}
-
-trap 'catch "$?"' EXIT
-
 main "$@"
-- 
cgit v1.2.3


From a7be0ab1eb1949f3564739784b4360e1233305f6 Mon Sep 17 00:00:00 2001
From: Daniel Xu <dxu@dxuuu.xyz>
Date: Tue, 9 Aug 2022 11:11:10 -0600
Subject: selftests/bpf: Fix vmtest.sh getopts optstring
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Before, you could see the following errors:

  $ ./vmtest.sh -j
  ./vmtest.sh: option requires an argument -- j
  ./vmtest.sh: line 357: OPTARG: unbound variable

  $ ./vmtest.sh -z
  ./vmtest.sh: illegal option -- z
  ./vmtest.sh: line 357: OPTARG: unbound variable

Fix by adding ':' as first character of optstring. Reason is that getopts
requires ':' as the first character for OPTARG to be set in the `?` and `:`
error cases.

Note that the ':' as the first character of the optstring switches getopts
to silent mode. The desire to run in this mode seems to have been there all
along, as the script takes care of reporting errors.

Signed-off-by: Daniel Xu <dxu@dxuuu.xyz>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Daniel Müller <deso@posteo.net>
Link: https://lore.kernel.org/bpf/0f93b56198328b6b4da7b4cf4662d05c3edb5fd2.1660064925.git.dxu@dxuuu.xyz
---
 tools/testing/selftests/bpf/vmtest.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/vmtest.sh b/tools/testing/selftests/bpf/vmtest.sh
index 976ef7585b33..a29aa05ebb3e 100755
--- a/tools/testing/selftests/bpf/vmtest.sh
+++ b/tools/testing/selftests/bpf/vmtest.sh
@@ -333,7 +333,7 @@ main()
 	local exit_command="poweroff -f"
 	local debug_shell="no"
 
-	while getopts 'hskid:j:' opt; do
+	while getopts ':hskid:j:' opt; do
 		case ${opt} in
 		i)
 			update_image="yes"
-- 
cgit v1.2.3


From dc444be8bae45019396aedd53c745e685a4eb235 Mon Sep 17 00:00:00 2001
From: Joanne Koong <joannelkoong@gmail.com>
Date: Tue, 9 Aug 2022 14:40:55 -0700
Subject: selftests/bpf: add extra test for using dynptr data slice after
 release

Add an additional test, "data_slice_use_after_release2", for ensuring
that data slices are correctly invalidated by the verifier after the
dynptr whose ref obj id they track is released. In particular, this
tests data slice invalidation for dynptrs located at a non-zero offset
from the frame pointer.

Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/r/20220809214055.4050604-2-joannelkoong@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/prog_tests/dynptr.c |  3 +-
 tools/testing/selftests/bpf/progs/dynptr_fail.c | 38 ++++++++++++++++++++++++-
 2 files changed, 39 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c
index 3c7aa82b98e2..bcf80b9f7c27 100644
--- a/tools/testing/selftests/bpf/prog_tests/dynptr.c
+++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c
@@ -22,7 +22,8 @@ static struct {
 	{"add_dynptr_to_map2", "invalid indirect read from stack"},
 	{"data_slice_out_of_bounds_ringbuf", "value is outside of the allowed memory range"},
 	{"data_slice_out_of_bounds_map_value", "value is outside of the allowed memory range"},
-	{"data_slice_use_after_release", "invalid mem access 'scalar'"},
+	{"data_slice_use_after_release1", "invalid mem access 'scalar'"},
+	{"data_slice_use_after_release2", "invalid mem access 'scalar'"},
 	{"data_slice_missing_null_check1", "invalid mem access 'mem_or_null'"},
 	{"data_slice_missing_null_check2", "invalid mem access 'mem_or_null'"},
 	{"invalid_helper1", "invalid indirect read from stack"},
diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c
index b5e0a87f0a36..b0f08ff024fb 100644
--- a/tools/testing/selftests/bpf/progs/dynptr_fail.c
+++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c
@@ -248,7 +248,7 @@ int data_slice_out_of_bounds_map_value(void *ctx)
 
 /* A data slice can't be used after it has been released */
 SEC("?raw_tp")
-int data_slice_use_after_release(void *ctx)
+int data_slice_use_after_release1(void *ctx)
 {
 	struct bpf_dynptr ptr;
 	struct sample *sample;
@@ -272,6 +272,42 @@ done:
 	return 0;
 }
 
+/* A data slice can't be used after it has been released.
+ *
+ * This tests the case where the data slice tracks a dynptr (ptr2)
+ * that is at a non-zero offset from the frame pointer (ptr1 is at fp,
+ * ptr2 is at fp - 16).
+ */
+SEC("?raw_tp")
+int data_slice_use_after_release2(void *ctx)
+{
+	struct bpf_dynptr ptr1, ptr2;
+	struct sample *sample;
+
+	bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &ptr1);
+	bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(*sample), 0, &ptr2);
+
+	sample = bpf_dynptr_data(&ptr2, 0, sizeof(*sample));
+	if (!sample)
+		goto done;
+
+	sample->pid = 23;
+
+	bpf_ringbuf_submit_dynptr(&ptr2, 0);
+
+	/* this should fail */
+	sample->pid = 23;
+
+	bpf_ringbuf_submit_dynptr(&ptr1, 0);
+
+	return 0;
+
+done:
+	bpf_ringbuf_discard_dynptr(&ptr2, 0);
+	bpf_ringbuf_discard_dynptr(&ptr1, 0);
+	return 0;
+}
+
 /* A data slice must be first checked for NULL */
 SEC("?raw_tp")
 int data_slice_missing_null_check1(void *ctx)
-- 
cgit v1.2.3


From 3143d10b094596f3e5d5964b2660375e586652a3 Mon Sep 17 00:00:00 2001
From: Shibin Koikkara Reeny <shibin.koikkara.reeny@intel.com>
Date: Wed, 3 Aug 2022 14:43:54 +0000
Subject: selftests/xsk: Update poll test cases

The poll test case was not testing all the functionality of the poll
feature in the test suite. This patch replaces it with 2 test cases that
test the RX and the TX poll functionality, plus 2 additional test cases
that check the timeout behavior of poll.

Poll test suite has 4 test cases:

1. TEST_TYPE_RX_POLL: Check if RX path POLLIN function works as expected.
   TX path can use any method to send the traffic.

2. TEST_TYPE_TX_POLL: Check if TX path POLLOUT function works as expected.
   RX path can use any method to receive the traffic.

3. TEST_TYPE_POLL_RXQ_EMPTY: Calling the poll function with POLLIN on an
   empty RX queue should time out. If it times out, the test case passes.

4. TEST_TYPE_POLL_TXQ_FULL: When the TX queue is full and packets are not
   cleaned by the kernel, invoking the poll function with POLLOUT should
   trigger a timeout.

Signed-off-by: Shibin Koikkara Reeny <shibin.koikkara.reeny@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Link: https://lore.kernel.org/bpf/20220803144354.98122-1-shibin.koikkara.reeny@intel.com
---
 tools/testing/selftests/bpf/xskxceiver.c | 166 ++++++++++++++++++++++++-------
 tools/testing/selftests/bpf/xskxceiver.h |   8 +-
 2 files changed, 134 insertions(+), 40 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c
index 74d56d971baf..20b44ab32a06 100644
--- a/tools/testing/selftests/bpf/xskxceiver.c
+++ b/tools/testing/selftests/bpf/xskxceiver.c
@@ -244,6 +244,11 @@ static void gen_udp_hdr(u32 payload, void *pkt, struct ifobject *ifobject,
 	memset32_htonl(pkt + PKT_HDR_SIZE, payload, UDP_PKT_DATA_SIZE);
 }
 
+static bool is_umem_valid(struct ifobject *ifobj)
+{
+	return !!ifobj->umem->umem;
+}
+
 static void gen_udp_csum(struct udphdr *udp_hdr, struct iphdr *ip_hdr)
 {
 	udp_hdr->check = 0;
@@ -817,12 +822,13 @@ static int complete_pkts(struct xsk_socket_info *xsk, int batch_size)
 	return TEST_PASS;
 }
 
-static int receive_pkts(struct ifobject *ifobj, struct pollfd *fds)
+static int receive_pkts(struct test_spec *test, struct pollfd *fds)
 {
-	struct timeval tv_end, tv_now, tv_timeout = {RECV_TMOUT, 0};
+	struct timeval tv_end, tv_now, tv_timeout = {THREAD_TMOUT, 0};
+	struct pkt_stream *pkt_stream = test->ifobj_rx->pkt_stream;
 	u32 idx_rx = 0, idx_fq = 0, rcvd, i, pkts_sent = 0;
-	struct pkt_stream *pkt_stream = ifobj->pkt_stream;
-	struct xsk_socket_info *xsk = ifobj->xsk;
+	struct xsk_socket_info *xsk = test->ifobj_rx->xsk;
+	struct ifobject *ifobj = test->ifobj_rx;
 	struct xsk_umem_info *umem = xsk->umem;
 	struct pkt *pkt;
 	int ret;
@@ -843,17 +849,28 @@ static int receive_pkts(struct ifobject *ifobj, struct pollfd *fds)
 		}
 
 		kick_rx(xsk);
+		if (ifobj->use_poll) {
+			ret = poll(fds, 1, POLL_TMOUT);
+			if (ret < 0)
+				exit_with_error(-ret);
+
+			if (!ret) {
+				if (!is_umem_valid(test->ifobj_tx))
+					return TEST_PASS;
+
+				ksft_print_msg("ERROR: [%s] Poll timed out\n", __func__);
+				return TEST_FAILURE;
 
-		rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx);
-		if (!rcvd) {
-			if (xsk_ring_prod__needs_wakeup(&umem->fq)) {
-				ret = poll(fds, 1, POLL_TMOUT);
-				if (ret < 0)
-					exit_with_error(-ret);
 			}
-			continue;
+
+			if (!(fds->revents & POLLIN))
+				continue;
 		}
 
+		rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx);
+		if (!rcvd)
+			continue;
+
 		if (ifobj->use_fill_ring) {
 			ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
 			while (ret != rcvd) {
@@ -900,13 +917,35 @@ static int receive_pkts(struct ifobject *ifobj, struct pollfd *fds)
 	return TEST_PASS;
 }
 
-static int __send_pkts(struct ifobject *ifobject, u32 *pkt_nb)
+static int __send_pkts(struct ifobject *ifobject, u32 *pkt_nb, struct pollfd *fds,
+		       bool timeout)
 {
 	struct xsk_socket_info *xsk = ifobject->xsk;
-	u32 i, idx, valid_pkts = 0;
+	bool use_poll = ifobject->use_poll;
+	u32 i, idx, ret, valid_pkts = 0;
+
+	while (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) < BATCH_SIZE) {
+		if (use_poll) {
+			ret = poll(fds, 1, POLL_TMOUT);
+			if (timeout) {
+				if (ret < 0) {
+					ksft_print_msg("ERROR: [%s] Poll error %d\n",
+						       __func__, ret);
+					return TEST_FAILURE;
+				}
+				if (ret == 0)
+					return TEST_PASS;
+				break;
+			}
+			if (ret <= 0) {
+				ksft_print_msg("ERROR: [%s] Poll error %d\n",
+					       __func__, ret);
+				return TEST_FAILURE;
+			}
+		}
 
-	while (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) < BATCH_SIZE)
 		complete_pkts(xsk, BATCH_SIZE);
+	}
 
 	for (i = 0; i < BATCH_SIZE; i++) {
 		struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i);
@@ -933,11 +972,27 @@ static int __send_pkts(struct ifobject *ifobject, u32 *pkt_nb)
 
 	xsk_ring_prod__submit(&xsk->tx, i);
 	xsk->outstanding_tx += valid_pkts;
-	if (complete_pkts(xsk, i))
-		return TEST_FAILURE;
 
-	usleep(10);
-	return TEST_PASS;
+	if (use_poll) {
+		ret = poll(fds, 1, POLL_TMOUT);
+		if (ret <= 0) {
+			if (ret == 0 && timeout)
+				return TEST_PASS;
+
+			ksft_print_msg("ERROR: [%s] Poll error %d\n", __func__, ret);
+			return TEST_FAILURE;
+		}
+	}
+
+	if (!timeout) {
+		if (complete_pkts(xsk, i))
+			return TEST_FAILURE;
+
+		usleep(10);
+		return TEST_PASS;
+	}
+
+	return TEST_CONTINUE;
 }
 
 static void wait_for_tx_completion(struct xsk_socket_info *xsk)
@@ -948,29 +1003,19 @@ static void wait_for_tx_completion(struct xsk_socket_info *xsk)
 
 static int send_pkts(struct test_spec *test, struct ifobject *ifobject)
 {
+	bool timeout = !is_umem_valid(test->ifobj_rx);
 	struct pollfd fds = { };
-	u32 pkt_cnt = 0;
+	u32 pkt_cnt = 0, ret;
 
 	fds.fd = xsk_socket__fd(ifobject->xsk->xsk);
 	fds.events = POLLOUT;
 
 	while (pkt_cnt < ifobject->pkt_stream->nb_pkts) {
-		int err;
-
-		if (ifobject->use_poll) {
-			int ret;
-
-			ret = poll(&fds, 1, POLL_TMOUT);
-			if (ret <= 0)
-				continue;
-
-			if (!(fds.revents & POLLOUT))
-				continue;
-		}
-
-		err = __send_pkts(ifobject, &pkt_cnt);
-		if (err || test->fail)
+		ret = __send_pkts(ifobject, &pkt_cnt, &fds, timeout);
+		if ((ret || test->fail) && !timeout)
 			return TEST_FAILURE;
+		else if (ret == TEST_PASS && timeout)
+			return ret;
 	}
 
 	wait_for_tx_completion(ifobject->xsk);
@@ -1235,7 +1280,7 @@ static void *worker_testapp_validate_rx(void *arg)
 
 	pthread_barrier_wait(&barr);
 
-	err = receive_pkts(ifobject, &fds);
+	err = receive_pkts(test, &fds);
 
 	if (!err && ifobject->validation_func)
 		err = ifobject->validation_func(ifobject);
@@ -1251,6 +1296,33 @@ static void *worker_testapp_validate_rx(void *arg)
 	pthread_exit(NULL);
 }
 
+static int testapp_validate_traffic_single_thread(struct test_spec *test, struct ifobject *ifobj,
+						  enum test_type type)
+{
+	pthread_t t0;
+
+	if (pthread_barrier_init(&barr, NULL, 2))
+		exit_with_error(errno);
+
+	test->current_step++;
+	if (type  == TEST_TYPE_POLL_RXQ_TMOUT)
+		pkt_stream_reset(ifobj->pkt_stream);
+	pkts_in_flight = 0;
+
+	/*Spawn thread */
+	pthread_create(&t0, NULL, ifobj->func_ptr, test);
+
+	if (type != TEST_TYPE_POLL_TXQ_TMOUT)
+		pthread_barrier_wait(&barr);
+
+	if (pthread_barrier_destroy(&barr))
+		exit_with_error(errno);
+
+	pthread_join(t0, NULL);
+
+	return !!test->fail;
+}
+
 static int testapp_validate_traffic(struct test_spec *test)
 {
 	struct ifobject *ifobj_tx = test->ifobj_tx;
@@ -1548,12 +1620,30 @@ static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_
 
 		pkt_stream_restore_default(test);
 		break;
-	case TEST_TYPE_POLL:
-		test->ifobj_tx->use_poll = true;
+	case TEST_TYPE_RX_POLL:
 		test->ifobj_rx->use_poll = true;
-		test_spec_set_name(test, "POLL");
+		test_spec_set_name(test, "POLL_RX");
 		testapp_validate_traffic(test);
 		break;
+	case TEST_TYPE_TX_POLL:
+		test->ifobj_tx->use_poll = true;
+		test_spec_set_name(test, "POLL_TX");
+		testapp_validate_traffic(test);
+		break;
+	case TEST_TYPE_POLL_TXQ_TMOUT:
+		test_spec_set_name(test, "POLL_TXQ_FULL");
+		test->ifobj_tx->use_poll = true;
+		/* create invalid frame by set umem frame_size and pkt length equal to 2048 */
+		test->ifobj_tx->umem->frame_size = 2048;
+		pkt_stream_replace(test, 2 * DEFAULT_PKT_CNT, 2048);
+		testapp_validate_traffic_single_thread(test, test->ifobj_tx, type);
+		pkt_stream_restore_default(test);
+		break;
+	case TEST_TYPE_POLL_RXQ_TMOUT:
+		test_spec_set_name(test, "POLL_RXQ_EMPTY");
+		test->ifobj_rx->use_poll = true;
+		testapp_validate_traffic_single_thread(test, test->ifobj_rx, type);
+		break;
 	case TEST_TYPE_ALIGNED_INV_DESC:
 		test_spec_set_name(test, "ALIGNED_INV_DESC");
 		testapp_invalid_desc(test);
diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h
index 3d17053f98e5..ee97576757a9 100644
--- a/tools/testing/selftests/bpf/xskxceiver.h
+++ b/tools/testing/selftests/bpf/xskxceiver.h
@@ -27,6 +27,7 @@
 
 #define TEST_PASS 0
 #define TEST_FAILURE -1
+#define TEST_CONTINUE 1
 #define MAX_INTERFACES 2
 #define MAX_INTERFACE_NAME_CHARS 7
 #define MAX_INTERFACES_NAMESPACE_CHARS 10
@@ -48,7 +49,7 @@
 #define SOCK_RECONF_CTR 10
 #define BATCH_SIZE 64
 #define POLL_TMOUT 1000
-#define RECV_TMOUT 3
+#define THREAD_TMOUT 3
 #define DEFAULT_PKT_CNT (4 * 1024)
 #define DEFAULT_UMEM_BUFFERS (DEFAULT_PKT_CNT / 4)
 #define UMEM_SIZE (DEFAULT_UMEM_BUFFERS * XSK_UMEM__DEFAULT_FRAME_SIZE)
@@ -68,7 +69,10 @@ enum test_type {
 	TEST_TYPE_RUN_TO_COMPLETION,
 	TEST_TYPE_RUN_TO_COMPLETION_2K_FRAME,
 	TEST_TYPE_RUN_TO_COMPLETION_SINGLE_PKT,
-	TEST_TYPE_POLL,
+	TEST_TYPE_RX_POLL,
+	TEST_TYPE_TX_POLL,
+	TEST_TYPE_POLL_RXQ_TMOUT,
+	TEST_TYPE_POLL_TXQ_TMOUT,
 	TEST_TYPE_UNALIGNED,
 	TEST_TYPE_ALIGNED_INV_DESC,
 	TEST_TYPE_ALIGNED_INV_DESC_2K_FRAME,
-- 
cgit v1.2.3


From e338945816754a1c362f606b8e2029f2c023e51c Mon Sep 17 00:00:00 2001
From: Artem Savkov <asavkov@redhat.com>
Date: Wed, 10 Aug 2022 08:59:05 +0200
Subject: selftests/bpf: add destructive kfunc test

Add a test checking that programs calling destructive kfuncs can only do
so if they have CAP_SYS_BOOT capabilities.

Signed-off-by: Artem Savkov <asavkov@redhat.com>
Link: https://lore.kernel.org/r/20220810065905.475418-4-asavkov@redhat.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 net/bpf/test_run.c                                 |  5 +++
 .../testing/selftests/bpf/prog_tests/kfunc_call.c  | 36 ++++++++++++++++++++++
 .../selftests/bpf/progs/kfunc_call_destructive.c   | 14 +++++++++
 3 files changed, 55 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/progs/kfunc_call_destructive.c

(limited to 'tools')

diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index cbc9cd5058cb..afa7125252f6 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -695,6 +695,10 @@ noinline void bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p)
 {
 }
 
+noinline void bpf_kfunc_call_test_destructive(void)
+{
+}
+
 __diag_pop();
 
 ALLOW_ERROR_INJECTION(bpf_modify_return_test, ERRNO);
@@ -719,6 +723,7 @@ BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_pass1)
 BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail1)
 BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail2)
 BTF_ID_FLAGS(func, bpf_kfunc_call_test_ref, KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_destructive, KF_DESTRUCTIVE)
 BTF_SET8_END(test_sk_check_kfunc_ids)
 
 static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size,
diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
index c00eb974eb85..351fafa006fb 100644
--- a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
+++ b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
@@ -5,6 +5,9 @@
 #include "kfunc_call_test.lskel.h"
 #include "kfunc_call_test_subprog.skel.h"
 #include "kfunc_call_test_subprog.lskel.h"
+#include "kfunc_call_destructive.skel.h"
+
+#include "cap_helpers.h"
 
 static void test_main(void)
 {
@@ -86,6 +89,36 @@ static void test_subprog_lskel(void)
 	kfunc_call_test_subprog_lskel__destroy(skel);
 }
 
+static int test_destructive_open_and_load(void)
+{
+	struct kfunc_call_destructive *skel;
+	int err;
+
+	skel = kfunc_call_destructive__open();
+	if (!ASSERT_OK_PTR(skel, "prog_open"))
+		return -1;
+
+	err = kfunc_call_destructive__load(skel);
+
+	kfunc_call_destructive__destroy(skel);
+
+	return err;
+}
+
+static void test_destructive(void)
+{
+	__u64 save_caps = 0;
+
+	ASSERT_OK(test_destructive_open_and_load(), "succesful_load");
+
+	if (!ASSERT_OK(cap_disable_effective(1ULL << CAP_SYS_BOOT, &save_caps), "drop_caps"))
+		return;
+
+	ASSERT_EQ(test_destructive_open_and_load(), -13, "no_caps_failure");
+
+	cap_enable_effective(save_caps, NULL);
+}
+
 void test_kfunc_call(void)
 {
 	if (test__start_subtest("main"))
@@ -96,4 +129,7 @@ void test_kfunc_call(void)
 
 	if (test__start_subtest("subprog_lskel"))
 		test_subprog_lskel();
+
+	if (test__start_subtest("destructive"))
+		test_destructive();
 }
diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_destructive.c b/tools/testing/selftests/bpf/progs/kfunc_call_destructive.c
new file mode 100644
index 000000000000..767472bc5a97
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/kfunc_call_destructive.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+extern void bpf_kfunc_call_test_destructive(void) __ksym;
+
+SEC("tc")
+int kfunc_destructive_test(void)
+{
+	bpf_kfunc_call_test_destructive();
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
-- 
cgit v1.2.3


From d7c5802faff6e7f50d18db40fdcb7e50590177f5 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Wed, 10 Aug 2022 11:34:25 -0700
Subject: libbpf: preserve errno across pr_warn/pr_info/pr_debug
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As suggested in [0], make sure that libbpf_print saves and restores
errno, and as such guarantees that no matter what actual print callback
the user installs, macros like pr_warn/pr_info/pr_debug are completely
transparent as far as errno goes.

While libbpf code is pretty careful about not clobbering important errno
values accidentally with pr_warn(), it's a trivial change to make sure
that pr_warn can be used anywhere without a risk of clobbering errno.

No functional changes, just future proofing.

  [0] https://github.com/libbpf/libbpf/pull/536

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Daniel Müller <deso@posteo.net>
Link: https://lore.kernel.org/r/20220810183425.1998735-1-andrii@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/lib/bpf/libbpf.c | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index f7364ea82ac1..917d975bd4c6 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -223,13 +223,18 @@ __printf(2, 3)
 void libbpf_print(enum libbpf_print_level level, const char *format, ...)
 {
 	va_list args;
+	int old_errno;
 
 	if (!__libbpf_pr)
 		return;
 
+	old_errno = errno;
+
 	va_start(args, format);
 	__libbpf_pr(level, format, args);
 	va_end(args);
+
+	errno = old_errno;
 }
 
 static void pr_perm_msg(int err)
-- 
cgit v1.2.3


From 10b62d6a38f7c92e9f41983bb7d7669c9fa6e287 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Thu, 11 Aug 2022 11:40:20 +0800
Subject: libbpf: Add names for auxiliary maps

The bpftool self-created maps can appear in the final map show output due
to deferred removal in the kernel. These maps don't have a name, which
may confuse users about where they come from.

With a libbpf_ prefix name, users could know who created these maps.
It also could make some tests (like test_offload.py, which skip base maps
without names as a workaround) filter them out.

Kernel adds bpf prog/map name support in the same merge
commit fadad670a8ab ("Merge branch 'bpf-extend-info'"). So we can also use
kernel_supports(NULL, FEAT_PROG_NAME) to check if kernel supports map name.

As discussed in [1], let's make bpf_map_create accept a non-null name
string, and silently ignore the name if the kernel doesn't support it.

  [1] https://lore.kernel.org/bpf/CAEf4BzYL1TQwo1231s83pjTdFPk9XWWhfZC5=KzkU-VO0k=0Ug@mail.gmail.com/

Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220811034020.529685-1-liuhangbin@gmail.com
---
 tools/lib/bpf/bpf.c    | 2 +-
 tools/lib/bpf/libbpf.c | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index efcc06dafbd9..6a96e665dc5d 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -183,7 +183,7 @@ int bpf_map_create(enum bpf_map_type map_type,
 		return libbpf_err(-EINVAL);
 
 	attr.map_type = map_type;
-	if (map_name)
+	if (map_name && kernel_supports(NULL, FEAT_PROG_NAME))
 		libbpf_strlcpy(attr.map_name, map_name, sizeof(attr.map_name));
 	attr.key_size = key_size;
 	attr.value_size = value_size;
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 917d975bd4c6..3f01f5cd8a4c 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -4437,7 +4437,7 @@ static int probe_kern_global_data(void)
 	};
 	int ret, map, insn_cnt = ARRAY_SIZE(insns);
 
-	map = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), 32, 1, NULL);
+	map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_global", sizeof(int), 32, 1, NULL);
 	if (map < 0) {
 		ret = -errno;
 		cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
@@ -4570,7 +4570,7 @@ static int probe_kern_array_mmap(void)
 	LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_MMAPABLE);
 	int fd;
 
-	fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), sizeof(int), 1, &opts);
+	fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_mmap", sizeof(int), sizeof(int), 1, &opts);
 	return probe_fd(fd);
 }
 
@@ -4617,7 +4617,7 @@ static int probe_prog_bind_map(void)
 	};
 	int ret, map, prog, insn_cnt = ARRAY_SIZE(insns);
 
-	map = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), 32, 1, NULL);
+	map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_det_bind", sizeof(int), 32, 1, NULL);
 	if (map < 0) {
 		ret = -errno;
 		cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
-- 
cgit v1.2.3


From 54c939773b2d2c2e6676743c180cb2049bb3a40a Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin@isovalent.com>
Date: Fri, 12 Aug 2022 16:37:25 +0100
Subject: bpftool: Fix a typo in a comment

This is the wrong library name: libcap, not libpcap.

Signed-off-by: Quentin Monnet <quentin@isovalent.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20220812153727.224500-1-quentin@isovalent.com
---
 tools/bpf/bpftool/feature.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c
index 7ecabf7947fb..36cf0f1517c9 100644
--- a/tools/bpf/bpftool/feature.c
+++ b/tools/bpf/bpftool/feature.c
@@ -1147,7 +1147,7 @@ exit_free:
 	return res;
 #else
 	/* Detection assumes user has specific privileges.
-	 * We do not use libpcap so let's approximate, and restrict usage to
+	 * We do not use libcap so let's approximate, and restrict usage to
 	 * root user only.
 	 */
 	if (geteuid()) {
-- 
cgit v1.2.3


From 4961d0772578e8737afe61370743f3bc22867111 Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin@isovalent.com>
Date: Fri, 12 Aug 2022 16:37:27 +0100
Subject: bpf: Clear up confusion in bpf_skb_adjust_room()'s documentation

Adding or removing room space _below_ layers 2 or 3, as the description
mentions, is ambiguous. This was written with a mental image of the
packet with layer 2 at the top, layer 3 under it, and so on. But it has
led users to believe that it was on lower layers (before the beginning
of the L2 and L3 headers respectively).

Let's make it more explicit, and specify between which layers the room
space is adjusted.

Reported-by: Rumen Telbizov <rumen.telbizov@menlosecurity.com>
Signed-off-by: Quentin Monnet <quentin@isovalent.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20220812153727.224500-3-quentin@isovalent.com
---
 include/uapi/linux/bpf.h       | 6 ++++--
 tools/include/uapi/linux/bpf.h | 6 ++++--
 2 files changed, 8 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 7d1e2794d83e..934a2a8beb87 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2573,10 +2573,12 @@ union bpf_attr {
  *		There are two supported modes at this time:
  *
  *		* **BPF_ADJ_ROOM_MAC**: Adjust room at the mac layer
- *		  (room space is added or removed below the layer 2 header).
+ * 		  (room space is added or removed between the layer 2 and
+ * 		  layer 3 headers).
  *
  * 		* **BPF_ADJ_ROOM_NET**: Adjust room at the network layer
- * 		  (room space is added or removed below the layer 3 header).
+ * 		  (room space is added or removed between the layer 3 and
+ * 		  layer 4 headers).
  *
  *		The following flags are supported at this time:
  *
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index e174ad28aeb7..1d6085e15fc8 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -2573,10 +2573,12 @@ union bpf_attr {
  *		There are two supported modes at this time:
  *
  *		* **BPF_ADJ_ROOM_MAC**: Adjust room at the mac layer
- *		  (room space is added or removed below the layer 2 header).
+ * 		  (room space is added or removed between the layer 2 and
+ * 		  layer 3 headers).
  *
  * 		* **BPF_ADJ_ROOM_NET**: Adjust room at the network layer
- * 		  (room space is added or removed below the layer 3 header).
+ * 		  (room space is added or removed between the layer 3 and
+ * 		  layer 4 headers).
  *
  *		The following flags are supported at this time:
  *
-- 
cgit v1.2.3


From cea558855c39b7f1f02ff50dcf701ca6596bc964 Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin@isovalent.com>
Date: Mon, 15 Aug 2022 17:22:05 +0100
Subject: bpftool: Clear errno after libcap's checks

When bpftool is linked against libcap, the library runs a "constructor"
function to compute the number of capabilities of the running kernel
[0], at the beginning of the execution of the program. As part of this,
it performs multiple calls to prctl(). Some of these may fail, and set
errno to a non-zero value:

    # strace -e prctl ./bpftool version
    prctl(PR_CAPBSET_READ, CAP_MAC_OVERRIDE) = 1
    prctl(PR_CAPBSET_READ, 0x30 /* CAP_??? */) = -1 EINVAL (Invalid argument)
    prctl(PR_CAPBSET_READ, CAP_CHECKPOINT_RESTORE) = 1
    prctl(PR_CAPBSET_READ, 0x2c /* CAP_??? */) = -1 EINVAL (Invalid argument)
    prctl(PR_CAPBSET_READ, 0x2a /* CAP_??? */) = -1 EINVAL (Invalid argument)
    prctl(PR_CAPBSET_READ, 0x29 /* CAP_??? */) = -1 EINVAL (Invalid argument)
    ** fprintf added at the top of main(): we have errno == 1
    ./bpftool v7.0.0
    using libbpf v1.0
    features: libbfd, libbpf_strict, skeletons
    +++ exited with 0 +++

This has been addressed in libcap 2.63 [1], but until this version is
available everywhere, we can fix it on bpftool side.

Let's clean errno at the beginning of the main() function, to make sure
that these checks do not interfere with the batch mode, where we error
out if errno is set after a bpftool command.

  [0] https://git.kernel.org/pub/scm/libs/libcap/libcap.git/tree/libcap/cap_alloc.c?h=libcap-2.65#n20
  [1] https://git.kernel.org/pub/scm/libs/libcap/libcap.git/commit/?id=f25a1b7e69f7b33e6afb58b3e38f3450b7d2d9a0

Signed-off-by: Quentin Monnet <quentin@isovalent.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20220815162205.45043-1-quentin@isovalent.com
---
 tools/bpf/bpftool/main.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index 451cefc2d0da..ccd7457f92bf 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -435,6 +435,16 @@ int main(int argc, char **argv)
 
 	setlinebuf(stdout);
 
+#ifdef USE_LIBCAP
+	/* Libcap < 2.63 hooks before main() to compute the number of
+	 * capabilities of the running kernel, and doing so it calls prctl()
+	 * which may fail and set errno to non-zero.
+	 * Let's reset errno to make sure this does not interfere with the
+	 * batch mode.
+	 */
+	errno = 0;
+#endif
+
 	last_do_help = do_help;
 	pretty_output = false;
 	json_output = false;
-- 
cgit v1.2.3


From e81fbd4c1ba7b128a198c2843665e1186db449b6 Mon Sep 17 00:00:00 2001
From: Daniel Xu <dxu@dxuuu.xyz>
Date: Thu, 11 Aug 2022 15:55:25 -0600
Subject: selftests/bpf: Add existing connection bpf_*_ct_lookup() test

Add a test where we do a conntrack lookup on an existing connection.
This is nice because it's a more realistic test than artificially
creating a ct entry and looking it up afterwards.

Signed-off-by: Daniel Xu <dxu@dxuuu.xyz>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/bpf/de5a617832f38f8b5631cc87e2a836da7c94d497.1660254747.git.dxu@dxuuu.xyz
---
 tools/testing/selftests/bpf/prog_tests/bpf_nf.c | 59 +++++++++++++++++++++++++
 tools/testing/selftests/bpf/progs/test_bpf_nf.c | 18 ++++++++
 2 files changed, 77 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
index 7a74a1579076..88a2c0bdefec 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
@@ -24,10 +24,34 @@ enum {
 	TEST_TC_BPF,
 };
 
+#define TIMEOUT_MS 3000
+
+static int connect_to_server(int srv_fd)
+{
+	int fd = -1;
+
+	fd = socket(AF_INET, SOCK_STREAM, 0);
+	if (!ASSERT_GE(fd, 0, "socket"))
+		goto out;
+
+	if (!ASSERT_EQ(connect_fd_to_fd(fd, srv_fd, TIMEOUT_MS), 0, "connect_fd_to_fd")) {
+		close(fd);
+		fd = -1;
+	}
+out:
+	return fd;
+}
+
 static void test_bpf_nf_ct(int mode)
 {
+	const char *iptables = "iptables -t raw %s PREROUTING -j CT";
+	int srv_fd = -1, client_fd = -1, srv_client_fd = -1;
+	struct sockaddr_in peer_addr = {};
 	struct test_bpf_nf *skel;
 	int prog_fd, err;
+	socklen_t len;
+	u16 srv_port;
+	char cmd[64];
 	LIBBPF_OPTS(bpf_test_run_opts, topts,
 		.data_in = &pkt_v4,
 		.data_size_in = sizeof(pkt_v4),
@@ -38,6 +62,32 @@ static void test_bpf_nf_ct(int mode)
 	if (!ASSERT_OK_PTR(skel, "test_bpf_nf__open_and_load"))
 		return;
 
+	/* Enable connection tracking */
+	snprintf(cmd, sizeof(cmd), iptables, "-A");
+	if (!ASSERT_OK(system(cmd), "iptables"))
+		goto end;
+
+	srv_port = (mode == TEST_XDP) ? 5005 : 5006;
+	srv_fd = start_server(AF_INET, SOCK_STREAM, "127.0.0.1", srv_port, TIMEOUT_MS);
+	if (!ASSERT_GE(srv_fd, 0, "start_server"))
+		goto end;
+
+	client_fd = connect_to_server(srv_fd);
+	if (!ASSERT_GE(client_fd, 0, "connect_to_server"))
+		goto end;
+
+	len = sizeof(peer_addr);
+	srv_client_fd = accept(srv_fd, (struct sockaddr *)&peer_addr, &len);
+	if (!ASSERT_GE(srv_client_fd, 0, "accept"))
+		goto end;
+	if (!ASSERT_EQ(len, sizeof(struct sockaddr_in), "sockaddr len"))
+		goto end;
+
+	skel->bss->saddr = peer_addr.sin_addr.s_addr;
+	skel->bss->sport = peer_addr.sin_port;
+	skel->bss->daddr = peer_addr.sin_addr.s_addr;
+	skel->bss->dport = htons(srv_port);
+
 	if (mode == TEST_XDP)
 		prog_fd = bpf_program__fd(skel->progs.nf_xdp_ct_test);
 	else
@@ -63,7 +113,16 @@ static void test_bpf_nf_ct(int mode)
 	ASSERT_LE(skel->bss->test_delta_timeout, 10, "Test for max ct timeout update");
 	/* expected status is IPS_SEEN_REPLY */
 	ASSERT_EQ(skel->bss->test_status, 2, "Test for ct status update ");
+	ASSERT_EQ(skel->data->test_exist_lookup, 0, "Test existing connection lookup");
 end:
+	if (srv_client_fd != -1)
+		close(srv_client_fd);
+	if (client_fd != -1)
+		close(client_fd);
+	if (srv_fd != -1)
+		close(srv_fd);
+	snprintf(cmd, sizeof(cmd), iptables, "-D");
+	system(cmd);
 	test_bpf_nf__destroy(skel);
 }
 
diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf.c b/tools/testing/selftests/bpf/progs/test_bpf_nf.c
index 196cd8dfe42a..84e0fd479794 100644
--- a/tools/testing/selftests/bpf/progs/test_bpf_nf.c
+++ b/tools/testing/selftests/bpf/progs/test_bpf_nf.c
@@ -23,6 +23,11 @@ int test_insert_entry = -EAFNOSUPPORT;
 int test_succ_lookup = -ENOENT;
 u32 test_delta_timeout = 0;
 u32 test_status = 0;
+__be32 saddr = 0;
+__be16 sport = 0;
+__be32 daddr = 0;
+__be16 dport = 0;
+int test_exist_lookup = -ENOENT;
 
 struct nf_conn;
 
@@ -160,6 +165,19 @@ nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32,
 		}
 		test_alloc_entry = 0;
 	}
+
+	bpf_tuple.ipv4.saddr = saddr;
+	bpf_tuple.ipv4.daddr = daddr;
+	bpf_tuple.ipv4.sport = sport;
+	bpf_tuple.ipv4.dport = dport;
+	ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
+		       sizeof(opts_def));
+	if (ct) {
+		test_exist_lookup = 0;
+		bpf_ct_release(ct);
+	} else {
+		test_exist_lookup = opts_def.error;
+	}
 }
 
 SEC("xdp")
-- 
cgit v1.2.3


From 99799de2cba2d399acf65f49a986b3d5cf0732ab Mon Sep 17 00:00:00 2001
From: Daniel Xu <dxu@dxuuu.xyz>
Date: Thu, 11 Aug 2022 15:55:26 -0600
Subject: selftests/bpf: Add connmark read test

Test that the prog can read from the connection mark. This test is nice
because it ensures progs can interact with netfilter subsystem
correctly.

Signed-off-by: Daniel Xu <dxu@dxuuu.xyz>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/bpf/d3bc620a491e4c626c20d80631063922cbe13e2b.1660254747.git.dxu@dxuuu.xyz
---
 tools/testing/selftests/bpf/prog_tests/bpf_nf.c | 3 ++-
 tools/testing/selftests/bpf/progs/test_bpf_nf.c | 3 +++
 2 files changed, 5 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
index 88a2c0bdefec..544bf90ac2a7 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
@@ -44,7 +44,7 @@ out:
 
 static void test_bpf_nf_ct(int mode)
 {
-	const char *iptables = "iptables -t raw %s PREROUTING -j CT";
+	const char *iptables = "iptables -t raw %s PREROUTING -j CONNMARK --set-mark 42/0";
 	int srv_fd = -1, client_fd = -1, srv_client_fd = -1;
 	struct sockaddr_in peer_addr = {};
 	struct test_bpf_nf *skel;
@@ -114,6 +114,7 @@ static void test_bpf_nf_ct(int mode)
 	/* expected status is IPS_SEEN_REPLY */
 	ASSERT_EQ(skel->bss->test_status, 2, "Test for ct status update ");
 	ASSERT_EQ(skel->data->test_exist_lookup, 0, "Test existing connection lookup");
+	ASSERT_EQ(skel->bss->test_exist_lookup_mark, 43, "Test existing connection lookup ctmark");
 end:
 	if (srv_client_fd != -1)
 		close(srv_client_fd);
diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf.c b/tools/testing/selftests/bpf/progs/test_bpf_nf.c
index 84e0fd479794..2722441850cc 100644
--- a/tools/testing/selftests/bpf/progs/test_bpf_nf.c
+++ b/tools/testing/selftests/bpf/progs/test_bpf_nf.c
@@ -28,6 +28,7 @@ __be16 sport = 0;
 __be32 daddr = 0;
 __be16 dport = 0;
 int test_exist_lookup = -ENOENT;
+u32 test_exist_lookup_mark = 0;
 
 struct nf_conn;
 
@@ -174,6 +175,8 @@ nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32,
 		       sizeof(opts_def));
 	if (ct) {
 		test_exist_lookup = 0;
+		if (ct->mark == 42)
+			test_exist_lookup_mark = 43;
 		bpf_ct_release(ct);
 	} else {
 		test_exist_lookup = opts_def.error;
-- 
cgit v1.2.3


From 8308bf207ce6963adb42791cfb260dc6552b6665 Mon Sep 17 00:00:00 2001
From: Daniel Xu <dxu@dxuuu.xyz>
Date: Thu, 11 Aug 2022 15:55:27 -0600
Subject: selftests/bpf: Update CI kconfig

The previous selftest changes require two kconfig changes in bpf-ci.

Signed-off-by: Daniel Xu <dxu@dxuuu.xyz>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/bpf/2c27c6ebf7a03954915f83560653752450389564.1660254747.git.dxu@dxuuu.xyz
---
 tools/testing/selftests/bpf/config | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index fabf0c014349..3fc46f9cfb22 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -50,9 +50,11 @@ CONFIG_NET_SCHED=y
 CONFIG_NETDEVSIM=m
 CONFIG_NETFILTER=y
 CONFIG_NETFILTER_SYNPROXY=y
+CONFIG_NETFILTER_XT_CONNMARK=y
 CONFIG_NETFILTER_XT_MATCH_STATE=y
 CONFIG_NETFILTER_XT_TARGET_CT=y
 CONFIG_NF_CONNTRACK=y
+CONFIG_NF_CONNTRACK_MARK=y
 CONFIG_NF_DEFRAG_IPV4=y
 CONFIG_NF_DEFRAG_IPV6=y
 CONFIG_RC_CORE=y
-- 
cgit v1.2.3


From 1f235777c3a4ab115162fe7d45b82be534b9ae2e Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Sat, 13 Aug 2022 08:09:36 +0800
Subject: libbpf: Making bpf_prog_load() ignore name if kernel doesn't support

Similar to commit 10b62d6a38f7 ("libbpf: Add names for auxiliary maps"),
let's make bpf_prog_load() also ignore the name if the kernel doesn't
support program names.

To achieve this, we need to call sys_bpf_prog_load() directly in
probe_kern_prog_name() to avoid a circular dependency. sys_bpf_prog_load()
also needs to be exported in the libbpf_internal.h file.

Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Quentin Monnet <quentin@isovalent.com>
Link: https://lore.kernel.org/bpf/20220813000936.6464-1-liuhangbin@gmail.com
---
 tools/lib/bpf/bpf.c             |  6 ++----
 tools/lib/bpf/libbpf.c          | 13 +++++++++++--
 tools/lib/bpf/libbpf_internal.h |  3 +++
 3 files changed, 16 insertions(+), 6 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 6a96e665dc5d..575867d69496 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -84,9 +84,7 @@ static inline int sys_bpf_fd(enum bpf_cmd cmd, union bpf_attr *attr,
 	return ensure_good_fd(fd);
 }
 
-#define PROG_LOAD_ATTEMPTS 5
-
-static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size, int attempts)
+int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size, int attempts)
 {
 	int fd;
 
@@ -263,7 +261,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type,
 	attr.prog_ifindex = OPTS_GET(opts, prog_ifindex, 0);
 	attr.kern_version = OPTS_GET(opts, kern_version, 0);
 
-	if (prog_name)
+	if (prog_name && kernel_supports(NULL, FEAT_PROG_NAME))
 		libbpf_strlcpy(attr.prog_name, prog_name, sizeof(attr.prog_name));
 	attr.license = ptr_to_u64(license);
 
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 3f01f5cd8a4c..aa05a99b913d 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -4415,14 +4415,23 @@ static int probe_fd(int fd)
 
 static int probe_kern_prog_name(void)
 {
+	const size_t attr_sz = offsetofend(union bpf_attr, prog_name);
 	struct bpf_insn insns[] = {
 		BPF_MOV64_IMM(BPF_REG_0, 0),
 		BPF_EXIT_INSN(),
 	};
-	int ret, insn_cnt = ARRAY_SIZE(insns);
+	union bpf_attr attr;
+	int ret;
+
+	memset(&attr, 0, attr_sz);
+	attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+	attr.license = ptr_to_u64("GPL");
+	attr.insns = ptr_to_u64(insns);
+	attr.insn_cnt = (__u32)ARRAY_SIZE(insns);
+	libbpf_strlcpy(attr.prog_name, "libbpf_nametest", sizeof(attr.prog_name));
 
 	/* make sure loading with name works */
-	ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "test", "GPL", insns, insn_cnt, NULL);
+	ret = sys_bpf_prog_load(&attr, attr_sz, PROG_LOAD_ATTEMPTS);
 	return probe_fd(ret);
 }
 
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index 4135ae0a2bc3..377642ff51fc 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -573,4 +573,7 @@ static inline bool is_pow_of_2(size_t x)
 	return x && (x & (x - 1)) == 0;
 }
 
+#define PROG_LOAD_ATTEMPTS 5
+int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size, int attempts);
+
 #endif /* __LIBBPF_LIBBPF_INTERNAL_H */
-- 
cgit v1.2.3


From 93d7c52a6eb93e58e4569bd4de95ba3b19e3cf20 Mon Sep 17 00:00:00 2001
From: Gautam Menghani <gautammenghani201@gmail.com>
Date: Sat, 6 Aug 2022 22:05:30 +0530
Subject: selftests/net: Refactor xfrm_fill_key() to use array of structs

A TODO in net/ipsec.c asks to refactor the code in xfrm_fill_key() to
use set/map to avoid manually comparing each algorithm with the "name"
parameter passed to the function as an argument. This patch refactors
the code to create an array of structs where each struct contains the
algorithm name and its corresponding key length.

Signed-off-by: Gautam Menghani <gautammenghani201@gmail.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 tools/testing/selftests/net/ipsec.c | 104 ++++++++++++++++--------------------
 1 file changed, 45 insertions(+), 59 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/ipsec.c b/tools/testing/selftests/net/ipsec.c
index cc10c10c5ed9..9a8229abfa02 100644
--- a/tools/testing/selftests/net/ipsec.c
+++ b/tools/testing/selftests/net/ipsec.c
@@ -58,6 +58,8 @@
 #define VETH_FMT	"ktst-%d"
 #define VETH_LEN	12
 
+#define XFRM_ALGO_NR_KEYS 29
+
 static int nsfd_parent	= -1;
 static int nsfd_childa	= -1;
 static int nsfd_childb	= -1;
@@ -75,6 +77,43 @@ const unsigned int ping_timeout		= 300;
 const unsigned int ping_count		= 100;
 const unsigned int ping_success		= 80;
 
+struct xfrm_key_entry {
+	char algo_name[35];
+	int key_len;
+};
+
+struct xfrm_key_entry xfrm_key_entries[] = {
+	{"digest_null", 0},
+	{"ecb(cipher_null)", 0},
+	{"cbc(des)", 64},
+	{"hmac(md5)", 128},
+	{"cmac(aes)", 128},
+	{"xcbc(aes)", 128},
+	{"cbc(cast5)", 128},
+	{"cbc(serpent)", 128},
+	{"hmac(sha1)", 160},
+	{"hmac(rmd160)", 160},
+	{"cbc(des3_ede)", 192},
+	{"hmac(sha256)", 256},
+	{"cbc(aes)", 256},
+	{"cbc(camellia)", 256},
+	{"cbc(twofish)", 256},
+	{"rfc3686(ctr(aes))", 288},
+	{"hmac(sha384)", 384},
+	{"cbc(blowfish)", 448},
+	{"hmac(sha512)", 512},
+	{"rfc4106(gcm(aes))-128", 160},
+	{"rfc4543(gcm(aes))-128", 160},
+	{"rfc4309(ccm(aes))-128", 152},
+	{"rfc4106(gcm(aes))-192", 224},
+	{"rfc4543(gcm(aes))-192", 224},
+	{"rfc4309(ccm(aes))-192", 216},
+	{"rfc4106(gcm(aes))-256", 288},
+	{"rfc4543(gcm(aes))-256", 288},
+	{"rfc4309(ccm(aes))-256", 280},
+	{"rfc7539(chacha20,poly1305)-128", 0}
+};
+
 static void randomize_buffer(void *buf, size_t buflen)
 {
 	int *p = (int *)buf;
@@ -767,65 +806,12 @@ static int do_ping(int cmd_fd, char *buf, size_t buf_len, struct in_addr from,
 static int xfrm_fill_key(char *name, char *buf,
 		size_t buf_len, unsigned int *key_len)
 {
-	/* TODO: use set/map instead */
-	if (strncmp(name, "digest_null", ALGO_LEN) == 0)
-		*key_len = 0;
-	else if (strncmp(name, "ecb(cipher_null)", ALGO_LEN) == 0)
-		*key_len = 0;
-	else if (strncmp(name, "cbc(des)", ALGO_LEN) == 0)
-		*key_len = 64;
-	else if (strncmp(name, "hmac(md5)", ALGO_LEN) == 0)
-		*key_len = 128;
-	else if (strncmp(name, "cmac(aes)", ALGO_LEN) == 0)
-		*key_len = 128;
-	else if (strncmp(name, "xcbc(aes)", ALGO_LEN) == 0)
-		*key_len = 128;
-	else if (strncmp(name, "cbc(cast5)", ALGO_LEN) == 0)
-		*key_len = 128;
-	else if (strncmp(name, "cbc(serpent)", ALGO_LEN) == 0)
-		*key_len = 128;
-	else if (strncmp(name, "hmac(sha1)", ALGO_LEN) == 0)
-		*key_len = 160;
-	else if (strncmp(name, "hmac(rmd160)", ALGO_LEN) == 0)
-		*key_len = 160;
-	else if (strncmp(name, "cbc(des3_ede)", ALGO_LEN) == 0)
-		*key_len = 192;
-	else if (strncmp(name, "hmac(sha256)", ALGO_LEN) == 0)
-		*key_len = 256;
-	else if (strncmp(name, "cbc(aes)", ALGO_LEN) == 0)
-		*key_len = 256;
-	else if (strncmp(name, "cbc(camellia)", ALGO_LEN) == 0)
-		*key_len = 256;
-	else if (strncmp(name, "cbc(twofish)", ALGO_LEN) == 0)
-		*key_len = 256;
-	else if (strncmp(name, "rfc3686(ctr(aes))", ALGO_LEN) == 0)
-		*key_len = 288;
-	else if (strncmp(name, "hmac(sha384)", ALGO_LEN) == 0)
-		*key_len = 384;
-	else if (strncmp(name, "cbc(blowfish)", ALGO_LEN) == 0)
-		*key_len = 448;
-	else if (strncmp(name, "hmac(sha512)", ALGO_LEN) == 0)
-		*key_len = 512;
-	else if (strncmp(name, "rfc4106(gcm(aes))-128", ALGO_LEN) == 0)
-		*key_len = 160;
-	else if (strncmp(name, "rfc4543(gcm(aes))-128", ALGO_LEN) == 0)
-		*key_len = 160;
-	else if (strncmp(name, "rfc4309(ccm(aes))-128", ALGO_LEN) == 0)
-		*key_len = 152;
-	else if (strncmp(name, "rfc4106(gcm(aes))-192", ALGO_LEN) == 0)
-		*key_len = 224;
-	else if (strncmp(name, "rfc4543(gcm(aes))-192", ALGO_LEN) == 0)
-		*key_len = 224;
-	else if (strncmp(name, "rfc4309(ccm(aes))-192", ALGO_LEN) == 0)
-		*key_len = 216;
-	else if (strncmp(name, "rfc4106(gcm(aes))-256", ALGO_LEN) == 0)
-		*key_len = 288;
-	else if (strncmp(name, "rfc4543(gcm(aes))-256", ALGO_LEN) == 0)
-		*key_len = 288;
-	else if (strncmp(name, "rfc4309(ccm(aes))-256", ALGO_LEN) == 0)
-		*key_len = 280;
-	else if (strncmp(name, "rfc7539(chacha20,poly1305)-128", ALGO_LEN) == 0)
-		*key_len = 0;
+	int i;
+
+	for (i = 0; i < XFRM_ALGO_NR_KEYS; i++) {
+		if (strncmp(name, xfrm_key_entries[i].algo_name, ALGO_LEN) == 0)
+			*key_len = xfrm_key_entries[i].key_len;
+	}
 
 	if (*key_len > buf_len) {
 		printk("Can't pack a key - too big for buffer");
-- 
cgit v1.2.3


From 807662cac66af0dfca60ce1cf784063da6ec2f65 Mon Sep 17 00:00:00 2001
From: Artem Savkov <asavkov@redhat.com>
Date: Tue, 16 Aug 2022 07:52:31 +0200
Subject: selftests/bpf: Fix attach point for non-x86 arches in test_progs/lsm

Use SYS_PREFIX macro from bpf_misc.h instead of hard-coded '__x64_'
prefix for sys_setdomainname attach point in lsm test.

Signed-off-by: Artem Savkov <asavkov@redhat.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220816055231.717006-1-asavkov@redhat.com
---
 tools/testing/selftests/bpf/DENYLIST.s390x | 2 +-
 tools/testing/selftests/bpf/progs/lsm.c    | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x
index e33cab34d22f..9d8de15e725e 100644
--- a/tools/testing/selftests/bpf/DENYLIST.s390x
+++ b/tools/testing/selftests/bpf/DENYLIST.s390x
@@ -43,7 +43,7 @@ test_bpffs                               # bpffs test  failed 255
 test_bprm_opts                           # failed to auto-attach program 'secure_exec': -524                           (trampoline)
 test_ima                                 # failed to auto-attach program 'ima': -524                                   (trampoline)
 test_local_storage                       # failed to auto-attach program 'unlink_hook': -524                           (trampoline)
-test_lsm                                 # failed to find kernel BTF type ID of '__x64_sys_setdomainname': -3          (?)
+test_lsm                                 # attach unexpected error: -524                                               (trampoline)
 test_overhead                            # attach_fentry unexpected error: -524                                        (trampoline)
 test_profiler                            # unknown func bpf_probe_read_str#45                                          (overlapping)
 timer                                    # failed to auto-attach program 'test1': -524                                 (trampoline)
diff --git a/tools/testing/selftests/bpf/progs/lsm.c b/tools/testing/selftests/bpf/progs/lsm.c
index 33694ef8acfa..d8d8af623bc2 100644
--- a/tools/testing/selftests/bpf/progs/lsm.c
+++ b/tools/testing/selftests/bpf/progs/lsm.c
@@ -4,6 +4,7 @@
  * Copyright 2020 Google LLC.
  */
 
+#include "bpf_misc.h"
 #include "vmlinux.h"
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
@@ -160,7 +161,7 @@ int BPF_PROG(test_task_free, struct task_struct *task)
 
 int copy_test = 0;
 
-SEC("fentry.s/__x64_sys_setdomainname")
+SEC("fentry.s/" SYS_PREFIX "sys_setdomainname")
 int BPF_PROG(test_sys_setdomainname, struct pt_regs *regs)
 {
 	void *ptr = (void *)PT_REGS_PARM1(regs);
-- 
cgit v1.2.3


From 43cb8cbadffa21e88a65dd1129c86f5552d6c42e Mon Sep 17 00:00:00 2001
From: Hao Luo <haoluo@google.com>
Date: Tue, 16 Aug 2022 16:40:11 -0700
Subject: libbpf: Allows disabling auto attach

Adds libbpf APIs for disabling auto-attach for individual functions.
This is motivated by the use case of cgroup iter [1]. Some iter
types require their parameters to be non-zero, therefore applying
auto-attach on them will fail. With these two new APIs, users who
want to use auto-attach and these types of iters can disable
auto-attach on the program and perform manual attach.

[1] https://lore.kernel.org/bpf/CAEf4BzZ+a2uDo_t6kGBziqdz--m2gh2_EUwkGLDtMd65uwxUjA@mail.gmail.com/

Signed-off-by: Hao Luo <haoluo@google.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220816234012.910255-1-haoluo@google.com
---
 tools/lib/bpf/libbpf.c   | 15 ++++++++++++++-
 tools/lib/bpf/libbpf.h   |  2 ++
 tools/lib/bpf/libbpf.map |  2 ++
 3 files changed, 18 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index aa05a99b913d..0159a43c7efd 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -417,6 +417,7 @@ struct bpf_program {
 
 	int fd;
 	bool autoload;
+	bool autoattach;
 	bool mark_btf_static;
 	enum bpf_prog_type type;
 	enum bpf_attach_type expected_attach_type;
@@ -755,6 +756,8 @@ bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,
 		prog->autoload = true;
 	}
 
+	prog->autoattach = true;
+
 	/* inherit object's log_level */
 	prog->log_level = obj->log_level;
 
@@ -8314,6 +8317,16 @@ int bpf_program__set_autoload(struct bpf_program *prog, bool autoload)
 	return 0;
 }
 
+bool bpf_program__autoattach(const struct bpf_program *prog)
+{
+	return prog->autoattach;
+}
+
+void bpf_program__set_autoattach(struct bpf_program *prog, bool autoattach)
+{
+	prog->autoattach = autoattach;
+}
+
 const struct bpf_insn *bpf_program__insns(const struct bpf_program *prog)
 {
 	return prog->insns;
@@ -12346,7 +12359,7 @@ int bpf_object__attach_skeleton(struct bpf_object_skeleton *s)
 		struct bpf_program *prog = *s->progs[i].prog;
 		struct bpf_link **link = s->progs[i].link;
 
-		if (!prog->autoload)
+		if (!prog->autoload || !prog->autoattach)
 			continue;
 
 		/* auto-attaching not supported for this program */
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 61493c4cddac..88a1ac34b12a 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -260,6 +260,8 @@ LIBBPF_API const char *bpf_program__name(const struct bpf_program *prog);
 LIBBPF_API const char *bpf_program__section_name(const struct bpf_program *prog);
 LIBBPF_API bool bpf_program__autoload(const struct bpf_program *prog);
 LIBBPF_API int bpf_program__set_autoload(struct bpf_program *prog, bool autoload);
+LIBBPF_API bool bpf_program__autoattach(const struct bpf_program *prog);
+LIBBPF_API void bpf_program__set_autoattach(struct bpf_program *prog, bool autoattach);
 
 struct bpf_insn;
 
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 119e6e1ea7f1..2b928dc21af0 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -358,6 +358,8 @@ LIBBPF_1.0.0 {
 		bpf_obj_get_opts;
 		bpf_prog_query_opts;
 		bpf_program__attach_ksyscall;
+		bpf_program__autoattach;
+		bpf_program__set_autoattach;
 		btf__add_enum64;
 		btf__add_enum64_value;
 		libbpf_bpf_attach_type_str;
-- 
cgit v1.2.3


From 738a2f2f9130f98f92ccb3efd94d4879c0a0990c Mon Sep 17 00:00:00 2001
From: Hao Luo <haoluo@google.com>
Date: Tue, 16 Aug 2022 16:40:12 -0700
Subject: selftests/bpf: Tests libbpf autoattach APIs

Adds test for libbpf APIs that toggle bpf program auto-attaching.

Signed-off-by: Hao Luo <haoluo@google.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220816234012.910255-2-haoluo@google.com
---
 .../testing/selftests/bpf/prog_tests/autoattach.c  | 30 ++++++++++++++++++++++
 .../testing/selftests/bpf/progs/test_autoattach.c  | 23 +++++++++++++++++
 2 files changed, 53 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/autoattach.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_autoattach.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/autoattach.c b/tools/testing/selftests/bpf/prog_tests/autoattach.c
new file mode 100644
index 000000000000..dc5e01d279bd
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/autoattach.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Google */
+
+#include <test_progs.h>
+#include "test_autoattach.skel.h"
+
+void test_autoattach(void)
+{
+	struct test_autoattach *skel;
+
+	skel = test_autoattach__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+		goto cleanup;
+
+	/* disable auto-attach for prog2 */
+	bpf_program__set_autoattach(skel->progs.prog2, false);
+	ASSERT_TRUE(bpf_program__autoattach(skel->progs.prog1), "autoattach_prog1");
+	ASSERT_FALSE(bpf_program__autoattach(skel->progs.prog2), "autoattach_prog2");
+	if (!ASSERT_OK(test_autoattach__attach(skel), "skel_attach"))
+		goto cleanup;
+
+	usleep(1);
+
+	ASSERT_TRUE(skel->bss->prog1_called, "attached_prog1");
+	ASSERT_FALSE(skel->bss->prog2_called, "attached_prog2");
+
+cleanup:
+	test_autoattach__destroy(skel);
+}
+
diff --git a/tools/testing/selftests/bpf/progs/test_autoattach.c b/tools/testing/selftests/bpf/progs/test_autoattach.c
new file mode 100644
index 000000000000..11a44493ebce
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_autoattach.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Google */
+
+#include "vmlinux.h"
+#include <bpf/bpf_tracing.h>
+
+bool prog1_called = false;
+bool prog2_called = false;
+
+SEC("raw_tp/sys_enter")
+int prog1(const void *ctx)
+{
+	prog1_called = true; /* the userspace test expects this flag to be set after attach */
+	return 0;
+}
+
+SEC("raw_tp/sys_exit")
+int prog2(const void *ctx)
+{
+	prog2_called = true; /* the userspace test expects this flag to stay false */
+	return 0;
+}
+
-- 
cgit v1.2.3


From d4e6d684f3bea46a2fc195765c77a3b26bcb080e Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Mon, 15 Aug 2022 17:19:26 -0700
Subject: libbpf: Fix potential NULL dereference when parsing ELF

Fix the if condition filtering empty ELF sections to prevent a NULL
dereference.

Fixes: 47ea7417b074 ("libbpf: Skip empty sections in bpf_object__init_global_data_maps")
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Hao Luo <haoluo@google.com>
Link: https://lore.kernel.org/bpf/20220816001929.369487-2-andrii@kernel.org
---
 tools/lib/bpf/libbpf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 0159a43c7efd..146d35526b87 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -1649,7 +1649,7 @@ static int bpf_object__init_global_data_maps(struct bpf_object *obj)
 		sec_desc = &obj->efile.secs[sec_idx];
 
 		/* Skip recognized sections with size 0. */
-		if (sec_desc->data && sec_desc->data->d_size == 0)
+		if (!sec_desc->data || sec_desc->data->d_size == 0)
 			continue;
 
 		switch (sec_desc->sec_type) {
-- 
cgit v1.2.3


From 813847a31447feba6119df4ee77a7c0c7a77fc72 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Mon, 15 Aug 2022 17:19:27 -0700
Subject: libbpf: Streamline bpf_attr and perf_event_attr initialization

Make sure that the entire libbpf code base initializes bpf_attr and
perf_event_attr with memset(0). Also, for bpf_attr make sure we
clear and pass to the kernel only the relevant parts of bpf_attr. bpf_attr
is a huge union of independent sub-command attributes, so there is no
need to clear and pass the entire union bpf_attr, which grows quite a lot
over time; for most commands this growth is completely irrelevant.

A few cases where we were relying on compiler initialization of BPF UAPI
structs (like bpf_prog_info, bpf_map_info, etc) with `= {};` were
switched to the memset(0) pattern for future-proofing.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Hao Luo <haoluo@google.com>
Link: https://lore.kernel.org/bpf/20220816001929.369487-3-andrii@kernel.org
---
 tools/lib/bpf/bpf.c           | 173 +++++++++++++++++++++++++-----------------
 tools/lib/bpf/libbpf.c        |  43 +++++++----
 tools/lib/bpf/netlink.c       |   3 +-
 tools/lib/bpf/skel_internal.h |  10 ++-
 4 files changed, 138 insertions(+), 91 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 575867d69496..e3a0bd7efa2f 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -105,7 +105,7 @@ int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size, int attempts)
  */
 int probe_memcg_account(void)
 {
-	const size_t prog_load_attr_sz = offsetofend(union bpf_attr, attach_btf_obj_fd);
+	const size_t attr_sz = offsetofend(union bpf_attr, attach_btf_obj_fd);
 	struct bpf_insn insns[] = {
 		BPF_EMIT_CALL(BPF_FUNC_ktime_get_coarse_ns),
 		BPF_EXIT_INSN(),
@@ -115,13 +115,13 @@ int probe_memcg_account(void)
 	int prog_fd;
 
 	/* attempt loading freplace trying to use custom BTF */
-	memset(&attr, 0, prog_load_attr_sz);
+	memset(&attr, 0, attr_sz);
 	attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
 	attr.insns = ptr_to_u64(insns);
 	attr.insn_cnt = insn_cnt;
 	attr.license = ptr_to_u64("GPL");
 
-	prog_fd = sys_bpf_fd(BPF_PROG_LOAD, &attr, prog_load_attr_sz);
+	prog_fd = sys_bpf_fd(BPF_PROG_LOAD, &attr, attr_sz);
 	if (prog_fd >= 0) {
 		close(prog_fd);
 		return 1;
@@ -232,6 +232,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type,
 		  const struct bpf_insn *insns, size_t insn_cnt,
 		  const struct bpf_prog_load_opts *opts)
 {
+	const size_t attr_sz = offsetofend(union bpf_attr, fd_array);
 	void *finfo = NULL, *linfo = NULL;
 	const char *func_info, *line_info;
 	__u32 log_size, log_level, attach_prog_fd, attach_btf_obj_fd;
@@ -251,7 +252,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type,
 	if (attempts == 0)
 		attempts = PROG_LOAD_ATTEMPTS;
 
-	memset(&attr, 0, sizeof(attr));
+	memset(&attr, 0, attr_sz);
 
 	attr.prog_type = prog_type;
 	attr.expected_attach_type = OPTS_GET(opts, expected_attach_type, 0);
@@ -314,7 +315,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type,
 		attr.log_level = log_level;
 	}
 
-	fd = sys_bpf_prog_load(&attr, sizeof(attr), attempts);
+	fd = sys_bpf_prog_load(&attr, attr_sz, attempts);
 	if (fd >= 0)
 		return fd;
 
@@ -354,7 +355,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type,
 			break;
 		}
 
-		fd = sys_bpf_prog_load(&attr, sizeof(attr), attempts);
+		fd = sys_bpf_prog_load(&attr, attr_sz, attempts);
 		if (fd >= 0)
 			goto done;
 	}
@@ -368,7 +369,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type,
 		attr.log_size = log_size;
 		attr.log_level = 1;
 
-		fd = sys_bpf_prog_load(&attr, sizeof(attr), attempts);
+		fd = sys_bpf_prog_load(&attr, attr_sz, attempts);
 	}
 done:
 	/* free() doesn't affect errno, so we don't need to restore it */
@@ -380,127 +381,136 @@ done:
 int bpf_map_update_elem(int fd, const void *key, const void *value,
 			__u64 flags)
 {
+	const size_t attr_sz = offsetofend(union bpf_attr, flags);
 	union bpf_attr attr;
 	int ret;
 
-	memset(&attr, 0, sizeof(attr));
+	memset(&attr, 0, attr_sz);
 	attr.map_fd = fd;
 	attr.key = ptr_to_u64(key);
 	attr.value = ptr_to_u64(value);
 	attr.flags = flags;
 
-	ret = sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, sizeof(attr));
+	ret = sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, attr_sz);
 	return libbpf_err_errno(ret);
 }
 
 int bpf_map_lookup_elem(int fd, const void *key, void *value)
 {
+	const size_t attr_sz = offsetofend(union bpf_attr, flags);
 	union bpf_attr attr;
 	int ret;
 
-	memset(&attr, 0, sizeof(attr));
+	memset(&attr, 0, attr_sz);
 	attr.map_fd = fd;
 	attr.key = ptr_to_u64(key);
 	attr.value = ptr_to_u64(value);
 
-	ret = sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
+	ret = sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, attr_sz);
 	return libbpf_err_errno(ret);
 }
 
 int bpf_map_lookup_elem_flags(int fd, const void *key, void *value, __u64 flags)
 {
+	const size_t attr_sz = offsetofend(union bpf_attr, flags);
 	union bpf_attr attr;
 	int ret;
 
-	memset(&attr, 0, sizeof(attr));
+	memset(&attr, 0, attr_sz);
 	attr.map_fd = fd;
 	attr.key = ptr_to_u64(key);
 	attr.value = ptr_to_u64(value);
 	attr.flags = flags;
 
-	ret = sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr));
+	ret = sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, attr_sz);
 	return libbpf_err_errno(ret);
 }
 
 int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value)
 {
+	const size_t attr_sz = offsetofend(union bpf_attr, flags);
 	union bpf_attr attr;
 	int ret;
 
-	memset(&attr, 0, sizeof(attr));
+	memset(&attr, 0, attr_sz);
 	attr.map_fd = fd;
 	attr.key = ptr_to_u64(key);
 	attr.value = ptr_to_u64(value);
 
-	ret = sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, sizeof(attr));
+	ret = sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, attr_sz);
 	return libbpf_err_errno(ret);
 }
 
 int bpf_map_lookup_and_delete_elem_flags(int fd, const void *key, void *value, __u64 flags)
 {
+	const size_t attr_sz = offsetofend(union bpf_attr, flags);
 	union bpf_attr attr;
 	int ret;
 
-	memset(&attr, 0, sizeof(attr));
+	memset(&attr, 0, attr_sz);
 	attr.map_fd = fd;
 	attr.key = ptr_to_u64(key);
 	attr.value = ptr_to_u64(value);
 	attr.flags = flags;
 
-	ret = sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, sizeof(attr));
+	ret = sys_bpf(BPF_MAP_LOOKUP_AND_DELETE_ELEM, &attr, attr_sz);
 	return libbpf_err_errno(ret);
 }
 
 int bpf_map_delete_elem(int fd, const void *key)
 {
+	const size_t attr_sz = offsetofend(union bpf_attr, flags);
 	union bpf_attr attr;
 	int ret;
 
-	memset(&attr, 0, sizeof(attr));
+	memset(&attr, 0, attr_sz);
 	attr.map_fd = fd;
 	attr.key = ptr_to_u64(key);
 
-	ret = sys_bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr));
+	ret = sys_bpf(BPF_MAP_DELETE_ELEM, &attr, attr_sz);
 	return libbpf_err_errno(ret);
 }
 
 int bpf_map_delete_elem_flags(int fd, const void *key, __u64 flags)
 {
+	const size_t attr_sz = offsetofend(union bpf_attr, flags);
 	union bpf_attr attr;
 	int ret;
 
-	memset(&attr, 0, sizeof(attr));
+	memset(&attr, 0, attr_sz);
 	attr.map_fd = fd;
 	attr.key = ptr_to_u64(key);
 	attr.flags = flags;
 
-	ret = sys_bpf(BPF_MAP_DELETE_ELEM, &attr, sizeof(attr));
+	ret = sys_bpf(BPF_MAP_DELETE_ELEM, &attr, attr_sz);
 	return libbpf_err_errno(ret);
 }
 
 int bpf_map_get_next_key(int fd, const void *key, void *next_key)
 {
+	const size_t attr_sz = offsetofend(union bpf_attr, next_key);
 	union bpf_attr attr;
 	int ret;
 
-	memset(&attr, 0, sizeof(attr));
+	memset(&attr, 0, attr_sz);
 	attr.map_fd = fd;
 	attr.key = ptr_to_u64(key);
 	attr.next_key = ptr_to_u64(next_key);
 
-	ret = sys_bpf(BPF_MAP_GET_NEXT_KEY, &attr, sizeof(attr));
+	ret = sys_bpf(BPF_MAP_GET_NEXT_KEY, &attr, attr_sz);
 	return libbpf_err_errno(ret);
 }
 
 int bpf_map_freeze(int fd)
 {
+	const size_t attr_sz = offsetofend(union bpf_attr, map_fd);
 	union bpf_attr attr;
 	int ret;
 
-	memset(&attr, 0, sizeof(attr));
+	memset(&attr, 0, attr_sz);
 	attr.map_fd = fd;
 
-	ret = sys_bpf(BPF_MAP_FREEZE, &attr, sizeof(attr));
+	ret = sys_bpf(BPF_MAP_FREEZE, &attr, attr_sz);
 	return libbpf_err_errno(ret);
 }
 
@@ -509,13 +519,14 @@ static int bpf_map_batch_common(int cmd, int fd, void  *in_batch,
 				__u32 *count,
 				const struct bpf_map_batch_opts *opts)
 {
+	const size_t attr_sz = offsetofend(union bpf_attr, batch);
 	union bpf_attr attr;
 	int ret;
 
 	if (!OPTS_VALID(opts, bpf_map_batch_opts))
 		return libbpf_err(-EINVAL);
 
-	memset(&attr, 0, sizeof(attr));
+	memset(&attr, 0, attr_sz);
 	attr.batch.map_fd = fd;
 	attr.batch.in_batch = ptr_to_u64(in_batch);
 	attr.batch.out_batch = ptr_to_u64(out_batch);
@@ -525,7 +536,7 @@ static int bpf_map_batch_common(int cmd, int fd, void  *in_batch,
 	attr.batch.elem_flags  = OPTS_GET(opts, elem_flags, 0);
 	attr.batch.flags = OPTS_GET(opts, flags, 0);
 
-	ret = sys_bpf(cmd, &attr, sizeof(attr));
+	ret = sys_bpf(cmd, &attr, attr_sz);
 	*count = attr.batch.count;
 
 	return libbpf_err_errno(ret);
@@ -564,14 +575,15 @@ int bpf_map_update_batch(int fd, const void *keys, const void *values, __u32 *co
 
 int bpf_obj_pin(int fd, const char *pathname)
 {
+	const size_t attr_sz = offsetofend(union bpf_attr, file_flags);
 	union bpf_attr attr;
 	int ret;
 
-	memset(&attr, 0, sizeof(attr));
+	memset(&attr, 0, attr_sz);
 	attr.pathname = ptr_to_u64((void *)pathname);
 	attr.bpf_fd = fd;
 
-	ret = sys_bpf(BPF_OBJ_PIN, &attr, sizeof(attr));
+	ret = sys_bpf(BPF_OBJ_PIN, &attr, attr_sz);
 	return libbpf_err_errno(ret);
 }
 
@@ -582,17 +594,18 @@ int bpf_obj_get(const char *pathname)
 
 int bpf_obj_get_opts(const char *pathname, const struct bpf_obj_get_opts *opts)
 {
+	const size_t attr_sz = offsetofend(union bpf_attr, file_flags);
 	union bpf_attr attr;
 	int fd;
 
 	if (!OPTS_VALID(opts, bpf_obj_get_opts))
 		return libbpf_err(-EINVAL);
 
-	memset(&attr, 0, sizeof(attr));
+	memset(&attr, 0, attr_sz);
 	attr.pathname = ptr_to_u64((void *)pathname);
 	attr.file_flags = OPTS_GET(opts, file_flags, 0);
 
-	fd = sys_bpf_fd(BPF_OBJ_GET, &attr, sizeof(attr));
+	fd = sys_bpf_fd(BPF_OBJ_GET, &attr, attr_sz);
 	return libbpf_err_errno(fd);
 }
 
@@ -610,20 +623,21 @@ int bpf_prog_attach_opts(int prog_fd, int target_fd,
 			  enum bpf_attach_type type,
 			  const struct bpf_prog_attach_opts *opts)
 {
+	const size_t attr_sz = offsetofend(union bpf_attr, replace_bpf_fd);
 	union bpf_attr attr;
 	int ret;
 
 	if (!OPTS_VALID(opts, bpf_prog_attach_opts))
 		return libbpf_err(-EINVAL);
 
-	memset(&attr, 0, sizeof(attr));
+	memset(&attr, 0, attr_sz);
 	attr.target_fd	   = target_fd;
 	attr.attach_bpf_fd = prog_fd;
 	attr.attach_type   = type;
 	attr.attach_flags  = OPTS_GET(opts, flags, 0);
 	attr.replace_bpf_fd = OPTS_GET(opts, replace_prog_fd, 0);
 
-	ret = sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr));
+	ret = sys_bpf(BPF_PROG_ATTACH, &attr, attr_sz);
 	return libbpf_err_errno(ret);
 }
 
@@ -634,28 +648,30 @@ int bpf_prog_attach_xattr(int prog_fd, int target_fd,
 
 int bpf_prog_detach(int target_fd, enum bpf_attach_type type)
 {
+	const size_t attr_sz = offsetofend(union bpf_attr, replace_bpf_fd);
 	union bpf_attr attr;
 	int ret;
 
-	memset(&attr, 0, sizeof(attr));
+	memset(&attr, 0, attr_sz);
 	attr.target_fd	 = target_fd;
 	attr.attach_type = type;
 
-	ret = sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
+	ret = sys_bpf(BPF_PROG_DETACH, &attr, attr_sz);
 	return libbpf_err_errno(ret);
 }
 
 int bpf_prog_detach2(int prog_fd, int target_fd, enum bpf_attach_type type)
 {
+	const size_t attr_sz = offsetofend(union bpf_attr, replace_bpf_fd);
 	union bpf_attr attr;
 	int ret;
 
-	memset(&attr, 0, sizeof(attr));
+	memset(&attr, 0, attr_sz);
 	attr.target_fd	 = target_fd;
 	attr.attach_bpf_fd = prog_fd;
 	attr.attach_type = type;
 
-	ret = sys_bpf(BPF_PROG_DETACH, &attr, sizeof(attr));
+	ret = sys_bpf(BPF_PROG_DETACH, &attr, attr_sz);
 	return libbpf_err_errno(ret);
 }
 
@@ -663,6 +679,7 @@ int bpf_link_create(int prog_fd, int target_fd,
 		    enum bpf_attach_type attach_type,
 		    const struct bpf_link_create_opts *opts)
 {
+	const size_t attr_sz = offsetofend(union bpf_attr, link_create);
 	__u32 target_btf_id, iter_info_len;
 	union bpf_attr attr;
 	int fd, err;
@@ -681,7 +698,7 @@ int bpf_link_create(int prog_fd, int target_fd,
 			return libbpf_err(-EINVAL);
 	}
 
-	memset(&attr, 0, sizeof(attr));
+	memset(&attr, 0, attr_sz);
 	attr.link_create.prog_fd = prog_fd;
 	attr.link_create.target_fd = target_fd;
 	attr.link_create.attach_type = attach_type;
@@ -725,7 +742,7 @@ int bpf_link_create(int prog_fd, int target_fd,
 		break;
 	}
 proceed:
-	fd = sys_bpf_fd(BPF_LINK_CREATE, &attr, sizeof(attr));
+	fd = sys_bpf_fd(BPF_LINK_CREATE, &attr, attr_sz);
 	if (fd >= 0)
 		return fd;
 	/* we'll get EINVAL if LINK_CREATE doesn't support attaching fentry
@@ -761,44 +778,47 @@ proceed:
 
 int bpf_link_detach(int link_fd)
 {
+	const size_t attr_sz = offsetofend(union bpf_attr, link_detach);
 	union bpf_attr attr;
 	int ret;
 
-	memset(&attr, 0, sizeof(attr));
+	memset(&attr, 0, attr_sz);
 	attr.link_detach.link_fd = link_fd;
 
-	ret = sys_bpf(BPF_LINK_DETACH, &attr, sizeof(attr));
+	ret = sys_bpf(BPF_LINK_DETACH, &attr, attr_sz);
 	return libbpf_err_errno(ret);
 }
 
 int bpf_link_update(int link_fd, int new_prog_fd,
 		    const struct bpf_link_update_opts *opts)
 {
+	const size_t attr_sz = offsetofend(union bpf_attr, link_update);
 	union bpf_attr attr;
 	int ret;
 
 	if (!OPTS_VALID(opts, bpf_link_update_opts))
 		return libbpf_err(-EINVAL);
 
-	memset(&attr, 0, sizeof(attr));
+	memset(&attr, 0, attr_sz);
 	attr.link_update.link_fd = link_fd;
 	attr.link_update.new_prog_fd = new_prog_fd;
 	attr.link_update.flags = OPTS_GET(opts, flags, 0);
 	attr.link_update.old_prog_fd = OPTS_GET(opts, old_prog_fd, 0);
 
-	ret = sys_bpf(BPF_LINK_UPDATE, &attr, sizeof(attr));
+	ret = sys_bpf(BPF_LINK_UPDATE, &attr, attr_sz);
 	return libbpf_err_errno(ret);
 }
 
 int bpf_iter_create(int link_fd)
 {
+	const size_t attr_sz = offsetofend(union bpf_attr, iter_create);
 	union bpf_attr attr;
 	int fd;
 
-	memset(&attr, 0, sizeof(attr));
+	memset(&attr, 0, attr_sz);
 	attr.iter_create.link_fd = link_fd;
 
-	fd = sys_bpf_fd(BPF_ITER_CREATE, &attr, sizeof(attr));
+	fd = sys_bpf_fd(BPF_ITER_CREATE, &attr, attr_sz);
 	return libbpf_err_errno(fd);
 }
 
@@ -806,13 +826,14 @@ int bpf_prog_query_opts(int target_fd,
 			enum bpf_attach_type type,
 			struct bpf_prog_query_opts *opts)
 {
+	const size_t attr_sz = offsetofend(union bpf_attr, query);
 	union bpf_attr attr;
 	int ret;
 
 	if (!OPTS_VALID(opts, bpf_prog_query_opts))
 		return libbpf_err(-EINVAL);
 
-	memset(&attr, 0, sizeof(attr));
+	memset(&attr, 0, attr_sz);
 
 	attr.query.target_fd	= target_fd;
 	attr.query.attach_type	= type;
@@ -821,7 +842,7 @@ int bpf_prog_query_opts(int target_fd,
 	attr.query.prog_ids	= ptr_to_u64(OPTS_GET(opts, prog_ids, NULL));
 	attr.query.prog_attach_flags = ptr_to_u64(OPTS_GET(opts, prog_attach_flags, NULL));
 
-	ret = sys_bpf(BPF_PROG_QUERY, &attr, sizeof(attr));
+	ret = sys_bpf(BPF_PROG_QUERY, &attr, attr_sz);
 
 	OPTS_SET(opts, attach_flags, attr.query.attach_flags);
 	OPTS_SET(opts, prog_cnt, attr.query.prog_cnt);
@@ -850,13 +871,14 @@ int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
 
 int bpf_prog_test_run_opts(int prog_fd, struct bpf_test_run_opts *opts)
 {
+	const size_t attr_sz = offsetofend(union bpf_attr, test);
 	union bpf_attr attr;
 	int ret;
 
 	if (!OPTS_VALID(opts, bpf_test_run_opts))
 		return libbpf_err(-EINVAL);
 
-	memset(&attr, 0, sizeof(attr));
+	memset(&attr, 0, attr_sz);
 	attr.test.prog_fd = prog_fd;
 	attr.test.batch_size = OPTS_GET(opts, batch_size, 0);
 	attr.test.cpu = OPTS_GET(opts, cpu, 0);
@@ -872,7 +894,7 @@ int bpf_prog_test_run_opts(int prog_fd, struct bpf_test_run_opts *opts)
 	attr.test.data_in = ptr_to_u64(OPTS_GET(opts, data_in, NULL));
 	attr.test.data_out = ptr_to_u64(OPTS_GET(opts, data_out, NULL));
 
-	ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, sizeof(attr));
+	ret = sys_bpf(BPF_PROG_TEST_RUN, &attr, attr_sz);
 
 	OPTS_SET(opts, data_size_out, attr.test.data_size_out);
 	OPTS_SET(opts, ctx_size_out, attr.test.ctx_size_out);
@@ -884,13 +906,14 @@ int bpf_prog_test_run_opts(int prog_fd, struct bpf_test_run_opts *opts)
 
 static int bpf_obj_get_next_id(__u32 start_id, __u32 *next_id, int cmd)
 {
+	const size_t attr_sz = offsetofend(union bpf_attr, open_flags);
 	union bpf_attr attr;
 	int err;
 
-	memset(&attr, 0, sizeof(attr));
+	memset(&attr, 0, attr_sz);
 	attr.start_id = start_id;
 
-	err = sys_bpf(cmd, &attr, sizeof(attr));
+	err = sys_bpf(cmd, &attr, attr_sz);
 	if (!err)
 		*next_id = attr.next_id;
 
@@ -919,80 +942,84 @@ int bpf_link_get_next_id(__u32 start_id, __u32 *next_id)
 
 int bpf_prog_get_fd_by_id(__u32 id)
 {
+	const size_t attr_sz = offsetofend(union bpf_attr, open_flags);
 	union bpf_attr attr;
 	int fd;
 
-	memset(&attr, 0, sizeof(attr));
+	memset(&attr, 0, attr_sz);
 	attr.prog_id = id;
 
-	fd = sys_bpf_fd(BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr));
+	fd = sys_bpf_fd(BPF_PROG_GET_FD_BY_ID, &attr, attr_sz);
 	return libbpf_err_errno(fd);
 }
 
 int bpf_map_get_fd_by_id(__u32 id)
 {
+	const size_t attr_sz = offsetofend(union bpf_attr, open_flags);
 	union bpf_attr attr;
 	int fd;
 
-	memset(&attr, 0, sizeof(attr));
+	memset(&attr, 0, attr_sz);
 	attr.map_id = id;
 
-	fd = sys_bpf_fd(BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr));
+	fd = sys_bpf_fd(BPF_MAP_GET_FD_BY_ID, &attr, attr_sz);
 	return libbpf_err_errno(fd);
 }
 
 int bpf_btf_get_fd_by_id(__u32 id)
 {
+	const size_t attr_sz = offsetofend(union bpf_attr, open_flags);
 	union bpf_attr attr;
 	int fd;
 
-	memset(&attr, 0, sizeof(attr));
+	memset(&attr, 0, attr_sz);
 	attr.btf_id = id;
 
-	fd = sys_bpf_fd(BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr));
+	fd = sys_bpf_fd(BPF_BTF_GET_FD_BY_ID, &attr, attr_sz);
 	return libbpf_err_errno(fd);
 }
 
 int bpf_link_get_fd_by_id(__u32 id)
 {
+	const size_t attr_sz = offsetofend(union bpf_attr, open_flags);
 	union bpf_attr attr;
 	int fd;
 
-	memset(&attr, 0, sizeof(attr));
+	memset(&attr, 0, attr_sz);
 	attr.link_id = id;
 
-	fd = sys_bpf_fd(BPF_LINK_GET_FD_BY_ID, &attr, sizeof(attr));
+	fd = sys_bpf_fd(BPF_LINK_GET_FD_BY_ID, &attr, attr_sz);
 	return libbpf_err_errno(fd);
 }
 
 int bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len)
 {
+	const size_t attr_sz = offsetofend(union bpf_attr, info);
 	union bpf_attr attr;
 	int err;
 
-	memset(&attr, 0, sizeof(attr));
+	memset(&attr, 0, attr_sz);
 	attr.info.bpf_fd = bpf_fd;
 	attr.info.info_len = *info_len;
 	attr.info.info = ptr_to_u64(info);
 
-	err = sys_bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, sizeof(attr));
-
+	err = sys_bpf(BPF_OBJ_GET_INFO_BY_FD, &attr, attr_sz);
 	if (!err)
 		*info_len = attr.info.info_len;
-
 	return libbpf_err_errno(err);
 }
 
 int bpf_raw_tracepoint_open(const char *name, int prog_fd)
 {
+	const size_t attr_sz = offsetofend(union bpf_attr, raw_tracepoint);
 	union bpf_attr attr;
 	int fd;
 
-	memset(&attr, 0, sizeof(attr));
+	memset(&attr, 0, attr_sz);
 	attr.raw_tracepoint.name = ptr_to_u64(name);
 	attr.raw_tracepoint.prog_fd = prog_fd;
 
-	fd = sys_bpf_fd(BPF_RAW_TRACEPOINT_OPEN, &attr, sizeof(attr));
+	fd = sys_bpf_fd(BPF_RAW_TRACEPOINT_OPEN, &attr, attr_sz);
 	return libbpf_err_errno(fd);
 }
 
@@ -1048,16 +1075,18 @@ int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len,
 		      __u32 *prog_id, __u32 *fd_type, __u64 *probe_offset,
 		      __u64 *probe_addr)
 {
-	union bpf_attr attr = {};
+	const size_t attr_sz = offsetofend(union bpf_attr, task_fd_query);
+	union bpf_attr attr;
 	int err;
 
+	memset(&attr, 0, attr_sz);
 	attr.task_fd_query.pid = pid;
 	attr.task_fd_query.fd = fd;
 	attr.task_fd_query.flags = flags;
 	attr.task_fd_query.buf = ptr_to_u64(buf);
 	attr.task_fd_query.buf_len = *buf_len;
 
-	err = sys_bpf(BPF_TASK_FD_QUERY, &attr, sizeof(attr));
+	err = sys_bpf(BPF_TASK_FD_QUERY, &attr, attr_sz);
 
 	*buf_len = attr.task_fd_query.buf_len;
 	*prog_id = attr.task_fd_query.prog_id;
@@ -1070,30 +1099,32 @@ int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len,
 
 int bpf_enable_stats(enum bpf_stats_type type)
 {
+	const size_t attr_sz = offsetofend(union bpf_attr, enable_stats);
 	union bpf_attr attr;
 	int fd;
 
-	memset(&attr, 0, sizeof(attr));
+	memset(&attr, 0, attr_sz);
 	attr.enable_stats.type = type;
 
-	fd = sys_bpf_fd(BPF_ENABLE_STATS, &attr, sizeof(attr));
+	fd = sys_bpf_fd(BPF_ENABLE_STATS, &attr, attr_sz);
 	return libbpf_err_errno(fd);
 }
 
 int bpf_prog_bind_map(int prog_fd, int map_fd,
 		      const struct bpf_prog_bind_opts *opts)
 {
+	const size_t attr_sz = offsetofend(union bpf_attr, prog_bind_map);
 	union bpf_attr attr;
 	int ret;
 
 	if (!OPTS_VALID(opts, bpf_prog_bind_opts))
 		return libbpf_err(-EINVAL);
 
-	memset(&attr, 0, sizeof(attr));
+	memset(&attr, 0, attr_sz);
 	attr.prog_bind_map.prog_fd = prog_fd;
 	attr.prog_bind_map.map_fd = map_fd;
 	attr.prog_bind_map.flags = OPTS_GET(opts, flags, 0);
 
-	ret = sys_bpf(BPF_PROG_BIND_MAP, &attr, sizeof(attr));
+	ret = sys_bpf(BPF_PROG_BIND_MAP, &attr, attr_sz);
 	return libbpf_err_errno(ret);
 }
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 146d35526b87..21fc3fc7f44c 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -4287,11 +4287,12 @@ int bpf_map__set_autocreate(struct bpf_map *map, bool autocreate)
 
 int bpf_map__reuse_fd(struct bpf_map *map, int fd)
 {
-	struct bpf_map_info info = {};
+	struct bpf_map_info info;
 	__u32 len = sizeof(info), name_len;
 	int new_fd, err;
 	char *new_name;
 
+	memset(&info, 0, len);
 	err = bpf_obj_get_info_by_fd(fd, &info, &len);
 	if (err && errno == EINVAL)
 		err = bpf_get_map_info_from_fdinfo(fd, &info);
@@ -4833,13 +4834,12 @@ bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id)
 
 static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd)
 {
-	struct bpf_map_info map_info = {};
+	struct bpf_map_info map_info;
 	char msg[STRERR_BUFSIZE];
-	__u32 map_info_len;
+	__u32 map_info_len = sizeof(map_info);
 	int err;
 
-	map_info_len = sizeof(map_info);
-
+	memset(&map_info, 0, map_info_len);
 	err = bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len);
 	if (err && errno == EINVAL)
 		err = bpf_get_map_info_from_fdinfo(map_fd, &map_info);
@@ -9007,11 +9007,12 @@ int libbpf_find_vmlinux_btf_id(const char *name,
 
 static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
 {
-	struct bpf_prog_info info = {};
+	struct bpf_prog_info info;
 	__u32 info_len = sizeof(info);
 	struct btf *btf;
 	int err;
 
+	memset(&info, 0, info_len);
 	err = bpf_obj_get_info_by_fd(attach_prog_fd, &info, &info_len);
 	if (err) {
 		pr_warn("failed bpf_obj_get_info_by_fd for FD %d: %d\n",
@@ -9839,13 +9840,16 @@ static int determine_uprobe_retprobe_bit(void)
 static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
 				 uint64_t offset, int pid, size_t ref_ctr_off)
 {
-	struct perf_event_attr attr = {};
+	const size_t attr_sz = sizeof(struct perf_event_attr);
+	struct perf_event_attr attr;
 	char errmsg[STRERR_BUFSIZE];
 	int type, pfd;
 
 	if (ref_ctr_off >= (1ULL << PERF_UPROBE_REF_CTR_OFFSET_BITS))
 		return -EINVAL;
 
+	memset(&attr, 0, attr_sz);
+
 	type = uprobe ? determine_uprobe_perf_type()
 		      : determine_kprobe_perf_type();
 	if (type < 0) {
@@ -9866,7 +9870,7 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,
 		}
 		attr.config |= 1 << bit;
 	}
-	attr.size = sizeof(attr);
+	attr.size = attr_sz;
 	attr.type = type;
 	attr.config |= (__u64)ref_ctr_off << PERF_UPROBE_REF_CTR_OFFSET_SHIFT;
 	attr.config1 = ptr_to_u64(name); /* kprobe_func or uprobe_path */
@@ -9965,7 +9969,8 @@ static int determine_kprobe_perf_type_legacy(const char *probe_name, bool retpro
 static int perf_event_kprobe_open_legacy(const char *probe_name, bool retprobe,
 					 const char *kfunc_name, size_t offset, int pid)
 {
-	struct perf_event_attr attr = {};
+	const size_t attr_sz = sizeof(struct perf_event_attr);
+	struct perf_event_attr attr;
 	char errmsg[STRERR_BUFSIZE];
 	int type, pfd, err;
 
@@ -9984,7 +9989,9 @@ static int perf_event_kprobe_open_legacy(const char *probe_name, bool retprobe,
 			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));
 		goto err_clean_legacy;
 	}
-	attr.size = sizeof(attr);
+
+	memset(&attr, 0, attr_sz);
+	attr.size = attr_sz;
 	attr.config = type;
 	attr.type = PERF_TYPE_TRACEPOINT;
 
@@ -10441,6 +10448,7 @@ static int determine_uprobe_perf_type_legacy(const char *probe_name, bool retpro
 static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe,
 					 const char *binary_path, size_t offset, int pid)
 {
+	const size_t attr_sz = sizeof(struct perf_event_attr);
 	struct perf_event_attr attr;
 	int type, pfd, err;
 
@@ -10458,8 +10466,8 @@ static int perf_event_uprobe_open_legacy(const char *probe_name, bool retprobe,
 		goto err_clean_legacy;
 	}
 
-	memset(&attr, 0, sizeof(attr));
-	attr.size = sizeof(attr);
+	memset(&attr, 0, attr_sz);
+	attr.size = attr_sz;
 	attr.config = type;
 	attr.type = PERF_TYPE_TRACEPOINT;
 
@@ -10998,7 +11006,8 @@ static int determine_tracepoint_id(const char *tp_category,
 static int perf_event_open_tracepoint(const char *tp_category,
 				      const char *tp_name)
 {
-	struct perf_event_attr attr = {};
+	const size_t attr_sz = sizeof(struct perf_event_attr);
+	struct perf_event_attr attr;
 	char errmsg[STRERR_BUFSIZE];
 	int tp_id, pfd, err;
 
@@ -11010,8 +11019,9 @@ static int perf_event_open_tracepoint(const char *tp_category,
 		return tp_id;
 	}
 
+	memset(&attr, 0, attr_sz);
 	attr.type = PERF_TYPE_TRACEPOINT;
-	attr.size = sizeof(attr);
+	attr.size = attr_sz;
 	attr.config = tp_id;
 
 	pfd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu */,
@@ -11631,12 +11641,15 @@ struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,
 				     void *ctx,
 				     const struct perf_buffer_opts *opts)
 {
+	const size_t attr_sz = sizeof(struct perf_event_attr);
 	struct perf_buffer_params p = {};
-	struct perf_event_attr attr = {};
+	struct perf_event_attr attr;
 
 	if (!OPTS_VALID(opts, perf_buffer_opts))
 		return libbpf_err_ptr(-EINVAL);
 
+	memset(&attr, 0, attr_sz);
+	attr.size = attr_sz;
 	attr.config = PERF_COUNT_SW_BPF_OUTPUT;
 	attr.type = PERF_TYPE_SOFTWARE;
 	attr.sample_type = PERF_SAMPLE_RAW;
diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c
index 6c013168032d..35104580870c 100644
--- a/tools/lib/bpf/netlink.c
+++ b/tools/lib/bpf/netlink.c
@@ -587,11 +587,12 @@ static int get_tc_info(struct nlmsghdr *nh, libbpf_dump_nlmsg_t fn,
 
 static int tc_add_fd_and_name(struct libbpf_nla_req *req, int fd)
 {
-	struct bpf_prog_info info = {};
+	struct bpf_prog_info info;
 	__u32 info_len = sizeof(info);
 	char name[256];
 	int len, ret;
 
+	memset(&info, 0, info_len);
 	ret = bpf_obj_get_info_by_fd(fd, &info, &info_len);
 	if (ret < 0)
 		return ret;
diff --git a/tools/lib/bpf/skel_internal.h b/tools/lib/bpf/skel_internal.h
index bd6f4505e7b1..365d769e0357 100644
--- a/tools/lib/bpf/skel_internal.h
+++ b/tools/lib/bpf/skel_internal.h
@@ -285,6 +285,8 @@ static inline int skel_link_create(int prog_fd, int target_fd,
 
 static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts)
 {
+	const size_t prog_load_attr_sz = offsetofend(union bpf_attr, fd_array);
+	const size_t test_run_attr_sz = offsetofend(union bpf_attr, test);
 	int map_fd = -1, prog_fd = -1, key = 0, err;
 	union bpf_attr attr;
 
@@ -302,7 +304,7 @@ static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts)
 		goto out;
 	}
 
-	memset(&attr, 0, sizeof(attr));
+	memset(&attr, 0, prog_load_attr_sz);
 	attr.prog_type = BPF_PROG_TYPE_SYSCALL;
 	attr.insns = (long) opts->insns;
 	attr.insn_cnt = opts->insns_sz / sizeof(struct bpf_insn);
@@ -313,18 +315,18 @@ static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts)
 	attr.log_size = opts->ctx->log_size;
 	attr.log_buf = opts->ctx->log_buf;
 	attr.prog_flags = BPF_F_SLEEPABLE;
-	err = prog_fd = skel_sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr));
+	err = prog_fd = skel_sys_bpf(BPF_PROG_LOAD, &attr, prog_load_attr_sz);
 	if (prog_fd < 0) {
 		opts->errstr = "failed to load loader prog";
 		set_err;
 		goto out;
 	}
 
-	memset(&attr, 0, sizeof(attr));
+	memset(&attr, 0, test_run_attr_sz);
 	attr.test.prog_fd = prog_fd;
 	attr.test.ctx_in = (long) opts->ctx;
 	attr.test.ctx_size_in = opts->ctx->sz;
-	err = skel_sys_bpf(BPF_PROG_RUN, &attr, sizeof(attr));
+	err = skel_sys_bpf(BPF_PROG_RUN, &attr, test_run_attr_sz);
 	if (err < 0 || (int)attr.test.retval < 0) {
 		opts->errstr = "failed to execute loader prog";
 		if (err < 0) {
-- 
cgit v1.2.3


From abf84b64e36b175c9c4dd4ecbad2af4329c00041 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Mon, 15 Aug 2022 17:19:28 -0700
Subject: libbpf: Clean up deprecated and legacy aliases

Remove three missed deprecated APIs that were aliased to new APIs:
bpf_object__unload, bpf_prog_attach_xattr and btf__load.

Also move legacy API libbpf_find_kernel_btf (aliased to
btf__load_vmlinux_btf) into libbpf_legacy.h.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Hao Luo <haoluo@google.com>
Link: https://lore.kernel.org/bpf/20220816001929.369487-4-andrii@kernel.org
---
 tools/lib/bpf/bpf.c           | 5 -----
 tools/lib/bpf/btf.c           | 2 --
 tools/lib/bpf/btf.h           | 1 -
 tools/lib/bpf/libbpf.c        | 2 --
 tools/lib/bpf/libbpf_legacy.h | 2 ++
 5 files changed, 2 insertions(+), 10 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index e3a0bd7efa2f..1d49a0352836 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -641,11 +641,6 @@ int bpf_prog_attach_opts(int prog_fd, int target_fd,
 	return libbpf_err_errno(ret);
 }
 
-__attribute__((alias("bpf_prog_attach_opts")))
-int bpf_prog_attach_xattr(int prog_fd, int target_fd,
-			  enum bpf_attach_type type,
-			  const struct bpf_prog_attach_opts *opts);
-
 int bpf_prog_detach(int target_fd, enum bpf_attach_type type)
 {
 	const size_t attr_sz = offsetofend(union bpf_attr, replace_bpf_fd);
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 2d14f1a52d7a..361131518d63 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -1225,8 +1225,6 @@ int btf__load_into_kernel(struct btf *btf)
 	return btf_load_into_kernel(btf, NULL, 0, 0);
 }
 
-int btf__load(struct btf *) __attribute__((alias("btf__load_into_kernel")));
-
 int btf__fd(const struct btf *btf)
 {
 	return btf->fd;
diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index 583760df83b4..ae543144ee30 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -116,7 +116,6 @@ LIBBPF_API struct btf *btf__parse_raw_split(const char *path, struct btf *base_b
 
 LIBBPF_API struct btf *btf__load_vmlinux_btf(void);
 LIBBPF_API struct btf *btf__load_module_btf(const char *module_name, struct btf *vmlinux_btf);
-LIBBPF_API struct btf *libbpf_find_kernel_btf(void);
 
 LIBBPF_API struct btf *btf__load_from_kernel_by_id(__u32 id);
 LIBBPF_API struct btf *btf__load_from_kernel_by_id_split(__u32 id, struct btf *base_btf);
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 21fc3fc7f44c..3ad139285fad 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -7263,8 +7263,6 @@ static int bpf_object_unload(struct bpf_object *obj)
 	return 0;
 }
 
-int bpf_object__unload(struct bpf_object *obj) __attribute__((alias("bpf_object_unload")));
-
 static int bpf_object__sanitize_maps(struct bpf_object *obj)
 {
 	struct bpf_map *m;
diff --git a/tools/lib/bpf/libbpf_legacy.h b/tools/lib/bpf/libbpf_legacy.h
index 5b7e0155db6a..1e1be467bede 100644
--- a/tools/lib/bpf/libbpf_legacy.h
+++ b/tools/lib/bpf/libbpf_legacy.h
@@ -125,6 +125,8 @@ struct bpf_map;
 struct btf;
 struct btf_ext;
 
+LIBBPF_API struct btf *libbpf_find_kernel_btf(void);
+
 LIBBPF_API enum bpf_prog_type bpf_program__get_type(const struct bpf_program *prog);
 LIBBPF_API enum bpf_attach_type bpf_program__get_expected_attach_type(const struct bpf_program *prog);
 LIBBPF_API const char *bpf_map__get_pin_path(const struct bpf_map *map);
-- 
cgit v1.2.3


From df78da27260c915039b348b164bbc53fa372ba70 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Mon, 15 Aug 2022 17:19:29 -0700
Subject: selftests/bpf: Few fixes for selftests/bpf built in release mode

Fix a few issues found when building and running test_progs in
release mode.

First, the idx variable in xskxceiver is potentially uninitialized;
force-initialize it to zero to satisfy the compiler.

Second, a few uprobe trigger functions break in release mode unless
marked as noinline, because they are static. Add noinline to make
sure everything works.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Hao Luo <haoluo@google.com>
Link: https://lore.kernel.org/bpf/20220816001929.369487-5-andrii@kernel.org
---
 tools/testing/selftests/bpf/prog_tests/attach_probe.c | 6 +++---
 tools/testing/selftests/bpf/prog_tests/bpf_cookie.c   | 2 +-
 tools/testing/selftests/bpf/prog_tests/task_pt_regs.c | 2 +-
 tools/testing/selftests/bpf/xskxceiver.c              | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
index 0b899d2d8ea7..9566d9d2f6ee 100644
--- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c
+++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
@@ -6,19 +6,19 @@
 volatile unsigned short uprobe_ref_ctr __attribute__((unused)) __attribute((section(".probes")));
 
 /* uprobe attach point */
-static void trigger_func(void)
+static noinline void trigger_func(void)
 {
 	asm volatile ("");
 }
 
 /* attach point for byname uprobe */
-static void trigger_func2(void)
+static noinline void trigger_func2(void)
 {
 	asm volatile ("");
 }
 
 /* attach point for byname sleepable uprobe */
-static void trigger_func3(void)
+static noinline void trigger_func3(void)
 {
 	asm volatile ("");
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
index 2974b44f80fa..2be2d61954bc 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
@@ -13,7 +13,7 @@
 #include "kprobe_multi.skel.h"
 
 /* uprobe attach point */
-static void trigger_func(void)
+static noinline void trigger_func(void)
 {
 	asm volatile ("");
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c b/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c
index 61935e7e056a..f000734a3d1f 100644
--- a/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c
+++ b/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c
@@ -4,7 +4,7 @@
 #include "test_task_pt_regs.skel.h"
 
 /* uprobe attach point */
-static void trigger_func(void)
+static noinline void trigger_func(void)
 {
 	asm volatile ("");
 }
diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c
index 20b44ab32a06..14b4737b223c 100644
--- a/tools/testing/selftests/bpf/xskxceiver.c
+++ b/tools/testing/selftests/bpf/xskxceiver.c
@@ -922,7 +922,7 @@ static int __send_pkts(struct ifobject *ifobject, u32 *pkt_nb, struct pollfd *fd
 {
 	struct xsk_socket_info *xsk = ifobject->xsk;
 	bool use_poll = ifobject->use_poll;
-	u32 i, idx, ret, valid_pkts = 0;
+	u32 i, idx = 0, ret, valid_pkts = 0;
 
 	while (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) < BATCH_SIZE) {
 		if (use_poll) {
-- 
cgit v1.2.3


From 31123c0360e01ee0389aee3a7b2ad32f13136662 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Tue, 16 Aug 2022 23:18:47 -0700
Subject: selftests/bpf: bpf_setsockopt tests

This patch adds tests to exercise optnames that are allowed
in bpf_setsockopt().

Reviewed-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/r/20220817061847.4182339-1-kafai@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 .../selftests/bpf/prog_tests/setget_sockopt.c      | 125 ++++++
 .../testing/selftests/bpf/progs/bpf_tracing_net.h  |  31 +-
 tools/testing/selftests/bpf/progs/setget_sockopt.c | 451 +++++++++++++++++++++
 3 files changed, 606 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/setget_sockopt.c
 create mode 100644 tools/testing/selftests/bpf/progs/setget_sockopt.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c b/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c
new file mode 100644
index 000000000000..018611e6b248
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) Meta Platforms, Inc. and affiliates. */
+
+#define _GNU_SOURCE
+#include <sched.h>
+#include <linux/socket.h>
+#include <net/if.h>
+
+#include "test_progs.h"
+#include "cgroup_helpers.h"
+#include "network_helpers.h"
+
+#include "setget_sockopt.skel.h"
+
+#define CG_NAME "/setget-sockopt-test"
+
+static const char addr4_str[] = "127.0.0.1";
+static const char addr6_str[] = "::1";
+static struct setget_sockopt *skel;
+static int cg_fd;
+
+static int create_netns(void)
+{
+	if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns"))
+		return -1;
+
+	if (!ASSERT_OK(system("ip link set dev lo up"), "set lo up"))
+		return -1;
+
+	if (!ASSERT_OK(system("ip link add dev binddevtest1 type veth peer name binddevtest2"),
+		       "add veth"))
+		return -1;
+
+	if (!ASSERT_OK(system("ip link set dev binddevtest1 up"),
+		       "bring veth up"))
+		return -1;
+
+	return 0;
+}
+
+static void test_tcp(int family)
+{
+	struct setget_sockopt__bss *bss = skel->bss;
+	int sfd, cfd;
+
+	memset(bss, 0, sizeof(*bss));
+
+	sfd = start_server(family, SOCK_STREAM,
+			   family == AF_INET6 ? addr6_str : addr4_str, 0, 0);
+	if (!ASSERT_GE(sfd, 0, "start_server"))
+		return;
+
+	cfd = connect_to_fd(sfd, 0);
+	if (!ASSERT_GE(cfd, 0, "connect_to_fd_server")) {
+		close(sfd);
+		return;
+	}
+	close(sfd);
+	close(cfd);
+
+	ASSERT_EQ(bss->nr_listen, 1, "nr_listen");
+	ASSERT_EQ(bss->nr_connect, 1, "nr_connect");
+	ASSERT_EQ(bss->nr_active, 1, "nr_active");
+	ASSERT_EQ(bss->nr_passive, 1, "nr_passive");
+	ASSERT_EQ(bss->nr_socket_post_create, 2, "nr_socket_post_create");
+	ASSERT_EQ(bss->nr_binddev, 2, "nr_bind");
+}
+
+static void test_udp(int family)
+{
+	struct setget_sockopt__bss *bss = skel->bss;
+	int sfd;
+
+	memset(bss, 0, sizeof(*bss));
+
+	sfd = start_server(family, SOCK_DGRAM,
+			   family == AF_INET6 ? addr6_str : addr4_str, 0, 0);
+	if (!ASSERT_GE(sfd, 0, "start_server"))
+		return;
+	close(sfd);
+
+	ASSERT_GE(bss->nr_socket_post_create, 1, "nr_socket_post_create");
+	ASSERT_EQ(bss->nr_binddev, 1, "nr_bind");
+}
+
+void test_setget_sockopt(void)
+{
+	cg_fd = test__join_cgroup(CG_NAME);
+	if (cg_fd < 0)
+		return;
+
+	if (create_netns())
+		goto done;
+
+	skel = setget_sockopt__open();
+	if (!ASSERT_OK_PTR(skel, "open skel"))
+		goto done;
+
+	strcpy(skel->rodata->veth, "binddevtest1");
+	skel->rodata->veth_ifindex = if_nametoindex("binddevtest1");
+	if (!ASSERT_GT(skel->rodata->veth_ifindex, 0, "if_nametoindex"))
+		goto done;
+
+	if (!ASSERT_OK(setget_sockopt__load(skel), "load skel"))
+		goto done;
+
+	skel->links.skops_sockopt =
+		bpf_program__attach_cgroup(skel->progs.skops_sockopt, cg_fd);
+	if (!ASSERT_OK_PTR(skel->links.skops_sockopt, "attach cgroup"))
+		goto done;
+
+	skel->links.socket_post_create =
+		bpf_program__attach_cgroup(skel->progs.socket_post_create, cg_fd);
+	if (!ASSERT_OK_PTR(skel->links.socket_post_create, "attach_cgroup"))
+		goto done;
+
+	test_tcp(AF_INET6);
+	test_tcp(AF_INET);
+	test_udp(AF_INET6);
+	test_udp(AF_INET);
+
+done:
+	setget_sockopt__destroy(skel);
+	close(cg_fd);
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
index 98dd2c4815f0..5ebc6dabef84 100644
--- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
+++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
@@ -6,13 +6,40 @@
 #define AF_INET6		10
 
 #define SOL_SOCKET		1
+#define SO_REUSEADDR		2
 #define SO_SNDBUF		7
-#define __SO_ACCEPTCON		(1 << 16)
+#define SO_RCVBUF		8
+#define SO_KEEPALIVE		9
 #define SO_PRIORITY		12
+#define SO_REUSEPORT		15
+#define SO_RCVLOWAT		18
+#define SO_BINDTODEVICE		25
+#define SO_MARK			36
+#define SO_MAX_PACING_RATE	47
+#define SO_BINDTOIFINDEX	62
+#define SO_TXREHASH		74
+#define __SO_ACCEPTCON		(1 << 16)
+
+#define IP_TOS			1
+
+#define IPV6_TCLASS		67
+#define IPV6_AUTOFLOWLABEL	70
 
 #define SOL_TCP			6
+#define TCP_NODELAY		1
+#define TCP_MAXSEG		2
+#define TCP_KEEPIDLE		4
+#define TCP_KEEPINTVL		5
+#define TCP_KEEPCNT		6
+#define TCP_SYNCNT		7
+#define TCP_WINDOW_CLAMP	10
 #define TCP_CONGESTION		13
+#define TCP_THIN_LINEAR_TIMEOUTS	16
+#define TCP_USER_TIMEOUT	18
+#define TCP_NOTSENT_LOWAT	25
+#define TCP_SAVE_SYN		27
 #define TCP_CA_NAME_MAX		16
+#define TCP_NAGLE_OFF		1
 
 #define ICSK_TIME_RETRANS	1
 #define ICSK_TIME_PROBE0	3
@@ -49,6 +76,8 @@
 #define sk_state		__sk_common.skc_state
 #define sk_v6_daddr		__sk_common.skc_v6_daddr
 #define sk_v6_rcv_saddr		__sk_common.skc_v6_rcv_saddr
+#define sk_flags		__sk_common.skc_flags
+#define sk_reuse		__sk_common.skc_reuse
 
 #define s6_addr32		in6_u.u6_addr32
 
diff --git a/tools/testing/selftests/bpf/progs/setget_sockopt.c b/tools/testing/selftests/bpf/progs/setget_sockopt.c
new file mode 100644
index 000000000000..4a4cb44a4a15
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/setget_sockopt.c
@@ -0,0 +1,451 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
+extern unsigned long CONFIG_HZ __kconfig;
+
+const volatile char veth[IFNAMSIZ];
+const volatile int veth_ifindex;
+
+int nr_listen;
+int nr_passive;
+int nr_active;
+int nr_connect;
+int nr_binddev;
+int nr_socket_post_create;
+
+struct sockopt_test {
+	int opt;
+	int new;
+	int restore;
+	int expected;
+	int tcp_expected;
+	unsigned int flip:1;
+};
+
+static const char cubic_cc[] = "cubic";
+static const char reno_cc[] = "reno";
+
+static const struct sockopt_test sol_socket_tests[] = {
+	{ .opt = SO_REUSEADDR, .flip = 1, },
+	{ .opt = SO_SNDBUF, .new = 8123, .expected = 8123 * 2, },
+	{ .opt = SO_RCVBUF, .new = 8123, .expected = 8123 * 2, },
+	{ .opt = SO_KEEPALIVE, .flip = 1, },
+	{ .opt = SO_PRIORITY, .new = 0xeb9f, .expected = 0xeb9f, },
+	{ .opt = SO_REUSEPORT, .flip = 1, },
+	{ .opt = SO_RCVLOWAT, .new = 8123, .expected = 8123, },
+	{ .opt = SO_MARK, .new = 0xeb9f, .expected = 0xeb9f, },
+	{ .opt = SO_MAX_PACING_RATE, .new = 0xeb9f, .expected = 0xeb9f, },
+	{ .opt = SO_TXREHASH, .flip = 1, },
+	{ .opt = 0, },
+};
+
+static const struct sockopt_test sol_tcp_tests[] = {
+	{ .opt = TCP_NODELAY, .flip = 1, },
+	{ .opt = TCP_MAXSEG, .new = 1314, .expected = 1314, },
+	{ .opt = TCP_KEEPIDLE, .new = 123, .expected = 123, .restore = 321, },
+	{ .opt = TCP_KEEPINTVL, .new = 123, .expected = 123, .restore = 321, },
+	{ .opt = TCP_KEEPCNT, .new = 123, .expected = 123, .restore = 124, },
+	{ .opt = TCP_SYNCNT, .new = 123, .expected = 123, .restore = 124, },
+	{ .opt = TCP_WINDOW_CLAMP, .new = 8123, .expected = 8123, .restore = 8124, },
+	{ .opt = TCP_CONGESTION, },
+	{ .opt = TCP_THIN_LINEAR_TIMEOUTS, .flip = 1, },
+	{ .opt = TCP_USER_TIMEOUT, .new = 123400, .expected = 123400, },
+	{ .opt = TCP_NOTSENT_LOWAT, .new = 1314, .expected = 1314, },
+	{ .opt = TCP_SAVE_SYN, .new = 1, .expected = 1, },
+	{ .opt = 0, },
+};
+
+static const struct sockopt_test sol_ip_tests[] = {
+	{ .opt = IP_TOS, .new = 0xe1, .expected = 0xe1, .tcp_expected = 0xe0, },
+	{ .opt = 0, },
+};
+
+static const struct sockopt_test sol_ipv6_tests[] = {
+	{ .opt = IPV6_TCLASS, .new = 0xe1, .expected = 0xe1, .tcp_expected = 0xe0, },
+	{ .opt = IPV6_AUTOFLOWLABEL, .flip = 1, },
+	{ .opt = 0, },
+};
+
+struct loop_ctx {
+	void *ctx;
+	struct sock *sk;
+};
+
+static int __bpf_getsockopt(void *ctx, struct sock *sk,
+			    int level, int opt, int *optval,
+			    int optlen)
+{
+	if (level == SOL_SOCKET) {
+		switch (opt) {
+		case SO_REUSEADDR:
+			*optval = !!BPF_CORE_READ_BITFIELD(sk, sk_reuse);
+			break;
+		case SO_KEEPALIVE:
+			*optval = !!(sk->sk_flags & (1UL << 3));
+			break;
+		case SO_RCVLOWAT:
+			*optval = sk->sk_rcvlowat;
+			break;
+		case SO_MAX_PACING_RATE:
+			*optval = sk->sk_max_pacing_rate;
+			break;
+		default:
+			return bpf_getsockopt(ctx, level, opt, optval, optlen);
+		}
+		return 0;
+	}
+
+	if (level == IPPROTO_TCP) {
+		struct tcp_sock *tp = bpf_skc_to_tcp_sock(sk);
+
+		if (!tp)
+			return -1;
+
+		switch (opt) {
+		case TCP_NODELAY:
+			*optval = !!(BPF_CORE_READ_BITFIELD(tp, nonagle) & TCP_NAGLE_OFF);
+			break;
+		case TCP_MAXSEG:
+			*optval = tp->rx_opt.user_mss;
+			break;
+		case TCP_KEEPIDLE:
+			*optval = tp->keepalive_time / CONFIG_HZ;
+			break;
+		case TCP_SYNCNT:
+			*optval = tp->inet_conn.icsk_syn_retries;
+			break;
+		case TCP_KEEPINTVL:
+			*optval = tp->keepalive_intvl / CONFIG_HZ;
+			break;
+		case TCP_KEEPCNT:
+			*optval = tp->keepalive_probes;
+			break;
+		case TCP_WINDOW_CLAMP:
+			*optval = tp->window_clamp;
+			break;
+		case TCP_THIN_LINEAR_TIMEOUTS:
+			*optval = !!BPF_CORE_READ_BITFIELD(tp, thin_lto);
+			break;
+		case TCP_USER_TIMEOUT:
+			*optval = tp->inet_conn.icsk_user_timeout;
+			break;
+		case TCP_NOTSENT_LOWAT:
+			*optval = tp->notsent_lowat;
+			break;
+		case TCP_SAVE_SYN:
+			*optval = BPF_CORE_READ_BITFIELD(tp, save_syn);
+			break;
+		default:
+			return bpf_getsockopt(ctx, level, opt, optval, optlen);
+		}
+		return 0;
+	}
+
+	if (level == IPPROTO_IPV6) {
+		switch (opt) {
+		case IPV6_AUTOFLOWLABEL: {
+			__u16 proto = sk->sk_protocol;
+			struct inet_sock *inet_sk;
+
+			if (proto == IPPROTO_TCP)
+				inet_sk = (struct inet_sock *)bpf_skc_to_tcp_sock(sk);
+			else
+				inet_sk = (struct inet_sock *)bpf_skc_to_udp6_sock(sk);
+
+			if (!inet_sk)
+				return -1;
+
+			*optval = !!inet_sk->pinet6->autoflowlabel;
+			break;
+		}
+		default:
+			return bpf_getsockopt(ctx, level, opt, optval, optlen);
+		}
+		return 0;
+	}
+
+	return bpf_getsockopt(ctx, level, opt, optval, optlen);
+}
+
+static int bpf_test_sockopt_flip(void *ctx, struct sock *sk,
+				 const struct sockopt_test *t,
+				 int level)
+{
+	int old, tmp, new, opt = t->opt;
+
+	opt = t->opt;
+
+	if (__bpf_getsockopt(ctx, sk, level, opt, &old, sizeof(old)))
+		return 1;
+	/* kernel initialized txrehash to 255 */
+	if (level == SOL_SOCKET && opt == SO_TXREHASH && old != 0 && old != 1)
+		old = 1;
+
+	new = !old;
+	if (bpf_setsockopt(ctx, level, opt, &new, sizeof(new)))
+		return 1;
+	if (__bpf_getsockopt(ctx, sk, level, opt, &tmp, sizeof(tmp)) ||
+	    tmp != new)
+		return 1;
+
+	if (bpf_setsockopt(ctx, level, opt, &old, sizeof(old)))
+		return 1;
+
+	return 0;
+}
+
+static int bpf_test_sockopt_int(void *ctx, struct sock *sk,
+				const struct sockopt_test *t,
+				int level)
+{
+	int old, tmp, new, expected, opt;
+
+	opt = t->opt;
+	new = t->new;
+	if (sk->sk_type == SOCK_STREAM && t->tcp_expected)
+		expected = t->tcp_expected;
+	else
+		expected = t->expected;
+
+	if (__bpf_getsockopt(ctx, sk, level, opt, &old, sizeof(old)) ||
+	    old == new)
+		return 1;
+
+	if (bpf_setsockopt(ctx, level, opt, &new, sizeof(new)))
+		return 1;
+	if (__bpf_getsockopt(ctx, sk, level, opt, &tmp, sizeof(tmp)) ||
+	    tmp != expected)
+		return 1;
+
+	if (t->restore)
+		old = t->restore;
+	if (bpf_setsockopt(ctx, level, opt, &old, sizeof(old)))
+		return 1;
+
+	return 0;
+}
+
+static int bpf_test_socket_sockopt(__u32 i, struct loop_ctx *lc)
+{
+	const struct sockopt_test *t;
+
+	if (i >= ARRAY_SIZE(sol_socket_tests))
+		return 1;
+
+	t = &sol_socket_tests[i];
+	if (!t->opt)
+		return 1;
+
+	if (t->flip)
+		return bpf_test_sockopt_flip(lc->ctx, lc->sk, t, SOL_SOCKET);
+
+	return bpf_test_sockopt_int(lc->ctx, lc->sk, t, SOL_SOCKET);
+}
+
+static int bpf_test_ip_sockopt(__u32 i, struct loop_ctx *lc)
+{
+	const struct sockopt_test *t;
+
+	if (i >= ARRAY_SIZE(sol_ip_tests))
+		return 1;
+
+	t = &sol_ip_tests[i];
+	if (!t->opt)
+		return 1;
+
+	if (t->flip)
+		return bpf_test_sockopt_flip(lc->ctx, lc->sk, t, IPPROTO_IP);
+
+	return bpf_test_sockopt_int(lc->ctx, lc->sk, t, IPPROTO_IP);
+}
+
+static int bpf_test_ipv6_sockopt(__u32 i, struct loop_ctx *lc)
+{
+	const struct sockopt_test *t;
+
+	if (i >= ARRAY_SIZE(sol_ipv6_tests))
+		return 1;
+
+	t = &sol_ipv6_tests[i];
+	if (!t->opt)
+		return 1;
+
+	if (t->flip)
+		return bpf_test_sockopt_flip(lc->ctx, lc->sk, t, IPPROTO_IPV6);
+
+	return bpf_test_sockopt_int(lc->ctx, lc->sk, t, IPPROTO_IPV6);
+}
+
+static int bpf_test_tcp_sockopt(__u32 i, struct loop_ctx *lc)
+{
+	const struct sockopt_test *t;
+	struct sock *sk;
+	void *ctx;
+
+	if (i >= ARRAY_SIZE(sol_tcp_tests))
+		return 1;
+
+	t = &sol_tcp_tests[i];
+	if (!t->opt)
+		return 1;
+
+	ctx = lc->ctx;
+	sk = lc->sk;
+
+	if (t->opt == TCP_CONGESTION) {
+		char old_cc[16], tmp_cc[16];
+		const char *new_cc;
+
+		if (bpf_getsockopt(ctx, IPPROTO_TCP, TCP_CONGESTION, old_cc, sizeof(old_cc)))
+			return 1;
+		if (!bpf_strncmp(old_cc, sizeof(old_cc), cubic_cc))
+			new_cc = reno_cc;
+		else
+			new_cc = cubic_cc;
+		if (bpf_setsockopt(ctx, IPPROTO_TCP, TCP_CONGESTION, (void *)new_cc,
+				   sizeof(new_cc)))
+			return 1;
+		if (bpf_getsockopt(ctx, IPPROTO_TCP, TCP_CONGESTION, tmp_cc, sizeof(tmp_cc)))
+			return 1;
+		if (bpf_strncmp(tmp_cc, sizeof(tmp_cc), new_cc))
+			return 1;
+		if (bpf_setsockopt(ctx, IPPROTO_TCP, TCP_CONGESTION, old_cc, sizeof(old_cc)))
+			return 1;
+		return 0;
+	}
+
+	if (t->flip)
+		return bpf_test_sockopt_flip(ctx, sk, t, IPPROTO_TCP);
+
+	return bpf_test_sockopt_int(ctx, sk, t, IPPROTO_TCP);
+}
+
+static int bpf_test_sockopt(void *ctx, struct sock *sk)
+{
+	struct loop_ctx lc = { .ctx = ctx, .sk = sk, };
+	__u16 family, proto;
+	int n;
+
+	family = sk->sk_family;
+	proto = sk->sk_protocol;
+
+	n = bpf_loop(ARRAY_SIZE(sol_socket_tests), bpf_test_socket_sockopt, &lc, 0);
+	if (n != ARRAY_SIZE(sol_socket_tests))
+		return -1;
+
+	if (proto == IPPROTO_TCP) {
+		n = bpf_loop(ARRAY_SIZE(sol_tcp_tests), bpf_test_tcp_sockopt, &lc, 0);
+		if (n != ARRAY_SIZE(sol_tcp_tests))
+			return -1;
+	}
+
+	if (family == AF_INET) {
+		n = bpf_loop(ARRAY_SIZE(sol_ip_tests), bpf_test_ip_sockopt, &lc, 0);
+		if (n != ARRAY_SIZE(sol_ip_tests))
+			return -1;
+	} else {
+		n = bpf_loop(ARRAY_SIZE(sol_ipv6_tests), bpf_test_ipv6_sockopt, &lc, 0);
+		if (n != ARRAY_SIZE(sol_ipv6_tests))
+			return -1;
+	}
+
+	return 0;
+}
+
+static int binddev_test(void *ctx)
+{
+	const char empty_ifname[] = "";
+	int ifindex, zero = 0;
+
+	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+			   (void *)veth, sizeof(veth)))
+		return -1;
+	if (bpf_getsockopt(ctx, SOL_SOCKET, SO_BINDTOIFINDEX,
+			   &ifindex, sizeof(int)) ||
+	    ifindex != veth_ifindex)
+		return -1;
+
+	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+			   (void *)empty_ifname, sizeof(empty_ifname)))
+		return -1;
+	if (bpf_getsockopt(ctx, SOL_SOCKET, SO_BINDTOIFINDEX,
+			   &ifindex, sizeof(int)) ||
+	    ifindex != 0)
+		return -1;
+
+	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTOIFINDEX,
+			   (void *)&veth_ifindex, sizeof(int)))
+		return -1;
+	if (bpf_getsockopt(ctx, SOL_SOCKET, SO_BINDTOIFINDEX,
+			   &ifindex, sizeof(int)) ||
+	    ifindex != veth_ifindex)
+		return -1;
+
+	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTOIFINDEX,
+			   &zero, sizeof(int)))
+		return -1;
+	if (bpf_getsockopt(ctx, SOL_SOCKET, SO_BINDTOIFINDEX,
+			   &ifindex, sizeof(int)) ||
+	    ifindex != 0)
+		return -1;
+
+	return 0;
+}
+
+SEC("lsm_cgroup/socket_post_create")
+int BPF_PROG(socket_post_create, struct socket *sock, int family,
+	     int type, int protocol, int kern)
+{
+	struct sock *sk = sock->sk;
+
+	if (!sk)
+		return 1;
+
+	nr_socket_post_create += !bpf_test_sockopt(sk, sk);
+	nr_binddev += !binddev_test(sk);
+
+	return 1;
+}
+
+SEC("sockops")
+int skops_sockopt(struct bpf_sock_ops *skops)
+{
+	struct bpf_sock *bpf_sk = skops->sk;
+	struct sock *sk;
+
+	if (!bpf_sk)
+		return 1;
+
+	sk = (struct sock *)bpf_skc_to_tcp_sock(bpf_sk);
+	if (!sk)
+		return 1;
+
+	switch (skops->op) {
+	case BPF_SOCK_OPS_TCP_LISTEN_CB:
+		nr_listen += !bpf_test_sockopt(skops, sk);
+		break;
+	case BPF_SOCK_OPS_TCP_CONNECT_CB:
+		nr_connect += !bpf_test_sockopt(skops, sk);
+		break;
+	case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
+		nr_active += !bpf_test_sockopt(skops, sk);
+		break;
+	case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
+		nr_passive += !bpf_test_sockopt(skops, sk);
+		break;
+	}
+
+	return 1;
+}
+
+char _license[] SEC("license") = "GPL";
-- 
cgit v1.2.3


From 2cd87cea78425e6b019e34e969dc008ce560acbf Mon Sep 17 00:00:00 2001
From: Amit Cohen <amcohen@nvidia.com>
Date: Wed, 17 Aug 2022 17:28:25 +0200
Subject: selftests: mlxsw: Add ingress RIF configuration test for 802.1D
 bridge

Before layer 2 forwarding, the device classifies an incoming packet to a
FID. After classification, the FID is known, but also all the attributes of
the FID, such as the router interface (RIF) via which a packet that needs
to be routed will ingress the router block.

For VLAN-unaware bridges (802.1D), the FID classification is done according
to {Port, VID}. When a RIF is added on top of a FID, all the existing
{Port, VID}->FID mappings should be updated by the software with the new
RIF. In addition, when a new mapping is added for a FID that already has a
RIF, the correct RIF should be used for it.

Add a test to verify that packets can be routed after {Port, VID}->FID
classification, regardless of the order of the configuration.

 # ./ingress_rif_conf_1d.sh
 TEST: Add RIF for existing {port, VID}->FID mapping                 [ OK ]
 TEST: Add {port, VID}->FID mapping for FID with a RIF               [ OK ]

Signed-off-by: Amit Cohen <amcohen@nvidia.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../drivers/net/mlxsw/ingress_rif_conf_1d.sh       | 264 +++++++++++++++++++++
 1 file changed, 264 insertions(+)
 create mode 100755 tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1d.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1d.sh b/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1d.sh
new file mode 100755
index 000000000000..df2b09966886
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1d.sh
@@ -0,0 +1,264 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test routing over bridge and verify that the order of configuration does not
+# impact switch behavior. Verify that RIF is added correctly for existing
+# mappings and that new mappings use the correct RIF.
+
+# +-------------------+                   +--------------------+
+# | H1                |                   | H2                 |
+# |                   |                   |                    |
+# |         $h1.10 +  |                   |  + $h2.10          |
+# |   192.0.2.1/28 |  |                   |  | 192.0.2.3/28    |
+# |                |  |                   |  |                 |
+# |            $h1 +  |                   |  + $h2             |
+# +----------------|--+                   +--|-----------------+
+#                  |                         |
+# +----------------|-------------------------|-----------------+
+# | SW             |                         |                 |
+# | +--------------|-------------------------|---------------+ |
+# | |        $swp1 +                         + $swp2         | |
+# | |              |                         |               | |
+# | |     $swp1.10 +                         + $swp2.10      | |
+# | |                                                        | |
+# | |                           br0                          | |
+# | |                       192.0.2.2/28                     | |
+# | +--------------------------------------------------------+ |
+# |                                                            |
+# |      $swp3.10 +                                            |
+# | 192.0.2.17/28 |                                            |
+# |               |                                            |
+# |         $swp3 +                                            |
+# +---------------|--------------------------------------------+
+#                 |
+# +---------------|--+
+# |           $h3 +  |
+# |               |  |
+# |        $h3.10 +  |
+# | 192.0.2.18/28    |
+# |                  |
+# | H3               |
+# +------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	port_vid_map_rif
+	rif_port_vid_map
+"
+
+NUM_NETIFS=6
+source $lib_dir/lib.sh
+source $lib_dir/tc_common.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+	simple_if_init $h1
+	vlan_create $h1 10 v$h1 192.0.2.1/28
+
+	ip route add 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2
+}
+
+h1_destroy()
+{
+	ip route del 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2
+
+	vlan_destroy $h1 10
+	simple_if_fini $h1
+}
+
+h2_create()
+{
+	simple_if_init $h2
+	vlan_create $h2 10 v$h2 192.0.2.3/28
+}
+
+h2_destroy()
+{
+	vlan_destroy $h2 10
+	simple_if_fini $h2
+}
+
+h3_create()
+{
+	simple_if_init $h3
+	vlan_create $h3 10 v$h3 192.0.2.18/28
+
+	ip route add 192.0.2.0/28 vrf v$h3 nexthop via 192.0.2.17
+}
+
+h3_destroy()
+{
+	ip route del 192.0.2.0/28 vrf v$h3 nexthop via 192.0.2.17
+
+	vlan_destroy $h3 10
+	simple_if_fini $h3
+}
+
+switch_create()
+{
+	ip link set dev $swp1 up
+
+	ip link add dev br0 type bridge mcast_snooping 0
+
+	# By default, a link-local address is generated when netdevice becomes
+	# up. Adding an address to the bridge will cause creating a RIF for it.
+	# Prevent generating link-local address to be able to control when the
+	# RIF is added.
+	sysctl_set net.ipv6.conf.br0.addr_gen_mode 1
+	ip link set dev br0 up
+
+	ip link set dev $swp2 up
+	vlan_create $swp2 10
+	ip link set dev $swp2.10 master br0
+
+	ip link set dev $swp3 up
+	vlan_create $swp3 10 "" 192.0.2.17/28
+	tc qdisc add dev $swp3 clsact
+
+	# Replace neighbor to avoid 1 packet which is forwarded in software due
+	# to "unresolved neigh".
+	ip neigh replace dev $swp3.10 192.0.2.18 lladdr $(mac_get $h3.10)
+}
+
+switch_destroy()
+{
+	tc qdisc del dev $swp3 clsact
+	vlan_destroy $swp3 10
+	ip link set dev $swp3 down
+
+	ip link set dev $swp2.10 nomaster
+	vlan_destroy $swp2 10
+	ip link set dev $swp2 down
+
+	ip link set dev br0 down
+	sysctl_restore net.ipv6.conf.br0.addr_gen_mode
+	ip link del dev br0
+
+	ip link set dev $swp1 down
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	vrf_prepare
+	forwarding_enable
+
+	h1_create
+	h2_create
+	h3_create
+
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+
+	h3_destroy
+	h2_destroy
+	h1_destroy
+
+	forwarding_restore
+	vrf_cleanup
+}
+
+bridge_rif_add()
+{
+	rifs_occ_t0=$(devlink_resource_occ_get rifs)
+	__addr_add_del br0 add 192.0.2.2/28
+	rifs_occ_t1=$(devlink_resource_occ_get rifs)
+
+	expected_rifs=$((rifs_occ_t0 + 1))
+
+	[[ $expected_rifs -eq $rifs_occ_t1 ]]
+	check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+
+	sleep 1
+}
+
+bridge_rif_del()
+{
+	__addr_add_del br0 del 192.0.2.2/28
+}
+
+port_vid_map_rif()
+{
+	RET=0
+
+	# First add {port, VID}->FID for $swp1.10, then add a RIF and verify
+	# that packets can be routed via the existing mapping.
+	vlan_create $swp1 10
+	ip link set dev $swp1.10 master br0
+	bridge_rif_add
+
+	# The hardware matches on the first ethertype which is not VLAN,
+	# so the protocol should be IP.
+	tc filter add dev $swp3 egress protocol ip pref 1 handle 101 \
+		flower skip_sw dst_ip 192.0.2.18 action pass
+
+	ping_do $h1.10 192.0.2.18
+	check_err $? "Ping failed"
+
+	tc_check_at_least_x_packets "dev $swp3 egress" 101 10
+	check_err $? "Packets were not routed in hardware"
+
+	log_test "Add RIF for existing {port, VID}->FID mapping"
+
+	tc filter del dev $swp3 egress
+
+	bridge_rif_del
+	ip link set dev $swp1.10 nomaster
+	vlan_destroy $swp1 10
+}
+
+rif_port_vid_map()
+{
+	RET=0
+
+	# First add an address to the bridge, which will create a RIF on top of
+	# it, then add a new {port, VID}->FID mapping and verify that packets
+	# can be routed via the new mapping.
+	bridge_rif_add
+	vlan_create $swp1 10
+	ip link set dev $swp1.10 master br0
+
+	# The hardware matches on the first ethertype which is not VLAN,
+	# so the protocol should be IP.
+	tc filter add dev $swp3 egress protocol ip pref 1 handle 101 \
+		flower skip_sw dst_ip 192.0.2.18 action pass
+
+	ping_do $h1.10 192.0.2.18
+	check_err $? "Ping failed"
+
+	tc_check_at_least_x_packets "dev $swp3 egress" 101 10
+	check_err $? "Packets were not routed in hardware"
+
+	log_test "Add {port, VID}->FID mapping for FID with a RIF"
+
+	tc filter del dev $swp3 egress
+
+	ip link set dev $swp1.10 nomaster
+	vlan_destroy $swp1 10
+	bridge_rif_del
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
-- 
cgit v1.2.3


From 3a5ddc886847d4cd84bd2ce7bbbfdb3fd5845678 Mon Sep 17 00:00:00 2001
From: Amit Cohen <amcohen@nvidia.com>
Date: Wed, 17 Aug 2022 17:28:26 +0200
Subject: selftests: mlxsw: Add ingress RIF configuration test for 802.1Q
 bridge

Before layer 2 forwarding, the device classifies an incoming packet to a
FID. After classification, the FID is known, but also all the attributes of
the FID, such as the router interface (RIF) via which a packet that needs
to be routed will ingress the router block.

For VLAN-aware bridges (802.1Q), the FID classification is done according
to VID. When a RIF is added on top of a FID, the existing VID->FID mapping
should be updated by the software with the new RIF.

We never map multiple VLANs to the same FID using VID->FID, so with 802.1Q
we cannot create a VID->FID mapping for a FID which already has a RIF.
Still, verify that packets can be routed via a port which is added after
the FID already has a RIF.

Add a test to verify that packets can be routed after VID->FID
classification, regardless of the order of the configuration.

 # ./ingress_rif_conf_1q.sh
 TEST: Add RIF for existing VID->FID mapping                         [ OK ]
 TEST: Add port to VID->FID mapping for FID with a RIF               [ OK ]

Signed-off-by: Amit Cohen <amcohen@nvidia.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../drivers/net/mlxsw/ingress_rif_conf_1q.sh       | 264 +++++++++++++++++++++
 1 file changed, 264 insertions(+)
 create mode 100755 tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1q.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1q.sh b/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1q.sh
new file mode 100755
index 000000000000..577293bab88b
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1q.sh
@@ -0,0 +1,264 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test routing over bridge and verify that the order of configuration does not
+# impact switch behavior. Verify that RIF is added correctly for existing
+# mapping and that packets can be routed via port which is added after the FID
+# already has a RIF.
+
+# +-------------------+                   +--------------------+
+# | H1                |                   | H2                 |
+# |                   |                   |                    |
+# |         $h1.10 +  |                   |  + $h2.10          |
+# |   192.0.2.1/28 |  |                   |  | 192.0.2.3/28    |
+# |                |  |                   |  |                 |
+# |            $h1 +  |                   |  + $h2             |
+# +----------------|--+                   +--|-----------------+
+#                  |                         |
+# +----------------|-------------------------|-----------------+
+# | SW             |                         |                 |
+# | +--------------|-------------------------|---------------+ |
+# | |        $swp1 +                         + $swp2         | |
+# | |                                                        | |
+# | |                           br0                          | |
+# | +--------------------------------------------------------+ |
+# |                              |                             |
+# |                           br0.10                           |
+# |                        192.0.2.2/28                        |
+# |                                                            |
+# |                                                            |
+# |          $swp3 +                                           |
+# |  192.0.2.17/28 |                                           |
+# +----------------|-------------------------------------------+
+#                  |
+# +----------------|--+
+# |            $h3 +  |
+# |  192.0.2.18/28    |
+# |                   |
+# | H3                |
+# +-------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	vid_map_rif
+	rif_vid_map
+"
+
+NUM_NETIFS=6
+source $lib_dir/lib.sh
+source $lib_dir/tc_common.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+	simple_if_init $h1
+	vlan_create $h1 10 v$h1 192.0.2.1/28
+
+	ip route add 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2
+}
+
+h1_destroy()
+{
+	ip route del 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2
+
+	vlan_destroy $h1 10
+	simple_if_fini $h1
+}
+
+h2_create()
+{
+	simple_if_init $h2
+	vlan_create $h2 10 v$h2 192.0.2.3/28
+}
+
+h2_destroy()
+{
+	vlan_destroy $h2 10
+	simple_if_fini $h2
+}
+
+h3_create()
+{
+	simple_if_init $h3 192.0.2.18/28
+	ip route add 192.0.2.0/28 vrf v$h3 nexthop via 192.0.2.17
+}
+
+h3_destroy()
+{
+	ip route del 192.0.2.0/28 vrf v$h3 nexthop via 192.0.2.17
+	simple_if_fini $h3 192.0.2.18/28
+}
+
+switch_create()
+{
+	ip link set dev $swp1 up
+
+	ip link add dev br0 type bridge vlan_filtering 1 mcast_snooping 0
+
+	# By default, a link-local address is generated when netdevice becomes
+	# up. Adding an address to the bridge will cause creating a RIF for it.
+	# Prevent generating link-local address to be able to control when the
+	# RIF is added.
+	sysctl_set net.ipv6.conf.br0.addr_gen_mode 1
+	ip link set dev br0 up
+
+	ip link set dev $swp2 up
+	ip link set dev $swp2 master br0
+	bridge vlan add vid 10 dev $swp2
+
+	ip link set dev $swp3 up
+	__addr_add_del $swp3 add 192.0.2.17/28
+	tc qdisc add dev $swp3 clsact
+
+	# Replace neighbor to avoid 1 packet which is forwarded in software due
+	# to "unresolved neigh".
+	ip neigh replace dev $swp3 192.0.2.18 lladdr $(mac_get $h3)
+}
+
+switch_destroy()
+{
+	tc qdisc del dev $swp3 clsact
+	__addr_add_del $swp3 del 192.0.2.17/28
+	ip link set dev $swp3 down
+
+	bridge vlan del vid 10 dev $swp2
+	ip link set dev $swp2 nomaster
+	ip link set dev $swp2 down
+
+	ip link set dev br0 down
+	sysctl_restore net.ipv6.conf.br0.addr_gen_mode
+	ip link del dev br0
+
+	ip link set dev $swp1 down
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	vrf_prepare
+	forwarding_enable
+
+	h1_create
+	h2_create
+	h3_create
+
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+
+	h3_destroy
+	h2_destroy
+	h1_destroy
+
+	forwarding_restore
+	vrf_cleanup
+}
+
+bridge_rif_add()
+{
+	rifs_occ_t0=$(devlink_resource_occ_get rifs)
+	vlan_create br0 10 "" 192.0.2.2/28
+	rifs_occ_t1=$(devlink_resource_occ_get rifs)
+
+	expected_rifs=$((rifs_occ_t0 + 1))
+
+	[[ $expected_rifs -eq $rifs_occ_t1 ]]
+	check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+
+	sleep 1
+}
+
+bridge_rif_del()
+{
+	vlan_destroy br0 10
+}
+
+vid_map_rif()
+{
+	RET=0
+
+	# First add VID->FID for vlan 10, then add a RIF and verify that
+	# packets can be routed via the existing mapping.
+	bridge vlan add vid 10 dev br0 self
+	ip link set dev $swp1 master br0
+	bridge vlan add vid 10 dev $swp1
+
+	bridge_rif_add
+
+	tc filter add dev $swp3 egress protocol ip pref 1 handle 101 \
+		flower skip_sw dst_ip 192.0.2.18 action pass
+
+	ping_do $h1.10 192.0.2.18
+	check_err $? "Ping failed"
+
+	tc_check_at_least_x_packets "dev $swp3 egress" 101 10
+	check_err $? "Packets were not routed in hardware"
+
+	log_test "Add RIF for existing VID->FID mapping"
+
+	tc filter del dev $swp3 egress
+
+	bridge_rif_del
+
+	bridge vlan del vid 10 dev $swp1
+	ip link set dev $swp1 nomaster
+	bridge vlan del vid 10 dev br0 self
+}
+
+rif_vid_map()
+{
+	RET=0
+
+	# Using 802.1Q, there is only one VID->FID map for each VID. That means
+	# that we cannot really check adding a new map for existing FID with a
+	# RIF. Verify that packets can be routed via port which is added after
+	# the FID already has a RIF, although in practice there is no new
+	# mapping in the hardware.
+	bridge vlan add vid 10 dev br0 self
+	bridge_rif_add
+
+	ip link set dev $swp1 master br0
+	bridge vlan add vid 10 dev $swp1
+
+	tc filter add dev $swp3 egress protocol ip pref 1 handle 101 \
+		flower skip_sw dst_ip 192.0.2.18 action pass
+
+	ping_do $h1.10 192.0.2.18
+	check_err $? "Ping failed"
+
+	tc_check_at_least_x_packets "dev $swp3 egress" 101 10
+	check_err $? "Packets were not routed in hardware"
+
+	log_test "Add port to VID->FID mapping for FID with a RIF"
+
+	tc filter del dev $swp3 egress
+
+	bridge vlan del vid 10 dev $swp1
+	ip link set dev $swp1 nomaster
+
+	bridge_rif_del
+	bridge vlan del vid 10 dev br0 self
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
-- 
cgit v1.2.3


From cbeb6e1195d1f293a11534f5eeb7455af3c8f9d5 Mon Sep 17 00:00:00 2001
From: Amit Cohen <amcohen@nvidia.com>
Date: Wed, 17 Aug 2022 17:28:27 +0200
Subject: selftests: mlxsw: Add ingress RIF configuration test for VXLAN

Before layer 2 forwarding, the device classifies an incoming packet to a
FID. After classification, the FID is known, but also all the attributes of
the FID, such as the router interface (RIF) via which a packet that needs
to be routed will ingress the router block.

For VXLAN decapsulation, the FID classification is done according to the
VNI. When a RIF is added on top of a FID, the existing VNI->FID mapping
should be updated by the software with the new RIF. In addition, when a new
mapping is added for FID which already has a RIF, the correct RIF should
be used for it.

Add a test to verify that packets can be routed after decapsulation which
is done after VNI->FID classification, regardless of the order of the
configuration.

 # ./ingress_rif_conf_vxlan.sh
 TEST: Add RIF for existing VNI->FID mapping                         [ OK ]
 TEST: Add VNI->FID mapping for FID with a RIF                       [ OK ]

Signed-off-by: Amit Cohen <amcohen@nvidia.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../drivers/net/mlxsw/ingress_rif_conf_vxlan.sh    | 311 +++++++++++++++++++++
 1 file changed, 311 insertions(+)
 create mode 100755 tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_vxlan.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_vxlan.sh
new file mode 100755
index 000000000000..90450216a10d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_vxlan.sh
@@ -0,0 +1,311 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test routing after VXLAN decapsulation and verify that the order of
+# configuration does not impact switch behavior. Verify that RIF is added
+# correctly for existing mapping and that new mapping uses the correct RIF.
+
+# +---------------------------+
+# |                        H1 |
+# |    + $h1                  |
+# |    | 192.0.2.1/28         |
+# +----|----------------------+
+#      |
+# +----|----------------------------------------------------------------------+
+# | SW |                                                                      |
+# | +--|--------------------------------------------------------------------+ |
+# | |  + $swp1                         br1                                  | |
+# | |     vid 10 pvid untagged                                              | |
+# | |                                                                       | |
+# | |                                                                       | |
+# | |                                            + vx4001                   | |
+# | |                                              local 192.0.2.17         | |
+# | |                                              remote 192.0.2.18        | |
+# | |                                              id 104001                | |
+# | |                                              dstport $VXPORT          | |
+# | |                                              vid 4001 pvid untagged   | |
+# | |                                                                       | |
+# | +----------------------------------+------------------------------------+ |
+# |                                    |                                      |
+# | +----------------------------------|------------------------------------+ |
+# | |                                  |                                    | |
+# | |  +-------------------------------+---------------------------------+  | |
+# | |  |                                                                 |  | |
+# | |  + vlan10                                                 vlan4001 +  | |
+# | |    192.0.2.2/28                                                       | |
+# | |                                                                       | |
+# | |                               vrf-green                               | |
+# | +-----------------------------------------------------------------------+ |
+# |                                                                           |
+# |    + $rp1                                       +lo                       |
+# |    | 198.51.100.1/24                             192.0.2.17/32            |
+# +----|----------------------------------------------------------------------+
+#      |
+# +----|--------------------------------------------------------+
+# |    |                                             v$rp2      |
+# |    + $rp2                                                   |
+# |      198.51.100.2/24                                        |
+# |                                                             |
+# +-------------------------------------------------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	vni_fid_map_rif
+	rif_vni_fid_map
+"
+
+NUM_NETIFS=4
+source $lib_dir/lib.sh
+source $lib_dir/tc_common.sh
+source $lib_dir/devlink_lib.sh
+
+: ${VXPORT:=4789}
+export VXPORT
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/28
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 192.0.2.1/28
+}
+
+switch_create()
+{
+	ip link add name br1 type bridge vlan_filtering 1 vlan_default_pvid 0 \
+		mcast_snooping 0
+	# Make sure the bridge uses the MAC address of the local port and not
+	# that of the VxLAN's device.
+	ip link set dev br1 address $(mac_get $swp1)
+	ip link set dev br1 up
+
+	ip link set dev $rp1 up
+	ip address add dev $rp1 198.51.100.1/24
+
+	ip link set dev $swp1 master br1
+	ip link set dev $swp1 up
+	bridge vlan add vid 10 dev $swp1 pvid untagged
+
+	tc qdisc add dev $swp1 clsact
+
+	ip link add name vx4001 type vxlan id 104001 \
+		local 192.0.2.17 dstport $VXPORT \
+		nolearning noudpcsum tos inherit ttl 100
+	ip link set dev vx4001 up
+
+	ip link set dev vx4001 master br1
+
+	ip address add 192.0.2.17/32 dev lo
+
+	# Create SVIs.
+	vrf_create "vrf-green"
+	ip link set dev vrf-green up
+
+	ip link add link br1 name vlan10 up master vrf-green type vlan id 10
+
+	# Replace neighbor to avoid 1 packet which is forwarded in software due
+	# to "unresolved neigh".
+	ip neigh replace dev vlan10 192.0.2.1 lladdr $(mac_get $h1)
+
+	ip address add 192.0.2.2/28 dev vlan10
+
+	bridge vlan add vid 10 dev br1 self
+	bridge vlan add vid 4001 dev br1 self
+
+	sysctl_set net.ipv4.conf.all.rp_filter 0
+}
+
+switch_destroy()
+{
+	sysctl_restore net.ipv4.conf.all.rp_filter
+
+	bridge vlan del vid 4001 dev br1 self
+	bridge vlan del vid 10 dev br1 self
+
+	ip link del dev vlan10
+
+	vrf_destroy "vrf-green"
+
+	ip address del 192.0.2.17/32 dev lo
+
+	tc qdisc del dev $swp1 clsact
+
+	bridge vlan del vid 10 dev $swp1
+	ip link set dev $swp1 down
+	ip link set dev $swp1 nomaster
+
+	ip link set dev vx4001 nomaster
+
+	ip link set dev vx4001 down
+	ip link del dev vx4001
+
+	ip address del dev $rp1 198.51.100.1/24
+	ip link set dev $rp1 down
+
+	ip link set dev br1 down
+	ip link del dev br1
+}
+
+vrp2_create()
+{
+	simple_if_init $rp2 198.51.100.2/24
+
+	ip route add 192.0.2.17/32 vrf v$rp2 nexthop via 198.51.100.1
+}
+
+vrp2_destroy()
+{
+	ip route del 192.0.2.17/32 vrf v$rp2 nexthop via 198.51.100.1
+
+	simple_if_fini $rp2 198.51.100.2/24
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	rp1=${NETIFS[p3]}
+	rp2=${NETIFS[p4]}
+
+	vrf_prepare
+	forwarding_enable
+
+	h1_create
+	switch_create
+
+	vrp2_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	vrp2_destroy
+
+	switch_destroy
+	h1_destroy
+
+	forwarding_restore
+	vrf_cleanup
+}
+
+payload_get()
+{
+	local dest_mac=$(mac_get vlan4001)
+	local src_mac=$(mac_get $rp1)
+
+	p=$(:
+		)"08:"$(                      : VXLAN flags
+		)"00:00:00:"$(                : VXLAN reserved
+		)"01:96:41:"$(                : VXLAN VNI : 104001
+		)"00:"$(                      : VXLAN reserved
+		)"$dest_mac:"$(               : ETH daddr
+		)"$src_mac:"$(                : ETH saddr
+		)"08:00:"$(                   : ETH type
+		)"45:"$(                      : IP version + IHL
+		)"00:"$(                      : IP TOS
+		)"00:54:"$(                   : IP total length
+		)"3f:49:"$(                   : IP identification
+		)"00:00:"$(                   : IP flags + frag off
+		)"3f:"$(                      : IP TTL
+		)"01:"$(                      : IP proto
+		)"50:21:"$(                   : IP header csum
+		)"c6:33:64:0a:"$(             : IP saddr: 198.51.100.10
+		)"c0:00:02:01:"$(             : IP daddr: 192.0.2.1
+	)
+	echo $p
+}
+
+vlan_rif_add()
+{
+	rifs_occ_t0=$(devlink_resource_occ_get rifs)
+
+	ip link add link br1 name vlan4001 up master vrf-green \
+		type vlan id 4001
+
+	rifs_occ_t1=$(devlink_resource_occ_get rifs)
+	expected_rifs=$((rifs_occ_t0 + 1))
+
+	[[ $expected_rifs -eq $rifs_occ_t1 ]]
+	check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+}
+
+vlan_rif_del()
+{
+	ip link del dev vlan4001
+}
+
+vni_fid_map_rif()
+{
+	local rp1_mac=$(mac_get $rp1)
+
+	RET=0
+
+	# First add VNI->FID mapping to the FID of VLAN 4001
+	bridge vlan add vid 4001 dev vx4001 pvid untagged
+
+	# Add a RIF to the FID with VNI->FID mapping
+	vlan_rif_add
+
+	tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \
+		flower skip_sw dst_ip 192.0.2.1 action pass
+
+	payload=$(payload_get)
+	ip vrf exec v$rp2 $MZ $rp2 -c 10 -d 1msec -b $rp1_mac \
+		-B 192.0.2.17 -A 192.0.2.18 \
+		-t udp sp=12345,dp=$VXPORT,p=$payload -q
+
+	tc_check_at_least_x_packets "dev $swp1 egress" 101 10
+	check_err $? "Packets were not routed in hardware"
+
+	log_test "Add RIF for existing VNI->FID mapping"
+
+	tc filter del dev $swp1 egress
+
+	bridge vlan del vid 4001 dev vx4001 pvid untagged
+	vlan_rif_del
+}
+
+rif_vni_fid_map()
+{
+	local rp1_mac=$(mac_get $rp1)
+
+	RET=0
+
+	# First add a RIF to the FID of VLAN 4001
+	vlan_rif_add
+
+	# Add VNI->FID mapping to FID with a RIF
+	bridge vlan add vid 4001 dev vx4001 pvid untagged
+
+	tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \
+		flower skip_sw dst_ip 192.0.2.1 action pass
+
+	payload=$(payload_get)
+	ip vrf exec v$rp2 $MZ $rp2 -c 10 -d 1msec -b $rp1_mac \
+		-B 192.0.2.17 -A 192.0.2.18 \
+		-t udp sp=12345,dp=$VXPORT,p=$payload -q
+
+	tc_check_at_least_x_packets "dev $swp1 egress" 101 10
+	check_err $? "Packets were not routed in hardware"
+
+	log_test "Add VNI->FID mapping for FID with a RIF"
+
+	tc filter del dev $swp1 egress
+
+	bridge vlan del vid 4001 dev vx4001 pvid untagged
+	vlan_rif_del
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
-- 
cgit v1.2.3


From 1623d5719fdff46620eb55c5d4f0cf8af1afcdb4 Mon Sep 17 00:00:00 2001
From: Amit Cohen <amcohen@nvidia.com>
Date: Wed, 17 Aug 2022 17:28:28 +0200
Subject: selftests: mlxsw: Add egress VID classification test

After routing, the device always consults a table that determines the
packet's egress VID based on {egress RIF, egress local port}. In the
unified bridge model, it is up to software to maintain this table via
REIV register.

The table needs to be updated in the following flows:
1. When a RIF is set on a FID, for each FID's {Port, VID} mapping, a new
   {RIF, Port}->VID mapping should be created.
2. When a {Port, VID} is mapped to a FID and the FID already has a RIF,
   a new {RIF, Port}->VID mapping should be created.

Add a test to verify that packets get the correct VID after routing,
regardless of the order of the configuration.

 # ./egress_vid_classification.sh
 TEST: Add RIF for existing {port, VID}->FID mapping                 [ OK ]
 TEST: Add {port, VID}->FID mapping for FID with a RIF               [ OK ]

Signed-off-by: Amit Cohen <amcohen@nvidia.com>
Reviewed-by: Ido Schimmel <idosch@nvidia.com>
Signed-off-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../drivers/net/mlxsw/egress_vid_classification.sh | 273 +++++++++++++++++++++
 1 file changed, 273 insertions(+)
 create mode 100755 tools/testing/selftests/drivers/net/mlxsw/egress_vid_classification.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/drivers/net/mlxsw/egress_vid_classification.sh b/tools/testing/selftests/drivers/net/mlxsw/egress_vid_classification.sh
new file mode 100755
index 000000000000..0cf9e47e3209
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/egress_vid_classification.sh
@@ -0,0 +1,273 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test VLAN classification after routing and verify that the order of
+# configuration does not impact switch behavior. Verify that {RIF, Port}->VID
+# mapping is added correctly for existing {Port, VID}->FID mapping and that
+# {RIF, Port}->VID mapping is added correctly for new {Port, VID}->FID mapping.
+
+# +-------------------+                   +--------------------+
+# | H1                |                   | H2                 |
+# |                   |                   |                    |
+# |         $h1.10 +  |                   |  + $h2.10          |
+# |   192.0.2.1/28 |  |                   |  | 192.0.2.3/28    |
+# |                |  |                   |  |                 |
+# |            $h1 +  |                   |  + $h2             |
+# +----------------|--+                   +--|-----------------+
+#                  |                         |
+# +----------------|-------------------------|-----------------+
+# | SW             |                         |                 |
+# | +--------------|-------------------------|---------------+ |
+# | |        $swp1 +                         + $swp2         | |
+# | |              |                         |               | |
+# | |     $swp1.10 +                         + $swp2.10      | |
+# | |                                                        | |
+# | |                           br0                          | |
+# | |                       192.0.2.2/28                     | |
+# | +--------------------------------------------------------+ |
+# |                                                            |
+# |      $swp3.20 +                                            |
+# | 192.0.2.17/28 |                                            |
+# |               |                                            |
+# |         $swp3 +                                            |
+# +---------------|--------------------------------------------+
+#                 |
+# +---------------|--+
+# |           $h3 +  |
+# |               |  |
+# |        $h3.20 +  |
+# | 192.0.2.18/28    |
+# |                  |
+# | H3               |
+# +------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	port_vid_map_rif
+	rif_port_vid_map
+"
+
+NUM_NETIFS=6
+source $lib_dir/lib.sh
+source $lib_dir/tc_common.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+	simple_if_init $h1
+	vlan_create $h1 10 v$h1 192.0.2.1/28
+
+	ip route add 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2
+}
+
+h1_destroy()
+{
+	ip route del 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2
+
+	vlan_destroy $h1 10
+	simple_if_fini $h1
+}
+
+h2_create()
+{
+	simple_if_init $h2
+	vlan_create $h2 10 v$h2 192.0.2.3/28
+}
+
+h2_destroy()
+{
+	vlan_destroy $h2 10
+	simple_if_fini $h2
+}
+
+h3_create()
+{
+	simple_if_init $h3
+	vlan_create $h3 20 v$h3 192.0.2.18/28
+
+	ip route add 192.0.2.0/28 vrf v$h3 nexthop via 192.0.2.17
+}
+
+h3_destroy()
+{
+	ip route del 192.0.2.0/28 vrf v$h3 nexthop via 192.0.2.17
+
+	vlan_destroy $h3 20
+	simple_if_fini $h3
+}
+
+switch_create()
+{
+	ip link set dev $swp1 up
+	tc qdisc add dev $swp1 clsact
+
+	ip link add dev br0 type bridge mcast_snooping 0
+
+	# By default, a link-local address is generated when netdevice becomes
+	# up. Adding an address to the bridge will cause creating a RIF for it.
+	# Prevent generating link-local address to be able to control when the
+	# RIF is added.
+	sysctl_set net.ipv6.conf.br0.addr_gen_mode 1
+	ip link set dev br0 up
+
+	ip link set dev $swp2 up
+	vlan_create $swp2 10
+	ip link set dev $swp2.10 master br0
+
+	ip link set dev $swp3 up
+	vlan_create $swp3 20 "" 192.0.2.17/28
+
+	# Replace neighbor to avoid 1 packet which is forwarded in software due
+	# to "unresolved neigh".
+	ip neigh replace dev $swp3.20 192.0.2.18 lladdr $(mac_get $h3.20)
+}
+
+switch_destroy()
+{
+	vlan_destroy $swp3 20
+	ip link set dev $swp3 down
+
+	ip link set dev $swp2.10 nomaster
+	vlan_destroy $swp2 10
+	ip link set dev $swp2 down
+
+	ip link set dev br0 down
+	sysctl_restore net.ipv6.conf.br0.addr_gen_mode
+	ip link del dev br0
+
+	tc qdisc del dev $swp1 clsact
+	ip link set dev $swp1 down
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	vrf_prepare
+	forwarding_enable
+
+	h1_create
+	h2_create
+	h3_create
+
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+
+	h3_destroy
+	h2_destroy
+	h1_destroy
+
+	forwarding_restore
+	vrf_cleanup
+}
+
+bridge_rif_add()
+{
+	rifs_occ_t0=$(devlink_resource_occ_get rifs)
+	__addr_add_del br0 add 192.0.2.2/28
+	rifs_occ_t1=$(devlink_resource_occ_get rifs)
+
+	expected_rifs=$((rifs_occ_t0 + 1))
+
+	[[ $expected_rifs -eq $rifs_occ_t1 ]]
+	check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+
+	sleep 1
+}
+
+bridge_rif_del()
+{
+	__addr_add_del br0 del 192.0.2.2/28
+}
+
+port_vid_map_rif()
+{
+	RET=0
+
+	# First add {port, VID}->FID for swp1.10, then add a RIF and verify that
+	# packets get the correct VID after routing.
+	vlan_create $swp1 10
+	ip link set dev $swp1.10 master br0
+	bridge_rif_add
+
+	# Replace neighbor to avoid 1 packet which is forwarded in software due
+	# to "unresolved neigh".
+	ip neigh replace dev br0 192.0.2.1 lladdr $(mac_get $h1.10)
+
+	# The hardware matches on the first ethertype which is not VLAN,
+	# so the protocol should be IP.
+	tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \
+		flower skip_sw dst_ip 192.0.2.1 action pass
+
+	ping_do $h1.10 192.0.2.18
+	check_err $? "Ping failed"
+
+	tc_check_at_least_x_packets "dev $swp1 egress" 101 10
+	check_err $? "Packets were not routed in hardware"
+
+	log_test "Add RIF for existing {port, VID}->FID mapping"
+
+	tc filter del dev $swp1 egress
+
+	bridge_rif_del
+	ip link set dev $swp1.10 nomaster
+	vlan_destroy $swp1 10
+}
+
+rif_port_vid_map()
+{
+	RET=0
+
+	# First add an address to the bridge, which will create a RIF on top of
+	# it, then add a new {port, VID}->FID mapping and verify that packets
+	# get the correct VID after routing.
+	bridge_rif_add
+	vlan_create $swp1 10
+	ip link set dev $swp1.10 master br0
+
+	# Replace neighbor to avoid 1 packet which is forwarded in software due
+	# to "unresolved neigh".
+	ip neigh replace dev br0 192.0.2.1 lladdr $(mac_get $h1.10)
+
+	# The hardware matches on the first ethertype which is not VLAN,
+	# so the protocol should be IP.
+	tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \
+		flower skip_sw dst_ip 192.0.2.1 action pass
+
+	ping_do $h1.10 192.0.2.18
+	check_err $? "Ping failed"
+
+	tc_check_at_least_x_packets "dev $swp1 egress" 101 10
+	check_err $? "Packets were not routed in hardware"
+
+	log_test "Add {port, VID}->FID mapping for FID with a RIF"
+
+	tc filter del dev $swp1 egress
+
+	ip link set dev $swp1.10 nomaster
+	vlan_destroy $swp1 10
+	bridge_rif_del
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
-- 
cgit v1.2.3


From e918cd231ee6f1dc969e71718ed11c71e98f5c4c Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Wed, 17 Aug 2022 22:32:42 +0100
Subject: selftests/bpf: Fix spelling mistake.

There is a spelling mistake in an ASSERT_OK literal string. Fix it.

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Acked-by: Mykola Lysenko <mykolal@fb.com>
Link: https://lore.kernel.org/r/20220817213242.101277-1-colin.i.king@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/prog_tests/kfunc_call.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
index 351fafa006fb..eede7c304f86 100644
--- a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
+++ b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
@@ -109,7 +109,7 @@ static void test_destructive(void)
 {
 	__u64 save_caps = 0;
 
-	ASSERT_OK(test_destructive_open_and_load(), "succesful_load");
+	ASSERT_OK(test_destructive_open_and_load(), "successful_load");
 
 	if (!ASSERT_OK(cap_disable_effective(1ULL << CAP_SYS_BOOT, &save_caps), "drop_caps"))
 		return;
-- 
cgit v1.2.3


From b979f005d9b1ebdba565e85f5228dda6fe7a30e4 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Fri, 19 Aug 2022 12:21:55 -0700
Subject: selftest/bpf: Add setget_sockopt to DENYLIST.s390x

Trampoline is not supported in s390.

Fixes: 31123c0360e0 ("selftests/bpf: bpf_setsockopt tests")
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/r/20220819192155.91713-1-kafai@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/DENYLIST.s390x | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x
index 9d8de15e725e..a708c3dcc154 100644
--- a/tools/testing/selftests/bpf/DENYLIST.s390x
+++ b/tools/testing/selftests/bpf/DENYLIST.s390x
@@ -65,3 +65,4 @@ send_signal                              # intermittently fails to receive signa
 select_reuseport                         # intermittently fails on new s390x setup
 xdp_synproxy                             # JIT does not support calling kernel function                                (kfunc)
 unpriv_bpf_disabled                      # fentry
+setget_sockopt                           # attach unexpected error: -524                                               (trampoline)
-- 
cgit v1.2.3


From b690842d12fd6687c326663d69d5732de00c00f6 Mon Sep 17 00:00:00 2001
From: Matthias May <matthias.may@westermo.com>
Date: Wed, 17 Aug 2022 09:36:49 +0200
Subject: selftests/net: test l2 tunnel TOS/TTL inheriting

There are currently 3 ip tunnels that are capable of carrying
L2 traffic: gretap, vxlan and geneve.
They are all capable of inheriting the TOS/TTL for the outer
IP-header from the inner frame.

Add a test that verifies that these fields are correctly inherited.

These tests failed before the following commits:
b09ab9c92e50 ("ip6_tunnel: allow to inherit from VLAN encapsulated IP")
3f8a8447fd0b ("ip6_gre: use actual protocol to select xmit")
41337f52b967 ("ip6_gre: set DSCP for non-IP")
7ae29fd1be43 ("ip_tunnel: allow to inherit from VLAN encapsulated IP")
7074732c8fae ("ip_tunnels: allow VXLAN/GENEVE to inherit TOS/TTL from VLAN")
ca2bb69514a8 ("geneve: do not use RT_TOS for IPv6 flowlabel")
b4ab94d6adaa ("geneve: fix TOS inheriting for ipv4")

Signed-off-by: Matthias May <matthias.may@westermo.com>
Link: https://lore.kernel.org/r/20220817073649.26117-1-matthias.may@westermo.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/Makefile              |   1 +
 tools/testing/selftests/net/l2_tos_ttl_inherit.sh | 390 ++++++++++++++++++++++
 2 files changed, 391 insertions(+)
 create mode 100755 tools/testing/selftests/net/l2_tos_ttl_inherit.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index c0ee2955fe54..11a288b67e2f 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -42,6 +42,7 @@ TEST_PROGS += arp_ndisc_evict_nocarrier.sh
 TEST_PROGS += ndisc_unsolicited_na_test.sh
 TEST_PROGS += arp_ndisc_untracked_subnets.sh
 TEST_PROGS += stress_reuseport_listen.sh
+TEST_PROGS += l2_tos_ttl_inherit.sh
 TEST_PROGS_EXTENDED := in_netns.sh setup_loopback.sh setup_veth.sh
 TEST_PROGS_EXTENDED += toeplitz_client.sh toeplitz.sh
 TEST_GEN_FILES =  socket nettest
diff --git a/tools/testing/selftests/net/l2_tos_ttl_inherit.sh b/tools/testing/selftests/net/l2_tos_ttl_inherit.sh
new file mode 100755
index 000000000000..dca1e6f777a8
--- /dev/null
+++ b/tools/testing/selftests/net/l2_tos_ttl_inherit.sh
@@ -0,0 +1,390 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+# Author: Matthias May <matthias.may@westermo.com>
+#
+# This script evaluates ip tunnels that are capable of carrying L2 traffic
+# if they inherit or set the inheritable fields.
+# Namely these tunnels are: 'gretap', 'vxlan' and 'geneve'.
+# Checked inheritable fields are: TOS and TTL.
+# The outer tunnel protocol of 'IPv4' or 'IPv6' is verified.
+# As payload frames of type 'IPv4', 'IPv6' and 'other'(ARP) are verified.
+# In addition this script also checks if forcing a specific field in the
+# outer header is working.
+
+if [ "$(id -u)" != "0" ]; then
+	echo "Please run as root."
+	exit 0
+fi
+if ! which tcpdump > /dev/null 2>&1; then
+	echo "No tcpdump found. Required for this test."
+	exit 0
+fi
+
+expected_tos="0x00"
+expected_ttl="0"
+failed=false
+
+get_random_tos() {
+	# Get a random hex tos value between 0x00 and 0xfc, a multiple of 4
+	echo "0x$(tr -dc '0-9a-f' < /dev/urandom | head -c 1)\
+$(tr -dc '048c' < /dev/urandom | head -c 1)"
+}
+get_random_ttl() {
+	# Get a random dec value between 0 and 255
+	printf "%d" "0x$(tr -dc '0-9a-f' < /dev/urandom | head -c 2)"
+}
+get_field() {
+	# Expects to get the 'head -n 1' of a captured frame by tcpdump.
+	# Parses this first line and returns the specified field.
+	local field="$1"
+	local input="$2"
+	local found=false
+	input="$(echo "$input" | tr -d '(),')"
+	for input_field in $input; do
+		if $found; then
+			echo "$input_field"
+			return
+		fi
+		# The next field that we iterate over is the looked for value
+		if [ "$input_field" = "$field" ]; then
+			found=true
+		fi
+	done
+	echo "0"
+}
+setup() {
+	local type="$1"
+	local outer="$2"
+	local inner="$3"
+	local tos_ttl="$4"
+	local vlan="$5"
+	local test_tos="0x00"
+	local test_ttl="0"
+	local ns="ip netns exec testing"
+
+	# We don't want a test-tos of 0x00,
+	# because this is the value that we get when no tos is set.
+	expected_tos="$(get_random_tos)"
+	while [ "$expected_tos" = "0x00" ]; do
+		expected_tos="$(get_random_tos)"
+	done
+	if [ "$tos_ttl" = "random" ]; then
+		test_tos="$expected_tos"
+		tos="fixed $test_tos"
+	elif [ "$tos_ttl" = "inherit" ]; then
+		test_tos="$tos_ttl"
+		tos="inherit $expected_tos"
+	fi
+
+	# We don't want a test-ttl of 64 or 0,
+	# because 64 is when no ttl is set and 0 is not a valid ttl.
+	expected_ttl="$(get_random_ttl)"
+	while [ "$expected_ttl" = "64" ] || [ "$expected_ttl" = "0" ]; do
+		expected_ttl="$(get_random_ttl)"
+	done
+
+	if [ "$tos_ttl" = "random" ]; then
+		test_ttl="$expected_ttl"
+		ttl="fixed $test_ttl"
+	elif [ "$tos_ttl" = "inherit" ]; then
+		test_ttl="$tos_ttl"
+		ttl="inherit $expected_ttl"
+	fi
+	printf "│%7s │%6s │%6s │%13s │%13s │%6s │" \
+	"$type" "$outer" "$inner" "$tos" "$ttl" "$vlan"
+
+	# Create 'testing' netns, veth pair and connect main ns with testing ns
+	ip netns add testing
+	ip link add type veth
+	ip link set veth1 netns testing
+	ip link set veth0 up
+	$ns ip link set veth1 up
+	ip addr flush dev veth0
+	$ns ip addr flush dev veth1
+
+	local local_addr1=""
+	local local_addr2=""
+	if [ "$type" = "gre" ] || [ "$type" = "vxlan" ]; then
+		if [ "$outer" = "4" ]; then
+			local_addr1="local 198.18.0.1"
+			local_addr2="local 198.18.0.2"
+		elif [ "$outer" = "6" ]; then
+			local_addr1="local fdd1:ced0:5d88:3fce::1"
+			local_addr2="local fdd1:ced0:5d88:3fce::2"
+		fi
+	fi
+	local vxlan=""
+	if [ "$type" = "vxlan" ]; then
+		vxlan="vni 100 dstport 4789"
+	fi
+	local geneve=""
+	if [ "$type" = "geneve" ]; then
+		geneve="vni 100"
+	fi
+	# Create tunnel and assign outer IPv4/IPv6 addresses
+	if [ "$outer" = "4" ]; then
+		if [ "$type" = "gre" ]; then
+			type="gretap"
+		fi
+		ip addr add 198.18.0.1/24 dev veth0
+		$ns ip addr add 198.18.0.2/24 dev veth1
+		ip link add name tep0 type $type $local_addr1 remote \
+		198.18.0.2 tos $test_tos ttl $test_ttl $vxlan $geneve
+		$ns ip link add name tep1 type $type $local_addr2 remote \
+		198.18.0.1 tos $test_tos ttl $test_ttl $vxlan $geneve
+	elif [ "$outer" = "6" ]; then
+		if [ "$type" = "gre" ]; then
+			type="ip6gretap"
+		fi
+		ip addr add fdd1:ced0:5d88:3fce::1/64 dev veth0
+		$ns ip addr add fdd1:ced0:5d88:3fce::2/64 dev veth1
+		ip link add name tep0 type $type $local_addr1 \
+		remote fdd1:ced0:5d88:3fce::2 tos $test_tos ttl $test_ttl \
+		$vxlan $geneve
+		$ns ip link add name tep1 type $type $local_addr2 \
+		remote fdd1:ced0:5d88:3fce::1 tos $test_tos ttl $test_ttl \
+		$vxlan $geneve
+	fi
+
+	# Bring L2-tunnel link up and create VLAN on top
+	ip link set tep0 up
+	$ns ip link set tep1 up
+	ip addr flush dev tep0
+	$ns ip addr flush dev tep1
+	local parent
+	if $vlan; then
+		parent="vlan99-"
+		ip link add link tep0 name ${parent}0 type vlan id 99
+		$ns ip link add link tep1 name ${parent}1 type vlan id 99
+		ip link set ${parent}0 up
+		$ns ip link set ${parent}1 up
+		ip addr flush dev ${parent}0
+		$ns ip addr flush dev ${parent}1
+	else
+		parent="tep"
+	fi
+
+	# Assign inner IPv4/IPv6 addresses
+	if [ "$inner" = "4" ] || [ "$inner" = "other" ]; then
+		ip addr add 198.19.0.1/24 brd + dev ${parent}0
+		$ns ip addr add 198.19.0.2/24 brd + dev ${parent}1
+	elif [ "$inner" = "6" ]; then
+		ip addr add fdd4:96cf:4eae:443b::1/64 dev ${parent}0
+		$ns ip addr add fdd4:96cf:4eae:443b::2/64 dev ${parent}1
+	fi
+}
+
+verify() {
+	local outer="$1"
+	local inner="$2"
+	local tos_ttl="$3"
+	local vlan="$4"
+
+	local ping_pid out captured_tos captured_ttl result
+
+	local ping_dst
+	if [ "$inner" = "4" ]; then
+		ping_dst="198.19.0.2"
+	elif [ "$inner" = "6" ]; then
+		ping_dst="fdd4:96cf:4eae:443b::2"
+	elif [ "$inner" = "other" ]; then
+		ping_dst="198.19.0.3" # Generates ARPs which are not IPv4/IPv6
+	fi
+	if [ "$tos_ttl" = "inherit" ]; then
+		ping -i 0.1 $ping_dst -Q "$expected_tos" -t "$expected_ttl" \
+		2>/dev/null 1>&2 & ping_pid="$!"
+	else
+		ping -i 0.1 $ping_dst 2>/dev/null 1>&2 & ping_pid="$!"
+	fi
+	local tunnel_type_offset tunnel_type_proto req_proto_offset req_offset
+	if [ "$type" = "gre" ]; then
+		tunnel_type_proto="0x2f"
+	elif [ "$type" = "vxlan" ] || [ "$type" = "geneve" ]; then
+		tunnel_type_proto="0x11"
+	fi
+	if [ "$outer" = "4" ]; then
+		tunnel_type_offset="9"
+		if [ "$inner" = "4" ]; then
+			req_proto_offset="47"
+			req_offset="58"
+			if [ "$type" = "vxlan" ] || [ "$type" = "geneve" ]; then
+				req_proto_offset="$((req_proto_offset + 12))"
+				req_offset="$((req_offset + 12))"
+			fi
+			if $vlan; then
+				req_proto_offset="$((req_proto_offset + 4))"
+				req_offset="$((req_offset + 4))"
+			fi
+			out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \
+			ip[$tunnel_type_offset] = $tunnel_type_proto and \
+			ip[$req_proto_offset] = 0x01 and \
+			ip[$req_offset] = 0x08 2>/dev/null | head -n 1)"
+		elif [ "$inner" = "6" ]; then
+			req_proto_offset="44"
+			req_offset="78"
+			if [ "$type" = "vxlan" ] || [ "$type" = "geneve" ]; then
+				req_proto_offset="$((req_proto_offset + 12))"
+				req_offset="$((req_offset + 12))"
+			fi
+			if $vlan; then
+				req_proto_offset="$((req_proto_offset + 4))"
+				req_offset="$((req_offset + 4))"
+			fi
+			out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \
+			ip[$tunnel_type_offset] = $tunnel_type_proto and \
+			ip[$req_proto_offset] = 0x3a and \
+			ip[$req_offset] = 0x80 2>/dev/null | head -n 1)"
+		elif [ "$inner" = "other" ]; then
+			req_proto_offset="36"
+			req_offset="45"
+			if [ "$type" = "vxlan" ] || [ "$type" = "geneve" ]; then
+				req_proto_offset="$((req_proto_offset + 12))"
+				req_offset="$((req_offset + 12))"
+			fi
+			if $vlan; then
+				req_proto_offset="$((req_proto_offset + 4))"
+				req_offset="$((req_offset + 4))"
+			fi
+			if [ "$tos_ttl" = "inherit" ]; then
+				expected_tos="0x00"
+				expected_ttl="64"
+			fi
+			out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \
+			ip[$tunnel_type_offset] = $tunnel_type_proto and \
+			ip[$req_proto_offset] = 0x08 and \
+			ip[$((req_proto_offset + 1))] = 0x06 and \
+			ip[$req_offset] = 0x01 2>/dev/null | head -n 1)"
+		fi
+	elif [ "$outer" = "6" ]; then
+		if [ "$type" = "gre" ]; then
+			tunnel_type_offset="40"
+		elif [ "$type" = "vxlan" ] || [ "$type" = "geneve" ]; then
+			tunnel_type_offset="6"
+		fi
+		if [ "$inner" = "4" ]; then
+			local req_proto_offset="75"
+			local req_offset="86"
+			if [ "$type" = "vxlan" ] || [ "$type" = "geneve" ]; then
+				req_proto_offset="$((req_proto_offset + 4))"
+				req_offset="$((req_offset + 4))"
+			fi
+			if $vlan; then
+				req_proto_offset="$((req_proto_offset + 4))"
+				req_offset="$((req_offset + 4))"
+			fi
+			out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \
+			ip6[$tunnel_type_offset] = $tunnel_type_proto and \
+			ip6[$req_proto_offset] = 0x01 and \
+			ip6[$req_offset] = 0x08 2>/dev/null | head -n 1)"
+		elif [ "$inner" = "6" ]; then
+			local req_proto_offset="72"
+			local req_offset="106"
+			if [ "$type" = "vxlan" ] || [ "$type" = "geneve" ]; then
+				req_proto_offset="$((req_proto_offset + 4))"
+				req_offset="$((req_offset + 4))"
+			fi
+			if $vlan; then
+				req_proto_offset="$((req_proto_offset + 4))"
+				req_offset="$((req_offset + 4))"
+			fi
+			out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \
+			ip6[$tunnel_type_offset] = $tunnel_type_proto and \
+			ip6[$req_proto_offset] = 0x3a and \
+			ip6[$req_offset] = 0x80 2>/dev/null | head -n 1)"
+		elif [ "$inner" = "other" ]; then
+			local req_proto_offset="64"
+			local req_offset="73"
+			if [ "$type" = "vxlan" ] || [ "$type" = "geneve" ]; then
+				req_proto_offset="$((req_proto_offset + 4))"
+				req_offset="$((req_offset + 4))"
+			fi
+			if $vlan; then
+				req_proto_offset="$((req_proto_offset + 4))"
+				req_offset="$((req_offset + 4))"
+			fi
+			if [ "$tos_ttl" = "inherit" ]; then
+				expected_tos="0x00"
+				expected_ttl="64"
+			fi
+			out="$(tcpdump --immediate-mode -p -c 1 -v -i veth0 -n \
+			ip6[$tunnel_type_offset] = $tunnel_type_proto and \
+			ip6[$req_proto_offset] = 0x08 and \
+			ip6[$((req_proto_offset + 1))] = 0x06 and \
+			ip6[$req_offset] = 0x01 2>/dev/null | head -n 1)"
+		fi
+	fi
+	kill -9 $ping_pid
+	wait $ping_pid 2>/dev/null
+	result="FAIL"
+	if [ "$outer" = "4" ]; then
+		captured_ttl="$(get_field "ttl" "$out")"
+		captured_tos="$(printf "0x%02x" "$(get_field "tos" "$out")")"
+		if [ "$captured_tos" = "$expected_tos" ] &&
+		   [ "$captured_ttl" = "$expected_ttl" ]; then
+			result="OK"
+		fi
+	elif [ "$outer" = "6" ]; then
+		captured_ttl="$(get_field "hlim" "$out")"
+		captured_tos="$(printf "0x%02x" "$(get_field "class" "$out")")"
+		if [ "$captured_tos" = "$expected_tos" ] &&
+		   [ "$captured_ttl" = "$expected_ttl" ]; then
+			result="OK"
+		fi
+	fi
+
+	printf "%7s │\n" "$result"
+	if [ "$result" = "FAIL" ]; then
+		failed=true
+		if [ "$captured_tos" != "$expected_tos" ]; then
+			printf "│%43s%27s │\n" \
+			"Expected TOS value: $expected_tos" \
+			"Captured TOS value: $captured_tos"
+		fi
+		if [ "$captured_ttl" != "$expected_ttl" ]; then
+			printf "│%43s%27s │\n" \
+			"Expected TTL value: $expected_ttl" \
+			"Captured TTL value: $captured_ttl"
+		fi
+		printf "│%71s│\n" " "
+	fi
+}
+
+cleanup() {
+	ip link del veth0 2>/dev/null
+	ip netns del testing 2>/dev/null
+	ip link del tep0 2>/dev/null
+}
+
+printf "┌────────┬───────┬───────┬──────────────┬"
+printf "──────────────┬───────┬────────┐\n"
+for type in gre vxlan geneve; do
+	if ! $(modprobe "$type" 2>/dev/null); then
+		continue
+	fi
+	for outer in 4 6; do
+		printf "├────────┼───────┼───────┼──────────────┼"
+		printf "──────────────┼───────┼────────┤\n"
+		printf "│  Type  │ outer | inner │     tos      │"
+		printf "      ttl     │  vlan │ result │\n"
+		for inner in 4 6 other; do
+			printf "├────────┼───────┼───────┼──────────────┼"
+			printf "──────────────┼───────┼────────┤\n"
+			for tos_ttl in inherit random; do
+				for vlan in false true; do
+					setup "$type" "$outer" "$inner" \
+					"$tos_ttl" "$vlan"
+					verify "$outer" "$inner" "$tos_ttl" \
+					"$vlan"
+					cleanup
+				done
+			done
+		done
+	done
+done
+printf "└────────┴───────┴───────┴──────────────┴"
+printf "──────────────┴───────┴────────┘\n"
+
+if $failed; then
+	exit 1
+fi
-- 
cgit v1.2.3


From b1346338fbaefac1b796a50478f8e8070b54e9e4 Mon Sep 17 00:00:00 2001
From: Arseniy Krasnov <AVKrasnov@sberdevices.ru>
Date: Fri, 19 Aug 2022 05:43:50 +0000
Subject: vsock_test: POLLIN + SO_RCVLOWAT test

This adds a test to check that when poll() returns the POLLIN and POLLRDNORM
bits, the next read call won't block.

Signed-off-by: Arseniy Krasnov <AVKrasnov@sberdevices.ru>
Reviewed-by: Stefano Garzarella <sgarzare@redhat.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 tools/testing/vsock/vsock_test.c | 108 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 108 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c
index dc577461afc2..bb6d691cb30d 100644
--- a/tools/testing/vsock/vsock_test.c
+++ b/tools/testing/vsock/vsock_test.c
@@ -18,6 +18,7 @@
 #include <sys/socket.h>
 #include <time.h>
 #include <sys/mman.h>
+#include <poll.h>
 
 #include "timeout.h"
 #include "control.h"
@@ -596,6 +597,108 @@ static void test_seqpacket_invalid_rec_buffer_server(const struct test_opts *opt
 	close(fd);
 }
 
+#define RCVLOWAT_BUF_SIZE 128
+
+static void test_stream_poll_rcvlowat_server(const struct test_opts *opts)
+{
+	int fd;
+	int i;
+
+	fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL);
+	if (fd < 0) {
+		perror("accept");
+		exit(EXIT_FAILURE);
+	}
+
+	/* Send 1 byte. */
+	send_byte(fd, 1, 0);
+
+	control_writeln("SRVSENT");
+
+	/* Wait until client is ready to receive rest of data. */
+	control_expectln("CLNSENT");
+
+	for (i = 0; i < RCVLOWAT_BUF_SIZE - 1; i++)
+		send_byte(fd, 1, 0);
+
+	/* Keep socket in active state. */
+	control_expectln("POLLDONE");
+
+	close(fd);
+}
+
+static void test_stream_poll_rcvlowat_client(const struct test_opts *opts)
+{
+	unsigned long lowat_val = RCVLOWAT_BUF_SIZE;
+	char buf[RCVLOWAT_BUF_SIZE];
+	struct pollfd fds;
+	ssize_t read_res;
+	short poll_flags;
+	int fd;
+
+	fd = vsock_stream_connect(opts->peer_cid, 1234);
+	if (fd < 0) {
+		perror("connect");
+		exit(EXIT_FAILURE);
+	}
+
+	if (setsockopt(fd, SOL_SOCKET, SO_RCVLOWAT,
+		       &lowat_val, sizeof(lowat_val))) {
+		perror("setsockopt");
+		exit(EXIT_FAILURE);
+	}
+
+	control_expectln("SRVSENT");
+
+	/* At this point, server sent 1 byte. */
+	fds.fd = fd;
+	poll_flags = POLLIN | POLLRDNORM;
+	fds.events = poll_flags;
+
+	/* Try to wait for 1 sec. */
+	if (poll(&fds, 1, 1000) < 0) {
+		perror("poll");
+		exit(EXIT_FAILURE);
+	}
+
+	/* poll() must return nothing. */
+	if (fds.revents) {
+		fprintf(stderr, "Unexpected poll result %hx\n",
+			fds.revents);
+		exit(EXIT_FAILURE);
+	}
+
+	/* Tell server to send rest of data. */
+	control_writeln("CLNSENT");
+
+	/* Poll for data. */
+	if (poll(&fds, 1, 10000) < 0) {
+		perror("poll");
+		exit(EXIT_FAILURE);
+	}
+
+	/* Only these two bits are expected. */
+	if (fds.revents != poll_flags) {
+		fprintf(stderr, "Unexpected poll result %hx\n",
+			fds.revents);
+		exit(EXIT_FAILURE);
+	}
+
+	/* Use MSG_DONTWAIT, if call is going to wait, EAGAIN
+	 * will be returned.
+	 */
+	read_res = recv(fd, buf, sizeof(buf), MSG_DONTWAIT);
+	if (read_res != RCVLOWAT_BUF_SIZE) {
+		fprintf(stderr, "Unexpected recv result %zi\n",
+			read_res);
+		exit(EXIT_FAILURE);
+	}
+
+	control_writeln("POLLDONE");
+
+	close(fd);
+}
+
 static struct test_case test_cases[] = {
 	{
 		.name = "SOCK_STREAM connection reset",
@@ -646,6 +749,11 @@ static struct test_case test_cases[] = {
 		.run_client = test_seqpacket_invalid_rec_buffer_client,
 		.run_server = test_seqpacket_invalid_rec_buffer_server,
 	},
+	{
+		.name = "SOCK_STREAM poll() + SO_RCVLOWAT",
+		.run_client = test_stream_poll_rcvlowat_client,
+		.run_server = test_stream_poll_rcvlowat_server,
+	},
 	{},
 };
 
-- 
cgit v1.2.3


From 91350fe152930c0d61a362af68272526490efea5 Mon Sep 17 00:00:00 2001
From: Shmulik Ladkani <shmulik.ladkani@gmail.com>
Date: Sun, 21 Aug 2022 14:35:17 +0300
Subject: bpf, flow_dissector: Introduce BPF_FLOW_DISSECTOR_CONTINUE retcode
 for bpf progs

Currently, attaching BPF_PROG_TYPE_FLOW_DISSECTOR programs completely
replaces the flow-dissector logic with custom dissection logic. This
forces implementors to write programs that handle dissection for any
flows expected in the namespace.

It makes sense for flow-dissector BPF programs to just augment the
dissector with custom logic (e.g. dissecting certain flows or custom
protocols), while enjoying the broad capabilities of the standard
dissector for any other traffic.

Introduce BPF_FLOW_DISSECTOR_CONTINUE retcode. Flow-dissector BPF
programs may return this to indicate no dissection was made, and
fallback to the standard dissector is requested.

Signed-off-by: Shmulik Ladkani <shmulik.ladkani@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Stanislav Fomichev <sdf@google.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20220821113519.116765-3-shmulik.ladkani@gmail.com
---
 include/uapi/linux/bpf.h       | 5 +++++
 net/core/flow_dissector.c      | 3 +++
 tools/include/uapi/linux/bpf.h | 5 +++++
 3 files changed, 13 insertions(+)

(limited to 'tools')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 934a2a8beb87..7f87012b012e 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5861,6 +5861,11 @@ enum bpf_ret_code {
 	 *    represented by BPF_REDIRECT above).
 	 */
 	BPF_LWT_REROUTE = 128,
+	/* BPF_FLOW_DISSECTOR_CONTINUE: used by BPF_PROG_TYPE_FLOW_DISSECTOR
+	 *   to indicate that no custom dissection was performed, and
+	 *   fallback to standard dissector is requested.
+	 */
+	BPF_FLOW_DISSECTOR_CONTINUE = 129,
 };
 
 struct bpf_sock {
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index a01817fb4ef4..990429c69ccd 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -1022,11 +1022,14 @@ bool __skb_flow_dissect(const struct net *net,
 			prog = READ_ONCE(run_array->items[0].prog);
 			result = bpf_flow_dissect(prog, &ctx, n_proto, nhoff,
 						  hlen, flags);
+			if (result == BPF_FLOW_DISSECTOR_CONTINUE)
+				goto dissect_continue;
 			__skb_flow_bpf_to_target(&flow_keys, flow_dissector,
 						 target_container);
 			rcu_read_unlock();
 			return result == BPF_OK;
 		}
+dissect_continue:
 		rcu_read_unlock();
 	}
 
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 1d6085e15fc8..f38814fbb618 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -5861,6 +5861,11 @@ enum bpf_ret_code {
 	 *    represented by BPF_REDIRECT above).
 	 */
 	BPF_LWT_REROUTE = 128,
+	/* BPF_FLOW_DISSECTOR_CONTINUE: used by BPF_PROG_TYPE_FLOW_DISSECTOR
+	 *   to indicate that no custom dissection was performed, and
+	 *   fallback to standard dissector is requested.
+	 */
+	BPF_FLOW_DISSECTOR_CONTINUE = 129,
 };
 
 struct bpf_sock {
-- 
cgit v1.2.3


From 5deedfbee84278da3b76fb7176dc3742f56eb370 Mon Sep 17 00:00:00 2001
From: Shmulik Ladkani <shmulik.ladkani@gmail.com>
Date: Sun, 21 Aug 2022 14:35:18 +0300
Subject: bpf, test_run: Propagate bpf_flow_dissect's retval to user's
 bpf_attr.test.retval

Formerly, a boolean denoting whether bpf_flow_dissect returned BPF_OK
was set into 'bpf_attr.test.retval'.

Augment this, so users can check the actual return code of the dissector
program under test.

Existing prog_tests/flow_dissector*.c tests were correspondingly changed
to check against each test's expected retval.

Also, tests' resulting 'flow_keys' are verified only in case the expected
retval is BPF_OK. This allows adding new tests that expect non BPF_OK.

Signed-off-by: Shmulik Ladkani <shmulik.ladkani@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Stanislav Fomichev <sdf@google.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20220821113519.116765-4-shmulik.ladkani@gmail.com
---
 net/bpf/test_run.c                                 |  2 +-
 .../selftests/bpf/prog_tests/flow_dissector.c      | 23 +++++++++++++++++++++-
 .../bpf/prog_tests/flow_dissector_load_bytes.c     |  2 +-
 3 files changed, 24 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 51c479433517..25d8ecf105aa 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -1445,7 +1445,7 @@ int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
 	bpf_test_timer_enter(&t);
 	do {
 		retval = bpf_flow_dissect(prog, &ctx, eth->h_proto, ETH_HLEN,
-					  size, flags) == BPF_OK;
+					  size, flags);
 	} while (bpf_test_timer_continue(&t, 1, repeat, &ret, &duration));
 	bpf_test_timer_leave(&t);
 
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
index 0c1661ea996e..8fa3c454995e 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
@@ -100,6 +100,7 @@ struct test {
 	} pkt;
 	struct bpf_flow_keys keys;
 	__u32 flags;
+	__u32 retval;
 };
 
 #define VLAN_HLEN	4
@@ -126,6 +127,7 @@ struct test tests[] = {
 			.sport = 80,
 			.dport = 8080,
 		},
+		.retval = BPF_OK,
 	},
 	{
 		.name = "ipv6",
@@ -146,6 +148,7 @@ struct test tests[] = {
 			.sport = 80,
 			.dport = 8080,
 		},
+		.retval = BPF_OK,
 	},
 	{
 		.name = "802.1q-ipv4",
@@ -168,6 +171,7 @@ struct test tests[] = {
 			.sport = 80,
 			.dport = 8080,
 		},
+		.retval = BPF_OK,
 	},
 	{
 		.name = "802.1ad-ipv6",
@@ -191,6 +195,7 @@ struct test tests[] = {
 			.sport = 80,
 			.dport = 8080,
 		},
+		.retval = BPF_OK,
 	},
 	{
 		.name = "ipv4-frag",
@@ -217,6 +222,7 @@ struct test tests[] = {
 			.dport = 8080,
 		},
 		.flags = BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG,
+		.retval = BPF_OK,
 	},
 	{
 		.name = "ipv4-no-frag",
@@ -239,6 +245,7 @@ struct test tests[] = {
 			.is_frag = true,
 			.is_first_frag = true,
 		},
+		.retval = BPF_OK,
 	},
 	{
 		.name = "ipv6-frag",
@@ -265,6 +272,7 @@ struct test tests[] = {
 			.dport = 8080,
 		},
 		.flags = BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG,
+		.retval = BPF_OK,
 	},
 	{
 		.name = "ipv6-no-frag",
@@ -287,6 +295,7 @@ struct test tests[] = {
 			.is_frag = true,
 			.is_first_frag = true,
 		},
+		.retval = BPF_OK,
 	},
 	{
 		.name = "ipv6-flow-label",
@@ -309,6 +318,7 @@ struct test tests[] = {
 			.dport = 8080,
 			.flow_label = __bpf_constant_htonl(0xbeeef),
 		},
+		.retval = BPF_OK,
 	},
 	{
 		.name = "ipv6-no-flow-label",
@@ -331,6 +341,7 @@ struct test tests[] = {
 			.flow_label = __bpf_constant_htonl(0xbeeef),
 		},
 		.flags = BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL,
+		.retval = BPF_OK,
 	},
 	{
 		.name = "ipip-encap",
@@ -359,6 +370,7 @@ struct test tests[] = {
 			.sport = 80,
 			.dport = 8080,
 		},
+		.retval = BPF_OK,
 	},
 	{
 		.name = "ipip-no-encap",
@@ -386,6 +398,7 @@ struct test tests[] = {
 			.is_encap = true,
 		},
 		.flags = BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP,
+		.retval = BPF_OK,
 	},
 };
 
@@ -503,6 +516,10 @@ static void run_tests_skb_less(int tap_fd, struct bpf_map *keys)
 		err = tx_tap(tap_fd, &tests[i].pkt, sizeof(tests[i].pkt));
 		CHECK(err < 0, "tx_tap", "err %d errno %d\n", err, errno);
 
+		/* check the stored flow_keys only if BPF_OK expected */
+		if (tests[i].retval != BPF_OK)
+			continue;
+
 		err = bpf_map_lookup_elem(keys_fd, &key, &flow_keys);
 		ASSERT_OK(err, "bpf_map_lookup_elem");
 
@@ -588,7 +605,11 @@ void test_flow_dissector(void)
 
 		err = bpf_prog_test_run_opts(prog_fd, &topts);
 		ASSERT_OK(err, "test_run");
-		ASSERT_EQ(topts.retval, 1, "test_run retval");
+		ASSERT_EQ(topts.retval, tests[i].retval, "test_run retval");
+
+		/* check the resulting flow_keys only if BPF_OK returned */
+		if (topts.retval != BPF_OK)
+			continue;
 		ASSERT_EQ(topts.data_size_out, sizeof(flow_keys),
 			  "test_run data_size_out");
 		CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys);
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c
index 36afb409c25f..c7a47b57ac91 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c
@@ -44,7 +44,7 @@ void serial_test_flow_dissector_load_bytes(void)
 	ASSERT_OK(err, "test_run");
 	ASSERT_EQ(topts.data_size_out, sizeof(flow_keys),
 		  "test_run data_size_out");
-	ASSERT_EQ(topts.retval, 1, "test_run retval");
+	ASSERT_EQ(topts.retval, BPF_OK, "test_run retval");
 
 	if (fd >= -1)
 		close(fd);
-- 
cgit v1.2.3


From d6513727c2af39a8cffb0d9b07376e51a85f347f Mon Sep 17 00:00:00 2001
From: Shmulik Ladkani <shmulik.ladkani@gmail.com>
Date: Sun, 21 Aug 2022 14:35:19 +0300
Subject: bpf, selftests: Test BPF_FLOW_DISSECTOR_CONTINUE

The dissector program returns BPF_FLOW_DISSECTOR_CONTINUE (and avoids
setting skb->flow_keys or last_dissection map) in case it encounters
IP packets whose (outer) source address is 127.0.0.127.

Additional test is added to prog_tests/flow_dissector.c which sets
this address as test's pkk.iph.saddr, with the expected retval of
BPF_FLOW_DISSECTOR_CONTINUE.

Also, legacy test_flow_dissector.sh was similarly augmented.

Signed-off-by: Shmulik Ladkani <shmulik.ladkani@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Stanislav Fomichev <sdf@google.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20220821113519.116765-5-shmulik.ladkani@gmail.com
---
 .../selftests/bpf/prog_tests/flow_dissector.c       | 21 +++++++++++++++++++++
 tools/testing/selftests/bpf/progs/bpf_flow.c        | 15 +++++++++++++++
 tools/testing/selftests/bpf/test_flow_dissector.sh  |  8 ++++++++
 3 files changed, 44 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
index 8fa3c454995e..7acca37a3d2b 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
@@ -8,6 +8,8 @@
 
 #include "bpf_flow.skel.h"
 
+#define FLOW_CONTINUE_SADDR 0x7f00007f /* 127.0.0.127 */
+
 #ifndef IP_MF
 #define IP_MF 0x2000
 #endif
@@ -400,6 +402,25 @@ struct test tests[] = {
 		.flags = BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP,
 		.retval = BPF_OK,
 	},
+	{
+		.name = "ipip-encap-dissector-continue",
+		.pkt.ipip = {
+			.eth.h_proto = __bpf_constant_htons(ETH_P_IP),
+			.iph.ihl = 5,
+			.iph.protocol = IPPROTO_IPIP,
+			.iph.tot_len = __bpf_constant_htons(MAGIC_BYTES),
+			.iph.saddr = __bpf_constant_htonl(FLOW_CONTINUE_SADDR),
+			.iph_inner.ihl = 5,
+			.iph_inner.protocol = IPPROTO_TCP,
+			.iph_inner.tot_len =
+				__bpf_constant_htons(MAGIC_BYTES) -
+				sizeof(struct iphdr),
+			.tcp.doff = 5,
+			.tcp.source = 99,
+			.tcp.dest = 9090,
+		},
+		.retval = BPF_FLOW_DISSECTOR_CONTINUE,
+	},
 };
 
 static int create_tap(const char *ifname)
diff --git a/tools/testing/selftests/bpf/progs/bpf_flow.c b/tools/testing/selftests/bpf/progs/bpf_flow.c
index f266c757b3df..a20c5ed5e454 100644
--- a/tools/testing/selftests/bpf/progs/bpf_flow.c
+++ b/tools/testing/selftests/bpf/progs/bpf_flow.c
@@ -22,6 +22,8 @@
 #define PROG(F) PROG_(F, _##F)
 #define PROG_(NUM, NAME) SEC("flow_dissector") int flow_dissector_##NUM
 
+#define FLOW_CONTINUE_SADDR 0x7f00007f /* 127.0.0.127 */
+
 /* These are the identifiers of the BPF programs that will be used in tail
  * calls. Name is limited to 16 characters, with the terminating character and
  * bpf_func_ above, we have only 6 to work with, anything after will be cropped.
@@ -143,6 +145,19 @@ int _dissect(struct __sk_buff *skb)
 {
 	struct bpf_flow_keys *keys = skb->flow_keys;
 
+	if (keys->n_proto == bpf_htons(ETH_P_IP)) {
+		/* IP traffic from FLOW_CONTINUE_SADDR falls-back to
+		 * standard dissector
+		 */
+		struct iphdr *iph, _iph;
+
+		iph = bpf_flow_dissect_get_header(skb, sizeof(*iph), &_iph);
+		if (iph && iph->ihl == 5 &&
+		    iph->saddr == bpf_htonl(FLOW_CONTINUE_SADDR)) {
+			return BPF_FLOW_DISSECTOR_CONTINUE;
+		}
+	}
+
 	return parse_eth_proto(skb, keys->n_proto);
 }
 
diff --git a/tools/testing/selftests/bpf/test_flow_dissector.sh b/tools/testing/selftests/bpf/test_flow_dissector.sh
index dbd91221727d..5303ce0c977b 100755
--- a/tools/testing/selftests/bpf/test_flow_dissector.sh
+++ b/tools/testing/selftests/bpf/test_flow_dissector.sh
@@ -115,6 +115,14 @@ tc filter add dev lo parent ffff: protocol ip pref 1337 flower ip_proto \
 # Send 10 IPv4/UDP packets from port 10. Filter should not drop any.
 ./test_flow_dissector -i 4 -f 10
 
+echo "Testing IPv4 from 127.0.0.127 (fallback to generic dissector)..."
+# Send 10 IPv4/UDP packets from port 8. Filter should not drop any.
+./test_flow_dissector -i 4 -S 127.0.0.127 -f 8
+# Send 10 IPv4/UDP packets from port 9. Filter should drop all.
+./test_flow_dissector -i 4 -S 127.0.0.127 -f 9 -F
+# Send 10 IPv4/UDP packets from port 10. Filter should not drop any.
+./test_flow_dissector -i 4 -S 127.0.0.127 -f 10
+
 echo "Testing IPIP..."
 # Send 10 IPv4/IPv4/UDP packets from port 8. Filter should not drop any.
 ./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e bare -i 4 \
-- 
cgit v1.2.3


From 2172fb8007eaafbef18563afb6c1ae5a976bf787 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Tue, 23 Aug 2022 15:25:54 -0700
Subject: bpf: update bpf_{g,s}et_retval documentation

* replace 'syscall' with 'upper layers', still mention that it's being
  exported via syscall errno
* describe what happens in set_retval(-EPERM) + return 1
* describe what happens with bind's 'return 3'

Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Link: https://lore.kernel.org/r/20220823222555.523590-5-sdf@google.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/uapi/linux/bpf.h       | 22 +++++++++++++++++-----
 tools/include/uapi/linux/bpf.h | 22 +++++++++++++++++-----
 2 files changed, 34 insertions(+), 10 deletions(-)

(limited to 'tools')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 7f87012b012e..644600dbb114 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5085,17 +5085,29 @@ union bpf_attr {
  *
  * int bpf_get_retval(void)
  *	Description
- *		Get the syscall's return value that will be returned to userspace.
+ *		Get the BPF program's return value that will be returned to the upper layers.
  *
- *		This helper is currently supported by cgroup programs only.
+ *		This helper is currently supported by cgroup programs and only by the hooks
+ *		where BPF program's return value is returned to the userspace via errno.
  *	Return
- *		The syscall's return value.
+ *		The BPF program's return value.
  *
  * int bpf_set_retval(int retval)
  *	Description
- *		Set the syscall's return value that will be returned to userspace.
+ *		Set the BPF program's return value that will be returned to the upper layers.
+ *
+ *		This helper is currently supported by cgroup programs and only by the hooks
+ *		where BPF program's return value is returned to the userspace via errno.
+ *
+ *		Note that there is the following corner case where the program exports an error
+ *		via bpf_set_retval but signals success via 'return 1':
+ *
+ *			bpf_set_retval(-EPERM);
+ *			return 1;
+ *
+ *		In this case, the BPF program's return value will use helper's -EPERM. This
+ *		still holds true for cgroup/bind{4,6} which supports extra 'return 3' success case.
  *
- *		This helper is currently supported by cgroup programs only.
  *	Return
  *		0 on success, or a negative error in case of failure.
  *
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index f38814fbb618..4fb685591035 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -5085,17 +5085,29 @@ union bpf_attr {
  *
  * int bpf_get_retval(void)
  *	Description
- *		Get the syscall's return value that will be returned to userspace.
+ *		Get the BPF program's return value that will be returned to the upper layers.
  *
- *		This helper is currently supported by cgroup programs only.
+ *		This helper is currently supported by cgroup programs and only by the hooks
+ *		where BPF program's return value is returned to the userspace via errno.
  *	Return
- *		The syscall's return value.
+ *		The BPF program's return value.
  *
  * int bpf_set_retval(int retval)
  *	Description
- *		Set the syscall's return value that will be returned to userspace.
+ *		Set the BPF program's return value that will be returned to the upper layers.
+ *
+ *		This helper is currently supported by cgroup programs and only by the hooks
+ *		where BPF program's return value is returned to the userspace via errno.
+ *
+ *		Note that there is the following corner case where the program exports an error
+ *		via bpf_set_retval but signals success via 'return 1':
+ *
+ *			bpf_set_retval(-EPERM);
+ *			return 1;
+ *
+ *		In this case, the BPF program's return value will use helper's -EPERM. This
+ *		still holds true for cgroup/bind{4,6} which supports extra 'return 3' success case.
  *
- *		This helper is currently supported by cgroup programs only.
  *	Return
  *		0 on success, or a negative error in case of failure.
  *
-- 
cgit v1.2.3


From e7215f574079ffb138258e8ebfa3f2bf5a4a1238 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Tue, 23 Aug 2022 15:25:55 -0700
Subject: selftests/bpf: Make sure bpf_{g,s}et_retval is exposed everywhere

For each hook, have a simple bpf_set_retval(bpf_get_retval) program
and make sure it loads for the hooks we want. The exceptions are
the hooks which don't propagate the error to the callers:

- sockops
- recvmsg
- getpeername
- getsockname
- cg_skb ingress and egress

Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Link: https://lore.kernel.org/r/20220823222555.523590-6-sdf@google.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/Makefile               |  1 +
 .../selftests/bpf/cgroup_getset_retval_hooks.h     | 25 +++++++++++
 .../bpf/prog_tests/cgroup_getset_retval.c          | 48 ++++++++++++++++++++++
 .../bpf/progs/cgroup_getset_retval_hooks.c         | 16 ++++++++
 4 files changed, 90 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/cgroup_getset_retval_hooks.h
 create mode 100644 tools/testing/selftests/bpf/progs/cgroup_getset_retval_hooks.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 8d59ec7f4c2d..eecad99f1735 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -323,6 +323,7 @@ $(OUTPUT)/test_l4lb_noinline.o: BPF_CFLAGS += -fno-inline
 $(OUTPUT)/test_xdp_noinline.o: BPF_CFLAGS += -fno-inline
 
 $(OUTPUT)/flow_dissector_load.o: flow_dissector_load.h
+$(OUTPUT)/cgroup_getset_retval_hooks.o: cgroup_getset_retval_hooks.h
 
 # Build BPF object using Clang
 # $1 - input .c file
diff --git a/tools/testing/selftests/bpf/cgroup_getset_retval_hooks.h b/tools/testing/selftests/bpf/cgroup_getset_retval_hooks.h
new file mode 100644
index 000000000000..a525d3544fd7
--- /dev/null
+++ b/tools/testing/selftests/bpf/cgroup_getset_retval_hooks.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+BPF_RETVAL_HOOK(ingress, "cgroup_skb/ingress", __sk_buff, -EINVAL)
+BPF_RETVAL_HOOK(egress, "cgroup_skb/egress", __sk_buff, -EINVAL)
+BPF_RETVAL_HOOK(sock_create, "cgroup/sock_create", bpf_sock, 0)
+BPF_RETVAL_HOOK(sock_ops, "sockops", bpf_sock_ops, -EINVAL)
+BPF_RETVAL_HOOK(dev, "cgroup/dev", bpf_cgroup_dev_ctx, 0)
+BPF_RETVAL_HOOK(bind4, "cgroup/bind4", bpf_sock_addr, 0)
+BPF_RETVAL_HOOK(bind6, "cgroup/bind6", bpf_sock_addr, 0)
+BPF_RETVAL_HOOK(connect4, "cgroup/connect4", bpf_sock_addr, 0)
+BPF_RETVAL_HOOK(connect6, "cgroup/connect6", bpf_sock_addr, 0)
+BPF_RETVAL_HOOK(post_bind4, "cgroup/post_bind4", bpf_sock_addr, 0)
+BPF_RETVAL_HOOK(post_bind6, "cgroup/post_bind6", bpf_sock_addr, 0)
+BPF_RETVAL_HOOK(sendmsg4, "cgroup/sendmsg4", bpf_sock_addr, 0)
+BPF_RETVAL_HOOK(sendmsg6, "cgroup/sendmsg6", bpf_sock_addr, 0)
+BPF_RETVAL_HOOK(sysctl, "cgroup/sysctl", bpf_sysctl, 0)
+BPF_RETVAL_HOOK(recvmsg4, "cgroup/recvmsg4", bpf_sock_addr, -EINVAL)
+BPF_RETVAL_HOOK(recvmsg6, "cgroup/recvmsg6", bpf_sock_addr, -EINVAL)
+BPF_RETVAL_HOOK(getsockopt, "cgroup/getsockopt", bpf_sockopt, 0)
+BPF_RETVAL_HOOK(setsockopt, "cgroup/setsockopt", bpf_sockopt, 0)
+BPF_RETVAL_HOOK(getpeername4, "cgroup/getpeername4", bpf_sock_addr, -EINVAL)
+BPF_RETVAL_HOOK(getpeername6, "cgroup/getpeername6", bpf_sock_addr, -EINVAL)
+BPF_RETVAL_HOOK(getsockname4, "cgroup/getsockname4", bpf_sock_addr, -EINVAL)
+BPF_RETVAL_HOOK(getsockname6, "cgroup/getsockname6", bpf_sock_addr, -EINVAL)
+BPF_RETVAL_HOOK(sock_release, "cgroup/sock_release", bpf_sock, 0)
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_getset_retval.c b/tools/testing/selftests/bpf/prog_tests/cgroup_getset_retval.c
index 0b47c3c000c7..4d2fa99273d8 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_getset_retval.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_getset_retval.c
@@ -10,6 +10,7 @@
 
 #include "cgroup_getset_retval_setsockopt.skel.h"
 #include "cgroup_getset_retval_getsockopt.skel.h"
+#include "cgroup_getset_retval_hooks.skel.h"
 
 #define SOL_CUSTOM	0xdeadbeef
 
@@ -433,6 +434,50 @@ close_bpf_object:
 	cgroup_getset_retval_getsockopt__destroy(obj);
 }
 
+struct exposed_hook {
+	const char *name;
+	int expected_err;
+} exposed_hooks[] = {
+
+#define BPF_RETVAL_HOOK(NAME, SECTION, CTX, EXPECTED_ERR) \
+	{ \
+		.name = #NAME, \
+		.expected_err = EXPECTED_ERR, \
+	},
+
+#include "cgroup_getset_retval_hooks.h"
+
+#undef BPF_RETVAL_HOOK
+};
+
+static void test_exposed_hooks(int cgroup_fd, int sock_fd)
+{
+	struct cgroup_getset_retval_hooks *skel;
+	struct bpf_program *prog;
+	int err;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(exposed_hooks); i++) {
+		skel = cgroup_getset_retval_hooks__open();
+		if (!ASSERT_OK_PTR(skel, "cgroup_getset_retval_hooks__open"))
+			continue;
+
+		prog = bpf_object__find_program_by_name(skel->obj, exposed_hooks[i].name);
+		if (!ASSERT_NEQ(prog, NULL, "bpf_object__find_program_by_name"))
+			goto close_skel;
+
+		err = bpf_program__set_autoload(prog, true);
+		if (!ASSERT_OK(err, "bpf_program__set_autoload"))
+			goto close_skel;
+
+		err = cgroup_getset_retval_hooks__load(skel);
+		ASSERT_EQ(err, exposed_hooks[i].expected_err, "expected_err");
+
+close_skel:
+		cgroup_getset_retval_hooks__destroy(skel);
+	}
+}
+
 void test_cgroup_getset_retval(void)
 {
 	int cgroup_fd = -1;
@@ -476,6 +521,9 @@ void test_cgroup_getset_retval(void)
 	if (test__start_subtest("getsockopt-retval_sync"))
 		test_getsockopt_retval_sync(cgroup_fd, sock_fd);
 
+	if (test__start_subtest("exposed_hooks"))
+		test_exposed_hooks(cgroup_fd, sock_fd);
+
 close_fd:
 	close(cgroup_fd);
 }
diff --git a/tools/testing/selftests/bpf/progs/cgroup_getset_retval_hooks.c b/tools/testing/selftests/bpf/progs/cgroup_getset_retval_hooks.c
new file mode 100644
index 000000000000..13dfb4bbfd28
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgroup_getset_retval_hooks.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+#define BPF_RETVAL_HOOK(name, section, ctx, expected_err) \
+	__attribute__((__section__("?" section))) \
+	int name(struct ctx *_ctx) \
+	{ \
+		bpf_set_retval(bpf_get_retval()); \
+		return 1; \
+	}
+
+#include "cgroup_getset_retval_hooks.h"
+
+#undef BPF_RETVAL_HOOK
-- 
cgit v1.2.3


From 35f14dbd2fc6619dea8ac9eea18976378b18450b Mon Sep 17 00:00:00 2001
From: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Date: Tue, 23 Aug 2022 03:32:26 +0200
Subject: selftests/bpf: Add tests for reference state fixes for callbacks

These are regression tests to ensure we don't end up in invalid runtime
state for helpers that execute callbacks multiple times. It exercises
the fixes to verifier callback handling for reference state in previous
patches.

Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220823013226.24988-1-memxor@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/prog_tests/cb_refs.c |  48 ++++++++++
 tools/testing/selftests/bpf/progs/cb_refs.c      | 116 +++++++++++++++++++++++
 2 files changed, 164 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/cb_refs.c
 create mode 100644 tools/testing/selftests/bpf/progs/cb_refs.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/cb_refs.c b/tools/testing/selftests/bpf/prog_tests/cb_refs.c
new file mode 100644
index 000000000000..3bff680de16c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cb_refs.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "bpf/libbpf.h"
+#include <test_progs.h>
+#include <network_helpers.h>
+
+#include "cb_refs.skel.h"
+
+static char log_buf[1024 * 1024];
+
+struct {
+	const char *prog_name;
+	const char *err_msg;
+} cb_refs_tests[] = {
+	{ "underflow_prog", "reference has not been acquired before" },
+	{ "leak_prog", "Unreleased reference" },
+	{ "nested_cb", "Unreleased reference id=4 alloc_insn=2" }, /* alloc_insn=2{4,5} */
+	{ "non_cb_transfer_ref", "Unreleased reference id=4 alloc_insn=1" }, /* alloc_insn=1{1,2} */
+};
+
+void test_cb_refs(void)
+{
+	LIBBPF_OPTS(bpf_object_open_opts, opts, .kernel_log_buf = log_buf,
+						.kernel_log_size = sizeof(log_buf),
+						.kernel_log_level = 1);
+	struct bpf_program *prog;
+	struct cb_refs *skel;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(cb_refs_tests); i++) {
+		LIBBPF_OPTS(bpf_test_run_opts, run_opts,
+			.data_in = &pkt_v4,
+			.data_size_in = sizeof(pkt_v4),
+			.repeat = 1,
+		);
+		skel = cb_refs__open_opts(&opts);
+		if (!ASSERT_OK_PTR(skel, "cb_refs__open_and_load"))
+			return;
+		prog = bpf_object__find_program_by_name(skel->obj, cb_refs_tests[i].prog_name);
+		bpf_program__set_autoload(prog, true);
+		if (!ASSERT_ERR(cb_refs__load(skel), "cb_refs__load"))
+			bpf_prog_test_run_opts(bpf_program__fd(prog), &run_opts);
+		if (!ASSERT_OK_PTR(strstr(log_buf, cb_refs_tests[i].err_msg), "expected error message")) {
+			fprintf(stderr, "Expected: %s\n", cb_refs_tests[i].err_msg);
+			fprintf(stderr, "Verifier: %s\n", log_buf);
+		}
+		cb_refs__destroy(skel);
+	}
+}
diff --git a/tools/testing/selftests/bpf/progs/cb_refs.c b/tools/testing/selftests/bpf/progs/cb_refs.c
new file mode 100644
index 000000000000..7653df1bc787
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cb_refs.c
@@ -0,0 +1,116 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+struct map_value {
+	struct prog_test_ref_kfunc __kptr_ref *ptr;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__type(key, int);
+	__type(value, struct map_value);
+	__uint(max_entries, 16);
+} array_map SEC(".maps");
+
+extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym;
+extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym;
+
+static __noinline int cb1(void *map, void *key, void *value, void *ctx)
+{
+	void *p = *(void **)ctx;
+	bpf_kfunc_call_test_release(p);
+	/* Without the fix this would cause underflow */
+	return 0;
+}
+
+SEC("?tc")
+int underflow_prog(void *ctx)
+{
+	struct prog_test_ref_kfunc *p;
+	unsigned long sl = 0;
+
+	p = bpf_kfunc_call_test_acquire(&sl);
+	if (!p)
+		return 0;
+	bpf_for_each_map_elem(&array_map, cb1, &p, 0);
+	return 0;
+}
+
+static __always_inline int cb2(void *map, void *key, void *value, void *ctx)
+{
+	unsigned long sl = 0;
+
+	*(void **)ctx = bpf_kfunc_call_test_acquire(&sl);
+	/* Without the fix this would leak memory */
+	return 0;
+}
+
+SEC("?tc")
+int leak_prog(void *ctx)
+{
+	struct prog_test_ref_kfunc *p;
+	struct map_value *v;
+	unsigned long sl;
+
+	v = bpf_map_lookup_elem(&array_map, &(int){0});
+	if (!v)
+		return 0;
+
+	p = NULL;
+	bpf_for_each_map_elem(&array_map, cb2, &p, 0);
+	p = bpf_kptr_xchg(&v->ptr, p);
+	if (p)
+		bpf_kfunc_call_test_release(p);
+	return 0;
+}
+
+static __always_inline int cb(void *map, void *key, void *value, void *ctx)
+{
+	return 0;
+}
+
+static __always_inline int cb3(void *map, void *key, void *value, void *ctx)
+{
+	unsigned long sl = 0;
+	void *p;
+
+	bpf_kfunc_call_test_acquire(&sl);
+	bpf_for_each_map_elem(&array_map, cb, &p, 0);
+	/* It should only complain here, not in cb. This is why we need
+	 * callback_ref to be set to frameno.
+	 */
+	return 0;
+}
+
+SEC("?tc")
+int nested_cb(void *ctx)
+{
+	struct prog_test_ref_kfunc *p;
+	unsigned long sl = 0;
+	int sp = 0;
+
+	p = bpf_kfunc_call_test_acquire(&sl);
+	if (!p)
+		return 0;
+	bpf_for_each_map_elem(&array_map, cb3, &sp, 0);
+	bpf_kfunc_call_test_release(p);
+	return 0;
+}
+
+SEC("?tc")
+int non_cb_transfer_ref(void *ctx)
+{
+	struct prog_test_ref_kfunc *p;
+	unsigned long sl = 0;
+
+	p = bpf_kfunc_call_test_acquire(&sl);
+	if (!p)
+		return 0;
+	cb1(NULL, NULL, NULL, &p);
+	bpf_kfunc_call_test_acquire(&sl);
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
-- 
cgit v1.2.3


From b03914f7ff7bc5aca056aaa49fd3ff9120d24f47 Mon Sep 17 00:00:00 2001
From: Daniel Müller <deso@posteo.net>
Date: Wed, 24 Aug 2022 16:39:06 +0000
Subject: selftests/bpf: Add cb_refs test to s390x deny list
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The cb_refs BPF selftest is failing execution on s390x machines. This is
a newly added test that requires a feature not presently supported on
this architecture.

Denylist the test for this architecture.

Fixes: 3cf7e7d8685c ("selftests/bpf: Add tests for reference state fixes for callbacks")
Signed-off-by: Daniel Müller <deso@posteo.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20220824163906.1186832-1-deso@posteo.net
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/DENYLIST.s390x | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x
index a708c3dcc154..37bafcbf952a 100644
--- a/tools/testing/selftests/bpf/DENYLIST.s390x
+++ b/tools/testing/selftests/bpf/DENYLIST.s390x
@@ -66,3 +66,4 @@ select_reuseport                         # intermittently fails on new s390x set
 xdp_synproxy                             # JIT does not support calling kernel function                                (kfunc)
 unpriv_bpf_disabled                      # fentry
 setget_sockopt                           # attach unexpected error: -524                                               (trampoline)
+cb_refs                                  # expected error message unexpected error: -524                               (trampoline)
-- 
cgit v1.2.3


From 7e165d1939284d0bf16a83c591c3c5d24a110d0a Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Wed, 24 Aug 2022 09:39:07 +0800
Subject: selftests/bpf: Fix wrong size passed to bpf_setsockopt()

sizeof(new_cc) is the size of the pointer, not of the string it points to;
introduce new_cc_len to store the real size and pass it to bpf_setsockopt().

Fixes: 31123c0360e0 ("selftests/bpf: bpf_setsockopt tests")
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20220824013907.380448-1-yangyingliang@huawei.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/progs/setget_sockopt.c | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/setget_sockopt.c b/tools/testing/selftests/bpf/progs/setget_sockopt.c
index 4a4cb44a4a15..40606ef47a38 100644
--- a/tools/testing/selftests/bpf/progs/setget_sockopt.c
+++ b/tools/testing/selftests/bpf/progs/setget_sockopt.c
@@ -305,15 +305,19 @@ static int bpf_test_tcp_sockopt(__u32 i, struct loop_ctx *lc)
 	if (t->opt == TCP_CONGESTION) {
 		char old_cc[16], tmp_cc[16];
 		const char *new_cc;
+		int new_cc_len;
 
 		if (bpf_getsockopt(ctx, IPPROTO_TCP, TCP_CONGESTION, old_cc, sizeof(old_cc)))
 			return 1;
-		if (!bpf_strncmp(old_cc, sizeof(old_cc), cubic_cc))
+		if (!bpf_strncmp(old_cc, sizeof(old_cc), cubic_cc)) {
 			new_cc = reno_cc;
-		else
+			new_cc_len = sizeof(reno_cc);
+		} else {
 			new_cc = cubic_cc;
+			new_cc_len = sizeof(cubic_cc);
+		}
 		if (bpf_setsockopt(ctx, IPPROTO_TCP, TCP_CONGESTION, (void *)new_cc,
-				   sizeof(new_cc)))
+				   new_cc_len))
 			return 1;
 		if (bpf_getsockopt(ctx, IPPROTO_TCP, TCP_CONGESTION, tmp_cc, sizeof(tmp_cc)))
 			return 1;
-- 
cgit v1.2.3


From c35ecb95c448cde15cbde8fde93350d50bcc8be7 Mon Sep 17 00:00:00 2001
From: Joanne Koong <joannelkoong@gmail.com>
Date: Mon, 22 Aug 2022 11:10:22 -0700
Subject: selftests/net: Add test for timing a bind request to a port with a
 populated bhash entry

This test populates the bhash table for a given port with
MAX_THREADS * MAX_CONNECTIONS sockets, and then times how long
a bind request on the port takes.

When populating the bhash table, we create the sockets and then bind
the sockets to the same address and port (SO_REUSEADDR and SO_REUSEPORT
are set). When timing how long a bind on the port takes, we bind on a
different address without SO_REUSEPORT set. We do not set SO_REUSEPORT
because we are interested in the case where the bind request does not
go through the tb->fastreuseport path, which is fragile (e.g. the
tb->fastreuseport path does not work if binding with a different uid).

To run the script:
    Usage: ./bind_bhash.sh [-6 | -4] [-p port] [-a address]
	    6: use ipv6
	    4: use ipv4
	    port: Port number
	    address: ip address

Without any arguments, ./bind_bhash.sh defaults to ipv6 using ip address
"2001:0db8:0:f101::1" on port 443.

On my local machine, I see:
ipv4:
before - 0.002317 seconds
with bhash2 - 0.000020 seconds

ipv6:
before - 0.002431 seconds
with bhash2 - 0.000021 seconds

Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/.gitignore    |   3 +-
 tools/testing/selftests/net/Makefile      |   3 +
 tools/testing/selftests/net/bind_bhash.c  | 144 ++++++++++++++++++++++++++++++
 tools/testing/selftests/net/bind_bhash.sh |  66 ++++++++++++++
 4 files changed, 215 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/net/bind_bhash.c
 create mode 100755 tools/testing/selftests/net/bind_bhash.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 0e5751af6247..89e2d4aa812a 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -39,4 +39,5 @@ toeplitz
 tun
 cmsg_sender
 unix_connect
-tap
\ No newline at end of file
+tap
+bind_bhash
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 11a288b67e2f..b17ec78f3951 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -43,6 +43,7 @@ TEST_PROGS += ndisc_unsolicited_na_test.sh
 TEST_PROGS += arp_ndisc_untracked_subnets.sh
 TEST_PROGS += stress_reuseport_listen.sh
 TEST_PROGS := l2_tos_ttl_inherit.sh
+TEST_PROGS += bind_bhash.sh
 TEST_PROGS_EXTENDED := in_netns.sh setup_loopback.sh setup_veth.sh
 TEST_PROGS_EXTENDED += toeplitz_client.sh toeplitz.sh
 TEST_GEN_FILES =  socket nettest
@@ -64,6 +65,7 @@ TEST_GEN_FILES += cmsg_sender
 TEST_GEN_FILES += stress_reuseport_listen
 TEST_PROGS += test_vxlan_vnifiltering.sh
 TEST_GEN_FILES += io_uring_zerocopy_tx
+TEST_GEN_FILES += bind_bhash
 
 TEST_FILES := settings
 
@@ -74,3 +76,4 @@ include bpf/Makefile
 $(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma
 $(OUTPUT)/tcp_mmap: LDLIBS += -lpthread
 $(OUTPUT)/tcp_inq: LDLIBS += -lpthread
+$(OUTPUT)/bind_bhash: LDLIBS += -lpthread
diff --git a/tools/testing/selftests/net/bind_bhash.c b/tools/testing/selftests/net/bind_bhash.c
new file mode 100644
index 000000000000..57ff67a3751e
--- /dev/null
+++ b/tools/testing/selftests/net/bind_bhash.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This times how long it takes to bind to a port when the port already
+ * has multiple sockets in its bhash table.
+ *
+ * In the setup(), we populate the port's bhash table with
+ * MAX_THREADS * MAX_CONNECTIONS number of entries.
+ */
+
+#include <unistd.h>
+#include <stdio.h>
+#include <netdb.h>
+#include <pthread.h>
+#include <string.h>
+#include <stdbool.h>
+
+#define MAX_THREADS 600
+#define MAX_CONNECTIONS 40
+
+static const char *setup_addr_v6 = "::1";
+static const char *setup_addr_v4 = "127.0.0.1";
+static const char *setup_addr;
+static const char *bind_addr;
+static const char *port;
+bool use_v6;
+int ret;
+
+static int fd_array[MAX_THREADS][MAX_CONNECTIONS];
+
+static int bind_socket(int opt, const char *addr)
+{
+	struct addrinfo *res, hint = {};
+	int sock_fd, reuse = 1, err;
+	int domain = use_v6 ? AF_INET6 : AF_INET;
+
+	sock_fd = socket(domain, SOCK_STREAM, 0);
+	if (sock_fd < 0) {
+		perror("socket fd err");
+		return sock_fd;
+	}
+
+	hint.ai_family = domain;
+	hint.ai_socktype = SOCK_STREAM;
+
+	err = getaddrinfo(addr, port, &hint, &res);
+	if (err) {
+		perror("getaddrinfo failed");
+		goto cleanup;
+	}
+
+	if (opt) {
+		err = setsockopt(sock_fd, SOL_SOCKET, opt, &reuse, sizeof(reuse));
+		if (err) {
+			perror("setsockopt failed");
+			goto cleanup;
+		}
+	}
+
+	err = bind(sock_fd, res->ai_addr, res->ai_addrlen);
+	if (err) {
+		perror("failed to bind to port");
+		goto cleanup;
+	}
+
+	return sock_fd;
+
+cleanup:
+	close(sock_fd);
+	return err;
+}
+
+static void *setup(void *arg)
+{
+	int sock_fd, i;
+	int *array = (int *)arg;
+
+	for (i = 0; i < MAX_CONNECTIONS; i++) {
+		sock_fd = bind_socket(SO_REUSEADDR | SO_REUSEPORT, setup_addr);
+		if (sock_fd < 0) {
+			ret = sock_fd;
+			pthread_exit(&ret);
+		}
+		array[i] = sock_fd;
+	}
+
+	return NULL;
+}
+
+int main(int argc, const char *argv[])
+{
+	int listener_fd, sock_fd, i, j;
+	pthread_t tid[MAX_THREADS];
+	clock_t begin, end;
+
+	if (argc != 4) {
+		printf("Usage: listener <port> <ipv6 | ipv4> <bind-addr>\n");
+		return -1;
+	}
+
+	port = argv[1];
+	use_v6 = strcmp(argv[2], "ipv6") == 0;
+	bind_addr = argv[3];
+
+	setup_addr = use_v6 ? setup_addr_v6 : setup_addr_v4;
+
+	listener_fd = bind_socket(SO_REUSEADDR | SO_REUSEPORT, setup_addr);
+	if (listen(listener_fd, 100) < 0) {
+		perror("listen failed");
+		return -1;
+	}
+
+	/* Set up threads to populate the bhash table entry for the port */
+	for (i = 0; i < MAX_THREADS; i++)
+		pthread_create(&tid[i], NULL, setup, fd_array[i]);
+
+	for (i = 0; i < MAX_THREADS; i++)
+		pthread_join(tid[i], NULL);
+
+	if (ret)
+		goto done;
+
+	begin = clock();
+
+	/* Bind to the same port on a different address */
+	sock_fd  = bind_socket(0, bind_addr);
+	if (sock_fd < 0)
+		goto done;
+
+	end = clock();
+
+	printf("time spent = %f\n", (double)(end - begin) / CLOCKS_PER_SEC);
+
+	/* Clean up the timed socket; the populated sockets are closed below */
+	close(sock_fd);
+
+done:	/* close the listener and every fd_array[thread][connection] slot */
+	close(listener_fd);
+	for (i = 0; i < MAX_THREADS; i++) {
+		for (j = 0; j < MAX_CONNECTIONS; j++)
+			close(fd_array[i][j]);
+	}
+
+	return 0;
+}
diff --git a/tools/testing/selftests/net/bind_bhash.sh b/tools/testing/selftests/net/bind_bhash.sh
new file mode 100755
index 000000000000..ca0292d4b441
--- /dev/null
+++ b/tools/testing/selftests/net/bind_bhash.sh
@@ -0,0 +1,66 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NR_FILES=32768
+SAVED_NR_FILES=$(ulimit -n)
+
+# default values
+port=443
+addr_v6="2001:0db8:0:f101::1"
+addr_v4="10.8.8.8"
+use_v6=true
+addr=""
+
+usage() {
+    echo "Usage: $0 [-6 | -4] [-p port] [-a address]"
+    echo -e "\t6: use ipv6"
+    echo -e "\t4: use ipv4"
+    echo -e "\tport: Port number"
+    echo -e "\taddress: ip address"
+}
+
+while getopts "ha:p:64" opt; do
+    case ${opt} in
+	h)
+	    usage $0
+	    exit 0
+	    ;;
+	a)  addr=$OPTARG;;
+	p)
+	    port=$OPTARG;;
+	6)
+	    use_v6=true;;
+	4)
+	    use_v6=false;;
+    esac
+done
+
+setup() {
+    if [[ "$use_v6" == true ]]; then
+	ip addr add $addr_v6 nodad dev eth0
+    else
+	ip addr add $addr_v4 dev lo
+    fi
+	ulimit -n $NR_FILES
+}
+
+cleanup() {
+    if [[ "$use_v6" == true ]]; then
+	ip addr del $addr_v6 dev eth0
+    else
+	ip addr del $addr_v4/32 dev lo
+    fi
+    ulimit -n $SAVED_NR_FILES
+}
+
+if [[ "$addr" != "" ]]; then
+    addr_v4=$addr;
+    addr_v6=$addr;
+fi
+setup
+if [[ "$use_v6" == true ]] ; then
+    ./bind_bhash $port "ipv6" $addr_v6
+else
+    ./bind_bhash $port "ipv4" $addr_v4
+fi
+cleanup
-- 
cgit v1.2.3


From 1be9ac87a75a4fc0e2cc254e412d2d67a58a7191 Mon Sep 17 00:00:00 2001
From: Joanne Koong <joannelkoong@gmail.com>
Date: Mon, 22 Aug 2022 11:10:23 -0700
Subject: selftests/net: Add sk_bind_sendto_listen and sk_connect_zero_addr

This patch adds 2 new tests: sk_bind_sendto_listen and
sk_connect_zero_addr.

The sk_bind_sendto_listen test exercises the path where a socket's
rcv saddr changes after it has been added to the binding tables,
and then a listen() on the socket is invoked. The listen() should
succeed.

The sk_bind_sendto_listen test is copied over from one of syzbot's
tests: https://syzkaller.appspot.com/x/repro.c?x=1673a38df00000

The sk_connect_zero_addr test exercises the path where the socket was
never previously added to the binding tables and it gets assigned a
saddr upon a connect() to address 0.

Signed-off-by: Joanne Koong <joannelkoong@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/.gitignore             |  2 +
 tools/testing/selftests/net/Makefile               |  2 +
 .../testing/selftests/net/sk_bind_sendto_listen.c  | 80 ++++++++++++++++++++++
 tools/testing/selftests/net/sk_connect_zero_addr.c | 62 +++++++++++++++++
 4 files changed, 146 insertions(+)
 create mode 100644 tools/testing/selftests/net/sk_bind_sendto_listen.c
 create mode 100644 tools/testing/selftests/net/sk_connect_zero_addr.c

(limited to 'tools')

diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 89e2d4aa812a..bec5cf96984c 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -41,3 +41,5 @@ cmsg_sender
 unix_connect
 tap
 bind_bhash
+sk_bind_sendto_listen
+sk_connect_zero_addr
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index b17ec78f3951..e6a951ba5ba0 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -66,6 +66,8 @@ TEST_GEN_FILES += stress_reuseport_listen
 TEST_PROGS += test_vxlan_vnifiltering.sh
 TEST_GEN_FILES += io_uring_zerocopy_tx
 TEST_GEN_FILES += bind_bhash
+TEST_GEN_PROGS += sk_bind_sendto_listen
+TEST_GEN_PROGS += sk_connect_zero_addr
 
 TEST_FILES := settings
 
diff --git a/tools/testing/selftests/net/sk_bind_sendto_listen.c b/tools/testing/selftests/net/sk_bind_sendto_listen.c
new file mode 100644
index 000000000000..b420d830f72c
--- /dev/null
+++ b/tools/testing/selftests/net/sk_bind_sendto_listen.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <arpa/inet.h>
+#include <error.h>
+#include <errno.h>
+#include <unistd.h>
+
+int main(void)
+{
+	int fd1, fd2, one = 1;
+	struct sockaddr_in6 bind_addr = {
+		.sin6_family = AF_INET6,
+		.sin6_port = htons(20000),
+		.sin6_flowinfo = htonl(0),
+		.sin6_addr = {},
+		.sin6_scope_id = 0,
+	};
+
+	inet_pton(AF_INET6, "::", &bind_addr.sin6_addr);
+
+	fd1 = socket(AF_INET6, SOCK_STREAM, IPPROTO_IP);
+	if (fd1 < 0) {
+		error(1, errno, "socket fd1");
+		return -1;
+	}
+
+	if (setsockopt(fd1, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one))) {
+		error(1, errno, "setsockopt(SO_REUSEADDR) fd1");
+		goto out_err1;
+	}
+
+	if (bind(fd1, (struct sockaddr *)&bind_addr, sizeof(bind_addr))) {
+		error(1, errno, "bind fd1");
+		goto out_err1;
+	}
+
+	if (sendto(fd1, NULL, 0, MSG_FASTOPEN, (struct sockaddr *)&bind_addr,
+		   sizeof(bind_addr))) {
+		error(1, errno, "sendto fd1");
+		goto out_err1;
+	}
+
+	fd2 = socket(AF_INET6, SOCK_STREAM, IPPROTO_IP);
+	if (fd2 < 0) {
+		error(1, errno, "socket fd2");
+		goto out_err1;
+	}
+
+	if (setsockopt(fd2, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one))) {
+		error(1, errno, "setsockopt(SO_REUSEADDR) fd2");
+		goto out_err2;
+	}
+
+	if (bind(fd2, (struct sockaddr *)&bind_addr, sizeof(bind_addr))) {
+		error(1, errno, "bind fd2");
+		goto out_err2;
+	}
+
+	if (sendto(fd2, NULL, 0, MSG_FASTOPEN, (struct sockaddr *)&bind_addr,
+		   sizeof(bind_addr)) != -1) {
+		error(1, errno, "sendto fd2");
+		goto out_err2;
+	}
+
+	if (listen(fd2, 0)) {
+		error(1, errno, "listen");
+		goto out_err2;
+	}
+
+	close(fd2);
+	close(fd1);
+	return 0;
+
+out_err2:
+	close(fd2);
+
+out_err1:
+	close(fd1);
+	return -1;
+}
diff --git a/tools/testing/selftests/net/sk_connect_zero_addr.c b/tools/testing/selftests/net/sk_connect_zero_addr.c
new file mode 100644
index 000000000000..4be418aefd9f
--- /dev/null
+++ b/tools/testing/selftests/net/sk_connect_zero_addr.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <arpa/inet.h>
+#include <error.h>
+#include <errno.h>
+#include <unistd.h>
+
+int main(void)
+{
+	int fd1, fd2, one = 1;
+	struct sockaddr_in6 bind_addr = {
+		.sin6_family = AF_INET6,
+		.sin6_port = htons(20000),
+		.sin6_flowinfo = htonl(0),
+		.sin6_addr = {},
+		.sin6_scope_id = 0,
+	};
+
+	inet_pton(AF_INET6, "::", &bind_addr.sin6_addr); /* all-zero address */
+
+	fd1 = socket(AF_INET6, SOCK_STREAM, IPPROTO_IP);
+	if (fd1 < 0) {
+		error(1, errno, "socket fd1");
+		return -1;
+	}
+
+	if (setsockopt(fd1, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one))) {
+		error(1, errno, "setsockopt(SO_REUSEADDR) fd1");
+		goto out_err1;
+	}
+
+	if (bind(fd1, (struct sockaddr *)&bind_addr, sizeof(bind_addr))) {
+		error(1, errno, "bind fd1");
+		goto out_err1;
+	}
+
+	if (listen(fd1, 0)) {
+		error(1, errno, "listen");
+		goto out_err1;
+	}
+
+	fd2 = socket(AF_INET6, SOCK_STREAM, IPPROTO_IP);
+	if (fd2 < 0) {
+		error(1, errno, "socket fd2");
+		goto out_err1;
+	}
+
+	if (connect(fd2, (struct sockaddr *)&bind_addr, sizeof(bind_addr))) {
+		error(1, errno, "connect fd2"); /* name the call that failed */
+		goto out_err2;
+	}
+
+	close(fd2);
+	close(fd1);
+	return 0;
+
+out_err2:
+	close(fd2);
+out_err1:
+	close(fd1);
+	return -1;
+}
-- 
cgit v1.2.3


From d4ccaf58a8472123ac97e6db03932c375b5c45ba Mon Sep 17 00:00:00 2001
From: Hao Luo <haoluo@google.com>
Date: Wed, 24 Aug 2022 16:31:13 -0700
Subject: bpf: Introduce cgroup iter

Cgroup_iter is a type of bpf_iter. It walks over cgroups in four modes:

 - walking a cgroup's descendants in pre-order.
 - walking a cgroup's descendants in post-order.
 - walking a cgroup's ancestors.
 - process only the given cgroup.

When attaching cgroup_iter, one can set a cgroup to the iter_link
created from attaching. This cgroup is passed as a file descriptor
or cgroup id and serves as the starting point of the walk. If no
cgroup is specified, the starting point will be the root cgroup v2.

For walking descendants, one can specify the order: either pre-order or
post-order. For walking ancestors, the walk starts at the specified
cgroup and ends at the root.

One can also terminate the walk early by returning 1 from the iter
program.

Note that because walking cgroup hierarchy holds cgroup_mutex, the iter
program is called with cgroup_mutex held.

Currently only one session is supported, which means, depending on the
volume of data bpf program intends to send to user space, the number
of cgroups that can be walked is limited. For example, given the current
buffer size is 8 * PAGE_SIZE, if the program sends 64B data for each
cgroup, assuming PAGE_SIZE is 4kb, the total number of cgroups that can
be walked is 512. This is a limitation of cgroup_iter. If the output
data is larger than the kernel buffer size, after all data in the
kernel buffer is consumed by user space, the subsequent read() syscall
will signal EOPNOTSUPP. To work around this, the user may have to
update their program to reduce the volume of data sent to output. For
example, skip some uninteresting cgroups. In future, we may extend
bpf_iter flags to allow customizing buffer size.

Acked-by: Yonghong Song <yhs@fb.com>
Acked-by: Tejun Heo <tj@kernel.org>
Signed-off-by: Hao Luo <haoluo@google.com>
Link: https://lore.kernel.org/r/20220824233117.1312810-2-haoluo@google.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h                               |   8 +
 include/uapi/linux/bpf.h                          |  30 +++
 kernel/bpf/Makefile                               |   3 +
 kernel/bpf/cgroup_iter.c                          | 284 ++++++++++++++++++++++
 tools/include/uapi/linux/bpf.h                    |  30 +++
 tools/testing/selftests/bpf/prog_tests/btf_dump.c |   4 +-
 6 files changed, 357 insertions(+), 2 deletions(-)
 create mode 100644 kernel/bpf/cgroup_iter.c

(limited to 'tools')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 99fc7a64564f..9c1674973e03 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -48,6 +48,7 @@ struct mem_cgroup;
 struct module;
 struct bpf_func_state;
 struct ftrace_ops;
+struct cgroup;
 
 extern struct idr btf_idr;
 extern spinlock_t btf_idr_lock;
@@ -1730,7 +1731,14 @@ int bpf_obj_get_user(const char __user *pathname, int flags);
 	int __init bpf_iter_ ## target(args) { return 0; }
 
 struct bpf_iter_aux_info {
+	/* for map_elem iter */
 	struct bpf_map *map;
+
+	/* for cgroup iter */
+	struct {
+		struct cgroup *start; /* starting cgroup */
+		enum bpf_cgroup_iter_order order;
+	} cgroup;
 };
 
 typedef int (*bpf_iter_attach_target_t)(struct bpf_prog *prog,
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 644600dbb114..0f61f09f467a 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -87,10 +87,29 @@ struct bpf_cgroup_storage_key {
 	__u32	attach_type;		/* program attach type (enum bpf_attach_type) */
 };
 
+enum bpf_cgroup_iter_order {
+	BPF_ITER_ORDER_UNSPEC = 0,
+	BPF_ITER_SELF_ONLY,		/* process only a single object. */
+	BPF_ITER_DESCENDANTS_PRE,	/* walk descendants in pre-order. */
+	BPF_ITER_DESCENDANTS_POST,	/* walk descendants in post-order. */
+	BPF_ITER_ANCESTORS_UP,		/* walk ancestors upward. */
+};
+
 union bpf_iter_link_info {
 	struct {
 		__u32	map_fd;
 	} map;
+	struct {
+		enum bpf_cgroup_iter_order order;
+
+		/* At most one of cgroup_fd and cgroup_id can be non-zero. If
+		 * both are zero, the walk starts from the default cgroup v2
+		 * root. For walking v1 hierarchy, one should always explicitly
+		 * specify cgroup_fd.
+		 */
+		__u32	cgroup_fd;
+		__u64	cgroup_id;
+	} cgroup;
 };
 
 /* BPF syscall commands, see bpf(2) man-page for more details. */
@@ -6176,11 +6195,22 @@ struct bpf_link_info {
 		struct {
 			__aligned_u64 target_name; /* in/out: target_name buffer ptr */
 			__u32 target_name_len;	   /* in/out: target_name buffer len */
+
+			/* If the iter specific field is 32 bits, it can be put
+			 * in the first or second union. Otherwise it should be
+			 * put in the second union.
+			 */
 			union {
 				struct {
 					__u32 map_id;
 				} map;
 			};
+			union {
+				struct {
+					__u64 cgroup_id;
+					__u32 order;
+				} cgroup;
+			};
 		} iter;
 		struct  {
 			__u32 netns_ino;
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 057ba8e01e70..00e05b69a4df 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -24,6 +24,9 @@ endif
 ifeq ($(CONFIG_PERF_EVENTS),y)
 obj-$(CONFIG_BPF_SYSCALL) += stackmap.o
 endif
+ifeq ($(CONFIG_CGROUPS),y)
+obj-$(CONFIG_BPF_SYSCALL) += cgroup_iter.o
+endif
 obj-$(CONFIG_CGROUP_BPF) += cgroup.o
 ifeq ($(CONFIG_INET),y)
 obj-$(CONFIG_BPF_SYSCALL) += reuseport_array.o
diff --git a/kernel/bpf/cgroup_iter.c b/kernel/bpf/cgroup_iter.c
new file mode 100644
index 000000000000..cf6d763a57d5
--- /dev/null
+++ b/kernel/bpf/cgroup_iter.c
@@ -0,0 +1,284 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2022 Google */
+#include <linux/bpf.h>
+#include <linux/btf_ids.h>
+#include <linux/cgroup.h>
+#include <linux/kernel.h>
+#include <linux/seq_file.h>
+
+#include "../cgroup/cgroup-internal.h"  /* cgroup_mutex and cgroup_is_dead */
+
+/* cgroup_iter provides four modes of traversal to the cgroup hierarchy.
+ *
+ *  1. Walk the descendants of a cgroup in pre-order.
+ *  2. Walk the descendants of a cgroup in post-order.
+ *  3. Walk the ancestors of a cgroup.
+ *  4. Show the given cgroup only.
+ *
+ * For walking descendants, cgroup_iter can walk in either pre-order or
+ * post-order. For walking ancestors, the iter walks up from a cgroup to
+ * the root.
+ *
+ * The iter program can terminate the walk early by returning 1. Walk
+ * continues if prog returns 0.
+ *
+ * The prog can check (meta->seq_num == 0) to determine whether this is
+ * the first element. The prog may also be passed a NULL cgroup,
+ * which means the walk has completed and the prog has a chance to
+ * do post-processing, such as outputting an epilogue.
+ *
+ * Note: the iter_prog is called with cgroup_mutex held.
+ *
+ * Currently only one session is supported, which means, depending on the
+ * volume of data bpf program intends to send to user space, the number
+ * of cgroups that can be walked is limited. For example, given the current
+ * buffer size is 8 * PAGE_SIZE, if the program sends 64B data for each
+ * cgroup, assuming PAGE_SIZE is 4kb, the total number of cgroups that can
+ * be walked is 512. This is a limitation of cgroup_iter. If the output data
+ * is larger than the kernel buffer size, after all data in the kernel buffer
+ * is consumed by user space, the subsequent read() syscall will signal
+ * EOPNOTSUPP. In order to work around, the user may have to update their
+ * program to reduce the volume of data sent to output. For example, skip
+ * some uninteresting cgroups.
+ */
+
+struct bpf_iter__cgroup {
+	__bpf_md_ptr(struct bpf_iter_meta *, meta);
+	__bpf_md_ptr(struct cgroup *, cgroup);
+};
+
+struct cgroup_iter_priv {
+	struct cgroup_subsys_state *start_css;
+	bool visited_all;
+	bool terminate;
+	int order;
+};
+
+static void *cgroup_iter_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	struct cgroup_iter_priv *p = seq->private;
+
+	mutex_lock(&cgroup_mutex);
+
+	/* cgroup_iter doesn't support read across multiple sessions. */
+	if (*pos > 0) {
+		if (p->visited_all)
+			return NULL;
+
+		/* Haven't visited all, but because cgroup_mutex has dropped,
+		 * return -EOPNOTSUPP to indicate incomplete iteration.
+		 */
+		return ERR_PTR(-EOPNOTSUPP);
+	}
+
+	++*pos;
+	p->terminate = false;
+	p->visited_all = false;
+	if (p->order == BPF_ITER_DESCENDANTS_PRE)
+		return css_next_descendant_pre(NULL, p->start_css);
+	else if (p->order == BPF_ITER_DESCENDANTS_POST)
+		return css_next_descendant_post(NULL, p->start_css);
+	else if (p->order == BPF_ITER_ANCESTORS_UP)
+		return p->start_css;
+	else /* BPF_ITER_SELF_ONLY */
+		return p->start_css;
+}
+
+static int __cgroup_iter_seq_show(struct seq_file *seq,
+				  struct cgroup_subsys_state *css, int in_stop);
+
+static void cgroup_iter_seq_stop(struct seq_file *seq, void *v)
+{
+	struct cgroup_iter_priv *p = seq->private;
+
+	mutex_unlock(&cgroup_mutex);
+
+	/* pass NULL to the prog for post-processing */
+	if (!v) {
+		__cgroup_iter_seq_show(seq, NULL, true);
+		p->visited_all = true;
+	}
+}
+
+static void *cgroup_iter_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct cgroup_subsys_state *curr = (struct cgroup_subsys_state *)v;
+	struct cgroup_iter_priv *p = seq->private;
+
+	++*pos;
+	if (p->terminate)
+		return NULL;
+
+	if (p->order == BPF_ITER_DESCENDANTS_PRE)
+		return css_next_descendant_pre(curr, p->start_css);
+	else if (p->order == BPF_ITER_DESCENDANTS_POST)
+		return css_next_descendant_post(curr, p->start_css);
+	else if (p->order == BPF_ITER_ANCESTORS_UP)
+		return curr->parent;
+	else  /* BPF_ITER_SELF_ONLY */
+		return NULL;
+}
+
+static int __cgroup_iter_seq_show(struct seq_file *seq,
+				  struct cgroup_subsys_state *css, int in_stop)
+{
+	struct cgroup_iter_priv *p = seq->private;
+	struct bpf_iter__cgroup ctx;
+	struct bpf_iter_meta meta;
+	struct bpf_prog *prog;
+	int ret = 0;
+
+	/* cgroup is dead, skip this element */
+	if (css && cgroup_is_dead(css->cgroup))
+		return 0;
+
+	ctx.meta = &meta;
+	ctx.cgroup = css ? css->cgroup : NULL;
+	meta.seq = seq;
+	prog = bpf_iter_get_info(&meta, in_stop);
+	if (prog)
+		ret = bpf_iter_run_prog(prog, &ctx);
+
+	/* if prog returns > 0, terminate after this element. */
+	if (ret != 0)
+		p->terminate = true;
+
+	return 0;
+}
+
+static int cgroup_iter_seq_show(struct seq_file *seq, void *v)
+{
+	return __cgroup_iter_seq_show(seq, (struct cgroup_subsys_state *)v,
+				      false);
+}
+
+static const struct seq_operations cgroup_iter_seq_ops = {
+	.start  = cgroup_iter_seq_start,
+	.next   = cgroup_iter_seq_next,
+	.stop   = cgroup_iter_seq_stop,
+	.show   = cgroup_iter_seq_show,
+};
+
+BTF_ID_LIST_SINGLE(bpf_cgroup_btf_id, struct, cgroup)
+
+static int cgroup_iter_seq_init(void *priv, struct bpf_iter_aux_info *aux)
+{
+	struct cgroup_iter_priv *p = (struct cgroup_iter_priv *)priv;
+	struct cgroup *cgrp = aux->cgroup.start;
+
+	p->start_css = &cgrp->self;
+	p->terminate = false;
+	p->visited_all = false;
+	p->order = aux->cgroup.order;
+	return 0;
+}
+
+static const struct bpf_iter_seq_info cgroup_iter_seq_info = {
+	.seq_ops		= &cgroup_iter_seq_ops,
+	.init_seq_private	= cgroup_iter_seq_init,
+	.seq_priv_size		= sizeof(struct cgroup_iter_priv),
+};
+
+static int bpf_iter_attach_cgroup(struct bpf_prog *prog,
+				  union bpf_iter_link_info *linfo,
+				  struct bpf_iter_aux_info *aux)
+{
+	int fd = linfo->cgroup.cgroup_fd;
+	u64 id = linfo->cgroup.cgroup_id;
+	int order = linfo->cgroup.order;
+	struct cgroup *cgrp;
+
+	if (order != BPF_ITER_DESCENDANTS_PRE &&
+	    order != BPF_ITER_DESCENDANTS_POST &&
+	    order != BPF_ITER_ANCESTORS_UP &&
+	    order != BPF_ITER_SELF_ONLY)
+		return -EINVAL;
+
+	if (fd && id)
+		return -EINVAL;
+
+	if (fd)
+		cgrp = cgroup_get_from_fd(fd);
+	else if (id)
+		cgrp = cgroup_get_from_id(id);
+	else /* walk the entire hierarchy by default. */
+		cgrp = cgroup_get_from_path("/");
+
+	if (IS_ERR(cgrp))
+		return PTR_ERR(cgrp);
+
+	aux->cgroup.start = cgrp;
+	aux->cgroup.order = order;
+	return 0;
+}
+
+static void bpf_iter_detach_cgroup(struct bpf_iter_aux_info *aux)
+{
+	cgroup_put(aux->cgroup.start);
+}
+
+static void bpf_iter_cgroup_show_fdinfo(const struct bpf_iter_aux_info *aux,
+					struct seq_file *seq)
+{
+	char *buf;
+
+	buf = kzalloc(PATH_MAX, GFP_KERNEL);
+	if (!buf) {
+		seq_puts(seq, "cgroup_path:\t<unknown>\n");
+		goto show_order;
+	}
+
+	/* If cgroup_path_ns() fails, buf will be an empty string, cgroup_path
+	 * will print nothing.
+	 *
+	 * Path is in the calling process's cgroup namespace.
+	 */
+	cgroup_path_ns(aux->cgroup.start, buf, PATH_MAX,
+		       current->nsproxy->cgroup_ns);
+	seq_printf(seq, "cgroup_path:\t%s\n", buf);
+	kfree(buf);
+
+show_order:
+	if (aux->cgroup.order == BPF_ITER_DESCENDANTS_PRE)
+		seq_puts(seq, "order: descendants_pre\n");
+	else if (aux->cgroup.order == BPF_ITER_DESCENDANTS_POST)
+		seq_puts(seq, "order: descendants_post\n");
+	else if (aux->cgroup.order == BPF_ITER_ANCESTORS_UP)
+		seq_puts(seq, "order: ancestors_up\n");
+	else /* BPF_ITER_SELF_ONLY */
+		seq_puts(seq, "order: self_only\n");
+}
+
+static int bpf_iter_cgroup_fill_link_info(const struct bpf_iter_aux_info *aux,
+					  struct bpf_link_info *info)
+{
+	info->iter.cgroup.order = aux->cgroup.order;
+	info->iter.cgroup.cgroup_id = cgroup_id(aux->cgroup.start);
+	return 0;
+}
+
+DEFINE_BPF_ITER_FUNC(cgroup, struct bpf_iter_meta *meta,
+		     struct cgroup *cgroup)
+
+static struct bpf_iter_reg bpf_cgroup_reg_info = {
+	.target			= "cgroup",
+	.feature		= BPF_ITER_RESCHED,
+	.attach_target		= bpf_iter_attach_cgroup,
+	.detach_target		= bpf_iter_detach_cgroup,
+	.show_fdinfo		= bpf_iter_cgroup_show_fdinfo,
+	.fill_link_info		= bpf_iter_cgroup_fill_link_info,
+	.ctx_arg_info_size	= 1,
+	.ctx_arg_info		= {
+		{ offsetof(struct bpf_iter__cgroup, cgroup),
+		  PTR_TO_BTF_ID_OR_NULL },
+	},
+	.seq_info		= &cgroup_iter_seq_info,
+};
+
+static int __init bpf_cgroup_iter_init(void)
+{
+	bpf_cgroup_reg_info.ctx_arg_info[0].btf_id = bpf_cgroup_btf_id[0];
+	return bpf_iter_reg_target(&bpf_cgroup_reg_info);
+}
+
+late_initcall(bpf_cgroup_iter_init);
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 4fb685591035..5056cef2112f 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -87,10 +87,29 @@ struct bpf_cgroup_storage_key {
 	__u32	attach_type;		/* program attach type (enum bpf_attach_type) */
 };
 
+enum bpf_cgroup_iter_order {
+	BPF_ITER_ORDER_UNSPEC = 0,
+	BPF_ITER_SELF_ONLY,		/* process only a single object. */
+	BPF_ITER_DESCENDANTS_PRE,	/* walk descendants in pre-order. */
+	BPF_ITER_DESCENDANTS_POST,	/* walk descendants in post-order. */
+	BPF_ITER_ANCESTORS_UP,		/* walk ancestors upward. */
+};
+
 union bpf_iter_link_info {
 	struct {
 		__u32	map_fd;
 	} map;
+	struct {
+		enum bpf_cgroup_iter_order order;
+
+		/* At most one of cgroup_fd and cgroup_id can be non-zero. If
+		 * both are zero, the walk starts from the default cgroup v2
+		 * root. For walking v1 hierarchy, one should always explicitly
+		 * specify cgroup_fd.
+		 */
+		__u32	cgroup_fd;
+		__u64	cgroup_id;
+	} cgroup;
 };
 
 /* BPF syscall commands, see bpf(2) man-page for more details. */
@@ -6176,11 +6195,22 @@ struct bpf_link_info {
 		struct {
 			__aligned_u64 target_name; /* in/out: target_name buffer ptr */
 			__u32 target_name_len;	   /* in/out: target_name buffer len */
+
+			/* If the iter specific field is 32 bits, it can be put
+			 * in the first or second union. Otherwise it should be
+			 * put in the second union.
+			 */
 			union {
 				struct {
 					__u32 map_id;
 				} map;
 			};
+			union {
+				struct {
+					__u64 cgroup_id;
+					__u32 order;
+				} cgroup;
+			};
 		} iter;
 		struct  {
 			__u32 netns_ino;
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
index 5fce7008d1ff..84c1cfaa2b02 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
@@ -764,8 +764,8 @@ static void test_btf_dump_struct_data(struct btf *btf, struct btf_dump *d,
 
 	/* union with nested struct */
 	TEST_BTF_DUMP_DATA(btf, d, "union", str, union bpf_iter_link_info, BTF_F_COMPACT,
-			   "(union bpf_iter_link_info){.map = (struct){.map_fd = (__u32)1,},}",
-			   { .map = { .map_fd = 1 }});
+			   "(union bpf_iter_link_info){.map = (struct){.map_fd = (__u32)1,},.cgroup = (struct){.order = (__u32)1,.cgroup_fd = (__u32)1,},}",
+			   { .cgroup = { .order = 1, .cgroup_fd = 1, }});
 
 	/* struct skb with nested structs/unions; because type output is so
 	 * complex, we don't do a string comparison, just verify we return
-- 
cgit v1.2.3


From fe0dd9d4b7402c9773fc7a453fa65875abaa24ec Mon Sep 17 00:00:00 2001
From: Hao Luo <haoluo@google.com>
Date: Wed, 24 Aug 2022 16:31:14 -0700
Subject: selftests/bpf: Test cgroup_iter.

Add a selftest for cgroup_iter. The selftest creates a mini cgroup tree
of the following structure:

    ROOT (working cgroup)
     |
   PARENT
  /      \
CHILD1  CHILD2

and tests the following scenarios:

 - invalid cgroup fd.
 - pre-order walk over descendants from PARENT.
 - post-order walk over descendants from PARENT.
 - walk of ancestors from PARENT.
 - process only a single object (i.e. PARENT).
 - early termination.

Acked-by: Yonghong Song <yhs@fb.com>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Hao Luo <haoluo@google.com>
Link: https://lore.kernel.org/r/20220824233117.1312810-3-haoluo@google.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/prog_tests/btf_dump.c  |   2 +-
 .../testing/selftests/bpf/prog_tests/cgroup_iter.c | 224 +++++++++++++++++++++
 tools/testing/selftests/bpf/progs/bpf_iter.h       |   7 +
 tools/testing/selftests/bpf/progs/cgroup_iter.c    |  39 ++++
 4 files changed, 271 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/cgroup_iter.c
 create mode 100644 tools/testing/selftests/bpf/progs/cgroup_iter.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
index 84c1cfaa2b02..a1bae92be1fc 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
@@ -764,7 +764,7 @@ static void test_btf_dump_struct_data(struct btf *btf, struct btf_dump *d,
 
 	/* union with nested struct */
 	TEST_BTF_DUMP_DATA(btf, d, "union", str, union bpf_iter_link_info, BTF_F_COMPACT,
-			   "(union bpf_iter_link_info){.map = (struct){.map_fd = (__u32)1,},.cgroup = (struct){.order = (__u32)1,.cgroup_fd = (__u32)1,},}",
+			   "(union bpf_iter_link_info){.map = (struct){.map_fd = (__u32)1,},.cgroup = (struct){.order = (enum bpf_cgroup_iter_order)BPF_ITER_SELF_ONLY,.cgroup_fd = (__u32)1,},}",
 			   { .cgroup = { .order = 1, .cgroup_fd = 1, }});
 
 	/* struct skb with nested structs/unions; because type output is so
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c b/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c
new file mode 100644
index 000000000000..38958c37b9ce
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c
@@ -0,0 +1,224 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Google */
+
+#include <test_progs.h>
+#include <bpf/libbpf.h>
+#include <bpf/btf.h>
+#include "cgroup_iter.skel.h"
+#include "cgroup_helpers.h"
+
+#define ROOT           0
+#define PARENT         1
+#define CHILD1         2
+#define CHILD2         3
+#define NUM_CGROUPS    4
+
+#define PROLOGUE       "prologue\n"
+#define EPILOGUE       "epilogue\n"
+
+static const char *cg_path[] = {
+	"/", "/parent", "/parent/child1", "/parent/child2"
+};
+
+static int cg_fd[] = {-1, -1, -1, -1};
+static unsigned long long cg_id[] = {0, 0, 0, 0};
+static char expected_output[64];
+
+static int setup_cgroups(void)
+{
+	int fd, i = 0;
+
+	for (i = 0; i < NUM_CGROUPS; i++) {
+		fd = create_and_get_cgroup(cg_path[i]);
+		if (fd < 0)
+			return fd;
+
+		cg_fd[i] = fd;
+		cg_id[i] = get_cgroup_id(cg_path[i]);
+	}
+	return 0;
+}
+
+static void cleanup_cgroups(void)
+{
+	int i;
+
+	for (i = 0; i < NUM_CGROUPS; i++)
+		close(cg_fd[i]);
+}
+
+static void read_from_cgroup_iter(struct bpf_program *prog, int cgroup_fd,
+				  int order, const char *testname)
+{
+	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+	union bpf_iter_link_info linfo;
+	struct bpf_link *link;
+	int len, iter_fd;
+	static char buf[128];
+	size_t left;
+	char *p;
+
+	memset(&linfo, 0, sizeof(linfo));
+	linfo.cgroup.cgroup_fd = cgroup_fd;
+	linfo.cgroup.order = order;
+	opts.link_info = &linfo;
+	opts.link_info_len = sizeof(linfo);
+
+	link = bpf_program__attach_iter(prog, &opts);
+	if (!ASSERT_OK_PTR(link, "attach_iter"))
+		return;
+
+	iter_fd = bpf_iter_create(bpf_link__fd(link));
+	if (iter_fd < 0)
+		goto free_link;
+
+	memset(buf, 0, sizeof(buf));
+	left = ARRAY_SIZE(buf);
+	p = buf;
+	while ((len = read(iter_fd, p, left)) > 0) {
+		p += len;
+		left -= len;
+	}
+
+	ASSERT_STREQ(buf, expected_output, testname);
+
+	/* read() after iter finishes should be ok. */
+	if (len == 0)
+		ASSERT_OK(read(iter_fd, buf, sizeof(buf)), "second_read");
+
+	close(iter_fd);
+free_link:
+	bpf_link__destroy(link);
+}
+
+/* Invalid cgroup. */
+static void test_invalid_cgroup(struct cgroup_iter *skel)
+{
+	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+	union bpf_iter_link_info linfo;
+	struct bpf_link *link;
+
+	memset(&linfo, 0, sizeof(linfo));
+	linfo.cgroup.cgroup_fd = (__u32)-1;
+	opts.link_info = &linfo;
+	opts.link_info_len = sizeof(linfo);
+
+	link = bpf_program__attach_iter(skel->progs.cgroup_id_printer, &opts);
+	ASSERT_ERR_PTR(link, "attach_iter");
+	bpf_link__destroy(link);
+}
+
+/* Specifying both cgroup_fd and cgroup_id is invalid. */
+static void test_invalid_cgroup_spec(struct cgroup_iter *skel)
+{
+	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+	union bpf_iter_link_info linfo;
+	struct bpf_link *link;
+
+	memset(&linfo, 0, sizeof(linfo));
+	linfo.cgroup.cgroup_fd = (__u32)cg_fd[PARENT];
+	linfo.cgroup.cgroup_id = (__u64)cg_id[PARENT];
+	opts.link_info = &linfo;
+	opts.link_info_len = sizeof(linfo);
+
+	link = bpf_program__attach_iter(skel->progs.cgroup_id_printer, &opts);
+	ASSERT_ERR_PTR(link, "attach_iter");
+	bpf_link__destroy(link);
+}
+
+/* Preorder walk prints parent and child in order. */
+static void test_walk_preorder(struct cgroup_iter *skel)
+{
+	snprintf(expected_output, sizeof(expected_output),
+		 PROLOGUE "%8llu\n%8llu\n%8llu\n" EPILOGUE,
+		 cg_id[PARENT], cg_id[CHILD1], cg_id[CHILD2]);
+
+	read_from_cgroup_iter(skel->progs.cgroup_id_printer, cg_fd[PARENT],
+			      BPF_ITER_DESCENDANTS_PRE, "preorder");
+}
+
+/* Postorder walk prints child and parent in order. */
+static void test_walk_postorder(struct cgroup_iter *skel)
+{
+	snprintf(expected_output, sizeof(expected_output),
+		 PROLOGUE "%8llu\n%8llu\n%8llu\n" EPILOGUE,
+		 cg_id[CHILD1], cg_id[CHILD2], cg_id[PARENT]);
+
+	read_from_cgroup_iter(skel->progs.cgroup_id_printer, cg_fd[PARENT],
+			      BPF_ITER_DESCENDANTS_POST, "postorder");
+}
+
+/* Walking parents prints parent and then root. */
+static void test_walk_ancestors_up(struct cgroup_iter *skel)
+{
+	/* terminate the walk when ROOT is met. */
+	skel->bss->terminal_cgroup = cg_id[ROOT];
+
+	snprintf(expected_output, sizeof(expected_output),
+		 PROLOGUE "%8llu\n%8llu\n" EPILOGUE,
+		 cg_id[PARENT], cg_id[ROOT]);
+
+	read_from_cgroup_iter(skel->progs.cgroup_id_printer, cg_fd[PARENT],
+			      BPF_ITER_ANCESTORS_UP, "ancestors_up");
+
+	skel->bss->terminal_cgroup = 0;
+}
+
+/* Early termination prints parent only. */
+static void test_early_termination(struct cgroup_iter *skel)
+{
+	/* terminate the walk after the first element is processed. */
+	skel->bss->terminate_early = 1;
+
+	snprintf(expected_output, sizeof(expected_output),
+		 PROLOGUE "%8llu\n" EPILOGUE, cg_id[PARENT]);
+
+	read_from_cgroup_iter(skel->progs.cgroup_id_printer, cg_fd[PARENT],
+			      BPF_ITER_DESCENDANTS_PRE, "early_termination");
+
+	skel->bss->terminate_early = 0;
+}
+
+/* Walking self prints self only. */
+static void test_walk_self_only(struct cgroup_iter *skel)
+{
+	snprintf(expected_output, sizeof(expected_output),
+		 PROLOGUE "%8llu\n" EPILOGUE, cg_id[PARENT]);
+
+	read_from_cgroup_iter(skel->progs.cgroup_id_printer, cg_fd[PARENT],
+			      BPF_ITER_SELF_ONLY, "self_only");
+}
+
+void test_cgroup_iter(void)
+{
+	struct cgroup_iter *skel = NULL;
+
+	if (setup_cgroup_environment())
+		return;
+
+	if (setup_cgroups())
+		goto out;
+
+	skel = cgroup_iter__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "cgroup_iter__open_and_load"))
+		goto out;
+
+	if (test__start_subtest("cgroup_iter__invalid_cgroup"))
+		test_invalid_cgroup(skel);
+	if (test__start_subtest("cgroup_iter__invalid_cgroup_spec"))
+		test_invalid_cgroup_spec(skel);
+	if (test__start_subtest("cgroup_iter__preorder"))
+		test_walk_preorder(skel);
+	if (test__start_subtest("cgroup_iter__postorder"))
+		test_walk_postorder(skel);
+	if (test__start_subtest("cgroup_iter__ancestors_up_walk"))
+		test_walk_ancestors_up(skel);
+	if (test__start_subtest("cgroup_iter__early_termination"))
+		test_early_termination(skel);
+	if (test__start_subtest("cgroup_iter__self_only"))
+		test_walk_self_only(skel);
+out:
+	cgroup_iter__destroy(skel);
+	cleanup_cgroups();
+	cleanup_cgroup_environment();
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter.h b/tools/testing/selftests/bpf/progs/bpf_iter.h
index e9846606690d..c41ee80533ca 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter.h
+++ b/tools/testing/selftests/bpf/progs/bpf_iter.h
@@ -17,6 +17,7 @@
 #define bpf_iter__bpf_sk_storage_map bpf_iter__bpf_sk_storage_map___not_used
 #define bpf_iter__sockmap bpf_iter__sockmap___not_used
 #define bpf_iter__bpf_link bpf_iter__bpf_link___not_used
+#define bpf_iter__cgroup bpf_iter__cgroup___not_used
 #define btf_ptr btf_ptr___not_used
 #define BTF_F_COMPACT BTF_F_COMPACT___not_used
 #define BTF_F_NONAME BTF_F_NONAME___not_used
@@ -40,6 +41,7 @@
 #undef bpf_iter__bpf_sk_storage_map
 #undef bpf_iter__sockmap
 #undef bpf_iter__bpf_link
+#undef bpf_iter__cgroup
 #undef btf_ptr
 #undef BTF_F_COMPACT
 #undef BTF_F_NONAME
@@ -141,6 +143,11 @@ struct bpf_iter__bpf_link {
 	struct bpf_link *link;
 };
 
+struct bpf_iter__cgroup {
+	struct bpf_iter_meta *meta;
+	struct cgroup *cgroup;
+} __attribute__((preserve_access_index));
+
 struct btf_ptr {
 	void *ptr;
 	__u32 type_id;
diff --git a/tools/testing/selftests/bpf/progs/cgroup_iter.c b/tools/testing/selftests/bpf/progs/cgroup_iter.c
new file mode 100644
index 000000000000..de03997322a7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgroup_iter.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Google */
+
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+int terminate_early = 0;
+u64 terminal_cgroup = 0;
+
+static inline u64 cgroup_id(struct cgroup *cgrp)
+{
+	return cgrp->kn->id;
+}
+
+SEC("iter/cgroup")
+int cgroup_id_printer(struct bpf_iter__cgroup *ctx)
+{
+	struct seq_file *seq = ctx->meta->seq;
+	struct cgroup *cgrp = ctx->cgroup;
+
+	/* epilogue */
+	if (cgrp == NULL) {
+		BPF_SEQ_PRINTF(seq, "epilogue\n");
+		return 0;
+	}
+
+	/* prologue */
+	if (ctx->meta->seq_num == 0)
+		BPF_SEQ_PRINTF(seq, "prologue\n");
+
+	BPF_SEQ_PRINTF(seq, "%8llu\n", cgroup_id(cgrp));
+
+	if (terminal_cgroup == cgroup_id(cgrp))
+		return 1;
+
+	return terminate_early ? 1 : 0;
+}
-- 
cgit v1.2.3


From 434992bb603773c94465c7e68331e68424bdc9eb Mon Sep 17 00:00:00 2001
From: Yosry Ahmed <yosryahmed@google.com>
Date: Wed, 24 Aug 2022 16:31:16 -0700
Subject: selftests/bpf: extend cgroup helpers

This patch extends the bpf selftest cgroup_helpers in various ways:
- Add enable_controllers() that allows tests to enable all or a
  subset of controllers for a specific cgroup.
- Add join_parent_cgroup(). The cgroup workdir is based on the pid,
  therefore a spawned child cannot join the same cgroup hierarchy of the
  test through join_cgroup(). join_parent_cgroup() is used in child
  processes to join a cgroup under the parent's workdir.
- Add write_cgroup_file() and write_cgroup_file_parent() (similar to
  join_parent_cgroup() above).
- Add get_root_cgroup() for tests that need to do checks on root cgroup.
- Distinguish relative and absolute cgroup paths in function arguments.
  Now relative paths are called relative_path, and absolute paths are
  called cgroup_path.

Signed-off-by: Yosry Ahmed <yosryahmed@google.com>
Signed-off-by: Hao Luo <haoluo@google.com>
Link: https://lore.kernel.org/r/20220824233117.1312810-5-haoluo@google.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/cgroup_helpers.c | 202 +++++++++++++++++++++------
 tools/testing/selftests/bpf/cgroup_helpers.h |  19 ++-
 2 files changed, 174 insertions(+), 47 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c
index 9d59c3990ca8..e914cc45b766 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.c
+++ b/tools/testing/selftests/bpf/cgroup_helpers.c
@@ -33,49 +33,52 @@
 #define CGROUP_MOUNT_DFLT		"/sys/fs/cgroup"
 #define NETCLS_MOUNT_PATH		CGROUP_MOUNT_DFLT "/net_cls"
 #define CGROUP_WORK_DIR			"/cgroup-test-work-dir"
-#define format_cgroup_path(buf, path) \
+
+#define format_cgroup_path_pid(buf, path, pid) \
 	snprintf(buf, sizeof(buf), "%s%s%d%s", CGROUP_MOUNT_PATH, \
-	CGROUP_WORK_DIR, getpid(), path)
+	CGROUP_WORK_DIR, pid, path)
+
+#define format_cgroup_path(buf, path) \
+	format_cgroup_path_pid(buf, path, getpid())
+
+#define format_parent_cgroup_path(buf, path) \
+	format_cgroup_path_pid(buf, path, getppid())
 
 #define format_classid_path(buf)				\
 	snprintf(buf, sizeof(buf), "%s%s", NETCLS_MOUNT_PATH,	\
 		 CGROUP_WORK_DIR)
 
-/**
- * enable_all_controllers() - Enable all available cgroup v2 controllers
- *
- * Enable all available cgroup v2 controllers in order to increase
- * the code coverage.
- *
- * If successful, 0 is returned.
- */
-static int enable_all_controllers(char *cgroup_path)
+static int __enable_controllers(const char *cgroup_path, const char *controllers)
 {
 	char path[PATH_MAX + 1];
-	char buf[PATH_MAX];
+	char enable[PATH_MAX + 1];
 	char *c, *c2;
 	int fd, cfd;
 	ssize_t len;
 
-	snprintf(path, sizeof(path), "%s/cgroup.controllers", cgroup_path);
-	fd = open(path, O_RDONLY);
-	if (fd < 0) {
-		log_err("Opening cgroup.controllers: %s", path);
-		return 1;
-	}
-
-	len = read(fd, buf, sizeof(buf) - 1);
-	if (len < 0) {
+	/* If no controllers are passed, enable all available controllers */
+	if (!controllers) {
+		snprintf(path, sizeof(path), "%s/cgroup.controllers",
+			 cgroup_path);
+		fd = open(path, O_RDONLY);
+		if (fd < 0) {
+			log_err("Opening cgroup.controllers: %s", path);
+			return 1;
+		}
+		len = read(fd, enable, sizeof(enable) - 1);
+		if (len < 0) {
+			close(fd);
+			log_err("Reading cgroup.controllers: %s", path);
+			return 1;
+		} else if (len == 0) { /* No controllers to enable */
+			close(fd);
+			return 0;
+		}
+		enable[len] = 0;
 		close(fd);
-		log_err("Reading cgroup.controllers: %s", path);
-		return 1;
+	} else {
+		strncpy(enable, controllers, sizeof(enable));
 	}
-	buf[len] = 0;
-	close(fd);
-
-	/* No controllers available? We're probably on cgroup v1. */
-	if (len == 0)
-		return 0;
 
 	snprintf(path, sizeof(path), "%s/cgroup.subtree_control", cgroup_path);
 	cfd = open(path, O_RDWR);
@@ -84,7 +87,7 @@ static int enable_all_controllers(char *cgroup_path)
 		return 1;
 	}
 
-	for (c = strtok_r(buf, " ", &c2); c; c = strtok_r(NULL, " ", &c2)) {
+	for (c = strtok_r(enable, " ", &c2); c; c = strtok_r(NULL, " ", &c2)) {
 		if (dprintf(cfd, "+%s\n", c) <= 0) {
 			log_err("Enabling controller %s: %s", c, path);
 			close(cfd);
@@ -95,6 +98,87 @@ static int enable_all_controllers(char *cgroup_path)
 	return 0;
 }
 
+/**
+ * enable_controllers() - Enable cgroup v2 controllers
+ * @relative_path: The cgroup path, relative to the workdir
+ * @controllers: List of controllers to enable in cgroup.controllers format
+ *
+ *
+ * Enable given cgroup v2 controllers, if @controllers is NULL, enable all
+ * available controllers.
+ *
+ * If successful, 0 is returned.
+ */
+int enable_controllers(const char *relative_path, const char *controllers)
+{
+	char cgroup_path[PATH_MAX + 1];
+
+	format_cgroup_path(cgroup_path, relative_path);
+	return __enable_controllers(cgroup_path, controllers);
+}
+
+static int __write_cgroup_file(const char *cgroup_path, const char *file,
+			       const char *buf)
+{
+	char file_path[PATH_MAX + 1];
+	int fd;
+
+	snprintf(file_path, sizeof(file_path), "%s/%s", cgroup_path, file);
+	fd = open(file_path, O_RDWR);
+	if (fd < 0) {
+		log_err("Opening %s", file_path);
+		return 1;
+	}
+
+	if (dprintf(fd, "%s", buf) <= 0) {
+		log_err("Writing to %s", file_path);
+		close(fd);
+		return 1;
+	}
+	close(fd);
+	return 0;
+}
+
+/**
+ * write_cgroup_file() - Write to a cgroup file
+ * @relative_path: The cgroup path, relative to the workdir
+ * @file: The name of the file in cgroupfs to write to
+ * @buf: Buffer to write to the file
+ *
+ * Write to a file in the given cgroup's directory.
+ *
+ * If successful, 0 is returned.
+ */
+int write_cgroup_file(const char *relative_path, const char *file,
+		      const char *buf)
+{
+	char cgroup_path[PATH_MAX - 24];
+
+	format_cgroup_path(cgroup_path, relative_path);
+	return __write_cgroup_file(cgroup_path, file, buf);
+}
+
+/**
+ * write_cgroup_file_parent() - Write to a cgroup file in the parent process
+ *                              workdir
+ * @relative_path: The cgroup path, relative to the parent process workdir
+ * @file: The name of the file in cgroupfs to write to
+ * @buf: Buffer to write to the file
+ *
+ * Write to a file in the given cgroup's directory under the parent process
+ * workdir.
+ *
+ * If successful, 0 is returned.
+ */
+int write_cgroup_file_parent(const char *relative_path, const char *file,
+			     const char *buf)
+{
+	char cgroup_path[PATH_MAX - 24];
+
+	format_parent_cgroup_path(cgroup_path, relative_path);
+	return __write_cgroup_file(cgroup_path, file, buf);
+}
+
 /**
  * setup_cgroup_environment() - Setup the cgroup environment
  *
@@ -133,7 +217,9 @@ int setup_cgroup_environment(void)
 		return 1;
 	}
 
-	if (enable_all_controllers(cgroup_workdir))
+	/* Enable all available controllers to increase test coverage */
+	if (__enable_controllers(CGROUP_MOUNT_PATH, NULL) ||
+	    __enable_controllers(cgroup_workdir, NULL))
 		return 1;
 
 	return 0;
@@ -173,7 +259,7 @@ static int join_cgroup_from_top(const char *cgroup_path)
 
 /**
  * join_cgroup() - Join a cgroup
- * @path: The cgroup path, relative to the workdir, to join
+ * @relative_path: The cgroup path, relative to the workdir, to join
  *
  * This function expects a cgroup to already be created, relative to the cgroup
  * work dir, and it joins it. For example, passing "/my-cgroup" as the path
@@ -182,11 +268,27 @@ static int join_cgroup_from_top(const char *cgroup_path)
  *
  * On success, it returns 0, otherwise on failure it returns 1.
  */
-int join_cgroup(const char *path)
+int join_cgroup(const char *relative_path)
+{
+	char cgroup_path[PATH_MAX + 1];
+
+	format_cgroup_path(cgroup_path, relative_path);
+	return join_cgroup_from_top(cgroup_path);
+}
+
+/**
+ * join_parent_cgroup() - Join a cgroup in the parent process workdir
+ * @relative_path: The cgroup path, relative to parent process workdir, to join
+ *
+ * See join_cgroup().
+ *
+ * On success, it returns 0, otherwise on failure it returns 1.
+ */
+int join_parent_cgroup(const char *relative_path)
 {
 	char cgroup_path[PATH_MAX + 1];
 
-	format_cgroup_path(cgroup_path, path);
+	format_parent_cgroup_path(cgroup_path, relative_path);
 	return join_cgroup_from_top(cgroup_path);
 }
 
@@ -212,9 +314,27 @@ void cleanup_cgroup_environment(void)
 	nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT);
 }
 
+/**
+ * get_root_cgroup() - Get the FD of the root cgroup
+ *
+ * On success, it returns the file descriptor. On failure, it returns -1.
+ * If there is a failure, it prints the error to stderr.
+ */
+int get_root_cgroup(void)
+{
+	int fd;
+
+	fd = open(CGROUP_MOUNT_PATH, O_RDONLY);
+	if (fd < 0) {
+		log_err("Opening root cgroup");
+		return -1;
+	}
+	return fd;
+}
+
 /**
  * create_and_get_cgroup() - Create a cgroup, relative to workdir, and get the FD
- * @path: The cgroup path, relative to the workdir, to join
+ * @relative_path: The cgroup path, relative to the workdir, to create
  *
  * This function creates a cgroup under the top level workdir and returns the
  * file descriptor. It is idempotent.
@@ -222,14 +342,14 @@ void cleanup_cgroup_environment(void)
  * On success, it returns the file descriptor. On failure it returns -1.
  * If there is a failure, it prints the error to stderr.
  */
-int create_and_get_cgroup(const char *path)
+int create_and_get_cgroup(const char *relative_path)
 {
 	char cgroup_path[PATH_MAX + 1];
 	int fd;
 
-	format_cgroup_path(cgroup_path, path);
+	format_cgroup_path(cgroup_path, relative_path);
 	if (mkdir(cgroup_path, 0777) && errno != EEXIST) {
-		log_err("mkdiring cgroup %s .. %s", path, cgroup_path);
+		log_err("mkdiring cgroup %s .. %s", relative_path, cgroup_path);
 		return -1;
 	}
 
@@ -244,13 +364,13 @@ int create_and_get_cgroup(const char *path)
 
 /**
  * get_cgroup_id() - Get cgroup id for a particular cgroup path
- * @path: The cgroup path, relative to the workdir, to join
+ * @relative_path: The cgroup path, relative to the workdir, to look up
  *
  * On success, it returns the cgroup id. On failure it returns 0,
  * which is an invalid cgroup id.
  * If there is a failure, it prints the error to stderr.
  */
-unsigned long long get_cgroup_id(const char *path)
+unsigned long long get_cgroup_id(const char *relative_path)
 {
 	int dirfd, err, flags, mount_id, fhsize;
 	union {
@@ -261,7 +381,7 @@ unsigned long long get_cgroup_id(const char *path)
 	struct file_handle *fhp, *fhp2;
 	unsigned long long ret = 0;
 
-	format_cgroup_path(cgroup_workdir, path);
+	format_cgroup_path(cgroup_workdir, relative_path);
 
 	dirfd = AT_FDCWD;
 	flags = 0;
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h
index fcc9cb91b211..3358734356ab 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.h
+++ b/tools/testing/selftests/bpf/cgroup_helpers.h
@@ -10,11 +10,18 @@
 	__FILE__, __LINE__, clean_errno(), ##__VA_ARGS__)
 
 /* cgroupv2 related */
-int cgroup_setup_and_join(const char *path);
-int create_and_get_cgroup(const char *path);
-unsigned long long get_cgroup_id(const char *path);
-
-int join_cgroup(const char *path);
+int enable_controllers(const char *relative_path, const char *controllers);
+int write_cgroup_file(const char *relative_path, const char *file,
+		      const char *buf);
+int write_cgroup_file_parent(const char *relative_path, const char *file,
+			     const char *buf);
+int cgroup_setup_and_join(const char *relative_path);
+int get_root_cgroup(void);
+int create_and_get_cgroup(const char *relative_path);
+unsigned long long get_cgroup_id(const char *relative_path);
+
+int join_cgroup(const char *relative_path);
+int join_parent_cgroup(const char *relative_path);
 
 int setup_cgroup_environment(void);
 void cleanup_cgroup_environment(void);
@@ -26,4 +33,4 @@ int join_classid(void);
 int setup_classid_environment(void);
 void cleanup_classid_environment(void);
 
-#endif /* __CGROUP_HELPERS_H */
\ No newline at end of file
+#endif /* __CGROUP_HELPERS_H */
-- 
cgit v1.2.3


From 88886309d2e82afcaa86fc302c2ba25d9e47cbc8 Mon Sep 17 00:00:00 2001
From: Yosry Ahmed <yosryahmed@google.com>
Date: Wed, 24 Aug 2022 16:31:17 -0700
Subject: selftests/bpf: add a selftest for cgroup hierarchical stats
 collection

Add a selftest that tests the whole workflow for collecting,
aggregating (flushing), and displaying cgroup hierarchical stats.

TL;DR:
- Userspace program creates a cgroup hierarchy and induces memcg reclaim
  in parts of it.
- Whenever reclaim happens, vmscan_start and vmscan_end update
  per-cgroup percpu readings, and tell rstat which (cgroup, cpu) pairs
  have updates.
- When userspace tries to read the stats, vmscan_dump calls rstat to flush
  the stats, and outputs the stats in text format to userspace (similar
  to cgroupfs stats).
- rstat calls vmscan_flush once for every (cgroup, cpu) pair that has
  updates, vmscan_flush aggregates cpu readings and propagates updates
  to parents.
- Userspace program makes sure the stats are aggregated and read
  correctly.

Detailed explanation:
- The test loads tracing bpf programs, vmscan_start and vmscan_end, to
  measure the latency of cgroup reclaim. Per-cgroup readings are stored in
  percpu maps for efficiency. When a cgroup reading is updated on a cpu,
  cgroup_rstat_updated(cgroup, cpu) is called to add the cgroup to the
  rstat updated tree on that cpu.

- A cgroup_iter program, vmscan_dump, is loaded and pinned to a file, for
  each cgroup. Reading this file invokes the program, which calls
  cgroup_rstat_flush(cgroup) to ask rstat to propagate the updates for all
  cpus and cgroups that have updates in this cgroup's subtree. Afterwards,
  the stats are exposed to the user. vmscan_dump returns 1 to terminate
  iteration early, so that we only expose stats for one cgroup per read.

- An ftrace program, vmscan_flush, is also loaded and attached to
  bpf_rstat_flush. When rstat flushing is ongoing, vmscan_flush is invoked
  once for each (cgroup, cpu) pair that has updates. cgroups are popped
  from the rstat tree in a bottom-up fashion, so calls will always be
  made for cgroups that have updates before their parents. The program
  aggregates percpu readings to a total per-cgroup reading, and also
  propagates them to the parent cgroup. After rstat flushing is over, all
  cgroups will have correct updated hierarchical readings (including all
  cpus and all their descendants).

- Finally, the test creates a cgroup hierarchy and induces memcg reclaim
  in parts of it, and makes sure that the stats collection, aggregation,
  and reading workflow works as expected.

Signed-off-by: Yosry Ahmed <yosryahmed@google.com>
Signed-off-by: Hao Luo <haoluo@google.com>
Link: https://lore.kernel.org/r/20220824233117.1312810-6-haoluo@google.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/DENYLIST.s390x         |   1 +
 .../bpf/prog_tests/cgroup_hierarchical_stats.c     | 357 +++++++++++++++++++++
 .../bpf/progs/cgroup_hierarchical_stats.c          | 226 +++++++++++++
 3 files changed, 584 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/cgroup_hierarchical_stats.c
 create mode 100644 tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x
index 37bafcbf952a..736b65f61022 100644
--- a/tools/testing/selftests/bpf/DENYLIST.s390x
+++ b/tools/testing/selftests/bpf/DENYLIST.s390x
@@ -67,3 +67,4 @@ xdp_synproxy                             # JIT does not support calling kernel f
 unpriv_bpf_disabled                      # fentry
 setget_sockopt                           # attach unexpected error: -524                                               (trampoline)
 cb_refs                                  # expected error message unexpected error: -524                               (trampoline)
+cgroup_hierarchical_stats                # JIT does not support calling kernel function                                (kfunc)
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_hierarchical_stats.c b/tools/testing/selftests/bpf/prog_tests/cgroup_hierarchical_stats.c
new file mode 100644
index 000000000000..101a6d70b863
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_hierarchical_stats.c
@@ -0,0 +1,357 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Selftest for collecting hierarchical cgroup stats with BPF (cgroup_iter and rstat)
+ *
+ * Copyright 2022 Google LLC.
+ */
+#include <asm-generic/errno.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include <test_progs.h>
+#include <bpf/libbpf.h>
+#include <bpf/bpf.h>
+
+#include "cgroup_helpers.h"
+#include "cgroup_hierarchical_stats.skel.h"
+
+#define PAGE_SIZE 4096
+#define MB(x) ((x) << 20)	/* megabytes to bytes; argument parenthesized for safe expansion */
+
+#define BPFFS_ROOT "/sys/fs/bpf/"
+#define BPFFS_VMSCAN BPFFS_ROOT"vmscan/"
+
+#define CG_ROOT_NAME "root"
+#define CG_ROOT_ID 1
+
+#define CGROUP_PATH(p, n) {.path = p"/"n, .name = n}
+
+static struct {
+	const char *path, *name;
+	unsigned long long id;
+	int fd;
+} cgroups[] = {
+	CGROUP_PATH("/", "test"),
+	CGROUP_PATH("/test", "child1"),
+	CGROUP_PATH("/test", "child2"),
+	CGROUP_PATH("/test/child1", "child1_1"),
+	CGROUP_PATH("/test/child1", "child1_2"),
+	CGROUP_PATH("/test/child2", "child2_1"),
+	CGROUP_PATH("/test/child2", "child2_2"),
+};
+
+#define N_CGROUPS ARRAY_SIZE(cgroups)
+#define N_NON_LEAF_CGROUPS 3
+
+static int root_cgroup_fd;
+static bool mounted_bpffs;
+
+/* reads up to size-1 bytes (size must be > 0) of 'path' into 'buf', NUL-terminated; 0 on success */
+static int read_from_file(const char *path, char *buf, size_t size)
+{
+	int fd, len;
+
+	fd = open(path, O_RDONLY);
+	if (fd < 0)
+		return fd;
+
+	len = read(fd, buf, size - 1);	/* keep one byte for the terminator */
+	close(fd);
+	if (len < 0)
+		return len;
+
+	buf[len] = 0;
+	return 0;
+}
+
+/* mounts bpffs and mkdir for reading stats, returns 0 on success. */
+static int setup_bpffs(void)
+{
+	int err;
+
+	/* Mount bpffs */
+	err = mount("bpf", BPFFS_ROOT, "bpf", 0, NULL);
+	mounted_bpffs = !err;
+	if (ASSERT_FALSE(err && errno != EBUSY, "mount"))
+		return err;
+
+	/* Create a directory to contain stat files in bpffs */
+	err = mkdir(BPFFS_VMSCAN, 0755);
+	if (!ASSERT_OK(err, "mkdir"))
+		return err;
+
+	return 0;
+}
+
+static void cleanup_bpffs(void)
+{
+	/* Remove created directory in bpffs */
+	ASSERT_OK(rmdir(BPFFS_VMSCAN), "rmdir "BPFFS_VMSCAN);
+
+	/* Only unmount bpffs if setup_bpffs() mounted it (mounted_bpffs is set on mount success) */
+	if (!mounted_bpffs)
+		return;
+
+	ASSERT_OK(umount(BPFFS_ROOT), "unmount bpffs");
+}
+
+/* sets up cgroups, returns 0 on success. */
+static int setup_cgroups(void)
+{
+	int i, fd, err;
+
+	err = setup_cgroup_environment();
+	if (!ASSERT_OK(err, "setup_cgroup_environment"))
+		return err;
+
+	root_cgroup_fd = get_root_cgroup();
+	if (!ASSERT_GE(root_cgroup_fd, 0, "get_root_cgroup"))
+		return root_cgroup_fd;
+
+	for (i = 0; i < N_CGROUPS; i++) {
+		fd = create_and_get_cgroup(cgroups[i].path);
+		if (!ASSERT_GE(fd, 0, "create_and_get_cgroup"))
+			return fd;
+
+		cgroups[i].fd = fd;
+		cgroups[i].id = get_cgroup_id(cgroups[i].path);
+
+		/*
+		 * Enable memcg controller for the entire hierarchy.
+		 * Note that stats are collected for all cgroups in a hierarchy
+		 * with memcg enabled anyway, but are only exposed for cgroups
+		 * that have memcg enabled.
+		 */
+		if (i < N_NON_LEAF_CGROUPS) {
+			err = enable_controllers(cgroups[i].path, "memory");
+			if (!ASSERT_OK(err, "enable_controllers"))
+				return err;
+		}
+	}
+	return 0;
+}
+
+static void cleanup_cgroups(void)
+{
+	close(root_cgroup_fd);
+	for (int i = 0; i < N_CGROUPS; i++)
+		close(cgroups[i].fd);
+	cleanup_cgroup_environment();
+}
+
+/* Sets up bpffs and the cgroup hierarchy, returns 0 on success, 1 on failure. */
+static int setup_hierarchy(void)
+{
+	return setup_bpffs() || setup_cgroups();
+}
+
+static void destroy_hierarchy(void)
+{
+	cleanup_cgroups();
+	cleanup_bpffs();
+}
+
+static int reclaimer(const char *cgroup_path, size_t size)
+{
+	static char size_buf[128];
+	char *buf, *ptr;
+	int err;
+
+	/* Join cgroup in the parent process workdir */
+	if (join_parent_cgroup(cgroup_path))
+		return EACCES;
+
+	/* Allocate memory */
+	buf = malloc(size);
+	if (!buf)
+		return ENOMEM;
+
+	/* Touch one byte per page to make sure the memory is actually allocated */
+	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
+		*ptr = 1;
+
+	/* Try to reclaim memory: write the byte count (size_t, hence %zu) to memory.reclaim */
+	snprintf(size_buf, sizeof(size_buf), "%zu", size);
+	err = write_cgroup_file_parent(cgroup_path, "memory.reclaim", size_buf);
+
+	free(buf);
+	/* memory.reclaim returns EAGAIN if the amount is not fully reclaimed */
+	if (err && errno != EAGAIN)
+		return errno;
+
+	return 0;
+}
+
+static int induce_vmscan(void)
+{
+	int i, status;
+
+	/*
+	 * In every leaf cgroup, run a child process that allocates some memory
+	 * and attempts to reclaim some of it. Returns 0, or 1 if fork() fails.
+	 */
+	for (i = N_NON_LEAF_CGROUPS; i < N_CGROUPS; i++) {
+		pid_t pid;
+
+		/* Create reclaimer child; bail out instead of waiting on pid -1 */
+		pid = fork();
+		if (!ASSERT_GE(pid, 0, "fork reclaimer"))
+			return 1;
+		if (pid == 0)
+			exit(reclaimer(cgroups[i].path, MB(5)));
+
+		/* Cleanup reclaimer child */
+		waitpid(pid, &status, 0);
+		ASSERT_TRUE(WIFEXITED(status), "reclaimer exited");
+		ASSERT_EQ(WEXITSTATUS(status), 0, "reclaim exit code");
+	}
+	return 0;
+}
+
+static unsigned long long
+get_cgroup_vmscan_delay(unsigned long long cgroup_id, const char *file_name)
+{
+	unsigned long long vmscan = 0, id = 0;
+	static char buf[128], path[128];
+
+	/* For every cgroup, read the file generated by cgroup_iter */
+	snprintf(path, 128, "%s%s", BPFFS_VMSCAN, file_name);
+	if (!ASSERT_OK(read_from_file(path, buf, 128), "read cgroup_iter"))
+		return 0;
+
+	/* Check the output file formatting */
+	ASSERT_EQ(sscanf(buf, "cg_id: %llu, total_vmscan_delay: %llu\n",
+			 &id, &vmscan), 2, "output format");
+
+	/* Check that the cgroup_id is displayed correctly */
+	ASSERT_EQ(id, cgroup_id, "cgroup_id");
+	/* Check that the vmscan reading is non-zero */
+	ASSERT_GT(vmscan, 0, "vmscan_reading");
+	return vmscan;
+}
+
+static void check_vmscan_stats(void)
+{
+	unsigned long long vmscan_readings[N_CGROUPS], vmscan_root;
+	int i;
+
+	for (i = 0; i < N_CGROUPS; i++) {
+		vmscan_readings[i] = get_cgroup_vmscan_delay(cgroups[i].id,
+							     cgroups[i].name);
+	}
+
+	/* Read stats for root too */
+	vmscan_root = get_cgroup_vmscan_delay(CG_ROOT_ID, CG_ROOT_NAME);
+
+	/* Check that child1 == child1_1 + child1_2 */
+	ASSERT_EQ(vmscan_readings[1], vmscan_readings[3] + vmscan_readings[4],
+		  "child1_vmscan");
+	/* Check that child2 == child2_1 + child2_2 */
+	ASSERT_EQ(vmscan_readings[2], vmscan_readings[5] + vmscan_readings[6],
+		  "child2_vmscan");
+	/* Check that test == child1 + child2 */
+	ASSERT_EQ(vmscan_readings[0], vmscan_readings[1] + vmscan_readings[2],
+		  "test_vmscan");
+	/* Check that root >= test (index 0 of cgroups[] is the "test" cgroup) */
+	ASSERT_GE(vmscan_root, vmscan_readings[0], "root_vmscan");
+}
+
+/* Creates iter link and pins in bpffs, returns 0 on success, -errno on failure.
+ */
+static int setup_cgroup_iter(struct cgroup_hierarchical_stats *obj,
+			     int cgroup_fd, const char *file_name)
+{
+	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+	union bpf_iter_link_info linfo = {};
+	struct bpf_link *link;
+	static char path[128];
+	int err;
+
+	/*
+	 * Create an iter link, parameterized by cgroup_fd. We only want to
+	 * traverse one cgroup, so set the traversal order to "self".
+	 */
+	linfo.cgroup.cgroup_fd = cgroup_fd;
+	linfo.cgroup.order = BPF_ITER_SELF_ONLY;
+	opts.link_info = &linfo;
+	opts.link_info_len = sizeof(linfo);
+	link = bpf_program__attach_iter(obj->progs.dump_vmscan, &opts);
+	if (!ASSERT_OK_PTR(link, "attach_iter"))
+		return -EFAULT;
+
+	/* Pin the link to a bpffs file */
+	snprintf(path, 128, "%s%s", BPFFS_VMSCAN, file_name);
+	err = bpf_link__pin(link, path);
+	ASSERT_OK(err, "pin cgroup_iter");
+
+	/* Remove the link, leaving only the ref held by the pinned file */
+	bpf_link__destroy(link);
+	return err;
+}
+
+/* Sets up programs for collecting stats, returns 0 on success. */
+static int setup_progs(struct cgroup_hierarchical_stats **skel)
+{
+	int i, err;
+
+	*skel = cgroup_hierarchical_stats__open_and_load();
+	if (!ASSERT_OK_PTR(*skel, "open_and_load"))
+		return 1;
+
+	/* Attach cgroup_iter program that will dump the stats to cgroups */
+	for (i = 0; i < N_CGROUPS; i++) {
+		err = setup_cgroup_iter(*skel, cgroups[i].fd, cgroups[i].name);
+		if (!ASSERT_OK(err, "setup_cgroup_iter"))
+			return err;
+	}
+
+	/* Also dump stats for root */
+	err = setup_cgroup_iter(*skel, root_cgroup_fd, CG_ROOT_NAME);
+	if (!ASSERT_OK(err, "setup_cgroup_iter"))
+		return err;
+
+	bpf_program__set_autoattach((*skel)->progs.dump_vmscan, false);
+	err = cgroup_hierarchical_stats__attach(*skel);
+	if (!ASSERT_OK(err, "attach"))
+		return err;
+
+	return 0;
+}
+
+static void destroy_progs(struct cgroup_hierarchical_stats *skel)
+{
+	static char path[128];
+	int i;
+
+	for (i = 0; i < N_CGROUPS; i++) {
+		/* Delete files in bpffs that cgroup_iters are pinned in */
+		snprintf(path, 128, "%s%s", BPFFS_VMSCAN,
+			 cgroups[i].name);
+		ASSERT_OK(remove(path), "remove cgroup_iter pin");
+	}
+
+	/* Delete root file in bpffs */
+	snprintf(path, 128, "%s%s", BPFFS_VMSCAN, CG_ROOT_NAME);
+	ASSERT_OK(remove(path), "remove cgroup_iter root pin");
+	cgroup_hierarchical_stats__destroy(skel);
+}
+
+void test_cgroup_hierarchical_stats(void)
+{
+	struct cgroup_hierarchical_stats *skel = NULL;
+
+	if (setup_hierarchy())
+		goto hierarchy_cleanup;
+	if (setup_progs(&skel))
+		goto cleanup;
+	if (induce_vmscan())
+		goto cleanup;
+	check_vmscan_stats();
+cleanup:
+	destroy_progs(skel);
+hierarchy_cleanup:
+	destroy_hierarchy();
+}
diff --git a/tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c b/tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c
new file mode 100644
index 000000000000..8ab4253a1592
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c
@@ -0,0 +1,226 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * BPF programs that collect, flush and dump hierarchical cgroup vmscan stats
+ *
+ * Copyright 2022 Google LLC.
+ */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+char _license[] SEC("license") = "GPL";
+
+/*
+ * Start times are stored per-task, not per-cgroup, as multiple tasks in one
+ * cgroup can perform reclaim concurrently.
+ */
+struct {
+	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, __u64);
+} vmscan_start_time SEC(".maps");
+
+struct vmscan_percpu {
+	/* Previous percpu state, to figure out if we have new updates */
+	__u64 prev;
+	/* Current percpu state */
+	__u64 state;
+};
+
+struct vmscan {
+	/* State propagated through children, pending aggregation */
+	__u64 pending;
+	/* Total state, including all cpus and all children */
+	__u64 state;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+	__uint(max_entries, 100);
+	__type(key, __u64);
+	__type(value, struct vmscan_percpu);
+} pcpu_cgroup_vmscan_elapsed SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 100);
+	__type(key, __u64);
+	__type(value, struct vmscan);
+} cgroup_vmscan_elapsed SEC(".maps");
+
+extern void cgroup_rstat_updated(struct cgroup *cgrp, int cpu) __ksym;
+extern void cgroup_rstat_flush(struct cgroup *cgrp) __ksym;
+
+static struct cgroup *task_memcg(struct task_struct *task)
+{
+	int cgrp_id;
+
+#if __has_builtin(__builtin_preserve_enum_value)
+	cgrp_id = bpf_core_enum_value(enum cgroup_subsys_id, memory_cgrp_id);
+#else
+	cgrp_id = memory_cgrp_id;
+#endif
+	return task->cgroups->subsys[cgrp_id]->cgroup;
+}
+
+static uint64_t cgroup_id(struct cgroup *cgrp)
+{
+	return cgrp->kn->id;
+}
+
+static int create_vmscan_percpu_elem(__u64 cg_id, __u64 state)
+{
+	struct vmscan_percpu pcpu_init = {.state = state, .prev = 0};
+
+	return bpf_map_update_elem(&pcpu_cgroup_vmscan_elapsed, &cg_id,
+				   &pcpu_init, BPF_NOEXIST);
+}
+
+static int create_vmscan_elem(__u64 cg_id, __u64 state, __u64 pending)
+{
+	struct vmscan init = {.state = state, .pending = pending};
+
+	return bpf_map_update_elem(&cgroup_vmscan_elapsed, &cg_id,
+				   &init, BPF_NOEXIST);
+}
+
+SEC("tp_btf/mm_vmscan_memcg_reclaim_begin")
+int BPF_PROG(vmscan_start, int order, gfp_t gfp_flags)
+{
+	struct task_struct *task = bpf_get_current_task_btf();
+	__u64 *start_time_ptr;
+
+	start_time_ptr = bpf_task_storage_get(&vmscan_start_time, task, 0,
+					      BPF_LOCAL_STORAGE_GET_F_CREATE);
+	if (start_time_ptr)
+		*start_time_ptr = bpf_ktime_get_ns();
+	return 0;
+}
+
+SEC("tp_btf/mm_vmscan_memcg_reclaim_end")
+int BPF_PROG(vmscan_end, unsigned long nr_reclaimed)
+{
+	struct vmscan_percpu *pcpu_stat;
+	struct task_struct *current = bpf_get_current_task_btf();
+	struct cgroup *cgrp;
+	__u64 *start_time_ptr;
+	__u64 current_elapsed, cg_id;
+	__u64 end_time = bpf_ktime_get_ns();
+
+	/*
+	 * cgrp is the first parent cgroup of current that has memcg enabled in
+	 * its subtree_control, or NULL if memcg is disabled in the entire tree.
+	 * In a cgroup hierarchy like this:
+	 *                               a
+	 *                              / \
+	 *                             b   c
+	 *  If "a" has memcg enabled, while "b" doesn't, then processes in "b"
+	 *  will accumulate their stats directly to "a". This makes sure that no
+	 *  stats are lost from processes in leaf cgroups that don't have memcg
+	 *  enabled, but only exposes stats for cgroups that have memcg enabled.
+	 */
+	cgrp = task_memcg(current);
+	if (!cgrp)
+		return 0;
+
+	cg_id = cgroup_id(cgrp);
+	start_time_ptr = bpf_task_storage_get(&vmscan_start_time, current, 0,
+					      BPF_LOCAL_STORAGE_GET_F_CREATE);
+	if (!start_time_ptr)
+		return 0;
+
+	current_elapsed = end_time - *start_time_ptr;
+	pcpu_stat = bpf_map_lookup_elem(&pcpu_cgroup_vmscan_elapsed,
+					&cg_id);
+	if (pcpu_stat)
+		pcpu_stat->state += current_elapsed;
+	else if (create_vmscan_percpu_elem(cg_id, current_elapsed))
+		return 0;
+
+	cgroup_rstat_updated(cgrp, bpf_get_smp_processor_id());
+	return 0;
+}
+
+/* Called by rstat for each (cgroup, cpu) pair with updates: folds percpu deltas into totals. */
+SEC("fentry/bpf_rstat_flush")
+int BPF_PROG(vmscan_flush, struct cgroup *cgrp, struct cgroup *parent, int cpu)
+{
+	struct vmscan_percpu *pcpu_stat;
+	struct vmscan *total_stat, *parent_stat;
+	__u64 cg_id = cgroup_id(cgrp);
+	__u64 parent_cg_id = parent ? cgroup_id(parent) : 0;
+	__u64 state;
+	__u64 delta = 0;
+
+	/* Add CPU changes on this level since the last flush */
+	pcpu_stat = bpf_map_lookup_percpu_elem(&pcpu_cgroup_vmscan_elapsed,
+					       &cg_id, cpu);
+	if (pcpu_stat) {
+		state = pcpu_stat->state;
+		delta += state - pcpu_stat->prev;
+		pcpu_stat->prev = state;
+	}
+
+	total_stat = bpf_map_lookup_elem(&cgroup_vmscan_elapsed, &cg_id);
+	if (!total_stat) {
+		if (create_vmscan_elem(cg_id, delta, 0))
+			return 0;
+
+		goto update_parent;
+	}
+
+	/* Collect pending stats from subtree */
+	if (total_stat->pending) {
+		delta += total_stat->pending;
+		total_stat->pending = 0;
+	}
+
+	/* Propagate changes to this cgroup's total */
+	total_stat->state += delta;
+
+update_parent:
+	/* Skip if there are no changes to propagate, or no parent */
+	if (!delta || !parent_cg_id)
+		return 0;
+
+	/* Propagate changes to cgroup's parent */
+	parent_stat = bpf_map_lookup_elem(&cgroup_vmscan_elapsed,
+					  &parent_cg_id);
+	if (parent_stat)
+		parent_stat->pending += delta;
+	else
+		create_vmscan_elem(parent_cg_id, 0, delta);
+	return 0;
+}
+
+SEC("iter.s/cgroup")
+int BPF_PROG(dump_vmscan, struct bpf_iter_meta *meta, struct cgroup *cgrp)
+{
+	struct seq_file *seq = meta->seq;
+	struct vmscan *total_stat;
+	__u64 cg_id = cgrp ? cgroup_id(cgrp) : 0;
+
+	/* Do nothing for the terminal call */
+	if (!cg_id)
+		return 1;
+
+	/* Flush the stats to make sure we get the most updated numbers */
+	cgroup_rstat_flush(cgrp);
+
+	total_stat = bpf_map_lookup_elem(&cgroup_vmscan_elapsed, &cg_id);
+	if (!total_stat) {
+		BPF_SEQ_PRINTF(seq, "cg_id: %llu, total_vmscan_delay: 0\n",
+			       cg_id);
+	} else {
+		BPF_SEQ_PRINTF(seq, "cg_id: %llu, total_vmscan_delay: %llu\n",
+			       cg_id, total_stat->state);
+	}
+
+	/*
+	 * We only dump stats for one cgroup here, so return 1 to stop
+	 * iteration after the first cgroup.
+	 */
+	return 1;
+}
-- 
cgit v1.2.3


From 7184aef9c0f7a81db8fd18d183ee42481d89bf35 Mon Sep 17 00:00:00 2001
From: Lam Thai <lamthai@arista.com>
Date: Wed, 24 Aug 2022 15:59:00 -0700
Subject: bpftool: Fix a wrong type cast in btf_dumper_int

When `data` points to a boolean value, casting it to `int *` is problematic
and could lead to a wrong value being passed to `jsonw_bool`. Change the
cast to `bool *` instead.

Fixes: b12d6ec09730 ("bpf: btf: add btf print functionality")
Signed-off-by: Lam Thai <lamthai@arista.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Reviewed-by: Quentin Monnet <quentin@isovalent.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20220824225859.9038-1-lamthai@arista.com
---
 tools/bpf/bpftool/btf_dumper.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c
index 125798b0bc5d..19924b6ce796 100644
--- a/tools/bpf/bpftool/btf_dumper.c
+++ b/tools/bpf/bpftool/btf_dumper.c
@@ -452,7 +452,7 @@ static int btf_dumper_int(const struct btf_type *t, __u8 bit_offset,
 					     *(char *)data);
 		break;
 	case BTF_INT_BOOL:
-		jsonw_bool(jw, *(int *)data);
+		jsonw_bool(jw, *(bool *)data);
 		break;
 	default:
 		/* shouldn't happen */
-- 
cgit v1.2.3


From 88e500affe72fb704c4f201974b5199ca6f51e6c Mon Sep 17 00:00:00 2001
From: Adel Abouchaev <adel.abushaev@gmail.com>
Date: Wed, 24 Aug 2022 11:43:51 -0700
Subject: selftests/net: fix reinitialization of TEST_PROGS in net self tests.

Assigning with ':=' drops all previously added tests; append with '+=' instead.

Fixes: b690842d12fd ("selftests/net: test l2 tunnel TOS/TTL inheriting")
Signed-off-by: Adel Abouchaev <adel.abushaev@gmail.com>
Reviewed-by: Shuah Khan <skhan@linuxfoundation.org>
Link: https://lore.kernel.org/r/20220824184351.3759862-1-adel.abushaev@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index e6a951ba5ba0..f5ac1433c301 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -42,7 +42,7 @@ TEST_PROGS += arp_ndisc_evict_nocarrier.sh
 TEST_PROGS += ndisc_unsolicited_na_test.sh
 TEST_PROGS += arp_ndisc_untracked_subnets.sh
 TEST_PROGS += stress_reuseport_listen.sh
-TEST_PROGS := l2_tos_ttl_inherit.sh
+TEST_PROGS += l2_tos_ttl_inherit.sh
 TEST_PROGS += bind_bhash.sh
 TEST_PROGS_EXTENDED := in_netns.sh setup_loopback.sh setup_veth.sh
 TEST_PROGS_EXTENDED += toeplitz_client.sh toeplitz.sh
-- 
cgit v1.2.3


From d4ffb6f39f1a1b260966b43a4ffdb64779c650dd Mon Sep 17 00:00:00 2001
From: Hao Luo <haoluo@google.com>
Date: Thu, 25 Aug 2022 15:39:36 -0700
Subject: bpf: Add CGROUP prefix to cgroup_iter_order

bpf_cgroup_iter_order is globally visible but the entries do not have
CGROUP prefix. As requested by Andrii, put a CGROUP in the names
in bpf_cgroup_iter_order.

This patch fixes two previous commits: one introduced the API and
the other uses the API in bpf selftest (that is, the selftest
cgroup_hierarchical_stats).

I tested this patch via the following command:

  test_progs -t cgroup,iter,btf_dump

Fixes: d4ccaf58a847 ("bpf: Introduce cgroup iter")
Fixes: 88886309d2e8 ("selftests/bpf: add a selftest for cgroup hierarchical stats collection")
Suggested-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Hao Luo <haoluo@google.com>
Link: https://lore.kernel.org/r/20220825223936.1865810-1-haoluo@google.com
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
---
 include/uapi/linux/bpf.h                           | 10 +++----
 kernel/bpf/cgroup_iter.c                           | 32 +++++++++++-----------
 tools/include/uapi/linux/bpf.h                     | 10 +++----
 tools/testing/selftests/bpf/prog_tests/btf_dump.c  |  2 +-
 .../bpf/prog_tests/cgroup_hierarchical_stats.c     |  2 +-
 .../testing/selftests/bpf/prog_tests/cgroup_iter.c | 10 +++----
 6 files changed, 33 insertions(+), 33 deletions(-)

(limited to 'tools')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 0f61f09f467a..bdf4bc6d8d6b 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -88,11 +88,11 @@ struct bpf_cgroup_storage_key {
 };
 
 enum bpf_cgroup_iter_order {
-	BPF_ITER_ORDER_UNSPEC = 0,
-	BPF_ITER_SELF_ONLY,		/* process only a single object. */
-	BPF_ITER_DESCENDANTS_PRE,	/* walk descendants in pre-order. */
-	BPF_ITER_DESCENDANTS_POST,	/* walk descendants in post-order. */
-	BPF_ITER_ANCESTORS_UP,		/* walk ancestors upward. */
+	BPF_CGROUP_ITER_ORDER_UNSPEC = 0,
+	BPF_CGROUP_ITER_SELF_ONLY,		/* process only a single object. */
+	BPF_CGROUP_ITER_DESCENDANTS_PRE,	/* walk descendants in pre-order. */
+	BPF_CGROUP_ITER_DESCENDANTS_POST,	/* walk descendants in post-order. */
+	BPF_CGROUP_ITER_ANCESTORS_UP,		/* walk ancestors upward. */
 };
 
 union bpf_iter_link_info {
diff --git a/kernel/bpf/cgroup_iter.c b/kernel/bpf/cgroup_iter.c
index cf6d763a57d5..c69bce2f4403 100644
--- a/kernel/bpf/cgroup_iter.c
+++ b/kernel/bpf/cgroup_iter.c
@@ -74,13 +74,13 @@ static void *cgroup_iter_seq_start(struct seq_file *seq, loff_t *pos)
 	++*pos;
 	p->terminate = false;
 	p->visited_all = false;
-	if (p->order == BPF_ITER_DESCENDANTS_PRE)
+	if (p->order == BPF_CGROUP_ITER_DESCENDANTS_PRE)
 		return css_next_descendant_pre(NULL, p->start_css);
-	else if (p->order == BPF_ITER_DESCENDANTS_POST)
+	else if (p->order == BPF_CGROUP_ITER_DESCENDANTS_POST)
 		return css_next_descendant_post(NULL, p->start_css);
-	else if (p->order == BPF_ITER_ANCESTORS_UP)
+	else if (p->order == BPF_CGROUP_ITER_ANCESTORS_UP)
 		return p->start_css;
-	else /* BPF_ITER_SELF_ONLY */
+	else /* BPF_CGROUP_ITER_SELF_ONLY */
 		return p->start_css;
 }
 
@@ -109,13 +109,13 @@ static void *cgroup_iter_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	if (p->terminate)
 		return NULL;
 
-	if (p->order == BPF_ITER_DESCENDANTS_PRE)
+	if (p->order == BPF_CGROUP_ITER_DESCENDANTS_PRE)
 		return css_next_descendant_pre(curr, p->start_css);
-	else if (p->order == BPF_ITER_DESCENDANTS_POST)
+	else if (p->order == BPF_CGROUP_ITER_DESCENDANTS_POST)
 		return css_next_descendant_post(curr, p->start_css);
-	else if (p->order == BPF_ITER_ANCESTORS_UP)
+	else if (p->order == BPF_CGROUP_ITER_ANCESTORS_UP)
 		return curr->parent;
-	else  /* BPF_ITER_SELF_ONLY */
+	else  /* BPF_CGROUP_ITER_SELF_ONLY */
 		return NULL;
 }
 
@@ -188,10 +188,10 @@ static int bpf_iter_attach_cgroup(struct bpf_prog *prog,
 	int order = linfo->cgroup.order;
 	struct cgroup *cgrp;
 
-	if (order != BPF_ITER_DESCENDANTS_PRE &&
-	    order != BPF_ITER_DESCENDANTS_POST &&
-	    order != BPF_ITER_ANCESTORS_UP &&
-	    order != BPF_ITER_SELF_ONLY)
+	if (order != BPF_CGROUP_ITER_DESCENDANTS_PRE &&
+	    order != BPF_CGROUP_ITER_DESCENDANTS_POST &&
+	    order != BPF_CGROUP_ITER_ANCESTORS_UP &&
+	    order != BPF_CGROUP_ITER_SELF_ONLY)
 		return -EINVAL;
 
 	if (fd && id)
@@ -239,13 +239,13 @@ static void bpf_iter_cgroup_show_fdinfo(const struct bpf_iter_aux_info *aux,
 	kfree(buf);
 
 show_order:
-	if (aux->cgroup.order == BPF_ITER_DESCENDANTS_PRE)
+	if (aux->cgroup.order == BPF_CGROUP_ITER_DESCENDANTS_PRE)
 		seq_puts(seq, "order: descendants_pre\n");
-	else if (aux->cgroup.order == BPF_ITER_DESCENDANTS_POST)
+	else if (aux->cgroup.order == BPF_CGROUP_ITER_DESCENDANTS_POST)
 		seq_puts(seq, "order: descendants_post\n");
-	else if (aux->cgroup.order == BPF_ITER_ANCESTORS_UP)
+	else if (aux->cgroup.order == BPF_CGROUP_ITER_ANCESTORS_UP)
 		seq_puts(seq, "order: ancestors_up\n");
-	else /* BPF_ITER_SELF_ONLY */
+	else /* BPF_CGROUP_ITER_SELF_ONLY */
 		seq_puts(seq, "order: self_only\n");
 }
 
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 5056cef2112f..92f7387e378a 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -88,11 +88,11 @@ struct bpf_cgroup_storage_key {
 };
 
 enum bpf_cgroup_iter_order {
-	BPF_ITER_ORDER_UNSPEC = 0,
-	BPF_ITER_SELF_ONLY,		/* process only a single object. */
-	BPF_ITER_DESCENDANTS_PRE,	/* walk descendants in pre-order. */
-	BPF_ITER_DESCENDANTS_POST,	/* walk descendants in post-order. */
-	BPF_ITER_ANCESTORS_UP,		/* walk ancestors upward. */
+	BPF_CGROUP_ITER_ORDER_UNSPEC = 0,
+	BPF_CGROUP_ITER_SELF_ONLY,		/* process only a single object. */
+	BPF_CGROUP_ITER_DESCENDANTS_PRE,	/* walk descendants in pre-order. */
+	BPF_CGROUP_ITER_DESCENDANTS_POST,	/* walk descendants in post-order. */
+	BPF_CGROUP_ITER_ANCESTORS_UP,		/* walk ancestors upward. */
 };
 
 union bpf_iter_link_info {
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
index a1bae92be1fc..7b5bbe21b549 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
@@ -764,7 +764,7 @@ static void test_btf_dump_struct_data(struct btf *btf, struct btf_dump *d,
 
 	/* union with nested struct */
 	TEST_BTF_DUMP_DATA(btf, d, "union", str, union bpf_iter_link_info, BTF_F_COMPACT,
-			   "(union bpf_iter_link_info){.map = (struct){.map_fd = (__u32)1,},.cgroup = (struct){.order = (enum bpf_cgroup_iter_order)BPF_ITER_SELF_ONLY,.cgroup_fd = (__u32)1,},}",
+			   "(union bpf_iter_link_info){.map = (struct){.map_fd = (__u32)1,},.cgroup = (struct){.order = (enum bpf_cgroup_iter_order)BPF_CGROUP_ITER_SELF_ONLY,.cgroup_fd = (__u32)1,},}",
 			   { .cgroup = { .order = 1, .cgroup_fd = 1, }});
 
 	/* struct skb with nested structs/unions; because type output is so
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_hierarchical_stats.c b/tools/testing/selftests/bpf/prog_tests/cgroup_hierarchical_stats.c
index 101a6d70b863..bed1661596f7 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_hierarchical_stats.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_hierarchical_stats.c
@@ -275,7 +275,7 @@ static int setup_cgroup_iter(struct cgroup_hierarchical_stats *obj,
 	 * traverse one cgroup, so set the traversal order to "self".
 	 */
 	linfo.cgroup.cgroup_fd = cgroup_fd;
-	linfo.cgroup.order = BPF_ITER_SELF_ONLY;
+	linfo.cgroup.order = BPF_CGROUP_ITER_SELF_ONLY;
 	opts.link_info = &linfo;
 	opts.link_info_len = sizeof(linfo);
 	link = bpf_program__attach_iter(obj->progs.dump_vmscan, &opts);
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c b/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c
index 38958c37b9ce..c4a2adb38da1 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c
@@ -134,7 +134,7 @@ static void test_walk_preorder(struct cgroup_iter *skel)
 		 cg_id[PARENT], cg_id[CHILD1], cg_id[CHILD2]);
 
 	read_from_cgroup_iter(skel->progs.cgroup_id_printer, cg_fd[PARENT],
-			      BPF_ITER_DESCENDANTS_PRE, "preorder");
+			      BPF_CGROUP_ITER_DESCENDANTS_PRE, "preorder");
 }
 
 /* Postorder walk prints child and parent in order. */
@@ -145,7 +145,7 @@ static void test_walk_postorder(struct cgroup_iter *skel)
 		 cg_id[CHILD1], cg_id[CHILD2], cg_id[PARENT]);
 
 	read_from_cgroup_iter(skel->progs.cgroup_id_printer, cg_fd[PARENT],
-			      BPF_ITER_DESCENDANTS_POST, "postorder");
+			      BPF_CGROUP_ITER_DESCENDANTS_POST, "postorder");
 }
 
 /* Walking parents prints parent and then root. */
@@ -159,7 +159,7 @@ static void test_walk_ancestors_up(struct cgroup_iter *skel)
 		 cg_id[PARENT], cg_id[ROOT]);
 
 	read_from_cgroup_iter(skel->progs.cgroup_id_printer, cg_fd[PARENT],
-			      BPF_ITER_ANCESTORS_UP, "ancestors_up");
+			      BPF_CGROUP_ITER_ANCESTORS_UP, "ancestors_up");
 
 	skel->bss->terminal_cgroup = 0;
 }
@@ -174,7 +174,7 @@ static void test_early_termination(struct cgroup_iter *skel)
 		 PROLOGUE "%8llu\n" EPILOGUE, cg_id[PARENT]);
 
 	read_from_cgroup_iter(skel->progs.cgroup_id_printer, cg_fd[PARENT],
-			      BPF_ITER_DESCENDANTS_PRE, "early_termination");
+			      BPF_CGROUP_ITER_DESCENDANTS_PRE, "early_termination");
 
 	skel->bss->terminate_early = 0;
 }
@@ -186,7 +186,7 @@ static void test_walk_self_only(struct cgroup_iter *skel)
 		 PROLOGUE "%8llu\n" EPILOGUE, cg_id[PARENT]);
 
 	read_from_cgroup_iter(skel->progs.cgroup_id_printer, cg_fd[PARENT],
-			      BPF_ITER_SELF_ONLY, "self_only");
+			      BPF_CGROUP_ITER_SELF_ONLY, "self_only");
 }
 
 void test_cgroup_iter(void)
-- 
cgit v1.2.3


From 343949e10798a52c6d6a14effc962e010ed471ae Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Date: Wed, 24 Aug 2022 15:40:37 +0200
Subject: libbpf: add map_get_fd_by_id and map_delete_elem in light skeleton

This allows better control over maps from the kernel when preloading
eBPF programs.

Acked-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Link: https://lore.kernel.org/r/20220824134055.1328882-8-benjamin.tissoires@redhat.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/lib/bpf/skel_internal.h | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

(limited to 'tools')

diff --git a/tools/lib/bpf/skel_internal.h b/tools/lib/bpf/skel_internal.h
index 00c5f94b43be..1e82ab06c3eb 100644
--- a/tools/lib/bpf/skel_internal.h
+++ b/tools/lib/bpf/skel_internal.h
@@ -251,6 +251,29 @@ static inline int skel_map_update_elem(int fd, const void *key,
 	return skel_sys_bpf(BPF_MAP_UPDATE_ELEM, &attr, attr_sz);
 }
 
+static inline int skel_map_delete_elem(int fd, const void *key)
+{
+	const size_t attr_sz = offsetofend(union bpf_attr, flags);
+	union bpf_attr attr;
+
+	memset(&attr, 0, attr_sz);
+	attr.map_fd = fd;
+	attr.key = (long)key;
+
+	return skel_sys_bpf(BPF_MAP_DELETE_ELEM, &attr, attr_sz);
+}
+
+static inline int skel_map_get_fd_by_id(__u32 id)
+{
+	const size_t attr_sz = offsetofend(union bpf_attr, flags);
+	union bpf_attr attr;
+
+	memset(&attr, 0, attr_sz);
+	attr.map_id = id;
+
+	return skel_sys_bpf(BPF_MAP_GET_FD_BY_ID, &attr, attr_sz);
+}
+
 static inline int skel_raw_tracepoint_open(const char *name, int prog_fd)
 {
 	const size_t attr_sz = offsetofend(union bpf_attr, raw_tracepoint.prog_fd);
-- 
cgit v1.2.3


From ab9ac19c4d0615fee40ec7d49fa16c9fd33f61f8 Mon Sep 17 00:00:00 2001
From: James Hilliard <james.hilliard1@gmail.com>
Date: Thu, 25 Aug 2022 23:06:59 -0600
Subject: selftests/bpf: fix type conflict in test_tc_dtime

The sys/socket.h header isn't required to build test_tc_dtime and may
cause a type conflict.

Fixes the following error:
In file included from /usr/include/x86_64-linux-gnu/sys/types.h:155,
                 from /usr/include/x86_64-linux-gnu/bits/socket.h:29,
                 from /usr/include/x86_64-linux-gnu/sys/socket.h:33,
                 from progs/test_tc_dtime.c:18:
/usr/include/x86_64-linux-gnu/bits/stdint-intn.h:24:18: error: conflicting types for 'int8_t'; have '__int8_t' {aka 'signed char'}
   24 | typedef __int8_t int8_t;
      |                  ^~~~~~
In file included from progs/test_tc_dtime.c:5:
/home/buildroot/opt/cross/lib/gcc/bpf/13.0.0/include/stdint.h:34:23: note: previous declaration of 'int8_t' with type 'int8_t' {aka 'char'}
   34 | typedef __INT8_TYPE__ int8_t;
      |                       ^~~~~~
/usr/include/x86_64-linux-gnu/bits/stdint-intn.h:27:19: error: conflicting types for 'int64_t'; have '__int64_t' {aka 'long long int'}
   27 | typedef __int64_t int64_t;
      |                   ^~~~~~~
/home/buildroot/opt/cross/lib/gcc/bpf/13.0.0/include/stdint.h:43:24: note: previous declaration of 'int64_t' with type 'int64_t' {aka 'long int'}
   43 | typedef __INT64_TYPE__ int64_t;
      |                        ^~~~~~~
make: *** [Makefile:537: /home/buildroot/bpf-next/tools/testing/selftests/bpf/bpf_gcc/test_tc_dtime.o] Error 1

Signed-off-by: James Hilliard <james.hilliard1@gmail.com>
Link: https://lore.kernel.org/r/20220826050703.869571-1-james.hilliard1@gmail.com
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
---
 tools/testing/selftests/bpf/progs/test_tc_dtime.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/test_tc_dtime.c b/tools/testing/selftests/bpf/progs/test_tc_dtime.c
index b596479a9ebe..125beec31834 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_dtime.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_dtime.c
@@ -15,7 +15,6 @@
 #include <linux/udp.h>
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_endian.h>
-#include <sys/socket.h>
 
 /* veth_src --- veth_src_fwd --- veth_det_fwd --- veth_dst
  *           |                                 |
-- 
cgit v1.2.3


From b05d64efbb21ad231516b44317af34d2b586cfc4 Mon Sep 17 00:00:00 2001
From: James Hilliard <james.hilliard1@gmail.com>
Date: Thu, 25 Aug 2022 21:51:39 -0600
Subject: selftests/bpf: Declare subprog_noise as static in tailcall_bpf2bpf4

Due to bpf_map_lookup_elem being declared static we need to also
declare subprog_noise as static.

Fixes the following error:
progs/tailcall_bpf2bpf4.c:26:9: error: 'bpf_map_lookup_elem' is static but used in inline function 'subprog_noise' which is not static [-Werror]
   26 |         bpf_map_lookup_elem(&nop_table, &key);
      |         ^~~~~~~~~~~~~~~~~~~

Signed-off-by: James Hilliard <james.hilliard1@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Song Liu <song@kernel.org>
Link: https://lore.kernel.org/bpf/20220826035141.737919-1-james.hilliard1@gmail.com
---
 tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c
index b67e8022d500..a017d6b2f1dd 100644
--- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c
@@ -19,7 +19,7 @@ struct {
 int count = 0;
 int noise = 0;
 
-__always_inline int subprog_noise(void)
+static __always_inline int subprog_noise(void)
 {
 	__u32 key = 0;
 
-- 
cgit v1.2.3


From aa75622c3be4d5819ce69c714acbcbd67bba5d65 Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin@isovalent.com>
Date: Thu, 25 Aug 2022 23:08:06 +0100
Subject: bpf: Fix a few typos in BPF helpers documentation

Address a few typos in the documentation for the BPF helper functions.
They were reported by Jakub [0], who ran spell checkers on the generated
man page [1].

[0] https://lore.kernel.org/linux-man/d22dcd47-023c-8f52-d369-7b5308e6c842@gmail.com/T/#mb02e7d4b7fb61d98fa914c77b581184e9a9537af
[1] https://lore.kernel.org/linux-man/eb6a1e41-c48e-ac45-5154-ac57a2c76108@gmail.com/T/#m4a8d1b003616928013ffcd1450437309ab652f9f

v3: Do not copy unrelated (and breaking) elements to tools/ header
v2: Turn a ',' into a ';'

Reported-by: Jakub Wilk <jwilk@jwilk.net>
Signed-off-by: Quentin Monnet <quentin@isovalent.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220825220806.107143-1-quentin@isovalent.com
---
 include/uapi/linux/bpf.h       | 16 ++++++++--------
 tools/include/uapi/linux/bpf.h | 16 ++++++++--------
 2 files changed, 16 insertions(+), 16 deletions(-)

(limited to 'tools')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index bdf4bc6d8d6b..962960a98835 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -4456,7 +4456,7 @@ union bpf_attr {
  *
  *		**-EEXIST** if the option already exists.
  *
- *		**-EFAULT** on failrue to parse the existing header options.
+ *		**-EFAULT** on failure to parse the existing header options.
  *
  *		**-EPERM** if the helper cannot be used under the current
  *		*skops*\ **->op**.
@@ -4665,7 +4665,7 @@ union bpf_attr {
  *		a *map* with *task* as the **key**.  From this
  *		perspective,  the usage is not much different from
  *		**bpf_map_lookup_elem**\ (*map*, **&**\ *task*) except this
- *		helper enforces the key must be an task_struct and the map must also
+ *		helper enforces the key must be a task_struct and the map must also
  *		be a **BPF_MAP_TYPE_TASK_STORAGE**.
  *
  *		Underneath, the value is stored locally at *task* instead of
@@ -4723,7 +4723,7 @@ union bpf_attr {
  *
  * long bpf_ima_inode_hash(struct inode *inode, void *dst, u32 size)
  *	Description
- *		Returns the stored IMA hash of the *inode* (if it's avaialable).
+ *		Returns the stored IMA hash of the *inode* (if it's available).
  *		If the hash is larger than *size*, then only *size*
  *		bytes will be copied to *dst*
  *	Return
@@ -4747,12 +4747,12 @@ union bpf_attr {
  *
  *		The argument *len_diff* can be used for querying with a planned
  *		size change. This allows to check MTU prior to changing packet
- *		ctx. Providing an *len_diff* adjustment that is larger than the
+ *		ctx. Providing a *len_diff* adjustment that is larger than the
  *		actual packet size (resulting in negative packet size) will in
- *		principle not exceed the MTU, why it is not considered a
- *		failure.  Other BPF-helpers are needed for performing the
- *		planned size change, why the responsability for catch a negative
- *		packet size belong in those helpers.
+ *		principle not exceed the MTU, which is why it is not considered
+ *		a failure.  Other BPF helpers are needed for performing the
+ *		planned size change; therefore the responsibility for catching
+ *		a negative packet size belongs in those helpers.
  *
  *		Specifying *ifindex* zero means the MTU check is performed
  *		against the current net device.  This is practical if this isn't
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 92f7387e378a..f4ba82a1eace 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -4456,7 +4456,7 @@ union bpf_attr {
  *
  *		**-EEXIST** if the option already exists.
  *
- *		**-EFAULT** on failrue to parse the existing header options.
+ *		**-EFAULT** on failure to parse the existing header options.
  *
  *		**-EPERM** if the helper cannot be used under the current
  *		*skops*\ **->op**.
@@ -4665,7 +4665,7 @@ union bpf_attr {
  *		a *map* with *task* as the **key**.  From this
  *		perspective,  the usage is not much different from
  *		**bpf_map_lookup_elem**\ (*map*, **&**\ *task*) except this
- *		helper enforces the key must be an task_struct and the map must also
+ *		helper enforces the key must be a task_struct and the map must also
  *		be a **BPF_MAP_TYPE_TASK_STORAGE**.
  *
  *		Underneath, the value is stored locally at *task* instead of
@@ -4723,7 +4723,7 @@ union bpf_attr {
  *
  * long bpf_ima_inode_hash(struct inode *inode, void *dst, u32 size)
  *	Description
- *		Returns the stored IMA hash of the *inode* (if it's avaialable).
+ *		Returns the stored IMA hash of the *inode* (if it's available).
  *		If the hash is larger than *size*, then only *size*
  *		bytes will be copied to *dst*
  *	Return
@@ -4747,12 +4747,12 @@ union bpf_attr {
  *
  *		The argument *len_diff* can be used for querying with a planned
  *		size change. This allows to check MTU prior to changing packet
- *		ctx. Providing an *len_diff* adjustment that is larger than the
+ *		ctx. Providing a *len_diff* adjustment that is larger than the
  *		actual packet size (resulting in negative packet size) will in
- *		principle not exceed the MTU, why it is not considered a
- *		failure.  Other BPF-helpers are needed for performing the
- *		planned size change, why the responsability for catch a negative
- *		packet size belong in those helpers.
+ *		principle not exceed the MTU, which is why it is not considered
+ *		a failure.  Other BPF helpers are needed for performing the
+ *		planned size change; therefore the responsibility for catching
+ *		a negative packet size belongs in those helpers.
  *
  *		Specifying *ifindex* zero means the MTU check is performed
  *		against the current net device.  This is practical if this isn't
-- 
cgit v1.2.3


From 3721359d3907c313833a2fd6e40c36a30179ea89 Mon Sep 17 00:00:00 2001
From: James Hilliard <james.hilliard1@gmail.com>
Date: Thu, 25 Aug 2022 23:29:22 -0600
Subject: selftests/bpf: Fix bind{4,6} tcp/socket header type conflict

There is a potential for us to hit a type conflict when including
netinet/tcp.h with sys/socket.h. We can remove these includes as they
are not actually needed.

Fixes errors like the below when compiling with gcc BPF backend:

  In file included from /usr/include/netinet/tcp.h:91,
                   from progs/bind4_prog.c:10:
  /home/buildroot/opt/cross/lib/gcc/bpf/13.0.0/include/stdint.h:34:23: error: conflicting types for 'int8_t'; have 'char'
     34 | typedef __INT8_TYPE__ int8_t;
        |                       ^~~~~~
  In file included from /usr/include/x86_64-linux-gnu/sys/types.h:155,
                   from /usr/include/x86_64-linux-gnu/bits/socket.h:29,
                   from /usr/include/x86_64-linux-gnu/sys/socket.h:33,
                   from progs/bind4_prog.c:9:
  /usr/include/x86_64-linux-gnu/bits/stdint-intn.h:24:18: note: previous declaration of 'int8_t' with type 'int8_t' {aka 'signed char'}
     24 | typedef __int8_t int8_t;
        |                  ^~~~~~
  /home/buildroot/opt/cross/lib/gcc/bpf/13.0.0/include/stdint.h:43:24: error: conflicting types for 'int64_t'; have 'long int'
     43 | typedef __INT64_TYPE__ int64_t;
        |                        ^~~~~~~
  /usr/include/x86_64-linux-gnu/bits/stdint-intn.h:27:19: note: previous declaration of 'int64_t' with type 'int64_t' {aka 'long long int'}
     27 | typedef __int64_t int64_t;
        |                   ^~~~~~~
  make: *** [Makefile:537: /home/buildroot/bpf-next/tools/testing/selftests/bpf/bpf_gcc/bind4_prog.o] Error 1

Signed-off-by: James Hilliard <james.hilliard1@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20220826052925.980431-1-james.hilliard1@gmail.com
---
 tools/testing/selftests/bpf/progs/bind4_prog.c | 2 --
 tools/testing/selftests/bpf/progs/bind6_prog.c | 2 --
 2 files changed, 4 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/bind4_prog.c b/tools/testing/selftests/bpf/progs/bind4_prog.c
index 474c6a62078a..a487f60b73ac 100644
--- a/tools/testing/selftests/bpf/progs/bind4_prog.c
+++ b/tools/testing/selftests/bpf/progs/bind4_prog.c
@@ -6,8 +6,6 @@
 #include <linux/bpf.h>
 #include <linux/in.h>
 #include <linux/in6.h>
-#include <sys/socket.h>
-#include <netinet/tcp.h>
 #include <linux/if.h>
 #include <errno.h>
 
diff --git a/tools/testing/selftests/bpf/progs/bind6_prog.c b/tools/testing/selftests/bpf/progs/bind6_prog.c
index c19cfa869f30..d62cd9e9cf0e 100644
--- a/tools/testing/selftests/bpf/progs/bind6_prog.c
+++ b/tools/testing/selftests/bpf/progs/bind6_prog.c
@@ -6,8 +6,6 @@
 #include <linux/bpf.h>
 #include <linux/in.h>
 #include <linux/in6.h>
-#include <sys/socket.h>
-#include <netinet/tcp.h>
 #include <linux/if.h>
 #include <errno.h>
 
-- 
cgit v1.2.3


From 2eb680401df62c035ff50a7faf1296565b030df7 Mon Sep 17 00:00:00 2001
From: James Hilliard <james.hilliard1@gmail.com>
Date: Mon, 29 Aug 2022 09:47:09 -0600
Subject: selftests/bpf: Fix connect4_prog tcp/socket header type conflict

There is a potential for us to hit a type conflict when including
netinet/tcp.h and sys/socket.h. We can replace both of these includes
with linux/tcp.h and bpf_tcp_helpers.h to avoid this conflict.

Fixes errors like the below when compiling with gcc BPF backend:

  In file included from /usr/include/netinet/tcp.h:91,
                   from progs/connect4_prog.c:11:
  /home/buildroot/opt/cross/lib/gcc/bpf/13.0.0/include/stdint.h:34:23: error: conflicting types for 'int8_t'; have 'char'
     34 | typedef __INT8_TYPE__ int8_t;
        |                       ^~~~~~
  In file included from /usr/include/x86_64-linux-gnu/sys/types.h:155,
                   from /usr/include/x86_64-linux-gnu/bits/socket.h:29,
                   from /usr/include/x86_64-linux-gnu/sys/socket.h:33,
                   from progs/connect4_prog.c:10:
  /usr/include/x86_64-linux-gnu/bits/stdint-intn.h:24:18: note: previous declaration of 'int8_t' with type 'int8_t' {aka 'signed char'}
     24 | typedef __int8_t int8_t;
        |                  ^~~~~~
  /home/buildroot/opt/cross/lib/gcc/bpf/13.0.0/include/stdint.h:43:24: error: conflicting types for 'int64_t'; have 'long int'
     43 | typedef __INT64_TYPE__ int64_t;
        |                        ^~~~~~~
  /usr/include/x86_64-linux-gnu/bits/stdint-intn.h:27:19: note: previous declaration of 'int64_t' with type 'int64_t' {aka 'long long int'}
     27 | typedef __int64_t int64_t;
        |                   ^~~~~~~

Signed-off-by: James Hilliard <james.hilliard1@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20220829154710.3870139-1-james.hilliard1@gmail.com
---
 tools/testing/selftests/bpf/progs/connect4_prog.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/connect4_prog.c b/tools/testing/selftests/bpf/progs/connect4_prog.c
index b241932911db..ec25371de789 100644
--- a/tools/testing/selftests/bpf/progs/connect4_prog.c
+++ b/tools/testing/selftests/bpf/progs/connect4_prog.c
@@ -7,14 +7,15 @@
 #include <linux/bpf.h>
 #include <linux/in.h>
 #include <linux/in6.h>
-#include <sys/socket.h>
-#include <netinet/tcp.h>
+#include <linux/tcp.h>
 #include <linux/if.h>
 #include <errno.h>
 
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_endian.h>
 
+#include "bpf_tcp_helpers.h"
+
 #define SRC_REWRITE_IP4		0x7f000004U
 #define DST_REWRITE_IP4		0x7f000001U
 #define DST_REWRITE_PORT4	4444
-- 
cgit v1.2.3


From 6f95de6d713130c953af0a40b13c1da519f91c4e Mon Sep 17 00:00:00 2001
From: Hao Luo <haoluo@google.com>
Date: Mon, 29 Aug 2022 16:18:28 -0700
Subject: bpftool: Add support for querying cgroup_iter link

Support dumping info of a cgroup_iter link. This includes
showing the cgroup's id and the order for walking the cgroup
hierarchy. Example output is as follows:

> bpftool link show
1: iter  prog 2  target_name bpf_map
2: iter  prog 3  target_name bpf_prog
3: iter  prog 12  target_name cgroup  cgroup_id 72  order self_only

> bpftool -p link show
[{
        "id": 1,
        "type": "iter",
        "prog_id": 2,
        "target_name": "bpf_map"
    },{
        "id": 2,
        "type": "iter",
        "prog_id": 3,
        "target_name": "bpf_prog"
    },{
        "id": 3,
        "type": "iter",
        "prog_id": 12,
        "target_name": "cgroup",
        "cgroup_id": 72,
        "order": "self_only"
    }
]

Signed-off-by: Hao Luo <haoluo@google.com>
Reviewed-by: Quentin Monnet <quentin@isovalent.com>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/20220829231828.1016835-1-haoluo@google.com
Signed-off-by: Martin KaFai Lau <martin.lau@linux.dev>
---
 tools/bpf/bpftool/link.c | 35 +++++++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c
index 7a20931c3250..ef0dc2f8d5a2 100644
--- a/tools/bpf/bpftool/link.c
+++ b/tools/bpf/bpftool/link.c
@@ -83,6 +83,29 @@ static bool is_iter_map_target(const char *target_name)
 	       strcmp(target_name, "bpf_sk_storage_map") == 0;
 }
 
+static bool is_iter_cgroup_target(const char *target_name)
+{
+	return strcmp(target_name, "cgroup") == 0;
+}
+
+static const char *cgroup_order_string(__u32 order)
+{
+	switch (order) {
+	case BPF_CGROUP_ITER_ORDER_UNSPEC:
+		return "order_unspec";
+	case BPF_CGROUP_ITER_SELF_ONLY:
+		return "self_only";
+	case BPF_CGROUP_ITER_DESCENDANTS_PRE:
+		return "descendants_pre";
+	case BPF_CGROUP_ITER_DESCENDANTS_POST:
+		return "descendants_post";
+	case BPF_CGROUP_ITER_ANCESTORS_UP:
+		return "ancestors_up";
+	default: /* won't happen */
+		return "unknown";
+	}
+}
+
 static void show_iter_json(struct bpf_link_info *info, json_writer_t *wtr)
 {
 	const char *target_name = u64_to_ptr(info->iter.target_name);
@@ -91,6 +114,12 @@ static void show_iter_json(struct bpf_link_info *info, json_writer_t *wtr)
 
 	if (is_iter_map_target(target_name))
 		jsonw_uint_field(wtr, "map_id", info->iter.map.map_id);
+
+	if (is_iter_cgroup_target(target_name)) {
+		jsonw_lluint_field(wtr, "cgroup_id", info->iter.cgroup.cgroup_id);
+		jsonw_string_field(wtr, "order",
+				   cgroup_order_string(info->iter.cgroup.order));
+	}
 }
 
 static int get_prog_info(int prog_id, struct bpf_prog_info *info)
@@ -208,6 +237,12 @@ static void show_iter_plain(struct bpf_link_info *info)
 
 	if (is_iter_map_target(target_name))
 		printf("map_id %u  ", info->iter.map.map_id);
+
+	if (is_iter_cgroup_target(target_name)) {
+		printf("cgroup_id %llu  ", info->iter.cgroup.cgroup_id);
+		printf("order %s  ",
+		       cgroup_order_string(info->iter.cgroup.order));
+	}
 }
 
 static int show_link_close_plain(int fd, struct bpf_link_info *info)
-- 
cgit v1.2.3


From 8a7d61bdc2fac2c460a2f32a062f5c6dbd21a764 Mon Sep 17 00:00:00 2001
From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Date: Tue, 30 Aug 2022 15:39:05 +0200
Subject: selftests/xsk: Add missing close() on netns fd

Commit 1034b03e54ac ("selftests: xsk: Simplify cleanup of ifobjects")
removed close on netns fd, which is not correct, so let us restore it.

Fixes: 1034b03e54ac ("selftests: xsk: Simplify cleanup of ifobjects")
Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Magnus Karlsson <magnus.karlsson@intel.com>
Link: https://lore.kernel.org/bpf/20220830133905.9945-1-maciej.fijalkowski@intel.com
---
 tools/testing/selftests/bpf/xskxceiver.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c
index 74d56d971baf..091402dc5390 100644
--- a/tools/testing/selftests/bpf/xskxceiver.c
+++ b/tools/testing/selftests/bpf/xskxceiver.c
@@ -1606,6 +1606,8 @@ static struct ifobject *ifobject_create(void)
 	if (!ifobj->umem)
 		goto out_umem;
 
+	ifobj->ns_fd = -1;
+
 	return ifobj;
 
 out_umem:
@@ -1617,6 +1619,8 @@ out_xsk_arr:
 
 static void ifobject_delete(struct ifobject *ifobj)
 {
+	if (ifobj->ns_fd != -1)
+		close(ifobj->ns_fd);
 	free(ifobj->umem);
 	free(ifobj->xsk_arr);
 	free(ifobj);
-- 
cgit v1.2.3


From 14e5ce79943a72b9bf0fff8a5867320a9fa3e40d Mon Sep 17 00:00:00 2001
From: James Hilliard <james.hilliard1@gmail.com>
Date: Mon, 29 Aug 2022 15:05:46 -0600
Subject: libbpf: Add GCC support for bpf_tail_call_static

The bpf_tail_call_static function is currently not defined unless
using clang >= 8.

To support bpf_tail_call_static on GCC we can check if __clang__ is
not defined to enable bpf_tail_call_static.

We need to use GCC assembly syntax when the compiler does not define
__clang__ as LLVM inline assembly is not fully compatible with GCC.

Signed-off-by: James Hilliard <james.hilliard1@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20220829210546.755377-1-james.hilliard1@gmail.com
---
 tools/lib/bpf/bpf_helpers.h | 19 +++++++++++++------
 1 file changed, 13 insertions(+), 6 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index 7349b16b8e2f..867b734839dd 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -131,7 +131,7 @@
 /*
  * Helper function to perform a tail call with a constant/immediate map slot.
  */
-#if __clang_major__ >= 8 && defined(__bpf__)
+#if (!defined(__clang__) || __clang_major__ >= 8) && defined(__bpf__)
 static __always_inline void
 bpf_tail_call_static(void *ctx, const void *map, const __u32 slot)
 {
@@ -139,8 +139,8 @@ bpf_tail_call_static(void *ctx, const void *map, const __u32 slot)
 		__bpf_unreachable();
 
 	/*
-	 * Provide a hard guarantee that LLVM won't optimize setting r2 (map
-	 * pointer) and r3 (constant map index) from _different paths_ ending
+	 * Provide a hard guarantee that the compiler won't optimize setting r2
+	 * (map pointer) and r3 (constant map index) from _different paths_ ending
 	 * up at the _same_ call insn as otherwise we won't be able to use the
 	 * jmpq/nopl retpoline-free patching by the x86-64 JIT in the kernel
 	 * given they mismatch. See also d2e4c1e6c294 ("bpf: Constant map key
@@ -148,12 +148,19 @@ bpf_tail_call_static(void *ctx, const void *map, const __u32 slot)
 	 *
 	 * Note on clobber list: we need to stay in-line with BPF calling
 	 * convention, so even if we don't end up using r0, r4, r5, we need
-	 * to mark them as clobber so that LLVM doesn't end up using them
-	 * before / after the call.
+	 * to mark them as clobber so that the compiler doesn't end up using
+	 * them before / after the call.
 	 */
-	asm volatile("r1 = %[ctx]\n\t"
+	asm volatile(
+#ifdef __clang__
+		     "r1 = %[ctx]\n\t"
 		     "r2 = %[map]\n\t"
 		     "r3 = %[slot]\n\t"
+#else
+		     "mov %%r1,%[ctx]\n\t"
+		     "mov %%r2,%[map]\n\t"
+		     "mov %%r3,%[slot]\n\t"
+#endif
 		     "call 12"
 		     :: [ctx]"r"(ctx), [map]"r"(map), [slot]"i"(slot)
 		     : "r0", "r1", "r2", "r3", "r4", "r5");
-- 
cgit v1.2.3


From 197072945a708d62181895409effdfcda80c7798 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <martin.lau@linux.dev>
Date: Tue, 30 Aug 2022 16:19:53 -0700
Subject: selftest/bpf: Ensure no module loading in
 bpf_setsockopt(TCP_CONGESTION)

This patch adds a test to ensure bpf_setsockopt(TCP_CONGESTION, "not_exist")
will not trigger the kernel module autoload.

Before the fix:

  [   40.535829] BUG: sleeping function called from invalid context at include/linux/sched/mm.h:274
  [...]
  [   40.552134]  tcp_ca_find_autoload.constprop.0+0xcb/0x200
  [   40.552689]  tcp_set_congestion_control+0x99/0x7b0
  [   40.553203]  do_tcp_setsockopt+0x3ed/0x2240
  [...]
  [   40.556041]  __bpf_setsockopt+0x124/0x640

Signed-off-by: Martin KaFai Lau <martin.lau@linux.dev>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20220830231953.792412-1-martin.lau@linux.dev
---
 tools/testing/selftests/bpf/progs/setget_sockopt.c | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/setget_sockopt.c b/tools/testing/selftests/bpf/progs/setget_sockopt.c
index 40606ef47a38..79debf3c2f44 100644
--- a/tools/testing/selftests/bpf/progs/setget_sockopt.c
+++ b/tools/testing/selftests/bpf/progs/setget_sockopt.c
@@ -32,6 +32,7 @@ struct sockopt_test {
 	unsigned int flip:1;
 };
 
+static const char not_exist_cc[] = "not_exist";
 static const char cubic_cc[] = "cubic";
 static const char reno_cc[] = "reno";
 
@@ -307,6 +308,9 @@ static int bpf_test_tcp_sockopt(__u32 i, struct loop_ctx *lc)
 		const char *new_cc;
 		int new_cc_len;
 
+		if (!bpf_setsockopt(ctx, IPPROTO_TCP, TCP_CONGESTION,
+				    (void *)not_exist_cc, sizeof(not_exist_cc)))
+			return 1;
 		if (bpf_getsockopt(ctx, IPPROTO_TCP, TCP_CONGESTION, old_cc, sizeof(old_cc)))
 			return 1;
 		if (!bpf_strncmp(old_cc, sizeof(old_cc), cubic_cc)) {
-- 
cgit v1.2.3


From 1c636b6277a2b2bf504df490b8dbadd2bd34ccd4 Mon Sep 17 00:00:00 2001
From: Hou Tao <houtao1@huawei.com>
Date: Wed, 31 Aug 2022 12:26:29 +0800
Subject: selftests/bpf: Add test cases for htab update

One test demonstrates that a reentrant hash map update on the same
bucket should fail, and another one shows that concurrent updates of
the same hash map bucket should succeed and not fail due to
the reentrancy checking for bucket lock.

There is no trampoline support on s390x, so move htab_update to
denylist.

Signed-off-by: Hou Tao <houtao1@huawei.com>
Link: https://lore.kernel.org/r/20220831042629.130006-4-houtao@huaweicloud.com
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
---
 tools/testing/selftests/bpf/DENYLIST.s390x         |   1 +
 .../testing/selftests/bpf/prog_tests/htab_update.c | 126 +++++++++++++++++++++
 tools/testing/selftests/bpf/progs/htab_update.c    |  29 +++++
 3 files changed, 156 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/htab_update.c
 create mode 100644 tools/testing/selftests/bpf/progs/htab_update.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x
index 736b65f61022..ba02b559ca68 100644
--- a/tools/testing/selftests/bpf/DENYLIST.s390x
+++ b/tools/testing/selftests/bpf/DENYLIST.s390x
@@ -68,3 +68,4 @@ unpriv_bpf_disabled                      # fentry
 setget_sockopt                           # attach unexpected error: -524                                               (trampoline)
 cb_refs                                  # expected error message unexpected error: -524                               (trampoline)
 cgroup_hierarchical_stats                # JIT does not support calling kernel function                                (kfunc)
+htab_update                              # failed to attach: ERROR: strerror_r(-524)=22                                (trampoline)
diff --git a/tools/testing/selftests/bpf/prog_tests/htab_update.c b/tools/testing/selftests/bpf/prog_tests/htab_update.c
new file mode 100644
index 000000000000..2bc85f4814f4
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/htab_update.c
@@ -0,0 +1,126 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2022. Huawei Technologies Co., Ltd */
+#define _GNU_SOURCE
+#include <sched.h>
+#include <stdbool.h>
+#include <test_progs.h>
+#include "htab_update.skel.h"
+
+struct htab_update_ctx {
+	int fd;
+	int loop;
+	bool stop;
+};
+
+static void test_reenter_update(void)
+{
+	struct htab_update *skel;
+	unsigned int key, value;
+	int err;
+
+	skel = htab_update__open();
+	if (!ASSERT_OK_PTR(skel, "htab_update__open"))
+		return;
+
+	/* lookup_elem_raw() may be inlined and find_kernel_btf_id() will return -ESRCH */
+	bpf_program__set_autoload(skel->progs.lookup_elem_raw, true);
+	err = htab_update__load(skel);
+	if (!ASSERT_TRUE(!err || err == -ESRCH, "htab_update__load") || err)
+		goto out;
+
+	skel->bss->pid = getpid();
+	err = htab_update__attach(skel);
+	if (!ASSERT_OK(err, "htab_update__attach"))
+		goto out;
+
+	/* Will trigger the reentrancy of bpf_map_update_elem() */
+	key = 0;
+	value = 0;
+	err = bpf_map_update_elem(bpf_map__fd(skel->maps.htab), &key, &value, 0);
+	if (!ASSERT_OK(err, "add element"))
+		goto out;
+
+	ASSERT_EQ(skel->bss->update_err, -EBUSY, "no reentrancy");
+out:
+	htab_update__destroy(skel);
+}
+
+static void *htab_update_thread(void *arg)
+{
+	struct htab_update_ctx *ctx = arg;
+	cpu_set_t cpus;
+	int i;
+
+	/* Pinned on CPU 0 */
+	CPU_ZERO(&cpus);
+	CPU_SET(0, &cpus);
+	pthread_setaffinity_np(pthread_self(), sizeof(cpus), &cpus);
+
+	i = 0;
+	while (i++ < ctx->loop && !ctx->stop) {
+		unsigned int key = 0, value = 0;
+		int err;
+
+		err = bpf_map_update_elem(ctx->fd, &key, &value, 0);
+		if (err) {
+			ctx->stop = true;
+			return (void *)(long)err;
+		}
+	}
+
+	return NULL;
+}
+
+static void test_concurrent_update(void)
+{
+	struct htab_update_ctx ctx;
+	struct htab_update *skel;
+	unsigned int i, nr;
+	pthread_t *tids;
+	int err;
+
+	skel = htab_update__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "htab_update__open_and_load"))
+		return;
+
+	ctx.fd = bpf_map__fd(skel->maps.htab);
+	ctx.loop = 1000;
+	ctx.stop = false;
+
+	nr = 4;
+	tids = calloc(nr, sizeof(*tids));
+	if (!ASSERT_NEQ(tids, NULL, "no mem"))
+		goto out;
+
+	for (i = 0; i < nr; i++) {
+		err = pthread_create(&tids[i], NULL, htab_update_thread, &ctx);
+		if (!ASSERT_OK(err, "pthread_create")) {
+			unsigned int j;
+
+			ctx.stop = true;
+			for (j = 0; j < i; j++)
+				pthread_join(tids[j], NULL);
+			goto out;
+		}
+	}
+
+	for (i = 0; i < nr; i++) {
+		void *thread_err = NULL;
+
+		pthread_join(tids[i], &thread_err);
+		ASSERT_EQ(thread_err, NULL, "update error");
+	}
+
+out:
+	if (tids)
+		free(tids);
+	htab_update__destroy(skel);
+}
+
+void test_htab_update(void)
+{
+	if (test__start_subtest("reenter_update"))
+		test_reenter_update();
+	if (test__start_subtest("concurrent_update"))
+		test_concurrent_update();
+}
diff --git a/tools/testing/selftests/bpf/progs/htab_update.c b/tools/testing/selftests/bpf/progs/htab_update.c
new file mode 100644
index 000000000000..7481bb30b29b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/htab_update.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2022. Huawei Technologies Co., Ltd */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 1);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(__u32));
+} htab SEC(".maps");
+
+int pid = 0;
+int update_err = 0;
+
+SEC("?fentry/lookup_elem_raw")
+int lookup_elem_raw(void *ctx)
+{
+	__u32 key = 0, value = 1;
+
+	if ((bpf_get_current_pid_tgid() >> 32) != pid)
+		return 0;
+
+	update_err = bpf_map_update_elem(&htab, &key, &value, 0);
+	return 0;
+}
-- 
cgit v1.2.3


From 0e4d354762cefd3e16b4cff8988ff276e45effc4 Mon Sep 17 00:00:00 2001
From: Richard Gobert <richardbgobert@gmail.com>
Date: Mon, 29 Aug 2022 13:18:51 +0200
Subject: net-next: Fix IP_UNICAST_IF option behavior for connected sockets

The IP_UNICAST_IF socket option is used to set the outgoing interface
for outbound packets.

The IP_UNICAST_IF socket option was added as it was needed by the
Wine project, since no other existing option (SO_BINDTODEVICE socket
option, IP_PKTINFO socket option or the bind function) provided the
characteristics needed by the IP_UNICAST_IF socket option. [1]
The IP_UNICAST_IF socket option works well for unconnected sockets,
that is, the interface specified by the IP_UNICAST_IF socket option
is taken into consideration in the route lookup process when a packet
is being sent. However, for connected sockets, the outbound interface
is chosen when connecting the socket, and in the route lookup process
which is done when a packet is being sent, the interface specified by
the IP_UNICAST_IF socket option is being ignored.

This inconsistent behavior was reported and discussed in an issue
opened on systemd's GitHub project [2]. Also, a bug report was
submitted in the kernel's bugzilla [3].

To understand the problem in more detail, we can look at what happens
for UDP packets over IPv4 (The same analysis was done separately in
the referenced systemd issue).
When a UDP packet is sent the udp_sendmsg function gets called and
the following happens:

1. The oif member of the struct ipcm_cookie ipc (which stores the
output interface of the packet) is initialized by the ipcm_init_sk
function to inet->sk.sk_bound_dev_if (the device set by the
SO_BINDTODEVICE socket option).

2. If the IP_PKTINFO socket option was set, the oif member gets
overridden by the call to the ip_cmsg_send function.

3. If no output interface was selected yet, the interface specified
by the IP_UNICAST_IF socket option is used.

4. If the socket is connected and no destination address is
specified in the send function, the struct ipcm_cookie ipc is not
taken into consideration and the cached route, that was calculated in
the connect function is being used.

Thus, for a connected socket, the IP_UNICAST_IF sockopt isn't taken
into consideration.

This patch corrects the behavior of the IP_UNICAST_IF socket option
for connect()ed sockets by taking into consideration the
IP_UNICAST_IF sockopt when connecting the socket.

In order to avoid reconnecting the socket, this option is still
ignored when applied on an already connected socket until connect()
is called again by the user.

Change the __ip4_datagram_connect function, which is called during
socket connection, to take into consideration the interface set by
the IP_UNICAST_IF socket option, in a similar way to what is done in
the udp_sendmsg function.

[1] https://lore.kernel.org/netdev/1328685717.4736.4.camel@edumazet-laptop/T/
[2] https://github.com/systemd/systemd/issues/11935#issuecomment-618691018
[3] https://bugzilla.kernel.org/show_bug.cgi?id=210255

Signed-off-by: Richard Gobert <richardbgobert@gmail.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Link: https://lore.kernel.org/r/20220829111554.GA1771@debian
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/ipv4/datagram.c                       |  2 ++
 tools/testing/selftests/net/fcnal-test.sh | 30 ++++++++++++++++++++++++++++++
 tools/testing/selftests/net/nettest.c     | 16 ++++++++++++++--
 3 files changed, 46 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c
index ffd57523331f..405a8c2aea64 100644
--- a/net/ipv4/datagram.c
+++ b/net/ipv4/datagram.c
@@ -42,6 +42,8 @@ int __ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len
 			oif = inet->mc_index;
 		if (!saddr)
 			saddr = inet->mc_addr;
+	} else if (!oif) {
+		oif = inet->uc_index;
 	}
 	fl4 = &inet->cork.fl.u.ip4;
 	rt = ip_route_connect(fl4, usin->sin_addr.s_addr, saddr, oif,
diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh
index 03b586760164..31c3b6ebd388 100755
--- a/tools/testing/selftests/net/fcnal-test.sh
+++ b/tools/testing/selftests/net/fcnal-test.sh
@@ -1466,6 +1466,13 @@ ipv4_udp_novrf()
 		run_cmd nettest -D -r ${a} -d ${NSA_DEV} -S -0 ${NSA_IP}
 		log_test_addr ${a} $? 0 "Client, device bind via IP_UNICAST_IF"
 
+		log_start
+		run_cmd_nsb nettest -D -s &
+		sleep 1
+		run_cmd nettest -D -r ${a} -d ${NSA_DEV} -S -0 ${NSA_IP} -U
+		log_test_addr ${a} $? 0 "Client, device bind via IP_UNICAST_IF, with connect()"
+
+
 		log_start
 		show_hint "Should fail 'Connection refused'"
 		run_cmd nettest -D -r ${a}
@@ -1525,6 +1532,13 @@ ipv4_udp_novrf()
 	run_cmd nettest -D -d ${NSA_DEV} -S -r ${a}
 	log_test_addr ${a} $? 0 "Global server, device client via IP_UNICAST_IF, local connection"
 
+	log_start
+	run_cmd nettest -s -D &
+	sleep 1
+	run_cmd nettest -D -d ${NSA_DEV} -S -r ${a} -U
+	log_test_addr ${a} $? 0 "Global server, device client via IP_UNICAST_IF, local connection, with connect()"
+
+
 	# IPv4 with device bind has really weird behavior - it overrides the
 	# fib lookup, generates an rtable and tries to send the packet. This
 	# causes failures for local traffic at different places
@@ -1550,6 +1564,15 @@ ipv4_udp_novrf()
 		sleep 1
 		run_cmd nettest -D -r ${a} -d ${NSA_DEV} -S
 		log_test_addr ${a} $? 1 "Global server, device client via IP_UNICAST_IF, local connection"
+
+		log_start
+		show_hint "Should fail since addresses on loopback are out of device scope"
+		run_cmd nettest -D -s &
+		sleep 1
+		run_cmd nettest -D -r ${a} -d ${NSA_DEV} -S -U
+		log_test_addr ${a} $? 1 "Global server, device client via IP_UNICAST_IF, local connection, with connect()"
+
+
 	done
 
 	a=${NSA_IP}
@@ -3157,6 +3180,13 @@ ipv6_udp_novrf()
 		sleep 1
 		run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -S
 		log_test_addr ${a} $? 1 "Global server, device client via IP_UNICAST_IF, local connection"
+
+		log_start
+		show_hint "Should fail 'No route to host' since addresses on loopback are out of device scope"
+		run_cmd nettest -6 -D -s &
+		sleep 1
+		run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -S -U
+		log_test_addr ${a} $? 1 "Global server, device client via IP_UNICAST_IF, local connection, with connect()"
 	done
 
 	a=${NSA_IP6}
diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c
index d9a6fd2cd9d3..7900fa98eccb 100644
--- a/tools/testing/selftests/net/nettest.c
+++ b/tools/testing/selftests/net/nettest.c
@@ -127,6 +127,9 @@ struct sock_args {
 
 	/* ESP in UDP encap test */
 	int use_xfrm;
+
+	/* use send() and connect() instead of sendto */
+	int datagram_connect;
 };
 
 static int server_mode;
@@ -979,6 +982,11 @@ static int send_msg(int sd, void *addr, socklen_t alen, struct sock_args *args)
 			log_err_errno("write failed sending msg to peer");
 			return 1;
 		}
+	} else if (args->datagram_connect) {
+		if (send(sd, msg, msglen, 0) < 0) {
+			log_err_errno("send failed sending msg to peer");
+			return 1;
+		}
 	} else if (args->ifindex && args->use_cmsg) {
 		if (send_msg_cmsg(sd, addr, alen, args->ifindex, args->version))
 			return 1;
@@ -1659,7 +1667,7 @@ static int connectsock(void *addr, socklen_t alen, struct sock_args *args)
 	if (args->has_local_ip && bind_socket(sd, args))
 		goto err;
 
-	if (args->type != SOCK_STREAM)
+	if (args->type != SOCK_STREAM && !args->datagram_connect)
 		goto out;
 
 	if (args->password && tcp_md5sig(sd, addr, alen, args))
@@ -1854,7 +1862,7 @@ static int ipc_parent(int cpid, int fd, struct sock_args *args)
 	return client_status;
 }
 
-#define GETOPT_STR  "sr:l:c:p:t:g:P:DRn:M:X:m:d:I:BN:O:SCi6xL:0:1:2:3:Fbqf"
+#define GETOPT_STR  "sr:l:c:p:t:g:P:DRn:M:X:m:d:I:BN:O:SUCi6xL:0:1:2:3:Fbqf"
 #define OPT_FORCE_BIND_KEY_IFINDEX 1001
 #define OPT_NO_BIND_KEY_IFINDEX 1002
 
@@ -1891,6 +1899,7 @@ static void print_usage(char *prog)
 	"    -I dev        bind socket to given device name - server mode\n"
 	"    -S            use setsockopt (IP_UNICAST_IF or IP_MULTICAST_IF)\n"
 	"                  to set device binding\n"
+	"    -U            Use connect() and send() for datagram sockets\n"
 	"    -f            bind socket with the IP[V6]_FREEBIND option\n"
 	"    -C            use cmsg and IP_PKTINFO to specify device binding\n"
 	"\n"
@@ -2074,6 +2083,9 @@ int main(int argc, char *argv[])
 		case 'x':
 			args.use_xfrm = 1;
 			break;
+		case 'U':
+			args.datagram_connect = 1;
+			break;
 		default:
 			print_usage(argv[0]);
 			return 1;
-- 
cgit v1.2.3


From c710136e87747f1cc8e24948b3046ee57a1fe2eb Mon Sep 17 00:00:00 2001
From: Hou Tao <houtao1@huawei.com>
Date: Thu, 1 Sep 2022 14:19:37 +0800
Subject: selftests/bpf: Move sys_pidfd_open() into
 task_local_storage_helpers.h

sys_pidfd_open() is defined in both test_bprm_opts.c and
test_local_storage.c, so move it to a common header file. It will also
be used in map_tests.

Signed-off-by: Hou Tao <houtao1@huawei.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/r/20220901061938.3789460-4-houtao@huaweicloud.com
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
---
 .../testing/selftests/bpf/prog_tests/test_bprm_opts.c  | 10 +---------
 .../selftests/bpf/prog_tests/test_local_storage.c      | 10 +---------
 .../testing/selftests/bpf/task_local_storage_helpers.h | 18 ++++++++++++++++++
 3 files changed, 20 insertions(+), 18 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/task_local_storage_helpers.h

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/test_bprm_opts.c b/tools/testing/selftests/bpf/prog_tests/test_bprm_opts.c
index 2559bb775762..a0054019e677 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_bprm_opts.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_bprm_opts.c
@@ -9,18 +9,10 @@
 
 #include "bprm_opts.skel.h"
 #include "network_helpers.h"
-
-#ifndef __NR_pidfd_open
-#define __NR_pidfd_open 434
-#endif
+#include "task_local_storage_helpers.h"
 
 static const char * const bash_envp[] = { "TMPDIR=shouldnotbeset", NULL };
 
-static inline int sys_pidfd_open(pid_t pid, unsigned int flags)
-{
-	return syscall(__NR_pidfd_open, pid, flags);
-}
-
 static int update_storage(int map_fd, int secureexec)
 {
 	int task_fd, ret = 0;
diff --git a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c
index 26ac26a88026..9c77cd6b1eaf 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c
@@ -11,15 +11,7 @@
 
 #include "local_storage.skel.h"
 #include "network_helpers.h"
-
-#ifndef __NR_pidfd_open
-#define __NR_pidfd_open 434
-#endif
-
-static inline int sys_pidfd_open(pid_t pid, unsigned int flags)
-{
-	return syscall(__NR_pidfd_open, pid, flags);
-}
+#include "task_local_storage_helpers.h"
 
 static unsigned int duration;
 
diff --git a/tools/testing/selftests/bpf/task_local_storage_helpers.h b/tools/testing/selftests/bpf/task_local_storage_helpers.h
new file mode 100644
index 000000000000..711d5abb7d51
--- /dev/null
+++ b/tools/testing/selftests/bpf/task_local_storage_helpers.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __TASK_LOCAL_STORAGE_HELPER_H
+#define __TASK_LOCAL_STORAGE_HELPER_H
+
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+
+#ifndef __NR_pidfd_open
+#define __NR_pidfd_open 434
+#endif
+
+static inline int sys_pidfd_open(pid_t pid, unsigned int flags)
+{
+	return syscall(__NR_pidfd_open, pid, flags);
+}
+
+#endif
-- 
cgit v1.2.3


From 73b97bc78b32eb739a7dd3394fa3981e8021c0ef Mon Sep 17 00:00:00 2001
From: Hou Tao <houtao1@huawei.com>
Date: Thu, 1 Sep 2022 14:19:38 +0800
Subject: selftests/bpf: Test concurrent updates on bpf_task_storage_busy

Under a fully preemptible kernel, task local storage lookup operations
on the same CPU may update per-cpu bpf_task_storage_busy concurrently.
If the update of bpf_task_storage_busy is not preemption safe, the final
value of bpf_task_storage_busy may become non-zero forever and
bpf_task_storage_trylock() will always fail. So add a test case to
ensure the update of bpf_task_storage_busy is preemption safe.

The test case is skipped when CONFIG_PREEMPT is disabled, and it can
only reproduce the problem probabilistically. By increasing
TASK_STORAGE_MAP_NR_LOOP and running it under an ARM64 VM with 4 CPUs,
it takes about four rounds to reproduce:

> test_maps is modified to only run test_task_storage_map_stress_lookup()
$ export TASK_STORAGE_MAP_NR_THREAD=256
$ export TASK_STORAGE_MAP_NR_LOOP=81920
$ export TASK_STORAGE_MAP_PIN_CPU=1
$ time ./test_maps
test_task_storage_map_stress_lookup(135):FAIL:bad bpf_task_storage_busy got -2

real    0m24.743s
user    0m6.772s
sys     0m17.966s

Signed-off-by: Hou Tao <houtao1@huawei.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/r/20220901061938.3789460-5-houtao@huaweicloud.com
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
---
 .../selftests/bpf/map_tests/task_storage_map.c     | 122 +++++++++++++++++++++
 .../bpf/progs/read_bpf_task_storage_busy.c         |  39 +++++++
 2 files changed, 161 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/map_tests/task_storage_map.c
 create mode 100644 tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/map_tests/task_storage_map.c b/tools/testing/selftests/bpf/map_tests/task_storage_map.c
new file mode 100644
index 000000000000..1adc9c292eb2
--- /dev/null
+++ b/tools/testing/selftests/bpf/map_tests/task_storage_map.c
@@ -0,0 +1,122 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2022. Huawei Technologies Co., Ltd */
+#define _GNU_SOURCE
+#include <sched.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <string.h>
+#include <pthread.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "test_maps.h"
+#include "task_local_storage_helpers.h"
+#include "read_bpf_task_storage_busy.skel.h"
+
+struct lookup_ctx {
+	bool start;
+	bool stop;
+	int pid_fd;
+	int map_fd;
+	int loop;
+};
+
+static void *lookup_fn(void *arg)
+{
+	struct lookup_ctx *ctx = arg;
+	long value;
+	int i = 0;
+
+	while (!ctx->start)
+		usleep(1);
+
+	while (!ctx->stop && i++ < ctx->loop)
+		bpf_map_lookup_elem(ctx->map_fd, &ctx->pid_fd, &value);
+	return NULL;
+}
+
+static void abort_lookup(struct lookup_ctx *ctx, pthread_t *tids, unsigned int nr)
+{
+	unsigned int i;
+
+	ctx->stop = true;
+	ctx->start = true;
+	for (i = 0; i < nr; i++)
+		pthread_join(tids[i], NULL);
+}
+
+void test_task_storage_map_stress_lookup(void)
+{
+#define MAX_NR_THREAD 4096
+	unsigned int i, nr = 256, loop = 8192, cpu = 0;
+	struct read_bpf_task_storage_busy *skel;
+	pthread_t tids[MAX_NR_THREAD];
+	struct lookup_ctx ctx;
+	cpu_set_t old, new;
+	const char *cfg;
+	int err;
+
+	cfg = getenv("TASK_STORAGE_MAP_NR_THREAD");
+	if (cfg) {
+		nr = atoi(cfg);
+		if (nr > MAX_NR_THREAD)
+			nr = MAX_NR_THREAD;
+	}
+	cfg = getenv("TASK_STORAGE_MAP_NR_LOOP");
+	if (cfg)
+		loop = atoi(cfg);
+	cfg = getenv("TASK_STORAGE_MAP_PIN_CPU");
+	if (cfg)
+		cpu = atoi(cfg);
+
+	skel = read_bpf_task_storage_busy__open_and_load();
+	err = libbpf_get_error(skel);
+	CHECK(err, "open_and_load", "error %d\n", err);
+
+	/* Only for a fully preemptible kernel */
+	if (!skel->kconfig->CONFIG_PREEMPT)
+		return;
+
+	/* Save the old affinity setting */
+	sched_getaffinity(getpid(), sizeof(old), &old);
+
+	/* Pinned on a specific CPU */
+	CPU_ZERO(&new);
+	CPU_SET(cpu, &new);
+	sched_setaffinity(getpid(), sizeof(new), &new);
+
+	ctx.start = false;
+	ctx.stop = false;
+	ctx.pid_fd = sys_pidfd_open(getpid(), 0);
+	ctx.map_fd = bpf_map__fd(skel->maps.task);
+	ctx.loop = loop;
+	for (i = 0; i < nr; i++) {
+		err = pthread_create(&tids[i], NULL, lookup_fn, &ctx);
+		if (err) {
+			abort_lookup(&ctx, tids, i);
+			CHECK(err, "pthread_create", "error %d\n", err);
+			goto out;
+		}
+	}
+
+	ctx.start = true;
+	for (i = 0; i < nr; i++)
+		pthread_join(tids[i], NULL);
+
+	skel->bss->pid = getpid();
+	err = read_bpf_task_storage_busy__attach(skel);
+	CHECK(err, "attach", "error %d\n", err);
+
+	/* Trigger program */
+	syscall(SYS_gettid);
+	skel->bss->pid = 0;
+
+	CHECK(skel->bss->busy != 0, "bad bpf_task_storage_busy", "got %d\n", skel->bss->busy);
+out:
+	read_bpf_task_storage_busy__destroy(skel);
+	/* Restore affinity setting */
+	sched_setaffinity(getpid(), sizeof(old), &old);
+}
diff --git a/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c b/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c
new file mode 100644
index 000000000000..a47bb0120719
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2022. Huawei Technologies Co., Ltd */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+extern bool CONFIG_PREEMPT __kconfig __weak;
+extern const int bpf_task_storage_busy __ksym;
+
+char _license[] SEC("license") = "GPL";
+
+int pid = 0;
+int busy = 0;
+
+struct {
+	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, long);
+} task SEC(".maps");
+
+SEC("raw_tp/sys_enter")
+int BPF_PROG(read_bpf_task_storage_busy)
+{
+	int *value;
+	int key;
+
+	if (!CONFIG_PREEMPT)
+		return 0;
+
+	if (bpf_get_current_pid_tgid() >> 32 != pid)
+		return 0;
+
+	value = bpf_this_cpu_ptr(&bpf_task_storage_busy);
+	if (value)
+		busy = *value;
+
+	return 0;
+}
-- 
cgit v1.2.3


From 1ab3d4175775378106965ce32a07248675680c2c Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Wed, 31 Aug 2022 20:08:39 +0300
Subject: selftests: net: dsa: symlink the tc_actions.sh test

This has been validated on the Ocelot/Felix switch family (NXP LS1028A)
and should be relevant to any switch driver that offloads the tc-flower
and/or tc-matchall actions trap, drop, accept, mirred, for which DSA has
operations.

TEST: gact drop and ok (skip_hw)                                    [ OK ]
TEST: mirred egress flower redirect (skip_hw)                       [ OK ]
TEST: mirred egress flower mirror (skip_hw)                         [ OK ]
TEST: mirred egress matchall mirror (skip_hw)                       [ OK ]
TEST: mirred_egress_to_ingress (skip_hw)                            [ OK ]
TEST: gact drop and ok (skip_sw)                                    [ OK ]
TEST: mirred egress flower redirect (skip_sw)                       [ OK ]
TEST: mirred egress flower mirror (skip_sw)                         [ OK ]
TEST: mirred egress matchall mirror (skip_sw)                       [ OK ]
TEST: trap (skip_sw)                                                [ OK ]
TEST: mirred_egress_to_ingress (skip_sw)                            [ OK ]

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Link: https://lore.kernel.org/r/20220831170839.931184-1-vladimir.oltean@nxp.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/drivers/net/dsa/Makefile      | 3 ++-
 tools/testing/selftests/drivers/net/dsa/tc_actions.sh | 1 +
 tools/testing/selftests/drivers/net/dsa/tc_common.sh  | 1 +
 3 files changed, 4 insertions(+), 1 deletion(-)
 create mode 120000 tools/testing/selftests/drivers/net/dsa/tc_actions.sh
 create mode 120000 tools/testing/selftests/drivers/net/dsa/tc_common.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/drivers/net/dsa/Makefile b/tools/testing/selftests/drivers/net/dsa/Makefile
index 2a731d5c6d85..c393e7b73805 100644
--- a/tools/testing/selftests/drivers/net/dsa/Makefile
+++ b/tools/testing/selftests/drivers/net/dsa/Makefile
@@ -8,9 +8,10 @@ TEST_PROGS = bridge_locked_port.sh \
 	bridge_vlan_unaware.sh \
 	local_termination.sh \
 	no_forwarding.sh \
+	tc_actions.sh \
 	test_bridge_fdb_stress.sh
 
-TEST_PROGS_EXTENDED := lib.sh
+TEST_PROGS_EXTENDED := lib.sh tc_common.sh
 
 TEST_FILES := forwarding.config
 
diff --git a/tools/testing/selftests/drivers/net/dsa/tc_actions.sh b/tools/testing/selftests/drivers/net/dsa/tc_actions.sh
new file mode 120000
index 000000000000..306213d9430e
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/tc_actions.sh
@@ -0,0 +1 @@
+../../../net/forwarding/tc_actions.sh
\ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/tc_common.sh b/tools/testing/selftests/drivers/net/dsa/tc_common.sh
new file mode 120000
index 000000000000..bc3465bdc36b
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/tc_common.sh
@@ -0,0 +1 @@
+../../../net/forwarding/tc_common.sh
\ No newline at end of file
-- 
cgit v1.2.3


From 44c51472bef83bb70b43e2f4b7a592096f32a855 Mon Sep 17 00:00:00 2001
From: Shmulik Ladkani <shmulik@metanetworks.com>
Date: Wed, 31 Aug 2022 17:40:09 +0300
Subject: bpf: Support getting tunnel flags

Existing 'bpf_skb_get_tunnel_key' extracts various tunnel parameters
(id, ttl, tos, local and remote) but does not expose ip_tunnel_info's
tun_flags to the BPF program.

It makes sense to expose tun_flags to the BPF program.

Assume for example multiple GRE tunnels maintained on a single GRE
interface in collect_md mode. The program expects origins to initiate
over GRE, however different origins use different GRE characteristics
(e.g. some prefer to use GRE checksum, some do not; some pass a GRE key,
some do not, etc..).

A BPF program getting tun_flags can therefore remember the relevant
flags (e.g. TUNNEL_CSUM, TUNNEL_SEQ...) for each initiating remote. In
the reply path, the program can use 'bpf_skb_set_tunnel_key' in order
to correctly reply to the remote, using similar characteristics, based
on the stored tunnel flags.

Introduce BPF_F_TUNINFO_FLAGS flag for bpf_skb_get_tunnel_key. If
specified, 'bpf_tunnel_key->tunnel_flags' is set with the tun_flags.

Decided to use the existing unused 'tunnel_ext' as the storage for the
'tunnel_flags' in order to avoid changing bpf_tunnel_key's layout.

Also, the following has been considered during the design:

  1. Convert the "interesting" internal TUNNEL_xxx flags back to BPF_F_yyy
     and place into the new 'tunnel_flags' field. This has 2 drawbacks:

     - The BPF_F_yyy flags are from *set_tunnel_key* enumeration space,
       e.g. BPF_F_ZERO_CSUM_TX. It is awkward that it is "returned" into
       tunnel_flags from a *get_tunnel_key* call.
     - Not all "interesting" TUNNEL_xxx flags can be mapped to existing
       BPF_F_yyy flags, and it doesn't make sense to create new BPF_F_yyy
       flags just for purposes of the returned tunnel_flags.

  2. Place key.tun_flags into 'tunnel_flags' but mask them, keeping only
     "interesting" flags. That's ok, but the drawback is that what's
     "interesting" for my usecase might be limiting for other usecases.

Therefore I decided to expose what's in key.tun_flags *as is*, which seems
most flexible. The BPF user can just choose to ignore bits he's not
interested in. The TUNNEL_xxx are also UAPI, so no harm exposing them
back in the get_tunnel_key call.

Signed-off-by: Shmulik Ladkani <shmulik.ladkani@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20220831144010.174110-1-shmulik.ladkani@gmail.com
---
 include/uapi/linux/bpf.h       | 10 +++++++++-
 net/core/filter.c              |  8 ++++++--
 tools/include/uapi/linux/bpf.h | 10 +++++++++-
 3 files changed, 24 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 962960a98835..837c0f9b7fdd 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5659,6 +5659,11 @@ enum {
 	BPF_F_SEQ_NUMBER		= (1ULL << 3),
 };
 
+/* BPF_FUNC_skb_get_tunnel_key flags. */
+enum {
+	BPF_F_TUNINFO_FLAGS		= (1ULL << 4),
+};
+
 /* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and
  * BPF_FUNC_perf_event_read_value flags.
  */
@@ -5848,7 +5853,10 @@ struct bpf_tunnel_key {
 	};
 	__u8 tunnel_tos;
 	__u8 tunnel_ttl;
-	__u16 tunnel_ext;	/* Padding, future use. */
+	union {
+		__u16 tunnel_ext;	/* compat */
+		__be16 tunnel_flags;
+	};
 	__u32 tunnel_label;
 	union {
 		__u32 local_ipv4;
diff --git a/net/core/filter.c b/net/core/filter.c
index 63e25d8ce501..74e2a4a0d747 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4488,7 +4488,8 @@ BPF_CALL_4(bpf_skb_get_tunnel_key, struct sk_buff *, skb, struct bpf_tunnel_key
 	void *to_orig = to;
 	int err;
 
-	if (unlikely(!info || (flags & ~(BPF_F_TUNINFO_IPV6)))) {
+	if (unlikely(!info || (flags & ~(BPF_F_TUNINFO_IPV6 |
+					 BPF_F_TUNINFO_FLAGS)))) {
 		err = -EINVAL;
 		goto err_clear;
 	}
@@ -4520,7 +4521,10 @@ set_compat:
 	to->tunnel_id = be64_to_cpu(info->key.tun_id);
 	to->tunnel_tos = info->key.tos;
 	to->tunnel_ttl = info->key.ttl;
-	to->tunnel_ext = 0;
+	if (flags & BPF_F_TUNINFO_FLAGS)
+		to->tunnel_flags = info->key.tun_flags;
+	else
+		to->tunnel_ext = 0;
 
 	if (flags & BPF_F_TUNINFO_IPV6) {
 		memcpy(to->remote_ipv6, &info->key.u.ipv6.src,
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index f4ba82a1eace..793103b10eab 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -5659,6 +5659,11 @@ enum {
 	BPF_F_SEQ_NUMBER		= (1ULL << 3),
 };
 
+/* BPF_FUNC_skb_get_tunnel_key flags. */
+enum {
+	BPF_F_TUNINFO_FLAGS		= (1ULL << 4),
+};
+
 /* BPF_FUNC_perf_event_output, BPF_FUNC_perf_event_read and
  * BPF_FUNC_perf_event_read_value flags.
  */
@@ -5848,7 +5853,10 @@ struct bpf_tunnel_key {
 	};
 	__u8 tunnel_tos;
 	__u8 tunnel_ttl;
-	__u16 tunnel_ext;	/* Padding, future use. */
+	union {
+		__u16 tunnel_ext;	/* compat */
+		__be16 tunnel_flags;
+	};
 	__u32 tunnel_label;
 	union {
 		__u32 local_ipv4;
-- 
cgit v1.2.3


From 8cc61b7a6416541261d56bcdd93a711407f711ba Mon Sep 17 00:00:00 2001
From: Shmulik Ladkani <shmulik@metanetworks.com>
Date: Wed, 31 Aug 2022 17:40:10 +0300
Subject: selftests/bpf: Amend test_tunnel to exercise BPF_F_TUNINFO_FLAGS

Get the tunnel flags in {ipv6}vxlan_get_tunnel_src and ensure they are
aligned with tunnel params set at {ipv6}vxlan_set_tunnel_dst.

Signed-off-by: Shmulik Ladkani <shmulik.ladkani@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20220831144010.174110-2-shmulik.ladkani@gmail.com
---
 .../testing/selftests/bpf/progs/test_tunnel_kern.c | 24 ++++++++++++++--------
 1 file changed, 16 insertions(+), 8 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
index df0673c4ecbe..98af55f0bcd3 100644
--- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
@@ -12,6 +12,7 @@
 #include <linux/bpf.h>
 #include <linux/if_ether.h>
 #include <linux/if_packet.h>
+#include <linux/if_tunnel.h>
 #include <linux/ip.h>
 #include <linux/ipv6.h>
 #include <linux/icmp.h>
@@ -386,7 +387,8 @@ int vxlan_get_tunnel_src(struct __sk_buff *skb)
 	__u32 orig_daddr;
 	__u32 index = 0;
 
-	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
+				     BPF_F_TUNINFO_FLAGS);
 	if (ret < 0) {
 		log_err(ret);
 		return TC_ACT_SHOT;
@@ -398,10 +400,13 @@ int vxlan_get_tunnel_src(struct __sk_buff *skb)
 		return TC_ACT_SHOT;
 	}
 
-	if (key.local_ipv4 != ASSIGNED_ADDR_VETH1 || md.gbp != 0x800FF) {
-		bpf_printk("vxlan key %d local ip 0x%x remote ip 0x%x gbp 0x%x\n",
+	if (key.local_ipv4 != ASSIGNED_ADDR_VETH1 || md.gbp != 0x800FF ||
+	    !(key.tunnel_flags & TUNNEL_KEY) ||
+	    (key.tunnel_flags & TUNNEL_CSUM)) {
+		bpf_printk("vxlan key %d local ip 0x%x remote ip 0x%x gbp 0x%x flags 0x%x\n",
 			   key.tunnel_id, key.local_ipv4,
-			   key.remote_ipv4, md.gbp);
+			   key.remote_ipv4, md.gbp,
+			   bpf_ntohs(key.tunnel_flags));
 		log_err(ret);
 		return TC_ACT_SHOT;
 	}
@@ -541,16 +546,19 @@ int ip6vxlan_get_tunnel_src(struct __sk_buff *skb)
 	}
 
 	ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
-				     BPF_F_TUNINFO_IPV6);
+				     BPF_F_TUNINFO_IPV6 | BPF_F_TUNINFO_FLAGS);
 	if (ret < 0) {
 		log_err(ret);
 		return TC_ACT_SHOT;
 	}
 
-	if (bpf_ntohl(key.local_ipv6[3]) != *local_ip) {
-		bpf_printk("ip6vxlan key %d local ip6 ::%x remote ip6 ::%x label 0x%x\n",
+	if (bpf_ntohl(key.local_ipv6[3]) != *local_ip ||
+	    !(key.tunnel_flags & TUNNEL_KEY) ||
+	    !(key.tunnel_flags & TUNNEL_CSUM)) {
+		bpf_printk("ip6vxlan key %d local ip6 ::%x remote ip6 ::%x label 0x%x flags 0x%x\n",
 			   key.tunnel_id, bpf_ntohl(key.local_ipv6[3]),
-			   bpf_ntohl(key.remote_ipv6[3]), key.tunnel_label);
+			   bpf_ntohl(key.remote_ipv6[3]), key.tunnel_label,
+			   bpf_ntohs(key.tunnel_flags));
 		bpf_printk("local_ip 0x%x\n", *local_ip);
 		log_err(ret);
 		return TC_ACT_SHOT;
-- 
cgit v1.2.3


From 0d68e6fe12ada8fbaf35f0978aaf18dfb8d2dbb5 Mon Sep 17 00:00:00 2001
From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Date: Thu, 1 Sep 2022 13:48:08 +0200
Subject: selftests/xsk: Query for native XDP support

Currently, xskxceiver assumes that the underlying device supports XDP in
native mode - this is fine for now since tests can run only on a veth
pair. A future commit is going to allow running the test suite against
physical devices, so let us query the device to check whether it is
capable of running XDP programs in native mode. This way xskxceiver will
not try to run TEST_MODE_DRV if the device being tested does not support
it.

Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Magnus Karlsson <magnus.karlsson@intel.com>
Link: https://lore.kernel.org/bpf/20220901114813.16275-2-maciej.fijalkowski@intel.com
---
 tools/testing/selftests/bpf/xskxceiver.c | 39 ++++++++++++++++++++++++++++++--
 1 file changed, 37 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c
index 14b4737b223c..19f65bce7f65 100644
--- a/tools/testing/selftests/bpf/xskxceiver.c
+++ b/tools/testing/selftests/bpf/xskxceiver.c
@@ -99,6 +99,8 @@
 #include <stdatomic.h>
 #include "xsk.h"
 #include "xskxceiver.h"
+#include <bpf/bpf.h>
+#include <linux/filter.h>
 #include "../kselftest.h"
 
 /* AF_XDP APIs were moved into libxdp and marked as deprecated in libbpf.
@@ -1712,10 +1714,40 @@ static void ifobject_delete(struct ifobject *ifobj)
 	free(ifobj);
 }
 
+static bool is_xdp_supported(struct ifobject *ifobject)
+{
+	int flags = XDP_FLAGS_DRV_MODE;
+
+	LIBBPF_OPTS(bpf_link_create_opts, opts, .flags = flags);
+	struct bpf_insn insns[2] = {
+		BPF_MOV64_IMM(BPF_REG_0, XDP_PASS),
+		BPF_EXIT_INSN()
+	};
+	int ifindex = if_nametoindex(ifobject->ifname);
+	int prog_fd, insn_cnt = ARRAY_SIZE(insns);
+	int err;
+
+	prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "GPL", insns, insn_cnt, NULL);
+	if (prog_fd < 0)
+		return false;
+
+	err = bpf_xdp_attach(ifindex, prog_fd, flags, NULL);
+	if (err) {
+		close(prog_fd);
+		return false;
+	}
+
+	bpf_xdp_detach(ifindex, flags, NULL);
+	close(prog_fd);
+
+	return true;
+}
+
 int main(int argc, char **argv)
 {
 	struct pkt_stream *pkt_stream_default;
 	struct ifobject *ifobj_tx, *ifobj_rx;
+	int modes = TEST_MODE_SKB + 1;
 	u32 i, j, failed_tests = 0;
 	struct test_spec test;
 
@@ -1743,15 +1775,18 @@ int main(int argc, char **argv)
 	init_iface(ifobj_rx, MAC2, MAC1, IP2, IP1, UDP_PORT2, UDP_PORT1,
 		   worker_testapp_validate_rx);
 
+	if (is_xdp_supported(ifobj_tx))
+		modes++;
+
 	test_spec_init(&test, ifobj_tx, ifobj_rx, 0);
 	pkt_stream_default = pkt_stream_generate(ifobj_tx->umem, DEFAULT_PKT_CNT, PKT_SIZE);
 	if (!pkt_stream_default)
 		exit_with_error(ENOMEM);
 	test.pkt_stream_default = pkt_stream_default;
 
-	ksft_set_plan(TEST_MODE_MAX * TEST_TYPE_MAX);
+	ksft_set_plan(modes * TEST_TYPE_MAX);
 
-	for (i = 0; i < TEST_MODE_MAX; i++)
+	for (i = 0; i < modes; i++)
 		for (j = 0; j < TEST_TYPE_MAX; j++) {
 			test_spec_init(&test, ifobj_tx, ifobj_rx, i);
 			run_pkt_test(&test, i, j);
-- 
cgit v1.2.3


From 1adef0643b7df1069a53f6f5b7bc66c8234db899 Mon Sep 17 00:00:00 2001
From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Date: Thu, 1 Sep 2022 13:48:09 +0200
Subject: selftests/xsk: Introduce default Rx pkt stream

In order to prepare xskxceiver for physical device testing, let us
introduce a default Rx pkt stream. The reason for doing this is that
physical device testing will use a UMEM of doubled size, where one half
will be used by Tx and the other half by Rx. This means that pkt
addresses will differ for the Tx and Rx streams. The Rx thread will
initialize the xsk_umem_info::base_addr that is added here, so that
pkt_set(), when working on the Rx UMEM, will add this offset and the
second half of the UMEM space will be used. Note that currently base_addr
is 0 on both sides. A future commit will do the mentioned initialization.

Previously, veth based testing worked on separate UMEMs, so a single
default stream was fine.

Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Magnus Karlsson <magnus.karlsson@intel.com>
Link: https://lore.kernel.org/bpf/20220901114813.16275-3-maciej.fijalkowski@intel.com
---
 tools/testing/selftests/bpf/xskxceiver.c | 74 +++++++++++++++++++++-----------
 tools/testing/selftests/bpf/xskxceiver.h |  4 +-
 2 files changed, 51 insertions(+), 27 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c
index 19f65bce7f65..fbf65135fef6 100644
--- a/tools/testing/selftests/bpf/xskxceiver.c
+++ b/tools/testing/selftests/bpf/xskxceiver.c
@@ -433,15 +433,16 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
 		ifobj->use_poll = false;
 		ifobj->use_fill_ring = true;
 		ifobj->release_rx = true;
-		ifobj->pkt_stream = test->pkt_stream_default;
 		ifobj->validation_func = NULL;
 
 		if (i == 0) {
 			ifobj->rx_on = false;
 			ifobj->tx_on = true;
+			ifobj->pkt_stream = test->tx_pkt_stream_default;
 		} else {
 			ifobj->rx_on = true;
 			ifobj->tx_on = false;
+			ifobj->pkt_stream = test->rx_pkt_stream_default;
 		}
 
 		memset(ifobj->umem, 0, sizeof(*ifobj->umem));
@@ -465,12 +466,15 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
 static void test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
 			   struct ifobject *ifobj_rx, enum test_mode mode)
 {
-	struct pkt_stream *pkt_stream;
+	struct pkt_stream *tx_pkt_stream;
+	struct pkt_stream *rx_pkt_stream;
 	u32 i;
 
-	pkt_stream = test->pkt_stream_default;
+	tx_pkt_stream = test->tx_pkt_stream_default;
+	rx_pkt_stream = test->rx_pkt_stream_default;
 	memset(test, 0, sizeof(*test));
-	test->pkt_stream_default = pkt_stream;
+	test->tx_pkt_stream_default = tx_pkt_stream;
+	test->rx_pkt_stream_default = rx_pkt_stream;
 
 	for (i = 0; i < MAX_INTERFACES; i++) {
 		struct ifobject *ifobj = i ? ifobj_rx : ifobj_tx;
@@ -531,16 +535,17 @@ static void pkt_stream_delete(struct pkt_stream *pkt_stream)
 static void pkt_stream_restore_default(struct test_spec *test)
 {
 	struct pkt_stream *tx_pkt_stream = test->ifobj_tx->pkt_stream;
+	struct pkt_stream *rx_pkt_stream = test->ifobj_rx->pkt_stream;
 
-	if (tx_pkt_stream != test->pkt_stream_default) {
+	if (tx_pkt_stream != test->tx_pkt_stream_default) {
 		pkt_stream_delete(test->ifobj_tx->pkt_stream);
-		test->ifobj_tx->pkt_stream = test->pkt_stream_default;
+		test->ifobj_tx->pkt_stream = test->tx_pkt_stream_default;
 	}
 
-	if (test->ifobj_rx->pkt_stream != test->pkt_stream_default &&
-	    test->ifobj_rx->pkt_stream != tx_pkt_stream)
+	if (rx_pkt_stream != test->rx_pkt_stream_default) {
 		pkt_stream_delete(test->ifobj_rx->pkt_stream);
-	test->ifobj_rx->pkt_stream = test->pkt_stream_default;
+		test->ifobj_rx->pkt_stream = test->rx_pkt_stream_default;
+	}
 }
 
 static struct pkt_stream *__pkt_stream_alloc(u32 nb_pkts)
@@ -563,7 +568,7 @@ static struct pkt_stream *__pkt_stream_alloc(u32 nb_pkts)
 
 static void pkt_set(struct xsk_umem_info *umem, struct pkt *pkt, u64 addr, u32 len)
 {
-	pkt->addr = addr;
+	pkt->addr = addr + umem->base_addr;
 	pkt->len = len;
 	if (len > umem->frame_size - XDP_PACKET_HEADROOM - MIN_PKT_SIZE * 2 - umem->frame_headroom)
 		pkt->valid = false;
@@ -602,22 +607,29 @@ static void pkt_stream_replace(struct test_spec *test, u32 nb_pkts, u32 pkt_len)
 
 	pkt_stream = pkt_stream_generate(test->ifobj_tx->umem, nb_pkts, pkt_len);
 	test->ifobj_tx->pkt_stream = pkt_stream;
+	pkt_stream = pkt_stream_generate(test->ifobj_rx->umem, nb_pkts, pkt_len);
 	test->ifobj_rx->pkt_stream = pkt_stream;
 }
 
-static void pkt_stream_replace_half(struct test_spec *test, u32 pkt_len, int offset)
+static void __pkt_stream_replace_half(struct ifobject *ifobj, u32 pkt_len,
+				      int offset)
 {
-	struct xsk_umem_info *umem = test->ifobj_tx->umem;
+	struct xsk_umem_info *umem = ifobj->umem;
 	struct pkt_stream *pkt_stream;
 	u32 i;
 
-	pkt_stream = pkt_stream_clone(umem, test->pkt_stream_default);
-	for (i = 1; i < test->pkt_stream_default->nb_pkts; i += 2)
+	pkt_stream = pkt_stream_clone(umem, ifobj->pkt_stream);
+	for (i = 1; i < ifobj->pkt_stream->nb_pkts; i += 2)
 		pkt_set(umem, &pkt_stream->pkts[i],
 			(i % umem->num_frames) * umem->frame_size + offset, pkt_len);
 
-	test->ifobj_tx->pkt_stream = pkt_stream;
-	test->ifobj_rx->pkt_stream = pkt_stream;
+	ifobj->pkt_stream = pkt_stream;
+}
+
+static void pkt_stream_replace_half(struct test_spec *test, u32 pkt_len, int offset)
+{
+	__pkt_stream_replace_half(test->ifobj_tx, pkt_len, offset);
+	__pkt_stream_replace_half(test->ifobj_rx, pkt_len, offset);
 }
 
 static void pkt_stream_receive_half(struct test_spec *test)
@@ -659,7 +671,8 @@ static struct pkt *pkt_generate(struct ifobject *ifobject, u32 pkt_nb)
 	return pkt;
 }
 
-static void pkt_stream_generate_custom(struct test_spec *test, struct pkt *pkts, u32 nb_pkts)
+static void __pkt_stream_generate_custom(struct ifobject *ifobj,
+					 struct pkt *pkts, u32 nb_pkts)
 {
 	struct pkt_stream *pkt_stream;
 	u32 i;
@@ -668,15 +681,20 @@ static void pkt_stream_generate_custom(struct test_spec *test, struct pkt *pkts,
 	if (!pkt_stream)
 		exit_with_error(ENOMEM);
 
-	test->ifobj_tx->pkt_stream = pkt_stream;
-	test->ifobj_rx->pkt_stream = pkt_stream;
-
 	for (i = 0; i < nb_pkts; i++) {
-		pkt_stream->pkts[i].addr = pkts[i].addr;
+		pkt_stream->pkts[i].addr = pkts[i].addr + ifobj->umem->base_addr;
 		pkt_stream->pkts[i].len = pkts[i].len;
 		pkt_stream->pkts[i].payload = i;
 		pkt_stream->pkts[i].valid = pkts[i].valid;
 	}
+
+	ifobj->pkt_stream = pkt_stream;
+}
+
+static void pkt_stream_generate_custom(struct test_spec *test, struct pkt *pkts, u32 nb_pkts)
+{
+	__pkt_stream_generate_custom(test->ifobj_tx, pkts, nb_pkts);
+	__pkt_stream_generate_custom(test->ifobj_rx, pkts, nb_pkts);
 }
 
 static void pkt_dump(void *pkt, u32 len)
@@ -1745,7 +1763,8 @@ static bool is_xdp_supported(struct ifobject *ifobject)
 
 int main(int argc, char **argv)
 {
-	struct pkt_stream *pkt_stream_default;
+	struct pkt_stream *rx_pkt_stream_default;
+	struct pkt_stream *tx_pkt_stream_default;
 	struct ifobject *ifobj_tx, *ifobj_rx;
 	int modes = TEST_MODE_SKB + 1;
 	u32 i, j, failed_tests = 0;
@@ -1779,10 +1798,12 @@ int main(int argc, char **argv)
 		modes++;
 
 	test_spec_init(&test, ifobj_tx, ifobj_rx, 0);
-	pkt_stream_default = pkt_stream_generate(ifobj_tx->umem, DEFAULT_PKT_CNT, PKT_SIZE);
-	if (!pkt_stream_default)
+	tx_pkt_stream_default = pkt_stream_generate(ifobj_tx->umem, DEFAULT_PKT_CNT, PKT_SIZE);
+	rx_pkt_stream_default = pkt_stream_generate(ifobj_rx->umem, DEFAULT_PKT_CNT, PKT_SIZE);
+	if (!tx_pkt_stream_default || !rx_pkt_stream_default)
 		exit_with_error(ENOMEM);
-	test.pkt_stream_default = pkt_stream_default;
+	test.tx_pkt_stream_default = tx_pkt_stream_default;
+	test.rx_pkt_stream_default = rx_pkt_stream_default;
 
 	ksft_set_plan(modes * TEST_TYPE_MAX);
 
@@ -1796,7 +1817,8 @@ int main(int argc, char **argv)
 				failed_tests++;
 		}
 
-	pkt_stream_delete(pkt_stream_default);
+	pkt_stream_delete(tx_pkt_stream_default);
+	pkt_stream_delete(rx_pkt_stream_default);
 	ifobject_delete(ifobj_tx);
 	ifobject_delete(ifobj_rx);
 
diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h
index ee97576757a9..8d1c31f127e7 100644
--- a/tools/testing/selftests/bpf/xskxceiver.h
+++ b/tools/testing/selftests/bpf/xskxceiver.h
@@ -99,6 +99,7 @@ struct xsk_umem_info {
 	u32 frame_headroom;
 	void *buffer;
 	u32 frame_size;
+	u32 base_addr;
 	bool unaligned_mode;
 };
 
@@ -159,7 +160,8 @@ struct ifobject {
 struct test_spec {
 	struct ifobject *ifobj_tx;
 	struct ifobject *ifobj_rx;
-	struct pkt_stream *pkt_stream_default;
+	struct pkt_stream *tx_pkt_stream_default;
+	struct pkt_stream *rx_pkt_stream_default;
 	u16 total_steps;
 	u16 current_step;
 	u16 nb_sockets;
-- 
cgit v1.2.3


From 24037ba7c47b1a50ceb70079d08fc9c135f7df4b Mon Sep 17 00:00:00 2001
From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Date: Thu, 1 Sep 2022 13:48:10 +0200
Subject: selftests/xsk: Increase chars for interface name to 16

So that "enp240s0f0" or such name can be used against xskxceiver.
While at it, also extend character count for netns name.

Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Magnus Karlsson <magnus.karlsson@intel.com>
Link: https://lore.kernel.org/bpf/20220901114813.16275-4-maciej.fijalkowski@intel.com
---
 tools/testing/selftests/bpf/xskxceiver.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h
index 8d1c31f127e7..04b298c72f67 100644
--- a/tools/testing/selftests/bpf/xskxceiver.h
+++ b/tools/testing/selftests/bpf/xskxceiver.h
@@ -29,8 +29,8 @@
 #define TEST_FAILURE -1
 #define TEST_CONTINUE 1
 #define MAX_INTERFACES 2
-#define MAX_INTERFACE_NAME_CHARS 7
-#define MAX_INTERFACES_NAMESPACE_CHARS 10
+#define MAX_INTERFACE_NAME_CHARS 16
+#define MAX_INTERFACES_NAMESPACE_CHARS 16
 #define MAX_SOCKETS 2
 #define MAX_TEST_NAME_SIZE 32
 #define MAX_TEARDOWN_ITER 10
-- 
cgit v1.2.3


From a693ff3ed5610a07b1b0dd831d10f516e13cf6c6 Mon Sep 17 00:00:00 2001
From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Date: Thu, 1 Sep 2022 13:48:11 +0200
Subject: selftests/xsk: Add support for executing tests on physical device

Currently, the architecture of xskxceiver is designed strictly for
conducting veth based tests. A veth pair is created together with a
network namespace and one of the veth interfaces is moved to the
mentioned netns. Then, separate threads for Tx and Rx are spawned which
will utilize the described setup.

Infrastructure described in the paragraph above can not be used for
testing AF_XDP support on physical devices. That testing will be
conducted on a single network interface and same queue. Xskxceiver
needs to be extended to distinguish between veth tests and physical
interface tests.

Since the same iface/queue id pair will be used by both Tx/Rx threads for
physical device testing, the Tx thread, which happens to run after the Rx
thread, is going to create an XSK socket with the shared umem flag. In
order to track this setting throughout the lifetime of the spawned
threads, introduce a 'shared_umem' boolean variable to struct ifobject
and set it to true when xskxceiver is run against a physical device. In
such a case, the UMEM size needs to be doubled, so that half of it will
be used by the Rx thread and the other half by the Tx thread. For two
step based test types, the value of the XSKMAP element under key 0 has to
be updated as there is now another socket for the second step. Also, to
avoid race conditions when destroying XSK resources, move this activity
to the main thread after the spawned Rx and Tx threads have finished
their job. This way it is possible to gracefully remove the shared umem
without introducing synchronization mechanisms.

To run the xsk selftests suite on a physical device, append "-i $IFACE"
when invoking test_xsk.sh. For veth based tests, simply omit it. When
"-i $IFACE" is given, test_xsk.sh will under the hood use $IFACE for both
interfaces supplied to xskxceiver, which in turn will interpret this as
an execution of the test suite against a physical device.

Note that currently this makes it possible only to test SKB and DRV mode
(in case underlying device has native XDP support). ZC testing support
is added in a later patch.

Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Magnus Karlsson <magnus.karlsson@intel.com>
Link: https://lore.kernel.org/bpf/20220901114813.16275-5-maciej.fijalkowski@intel.com
---
 tools/testing/selftests/bpf/test_xsk.sh  |  52 +++++---
 tools/testing/selftests/bpf/xskxceiver.c | 204 ++++++++++++++++++++-----------
 tools/testing/selftests/bpf/xskxceiver.h |   1 +
 3 files changed, 170 insertions(+), 87 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh
index 096a957594cd..d821fd098504 100755
--- a/tools/testing/selftests/bpf/test_xsk.sh
+++ b/tools/testing/selftests/bpf/test_xsk.sh
@@ -73,14 +73,20 @@
 #
 # Run and dump packet contents:
 #   sudo ./test_xsk.sh -D
+#
+# Run test suite for physical device in loopback mode
+#   sudo ./test_xsk.sh -i IFACE
 
 . xsk_prereqs.sh
 
-while getopts "vD" flag
+ETH=""
+
+while getopts "vDi:" flag
 do
 	case "${flag}" in
 		v) verbose=1;;
 		D) dump_pkts=1;;
+		i) ETH=${OPTARG};;
 	esac
 done
 
@@ -132,18 +138,25 @@ setup_vethPairs() {
 	ip link set ${VETH0} up
 }
 
-validate_root_exec
-validate_veth_support ${VETH0}
-validate_ip_utility
-setup_vethPairs
-
-retval=$?
-if [ $retval -ne 0 ]; then
-	test_status $retval "${TEST_NAME}"
-	cleanup_exit ${VETH0} ${VETH1} ${NS1}
-	exit $retval
+if [ ! -z $ETH ]; then
+	VETH0=${ETH}
+	VETH1=${ETH}
+	NS1=""
+else
+	validate_root_exec
+	validate_veth_support ${VETH0}
+	validate_ip_utility
+	setup_vethPairs
+
+	retval=$?
+	if [ $retval -ne 0 ]; then
+		test_status $retval "${TEST_NAME}"
+		cleanup_exit ${VETH0} ${VETH1} ${NS1}
+		exit $retval
+	fi
 fi
 
+
 if [[ $verbose -eq 1 ]]; then
 	ARGS+="-v "
 fi
@@ -152,26 +165,33 @@ if [[ $dump_pkts -eq 1 ]]; then
 	ARGS="-D "
 fi
 
+retval=$?
 test_status $retval "${TEST_NAME}"
 
 ## START TESTS
 
 statusList=()
 
-TEST_NAME="XSK_SELFTESTS_SOFTIRQ"
+TEST_NAME="XSK_SELFTESTS_${VETH0}_SOFTIRQ"
 
 exec_xskxceiver
 
-cleanup_exit ${VETH0} ${VETH1} ${NS1}
-TEST_NAME="XSK_SELFTESTS_BUSY_POLL"
+if [ -z $ETH ]; then
+	cleanup_exit ${VETH0} ${VETH1} ${NS1}
+fi
+TEST_NAME="XSK_SELFTESTS_${VETH0}_BUSY_POLL"
 busy_poll=1
 
-setup_vethPairs
+if [ -z $ETH ]; then
+	setup_vethPairs
+fi
 exec_xskxceiver
 
 ## END TESTS
 
-cleanup_exit ${VETH0} ${VETH1} ${NS1}
+if [ -z $ETH ]; then
+	cleanup_exit ${VETH0} ${VETH1} ${NS1}
+fi
 
 failures=0
 echo -e "\nSummary:"
diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c
index fbf65135fef6..b54b844cae89 100644
--- a/tools/testing/selftests/bpf/xskxceiver.c
+++ b/tools/testing/selftests/bpf/xskxceiver.c
@@ -301,8 +301,8 @@ static void enable_busy_poll(struct xsk_socket_info *xsk)
 		exit_with_error(errno);
 }
 
-static int xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_info *umem,
-				struct ifobject *ifobject, bool shared)
+static int __xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_info *umem,
+				  struct ifobject *ifobject, bool shared)
 {
 	struct xsk_socket_config cfg = {};
 	struct xsk_ring_cons *rxr;
@@ -448,6 +448,9 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
 		memset(ifobj->umem, 0, sizeof(*ifobj->umem));
 		ifobj->umem->num_frames = DEFAULT_UMEM_BUFFERS;
 		ifobj->umem->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
+		if (ifobj->shared_umem && ifobj->rx_on)
+			ifobj->umem->base_addr = DEFAULT_UMEM_BUFFERS *
+				XSK_UMEM__DEFAULT_FRAME_SIZE;
 
 		for (j = 0; j < MAX_SOCKETS; j++) {
 			memset(&ifobj->xsk_arr[j], 0, sizeof(ifobj->xsk_arr[j]));
@@ -1146,6 +1149,70 @@ static int validate_tx_invalid_descs(struct ifobject *ifobject)
 	return TEST_PASS;
 }
 
+static void xsk_configure_socket(struct test_spec *test, struct ifobject *ifobject,
+				 struct xsk_umem_info *umem, bool tx)
+{
+	int i, ret;
+
+	for (i = 0; i < test->nb_sockets; i++) {
+		bool shared = (ifobject->shared_umem && tx) ? true : !!i;
+		u32 ctr = 0;
+
+		while (ctr++ < SOCK_RECONF_CTR) {
+			ret = __xsk_configure_socket(&ifobject->xsk_arr[i], umem,
+						     ifobject, shared);
+			if (!ret)
+				break;
+
+			/* Retry if it fails as xsk_socket__create() is asynchronous */
+			if (ctr >= SOCK_RECONF_CTR)
+				exit_with_error(-ret);
+			usleep(USLEEP_MAX);
+		}
+		if (ifobject->busy_poll)
+			enable_busy_poll(&ifobject->xsk_arr[i]);
+	}
+}
+
+static void thread_common_ops_tx(struct test_spec *test, struct ifobject *ifobject)
+{
+	xsk_configure_socket(test, ifobject, test->ifobj_rx->umem, true);
+	ifobject->xsk = &ifobject->xsk_arr[0];
+	ifobject->xsk_map_fd = test->ifobj_rx->xsk_map_fd;
+	memcpy(ifobject->umem, test->ifobj_rx->umem, sizeof(struct xsk_umem_info));
+}
+
+static void xsk_populate_fill_ring(struct xsk_umem_info *umem, struct pkt_stream *pkt_stream)
+{
+	u32 idx = 0, i, buffers_to_fill;
+	int ret;
+
+	if (umem->num_frames < XSK_RING_PROD__DEFAULT_NUM_DESCS)
+		buffers_to_fill = umem->num_frames;
+	else
+		buffers_to_fill = XSK_RING_PROD__DEFAULT_NUM_DESCS;
+
+	ret = xsk_ring_prod__reserve(&umem->fq, buffers_to_fill, &idx);
+	if (ret != buffers_to_fill)
+		exit_with_error(ENOSPC);
+	for (i = 0; i < buffers_to_fill; i++) {
+		u64 addr;
+
+		if (pkt_stream->use_addr_for_fill) {
+			struct pkt *pkt = pkt_stream_get_pkt(pkt_stream, i);
+
+			if (!pkt)
+				break;
+			addr = pkt->addr;
+		} else {
+			addr = i * umem->frame_size;
+		}
+
+		*xsk_ring_prod__fill_addr(&umem->fq, idx++) = addr;
+	}
+	xsk_ring_prod__submit(&umem->fq, buffers_to_fill);
+}
+
 static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject)
 {
 	u64 umem_sz = ifobject->umem->num_frames * ifobject->umem->frame_size;
@@ -1153,13 +1220,15 @@ static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject)
 	LIBBPF_OPTS(bpf_xdp_query_opts, opts);
 	int ret, ifindex;
 	void *bufs;
-	u32 i;
 
 	ifobject->ns_fd = switch_namespace(ifobject->nsname);
 
 	if (ifobject->umem->unaligned_mode)
 		mmap_flags |= MAP_HUGETLB;
 
+	if (ifobject->shared_umem)
+		umem_sz *= 2;
+
 	bufs = mmap(NULL, umem_sz, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
 	if (bufs == MAP_FAILED)
 		exit_with_error(errno);
@@ -1168,24 +1237,9 @@ static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject)
 	if (ret)
 		exit_with_error(-ret);
 
-	for (i = 0; i < test->nb_sockets; i++) {
-		u32 ctr = 0;
-
-		while (ctr++ < SOCK_RECONF_CTR) {
-			ret = xsk_configure_socket(&ifobject->xsk_arr[i], ifobject->umem,
-						   ifobject, !!i);
-			if (!ret)
-				break;
-
-			/* Retry if it fails as xsk_socket__create() is asynchronous */
-			if (ctr >= SOCK_RECONF_CTR)
-				exit_with_error(-ret);
-			usleep(USLEEP_MAX);
-		}
+	xsk_populate_fill_ring(ifobject->umem, ifobject->pkt_stream);
 
-		if (ifobject->busy_poll)
-			enable_busy_poll(&ifobject->xsk_arr[i]);
-	}
+	xsk_configure_socket(test, ifobject, ifobject->umem, false);
 
 	ifobject->xsk = &ifobject->xsk_arr[0];
 
@@ -1221,22 +1275,18 @@ static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject)
 		exit_with_error(-ret);
 }
 
-static void testapp_cleanup_xsk_res(struct ifobject *ifobj)
-{
-	print_verbose("Destroying socket\n");
-	xsk_socket__delete(ifobj->xsk->xsk);
-	munmap(ifobj->umem->buffer, ifobj->umem->num_frames * ifobj->umem->frame_size);
-	xsk_umem__delete(ifobj->umem->umem);
-}
-
 static void *worker_testapp_validate_tx(void *arg)
 {
 	struct test_spec *test = (struct test_spec *)arg;
 	struct ifobject *ifobject = test->ifobj_tx;
 	int err;
 
-	if (test->current_step == 1)
-		thread_common_ops(test, ifobject);
+	if (test->current_step == 1) {
+		if (!ifobject->shared_umem)
+			thread_common_ops(test, ifobject);
+		else
+			thread_common_ops_tx(test, ifobject);
+	}
 
 	print_verbose("Sending %d packets on interface %s\n", ifobject->pkt_stream->nb_pkts,
 		      ifobject->ifname);
@@ -1247,53 +1297,23 @@ static void *worker_testapp_validate_tx(void *arg)
 	if (err)
 		report_failure(test);
 
-	if (test->total_steps == test->current_step || err)
-		testapp_cleanup_xsk_res(ifobject);
 	pthread_exit(NULL);
 }
 
-static void xsk_populate_fill_ring(struct xsk_umem_info *umem, struct pkt_stream *pkt_stream)
-{
-	u32 idx = 0, i, buffers_to_fill;
-	int ret;
-
-	if (umem->num_frames < XSK_RING_PROD__DEFAULT_NUM_DESCS)
-		buffers_to_fill = umem->num_frames;
-	else
-		buffers_to_fill = XSK_RING_PROD__DEFAULT_NUM_DESCS;
-
-	ret = xsk_ring_prod__reserve(&umem->fq, buffers_to_fill, &idx);
-	if (ret != buffers_to_fill)
-		exit_with_error(ENOSPC);
-	for (i = 0; i < buffers_to_fill; i++) {
-		u64 addr;
-
-		if (pkt_stream->use_addr_for_fill) {
-			struct pkt *pkt = pkt_stream_get_pkt(pkt_stream, i);
-
-			if (!pkt)
-				break;
-			addr = pkt->addr;
-		} else {
-			addr = i * umem->frame_size;
-		}
-
-		*xsk_ring_prod__fill_addr(&umem->fq, idx++) = addr;
-	}
-	xsk_ring_prod__submit(&umem->fq, buffers_to_fill);
-}
-
 static void *worker_testapp_validate_rx(void *arg)
 {
 	struct test_spec *test = (struct test_spec *)arg;
 	struct ifobject *ifobject = test->ifobj_rx;
 	struct pollfd fds = { };
+	int id = 0;
 	int err;
 
-	if (test->current_step == 1)
+	if (test->current_step == 1) {
 		thread_common_ops(test, ifobject);
-
-	xsk_populate_fill_ring(ifobject->umem, ifobject->pkt_stream);
+	} else {
+		bpf_map_delete_elem(ifobject->xsk_map_fd, &id);
+		xsk_socket__update_xskmap(ifobject->xsk->xsk, ifobject->xsk_map_fd);
+	}
 
 	fds.fd = xsk_socket__fd(ifobject->xsk->xsk);
 	fds.events = POLLIN;
@@ -1311,25 +1331,38 @@ static void *worker_testapp_validate_rx(void *arg)
 		pthread_mutex_unlock(&pacing_mutex);
 	}
 
-	if (test->total_steps == test->current_step || err)
-		testapp_cleanup_xsk_res(ifobject);
 	pthread_exit(NULL);
 }
 
+static void testapp_clean_xsk_umem(struct ifobject *ifobj)
+{
+	u64 umem_sz = ifobj->umem->num_frames * ifobj->umem->frame_size;
+
+	if (ifobj->shared_umem)
+		umem_sz *= 2;
+
+	xsk_umem__delete(ifobj->umem->umem);
+	munmap(ifobj->umem->buffer, umem_sz);
+}
+
 static int testapp_validate_traffic_single_thread(struct test_spec *test, struct ifobject *ifobj,
 						  enum test_type type)
 {
+	bool old_shared_umem = ifobj->shared_umem;
 	pthread_t t0;
 
 	if (pthread_barrier_init(&barr, NULL, 2))
 		exit_with_error(errno);
 
 	test->current_step++;
-	if (type  == TEST_TYPE_POLL_RXQ_TMOUT)
+	if (type == TEST_TYPE_POLL_RXQ_TMOUT)
 		pkt_stream_reset(ifobj->pkt_stream);
 	pkts_in_flight = 0;
 
-	/*Spawn thread */
+	test->ifobj_rx->shared_umem = false;
+	test->ifobj_tx->shared_umem = false;
+
+	/* Spawn thread */
 	pthread_create(&t0, NULL, ifobj->func_ptr, test);
 
 	if (type != TEST_TYPE_POLL_TXQ_TMOUT)
@@ -1340,6 +1373,14 @@ static int testapp_validate_traffic_single_thread(struct test_spec *test, struct
 
 	pthread_join(t0, NULL);
 
+	if (test->total_steps == test->current_step || test->fail) {
+		xsk_socket__delete(ifobj->xsk->xsk);
+		testapp_clean_xsk_umem(ifobj);
+	}
+
+	test->ifobj_rx->shared_umem = old_shared_umem;
+	test->ifobj_tx->shared_umem = old_shared_umem;
+
 	return !!test->fail;
 }
 
@@ -1369,6 +1410,14 @@ static int testapp_validate_traffic(struct test_spec *test)
 	pthread_join(t1, NULL);
 	pthread_join(t0, NULL);
 
+	if (test->total_steps == test->current_step || test->fail) {
+		xsk_socket__delete(ifobj_tx->xsk->xsk);
+		xsk_socket__delete(ifobj_rx->xsk->xsk);
+		testapp_clean_xsk_umem(ifobj_rx);
+		if (!ifobj_tx->shared_umem)
+			testapp_clean_xsk_umem(ifobj_tx);
+	}
+
 	return !!test->fail;
 }
 
@@ -1448,9 +1497,9 @@ static void testapp_headroom(struct test_spec *test)
 static void testapp_stats_rx_dropped(struct test_spec *test)
 {
 	test_spec_set_name(test, "STAT_RX_DROPPED");
+	pkt_stream_replace_half(test, MIN_PKT_SIZE * 4, 0);
 	test->ifobj_rx->umem->frame_headroom = test->ifobj_rx->umem->frame_size -
 		XDP_PACKET_HEADROOM - MIN_PKT_SIZE * 3;
-	pkt_stream_replace_half(test, MIN_PKT_SIZE * 4, 0);
 	pkt_stream_receive_half(test);
 	test->ifobj_rx->validation_func = validate_rx_dropped;
 	testapp_validate_traffic(test);
@@ -1573,6 +1622,11 @@ static void testapp_invalid_desc(struct test_spec *test)
 		pkts[7].valid = false;
 	}
 
+	if (test->ifobj_tx->shared_umem) {
+		pkts[4].addr += UMEM_SIZE;
+		pkts[5].addr += UMEM_SIZE;
+	}
+
 	pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts));
 	testapp_validate_traffic(test);
 	pkt_stream_restore_default(test);
@@ -1769,6 +1823,7 @@ int main(int argc, char **argv)
 	int modes = TEST_MODE_SKB + 1;
 	u32 i, j, failed_tests = 0;
 	struct test_spec test;
+	bool shared_umem;
 
 	/* Use libbpf 1.0 API mode */
 	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
@@ -1783,6 +1838,10 @@ int main(int argc, char **argv)
 	setlocale(LC_ALL, "");
 
 	parse_command_line(ifobj_tx, ifobj_rx, argc, argv);
+	shared_umem = !strcmp(ifobj_tx->ifname, ifobj_rx->ifname);
+
+	ifobj_tx->shared_umem = shared_umem;
+	ifobj_rx->shared_umem = shared_umem;
 
 	if (!validate_interface(ifobj_tx) || !validate_interface(ifobj_rx)) {
 		usage(basename(argv[0]));
@@ -1819,6 +1878,9 @@ int main(int argc, char **argv)
 
 	pkt_stream_delete(tx_pkt_stream_default);
 	pkt_stream_delete(rx_pkt_stream_default);
+	free(ifobj_rx->umem);
+	if (!ifobj_tx->shared_umem)
+		free(ifobj_tx->umem);
 	ifobject_delete(ifobj_tx);
 	ifobject_delete(ifobj_rx);
 
diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h
index 04b298c72f67..11f017785986 100644
--- a/tools/testing/selftests/bpf/xskxceiver.h
+++ b/tools/testing/selftests/bpf/xskxceiver.h
@@ -153,6 +153,7 @@ struct ifobject {
 	bool busy_poll;
 	bool use_fill_ring;
 	bool release_rx;
+	bool shared_umem;
 	u8 dst_mac[ETH_ALEN];
 	u8 src_mac[ETH_ALEN];
 };
-- 
cgit v1.2.3


From c29fe883defcbc6cd16176787a2084b8e05dabf0 Mon Sep 17 00:00:00 2001
From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Date: Thu, 1 Sep 2022 13:48:12 +0200
Subject: selftests/xsk: Make sure single threaded test terminates

For single-threaded poll tests, call pthread_kill() from the main thread
so that we are sure the worker thread has finished its job and it is
possible to proceed with the next test types from the test suite. It was
observed that on some platforms it takes a bit longer for the worker
thread to exit, and the next test case then sees the device as busy.

Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Magnus Karlsson <magnus.karlsson@intel.com>
Link: https://lore.kernel.org/bpf/20220901114813.16275-6-maciej.fijalkowski@intel.com
---
 tools/testing/selftests/bpf/xskxceiver.c | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c
index b54b844cae89..74b21ddf5a98 100644
--- a/tools/testing/selftests/bpf/xskxceiver.c
+++ b/tools/testing/selftests/bpf/xskxceiver.c
@@ -1345,6 +1345,11 @@ static void testapp_clean_xsk_umem(struct ifobject *ifobj)
 	munmap(ifobj->umem->buffer, umem_sz);
 }
 
+static void handler(int signum)
+{
+	pthread_exit(NULL);
+}
+
 static int testapp_validate_traffic_single_thread(struct test_spec *test, struct ifobject *ifobj,
 						  enum test_type type)
 {
@@ -1362,6 +1367,7 @@ static int testapp_validate_traffic_single_thread(struct test_spec *test, struct
 	test->ifobj_rx->shared_umem = false;
 	test->ifobj_tx->shared_umem = false;
 
+	signal(SIGUSR1, handler);
 	/* Spawn thread */
 	pthread_create(&t0, NULL, ifobj->func_ptr, test);
 
@@ -1371,6 +1377,7 @@ static int testapp_validate_traffic_single_thread(struct test_spec *test, struct
 	if (pthread_barrier_destroy(&barr))
 		exit_with_error(errno);
 
+	pthread_kill(t0, SIGUSR1);
 	pthread_join(t0, NULL);
 
 	if (test->total_steps == test->current_step || test->fail) {
-- 
cgit v1.2.3


From fe2ad08e1e1df77b6941916b87d4871d751b88b6 Mon Sep 17 00:00:00 2001
From: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Date: Thu, 1 Sep 2022 13:48:13 +0200
Subject: selftests/xsk: Add support for zero copy testing

Introduce a new mode to xskxceiver responsible for testing the AF_XDP zero
copy support of the driver that serves the underlying physical device. When
setting up the test suite, determine whether the driver has ZC support by
trying to bind an XSK ZC socket to the interface. If that succeeds,
interpret it as ZC support being in place and run the softirq and busy poll
tests for zero copy mode.

Note that Rx dropped tests are skipped since ZC path is not touching
rx_dropped stat at all.

Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Magnus Karlsson <magnus.karlsson@intel.com>
Link: https://lore.kernel.org/bpf/20220901114813.16275-7-maciej.fijalkowski@intel.com
---
 tools/testing/selftests/bpf/xskxceiver.c | 76 ++++++++++++++++++++++++++++++--
 tools/testing/selftests/bpf/xskxceiver.h |  2 +
 2 files changed, 74 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c
index 74b21ddf5a98..ef33309bbe49 100644
--- a/tools/testing/selftests/bpf/xskxceiver.c
+++ b/tools/testing/selftests/bpf/xskxceiver.c
@@ -124,9 +124,20 @@ static void __exit_with_error(int error, const char *file, const char *func, int
 }
 
 #define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__)
-
-#define mode_string(test) (test)->ifobj_tx->xdp_flags & XDP_FLAGS_SKB_MODE ? "SKB" : "DRV"
 #define busy_poll_string(test) (test)->ifobj_tx->busy_poll ? "BUSY-POLL " : ""
+static char *mode_string(struct test_spec *test)
+{
+	switch (test->mode) {
+	case TEST_MODE_SKB:
+		return "SKB";
+	case TEST_MODE_DRV:
+		return "DRV";
+	case TEST_MODE_ZC:
+		return "ZC";
+	default:
+		return "BOGUS";
+	}
+}
 
 static void report_failure(struct test_spec *test)
 {
@@ -322,6 +333,51 @@ static int __xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_i
 	return xsk_socket__create(&xsk->xsk, ifobject->ifname, 0, umem->umem, rxr, txr, &cfg);
 }
 
+static bool ifobj_zc_avail(struct ifobject *ifobject)
+{
+	size_t umem_sz = DEFAULT_UMEM_BUFFERS * XSK_UMEM__DEFAULT_FRAME_SIZE;
+	int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
+	struct xsk_socket_info *xsk;
+	struct xsk_umem_info *umem;
+	bool zc_avail = false;
+	void *bufs;
+	int ret;
+
+	bufs = mmap(NULL, umem_sz, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
+	if (bufs == MAP_FAILED)
+		exit_with_error(errno);
+
+	umem = calloc(1, sizeof(struct xsk_umem_info));
+	if (!umem) {
+		munmap(bufs, umem_sz);
+		exit_with_error(-ENOMEM);
+	}
+	umem->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
+	ret = xsk_configure_umem(umem, bufs, umem_sz);
+	if (ret)
+		exit_with_error(-ret);
+
+	xsk = calloc(1, sizeof(struct xsk_socket_info));
+	if (!xsk)
+		goto out;
+	ifobject->xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
+	ifobject->xdp_flags |= XDP_FLAGS_DRV_MODE;
+	ifobject->bind_flags = XDP_USE_NEED_WAKEUP | XDP_ZEROCOPY;
+	ifobject->rx_on = true;
+	xsk->rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS;
+	ret = __xsk_configure_socket(xsk, umem, ifobject, false);
+	if (!ret)
+		zc_avail = true;
+
+	xsk_socket__delete(xsk->xsk);
+	free(xsk);
+out:
+	munmap(umem->buffer, umem_sz);
+	xsk_umem__delete(umem->umem);
+	free(umem);
+	return zc_avail;
+}
+
 static struct option long_options[] = {
 	{"interface", required_argument, 0, 'i'},
 	{"busy-poll", no_argument, 0, 'b'},
@@ -488,9 +544,14 @@ static void test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
 		else
 			ifobj->xdp_flags |= XDP_FLAGS_DRV_MODE;
 
-		ifobj->bind_flags = XDP_USE_NEED_WAKEUP | XDP_COPY;
+		ifobj->bind_flags = XDP_USE_NEED_WAKEUP;
+		if (mode == TEST_MODE_ZC)
+			ifobj->bind_flags |= XDP_ZEROCOPY;
+		else
+			ifobj->bind_flags |= XDP_COPY;
 	}
 
+	test->mode = mode;
 	__test_spec_init(test, ifobj_tx, ifobj_rx);
 }
 
@@ -1664,6 +1725,10 @@ static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_
 {
 	switch (type) {
 	case TEST_TYPE_STATS_RX_DROPPED:
+		if (mode == TEST_MODE_ZC) {
+			ksft_test_result_skip("Can not run RX_DROPPED test for ZC mode\n");
+			return;
+		}
 		testapp_stats_rx_dropped(test);
 		break;
 	case TEST_TYPE_STATS_TX_INVALID_DESCS:
@@ -1860,8 +1925,11 @@ int main(int argc, char **argv)
 	init_iface(ifobj_rx, MAC2, MAC1, IP2, IP1, UDP_PORT2, UDP_PORT1,
 		   worker_testapp_validate_rx);
 
-	if (is_xdp_supported(ifobj_tx))
+	if (is_xdp_supported(ifobj_tx)) {
 		modes++;
+		if (ifobj_zc_avail(ifobj_tx))
+			modes++;
+	}
 
 	test_spec_init(&test, ifobj_tx, ifobj_rx, 0);
 	tx_pkt_stream_default = pkt_stream_generate(ifobj_tx->umem, DEFAULT_PKT_CNT, PKT_SIZE);
diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h
index 11f017785986..edb76d2def9f 100644
--- a/tools/testing/selftests/bpf/xskxceiver.h
+++ b/tools/testing/selftests/bpf/xskxceiver.h
@@ -62,6 +62,7 @@
 enum test_mode {
 	TEST_MODE_SKB,
 	TEST_MODE_DRV,
+	TEST_MODE_ZC,
 	TEST_MODE_MAX
 };
 
@@ -167,6 +168,7 @@ struct test_spec {
 	u16 current_step;
 	u16 nb_sockets;
 	bool fail;
+	enum test_mode mode;
 	char name[MAX_TEST_NAME_SIZE];
 };
 
-- 
cgit v1.2.3


From afef88e65554c3e8691513b8350d6445e292560e Mon Sep 17 00:00:00 2001
From: Daniel Müller <deso@posteo.net>
Date: Thu, 1 Sep 2022 22:22:53 +0000
Subject: selftests/bpf: Store BPF object files with .bpf.o extension
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

BPF object files are, in a way, the final artifact produced as part of
the ahead-of-time compilation process. That makes them somewhat special
compared to "regular" object files, which are intermediate build
artifacts that can typically be removed safely. As such, it can make
sense to name them differently to make it easier to spot this difference
at a glance.

Among others, libbpf-bootstrap [0] has established the extension .bpf.o
for BPF object files. It seems reasonable to follow this example and
establish the same denomination for selftest build artifacts. To that
end, this change adjusts the corresponding part of the build system and
the test programs loading BPF object files to work with .bpf.o files.

  [0] https://github.com/libbpf/libbpf-bootstrap

Suggested-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Müller <deso@posteo.net>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20220901222253.1199242-1-deso@posteo.net
---
 tools/testing/selftests/bpf/Makefile               | 36 +++++------
 tools/testing/selftests/bpf/README.rst             |  8 +--
 tools/testing/selftests/bpf/get_cgroup_id_user.c   |  2 +-
 .../testing/selftests/bpf/prog_tests/bpf_obj_id.c  |  2 +-
 .../selftests/bpf/prog_tests/bpf_verif_scale.c     | 54 ++++++++--------
 tools/testing/selftests/bpf/prog_tests/btf.c       |  4 +-
 tools/testing/selftests/bpf/prog_tests/btf_dump.c  |  6 +-
 .../testing/selftests/bpf/prog_tests/btf_endian.c  |  2 +-
 .../selftests/bpf/prog_tests/connect_force_port.c  |  2 +-
 .../testing/selftests/bpf/prog_tests/core_reloc.c  | 74 +++++++++++-----------
 .../selftests/bpf/prog_tests/fexit_bpf2bpf.c       | 44 ++++++-------
 .../selftests/bpf/prog_tests/get_stack_raw_tp.c    |  4 +-
 .../testing/selftests/bpf/prog_tests/global_data.c |  2 +-
 .../selftests/bpf/prog_tests/global_data_init.c    |  2 +-
 .../selftests/bpf/prog_tests/global_func_args.c    |  2 +-
 tools/testing/selftests/bpf/prog_tests/kfree_skb.c |  2 +-
 tools/testing/selftests/bpf/prog_tests/l4lb_all.c  |  4 +-
 .../selftests/bpf/prog_tests/load_bytes_relative.c |  4 +-
 tools/testing/selftests/bpf/prog_tests/map_lock.c  |  2 +-
 tools/testing/selftests/bpf/prog_tests/pinning.c   |  4 +-
 .../testing/selftests/bpf/prog_tests/pkt_access.c  |  2 +-
 .../selftests/bpf/prog_tests/pkt_md_access.c       |  2 +-
 .../testing/selftests/bpf/prog_tests/probe_user.c  |  2 +-
 .../selftests/bpf/prog_tests/queue_stack_map.c     |  4 +-
 .../testing/selftests/bpf/prog_tests/rdonly_maps.c |  2 +-
 .../selftests/bpf/prog_tests/reference_tracking.c  |  2 +-
 .../selftests/bpf/prog_tests/resolve_btfids.c      |  2 +-
 .../selftests/bpf/prog_tests/select_reuseport.c    |  4 +-
 tools/testing/selftests/bpf/prog_tests/sk_assign.c |  2 +-
 tools/testing/selftests/bpf/prog_tests/skb_ctx.c   |  2 +-
 .../testing/selftests/bpf/prog_tests/skb_helpers.c |  2 +-
 .../selftests/bpf/prog_tests/sockopt_inherit.c     |  2 +-
 .../selftests/bpf/prog_tests/sockopt_multi.c       |  2 +-
 tools/testing/selftests/bpf/prog_tests/spinlock.c  |  2 +-
 .../selftests/bpf/prog_tests/stacktrace_map.c      |  2 +-
 .../bpf/prog_tests/stacktrace_map_raw_tp.c         |  2 +-
 tools/testing/selftests/bpf/prog_tests/tailcalls.c | 36 +++++------
 .../selftests/bpf/prog_tests/task_fd_query_rawtp.c |  2 +-
 .../selftests/bpf/prog_tests/task_fd_query_tp.c    |  2 +-
 .../testing/selftests/bpf/prog_tests/tcp_estats.c  |  2 +-
 .../selftests/bpf/prog_tests/test_global_funcs.c   | 34 +++++-----
 .../selftests/bpf/prog_tests/test_overhead.c       |  2 +-
 .../selftests/bpf/prog_tests/tp_attach_query.c     |  2 +-
 .../selftests/bpf/prog_tests/trampoline_count.c    |  2 +-
 tools/testing/selftests/bpf/prog_tests/xdp.c       |  2 +-
 .../selftests/bpf/prog_tests/xdp_adjust_frags.c    |  2 +-
 .../selftests/bpf/prog_tests/xdp_adjust_tail.c     | 10 +--
 .../testing/selftests/bpf/prog_tests/xdp_attach.c  |  2 +-
 tools/testing/selftests/bpf/prog_tests/xdp_info.c  |  2 +-
 tools/testing/selftests/bpf/prog_tests/xdp_perf.c  |  2 +-
 .../selftests/bpf/prog_tests/xdp_synproxy.c        |  2 +-
 tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c  |  8 +--
 tools/testing/selftests/bpf/test_dev_cgroup.c      |  2 +-
 tools/testing/selftests/bpf/test_lirc_mode2_user.c |  2 +-
 tools/testing/selftests/bpf/test_maps.c            | 10 +--
 tools/testing/selftests/bpf/test_offload.py        | 22 +++----
 tools/testing/selftests/bpf/test_skb_cgroup_id.sh  |  2 +-
 tools/testing/selftests/bpf/test_sock_addr.c       | 16 ++---
 tools/testing/selftests/bpf/test_sockmap.c         |  4 +-
 tools/testing/selftests/bpf/test_sysctl.c          |  6 +-
 .../selftests/bpf/test_tcp_check_syncookie.sh      |  2 +-
 tools/testing/selftests/bpf/test_tcpnotify_user.c  |  2 +-
 tools/testing/selftests/bpf/test_xdp_redirect.sh   |  8 +--
 .../selftests/bpf/test_xdp_redirect_multi.sh       |  2 +-
 tools/testing/selftests/bpf/test_xdp_veth.sh       |  8 +--
 tools/testing/selftests/bpf/xdp_redirect_multi.c   |  2 +-
 tools/testing/selftests/bpf/xdp_synproxy.c         |  2 +-
 tools/testing/selftests/bpf/xdping.c               |  2 +-
 68 files changed, 250 insertions(+), 250 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index eecad99f1735..c10adecb5a73 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -45,7 +45,7 @@ ifneq ($(BPF_GCC),)
 TEST_GEN_PROGS += test_progs-bpf_gcc
 endif
 
-TEST_GEN_FILES = test_lwt_ip_encap.o test_tc_edt.o
+TEST_GEN_FILES = test_lwt_ip_encap.bpf.o test_tc_edt.bpf.o
 TEST_FILES = xsk_prereqs.sh $(wildcard progs/btf_dump_test_case_*.c)
 
 # Order correspond to 'make run_tests' order
@@ -358,17 +358,17 @@ LSKELS := kfunc_call_test.c fentry_test.c fexit_test.c fexit_sleep.c \
 LSKELS_EXTRA := test_ksyms_module.c test_ksyms_weak.c kfunc_call_test_subprog.c
 SKEL_BLACKLIST += $$(LSKELS)
 
-test_static_linked.skel.h-deps := test_static_linked1.o test_static_linked2.o
-linked_funcs.skel.h-deps := linked_funcs1.o linked_funcs2.o
-linked_vars.skel.h-deps := linked_vars1.o linked_vars2.o
-linked_maps.skel.h-deps := linked_maps1.o linked_maps2.o
+test_static_linked.skel.h-deps := test_static_linked1.bpf.o test_static_linked2.bpf.o
+linked_funcs.skel.h-deps := linked_funcs1.bpf.o linked_funcs2.bpf.o
+linked_vars.skel.h-deps := linked_vars1.bpf.o linked_vars2.bpf.o
+linked_maps.skel.h-deps := linked_maps1.bpf.o linked_maps2.bpf.o
 # In the subskeleton case, we want the test_subskeleton_lib.subskel.h file
 # but that's created as a side-effect of the skel.h generation.
-test_subskeleton.skel.h-deps := test_subskeleton_lib2.o test_subskeleton_lib.o test_subskeleton.o
-test_subskeleton_lib.skel.h-deps := test_subskeleton_lib2.o test_subskeleton_lib.o
-test_usdt.skel.h-deps := test_usdt.o test_usdt_multispec.o
+test_subskeleton.skel.h-deps := test_subskeleton_lib2.bpf.o test_subskeleton_lib.bpf.o test_subskeleton.bpf.o
+test_subskeleton_lib.skel.h-deps := test_subskeleton_lib2.bpf.o test_subskeleton_lib.bpf.o
+test_usdt.skel.h-deps := test_usdt.bpf.o test_usdt_multispec.bpf.o
 
-LINKED_BPF_SRCS := $(patsubst %.o,%.c,$(foreach skel,$(LINKED_SKELS),$($(skel)-deps)))
+LINKED_BPF_SRCS := $(patsubst %.bpf.o,%.c,$(foreach skel,$(LINKED_SKELS),$($(skel)-deps)))
 
 # Set up extra TRUNNER_XXX "temporary" variables in the environment (relies on
 # $eval()) and pass control to DEFINE_TEST_RUNNER_RULES.
@@ -386,7 +386,7 @@ TRUNNER_EXTRA_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o,		\
 TRUNNER_EXTRA_HDRS := $$(filter %.h,$(TRUNNER_EXTRA_SOURCES))
 TRUNNER_TESTS_HDR := $(TRUNNER_TESTS_DIR)/tests.h
 TRUNNER_BPF_SRCS := $$(notdir $$(wildcard $(TRUNNER_BPF_PROGS_DIR)/*.c))
-TRUNNER_BPF_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o, $$(TRUNNER_BPF_SRCS))
+TRUNNER_BPF_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.bpf.o, $$(TRUNNER_BPF_SRCS))
 TRUNNER_BPF_SKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.skel.h,	\
 				 $$(filter-out $(SKEL_BLACKLIST) $(LINKED_BPF_SRCS),\
 					       $$(TRUNNER_BPF_SRCS)))
@@ -416,7 +416,7 @@ endif
 # input/output directory combination
 ifeq ($($(TRUNNER_BPF_PROGS_DIR)$(if $2,-)$2-bpfobjs),)
 $(TRUNNER_BPF_PROGS_DIR)$(if $2,-)$2-bpfobjs := y
-$(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.o:				\
+$(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.bpf.o:				\
 		     $(TRUNNER_BPF_PROGS_DIR)/%.c			\
 		     $(TRUNNER_BPF_PROGS_DIR)/*.h			\
 		     $$(INCLUDE_DIR)/vmlinux.h				\
@@ -426,25 +426,25 @@ $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.o:				\
 	$$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@,			\
 					  $(TRUNNER_BPF_CFLAGS))
 
-$(TRUNNER_BPF_SKELS): %.skel.h: %.o $(BPFTOOL) | $(TRUNNER_OUTPUT)
+$(TRUNNER_BPF_SKELS): %.skel.h: %.bpf.o $(BPFTOOL) | $(TRUNNER_OUTPUT)
 	$$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@)
 	$(Q)$$(BPFTOOL) gen object $$(<:.o=.linked1.o) $$<
 	$(Q)$$(BPFTOOL) gen object $$(<:.o=.linked2.o) $$(<:.o=.linked1.o)
 	$(Q)$$(BPFTOOL) gen object $$(<:.o=.linked3.o) $$(<:.o=.linked2.o)
 	$(Q)diff $$(<:.o=.linked2.o) $$(<:.o=.linked3.o)
-	$(Q)$$(BPFTOOL) gen skeleton $$(<:.o=.linked3.o) name $$(notdir $$(<:.o=)) > $$@
-	$(Q)$$(BPFTOOL) gen subskeleton $$(<:.o=.linked3.o) name $$(notdir $$(<:.o=)) > $$(@:.skel.h=.subskel.h)
+	$(Q)$$(BPFTOOL) gen skeleton $$(<:.o=.linked3.o) name $$(notdir $$(<:.bpf.o=)) > $$@
+	$(Q)$$(BPFTOOL) gen subskeleton $$(<:.o=.linked3.o) name $$(notdir $$(<:.bpf.o=)) > $$(@:.skel.h=.subskel.h)
 
-$(TRUNNER_BPF_LSKELS): %.lskel.h: %.o $(BPFTOOL) | $(TRUNNER_OUTPUT)
+$(TRUNNER_BPF_LSKELS): %.lskel.h: %.bpf.o $(BPFTOOL) | $(TRUNNER_OUTPUT)
 	$$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@)
 	$(Q)$$(BPFTOOL) gen object $$(<:.o=.llinked1.o) $$<
 	$(Q)$$(BPFTOOL) gen object $$(<:.o=.llinked2.o) $$(<:.o=.llinked1.o)
 	$(Q)$$(BPFTOOL) gen object $$(<:.o=.llinked3.o) $$(<:.o=.llinked2.o)
 	$(Q)diff $$(<:.o=.llinked2.o) $$(<:.o=.llinked3.o)
-	$(Q)$$(BPFTOOL) gen skeleton -L $$(<:.o=.llinked3.o) name $$(notdir $$(<:.o=_lskel)) > $$@
+	$(Q)$$(BPFTOOL) gen skeleton -L $$(<:.o=.llinked3.o) name $$(notdir $$(<:.bpf.o=_lskel)) > $$@
 
 $(TRUNNER_BPF_SKELS_LINKED): $(TRUNNER_BPF_OBJS) $(BPFTOOL) | $(TRUNNER_OUTPUT)
-	$$(call msg,LINK-BPF,$(TRUNNER_BINARY),$$(@:.skel.h=.o))
+	$$(call msg,LINK-BPF,$(TRUNNER_BINARY),$$(@:.skel.h=.bpf.o))
 	$(Q)$$(BPFTOOL) gen object $$(@:.skel.h=.linked1.o) $$(addprefix $(TRUNNER_OUTPUT)/,$$($$(@F)-deps))
 	$(Q)$$(BPFTOOL) gen object $$(@:.skel.h=.linked2.o) $$(@:.skel.h=.linked1.o)
 	$(Q)$$(BPFTOOL) gen object $$(@:.skel.h=.linked3.o) $$(@:.skel.h=.linked2.o)
@@ -500,7 +500,7 @@ $(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS)			\
 			     | $(TRUNNER_BINARY)-extras
 	$$(call msg,BINARY,,$$@)
 	$(Q)$$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) -o $$@
-	$(Q)$(RESOLVE_BTFIDS) --btf $(TRUNNER_OUTPUT)/btf_data.o $$@
+	$(Q)$(RESOLVE_BTFIDS) --btf $(TRUNNER_OUTPUT)/btf_data.bpf.o $$@
 	$(Q)ln -sf $(if $2,..,.)/tools/build/bpftool/bootstrap/bpftool $(if $2,$2/)bpftool
 
 endef
diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst
index eb1b7541f39d..d3c6b3da0bb1 100644
--- a/tools/testing/selftests/bpf/README.rst
+++ b/tools/testing/selftests/bpf/README.rst
@@ -126,11 +126,11 @@ available in 10.0.1. The patch is available in llvm 11.0.0 trunk.
 
 __  https://reviews.llvm.org/D78466
 
-bpf_verif_scale/loop6.o test failure with Clang 12
-==================================================
+bpf_verif_scale/loop6.bpf.o test failure with Clang 12
+======================================================
 
 With Clang 12, the following bpf_verif_scale test failed:
-  * ``bpf_verif_scale/loop6.o``
+  * ``bpf_verif_scale/loop6.bpf.o``
 
 The verifier output looks like
 
@@ -245,7 +245,7 @@ See `kernel llvm reloc`_ for more explanation and some examples.
 Using clang 13 to compile old libbpf which has static linker support,
 there will be a compilation failure::
 
-  libbpf: ELF relo #0 in section #6 has unexpected type 2 in .../bpf_tcp_nogpl.o
+  libbpf: ELF relo #0 in section #6 has unexpected type 2 in .../bpf_tcp_nogpl.bpf.o
 
 Here, ``type 2`` refers to new relocation type ``R_BPF_64_ABS64``.
 To fix this issue, user newer libbpf.
diff --git a/tools/testing/selftests/bpf/get_cgroup_id_user.c b/tools/testing/selftests/bpf/get_cgroup_id_user.c
index e021cc67dc02..156743cf5870 100644
--- a/tools/testing/selftests/bpf/get_cgroup_id_user.c
+++ b/tools/testing/selftests/bpf/get_cgroup_id_user.c
@@ -48,7 +48,7 @@ static int bpf_find_map(const char *test, struct bpf_object *obj,
 int main(int argc, char **argv)
 {
 	const char *probe_name = "syscalls/sys_enter_nanosleep";
-	const char *file = "get_cgroup_id_kern.o";
+	const char *file = "get_cgroup_id_kern.bpf.o";
 	int err, bytes, efd, prog_fd, pmu_fd;
 	int cgroup_fd, cgidmap_fd, pidmap_fd;
 	struct perf_event_attr attr = {};
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
index dbe56fa8582d..e1c1e521cca2 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
@@ -7,7 +7,7 @@ void serial_test_bpf_obj_id(void)
 {
 	const __u64 array_magic_value = 0xfaceb00c;
 	const __u32 array_key = 0;
-	const char *file = "./test_obj_id.o";
+	const char *file = "./test_obj_id.bpf.o";
 	const char *expected_prog_name = "test_obj_id";
 	const char *expected_map_name = "test_map_id";
 	const __u64 nsec_per_sec = 1000000000;
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
index ff6cce9fef06..5ca252823294 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
@@ -75,45 +75,45 @@ static void scale_test(const char *file,
 
 void test_verif_scale1()
 {
-	scale_test("test_verif_scale1.o", BPF_PROG_TYPE_SCHED_CLS, false);
+	scale_test("test_verif_scale1.bpf.o", BPF_PROG_TYPE_SCHED_CLS, false);
 }
 
 void test_verif_scale2()
 {
-	scale_test("test_verif_scale2.o", BPF_PROG_TYPE_SCHED_CLS, false);
+	scale_test("test_verif_scale2.bpf.o", BPF_PROG_TYPE_SCHED_CLS, false);
 }
 
 void test_verif_scale3()
 {
-	scale_test("test_verif_scale3.o", BPF_PROG_TYPE_SCHED_CLS, false);
+	scale_test("test_verif_scale3.bpf.o", BPF_PROG_TYPE_SCHED_CLS, false);
 }
 
 void test_verif_scale_pyperf_global()
 {
-	scale_test("pyperf_global.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+	scale_test("pyperf_global.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
 }
 
 void test_verif_scale_pyperf_subprogs()
 {
-	scale_test("pyperf_subprogs.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+	scale_test("pyperf_subprogs.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
 }
 
 void test_verif_scale_pyperf50()
 {
 	/* full unroll by llvm */
-	scale_test("pyperf50.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+	scale_test("pyperf50.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
 }
 
 void test_verif_scale_pyperf100()
 {
 	/* full unroll by llvm */
-	scale_test("pyperf100.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+	scale_test("pyperf100.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
 }
 
 void test_verif_scale_pyperf180()
 {
 	/* full unroll by llvm */
-	scale_test("pyperf180.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+	scale_test("pyperf180.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
 }
 
 void test_verif_scale_pyperf600()
@@ -124,13 +124,13 @@ void test_verif_scale_pyperf600()
 	 * 16k insns in loop body.
 	 * Total of 5 such loops. Total program size ~82k insns.
 	 */
-	scale_test("pyperf600.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+	scale_test("pyperf600.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
 }
 
 void test_verif_scale_pyperf600_bpf_loop(void)
 {
 	/* use the bpf_loop helper*/
-	scale_test("pyperf600_bpf_loop.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+	scale_test("pyperf600_bpf_loop.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
 }
 
 void test_verif_scale_pyperf600_nounroll()
@@ -141,37 +141,37 @@ void test_verif_scale_pyperf600_nounroll()
 	 * ~110 insns in loop body.
 	 * Total of 5 such loops. Total program size ~1500 insns.
 	 */
-	scale_test("pyperf600_nounroll.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+	scale_test("pyperf600_nounroll.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
 }
 
 void test_verif_scale_loop1()
 {
-	scale_test("loop1.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+	scale_test("loop1.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
 }
 
 void test_verif_scale_loop2()
 {
-	scale_test("loop2.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+	scale_test("loop2.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
 }
 
 void test_verif_scale_loop3_fail()
 {
-	scale_test("loop3.o", BPF_PROG_TYPE_RAW_TRACEPOINT, true /* fails */);
+	scale_test("loop3.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, true /* fails */);
 }
 
 void test_verif_scale_loop4()
 {
-	scale_test("loop4.o", BPF_PROG_TYPE_SCHED_CLS, false);
+	scale_test("loop4.bpf.o", BPF_PROG_TYPE_SCHED_CLS, false);
 }
 
 void test_verif_scale_loop5()
 {
-	scale_test("loop5.o", BPF_PROG_TYPE_SCHED_CLS, false);
+	scale_test("loop5.bpf.o", BPF_PROG_TYPE_SCHED_CLS, false);
 }
 
 void test_verif_scale_loop6()
 {
-	scale_test("loop6.o", BPF_PROG_TYPE_KPROBE, false);
+	scale_test("loop6.bpf.o", BPF_PROG_TYPE_KPROBE, false);
 }
 
 void test_verif_scale_strobemeta()
@@ -180,54 +180,54 @@ void test_verif_scale_strobemeta()
 	 * Total program size 20.8k insn.
 	 * ~350k processed_insns
 	 */
-	scale_test("strobemeta.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+	scale_test("strobemeta.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
 }
 
 void test_verif_scale_strobemeta_bpf_loop(void)
 {
 	/* use the bpf_loop helper*/
-	scale_test("strobemeta_bpf_loop.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+	scale_test("strobemeta_bpf_loop.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
 }
 
 void test_verif_scale_strobemeta_nounroll1()
 {
 	/* no unroll, tiny loops */
-	scale_test("strobemeta_nounroll1.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+	scale_test("strobemeta_nounroll1.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
 }
 
 void test_verif_scale_strobemeta_nounroll2()
 {
 	/* no unroll, tiny loops */
-	scale_test("strobemeta_nounroll2.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+	scale_test("strobemeta_nounroll2.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
 }
 
 void test_verif_scale_strobemeta_subprogs()
 {
 	/* non-inlined subprogs */
-	scale_test("strobemeta_subprogs.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+	scale_test("strobemeta_subprogs.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
 }
 
 void test_verif_scale_sysctl_loop1()
 {
-	scale_test("test_sysctl_loop1.o", BPF_PROG_TYPE_CGROUP_SYSCTL, false);
+	scale_test("test_sysctl_loop1.bpf.o", BPF_PROG_TYPE_CGROUP_SYSCTL, false);
 }
 
 void test_verif_scale_sysctl_loop2()
 {
-	scale_test("test_sysctl_loop2.o", BPF_PROG_TYPE_CGROUP_SYSCTL, false);
+	scale_test("test_sysctl_loop2.bpf.o", BPF_PROG_TYPE_CGROUP_SYSCTL, false);
 }
 
 void test_verif_scale_xdp_loop()
 {
-	scale_test("test_xdp_loop.o", BPF_PROG_TYPE_XDP, false);
+	scale_test("test_xdp_loop.bpf.o", BPF_PROG_TYPE_XDP, false);
 }
 
 void test_verif_scale_seg6_loop()
 {
-	scale_test("test_seg6_loop.o", BPF_PROG_TYPE_LWT_SEG6LOCAL, false);
+	scale_test("test_seg6_loop.bpf.o", BPF_PROG_TYPE_LWT_SEG6LOCAL, false);
 }
 
 void test_verif_twfw()
 {
-	scale_test("twfw.o", BPF_PROG_TYPE_CGROUP_SKB, false);
+	scale_test("twfw.bpf.o", BPF_PROG_TYPE_CGROUP_SKB, false);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index ef6528b8084c..127b8caa3dc1 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -4651,8 +4651,8 @@ struct btf_file_test {
 };
 
 static struct btf_file_test file_tests[] = {
-	{ .file = "test_btf_newkv.o", },
-	{ .file = "test_btf_nokv.o", .btf_kv_notfound = true, },
+	{ .file = "test_btf_newkv.bpf.o", },
+	{ .file = "test_btf_nokv.bpf.o", .btf_kv_notfound = true, },
 };
 
 static void do_test_file(unsigned int test_num)
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
index 7b5bbe21b549..b1ca954ed1e5 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
@@ -52,7 +52,7 @@ static int test_btf_dump_case(int n, struct btf_dump_test_case *t)
 	int err = 0, fd = -1;
 	FILE *f = NULL;
 
-	snprintf(test_file, sizeof(test_file), "%s.o", t->file);
+	snprintf(test_file, sizeof(test_file), "%s.bpf.o", t->file);
 
 	btf = btf__parse_elf(test_file, NULL);
 	if (!ASSERT_OK_PTR(btf, "btf_parse_elf")) {
@@ -841,8 +841,8 @@ static void test_btf_dump_datasec_data(char *str)
 	char license[4] = "GPL";
 	struct btf_dump *d;
 
-	btf = btf__parse("xdping_kern.o", NULL);
-	if (!ASSERT_OK_PTR(btf, "xdping_kern.o BTF not found"))
+	btf = btf__parse("xdping_kern.bpf.o", NULL);
+	if (!ASSERT_OK_PTR(btf, "xdping_kern.bpf.o BTF not found"))
 		return;
 
 	d = btf_dump__new(btf, btf_dump_snprintf, str, NULL);
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_endian.c b/tools/testing/selftests/bpf/prog_tests/btf_endian.c
index 8afbf3d0b89a..5b9f84dbeb43 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_endian.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_endian.c
@@ -23,7 +23,7 @@ void test_btf_endian() {
 	int var_id;
 
 	/* Load BTF in native endianness */
-	btf = btf__parse_elf("btf_dump_test_case_syntax.o", NULL);
+	btf = btf__parse_elf("btf_dump_test_case_syntax.bpf.o", NULL);
 	if (!ASSERT_OK_PTR(btf, "parse_native_btf"))
 		goto err_out;
 
diff --git a/tools/testing/selftests/bpf/prog_tests/connect_force_port.c b/tools/testing/selftests/bpf/prog_tests/connect_force_port.c
index 9c4325f4aef2..24d553109f8d 100644
--- a/tools/testing/selftests/bpf/prog_tests/connect_force_port.c
+++ b/tools/testing/selftests/bpf/prog_tests/connect_force_port.c
@@ -53,7 +53,7 @@ static int run_test(int cgroup_fd, int server_fd, int family, int type)
 	__u16 expected_peer_port = 60000;
 	struct bpf_program *prog;
 	struct bpf_object *obj;
-	const char *obj_file = v4 ? "connect_force_port4.o" : "connect_force_port6.o";
+	const char *obj_file = v4 ? "connect_force_port4.bpf.o" : "connect_force_port6.bpf.o";
 	int fd, err;
 	__u32 duration = 0;
 
diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
index c8655ba9a88f..47f42e680105 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
@@ -13,7 +13,7 @@ static int duration = 0;
 
 #define MODULES_CASE(name, pg_name, tp_name) {				\
 	.case_name = name,						\
-	.bpf_obj_file = "test_core_reloc_module.o",			\
+	.bpf_obj_file = "test_core_reloc_module.bpf.o",			\
 	.btf_src_file = NULL, /* find in kernel module BTFs */		\
 	.input = "",							\
 	.input_len = 0,							\
@@ -43,8 +43,8 @@ static int duration = 0;
 
 #define FLAVORS_CASE_COMMON(name)					\
 	.case_name = #name,						\
-	.bpf_obj_file = "test_core_reloc_flavors.o",			\
-	.btf_src_file = "btf__core_reloc_" #name ".o",			\
+	.bpf_obj_file = "test_core_reloc_flavors.bpf.o",		\
+	.btf_src_file = "btf__core_reloc_" #name ".bpf.o",		\
 	.raw_tp_name = "sys_enter",					\
 	.prog_name = "test_core_flavors"				\
 
@@ -68,8 +68,8 @@ static int duration = 0;
 
 #define NESTING_CASE_COMMON(name)					\
 	.case_name = #name,						\
-	.bpf_obj_file = "test_core_reloc_nesting.o",			\
-	.btf_src_file = "btf__core_reloc_" #name ".o",			\
+	.bpf_obj_file = "test_core_reloc_nesting.bpf.o",		\
+	.btf_src_file = "btf__core_reloc_" #name ".bpf.o",		\
 	.raw_tp_name = "sys_enter",					\
 	.prog_name = "test_core_nesting"				\
 
@@ -96,8 +96,8 @@ static int duration = 0;
 
 #define ARRAYS_CASE_COMMON(name)					\
 	.case_name = #name,						\
-	.bpf_obj_file = "test_core_reloc_arrays.o",			\
-	.btf_src_file = "btf__core_reloc_" #name ".o",			\
+	.bpf_obj_file = "test_core_reloc_arrays.bpf.o",			\
+	.btf_src_file = "btf__core_reloc_" #name ".bpf.o",		\
 	.raw_tp_name = "sys_enter",					\
 	.prog_name = "test_core_arrays"					\
 
@@ -130,8 +130,8 @@ static int duration = 0;
 
 #define PRIMITIVES_CASE_COMMON(name)					\
 	.case_name = #name,						\
-	.bpf_obj_file = "test_core_reloc_primitives.o",			\
-	.btf_src_file = "btf__core_reloc_" #name ".o",			\
+	.bpf_obj_file = "test_core_reloc_primitives.bpf.o",		\
+	.btf_src_file = "btf__core_reloc_" #name ".bpf.o",		\
 	.raw_tp_name = "sys_enter",					\
 	.prog_name = "test_core_primitives"				\
 
@@ -150,8 +150,8 @@ static int duration = 0;
 
 #define MODS_CASE(name) {						\
 	.case_name = #name,						\
-	.bpf_obj_file = "test_core_reloc_mods.o",			\
-	.btf_src_file = "btf__core_reloc_" #name ".o",			\
+	.bpf_obj_file = "test_core_reloc_mods.bpf.o",			\
+	.btf_src_file = "btf__core_reloc_" #name ".bpf.o",		\
 	.input = STRUCT_TO_CHAR_PTR(core_reloc_##name) {		\
 		.a = 1,							\
 		.b = 2,							\
@@ -174,8 +174,8 @@ static int duration = 0;
 
 #define PTR_AS_ARR_CASE(name) {						\
 	.case_name = #name,						\
-	.bpf_obj_file = "test_core_reloc_ptr_as_arr.o",			\
-	.btf_src_file = "btf__core_reloc_" #name ".o",			\
+	.bpf_obj_file = "test_core_reloc_ptr_as_arr.bpf.o",		\
+	.btf_src_file = "btf__core_reloc_" #name ".bpf.o",		\
 	.input = (const char *)&(struct core_reloc_##name []){		\
 		{ .a = 1 },						\
 		{ .a = 2 },						\
@@ -203,8 +203,8 @@ static int duration = 0;
 
 #define INTS_CASE_COMMON(name)						\
 	.case_name = #name,						\
-	.bpf_obj_file = "test_core_reloc_ints.o",			\
-	.btf_src_file = "btf__core_reloc_" #name ".o",			\
+	.bpf_obj_file = "test_core_reloc_ints.bpf.o",			\
+	.btf_src_file = "btf__core_reloc_" #name ".bpf.o",		\
 	.raw_tp_name = "sys_enter",					\
 	.prog_name = "test_core_ints"
 
@@ -223,18 +223,18 @@ static int duration = 0;
 
 #define FIELD_EXISTS_CASE_COMMON(name)					\
 	.case_name = #name,						\
-	.bpf_obj_file = "test_core_reloc_existence.o",			\
-	.btf_src_file = "btf__core_reloc_" #name ".o",			\
+	.bpf_obj_file = "test_core_reloc_existence.bpf.o",		\
+	.btf_src_file = "btf__core_reloc_" #name ".bpf.o",		\
 	.raw_tp_name = "sys_enter",					\
 	.prog_name = "test_core_existence"
 
 #define BITFIELDS_CASE_COMMON(objfile, test_name_prefix,  name)		\
 	.case_name = test_name_prefix#name,				\
 	.bpf_obj_file = objfile,					\
-	.btf_src_file = "btf__core_reloc_" #name ".o"
+	.btf_src_file = "btf__core_reloc_" #name ".bpf.o"
 
 #define BITFIELDS_CASE(name, ...) {					\
-	BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_probed.o",	\
+	BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_probed.bpf.o",	\
 			      "probed:", name),				\
 	.input = STRUCT_TO_CHAR_PTR(core_reloc_##name) __VA_ARGS__,	\
 	.input_len = sizeof(struct core_reloc_##name),			\
@@ -244,7 +244,7 @@ static int duration = 0;
 	.raw_tp_name = "sys_enter",					\
 	.prog_name = "test_core_bitfields",				\
 }, {									\
-	BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_direct.o",	\
+	BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_direct.bpf.o",	\
 			      "direct:", name),				\
 	.input = STRUCT_TO_CHAR_PTR(core_reloc_##name) __VA_ARGS__,	\
 	.input_len = sizeof(struct core_reloc_##name),			\
@@ -256,14 +256,14 @@ static int duration = 0;
 
 
 #define BITFIELDS_ERR_CASE(name) {					\
-	BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_probed.o",	\
+	BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_probed.bpf.o",	\
 			      "probed:", name),				\
 	.fails = true,							\
-	.run_btfgen_fails = true,							\
+	.run_btfgen_fails = true,					\
 	.raw_tp_name = "sys_enter",					\
 	.prog_name = "test_core_bitfields",				\
 }, {									\
-	BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_direct.o",	\
+	BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_direct.bpf.o",	\
 			      "direct:", name),				\
 	.fails = true,							\
 	.run_btfgen_fails = true,							\
@@ -272,8 +272,8 @@ static int duration = 0;
 
 #define SIZE_CASE_COMMON(name)						\
 	.case_name = #name,						\
-	.bpf_obj_file = "test_core_reloc_size.o",			\
-	.btf_src_file = "btf__core_reloc_" #name ".o",			\
+	.bpf_obj_file = "test_core_reloc_size.bpf.o",			\
+	.btf_src_file = "btf__core_reloc_" #name ".bpf.o",		\
 	.raw_tp_name = "sys_enter",					\
 	.prog_name = "test_core_size"
 
@@ -307,13 +307,13 @@ static int duration = 0;
 #define SIZE_ERR_CASE(name) {						\
 	SIZE_CASE_COMMON(name),						\
 	.fails = true,							\
-	.run_btfgen_fails = true,							\
+	.run_btfgen_fails = true,					\
 }
 
 #define TYPE_BASED_CASE_COMMON(name)					\
 	.case_name = #name,						\
-	.bpf_obj_file = "test_core_reloc_type_based.o",			\
-	.btf_src_file = "btf__core_reloc_" #name ".o",			\
+	.bpf_obj_file = "test_core_reloc_type_based.bpf.o",		\
+	.btf_src_file = "btf__core_reloc_" #name ".bpf.o",		\
 	.raw_tp_name = "sys_enter",					\
 	.prog_name = "test_core_type_based"
 
@@ -331,8 +331,8 @@ static int duration = 0;
 
 #define TYPE_ID_CASE_COMMON(name)					\
 	.case_name = #name,						\
-	.bpf_obj_file = "test_core_reloc_type_id.o",			\
-	.btf_src_file = "btf__core_reloc_" #name ".o",			\
+	.bpf_obj_file = "test_core_reloc_type_id.bpf.o",		\
+	.btf_src_file = "btf__core_reloc_" #name ".bpf.o",		\
 	.raw_tp_name = "sys_enter",					\
 	.prog_name = "test_core_type_id"
 
@@ -350,8 +350,8 @@ static int duration = 0;
 
 #define ENUMVAL_CASE_COMMON(name)					\
 	.case_name = #name,						\
-	.bpf_obj_file = "test_core_reloc_enumval.o",			\
-	.btf_src_file = "btf__core_reloc_" #name ".o",			\
+	.bpf_obj_file = "test_core_reloc_enumval.bpf.o",		\
+	.btf_src_file = "btf__core_reloc_" #name ".bpf.o",		\
 	.raw_tp_name = "sys_enter",					\
 	.prog_name = "test_core_enumval"
 
@@ -369,8 +369,8 @@ static int duration = 0;
 
 #define ENUM64VAL_CASE_COMMON(name)					\
 	.case_name = #name,						\
-	.bpf_obj_file = "test_core_reloc_enum64val.o",			\
-	.btf_src_file = "btf__core_reloc_" #name ".o",			\
+	.bpf_obj_file = "test_core_reloc_enum64val.bpf.o",		\
+	.btf_src_file = "btf__core_reloc_" #name ".bpf.o",		\
 	.raw_tp_name = "sys_enter",					\
 	.prog_name = "test_core_enum64val"
 
@@ -547,7 +547,7 @@ static const struct core_reloc_test_case test_cases[] = {
 	/* validate we can find kernel image and use its BTF for relocs */
 	{
 		.case_name = "kernel",
-		.bpf_obj_file = "test_core_reloc_kernel.o",
+		.bpf_obj_file = "test_core_reloc_kernel.bpf.o",
 		.btf_src_file = NULL, /* load from /lib/modules/$(uname -r) */
 		.input = "",
 		.input_len = 0,
@@ -629,8 +629,8 @@ static const struct core_reloc_test_case test_cases[] = {
 	/* validate edge cases of capturing relocations */
 	{
 		.case_name = "misc",
-		.bpf_obj_file = "test_core_reloc_misc.o",
-		.btf_src_file = "btf__core_reloc_misc.o",
+		.bpf_obj_file = "test_core_reloc_misc.bpf.o",
+		.btf_src_file = "btf__core_reloc_misc.bpf.o",
 		.input = (const char *)&(struct core_reloc_misc_extensible[]){
 			{ .a = 1 },
 			{ .a = 2 }, /* not read */
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
index da860b07abb5..d1e32e792536 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
@@ -174,8 +174,8 @@ static void test_target_no_callees(void)
 	const char *prog_name[] = {
 		"fexit/test_pkt_md_access",
 	};
-	test_fexit_bpf2bpf_common("./fexit_bpf2bpf_simple.o",
-				  "./test_pkt_md_access.o",
+	test_fexit_bpf2bpf_common("./fexit_bpf2bpf_simple.bpf.o",
+				  "./test_pkt_md_access.bpf.o",
 				  ARRAY_SIZE(prog_name),
 				  prog_name, true, NULL);
 }
@@ -188,8 +188,8 @@ static void test_target_yes_callees(void)
 		"fexit/test_pkt_access_subprog2",
 		"fexit/test_pkt_access_subprog3",
 	};
-	test_fexit_bpf2bpf_common("./fexit_bpf2bpf.o",
-				  "./test_pkt_access.o",
+	test_fexit_bpf2bpf_common("./fexit_bpf2bpf.bpf.o",
+				  "./test_pkt_access.bpf.o",
 				  ARRAY_SIZE(prog_name),
 				  prog_name, true, NULL);
 }
@@ -206,8 +206,8 @@ static void test_func_replace(void)
 		"freplace/get_constant",
 		"freplace/test_pkt_write_access_subprog",
 	};
-	test_fexit_bpf2bpf_common("./fexit_bpf2bpf.o",
-				  "./test_pkt_access.o",
+	test_fexit_bpf2bpf_common("./fexit_bpf2bpf.bpf.o",
+				  "./test_pkt_access.bpf.o",
 				  ARRAY_SIZE(prog_name),
 				  prog_name, true, NULL);
 }
@@ -217,8 +217,8 @@ static void test_func_replace_verify(void)
 	const char *prog_name[] = {
 		"freplace/do_bind",
 	};
-	test_fexit_bpf2bpf_common("./freplace_connect4.o",
-				  "./connect4_prog.o",
+	test_fexit_bpf2bpf_common("./freplace_connect4.bpf.o",
+				  "./connect4_prog.bpf.o",
 				  ARRAY_SIZE(prog_name),
 				  prog_name, false, NULL);
 }
@@ -227,7 +227,7 @@ static int test_second_attach(struct bpf_object *obj)
 {
 	const char *prog_name = "security_new_get_constant";
 	const char *tgt_name = "get_constant";
-	const char *tgt_obj_file = "./test_pkt_access.o";
+	const char *tgt_obj_file = "./test_pkt_access.bpf.o";
 	struct bpf_program *prog = NULL;
 	struct bpf_object *tgt_obj;
 	struct bpf_link *link;
@@ -272,8 +272,8 @@ static void test_func_replace_multi(void)
 	const char *prog_name[] = {
 		"freplace/get_constant",
 	};
-	test_fexit_bpf2bpf_common("./freplace_get_constant.o",
-				  "./test_pkt_access.o",
+	test_fexit_bpf2bpf_common("./freplace_get_constant.bpf.o",
+				  "./test_pkt_access.bpf.o",
 				  ARRAY_SIZE(prog_name),
 				  prog_name, true, test_second_attach);
 }
@@ -281,10 +281,10 @@ static void test_func_replace_multi(void)
 static void test_fmod_ret_freplace(void)
 {
 	struct bpf_object *freplace_obj = NULL, *pkt_obj, *fmod_obj = NULL;
-	const char *freplace_name = "./freplace_get_constant.o";
-	const char *fmod_ret_name = "./fmod_ret_freplace.o";
+	const char *freplace_name = "./freplace_get_constant.bpf.o";
+	const char *fmod_ret_name = "./fmod_ret_freplace.bpf.o";
 	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts);
-	const char *tgt_name = "./test_pkt_access.o";
+	const char *tgt_name = "./test_pkt_access.bpf.o";
 	struct bpf_link *freplace_link = NULL;
 	struct bpf_program *prog;
 	__u32 duration = 0;
@@ -339,8 +339,8 @@ static void test_func_sockmap_update(void)
 	const char *prog_name[] = {
 		"freplace/cls_redirect",
 	};
-	test_fexit_bpf2bpf_common("./freplace_cls_redirect.o",
-				  "./test_cls_redirect.o",
+	test_fexit_bpf2bpf_common("./freplace_cls_redirect.bpf.o",
+				  "./test_cls_redirect.bpf.o",
 				  ARRAY_SIZE(prog_name),
 				  prog_name, false, NULL);
 }
@@ -385,15 +385,15 @@ close_prog:
 static void test_func_replace_return_code(void)
 {
 	/* test invalid return code in the replaced program */
-	test_obj_load_failure_common("./freplace_connect_v4_prog.o",
-				     "./connect4_prog.o");
+	test_obj_load_failure_common("./freplace_connect_v4_prog.bpf.o",
+				     "./connect4_prog.bpf.o");
 }
 
 static void test_func_map_prog_compatibility(void)
 {
 	/* test with spin lock map value in the replaced program */
-	test_obj_load_failure_common("./freplace_attach_probe.o",
-				     "./test_attach_probe.o");
+	test_obj_load_failure_common("./freplace_attach_probe.bpf.o",
+				     "./test_attach_probe.bpf.o");
 }
 
 static void test_func_replace_global_func(void)
@@ -402,8 +402,8 @@ static void test_func_replace_global_func(void)
 		"freplace/test_pkt_access",
 	};
 
-	test_fexit_bpf2bpf_common("./freplace_global_func.o",
-				  "./test_pkt_access.o",
+	test_fexit_bpf2bpf_common("./freplace_global_func.bpf.o",
+				  "./test_pkt_access.bpf.o",
 				  ARRAY_SIZE(prog_name),
 				  prog_name, false, NULL);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c
index 16048978a1ef..858e0575f502 100644
--- a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c
+++ b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c
@@ -84,8 +84,8 @@ static void get_stack_print_output(void *ctx, int cpu, void *data, __u32 size)
 
 void test_get_stack_raw_tp(void)
 {
-	const char *file = "./test_get_stack_rawtp.o";
-	const char *file_err = "./test_get_stack_rawtp_err.o";
+	const char *file = "./test_get_stack_rawtp.bpf.o";
+	const char *file_err = "./test_get_stack_rawtp_err.bpf.o";
 	const char *prog_name = "bpf_prog1";
 	int i, err, prog_fd, exp_cnt = MAX_CNT_RAWTP;
 	struct perf_buffer *pb = NULL;
diff --git a/tools/testing/selftests/bpf/prog_tests/global_data.c b/tools/testing/selftests/bpf/prog_tests/global_data.c
index 027685858925..fadfb64e2a71 100644
--- a/tools/testing/selftests/bpf/prog_tests/global_data.c
+++ b/tools/testing/selftests/bpf/prog_tests/global_data.c
@@ -131,7 +131,7 @@ static void test_global_data_rdonly(struct bpf_object *obj, __u32 duration)
 
 void test_global_data(void)
 {
-	const char *file = "./test_global_data.o";
+	const char *file = "./test_global_data.bpf.o";
 	struct bpf_object *obj;
 	int err, prog_fd;
 	LIBBPF_OPTS(bpf_test_run_opts, topts,
diff --git a/tools/testing/selftests/bpf/prog_tests/global_data_init.c b/tools/testing/selftests/bpf/prog_tests/global_data_init.c
index 57331c606964..8466332d7406 100644
--- a/tools/testing/selftests/bpf/prog_tests/global_data_init.c
+++ b/tools/testing/selftests/bpf/prog_tests/global_data_init.c
@@ -3,7 +3,7 @@
 
 void test_global_data_init(void)
 {
-	const char *file = "./test_global_data.o";
+	const char *file = "./test_global_data.bpf.o";
 	int err = -ENOMEM, map_fd, zero = 0;
 	__u8 *buff = NULL, *newval = NULL;
 	struct bpf_object *obj;
diff --git a/tools/testing/selftests/bpf/prog_tests/global_func_args.c b/tools/testing/selftests/bpf/prog_tests/global_func_args.c
index 29039a36cce5..d997099f62d0 100644
--- a/tools/testing/selftests/bpf/prog_tests/global_func_args.c
+++ b/tools/testing/selftests/bpf/prog_tests/global_func_args.c
@@ -39,7 +39,7 @@ static void test_global_func_args0(struct bpf_object *obj)
 
 void test_global_func_args(void)
 {
-	const char *file = "./test_global_func_args.o";
+	const char *file = "./test_global_func_args.bpf.o";
 	struct bpf_object *obj;
 	int err, prog_fd;
 	LIBBPF_OPTS(bpf_test_run_opts, topts,
diff --git a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
index 1cee6957285e..73579370bfbd 100644
--- a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
+++ b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
@@ -69,7 +69,7 @@ void serial_test_kfree_skb(void)
 	const int zero = 0;
 	bool test_ok[2];
 
-	err = bpf_prog_test_load("./test_pkt_access.o", BPF_PROG_TYPE_SCHED_CLS,
+	err = bpf_prog_test_load("./test_pkt_access.bpf.o", BPF_PROG_TYPE_SCHED_CLS,
 				 &obj, &prog_fd);
 	if (CHECK(err, "prog_load sched cls", "err %d errno %d\n", err, errno))
 		return;
diff --git a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c
index 55f733ff4109..9c1a18573ffd 100644
--- a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c
+++ b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c
@@ -90,7 +90,7 @@ out:
 void test_l4lb_all(void)
 {
 	if (test__start_subtest("l4lb_inline"))
-		test_l4lb("test_l4lb.o");
+		test_l4lb("test_l4lb.bpf.o");
 	if (test__start_subtest("l4lb_noinline"))
-		test_l4lb("test_l4lb_noinline.o");
+		test_l4lb("test_l4lb_noinline.bpf.o");
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c b/tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c
index 4e0b2ec057aa..581c0eb0a0a1 100644
--- a/tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c
+++ b/tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c
@@ -27,8 +27,8 @@ void test_load_bytes_relative(void)
 	if (CHECK_FAIL(server_fd < 0))
 		goto close_cgroup_fd;
 
-	err = bpf_prog_test_load("./load_bytes_relative.o", BPF_PROG_TYPE_CGROUP_SKB,
-			    &obj, &prog_fd);
+	err = bpf_prog_test_load("./load_bytes_relative.bpf.o", BPF_PROG_TYPE_CGROUP_SKB,
+				 &obj, &prog_fd);
 	if (CHECK_FAIL(err))
 		goto close_server_fd;
 
diff --git a/tools/testing/selftests/bpf/prog_tests/map_lock.c b/tools/testing/selftests/bpf/prog_tests/map_lock.c
index e4e99b37df64..1d6726f01dd2 100644
--- a/tools/testing/selftests/bpf/prog_tests/map_lock.c
+++ b/tools/testing/selftests/bpf/prog_tests/map_lock.c
@@ -49,7 +49,7 @@ out:
 
 void test_map_lock(void)
 {
-	const char *file = "./test_map_lock.o";
+	const char *file = "./test_map_lock.bpf.o";
 	int prog_fd, map_fd[2], vars[17] = {};
 	pthread_t thread_id[6];
 	struct bpf_object *obj = NULL;
diff --git a/tools/testing/selftests/bpf/prog_tests/pinning.c b/tools/testing/selftests/bpf/prog_tests/pinning.c
index 31c09ba577eb..d95cee5867b7 100644
--- a/tools/testing/selftests/bpf/prog_tests/pinning.c
+++ b/tools/testing/selftests/bpf/prog_tests/pinning.c
@@ -26,13 +26,13 @@ __u32 get_map_id(struct bpf_object *obj, const char *name)
 
 void test_pinning(void)
 {
-	const char *file_invalid = "./test_pinning_invalid.o";
+	const char *file_invalid = "./test_pinning_invalid.bpf.o";
 	const char *custpinpath = "/sys/fs/bpf/custom/pinmap";
 	const char *nopinpath = "/sys/fs/bpf/nopinmap";
 	const char *nopinpath2 = "/sys/fs/bpf/nopinmap2";
 	const char *custpath = "/sys/fs/bpf/custom";
 	const char *pinpath = "/sys/fs/bpf/pinmap";
-	const char *file = "./test_pinning.o";
+	const char *file = "./test_pinning.bpf.o";
 	__u32 map_id, map_id2, duration = 0;
 	struct stat statbuf = {};
 	struct bpf_object *obj;
diff --git a/tools/testing/selftests/bpf/prog_tests/pkt_access.c b/tools/testing/selftests/bpf/prog_tests/pkt_access.c
index 0bcccdc34fbc..682e4ff45b01 100644
--- a/tools/testing/selftests/bpf/prog_tests/pkt_access.c
+++ b/tools/testing/selftests/bpf/prog_tests/pkt_access.c
@@ -4,7 +4,7 @@
 
 void test_pkt_access(void)
 {
-	const char *file = "./test_pkt_access.o";
+	const char *file = "./test_pkt_access.bpf.o";
 	struct bpf_object *obj;
 	int err, prog_fd;
 	LIBBPF_OPTS(bpf_test_run_opts, topts,
diff --git a/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c b/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c
index 00ee1dd792aa..0d85e0642811 100644
--- a/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c
+++ b/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c
@@ -4,7 +4,7 @@
 
 void test_pkt_md_access(void)
 {
-	const char *file = "./test_pkt_md_access.o";
+	const char *file = "./test_pkt_md_access.bpf.o";
 	struct bpf_object *obj;
 	int err, prog_fd;
 	LIBBPF_OPTS(bpf_test_run_opts, topts,
diff --git a/tools/testing/selftests/bpf/prog_tests/probe_user.c b/tools/testing/selftests/bpf/prog_tests/probe_user.c
index 34dbd2adc157..8721671321de 100644
--- a/tools/testing/selftests/bpf/prog_tests/probe_user.c
+++ b/tools/testing/selftests/bpf/prog_tests/probe_user.c
@@ -11,7 +11,7 @@ void serial_test_probe_user(void)
 #endif
 	};
 	enum { prog_count = ARRAY_SIZE(prog_names) };
-	const char *obj_file = "./test_probe_user.o";
+	const char *obj_file = "./test_probe_user.bpf.o";
 	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, );
 	int err, results_map_fd, sock_fd, duration = 0;
 	struct sockaddr curr, orig, tmp;
diff --git a/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c b/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c
index d2743fc10032..722c5f2a7776 100644
--- a/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c
+++ b/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c
@@ -28,9 +28,9 @@ static void test_queue_stack_map_by_type(int type)
 		vals[i] = rand();
 
 	if (type == QUEUE)
-		strncpy(file, "./test_queue_map.o", sizeof(file));
+		strncpy(file, "./test_queue_map.bpf.o", sizeof(file));
 	else if (type == STACK)
-		strncpy(file, "./test_stack_map.o", sizeof(file));
+		strncpy(file, "./test_stack_map.bpf.o", sizeof(file));
 	else
 		return;
 
diff --git a/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c b/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c
index fd5d2ddfb062..19e2f2526dbd 100644
--- a/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c
+++ b/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c
@@ -16,7 +16,7 @@ struct rdonly_map_subtest {
 
 void test_rdonly_maps(void)
 {
-	const char *file = "test_rdonly_maps.o";
+	const char *file = "test_rdonly_maps.bpf.o";
 	struct rdonly_map_subtest subtests[] = {
 		{ "skip loop", "skip_loop", 0, 0 },
 		{ "part loop", "part_loop", 3, 2 + 3 + 4 },
diff --git a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
index 739d2ea6ca55..d863205bbe95 100644
--- a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
+++ b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
@@ -3,7 +3,7 @@
 
 void test_reference_tracking(void)
 {
-	const char *file = "test_sk_lookup_kern.o";
+	const char *file = "test_sk_lookup_kern.bpf.o";
 	const char *obj_name = "ref_track";
 	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts,
 		.object_name = obj_name,
diff --git a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
index c197261d02e2..f81d08d429a2 100644
--- a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
+++ b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
@@ -101,7 +101,7 @@ static int resolve_symbols(void)
 	int type_id;
 	__u32 nr;
 
-	btf = btf__parse_elf("btf_data.o", NULL);
+	btf = btf__parse_elf("btf_data.bpf.o", NULL);
 	if (CHECK(libbpf_get_error(btf), "resolve",
 		  "Failed to load BTF from btf_data.o\n"))
 		return -1;
diff --git a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
index 1cbd8cd64044..64c5f5eb2994 100644
--- a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
+++ b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
@@ -91,9 +91,9 @@ static int prepare_bpf_obj(void)
 	struct bpf_map *map;
 	int err;
 
-	obj = bpf_object__open("test_select_reuseport_kern.o");
+	obj = bpf_object__open("test_select_reuseport_kern.bpf.o");
 	err = libbpf_get_error(obj);
-	RET_ERR(err, "open test_select_reuseport_kern.o",
+	RET_ERR(err, "open test_select_reuseport_kern.bpf.o",
 		"obj:%p PTR_ERR(obj):%d\n", obj, err);
 
 	map = bpf_object__find_map_by_name(obj, "outer_map");
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_assign.c b/tools/testing/selftests/bpf/prog_tests/sk_assign.c
index 1d272e05188e..3e190ed63976 100644
--- a/tools/testing/selftests/bpf/prog_tests/sk_assign.c
+++ b/tools/testing/selftests/bpf/prog_tests/sk_assign.c
@@ -47,7 +47,7 @@ configure_stack(void)
 	if (CHECK_FAIL(system("tc qdisc add dev lo clsact")))
 		return false;
 	sprintf(tc_cmd, "%s %s %s %s", "tc filter add dev lo ingress bpf",
-		       "direct-action object-file ./test_sk_assign.o",
+		       "direct-action object-file ./test_sk_assign.bpf.o",
 		       "section tc",
 		       (env.verbosity < VERBOSE_VERY) ? " 2>/dev/null" : "verbose");
 	if (CHECK(system(tc_cmd), "BPF load failed;",
diff --git a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
index ce0e555b5e38..33f950e2dae3 100644
--- a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
+++ b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
@@ -31,7 +31,7 @@ void test_skb_ctx(void)
 	struct bpf_object *obj;
 	int err, prog_fd, i;
 
-	err = bpf_prog_test_load("./test_skb_ctx.o", BPF_PROG_TYPE_SCHED_CLS,
+	err = bpf_prog_test_load("./test_skb_ctx.bpf.o", BPF_PROG_TYPE_SCHED_CLS,
 				 &obj, &prog_fd);
 	if (!ASSERT_OK(err, "load"))
 		return;
diff --git a/tools/testing/selftests/bpf/prog_tests/skb_helpers.c b/tools/testing/selftests/bpf/prog_tests/skb_helpers.c
index 97dc8b14be48..f7ee25f290f7 100644
--- a/tools/testing/selftests/bpf/prog_tests/skb_helpers.c
+++ b/tools/testing/selftests/bpf/prog_tests/skb_helpers.c
@@ -20,7 +20,7 @@ void test_skb_helpers(void)
 	struct bpf_object *obj;
 	int err, prog_fd;
 
-	err = bpf_prog_test_load("./test_skb_helpers.o",
+	err = bpf_prog_test_load("./test_skb_helpers.bpf.o",
 				 BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
 	if (!ASSERT_OK(err, "load"))
 		return;
diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c
index 8ed78a9383ba..c5cb6e8374b6 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c
@@ -174,7 +174,7 @@ static void run_test(int cgroup_fd)
 	pthread_t tid;
 	int err;
 
-	obj = bpf_object__open_file("sockopt_inherit.o", NULL);
+	obj = bpf_object__open_file("sockopt_inherit.bpf.o", NULL);
 	if (!ASSERT_OK_PTR(obj, "obj_open"))
 		return;
 
diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c b/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c
index abce12ddcc37..28d592dc54a7 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c
@@ -310,7 +310,7 @@ void test_sockopt_multi(void)
 	if (CHECK_FAIL(cg_child < 0))
 		goto out;
 
-	obj = bpf_object__open_file("sockopt_multi.o", NULL);
+	obj = bpf_object__open_file("sockopt_multi.bpf.o", NULL);
 	if (!ASSERT_OK_PTR(obj, "obj_load"))
 		goto out;
 
diff --git a/tools/testing/selftests/bpf/prog_tests/spinlock.c b/tools/testing/selftests/bpf/prog_tests/spinlock.c
index 8e329eaee6d7..15eb1372d771 100644
--- a/tools/testing/selftests/bpf/prog_tests/spinlock.c
+++ b/tools/testing/selftests/bpf/prog_tests/spinlock.c
@@ -19,7 +19,7 @@ static void *spin_lock_thread(void *arg)
 
 void test_spinlock(void)
 {
-	const char *file = "./test_spin_lock.o";
+	const char *file = "./test_spin_lock.bpf.o";
 	pthread_t thread_id[4];
 	struct bpf_object *obj = NULL;
 	int prog_fd;
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
index 313f0a66232e..df59e4ae2951 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
@@ -6,7 +6,7 @@ void test_stacktrace_map(void)
 	int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
 	const char *prog_name = "oncpu";
 	int err, prog_fd, stack_trace_len;
-	const char *file = "./test_stacktrace_map.o";
+	const char *file = "./test_stacktrace_map.bpf.o";
 	__u32 key, val, duration = 0;
 	struct bpf_program *prog;
 	struct bpf_object *obj;
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c
index 1cb8dd36bd8f..c6ef06f55cdb 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c
@@ -5,7 +5,7 @@ void test_stacktrace_map_raw_tp(void)
 {
 	const char *prog_name = "oncpu";
 	int control_map_fd, stackid_hmap_fd, stackmap_fd;
-	const char *file = "./test_stacktrace_map.o";
+	const char *file = "./test_stacktrace_map.bpf.o";
 	__u32 key, val, duration = 0;
 	int err, prog_fd;
 	struct bpf_program *prog;
diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
index 19c70880cfb3..58fe2c586ed7 100644
--- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c
+++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
@@ -20,8 +20,8 @@ static void test_tailcall_1(void)
 		.repeat = 1,
 	);
 
-	err = bpf_prog_test_load("tailcall1.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
-			    &prog_fd);
+	err = bpf_prog_test_load("tailcall1.bpf.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
+				 &prog_fd);
 	if (CHECK_FAIL(err))
 		return;
 
@@ -156,8 +156,8 @@ static void test_tailcall_2(void)
 		.repeat = 1,
 	);
 
-	err = bpf_prog_test_load("tailcall2.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
-			    &prog_fd);
+	err = bpf_prog_test_load("tailcall2.bpf.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
+				 &prog_fd);
 	if (CHECK_FAIL(err))
 		return;
 
@@ -299,7 +299,7 @@ out:
  */
 static void test_tailcall_3(void)
 {
-	test_tailcall_count("tailcall3.o");
+	test_tailcall_count("tailcall3.bpf.o");
 }
 
 /* test_tailcall_6 checks that the count value of the tail call limit
@@ -307,7 +307,7 @@ static void test_tailcall_3(void)
  */
 static void test_tailcall_6(void)
 {
-	test_tailcall_count("tailcall6.o");
+	test_tailcall_count("tailcall6.bpf.o");
 }
 
 /* test_tailcall_4 checks that the kernel properly selects indirect jump
@@ -329,8 +329,8 @@ static void test_tailcall_4(void)
 		.repeat = 1,
 	);
 
-	err = bpf_prog_test_load("tailcall4.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
-			    &prog_fd);
+	err = bpf_prog_test_load("tailcall4.bpf.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
+				 &prog_fd);
 	if (CHECK_FAIL(err))
 		return;
 
@@ -419,8 +419,8 @@ static void test_tailcall_5(void)
 		.repeat = 1,
 	);
 
-	err = bpf_prog_test_load("tailcall5.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
-			    &prog_fd);
+	err = bpf_prog_test_load("tailcall5.bpf.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
+				 &prog_fd);
 	if (CHECK_FAIL(err))
 		return;
 
@@ -507,8 +507,8 @@ static void test_tailcall_bpf2bpf_1(void)
 		.repeat = 1,
 	);
 
-	err = bpf_prog_test_load("tailcall_bpf2bpf1.o", BPF_PROG_TYPE_SCHED_CLS,
-			    &obj, &prog_fd);
+	err = bpf_prog_test_load("tailcall_bpf2bpf1.bpf.o", BPF_PROG_TYPE_SCHED_CLS,
+				 &obj, &prog_fd);
 	if (CHECK_FAIL(err))
 		return;
 
@@ -591,8 +591,8 @@ static void test_tailcall_bpf2bpf_2(void)
 		.repeat = 1,
 	);
 
-	err = bpf_prog_test_load("tailcall_bpf2bpf2.o", BPF_PROG_TYPE_SCHED_CLS,
-			    &obj, &prog_fd);
+	err = bpf_prog_test_load("tailcall_bpf2bpf2.bpf.o", BPF_PROG_TYPE_SCHED_CLS,
+				 &obj, &prog_fd);
 	if (CHECK_FAIL(err))
 		return;
 
@@ -671,8 +671,8 @@ static void test_tailcall_bpf2bpf_3(void)
 		.repeat = 1,
 	);
 
-	err = bpf_prog_test_load("tailcall_bpf2bpf3.o", BPF_PROG_TYPE_SCHED_CLS,
-			    &obj, &prog_fd);
+	err = bpf_prog_test_load("tailcall_bpf2bpf3.bpf.o", BPF_PROG_TYPE_SCHED_CLS,
+				 &obj, &prog_fd);
 	if (CHECK_FAIL(err))
 		return;
 
@@ -766,8 +766,8 @@ static void test_tailcall_bpf2bpf_4(bool noise)
 		.repeat = 1,
 	);
 
-	err = bpf_prog_test_load("tailcall_bpf2bpf4.o", BPF_PROG_TYPE_SCHED_CLS,
-			    &obj, &prog_fd);
+	err = bpf_prog_test_load("tailcall_bpf2bpf4.bpf.o", BPF_PROG_TYPE_SCHED_CLS,
+				 &obj, &prog_fd);
 	if (CHECK_FAIL(err))
 		return;
 
diff --git a/tools/testing/selftests/bpf/prog_tests/task_fd_query_rawtp.c b/tools/testing/selftests/bpf/prog_tests/task_fd_query_rawtp.c
index 17947c9e1d66..3d34bab01e48 100644
--- a/tools/testing/selftests/bpf/prog_tests/task_fd_query_rawtp.c
+++ b/tools/testing/selftests/bpf/prog_tests/task_fd_query_rawtp.c
@@ -3,7 +3,7 @@
 
 void test_task_fd_query_rawtp(void)
 {
-	const char *file = "./test_get_stack_rawtp.o";
+	const char *file = "./test_get_stack_rawtp.bpf.o";
 	__u64 probe_offset, probe_addr;
 	__u32 len, prog_id, fd_type;
 	struct bpf_object *obj;
diff --git a/tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c b/tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c
index c2a98a7a8dfc..c717741bf8b6 100644
--- a/tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c
+++ b/tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c
@@ -4,7 +4,7 @@
 static void test_task_fd_query_tp_core(const char *probe_name,
 				       const char *tp_name)
 {
-	const char *file = "./test_tracepoint.o";
+	const char *file = "./test_tracepoint.bpf.o";
 	int err, bytes, efd, prog_fd, pmu_fd;
 	struct perf_event_attr attr = {};
 	__u64 probe_offset, probe_addr;
diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_estats.c b/tools/testing/selftests/bpf/prog_tests/tcp_estats.c
index 11bf755be4c9..032dbfb26256 100644
--- a/tools/testing/selftests/bpf/prog_tests/tcp_estats.c
+++ b/tools/testing/selftests/bpf/prog_tests/tcp_estats.c
@@ -3,7 +3,7 @@
 
 void test_tcp_estats(void)
 {
-	const char *file = "./test_tcp_estats.o";
+	const char *file = "./test_tcp_estats.bpf.o";
 	int err, prog_fd;
 	struct bpf_object *obj;
 	__u32 duration = 0;
diff --git a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c
index b90ee47d3111..7295cc60f724 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c
@@ -65,23 +65,23 @@ struct test_def {
 void test_test_global_funcs(void)
 {
 	struct test_def tests[] = {
-		{ "test_global_func1.o", "combined stack size of 4 calls is 544" },
-		{ "test_global_func2.o" },
-		{ "test_global_func3.o" , "the call stack of 8 frames" },
-		{ "test_global_func4.o" },
-		{ "test_global_func5.o" , "expected pointer to ctx, but got PTR" },
-		{ "test_global_func6.o" , "modified ctx ptr R2" },
-		{ "test_global_func7.o" , "foo() doesn't return scalar" },
-		{ "test_global_func8.o" },
-		{ "test_global_func9.o" },
-		{ "test_global_func10.o", "invalid indirect read from stack" },
-		{ "test_global_func11.o", "Caller passes invalid args into func#1" },
-		{ "test_global_func12.o", "invalid mem access 'mem_or_null'" },
-		{ "test_global_func13.o", "Caller passes invalid args into func#1" },
-		{ "test_global_func14.o", "reference type('FWD S') size cannot be determined" },
-		{ "test_global_func15.o", "At program exit the register R0 has value" },
-		{ "test_global_func16.o", "invalid indirect read from stack" },
-		{ "test_global_func17.o", "Caller passes invalid args into func#1" },
+		{ "test_global_func1.bpf.o", "combined stack size of 4 calls is 544" },
+		{ "test_global_func2.bpf.o" },
+		{ "test_global_func3.bpf.o", "the call stack of 8 frames" },
+		{ "test_global_func4.bpf.o" },
+		{ "test_global_func5.bpf.o", "expected pointer to ctx, but got PTR" },
+		{ "test_global_func6.bpf.o", "modified ctx ptr R2" },
+		{ "test_global_func7.bpf.o", "foo() doesn't return scalar" },
+		{ "test_global_func8.bpf.o" },
+		{ "test_global_func9.bpf.o" },
+		{ "test_global_func10.bpf.o", "invalid indirect read from stack" },
+		{ "test_global_func11.bpf.o", "Caller passes invalid args into func#1" },
+		{ "test_global_func12.bpf.o", "invalid mem access 'mem_or_null'" },
+		{ "test_global_func13.bpf.o", "Caller passes invalid args into func#1" },
+		{ "test_global_func14.bpf.o", "reference type('FWD S') size cannot be determined" },
+		{ "test_global_func15.bpf.o", "At program exit the register R0 has value" },
+		{ "test_global_func16.bpf.o", "invalid indirect read from stack" },
+		{ "test_global_func17.bpf.o", "Caller passes invalid args into func#1" },
 	};
 	libbpf_print_fn_t old_print_fn = NULL;
 	int err, i, duration = 0;
diff --git a/tools/testing/selftests/bpf/prog_tests/test_overhead.c b/tools/testing/selftests/bpf/prog_tests/test_overhead.c
index 05acb376f74d..f27013e38d03 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_overhead.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_overhead.c
@@ -72,7 +72,7 @@ void test_test_overhead(void)
 	if (CHECK_FAIL(prctl(PR_GET_NAME, comm, 0L, 0L, 0L)))
 		return;
 
-	obj = bpf_object__open_file("./test_overhead.o", NULL);
+	obj = bpf_object__open_file("./test_overhead.bpf.o", NULL);
 	if (!ASSERT_OK_PTR(obj, "obj_open_file"))
 		return;
 
diff --git a/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c b/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c
index 39e79291c82b..a479080533db 100644
--- a/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c
+++ b/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c
@@ -6,7 +6,7 @@ void serial_test_tp_attach_query(void)
 	const int num_progs = 3;
 	int i, j, bytes, efd, err, prog_fd[num_progs], pmu_fd[num_progs];
 	__u32 duration = 0, info_len, saved_prog_ids[num_progs];
-	const char *file = "./test_tracepoint.o";
+	const char *file = "./test_tracepoint.bpf.o";
 	struct perf_event_query_bpf *query;
 	struct perf_event_attr attr = {};
 	struct bpf_object *obj[num_progs];
diff --git a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
index b0acbda6dbf5..564b75bc087f 100644
--- a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
+++ b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
@@ -35,7 +35,7 @@ static struct bpf_program *load_prog(char *file, char *name, struct inst *inst)
 /* TODO: use different target function to run in concurrent mode */
 void serial_test_trampoline_count(void)
 {
-	char *file = "test_trampoline_count.o";
+	char *file = "test_trampoline_count.bpf.o";
 	char *const progs[] = { "fentry_test", "fmod_ret_test", "fexit_test" };
 	struct inst inst[MAX_TRAMP_PROGS + 1] = {};
 	struct bpf_program *prog;
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp.c b/tools/testing/selftests/bpf/prog_tests/xdp.c
index ec21c53cb1da..947863a1d536 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp.c
@@ -8,7 +8,7 @@ void test_xdp(void)
 	struct vip key6 = {.protocol = 6, .family = AF_INET6};
 	struct iptnl_info value4 = {.family = AF_INET};
 	struct iptnl_info value6 = {.family = AF_INET6};
-	const char *file = "./test_xdp.o";
+	const char *file = "./test_xdp.bpf.o";
 	struct bpf_object *obj;
 	char buf[128];
 	struct ipv6hdr iph6;
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c
index 2f033da4cd45..fce203640f8c 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c
@@ -4,7 +4,7 @@
 
 static void test_xdp_update_frags(void)
 {
-	const char *file = "./test_xdp_update_frags.o";
+	const char *file = "./test_xdp_update_frags.bpf.o";
 	int err, prog_fd, max_skb_frags, buf_size, num;
 	struct bpf_program *prog;
 	struct bpf_object *obj;
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
index 21ceac24e174..9b9cf8458adf 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
@@ -4,7 +4,7 @@
 
 static void test_xdp_adjust_tail_shrink(void)
 {
-	const char *file = "./test_xdp_adjust_tail_shrink.o";
+	const char *file = "./test_xdp_adjust_tail_shrink.bpf.o";
 	__u32 expect_sz;
 	struct bpf_object *obj;
 	int err, prog_fd;
@@ -39,7 +39,7 @@ static void test_xdp_adjust_tail_shrink(void)
 
 static void test_xdp_adjust_tail_grow(void)
 {
-	const char *file = "./test_xdp_adjust_tail_grow.o";
+	const char *file = "./test_xdp_adjust_tail_grow.bpf.o";
 	struct bpf_object *obj;
 	char buf[4096]; /* avoid segfault: large buf to hold grow results */
 	__u32 expect_sz;
@@ -73,7 +73,7 @@ static void test_xdp_adjust_tail_grow(void)
 
 static void test_xdp_adjust_tail_grow2(void)
 {
-	const char *file = "./test_xdp_adjust_tail_grow.o";
+	const char *file = "./test_xdp_adjust_tail_grow.bpf.o";
 	char buf[4096]; /* avoid segfault: large buf to hold grow results */
 	int tailroom = 320; /* SKB_DATA_ALIGN(sizeof(struct skb_shared_info))*/;
 	struct bpf_object *obj;
@@ -135,7 +135,7 @@ static void test_xdp_adjust_tail_grow2(void)
 
 static void test_xdp_adjust_frags_tail_shrink(void)
 {
-	const char *file = "./test_xdp_adjust_tail_shrink.o";
+	const char *file = "./test_xdp_adjust_tail_shrink.bpf.o";
 	__u32 exp_size;
 	struct bpf_program *prog;
 	struct bpf_object *obj;
@@ -202,7 +202,7 @@ out:
 
 static void test_xdp_adjust_frags_tail_grow(void)
 {
-	const char *file = "./test_xdp_adjust_tail_grow.o";
+	const char *file = "./test_xdp_adjust_tail_grow.bpf.o";
 	__u32 exp_size;
 	struct bpf_program *prog;
 	struct bpf_object *obj;
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c
index 62aa3edda5e6..062fbc8c8e5e 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c
@@ -8,7 +8,7 @@ void serial_test_xdp_attach(void)
 {
 	__u32 duration = 0, id1, id2, id0 = 0, len;
 	struct bpf_object *obj1, *obj2, *obj3;
-	const char *file = "./test_xdp.o";
+	const char *file = "./test_xdp.bpf.o";
 	struct bpf_prog_info info = {};
 	int err, fd1, fd2, fd3;
 	LIBBPF_OPTS(bpf_xdp_attach_opts, opts);
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_info.c b/tools/testing/selftests/bpf/prog_tests/xdp_info.c
index 0d01ff6cb91a..cd3aa340e65e 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_info.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_info.c
@@ -7,7 +7,7 @@
 void serial_test_xdp_info(void)
 {
 	__u32 len = sizeof(struct bpf_prog_info), duration = 0, prog_id;
-	const char *file = "./xdp_dummy.o";
+	const char *file = "./xdp_dummy.bpf.o";
 	struct bpf_prog_info info = {};
 	struct bpf_object *obj;
 	int err, prog_fd;
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_perf.c b/tools/testing/selftests/bpf/prog_tests/xdp_perf.c
index f543d1bd21b8..ec5369f247cb 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_perf.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_perf.c
@@ -3,7 +3,7 @@
 
 void test_xdp_perf(void)
 {
-	const char *file = "./xdp_dummy.o";
+	const char *file = "./xdp_dummy.bpf.o";
 	struct bpf_object *obj;
 	char in[128], out[128];
 	int err, prog_fd;
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c
index 874a846e298c..75550a40e029 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c
@@ -82,7 +82,7 @@ static void test_synproxy(bool xdp)
 	SYS("ethtool -K tmp0 tx off");
 	if (xdp)
 		/* Workaround required for veth. */
-		SYS("ip link set tmp0 xdp object xdp_dummy.o section xdp 2> /dev/null");
+		SYS("ip link set tmp0 xdp object xdp_dummy.bpf.o section xdp 2> /dev/null");
 
 	ns = open_netns("synproxy");
 	if (!ASSERT_OK_PTR(ns, "setns"))
diff --git a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c
index 48cd14b43741..4547b059d487 100644
--- a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c
@@ -73,10 +73,10 @@ int test_subprog2(struct args_subprog2 *ctx)
 			      __builtin_preserve_access_index(&skb->len));
 
 	ret = ctx->ret;
-	/* bpf_prog_test_load() loads "test_pkt_access.o" with BPF_F_TEST_RND_HI32
-	 * which randomizes upper 32 bits after BPF_ALU32 insns.
-	 * Hence after 'w0 <<= 1' upper bits of $rax are random.
-	 * That is expected and correct. Trim them.
+	/* bpf_prog_test_load() loads "test_pkt_access.bpf.o" with
+	 * BPF_F_TEST_RND_HI32 which randomizes upper 32 bits after BPF_ALU32
+	 * insns. Hence after 'w0 <<= 1' upper bits of $rax are random. That is
+	 * expected and correct. Trim them.
 	 */
 	ret = (__u32) ret;
 	if (len != 74 || ret != 148)
diff --git a/tools/testing/selftests/bpf/test_dev_cgroup.c b/tools/testing/selftests/bpf/test_dev_cgroup.c
index 7886265846a0..adeaf63cb6fa 100644
--- a/tools/testing/selftests/bpf/test_dev_cgroup.c
+++ b/tools/testing/selftests/bpf/test_dev_cgroup.c
@@ -16,7 +16,7 @@
 #include "cgroup_helpers.h"
 #include "testing_helpers.h"
 
-#define DEV_CGROUP_PROG "./dev_cgroup.o"
+#define DEV_CGROUP_PROG "./dev_cgroup.bpf.o"
 
 #define TEST_CGROUP "/test-bpf-based-device-cgroup/"
 
diff --git a/tools/testing/selftests/bpf/test_lirc_mode2_user.c b/tools/testing/selftests/bpf/test_lirc_mode2_user.c
index 2893e9f2f1e0..4694422aa76c 100644
--- a/tools/testing/selftests/bpf/test_lirc_mode2_user.c
+++ b/tools/testing/selftests/bpf/test_lirc_mode2_user.c
@@ -59,7 +59,7 @@ int main(int argc, char **argv)
 		return 2;
 	}
 
-	ret = bpf_prog_test_load("test_lirc_mode2_kern.o",
+	ret = bpf_prog_test_load("test_lirc_mode2_kern.bpf.o",
 				 BPF_PROG_TYPE_LIRC_MODE2, &obj, &progfd);
 	if (ret) {
 		printf("Failed to load bpf program\n");
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index cbebfaa7c1e8..c49f2056e14f 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -651,9 +651,9 @@ static void test_stackmap(unsigned int task, void *data)
 #include <arpa/inet.h>
 #include <sys/select.h>
 #include <linux/err.h>
-#define SOCKMAP_PARSE_PROG "./sockmap_parse_prog.o"
-#define SOCKMAP_VERDICT_PROG "./sockmap_verdict_prog.o"
-#define SOCKMAP_TCP_MSG_PROG "./sockmap_tcp_msg_prog.o"
+#define SOCKMAP_PARSE_PROG "./sockmap_parse_prog.bpf.o"
+#define SOCKMAP_VERDICT_PROG "./sockmap_verdict_prog.bpf.o"
+#define SOCKMAP_TCP_MSG_PROG "./sockmap_tcp_msg_prog.bpf.o"
 static void test_sockmap(unsigned int tasks, void *data)
 {
 	struct bpf_map *bpf_map_rx, *bpf_map_tx, *bpf_map_msg, *bpf_map_break;
@@ -1143,8 +1143,8 @@ out_sockmap:
 	exit(1);
 }
 
-#define MAPINMAP_PROG "./test_map_in_map.o"
-#define MAPINMAP_INVALID_PROG "./test_map_in_map_invalid.o"
+#define MAPINMAP_PROG "./test_map_in_map.bpf.o"
+#define MAPINMAP_INVALID_PROG "./test_map_in_map_invalid.bpf.o"
 static void test_map_in_map(void)
 {
 	struct bpf_object *obj;
diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py
index 6cd6ef9fc20b..7fc15e0d24a9 100755
--- a/tools/testing/selftests/bpf/test_offload.py
+++ b/tools/testing/selftests/bpf/test_offload.py
@@ -782,7 +782,7 @@ if out.find("/sys/kernel/debug type debugfs") == -1:
     cmd("mount -t debugfs none /sys/kernel/debug")
 
 # Check samples are compiled
-samples = ["sample_ret0.o", "sample_map_ret0.o"]
+samples = ["sample_ret0.bpf.o", "sample_map_ret0.bpf.o"]
 for s in samples:
     ret, out = cmd("ls %s/%s" % (bpf_test_dir, s), fail=False)
     skip(ret != 0, "sample %s/%s not found, please compile it" %
@@ -803,7 +803,7 @@ cmd("ip netns delete %s" % (ns))
 netns = []
 
 try:
-    obj = bpf_obj("sample_ret0.o")
+    obj = bpf_obj("sample_ret0.bpf.o")
     bytecode = bpf_bytecode("1,6 0 0 4294967295,")
 
     start_test("Test destruction of generic XDP...")
@@ -1023,7 +1023,7 @@ try:
 
     sim.wait_for_flush()
     start_test("Test non-offload XDP attaching to HW...")
-    bpftool_prog_load("sample_ret0.o", "/sys/fs/bpf/nooffload")
+    bpftool_prog_load("sample_ret0.bpf.o", "/sys/fs/bpf/nooffload")
     nooffload = bpf_pinned("/sys/fs/bpf/nooffload")
     ret, _, err = sim.set_xdp(nooffload, "offload",
                               fail=False, include_stderr=True)
@@ -1032,7 +1032,7 @@ try:
     rm("/sys/fs/bpf/nooffload")
 
     start_test("Test offload XDP attaching to drv...")
-    bpftool_prog_load("sample_ret0.o", "/sys/fs/bpf/offload",
+    bpftool_prog_load("sample_ret0.bpf.o", "/sys/fs/bpf/offload",
                       dev=sim['ifname'])
     offload = bpf_pinned("/sys/fs/bpf/offload")
     ret, _, err = sim.set_xdp(offload, "drv", fail=False, include_stderr=True)
@@ -1043,7 +1043,7 @@ try:
 
     start_test("Test XDP load failure...")
     sim.dfs["dev/bpf_bind_verifier_accept"] = 0
-    ret, _, err = bpftool_prog_load("sample_ret0.o", "/sys/fs/bpf/offload",
+    ret, _, err = bpftool_prog_load("sample_ret0.bpf.o", "/sys/fs/bpf/offload",
                                  dev=sim['ifname'], fail=False, include_stderr=True)
     fail(ret == 0, "verifier should fail on load")
     check_verifier_log(err, "[netdevsim] Hello from netdevsim!")
@@ -1169,7 +1169,7 @@ try:
 
     simdev = NetdevSimDev()
     sim, = simdev.nsims
-    map_obj = bpf_obj("sample_map_ret0.o")
+    map_obj = bpf_obj("sample_map_ret0.bpf.o")
     start_test("Test loading program with maps...")
     sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON
 
@@ -1307,10 +1307,10 @@ try:
     sims = (simA, simB1, simB2, simB3)
     simB = (simB1, simB2, simB3)
 
-    bpftool_prog_load("sample_map_ret0.o", "/sys/fs/bpf/nsimA",
+    bpftool_prog_load("sample_map_ret0.bpf.o", "/sys/fs/bpf/nsimA",
                       dev=simA['ifname'])
     progA = bpf_pinned("/sys/fs/bpf/nsimA")
-    bpftool_prog_load("sample_map_ret0.o", "/sys/fs/bpf/nsimB",
+    bpftool_prog_load("sample_map_ret0.bpf.o", "/sys/fs/bpf/nsimB",
                       dev=simB1['ifname'])
     progB = bpf_pinned("/sys/fs/bpf/nsimB")
 
@@ -1344,14 +1344,14 @@ try:
     mapA = bpftool("prog show %s" % (progA))[1]["map_ids"][0]
     mapB = bpftool("prog show %s" % (progB))[1]["map_ids"][0]
 
-    ret, _ = bpftool_prog_load("sample_map_ret0.o", "/sys/fs/bpf/nsimB_",
+    ret, _ = bpftool_prog_load("sample_map_ret0.bpf.o", "/sys/fs/bpf/nsimB_",
                                dev=simB3['ifname'],
                                maps=["idx 0 id %d" % (mapB)],
                                fail=False)
     fail(ret != 0, "couldn't reuse a map on the same ASIC")
     rm("/sys/fs/bpf/nsimB_")
 
-    ret, _, err = bpftool_prog_load("sample_map_ret0.o", "/sys/fs/bpf/nsimA_",
+    ret, _, err = bpftool_prog_load("sample_map_ret0.bpf.o", "/sys/fs/bpf/nsimA_",
                                     dev=simA['ifname'],
                                     maps=["idx 0 id %d" % (mapB)],
                                     fail=False, include_stderr=True)
@@ -1359,7 +1359,7 @@ try:
     fail(err.count("offload device mismatch between prog and map") == 0,
          "error message missing for cross-ASIC map")
 
-    ret, _, err = bpftool_prog_load("sample_map_ret0.o", "/sys/fs/bpf/nsimB_",
+    ret, _, err = bpftool_prog_load("sample_map_ret0.bpf.o", "/sys/fs/bpf/nsimB_",
                                     dev=simB1['ifname'],
                                     maps=["idx 0 id %d" % (mapA)],
                                     fail=False, include_stderr=True)
diff --git a/tools/testing/selftests/bpf/test_skb_cgroup_id.sh b/tools/testing/selftests/bpf/test_skb_cgroup_id.sh
index a9bc6f82abc1..515c2eafc97f 100755
--- a/tools/testing/selftests/bpf/test_skb_cgroup_id.sh
+++ b/tools/testing/selftests/bpf/test_skb_cgroup_id.sh
@@ -54,7 +54,7 @@ DIR=$(dirname $0)
 TEST_IF="test_cgid_1"
 TEST_IF_PEER="test_cgid_2"
 MAX_PING_TRIES=5
-BPF_PROG_OBJ="${DIR}/test_skb_cgroup_id_kern.o"
+BPF_PROG_OBJ="${DIR}/test_skb_cgroup_id_kern.bpf.o"
 BPF_PROG_SECTION="cgroup_id_logger"
 BPF_PROG_ID=0
 PROG="${DIR}/test_skb_cgroup_id_user"
diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c
index 458564fcfc82..2c89674fc62c 100644
--- a/tools/testing/selftests/bpf/test_sock_addr.c
+++ b/tools/testing/selftests/bpf/test_sock_addr.c
@@ -26,14 +26,14 @@
 #endif
 
 #define CG_PATH	"/foo"
-#define CONNECT4_PROG_PATH	"./connect4_prog.o"
-#define CONNECT6_PROG_PATH	"./connect6_prog.o"
-#define SENDMSG4_PROG_PATH	"./sendmsg4_prog.o"
-#define SENDMSG6_PROG_PATH	"./sendmsg6_prog.o"
-#define RECVMSG4_PROG_PATH	"./recvmsg4_prog.o"
-#define RECVMSG6_PROG_PATH	"./recvmsg6_prog.o"
-#define BIND4_PROG_PATH		"./bind4_prog.o"
-#define BIND6_PROG_PATH		"./bind6_prog.o"
+#define CONNECT4_PROG_PATH	"./connect4_prog.bpf.o"
+#define CONNECT6_PROG_PATH	"./connect6_prog.bpf.o"
+#define SENDMSG4_PROG_PATH	"./sendmsg4_prog.bpf.o"
+#define SENDMSG6_PROG_PATH	"./sendmsg6_prog.bpf.o"
+#define RECVMSG4_PROG_PATH	"./recvmsg4_prog.bpf.o"
+#define RECVMSG6_PROG_PATH	"./recvmsg6_prog.bpf.o"
+#define BIND4_PROG_PATH		"./bind4_prog.bpf.o"
+#define BIND6_PROG_PATH		"./bind6_prog.bpf.o"
 
 #define SERV4_IP		"192.168.1.254"
 #define SERV4_REWRITE_IP	"127.0.0.1"
diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index 0fbaccdc8861..dcb038e342d8 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -52,8 +52,8 @@ static void running_handler(int a);
 #define S1_PORT 10000
 #define S2_PORT 10001
 
-#define BPF_SOCKMAP_FILENAME  "test_sockmap_kern.o"
-#define BPF_SOCKHASH_FILENAME "test_sockhash_kern.o"
+#define BPF_SOCKMAP_FILENAME  "test_sockmap_kern.bpf.o"
+#define BPF_SOCKHASH_FILENAME "test_sockhash_kern.bpf.o"
 #define CG_PATH "/sockmap"
 
 /* global sockets */
diff --git a/tools/testing/selftests/bpf/test_sysctl.c b/tools/testing/selftests/bpf/test_sysctl.c
index 57620e7c9048..bcdbd27f22f0 100644
--- a/tools/testing/selftests/bpf/test_sysctl.c
+++ b/tools/testing/selftests/bpf/test_sysctl.c
@@ -1372,7 +1372,7 @@ static struct sysctl_test tests[] = {
 	},
 	{
 		"C prog: deny all writes",
-		.prog_file = "./test_sysctl_prog.o",
+		.prog_file = "./test_sysctl_prog.bpf.o",
 		.attach_type = BPF_CGROUP_SYSCTL,
 		.sysctl = "net/ipv4/tcp_mem",
 		.open_flags = O_WRONLY,
@@ -1381,7 +1381,7 @@ static struct sysctl_test tests[] = {
 	},
 	{
 		"C prog: deny access by name",
-		.prog_file = "./test_sysctl_prog.o",
+		.prog_file = "./test_sysctl_prog.bpf.o",
 		.attach_type = BPF_CGROUP_SYSCTL,
 		.sysctl = "net/ipv4/route/mtu_expires",
 		.open_flags = O_RDONLY,
@@ -1389,7 +1389,7 @@ static struct sysctl_test tests[] = {
 	},
 	{
 		"C prog: read tcp_mem",
-		.prog_file = "./test_sysctl_prog.o",
+		.prog_file = "./test_sysctl_prog.bpf.o",
 		.attach_type = BPF_CGROUP_SYSCTL,
 		.sysctl = "net/ipv4/tcp_mem",
 		.open_flags = O_RDONLY,
diff --git a/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh b/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh
index 102e6588e2fe..b42c24282c25 100755
--- a/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh
+++ b/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh
@@ -76,7 +76,7 @@ main()
 DIR=$(dirname $0)
 TEST_IF=lo
 MAX_PING_TRIES=5
-BPF_PROG_OBJ="${DIR}/test_tcp_check_syncookie_kern.o"
+BPF_PROG_OBJ="${DIR}/test_tcp_check_syncookie_kern.bpf.o"
 CLSACT_SECTION="tc"
 XDP_SECTION="xdp"
 BPF_PROG_ID=0
diff --git a/tools/testing/selftests/bpf/test_tcpnotify_user.c b/tools/testing/selftests/bpf/test_tcpnotify_user.c
index 8284db8b0f13..595194453ff8 100644
--- a/tools/testing/selftests/bpf/test_tcpnotify_user.c
+++ b/tools/testing/selftests/bpf/test_tcpnotify_user.c
@@ -69,7 +69,7 @@ int verify_result(const struct tcpnotify_globals *result)
 
 int main(int argc, char **argv)
 {
-	const char *file = "test_tcpnotify_kern.o";
+	const char *file = "test_tcpnotify_kern.bpf.o";
 	struct bpf_map *perf_map, *global_map;
 	struct tcpnotify_globals g = {0};
 	struct perf_buffer *pb = NULL;
diff --git a/tools/testing/selftests/bpf/test_xdp_redirect.sh b/tools/testing/selftests/bpf/test_xdp_redirect.sh
index 1d79f31480ad..0746a4fde9d3 100755
--- a/tools/testing/selftests/bpf/test_xdp_redirect.sh
+++ b/tools/testing/selftests/bpf/test_xdp_redirect.sh
@@ -54,10 +54,10 @@ test_xdp_redirect()
 		return 0
 	fi
 
-	ip -n ${NS1} link set veth11 $xdpmode obj xdp_dummy.o sec xdp &> /dev/null
-	ip -n ${NS2} link set veth22 $xdpmode obj xdp_dummy.o sec xdp &> /dev/null
-	ip link set dev veth1 $xdpmode obj test_xdp_redirect.o sec redirect_to_222 &> /dev/null
-	ip link set dev veth2 $xdpmode obj test_xdp_redirect.o sec redirect_to_111 &> /dev/null
+	ip -n ${NS1} link set veth11 $xdpmode obj xdp_dummy.bpf.o sec xdp &> /dev/null
+	ip -n ${NS2} link set veth22 $xdpmode obj xdp_dummy.bpf.o sec xdp &> /dev/null
+	ip link set dev veth1 $xdpmode obj test_xdp_redirect.bpf.o sec redirect_to_222 &> /dev/null
+	ip link set dev veth2 $xdpmode obj test_xdp_redirect.bpf.o sec redirect_to_111 &> /dev/null
 
 	if ip netns exec ${NS1} ping -c 1 10.1.1.22 &> /dev/null &&
 	   ip netns exec ${NS2} ping -c 1 10.1.1.11 &> /dev/null; then
diff --git a/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh b/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh
index cc57cb87e65f..4c3c3fdd2d73 100755
--- a/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh
+++ b/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh
@@ -94,7 +94,7 @@ setup_ns()
 		# Add a neigh entry for IPv4 ping test
 		ip -n ${NS[$i]} neigh add 192.0.2.253 lladdr 00:00:00:00:00:01 dev veth0
 		ip -n ${NS[$i]} link set veth0 $mode obj \
-			xdp_dummy.o sec xdp &> /dev/null || \
+			xdp_dummy.bpf.o sec xdp &> /dev/null || \
 			{ test_fail "Unable to load dummy xdp" && exit 1; }
 		IFACES="$IFACES veth$i"
 		veth_mac[$i]=$(ip -n ${NS[0]} link show veth$i | awk '/link\/ether/ {print $2}')
diff --git a/tools/testing/selftests/bpf/test_xdp_veth.sh b/tools/testing/selftests/bpf/test_xdp_veth.sh
index 49936c4c8567..5211ca9a0239 100755
--- a/tools/testing/selftests/bpf/test_xdp_veth.sh
+++ b/tools/testing/selftests/bpf/test_xdp_veth.sh
@@ -101,7 +101,7 @@ ip -n ${NS3} link set dev veth33 up
 
 mkdir $BPF_DIR
 bpftool prog loadall \
-	xdp_redirect_map.o $BPF_DIR/progs type xdp \
+	xdp_redirect_map.bpf.o $BPF_DIR/progs type xdp \
 	pinmaps $BPF_DIR/maps
 bpftool map update pinned $BPF_DIR/maps/tx_port key 0 0 0 0 value 122 0 0 0
 bpftool map update pinned $BPF_DIR/maps/tx_port key 1 0 0 0 value 133 0 0 0
@@ -110,9 +110,9 @@ ip link set dev veth1 xdp pinned $BPF_DIR/progs/xdp_redirect_map_0
 ip link set dev veth2 xdp pinned $BPF_DIR/progs/xdp_redirect_map_1
 ip link set dev veth3 xdp pinned $BPF_DIR/progs/xdp_redirect_map_2
 
-ip -n ${NS1} link set dev veth11 xdp obj xdp_dummy.o sec xdp
-ip -n ${NS2} link set dev veth22 xdp obj xdp_tx.o sec xdp
-ip -n ${NS3} link set dev veth33 xdp obj xdp_dummy.o sec xdp
+ip -n ${NS1} link set dev veth11 xdp obj xdp_dummy.bpf.o sec xdp
+ip -n ${NS2} link set dev veth22 xdp obj xdp_tx.bpf.o sec xdp
+ip -n ${NS3} link set dev veth33 xdp obj xdp_dummy.bpf.o sec xdp
 
 trap cleanup EXIT
 
diff --git a/tools/testing/selftests/bpf/xdp_redirect_multi.c b/tools/testing/selftests/bpf/xdp_redirect_multi.c
index c03b3a75991f..c1fc44c87c30 100644
--- a/tools/testing/selftests/bpf/xdp_redirect_multi.c
+++ b/tools/testing/selftests/bpf/xdp_redirect_multi.c
@@ -142,7 +142,7 @@ int main(int argc, char **argv)
 	}
 	printf("\n");
 
-	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+	snprintf(filename, sizeof(filename), "%s_kern.bpf.o", argv[0]);
 	obj = bpf_object__open_file(filename, NULL);
 	err = libbpf_get_error(obj);
 	if (err)
diff --git a/tools/testing/selftests/bpf/xdp_synproxy.c b/tools/testing/selftests/bpf/xdp_synproxy.c
index d874ddfb39c4..ff35320d2be9 100644
--- a/tools/testing/selftests/bpf/xdp_synproxy.c
+++ b/tools/testing/selftests/bpf/xdp_synproxy.c
@@ -193,7 +193,7 @@ static int syncookie_attach(const char *argv0, unsigned int ifindex, bool tc)
 	int prog_fd;
 	int err;
 
-	snprintf(xdp_filename, sizeof(xdp_filename), "%s_kern.o", argv0);
+	snprintf(xdp_filename, sizeof(xdp_filename), "%s_kern.bpf.o", argv0);
 	obj = bpf_object__open_file(xdp_filename, NULL);
 	err = libbpf_get_error(obj);
 	if (err < 0) {
diff --git a/tools/testing/selftests/bpf/xdping.c b/tools/testing/selftests/bpf/xdping.c
index 5b6f977870f8..1503a1d2faa0 100644
--- a/tools/testing/selftests/bpf/xdping.c
+++ b/tools/testing/selftests/bpf/xdping.c
@@ -168,7 +168,7 @@ int main(int argc, char **argv)
 	/* Use libbpf 1.0 API mode */
 	libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
 
-	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+	snprintf(filename, sizeof(filename), "%s_kern.bpf.o", argv[0]);
 
 	if (bpf_prog_test_load(filename, BPF_PROG_TYPE_XDP, &obj, &prog_fd)) {
 		fprintf(stderr, "load of %s failed\n", filename);
-- 
cgit v1.2.3


From af515a5587b8f45f19e11657746e0c89411b0380 Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Thu, 1 Sep 2022 13:26:45 -0700
Subject: selftests/xsk: Avoid use-after-free on ctx

The put lowers the reference count to 0 and frees ctx, so reading it
afterwards is invalid. Move the put after the uses and determine the
last use by the reference count being 1.

Fixes: 39e940d4abfa ("selftests/xsk: Destroy BPF resources only when ctx refcount drops to 0")
Signed-off-by: Ian Rogers <irogers@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Magnus Karlsson <magnus.karlsson@intel.com>
Link: https://lore.kernel.org/bpf/20220901202645.1463552-1-irogers@google.com
---
 tools/testing/selftests/bpf/xsk.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/xsk.c b/tools/testing/selftests/bpf/xsk.c
index f2721a4ae7c5..0b3ff49c740d 100644
--- a/tools/testing/selftests/bpf/xsk.c
+++ b/tools/testing/selftests/bpf/xsk.c
@@ -1237,15 +1237,15 @@ void xsk_socket__delete(struct xsk_socket *xsk)
 	ctx = xsk->ctx;
 	umem = ctx->umem;
 
-	xsk_put_ctx(ctx, true);
-
-	if (!ctx->refcount) {
+	if (ctx->refcount == 1) {
 		xsk_delete_bpf_maps(xsk);
 		close(ctx->prog_fd);
 		if (ctx->has_bpf_link)
 			close(ctx->link_fd);
 	}
 
+	xsk_put_ctx(ctx, true);
+
 	err = xsk_get_mmap_offsets(xsk->fd, &off);
 	if (!err) {
 		if (xsk->rx) {
-- 
cgit v1.2.3


From f649f992deeeab020257b886e054cc407154cbfc Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <martin.lau@kernel.org>
Date: Thu, 1 Sep 2022 17:29:37 -0700
Subject: selftest/bpf: Add test for bpf_getsockopt()

This patch removes __bpf_getsockopt(), which directly
reads the sk by using PTR_TO_BTF_ID.  Instead, the test now directly
uses the kernel bpf helper bpf_getsockopt(), which now supports all
the required optnames.

TCP_SAVE[D]_SYN and TCP_MAXSEG are not tested in a loop for all
the hooks and sock_ops's cb.  TCP_SAVE[D]_SYN only works
on a passive connection.  TCP_MAXSEG only works when
it is set with setsockopt before the connection is established, and
the getsockopt return value can only be tested after
the connection is established.

Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
Link: https://lore.kernel.org/r/20220902002937.2896904-1-kafai@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 .../testing/selftests/bpf/progs/bpf_tracing_net.h  |   1 +
 tools/testing/selftests/bpf/progs/setget_sockopt.c | 148 ++++++---------------
 2 files changed, 43 insertions(+), 106 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
index 5ebc6dabef84..adb087aecc9e 100644
--- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
+++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
@@ -38,6 +38,7 @@
 #define TCP_USER_TIMEOUT	18
 #define TCP_NOTSENT_LOWAT	25
 #define TCP_SAVE_SYN		27
+#define TCP_SAVED_SYN		28
 #define TCP_CA_NAME_MAX		16
 #define TCP_NAGLE_OFF		1
 
diff --git a/tools/testing/selftests/bpf/progs/setget_sockopt.c b/tools/testing/selftests/bpf/progs/setget_sockopt.c
index 79debf3c2f44..9523333b8905 100644
--- a/tools/testing/selftests/bpf/progs/setget_sockopt.c
+++ b/tools/testing/selftests/bpf/progs/setget_sockopt.c
@@ -52,7 +52,6 @@ static const struct sockopt_test sol_socket_tests[] = {
 
 static const struct sockopt_test sol_tcp_tests[] = {
 	{ .opt = TCP_NODELAY, .flip = 1, },
-	{ .opt = TCP_MAXSEG, .new = 1314, .expected = 1314, },
 	{ .opt = TCP_KEEPIDLE, .new = 123, .expected = 123, .restore = 321, },
 	{ .opt = TCP_KEEPINTVL, .new = 123, .expected = 123, .restore = 321, },
 	{ .opt = TCP_KEEPCNT, .new = 123, .expected = 123, .restore = 124, },
@@ -62,7 +61,6 @@ static const struct sockopt_test sol_tcp_tests[] = {
 	{ .opt = TCP_THIN_LINEAR_TIMEOUTS, .flip = 1, },
 	{ .opt = TCP_USER_TIMEOUT, .new = 123400, .expected = 123400, },
 	{ .opt = TCP_NOTSENT_LOWAT, .new = 1314, .expected = 1314, },
-	{ .opt = TCP_SAVE_SYN, .new = 1, .expected = 1, },
 	{ .opt = 0, },
 };
 
@@ -82,102 +80,6 @@ struct loop_ctx {
 	struct sock *sk;
 };
 
-static int __bpf_getsockopt(void *ctx, struct sock *sk,
-			    int level, int opt, int *optval,
-			    int optlen)
-{
-	if (level == SOL_SOCKET) {
-		switch (opt) {
-		case SO_REUSEADDR:
-			*optval = !!BPF_CORE_READ_BITFIELD(sk, sk_reuse);
-			break;
-		case SO_KEEPALIVE:
-			*optval = !!(sk->sk_flags & (1UL << 3));
-			break;
-		case SO_RCVLOWAT:
-			*optval = sk->sk_rcvlowat;
-			break;
-		case SO_MAX_PACING_RATE:
-			*optval = sk->sk_max_pacing_rate;
-			break;
-		default:
-			return bpf_getsockopt(ctx, level, opt, optval, optlen);
-		}
-		return 0;
-	}
-
-	if (level == IPPROTO_TCP) {
-		struct tcp_sock *tp = bpf_skc_to_tcp_sock(sk);
-
-		if (!tp)
-			return -1;
-
-		switch (opt) {
-		case TCP_NODELAY:
-			*optval = !!(BPF_CORE_READ_BITFIELD(tp, nonagle) & TCP_NAGLE_OFF);
-			break;
-		case TCP_MAXSEG:
-			*optval = tp->rx_opt.user_mss;
-			break;
-		case TCP_KEEPIDLE:
-			*optval = tp->keepalive_time / CONFIG_HZ;
-			break;
-		case TCP_SYNCNT:
-			*optval = tp->inet_conn.icsk_syn_retries;
-			break;
-		case TCP_KEEPINTVL:
-			*optval = tp->keepalive_intvl / CONFIG_HZ;
-			break;
-		case TCP_KEEPCNT:
-			*optval = tp->keepalive_probes;
-			break;
-		case TCP_WINDOW_CLAMP:
-			*optval = tp->window_clamp;
-			break;
-		case TCP_THIN_LINEAR_TIMEOUTS:
-			*optval = !!BPF_CORE_READ_BITFIELD(tp, thin_lto);
-			break;
-		case TCP_USER_TIMEOUT:
-			*optval = tp->inet_conn.icsk_user_timeout;
-			break;
-		case TCP_NOTSENT_LOWAT:
-			*optval = tp->notsent_lowat;
-			break;
-		case TCP_SAVE_SYN:
-			*optval = BPF_CORE_READ_BITFIELD(tp, save_syn);
-			break;
-		default:
-			return bpf_getsockopt(ctx, level, opt, optval, optlen);
-		}
-		return 0;
-	}
-
-	if (level == IPPROTO_IPV6) {
-		switch (opt) {
-		case IPV6_AUTOFLOWLABEL: {
-			__u16 proto = sk->sk_protocol;
-			struct inet_sock *inet_sk;
-
-			if (proto == IPPROTO_TCP)
-				inet_sk = (struct inet_sock *)bpf_skc_to_tcp_sock(sk);
-			else
-				inet_sk = (struct inet_sock *)bpf_skc_to_udp6_sock(sk);
-
-			if (!inet_sk)
-				return -1;
-
-			*optval = !!inet_sk->pinet6->autoflowlabel;
-			break;
-		}
-		default:
-			return bpf_getsockopt(ctx, level, opt, optval, optlen);
-		}
-		return 0;
-	}
-
-	return bpf_getsockopt(ctx, level, opt, optval, optlen);
-}
-
 static int bpf_test_sockopt_flip(void *ctx, struct sock *sk,
 				 const struct sockopt_test *t,
 				 int level)
@@ -186,7 +88,7 @@ static int bpf_test_sockopt_flip(void *ctx, struct sock *sk,
 
 	opt = t->opt;
 
-	if (__bpf_getsockopt(ctx, sk, level, opt, &old, sizeof(old)))
+	if (bpf_getsockopt(ctx, level, opt, &old, sizeof(old)))
 		return 1;
 	/* kernel initialized txrehash to 255 */
 	if (level == SOL_SOCKET && opt == SO_TXREHASH && old != 0 && old != 1)
@@ -195,7 +97,7 @@ static int bpf_test_sockopt_flip(void *ctx, struct sock *sk,
 	new = !old;
 	if (bpf_setsockopt(ctx, level, opt, &new, sizeof(new)))
 		return 1;
-	if (__bpf_getsockopt(ctx, sk, level, opt, &tmp, sizeof(tmp)) ||
+	if (bpf_getsockopt(ctx, level, opt, &tmp, sizeof(tmp)) ||
 	    tmp != new)
 		return 1;
 
@@ -218,13 +120,13 @@ static int bpf_test_sockopt_int(void *ctx, struct sock *sk,
 	else
 		expected = t->expected;
 
-	if (__bpf_getsockopt(ctx, sk, level, opt, &old, sizeof(old)) ||
+	if (bpf_getsockopt(ctx, level, opt, &old, sizeof(old)) ||
 	    old == new)
 		return 1;
 
 	if (bpf_setsockopt(ctx, level, opt, &new, sizeof(new)))
 		return 1;
-	if (__bpf_getsockopt(ctx, sk, level, opt, &tmp, sizeof(tmp)) ||
+	if (bpf_getsockopt(ctx, level, opt, &tmp, sizeof(tmp)) ||
 	    tmp != expected)
 		return 1;
 
@@ -410,6 +312,34 @@ static int binddev_test(void *ctx)
 	return 0;
 }
 
+static int test_tcp_maxseg(void *ctx, struct sock *sk)
+{
+	int val = 1314, tmp;
+
+	if (sk->sk_state != TCP_ESTABLISHED)
+		return bpf_setsockopt(ctx, IPPROTO_TCP, TCP_MAXSEG,
+				      &val, sizeof(val));
+
+	if (bpf_getsockopt(ctx, IPPROTO_TCP, TCP_MAXSEG, &tmp, sizeof(tmp)) ||
+	    tmp > val)
+		return -1;
+
+	return 0;
+}
+
+static int test_tcp_saved_syn(void *ctx, struct sock *sk)
+{
+	__u8 saved_syn[20];
+	int one = 1;
+
+	if (sk->sk_state == TCP_LISTEN)
+		return bpf_setsockopt(ctx, IPPROTO_TCP, TCP_SAVE_SYN,
+				      &one, sizeof(one));
+
+	return bpf_getsockopt(ctx, IPPROTO_TCP, TCP_SAVED_SYN,
+			      saved_syn, sizeof(saved_syn));
+}
+
 SEC("lsm_cgroup/socket_post_create")
 int BPF_PROG(socket_post_create, struct socket *sock, int family,
 	     int type, int protocol, int kern)
@@ -440,16 +370,22 @@ int skops_sockopt(struct bpf_sock_ops *skops)
 
 	switch (skops->op) {
 	case BPF_SOCK_OPS_TCP_LISTEN_CB:
-		nr_listen += !bpf_test_sockopt(skops, sk);
+		nr_listen += !(bpf_test_sockopt(skops, sk) ||
+			       test_tcp_maxseg(skops, sk) ||
+			       test_tcp_saved_syn(skops, sk));
 		break;
 	case BPF_SOCK_OPS_TCP_CONNECT_CB:
-		nr_connect += !bpf_test_sockopt(skops, sk);
+		nr_connect += !(bpf_test_sockopt(skops, sk) ||
+				test_tcp_maxseg(skops, sk));
 		break;
 	case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
-		nr_active += !bpf_test_sockopt(skops, sk);
+		nr_active += !(bpf_test_sockopt(skops, sk) ||
+			       test_tcp_maxseg(skops, sk));
 		break;
 	case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
-		nr_passive += !bpf_test_sockopt(skops, sk);
+		nr_passive += !(bpf_test_sockopt(skops, sk) ||
+				test_tcp_maxseg(skops, sk) ||
+				test_tcp_saved_syn(skops, sk));
 		break;
 	}
 
-- 
cgit v1.2.3


From 37521bffdd2d1efcb1dbdfd3ee89584c8943421c Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Fri, 2 Sep 2022 14:10:45 -0700
Subject: selftests/bpf: Improve test coverage of test_maps

Make test_maps more stressful with more parallelism in
update/delete/lookup/walk including different value sizes.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220902211058.60789-4-alexei.starovoitov@gmail.com
---
 tools/testing/selftests/bpf/test_maps.c | 38 +++++++++++++++++++++------------
 1 file changed, 24 insertions(+), 14 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index c49f2056e14f..00b9cc305e58 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -264,10 +264,11 @@ static void test_hashmap_percpu(unsigned int task, void *data)
 	close(fd);
 }
 
+#define VALUE_SIZE 3
 static int helper_fill_hashmap(int max_entries)
 {
 	int i, fd, ret;
-	long long key, value;
+	long long key, value[VALUE_SIZE] = {};
 
 	fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(key), sizeof(value),
 			    max_entries, &map_opts);
@@ -276,8 +277,8 @@ static int helper_fill_hashmap(int max_entries)
 	      "err: %s, flags: 0x%x\n", strerror(errno), map_opts.map_flags);
 
 	for (i = 0; i < max_entries; i++) {
-		key = i; value = key;
-		ret = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST);
+		key = i; value[0] = key;
+		ret = bpf_map_update_elem(fd, &key, value, BPF_NOEXIST);
 		CHECK(ret != 0,
 		      "can't update hashmap",
 		      "err: %s\n", strerror(ret));
@@ -288,8 +289,8 @@ static int helper_fill_hashmap(int max_entries)
 
 static void test_hashmap_walk(unsigned int task, void *data)
 {
-	int fd, i, max_entries = 1000;
-	long long key, value, next_key;
+	int fd, i, max_entries = 10000;
+	long long key, value[VALUE_SIZE], next_key;
 	bool next_key_valid = true;
 
 	fd = helper_fill_hashmap(max_entries);
@@ -297,7 +298,7 @@ static void test_hashmap_walk(unsigned int task, void *data)
 	for (i = 0; bpf_map_get_next_key(fd, !i ? NULL : &key,
 					 &next_key) == 0; i++) {
 		key = next_key;
-		assert(bpf_map_lookup_elem(fd, &key, &value) == 0);
+		assert(bpf_map_lookup_elem(fd, &key, value) == 0);
 	}
 
 	assert(i == max_entries);
@@ -305,9 +306,9 @@ static void test_hashmap_walk(unsigned int task, void *data)
 	assert(bpf_map_get_next_key(fd, NULL, &key) == 0);
 	for (i = 0; next_key_valid; i++) {
 		next_key_valid = bpf_map_get_next_key(fd, &key, &next_key) == 0;
-		assert(bpf_map_lookup_elem(fd, &key, &value) == 0);
-		value++;
-		assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) == 0);
+		assert(bpf_map_lookup_elem(fd, &key, value) == 0);
+		value[0]++;
+		assert(bpf_map_update_elem(fd, &key, value, BPF_EXIST) == 0);
 		key = next_key;
 	}
 
@@ -316,8 +317,8 @@ static void test_hashmap_walk(unsigned int task, void *data)
 	for (i = 0; bpf_map_get_next_key(fd, !i ? NULL : &key,
 					 &next_key) == 0; i++) {
 		key = next_key;
-		assert(bpf_map_lookup_elem(fd, &key, &value) == 0);
-		assert(value - 1 == key);
+		assert(bpf_map_lookup_elem(fd, &key, value) == 0);
+		assert(value[0] - 1 == key);
 	}
 
 	assert(i == max_entries);
@@ -1371,16 +1372,16 @@ static void __run_parallel(unsigned int tasks,
 
 static void test_map_stress(void)
 {
+	run_parallel(100, test_hashmap_walk, NULL);
 	run_parallel(100, test_hashmap, NULL);
 	run_parallel(100, test_hashmap_percpu, NULL);
 	run_parallel(100, test_hashmap_sizes, NULL);
-	run_parallel(100, test_hashmap_walk, NULL);
 
 	run_parallel(100, test_arraymap, NULL);
 	run_parallel(100, test_arraymap_percpu, NULL);
 }
 
-#define TASKS 1024
+#define TASKS 100
 
 #define DO_UPDATE 1
 #define DO_DELETE 0
@@ -1432,6 +1433,8 @@ static void test_update_delete(unsigned int fn, void *data)
 	int fd = ((int *)data)[0];
 	int i, key, value, err;
 
+	if (fn & 1)
+		test_hashmap_walk(fn, NULL);
 	for (i = fn; i < MAP_SIZE; i += TASKS) {
 		key = value = i;
 
@@ -1455,7 +1458,7 @@ static void test_update_delete(unsigned int fn, void *data)
 
 static void test_map_parallel(void)
 {
-	int i, fd, key = 0, value = 0;
+	int i, fd, key = 0, value = 0, j = 0;
 	int data[2];
 
 	fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(key), sizeof(value),
@@ -1466,6 +1469,7 @@ static void test_map_parallel(void)
 		exit(1);
 	}
 
+again:
 	/* Use the same fd in children to add elements to this map:
 	 * child_0 adds key=0, key=1024, key=2048, ...
 	 * child_1 adds key=1, key=1025, key=2049, ...
@@ -1502,6 +1506,12 @@ static void test_map_parallel(void)
 	key = -1;
 	assert(bpf_map_get_next_key(fd, NULL, &key) < 0 && errno == ENOENT);
 	assert(bpf_map_get_next_key(fd, &key, &key) < 0 && errno == ENOENT);
+
+	key = 0;
+	bpf_map_delete_elem(fd, &key);
+	if (j++ < 5)
+		goto again;
+	close(fd);
 }
 
 static void test_map_rdonly(void)
-- 
cgit v1.2.3


From 0fd7c5d43339b783ee3301a05f925d1e52ac87c9 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Fri, 2 Sep 2022 14:10:49 -0700
Subject: bpf: Optimize call_rcu in non-preallocated hash map.

Doing call_rcu() a million times a second becomes a bottleneck.
Convert non-preallocated hash map from call_rcu to SLAB_TYPESAFE_BY_RCU.
The rcu critical section is no longer observed for one htab element
which makes non-preallocated hash map behave just like preallocated hash map.
The map elements are released back to kernel memory after observing
rcu critical section.
This improves 'map_perf_test 4' performance from 100k events per second
to 250k events per second.

bpf_mem_alloc + percpu_counter + typesafe_by_rcu provide a 10x performance
boost to the non-preallocated hash map and bring it within a few % of the
preallocated map while consuming a fraction of the memory.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220902211058.60789-8-alexei.starovoitov@gmail.com
---
 kernel/bpf/hashtab.c                      |  8 ++++++--
 kernel/bpf/memalloc.c                     |  2 +-
 tools/testing/selftests/bpf/progs/timer.c | 11 -----------
 3 files changed, 7 insertions(+), 14 deletions(-)

(limited to 'tools')

diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 36aa16dc43ad..0d888a90a805 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -953,8 +953,12 @@ static void free_htab_elem(struct bpf_htab *htab, struct htab_elem *l)
 		__pcpu_freelist_push(&htab->freelist, &l->fnode);
 	} else {
 		dec_elem_count(htab);
-		l->htab = htab;
-		call_rcu(&l->rcu, htab_elem_free_rcu);
+		if (htab->map.map_type == BPF_MAP_TYPE_PERCPU_HASH) {
+			l->htab = htab;
+			call_rcu(&l->rcu, htab_elem_free_rcu);
+		} else {
+			htab_elem_free(htab, l);
+		}
 	}
 }
 
diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
index 1c46763d855e..da0721f8c28f 100644
--- a/kernel/bpf/memalloc.c
+++ b/kernel/bpf/memalloc.c
@@ -281,7 +281,7 @@ int bpf_mem_alloc_init(struct bpf_mem_alloc *ma, int size)
 			return -ENOMEM;
 		size += LLIST_NODE_SZ; /* room for llist_node */
 		snprintf(buf, sizeof(buf), "bpf-%u", size);
-		kmem_cache = kmem_cache_create(buf, size, 8, 0, NULL);
+		kmem_cache = kmem_cache_create(buf, size, 8, SLAB_TYPESAFE_BY_RCU, NULL);
 		if (!kmem_cache) {
 			free_percpu(pc);
 			return -ENOMEM;
diff --git a/tools/testing/selftests/bpf/progs/timer.c b/tools/testing/selftests/bpf/progs/timer.c
index 5f5309791649..0053c5402173 100644
--- a/tools/testing/selftests/bpf/progs/timer.c
+++ b/tools/testing/selftests/bpf/progs/timer.c
@@ -208,17 +208,6 @@ static int timer_cb2(void *map, int *key, struct hmap_elem *val)
 		 */
 		bpf_map_delete_elem(map, key);
 
-		/* in non-preallocated hashmap both 'key' and 'val' are RCU
-		 * protected and still valid though this element was deleted
-		 * from the map. Arm this timer for ~35 seconds. When callback
-		 * finishes the call_rcu will invoke:
-		 * htab_elem_free_rcu
-		 *   check_and_free_timer
-		 *     bpf_timer_cancel_and_free
-		 * to cancel this 35 second sleep and delete the timer for real.
-		 */
-		if (bpf_timer_start(&val->timer, 1ull << 35, 0) != 0)
-			err |= 256;
 		ok |= 4;
 	}
 	return 0;
-- 
cgit v1.2.3


From 27ed9353aec9de4277b3389c9f2b04beb6ab7622 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Wed, 31 Aug 2022 08:26:57 -0700
Subject: bpf: Update descriptions for helpers bpf_get_func_arg[_cnt]()

Now, instead of the number of arguments, the number of registers
holding argument values is stored in the trampoline. Update
the description of the bpf_get_func_arg[_cnt]() helpers. Previous
programs without struct arguments should continue to work
as usual.

Signed-off-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/20220831152657.2078805-1-yhs@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/uapi/linux/bpf.h       | 9 +++++----
 tools/include/uapi/linux/bpf.h | 9 +++++----
 2 files changed, 10 insertions(+), 8 deletions(-)

(limited to 'tools')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 793103b10eab..3df78c56c1bf 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5079,12 +5079,12 @@ union bpf_attr {
  *
  * long bpf_get_func_arg(void *ctx, u32 n, u64 *value)
  *	Description
- *		Get **n**-th argument (zero based) of the traced function (for tracing programs)
+ *		Get **n**-th argument register (zero based) of the traced function (for tracing programs)
  *		returned in **value**.
  *
  *	Return
  *		0 on success.
- *		**-EINVAL** if n >= arguments count of traced function.
+ *		**-EINVAL** if n >= argument register count of traced function.
  *
  * long bpf_get_func_ret(void *ctx, u64 *value)
  *	Description
@@ -5097,10 +5097,11 @@ union bpf_attr {
  *
  * long bpf_get_func_arg_cnt(void *ctx)
  *	Description
- *		Get number of arguments of the traced function (for tracing programs).
+ *		Get number of registers of the traced function (for tracing programs) where
+ *		function arguments are stored in these registers.
  *
  *	Return
- *		The number of arguments of the traced function.
+ *		The number of argument registers of the traced function.
  *
  * int bpf_get_retval(void)
  *	Description
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 793103b10eab..3df78c56c1bf 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -5079,12 +5079,12 @@ union bpf_attr {
  *
  * long bpf_get_func_arg(void *ctx, u32 n, u64 *value)
  *	Description
- *		Get **n**-th argument (zero based) of the traced function (for tracing programs)
+ *		Get **n**-th argument register (zero based) of the traced function (for tracing programs)
  *		returned in **value**.
  *
  *	Return
  *		0 on success.
- *		**-EINVAL** if n >= arguments count of traced function.
+ *		**-EINVAL** if n >= argument register count of traced function.
  *
  * long bpf_get_func_ret(void *ctx, u64 *value)
  *	Description
@@ -5097,10 +5097,11 @@ union bpf_attr {
  *
  * long bpf_get_func_arg_cnt(void *ctx)
  *	Description
- *		Get number of arguments of the traced function (for tracing programs).
+ *		Get number of registers of the traced function (for tracing programs) where
+ *		function arguments are stored in these registers.
  *
  *	Return
- *		The number of arguments of the traced function.
+ *		The number of argument registers of the traced function.
  *
  * int bpf_get_retval(void)
  *	Description
-- 
cgit v1.2.3


From 34586d29f8dfc4ae30642c5b9a4db8a4a7af6869 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Wed, 31 Aug 2022 08:27:07 -0700
Subject: libbpf: Add new BPF_PROG2 macro

The existing BPF_PROG macro cannot support struct arguments in
trampoline-based programs, since the type size is needed to
determine whether a parameter takes one or two registers.
So this patch adds a new BPF_PROG2 macro to support such
trampoline programs.

The idea was suggested by Andrii. For example, if the
to-be-traced function has a signature like
  typedef struct {
       void *x;
       int t;
  } sockptr;
  int blah(sockptr x, char y);

In the new BPF_PROG2 macro, the argument can be
represented as
  __bpf_prog_call(
     ({ union {
          struct { __u64 x, y; } ___z;
          sockptr x;
        } ___tmp = { .___z = { ctx[0], ctx[1] }};
        ___tmp.x;
     }),
     ({ union {
          struct { __u8 x; } ___z;
          char y;
        } ___tmp = { .___z = { ctx[2] }};
        ___tmp.y;
     }));
In the above, the values stored on the stack are properly
assigned to the actual argument type value by using 'union'
magic. Note that the macro also works even if none of the
arguments have struct types.

Note that the new BPF_PROG2 works with both llvm16 and pre-llvm16
compilers: for the bpf target, llvm16 supports passing structs of
up to 16 bytes by value, while pre-llvm16 passes them by reference,
storing the values on the stack. Because the static function taking
the struct arguments is marked always-inline, the compiler is able
to optimize away the additional stack saving of struct values.

Signed-off-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/20220831152707.2079473-1-yhs@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/lib/bpf/bpf_tracing.h | 79 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 79 insertions(+)

(limited to 'tools')

diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h
index 5fdb93da423b..8d4bdd18cb3d 100644
--- a/tools/lib/bpf/bpf_tracing.h
+++ b/tools/lib/bpf/bpf_tracing.h
@@ -438,6 +438,85 @@ typeof(name(0)) name(unsigned long long *ctx)				    \
 static __always_inline typeof(name(0))					    \
 ____##name(unsigned long long *ctx, ##args)
 
+#ifndef ____bpf_nth
+#define ____bpf_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, N, ...) N
+#endif
+#ifndef ____bpf_narg
+#define ____bpf_narg(...) ____bpf_nth(_, ##__VA_ARGS__, 12, 12, 11, 11, 10, 10, 9, 9, 8, 8, 7, 7, 6, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0)
+#endif
+
+#define BPF_REG_CNT(t) \
+	(__builtin_choose_expr(sizeof(t) == 1 || sizeof(t) == 2 || sizeof(t) == 4 || sizeof(t) == 8, 1,	\
+	 __builtin_choose_expr(sizeof(t) == 16, 2,							\
+			       (void)0)))
+
+#define ____bpf_reg_cnt0()			(0)
+#define ____bpf_reg_cnt1(t, x)			(____bpf_reg_cnt0() + BPF_REG_CNT(t))
+#define ____bpf_reg_cnt2(t, x, args...)		(____bpf_reg_cnt1(args) + BPF_REG_CNT(t))
+#define ____bpf_reg_cnt3(t, x, args...)		(____bpf_reg_cnt2(args) + BPF_REG_CNT(t))
+#define ____bpf_reg_cnt4(t, x, args...)		(____bpf_reg_cnt3(args) + BPF_REG_CNT(t))
+#define ____bpf_reg_cnt5(t, x, args...)		(____bpf_reg_cnt4(args) + BPF_REG_CNT(t))
+#define ____bpf_reg_cnt6(t, x, args...)		(____bpf_reg_cnt5(args) + BPF_REG_CNT(t))
+#define ____bpf_reg_cnt7(t, x, args...)		(____bpf_reg_cnt6(args) + BPF_REG_CNT(t))
+#define ____bpf_reg_cnt8(t, x, args...)		(____bpf_reg_cnt7(args) + BPF_REG_CNT(t))
+#define ____bpf_reg_cnt9(t, x, args...)		(____bpf_reg_cnt8(args) + BPF_REG_CNT(t))
+#define ____bpf_reg_cnt10(t, x, args...)	(____bpf_reg_cnt9(args) + BPF_REG_CNT(t))
+#define ____bpf_reg_cnt11(t, x, args...)	(____bpf_reg_cnt10(args) + BPF_REG_CNT(t))
+#define ____bpf_reg_cnt12(t, x, args...)	(____bpf_reg_cnt11(args) + BPF_REG_CNT(t))
+#define ____bpf_reg_cnt(args...)	 ___bpf_apply(____bpf_reg_cnt, ____bpf_narg(args))(args)
+
+#define ____bpf_union_arg(t, x, n) \
+	__builtin_choose_expr(sizeof(t) == 1, ({ union { struct { __u8 x; } ___z; t x; } ___tmp = { .___z = {ctx[n]}}; ___tmp.x; }), \
+	__builtin_choose_expr(sizeof(t) == 2, ({ union { struct { __u16 x; } ___z; t x; } ___tmp = { .___z = {ctx[n]} }; ___tmp.x; }), \
+	__builtin_choose_expr(sizeof(t) == 4, ({ union { struct { __u32 x; } ___z; t x; } ___tmp = { .___z = {ctx[n]} }; ___tmp.x; }), \
+	__builtin_choose_expr(sizeof(t) == 8, ({ union { struct { __u64 x; } ___z; t x; } ___tmp = {.___z = {ctx[n]} }; ___tmp.x; }), \
+	__builtin_choose_expr(sizeof(t) == 16, ({ union { struct { __u64 x, y; } ___z; t x; } ___tmp = {.___z = {ctx[n], ctx[n + 1]} }; ___tmp.x; }), \
+			      (void)0)))))
+
+#define ____bpf_ctx_arg0(n, args...)
+#define ____bpf_ctx_arg1(n, t, x)		, ____bpf_union_arg(t, x, n - ____bpf_reg_cnt1(t, x))
+#define ____bpf_ctx_arg2(n, t, x, args...)	, ____bpf_union_arg(t, x, n - ____bpf_reg_cnt2(t, x, args)) ____bpf_ctx_arg1(n, args)
+#define ____bpf_ctx_arg3(n, t, x, args...)	, ____bpf_union_arg(t, x, n - ____bpf_reg_cnt3(t, x, args)) ____bpf_ctx_arg2(n, args)
+#define ____bpf_ctx_arg4(n, t, x, args...)	, ____bpf_union_arg(t, x, n - ____bpf_reg_cnt4(t, x, args)) ____bpf_ctx_arg3(n, args)
+#define ____bpf_ctx_arg5(n, t, x, args...)	, ____bpf_union_arg(t, x, n - ____bpf_reg_cnt5(t, x, args)) ____bpf_ctx_arg4(n, args)
+#define ____bpf_ctx_arg6(n, t, x, args...)	, ____bpf_union_arg(t, x, n - ____bpf_reg_cnt6(t, x, args)) ____bpf_ctx_arg5(n, args)
+#define ____bpf_ctx_arg7(n, t, x, args...)	, ____bpf_union_arg(t, x, n - ____bpf_reg_cnt7(t, x, args)) ____bpf_ctx_arg6(n, args)
+#define ____bpf_ctx_arg8(n, t, x, args...)	, ____bpf_union_arg(t, x, n - ____bpf_reg_cnt8(t, x, args)) ____bpf_ctx_arg7(n, args)
+#define ____bpf_ctx_arg9(n, t, x, args...)	, ____bpf_union_arg(t, x, n - ____bpf_reg_cnt9(t, x, args)) ____bpf_ctx_arg8(n, args)
+#define ____bpf_ctx_arg10(n, t, x, args...)	, ____bpf_union_arg(t, x, n - ____bpf_reg_cnt10(t, x, args)) ____bpf_ctx_arg9(n, args)
+#define ____bpf_ctx_arg11(n, t, x, args...)	, ____bpf_union_arg(t, x, n - ____bpf_reg_cnt11(t, x, args)) ____bpf_ctx_arg10(n, args)
+#define ____bpf_ctx_arg12(n, t, x, args...)	, ____bpf_union_arg(t, x, n - ____bpf_reg_cnt12(t, x, args)) ____bpf_ctx_arg11(n, args)
+#define ____bpf_ctx_arg(n, args...)	___bpf_apply(____bpf_ctx_arg, ____bpf_narg(args))(n, args)
+
+#define ____bpf_ctx_decl0()
+#define ____bpf_ctx_decl1(t, x)			, t x
+#define ____bpf_ctx_decl2(t, x, args...)	, t x ____bpf_ctx_decl1(args)
+#define ____bpf_ctx_decl3(t, x, args...)	, t x ____bpf_ctx_decl2(args)
+#define ____bpf_ctx_decl4(t, x, args...)	, t x ____bpf_ctx_decl3(args)
+#define ____bpf_ctx_decl5(t, x, args...)	, t x ____bpf_ctx_decl4(args)
+#define ____bpf_ctx_decl6(t, x, args...)	, t x ____bpf_ctx_decl5(args)
+#define ____bpf_ctx_decl7(t, x, args...)	, t x ____bpf_ctx_decl6(args)
+#define ____bpf_ctx_decl8(t, x, args...)	, t x ____bpf_ctx_decl7(args)
+#define ____bpf_ctx_decl9(t, x, args...)	, t x ____bpf_ctx_decl8(args)
+#define ____bpf_ctx_decl10(t, x, args...)	, t x ____bpf_ctx_decl9(args)
+#define ____bpf_ctx_decl11(t, x, args...)	, t x ____bpf_ctx_decl10(args)
+#define ____bpf_ctx_decl12(t, x, args...)	, t x ____bpf_ctx_decl11(args)
+#define ____bpf_ctx_decl(args...)	___bpf_apply(____bpf_ctx_decl, ____bpf_narg(args))(args)
+
+/*
+ * BPF_PROG2 can handle struct arguments.
+ */
+#define BPF_PROG2(name, args...)						\
+name(unsigned long long *ctx);							\
+static __always_inline typeof(name(0))						\
+____##name(unsigned long long *ctx ____bpf_ctx_decl(args));			\
+typeof(name(0)) name(unsigned long long *ctx)					\
+{										\
+	return ____##name(ctx ____bpf_ctx_arg(____bpf_reg_cnt(args), args));	\
+}										\
+static __always_inline typeof(name(0))						\
+____##name(unsigned long long *ctx ____bpf_ctx_decl(args))
+
 struct pt_regs;
 
 #define ___bpf_kprobe_args0()           ctx
-- 
cgit v1.2.3


From 1642a3945e223a922312fab2401ecdf58b3825b9 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Wed, 31 Aug 2022 08:27:13 -0700
Subject: selftests/bpf: Add struct argument tests with fentry/fexit programs.

Add various struct argument tests with fentry/fexit programs.
Also add one test with a kernel func which does not have any
arguments, to test the BPF_PROG2 macro in that situation.

Signed-off-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/20220831152713.2080039-1-yhs@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 .../selftests/bpf/bpf_testmod/bpf_testmod.c        |  48 +++++++++
 .../selftests/bpf/prog_tests/tracing_struct.c      |  63 +++++++++++
 tools/testing/selftests/bpf/progs/tracing_struct.c | 120 +++++++++++++++++++++
 3 files changed, 231 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/tracing_struct.c
 create mode 100644 tools/testing/selftests/bpf/progs/tracing_struct.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
index 792cb15bac40..a6021d6117b5 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
@@ -18,6 +18,46 @@ typedef int (*func_proto_typedef_nested1)(func_proto_typedef);
 typedef int (*func_proto_typedef_nested2)(func_proto_typedef_nested1);
 
 DEFINE_PER_CPU(int, bpf_testmod_ksym_percpu) = 123;
+long bpf_testmod_test_struct_arg_result;
+
+struct bpf_testmod_struct_arg_1 {
+	int a;
+};
+struct bpf_testmod_struct_arg_2 {
+	long a;
+	long b;
+};
+
+noinline int
+bpf_testmod_test_struct_arg_1(struct bpf_testmod_struct_arg_2 a, int b, int c) {
+	bpf_testmod_test_struct_arg_result = a.a + a.b  + b + c;
+	return bpf_testmod_test_struct_arg_result;
+}
+
+noinline int
+bpf_testmod_test_struct_arg_2(int a, struct bpf_testmod_struct_arg_2 b, int c) {
+	bpf_testmod_test_struct_arg_result = a + b.a + b.b + c;
+	return bpf_testmod_test_struct_arg_result;
+}
+
+noinline int
+bpf_testmod_test_struct_arg_3(int a, int b, struct bpf_testmod_struct_arg_2 c) {
+	bpf_testmod_test_struct_arg_result = a + b + c.a + c.b;
+	return bpf_testmod_test_struct_arg_result;
+}
+
+noinline int
+bpf_testmod_test_struct_arg_4(struct bpf_testmod_struct_arg_1 a, int b,
+			      int c, int d, struct bpf_testmod_struct_arg_2 e) {
+	bpf_testmod_test_struct_arg_result = a.a + b + c + d + e.a + e.b;
+	return bpf_testmod_test_struct_arg_result;
+}
+
+noinline int
+bpf_testmod_test_struct_arg_5(void) {
+	bpf_testmod_test_struct_arg_result = 1;
+	return bpf_testmod_test_struct_arg_result;
+}
 
 noinline void
 bpf_testmod_test_mod_kfunc(int i)
@@ -98,11 +138,19 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj,
 		.off = off,
 		.len = len,
 	};
+	struct bpf_testmod_struct_arg_1 struct_arg1 = {10};
+	struct bpf_testmod_struct_arg_2 struct_arg2 = {2, 3};
 	int i = 1;
 
 	while (bpf_testmod_return_ptr(i))
 		i++;
 
+	(void)bpf_testmod_test_struct_arg_1(struct_arg2, 1, 4);
+	(void)bpf_testmod_test_struct_arg_2(1, struct_arg2, 4);
+	(void)bpf_testmod_test_struct_arg_3(1, 4, struct_arg2);
+	(void)bpf_testmod_test_struct_arg_4(struct_arg1, 1, 2, 3, struct_arg2);
+	(void)bpf_testmod_test_struct_arg_5();
+
 	/* This is always true. Use the check to make sure the compiler
 	 * doesn't remove bpf_testmod_loop_test.
 	 */
diff --git a/tools/testing/selftests/bpf/prog_tests/tracing_struct.c b/tools/testing/selftests/bpf/prog_tests/tracing_struct.c
new file mode 100644
index 000000000000..d5022b91d1e4
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/tracing_struct.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <test_progs.h>
+#include "tracing_struct.skel.h"
+
+static void test_fentry(void)
+{
+	struct tracing_struct *skel;
+	int err;
+
+	skel = tracing_struct__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "tracing_struct__open_and_load"))
+		return;
+
+	err = tracing_struct__attach(skel);
+	if (!ASSERT_OK(err, "tracing_struct__attach"))
+		return;
+
+	ASSERT_OK(trigger_module_test_read(256), "trigger_read");
+
+	ASSERT_EQ(skel->bss->t1_a_a, 2, "t1:a.a");
+	ASSERT_EQ(skel->bss->t1_a_b, 3, "t1:a.b");
+	ASSERT_EQ(skel->bss->t1_b, 1, "t1:b");
+	ASSERT_EQ(skel->bss->t1_c, 4, "t1:c");
+
+	ASSERT_EQ(skel->bss->t1_nregs, 4, "t1 nregs");
+	ASSERT_EQ(skel->bss->t1_reg0, 2, "t1 reg0");
+	ASSERT_EQ(skel->bss->t1_reg1, 3, "t1 reg1");
+	ASSERT_EQ(skel->bss->t1_reg2, 1, "t1 reg2");
+	ASSERT_EQ(skel->bss->t1_reg3, 4, "t1 reg3");
+	ASSERT_EQ(skel->bss->t1_ret, 10, "t1 ret");
+
+	ASSERT_EQ(skel->bss->t2_a, 1, "t2:a");
+	ASSERT_EQ(skel->bss->t2_b_a, 2, "t2:b.a");
+	ASSERT_EQ(skel->bss->t2_b_b, 3, "t2:b.b");
+	ASSERT_EQ(skel->bss->t2_c, 4, "t2:c");
+	ASSERT_EQ(skel->bss->t2_ret, 10, "t2 ret");
+
+	ASSERT_EQ(skel->bss->t3_a, 1, "t3:a");
+	ASSERT_EQ(skel->bss->t3_b, 4, "t3:b");
+	ASSERT_EQ(skel->bss->t3_c_a, 2, "t3:c.a");
+	ASSERT_EQ(skel->bss->t3_c_b, 3, "t3:c.b");
+	ASSERT_EQ(skel->bss->t3_ret, 10, "t3 ret");
+
+	ASSERT_EQ(skel->bss->t4_a_a, 10, "t4:a.a");
+	ASSERT_EQ(skel->bss->t4_b, 1, "t4:b");
+	ASSERT_EQ(skel->bss->t4_c, 2, "t4:c");
+	ASSERT_EQ(skel->bss->t4_d, 3, "t4:d");
+	ASSERT_EQ(skel->bss->t4_e_a, 2, "t4:e.a");
+	ASSERT_EQ(skel->bss->t4_e_b, 3, "t4:e.b");
+	ASSERT_EQ(skel->bss->t4_ret, 21, "t4 ret");
+
+	ASSERT_EQ(skel->bss->t5_ret, 1, "t5 ret");
+
+	tracing_struct__detach(skel);
+	tracing_struct__destroy(skel);
+}
+
+void test_tracing_struct(void)
+{
+	test_fentry();
+}
diff --git a/tools/testing/selftests/bpf/progs/tracing_struct.c b/tools/testing/selftests/bpf/progs/tracing_struct.c
new file mode 100644
index 000000000000..e718f0ebee7d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tracing_struct.c
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+struct bpf_testmod_struct_arg_1 {
+	int a;
+};
+struct bpf_testmod_struct_arg_2 {
+	long a;
+	long b;
+};
+
+long t1_a_a, t1_a_b, t1_b, t1_c, t1_ret, t1_nregs;
+__u64 t1_reg0, t1_reg1, t1_reg2, t1_reg3;
+long t2_a, t2_b_a, t2_b_b, t2_c, t2_ret;
+long t3_a, t3_b, t3_c_a, t3_c_b, t3_ret;
+long t4_a_a, t4_b, t4_c, t4_d, t4_e_a, t4_e_b, t4_ret;
+long t5_ret;
+
+SEC("fentry/bpf_testmod_test_struct_arg_1")
+int BPF_PROG2(test_struct_arg_1, struct bpf_testmod_struct_arg_2, a, int, b, int, c)
+{
+	t1_a_a = a.a;
+	t1_a_b = a.b;
+	t1_b = b;
+	t1_c = c;
+	return 0;
+}
+
+SEC("fexit/bpf_testmod_test_struct_arg_1")
+int BPF_PROG2(test_struct_arg_2, struct bpf_testmod_struct_arg_2, a, int, b, int, c, int, ret)
+{
+	t1_nregs =  bpf_get_func_arg_cnt(ctx);
+	/* a.a */
+	bpf_get_func_arg(ctx, 0, &t1_reg0);
+	/* a.b */
+	bpf_get_func_arg(ctx, 1, &t1_reg1);
+	/* b */
+	bpf_get_func_arg(ctx, 2, &t1_reg2);
+	t1_reg2 = (int)t1_reg2;
+	/* c */
+	bpf_get_func_arg(ctx, 3, &t1_reg3);
+	t1_reg3 = (int)t1_reg3;
+
+	t1_ret = ret;
+	return 0;
+}
+
+SEC("fentry/bpf_testmod_test_struct_arg_2")
+int BPF_PROG2(test_struct_arg_3, int, a, struct bpf_testmod_struct_arg_2, b, int, c)
+{
+	t2_a = a;
+	t2_b_a = b.a;
+	t2_b_b = b.b;
+	t2_c = c;
+	return 0;
+}
+
+SEC("fexit/bpf_testmod_test_struct_arg_2")
+int BPF_PROG2(test_struct_arg_4, int, a, struct bpf_testmod_struct_arg_2, b, int, c, int, ret)
+{
+	t2_ret = ret;
+	return 0;
+}
+
+SEC("fentry/bpf_testmod_test_struct_arg_3")
+int BPF_PROG2(test_struct_arg_5, int, a, int, b, struct bpf_testmod_struct_arg_2, c)
+{
+	t3_a = a;
+	t3_b = b;
+	t3_c_a = c.a;
+	t3_c_b = c.b;
+	return 0;
+}
+
+SEC("fexit/bpf_testmod_test_struct_arg_3")
+int BPF_PROG2(test_struct_arg_6, int, a, int, b, struct bpf_testmod_struct_arg_2, c, int, ret)
+{
+	t3_ret = ret;
+	return 0;
+}
+
+SEC("fentry/bpf_testmod_test_struct_arg_4")
+int BPF_PROG2(test_struct_arg_7, struct bpf_testmod_struct_arg_1, a, int, b,
+	     int, c, int, d, struct bpf_testmod_struct_arg_2, e)
+{
+	t4_a_a = a.a;
+	t4_b = b;
+	t4_c = c;
+	t4_d = d;
+	t4_e_a = e.a;
+	t4_e_b = e.b;
+	return 0;
+}
+
+SEC("fexit/bpf_testmod_test_struct_arg_4")
+int BPF_PROG2(test_struct_arg_8, struct bpf_testmod_struct_arg_1, a, int, b,
+	     int, c, int, d, struct bpf_testmod_struct_arg_2, e, int, ret)
+{
+	t4_ret = ret;
+	return 0;
+}
+
+SEC("fentry/bpf_testmod_test_struct_arg_5")
+int BPF_PROG2(test_struct_arg_9)
+{
+	return 0;
+}
+
+SEC("fexit/bpf_testmod_test_struct_arg_5")
+int BPF_PROG2(test_struct_arg_10, int, ret)
+{
+	t5_ret = ret;
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
-- 
cgit v1.2.3


From a7c2ca3a2f697044094475055b3fba3929b234e4 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Wed, 31 Aug 2022 08:27:18 -0700
Subject: selftests/bpf: Use BPF_PROG2 for some fentry programs without struct
 arguments

Use BPF_PROG2 instead of BPF_PROG for programs in progs/timer.c
to test BPF_PROG2 for cases without struct arguments.

Signed-off-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/20220831152718.2081091-1-yhs@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/progs/timer.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/timer.c b/tools/testing/selftests/bpf/progs/timer.c
index 0053c5402173..acda5c9cea93 100644
--- a/tools/testing/selftests/bpf/progs/timer.c
+++ b/tools/testing/selftests/bpf/progs/timer.c
@@ -120,7 +120,7 @@ static int timer_cb1(void *map, int *key, struct bpf_timer *timer)
 }
 
 SEC("fentry/bpf_fentry_test1")
-int BPF_PROG(test1, int a)
+int BPF_PROG2(test1, int, a)
 {
 	struct bpf_timer *arr_timer, *lru_timer;
 	struct elem init = {};
@@ -236,7 +236,7 @@ int bpf_timer_test(void)
 }
 
 SEC("fentry/bpf_fentry_test2")
-int BPF_PROG(test2, int a, int b)
+int BPF_PROG2(test2, int, a, int, b)
 {
 	struct hmap_elem init = {}, *val;
 	int key = HTAB, key_malloc = HTAB_MALLOC;
-- 
cgit v1.2.3


From ae63c10fc241a94bb916da96d40c8810f9ad7f18 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Wed, 31 Aug 2022 08:27:23 -0700
Subject: selftests/bpf: Add tracing_struct test in DENYLIST.s390x

Add tracing_struct test in DENYLIST.s390x since s390x does not
support trampoline now.

Signed-off-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/r/20220831152723.2081551-1-yhs@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/DENYLIST.s390x | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x
index 18fbb6eab1e2..168c5b287b5c 100644
--- a/tools/testing/selftests/bpf/DENYLIST.s390x
+++ b/tools/testing/selftests/bpf/DENYLIST.s390x
@@ -70,3 +70,4 @@ setget_sockopt                           # attach unexpected error: -524
 cb_refs                                  # expected error message unexpected error: -524                               (trampoline)
 cgroup_hierarchical_stats                # JIT does not support calling kernel function                                (kfunc)
 htab_update                              # failed to attach: ERROR: strerror_r(-524)=22                                (trampoline)
+tracing_struct                           # failed to auto-attach: -524                                                 (trampoline)
-- 
cgit v1.2.3


From 012ba1156e4a7b38062d109b818cb479a68c87ba Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Date: Tue, 6 Sep 2022 17:12:57 +0200
Subject: selftests/bpf: regroup and declare similar kfuncs selftests in an
 array

Similar to tools/testing/selftests/bpf/prog_tests/dynptr.c:
we declare an array of tests that we run one by one in a for loop.

Followup patches will add more similar-ish tests, so avoid a lot of copy
paste by grouping the declaration in an array.

For light skeletons, we have to rely on the offsetof() macro so we can
statically declare which program we are using.
In the libbpf case, we can rely on bpf_object__find_program_by_name().
So also change the Makefile to generate both light skeletons and normal
ones.

Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Acked-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220906151303.2780789-2-benjamin.tissoires@redhat.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/Makefile               |  5 +-
 .../testing/selftests/bpf/prog_tests/kfunc_call.c  | 81 +++++++++++++++++-----
 2 files changed, 68 insertions(+), 18 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index c10adecb5a73..6cd327f1f216 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -351,11 +351,12 @@ LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h		\
 		test_subskeleton.skel.h test_subskeleton_lib.skel.h	\
 		test_usdt.skel.h
 
-LSKELS := kfunc_call_test.c fentry_test.c fexit_test.c fexit_sleep.c \
+LSKELS := fentry_test.c fexit_test.c fexit_sleep.c \
 	test_ringbuf.c atomics.c trace_printk.c trace_vprintk.c \
 	map_ptr_kern.c core_kern.c core_kern_overflow.c
 # Generate both light skeleton and libbpf skeleton for these
-LSKELS_EXTRA := test_ksyms_module.c test_ksyms_weak.c kfunc_call_test_subprog.c
+LSKELS_EXTRA := test_ksyms_module.c test_ksyms_weak.c kfunc_call_test.c \
+	kfunc_call_test_subprog.c
 SKEL_BLACKLIST += $$(LSKELS)
 
 test_static_linked.skel.h-deps := test_static_linked1.bpf.o test_static_linked2.bpf.o
diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
index eede7c304f86..9dfbe5355a2d 100644
--- a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
+++ b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
@@ -2,6 +2,7 @@
 /* Copyright (c) 2021 Facebook */
 #include <test_progs.h>
 #include <network_helpers.h>
+#include "kfunc_call_test.skel.h"
 #include "kfunc_call_test.lskel.h"
 #include "kfunc_call_test_subprog.skel.h"
 #include "kfunc_call_test_subprog.lskel.h"
@@ -9,9 +10,31 @@
 
 #include "cap_helpers.h"
 
-static void test_main(void)
+struct kfunc_test_params {
+	const char *prog_name;
+	unsigned long lskel_prog_desc_offset;
+	int retval;
+};
+
+#define TC_TEST(name, __retval) \
+	{ \
+	  .prog_name = #name, \
+	  .lskel_prog_desc_offset = offsetof(struct kfunc_call_test_lskel, progs.name), \
+	  .retval = __retval, \
+	}
+
+static struct kfunc_test_params kfunc_tests[] = {
+	TC_TEST(kfunc_call_test1, 12),
+	TC_TEST(kfunc_call_test2, 3),
+	TC_TEST(kfunc_call_test_ref_btf_id, 0),
+};
+
+static void verify_success(struct kfunc_test_params *param)
 {
-	struct kfunc_call_test_lskel *skel;
+	struct kfunc_call_test_lskel *lskel = NULL;
+	struct bpf_prog_desc *lskel_prog;
+	struct kfunc_call_test *skel;
+	struct bpf_program *prog;
 	int prog_fd, err;
 	LIBBPF_OPTS(bpf_test_run_opts, topts,
 		.data_in = &pkt_v4,
@@ -19,26 +42,53 @@ static void test_main(void)
 		.repeat = 1,
 	);
 
-	skel = kfunc_call_test_lskel__open_and_load();
+	/* first test with normal libbpf */
+	skel = kfunc_call_test__open_and_load();
 	if (!ASSERT_OK_PTR(skel, "skel"))
 		return;
 
-	prog_fd = skel->progs.kfunc_call_test1.prog_fd;
-	err = bpf_prog_test_run_opts(prog_fd, &topts);
-	ASSERT_OK(err, "bpf_prog_test_run(test1)");
-	ASSERT_EQ(topts.retval, 12, "test1-retval");
+	prog = bpf_object__find_program_by_name(skel->obj, param->prog_name);
+	if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+		goto cleanup;
 
-	prog_fd = skel->progs.kfunc_call_test2.prog_fd;
+	prog_fd = bpf_program__fd(prog);
 	err = bpf_prog_test_run_opts(prog_fd, &topts);
-	ASSERT_OK(err, "bpf_prog_test_run(test2)");
-	ASSERT_EQ(topts.retval, 3, "test2-retval");
+	if (!ASSERT_OK(err, param->prog_name))
+		goto cleanup;
+
+	if (!ASSERT_EQ(topts.retval, param->retval, "retval"))
+		goto cleanup;
+
+	/* second test with light skeletons */
+	lskel = kfunc_call_test_lskel__open_and_load();
+	if (!ASSERT_OK_PTR(lskel, "lskel"))
+		goto cleanup;
+
+	lskel_prog = (struct bpf_prog_desc *)((char *)lskel + param->lskel_prog_desc_offset);
 
-	prog_fd = skel->progs.kfunc_call_test_ref_btf_id.prog_fd;
+	prog_fd = lskel_prog->prog_fd;
 	err = bpf_prog_test_run_opts(prog_fd, &topts);
-	ASSERT_OK(err, "bpf_prog_test_run(test_ref_btf_id)");
-	ASSERT_EQ(topts.retval, 0, "test_ref_btf_id-retval");
+	if (!ASSERT_OK(err, param->prog_name))
+		goto cleanup;
+
+	ASSERT_EQ(topts.retval, param->retval, "retval");
+
+cleanup:
+	kfunc_call_test__destroy(skel);
+	if (lskel)
+		kfunc_call_test_lskel__destroy(lskel);
+}
+
+static void test_main(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(kfunc_tests); i++) {
+		if (!test__start_subtest(kfunc_tests[i].prog_name))
+			continue;
 
-	kfunc_call_test_lskel__destroy(skel);
+		verify_success(&kfunc_tests[i]);
+	}
 }
 
 static void test_subprog(void)
@@ -121,8 +171,7 @@ static void test_destructive(void)
 
 void test_kfunc_call(void)
 {
-	if (test__start_subtest("main"))
-		test_main();
+	test_main();
 
 	if (test__start_subtest("subprog"))
 		test_subprog();
-- 
cgit v1.2.3


From fb66223a244f252273995557b23e0fa53092e92c Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Date: Tue, 6 Sep 2022 17:13:00 +0200
Subject: selftests/bpf: add test for accessing ctx from syscall program type

We need to also export the kfunc set to the syscall program type,
and then add a couple of eBPF programs that are testing those calls.

The first one checks for valid access, and the second one is OK
from a static analysis point of view but fails at run time because
we are trying to access outside of the allocated memory.

Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Link: https://lore.kernel.org/r/20220906151303.2780789-5-benjamin.tissoires@redhat.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 net/bpf/test_run.c                                 |   1 +
 .../testing/selftests/bpf/prog_tests/kfunc_call.c  | 143 ++++++++++++++++++++-
 .../testing/selftests/bpf/progs/kfunc_call_fail.c  |  39 ++++++
 .../testing/selftests/bpf/progs/kfunc_call_test.c  |  38 ++++++
 4 files changed, 214 insertions(+), 7 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/progs/kfunc_call_fail.c

(limited to 'tools')

diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 25d8ecf105aa..f16baf977a21 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -1634,6 +1634,7 @@ static int __init bpf_prog_test_run_init(void)
 
 	ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_prog_test_kfunc_set);
 	ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_prog_test_kfunc_set);
+	ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL, &bpf_prog_test_kfunc_set);
 	return ret ?: register_btf_id_dtor_kfuncs(bpf_prog_test_dtor_kfunc,
 						  ARRAY_SIZE(bpf_prog_test_dtor_kfunc),
 						  THIS_MODULE);
diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
index 9dfbe5355a2d..d5881c3331a8 100644
--- a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
+++ b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
@@ -2,6 +2,7 @@
 /* Copyright (c) 2021 Facebook */
 #include <test_progs.h>
 #include <network_helpers.h>
+#include "kfunc_call_fail.skel.h"
 #include "kfunc_call_test.skel.h"
 #include "kfunc_call_test.lskel.h"
 #include "kfunc_call_test_subprog.skel.h"
@@ -10,37 +11,96 @@
 
 #include "cap_helpers.h"
 
+static size_t log_buf_sz = 1048576; /* 1 MB */
+static char obj_log_buf[1048576];
+
+enum kfunc_test_type {
+	tc_test = 0,
+	syscall_test,
+	syscall_null_ctx_test,
+};
+
 struct kfunc_test_params {
 	const char *prog_name;
 	unsigned long lskel_prog_desc_offset;
 	int retval;
+	enum kfunc_test_type test_type;
+	const char *expected_err_msg;
 };
 
-#define TC_TEST(name, __retval) \
+#define __BPF_TEST_SUCCESS(name, __retval, type) \
 	{ \
 	  .prog_name = #name, \
 	  .lskel_prog_desc_offset = offsetof(struct kfunc_call_test_lskel, progs.name), \
 	  .retval = __retval, \
+	  .test_type = type, \
+	  .expected_err_msg = NULL, \
+	}
+
+#define __BPF_TEST_FAIL(name, __retval, type, error_msg) \
+	{ \
+	  .prog_name = #name, \
+	  .lskel_prog_desc_offset = 0 /* unused when test is failing */, \
+	  .retval = __retval, \
+	  .test_type = type, \
+	  .expected_err_msg = error_msg, \
 	}
 
+#define TC_TEST(name, retval) __BPF_TEST_SUCCESS(name, retval, tc_test)
+#define SYSCALL_TEST(name, retval) __BPF_TEST_SUCCESS(name, retval, syscall_test)
+#define SYSCALL_NULL_CTX_TEST(name, retval) __BPF_TEST_SUCCESS(name, retval, syscall_null_ctx_test)
+
+#define SYSCALL_NULL_CTX_FAIL(name, retval, error_msg) \
+	__BPF_TEST_FAIL(name, retval, syscall_null_ctx_test, error_msg)
+
 static struct kfunc_test_params kfunc_tests[] = {
+	/* failure cases:
+	 * if retval is 0 -> the program will fail to load and the error message is an error
+	 * if retval is not 0 -> the program can be loaded but running it will give the
+	 *                       provided return value. The error message is thus the one
+	 *                       from a successful load
+	 */
+	SYSCALL_NULL_CTX_FAIL(kfunc_syscall_test_fail, -EINVAL, "processed 4 insns"),
+	SYSCALL_NULL_CTX_FAIL(kfunc_syscall_test_null_fail, -EINVAL, "processed 4 insns"),
+
+	/* success cases */
 	TC_TEST(kfunc_call_test1, 12),
 	TC_TEST(kfunc_call_test2, 3),
 	TC_TEST(kfunc_call_test_ref_btf_id, 0),
+	SYSCALL_TEST(kfunc_syscall_test, 0),
+	SYSCALL_NULL_CTX_TEST(kfunc_syscall_test_null, 0),
+};
+
+struct syscall_test_args {
+	__u8 data[16];
+	size_t size;
 };
 
 static void verify_success(struct kfunc_test_params *param)
 {
 	struct kfunc_call_test_lskel *lskel = NULL;
+	LIBBPF_OPTS(bpf_test_run_opts, topts);
 	struct bpf_prog_desc *lskel_prog;
 	struct kfunc_call_test *skel;
 	struct bpf_program *prog;
 	int prog_fd, err;
-	LIBBPF_OPTS(bpf_test_run_opts, topts,
-		.data_in = &pkt_v4,
-		.data_size_in = sizeof(pkt_v4),
-		.repeat = 1,
-	);
+	struct syscall_test_args args = {
+		.size = 10,
+	};
+
+	switch (param->test_type) {
+	case syscall_test:
+		topts.ctx_in = &args;
+		topts.ctx_size_in = sizeof(args);
+		/* fallthrough */
+	case syscall_null_ctx_test:
+		break;
+	case tc_test:
+		topts.data_in = &pkt_v4;
+		topts.data_size_in = sizeof(pkt_v4);
+		topts.repeat = 1;
+		break;
+	}
 
 	/* first test with normal libbpf */
 	skel = kfunc_call_test__open_and_load();
@@ -79,6 +139,72 @@ cleanup:
 		kfunc_call_test_lskel__destroy(lskel);
 }
 
+static void verify_fail(struct kfunc_test_params *param)
+{
+	LIBBPF_OPTS(bpf_object_open_opts, opts);
+	LIBBPF_OPTS(bpf_test_run_opts, topts);
+	struct bpf_program *prog;
+	struct kfunc_call_fail *skel;
+	int prog_fd, err;
+	struct syscall_test_args args = {
+		.size = 10,
+	};
+	/* capture the kernel log so it can be matched against expected_err_msg */
+	opts.kernel_log_buf = obj_log_buf;
+	opts.kernel_log_size = log_buf_sz;
+	opts.kernel_log_level = 1;
+
+	switch (param->test_type) {
+	case syscall_test:
+		topts.ctx_in = &args;
+		topts.ctx_size_in = sizeof(args);
+		/* fallthrough */
+	case syscall_null_ctx_test:
+		break;
+	case tc_test:
+		topts.data_in = &pkt_v4;
+		topts.data_size_in = sizeof(pkt_v4);
+		topts.repeat = 1;
+		break;
+	}
+
+	skel = kfunc_call_fail__open_opts(&opts);
+	if (!ASSERT_OK_PTR(skel, "kfunc_call_fail__open_opts"))
+		goto cleanup;
+
+	prog = bpf_object__find_program_by_name(skel->obj, param->prog_name);
+	if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+		goto cleanup;
+
+	bpf_program__set_autoload(prog, true);
+
+	err = kfunc_call_fail__load(skel);
+	if (!param->retval) {
+		/* the verifier is supposed to complain and refuse to load */
+		if (!ASSERT_ERR(err, "unexpected load success"))
+			goto out_err;
+
+	} else {
+		/* the program is loaded but must dynamically fail */
+		if (!ASSERT_OK(err, "unexpected load error"))
+			goto out_err;
+
+		prog_fd = bpf_program__fd(prog);
+		err = bpf_prog_test_run_opts(prog_fd, &topts);
+		if (!ASSERT_EQ(err, param->retval, param->prog_name))
+			goto out_err;
+	}
+
+out_err:
+	if (!ASSERT_OK_PTR(strstr(obj_log_buf, param->expected_err_msg), "expected_err_msg")) {
+		fprintf(stderr, "Expected err_msg: %s\n", param->expected_err_msg);
+		fprintf(stderr, "Verifier output: %s\n", obj_log_buf);
+	}
+
+cleanup:
+	kfunc_call_fail__destroy(skel);
+}
+
 static void test_main(void)
 {
 	int i;
@@ -87,7 +213,10 @@ static void test_main(void)
 		if (!test__start_subtest(kfunc_tests[i].prog_name))
 			continue;
 
-		verify_success(&kfunc_tests[i]);
+		if (!kfunc_tests[i].expected_err_msg)
+			verify_success(&kfunc_tests[i]);
+		else
+			verify_fail(&kfunc_tests[i]);
 	}
 }
 
diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_fail.c b/tools/testing/selftests/bpf/progs/kfunc_call_fail.c
new file mode 100644
index 000000000000..4168027f2ab1
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/kfunc_call_fail.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+extern void bpf_kfunc_call_test_mem_len_pass1(void *mem, int len) __ksym;
+
+struct syscall_test_args {
+	__u8 data[16];
+	size_t size;
+};
+
+SEC("?syscall")
+int kfunc_syscall_test_fail(struct syscall_test_args *args)
+{
+	bpf_kfunc_call_test_mem_len_pass1(&args->data, sizeof(*args) + 1);
+
+	return 0;
+}
+
+SEC("?syscall")
+int kfunc_syscall_test_null_fail(struct syscall_test_args *args)
+{
+	/* Must be called with args as a NULL pointer
+	 * we do not check for it to have the verifier consider that
+	 * the pointer might not be null, and so we can load it.
+	 *
+	 * So the following can not be added:
+	 *
+	 * if (args)
+	 *      return -22;
+	 */
+
+	bpf_kfunc_call_test_mem_len_pass1(args, sizeof(*args));
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_test.c b/tools/testing/selftests/bpf/progs/kfunc_call_test.c
index 5aecbb9fdc68..9e1914916f1d 100644
--- a/tools/testing/selftests/bpf/progs/kfunc_call_test.c
+++ b/tools/testing/selftests/bpf/progs/kfunc_call_test.c
@@ -92,4 +92,42 @@ int kfunc_call_test_pass(struct __sk_buff *skb)
 	return 0;
 }
 
+struct syscall_test_args {
+	__u8 data[16];
+	size_t size;
+};
+
+SEC("syscall")
+int kfunc_syscall_test(struct syscall_test_args *args)
+{
+	const long size = args->size;
+
+	if (size > sizeof(args->data))
+		return -7; /* -E2BIG */
+
+	bpf_kfunc_call_test_mem_len_pass1(&args->data, sizeof(args->data));
+	bpf_kfunc_call_test_mem_len_pass1(&args->data, sizeof(*args));
+	bpf_kfunc_call_test_mem_len_pass1(&args->data, size);
+
+	return 0;
+}
+
+SEC("syscall")
+int kfunc_syscall_test_null(struct syscall_test_args *args)
+{
+	/* Must be called with args as a NULL pointer
+	 * we do not check for it to have the verifier consider that
+	 * the pointer might not be null, and so we can load it.
+	 *
+	 * So the following can not be added:
+	 *
+	 * if (args)
+	 *      return -22;
+	 */
+
+	bpf_kfunc_call_test_mem_len_pass1(args, 0);
+
+	return 0;
+}
+
 char _license[] SEC("license") = "GPL";
-- 
cgit v1.2.3


From 22ed8d5a46520ef0f060e7c0ee91f1cc6f684400 Mon Sep 17 00:00:00 2001
From: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Date: Tue, 6 Sep 2022 17:13:03 +0200
Subject: selftests/bpf: Add tests for kfunc returning a memory pointer

We add 2 new kfuncs that are following the RET_PTR_TO_MEM
capability from the previous commit.
Then we test them in selftests:
the first tests exercise valid cases and are expected to pass,
and the later ones actually prevent the program from being loaded
because they are wrong.

To work around that, we mark the failing ones as not autoloaded
(with SEC("?tc")), and we manually enable them one by one, ensuring
the verifier rejects them.

Signed-off-by: Benjamin Tissoires <benjamin.tissoires@redhat.com>
Link: https://lore.kernel.org/r/20220906151303.2780789-8-benjamin.tissoires@redhat.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 net/bpf/test_run.c                                 |  36 ++++++
 .../testing/selftests/bpf/prog_tests/kfunc_call.c  |   7 ++
 .../testing/selftests/bpf/progs/kfunc_call_fail.c  | 121 +++++++++++++++++++++
 .../testing/selftests/bpf/progs/kfunc_call_test.c  |  33 ++++++
 4 files changed, 197 insertions(+)

(limited to 'tools')

diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index f16baf977a21..13d578ce2a09 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -606,6 +606,38 @@ noinline void bpf_kfunc_call_memb1_release(struct prog_test_member1 *p)
 	WARN_ON_ONCE(1);
 }
 
+static int *__bpf_kfunc_call_test_get_mem(struct prog_test_ref_kfunc *p, const int size)
+{
+	if (size > 2 * sizeof(int))
+		return NULL;
+
+	return (int *)p;
+}
+
+noinline int *bpf_kfunc_call_test_get_rdwr_mem(struct prog_test_ref_kfunc *p, const int rdwr_buf_size)
+{
+	return __bpf_kfunc_call_test_get_mem(p, rdwr_buf_size);
+}
+
+noinline int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size)
+{
+	return __bpf_kfunc_call_test_get_mem(p, rdonly_buf_size);
+}
+
+/* the next 2 ones can't really be used for testing, except to ensure
+ * that the verifier rejects the call.
+ * Acquire functions must return struct pointers, so these ones are
+ * failing.
+ */
+noinline int *bpf_kfunc_call_test_acq_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size)
+{
+	return __bpf_kfunc_call_test_get_mem(p, rdonly_buf_size);
+}
+
+noinline void bpf_kfunc_call_int_mem_release(int *p)
+{
+}
+
 noinline struct prog_test_ref_kfunc *
 bpf_kfunc_call_test_kptr_get(struct prog_test_ref_kfunc **pp, int a, int b)
 {
@@ -712,6 +744,10 @@ BTF_ID_FLAGS(func, bpf_kfunc_call_memb_acquire, KF_ACQUIRE | KF_RET_NULL)
 BTF_ID_FLAGS(func, bpf_kfunc_call_test_release, KF_RELEASE)
 BTF_ID_FLAGS(func, bpf_kfunc_call_memb_release, KF_RELEASE)
 BTF_ID_FLAGS(func, bpf_kfunc_call_memb1_release, KF_RELEASE)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_get_rdwr_mem, KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_get_rdonly_mem, KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_acq_rdonly_mem, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_kfunc_call_int_mem_release, KF_RELEASE)
 BTF_ID_FLAGS(func, bpf_kfunc_call_test_kptr_get, KF_ACQUIRE | KF_RET_NULL | KF_KPTR_GET)
 BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass_ctx)
 BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass1)
diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
index d5881c3331a8..5af1ee8f0e6e 100644
--- a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
+++ b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
@@ -50,6 +50,7 @@ struct kfunc_test_params {
 #define SYSCALL_TEST(name, retval) __BPF_TEST_SUCCESS(name, retval, syscall_test)
 #define SYSCALL_NULL_CTX_TEST(name, retval) __BPF_TEST_SUCCESS(name, retval, syscall_null_ctx_test)
 
+#define TC_FAIL(name, retval, error_msg) __BPF_TEST_FAIL(name, retval, tc_test, error_msg)
 #define SYSCALL_NULL_CTX_FAIL(name, retval, error_msg) \
 	__BPF_TEST_FAIL(name, retval, syscall_null_ctx_test, error_msg)
 
@@ -62,11 +63,17 @@ static struct kfunc_test_params kfunc_tests[] = {
 	 */
 	SYSCALL_NULL_CTX_FAIL(kfunc_syscall_test_fail, -EINVAL, "processed 4 insns"),
 	SYSCALL_NULL_CTX_FAIL(kfunc_syscall_test_null_fail, -EINVAL, "processed 4 insns"),
+	TC_FAIL(kfunc_call_test_get_mem_fail_rdonly, 0, "R0 cannot write into rdonly_mem"),
+	TC_FAIL(kfunc_call_test_get_mem_fail_use_after_free, 0, "invalid mem access 'scalar'"),
+	TC_FAIL(kfunc_call_test_get_mem_fail_oob, 0, "min value is outside of the allowed memory range"),
+	TC_FAIL(kfunc_call_test_get_mem_fail_not_const, 0, "is not a const"),
+	TC_FAIL(kfunc_call_test_mem_acquire_fail, 0, "acquire kernel function does not return PTR_TO_BTF_ID"),
 
 	/* success cases */
 	TC_TEST(kfunc_call_test1, 12),
 	TC_TEST(kfunc_call_test2, 3),
 	TC_TEST(kfunc_call_test_ref_btf_id, 0),
+	TC_TEST(kfunc_call_test_get_mem, 42),
 	SYSCALL_TEST(kfunc_syscall_test, 0),
 	SYSCALL_NULL_CTX_TEST(kfunc_syscall_test_null, 0),
 };
diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_fail.c b/tools/testing/selftests/bpf/progs/kfunc_call_fail.c
index 4168027f2ab1..b98313d391c6 100644
--- a/tools/testing/selftests/bpf/progs/kfunc_call_fail.c
+++ b/tools/testing/selftests/bpf/progs/kfunc_call_fail.c
@@ -3,7 +3,13 @@
 #include <vmlinux.h>
 #include <bpf/bpf_helpers.h>
 
+extern struct prog_test_ref_kfunc *bpf_kfunc_call_test_acquire(unsigned long *sp) __ksym;
+extern void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym;
 extern void bpf_kfunc_call_test_mem_len_pass1(void *mem, int len) __ksym;
+extern int *bpf_kfunc_call_test_get_rdwr_mem(struct prog_test_ref_kfunc *p, const int rdwr_buf_size) __ksym;
+extern int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size) __ksym;
+extern int *bpf_kfunc_call_test_acq_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size) __ksym;
+extern void bpf_kfunc_call_int_mem_release(int *p) __ksym;
 
 struct syscall_test_args {
 	__u8 data[16];
@@ -36,4 +42,119 @@ int kfunc_syscall_test_null_fail(struct syscall_test_args *args)
 	return 0;
 }
 
+SEC("?tc")
+int kfunc_call_test_get_mem_fail_rdonly(struct __sk_buff *skb)
+{
+	struct prog_test_ref_kfunc *pt;
+	unsigned long s = 0;
+	int *p = NULL;
+	int ret = 0;
+
+	pt = bpf_kfunc_call_test_acquire(&s);
+	if (pt) {
+		p = bpf_kfunc_call_test_get_rdonly_mem(pt, 2 * sizeof(int));
+		if (p)
+			p[0] = 42; /* this is a read-only buffer, so -EACCES */
+		else
+			ret = -1;
+
+		bpf_kfunc_call_test_release(pt);
+	}
+	return ret;
+}
+
+SEC("?tc")
+int kfunc_call_test_get_mem_fail_use_after_free(struct __sk_buff *skb)
+{
+	struct prog_test_ref_kfunc *pt;
+	unsigned long s = 0;
+	int *p = NULL;
+	int ret = 0;
+
+	pt = bpf_kfunc_call_test_acquire(&s);
+	if (pt) {
+		p = bpf_kfunc_call_test_get_rdwr_mem(pt, 2 * sizeof(int));
+		if (p) {
+			p[0] = 42;
+			ret = p[1]; /* 108 */
+		} else {
+			ret = -1;
+		}
+
+		bpf_kfunc_call_test_release(pt);
+	}
+	if (p)
+		ret = p[0]; /* p is not valid anymore */
+
+	return ret;
+}
+
+SEC("?tc")
+int kfunc_call_test_get_mem_fail_oob(struct __sk_buff *skb)
+{
+	struct prog_test_ref_kfunc *pt;
+	unsigned long s = 0;
+	int *p = NULL;
+	int ret = 0;
+
+	pt = bpf_kfunc_call_test_acquire(&s);
+	if (pt) {
+		p = bpf_kfunc_call_test_get_rdonly_mem(pt, 2 * sizeof(int));
+		if (p)
+			ret = p[2 * sizeof(int)]; /* oob access, so -EACCES */
+		else
+			ret = -1;
+
+		bpf_kfunc_call_test_release(pt);
+	}
+	return ret;
+}
+
+int not_const_size = 2 * sizeof(int);
+
+SEC("?tc")
+int kfunc_call_test_get_mem_fail_not_const(struct __sk_buff *skb)
+{
+	struct prog_test_ref_kfunc *pt;
+	unsigned long s = 0;
+	int *p = NULL;
+	int ret = 0;
+
+	pt = bpf_kfunc_call_test_acquire(&s);
+	if (pt) {
+		p = bpf_kfunc_call_test_get_rdonly_mem(pt, not_const_size); /* non const size, -EINVAL */
+		if (p)
+			ret = p[0];
+		else
+			ret = -1;
+
+		bpf_kfunc_call_test_release(pt);
+	}
+	return ret;
+}
+
+SEC("?tc")
+int kfunc_call_test_mem_acquire_fail(struct __sk_buff *skb)
+{
+	struct prog_test_ref_kfunc *pt;
+	unsigned long s = 0;
+	int *p = NULL;
+	int ret = 0;
+
+	pt = bpf_kfunc_call_test_acquire(&s);
+	if (pt) {
+		/* we are failing on this one, because we are not acquiring a PTR_TO_BTF_ID (a struct ptr) */
+		p = bpf_kfunc_call_test_acq_rdonly_mem(pt, 2 * sizeof(int));
+		if (p)
+			ret = p[0];
+		else
+			ret = -1;
+
+		bpf_kfunc_call_int_mem_release(p);
+
+		bpf_kfunc_call_test_release(pt);
+	}
+	return ret;
+}
+
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_test.c b/tools/testing/selftests/bpf/progs/kfunc_call_test.c
index 9e1914916f1d..f636e50be259 100644
--- a/tools/testing/selftests/bpf/progs/kfunc_call_test.c
+++ b/tools/testing/selftests/bpf/progs/kfunc_call_test.c
@@ -14,6 +14,8 @@ extern void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p) __ksym;
 extern void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p) __ksym;
 extern void bpf_kfunc_call_test_mem_len_pass1(void *mem, int len) __ksym;
 extern void bpf_kfunc_call_test_mem_len_fail2(__u64 *mem, int len) __ksym;
+extern int *bpf_kfunc_call_test_get_rdwr_mem(struct prog_test_ref_kfunc *p, const int rdwr_buf_size) __ksym;
+extern int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size) __ksym;
 
 SEC("tc")
 int kfunc_call_test2(struct __sk_buff *skb)
@@ -130,4 +132,35 @@ int kfunc_syscall_test_null(struct syscall_test_args *args)
 	return 0;
 }
 
+SEC("tc")
+int kfunc_call_test_get_mem(struct __sk_buff *skb)
+{
+	struct prog_test_ref_kfunc *pt;
+	unsigned long s = 0;
+	int *p = NULL;
+	int ret = 0;
+
+	pt = bpf_kfunc_call_test_acquire(&s);
+	if (pt) {
+		p = bpf_kfunc_call_test_get_rdwr_mem(pt, 2 * sizeof(int));
+		if (p) {
+			p[0] = 42;
+			ret = p[1]; /* 108 */
+		} else {
+			ret = -1;
+		}
+
+		if (ret >= 0) {
+			p = bpf_kfunc_call_test_get_rdonly_mem(pt, 2 * sizeof(int));
+			if (p)
+				ret = p[0]; /* 42 */
+			else
+				ret = -1;
+		}
+
+		bpf_kfunc_call_test_release(pt);
+	}
+	return ret;
+}
+
 char _license[] SEC("license") = "GPL";
-- 
cgit v1.2.3


From 665f5d3577ef43e929d59cf39683037887c351bf Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Fri, 9 Sep 2022 16:15:11 +0200
Subject: libbpf: Remove gcc support for bpf_tail_call_static for now

This reverts commit 14e5ce79943a ("libbpf: Add GCC support for
bpf_tail_call_static"). The reason is that gcc invented its own BPF asm
syntax, which does not conform to the LLVM one, and going forward this
would be more painful to maintain here and in other areas of the library.
Thus remove it; the ask to the gcc folks is to align with the LLVM one
and use the exact same syntax.

Fixes: 14e5ce79943a ("libbpf: Add GCC support for bpf_tail_call_static")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Cc: James Hilliard <james.hilliard1@gmail.com>
Cc: Jose E. Marchesi <jose.marchesi@oracle.com>
---
 tools/lib/bpf/bpf_helpers.h | 19 ++++++-------------
 1 file changed, 6 insertions(+), 13 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index 867b734839dd..7349b16b8e2f 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -131,7 +131,7 @@
 /*
  * Helper function to perform a tail call with a constant/immediate map slot.
  */
-#if (!defined(__clang__) || __clang_major__ >= 8) && defined(__bpf__)
+#if __clang_major__ >= 8 && defined(__bpf__)
 static __always_inline void
 bpf_tail_call_static(void *ctx, const void *map, const __u32 slot)
 {
@@ -139,8 +139,8 @@ bpf_tail_call_static(void *ctx, const void *map, const __u32 slot)
 		__bpf_unreachable();
 
 	/*
-	 * Provide a hard guarantee that the compiler won't optimize setting r2
-	 * (map pointer) and r3 (constant map index) from _different paths_ ending
+	 * Provide a hard guarantee that LLVM won't optimize setting r2 (map
+	 * pointer) and r3 (constant map index) from _different paths_ ending
 	 * up at the _same_ call insn as otherwise we won't be able to use the
 	 * jmpq/nopl retpoline-free patching by the x86-64 JIT in the kernel
 	 * given they mismatch. See also d2e4c1e6c294 ("bpf: Constant map key
@@ -148,19 +148,12 @@ bpf_tail_call_static(void *ctx, const void *map, const __u32 slot)
 	 *
 	 * Note on clobber list: we need to stay in-line with BPF calling
 	 * convention, so even if we don't end up using r0, r4, r5, we need
-	 * to mark them as clobber so that the compiler doesn't end up using
-	 * them before / after the call.
+	 * to mark them as clobber so that LLVM doesn't end up using them
+	 * before / after the call.
 	 */
-	asm volatile(
-#ifdef __clang__
-		     "r1 = %[ctx]\n\t"
+	asm volatile("r1 = %[ctx]\n\t"
 		     "r2 = %[map]\n\t"
 		     "r3 = %[slot]\n\t"
-#else
-		     "mov %%r1,%[ctx]\n\t"
-		     "mov %%r2,%[map]\n\t"
-		     "mov %%r3,%[slot]\n\t"
-#endif
 		     "call 12"
 		     :: [ctx]"r"(ctx), [map]"r"(map), [slot]"i"(slot)
 		     : "r0", "r1", "r2", "r3", "r4", "r5");
-- 
cgit v1.2.3


From e42921c3c346b1b49068af3f3881322081e1dddd Mon Sep 17 00:00:00 2001
From: YiFei Zhu <zhuyifei@google.com>
Date: Fri, 9 Sep 2022 00:49:40 +0000
Subject: selftests/bpf: Deduplicate write_sysctl() to test_progs.c

This helper is needed in multiple tests. Instead of copying it over
and over, better to deduplicate this helper to test_progs.c.

test_progs.c is chosen over testing_helpers.c because of this helper's
use of CHECK / ASSERT_*, and the CHECK was modified to use ASSERT_*
so it does not rely on a duration variable.

Suggested-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: YiFei Zhu <zhuyifei@google.com>
Link: https://lore.kernel.org/r/9b4fc9a27bd52f771b657b4c4090fc8d61f3a6b5.1662682323.git.zhuyifei@google.com
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
---
 .../selftests/bpf/prog_tests/btf_skc_cls_ingress.c   | 20 --------------------
 .../selftests/bpf/prog_tests/tcp_hdr_options.c       | 20 --------------------
 tools/testing/selftests/bpf/test_progs.c             | 17 +++++++++++++++++
 tools/testing/selftests/bpf/test_progs.h             |  1 +
 4 files changed, 18 insertions(+), 40 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c b/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c
index 664ffc0364f4..7a277035c275 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c
@@ -22,26 +22,6 @@ static __u32 duration;
 
 #define PROG_PIN_FILE "/sys/fs/bpf/btf_skc_cls_ingress"
 
-static int write_sysctl(const char *sysctl, const char *value)
-{
-	int fd, err, len;
-
-	fd = open(sysctl, O_WRONLY);
-	if (CHECK(fd == -1, "open sysctl", "open(%s): %s (%d)\n",
-		  sysctl, strerror(errno), errno))
-		return -1;
-
-	len = strlen(value);
-	err = write(fd, value, len);
-	close(fd);
-	if (CHECK(err != len, "write sysctl",
-		  "write(%s, %s, %d): err:%d %s (%d)\n",
-		  sysctl, value, len, err, strerror(errno), errno))
-		return -1;
-
-	return 0;
-}
-
 static int prepare_netns(void)
 {
 	if (CHECK(unshare(CLONE_NEWNET), "create netns",
diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c
index 1fa772079967..f24436d33cd6 100644
--- a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c
+++ b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c
@@ -54,26 +54,6 @@ static int create_netns(void)
 	return 0;
 }
 
-static int write_sysctl(const char *sysctl, const char *value)
-{
-	int fd, err, len;
-
-	fd = open(sysctl, O_WRONLY);
-	if (CHECK(fd == -1, "open sysctl", "open(%s): %s (%d)\n",
-		  sysctl, strerror(errno), errno))
-		return -1;
-
-	len = strlen(value);
-	err = write(fd, value, len);
-	close(fd);
-	if (CHECK(err != len, "write sysctl",
-		  "write(%s, %s): err:%d %s (%d)\n",
-		  sysctl, value, err, strerror(errno), errno))
-		return -1;
-
-	return 0;
-}
-
 static void print_hdr_stg(const struct hdr_stg *hdr_stg, const char *prefix)
 {
 	fprintf(stderr, "%s{active:%u, resend_syn:%u, syncookie:%u, fastopen:%u}\n",
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 3561c97701f2..0e9a47f97890 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -943,6 +943,23 @@ int trigger_module_test_write(int write_sz)
 	return 0;
 }
 
+int write_sysctl(const char *sysctl, const char *value)
+{
+	int fd, err, len;
+
+	fd = open(sysctl, O_WRONLY);
+	if (!ASSERT_NEQ(fd, -1, "open sysctl"))
+		return -1;
+
+	len = strlen(value);
+	err = write(fd, value, len);
+	close(fd);
+	if (!ASSERT_EQ(err, len, "write sysctl"))
+		return -1;
+
+	return 0;
+}
+
 #define MAX_BACKTRACE_SZ 128
 void crash_handler(int signum)
 {
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index 5fe1365c2bb1..b090996daee5 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -384,6 +384,7 @@ int extract_build_id(char *build_id, size_t size);
 int kern_sync_rcu(void);
 int trigger_module_test_read(int read_sz);
 int trigger_module_test_write(int write_sz);
+int write_sysctl(const char *sysctl, const char *value);
 
 #ifdef __x86_64__
 #define SYS_NANOSLEEP_KPROBE_NAME "__x64_sys_nanosleep"
-- 
cgit v1.2.3


From 58c449a96946929467b537589c8a23f11e04af39 Mon Sep 17 00:00:00 2001
From: YiFei Zhu <zhuyifei@google.com>
Date: Fri, 9 Sep 2022 00:49:41 +0000
Subject: selftests/bpf: Ensure cgroup/connect{4,6} programs can bind unpriv
 ICMP ping

This tests that when an unprivileged ICMP ping socket connects,
the hooks are actually invoked. We also ensure that if the hook does
not call bpf_bind(), the bound address is unmodified, and if the
hook calls bpf_bind(), the bound address is exactly what we provided
to the helper.

A new netns is used to enable ping_group_range in the test without
affecting anything outside of the test, because by default, not even
root is permitted to use unprivileged ICMP ping...

Signed-off-by: YiFei Zhu <zhuyifei@google.com>
Link: https://lore.kernel.org/r/086b227c1b97f4e94193e58aae7576d0261b68a4.1662682323.git.zhuyifei@google.com
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
---
 .../selftests/bpf/prog_tests/connect_ping.c        | 178 +++++++++++++++++++++
 tools/testing/selftests/bpf/progs/connect_ping.c   |  53 ++++++
 2 files changed, 231 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/connect_ping.c
 create mode 100644 tools/testing/selftests/bpf/progs/connect_ping.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/connect_ping.c b/tools/testing/selftests/bpf/prog_tests/connect_ping.c
new file mode 100644
index 000000000000..289218c2216c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/connect_ping.c
@@ -0,0 +1,178 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2022 Google LLC.
+ */
+
+#define _GNU_SOURCE
+#include <sys/mount.h>
+
+#include "test_progs.h"
+#include "cgroup_helpers.h"
+#include "network_helpers.h"
+
+#include "connect_ping.skel.h"
+
+/* 2001:db8::1 */
+#define BINDADDR_V6 { { { 0x20,0x01,0x0d,0xb8,0,0,0,0,0,0,0,0,0,0,0,1 } } }
+static const struct in6_addr bindaddr_v6 = BINDADDR_V6;
+
+static void subtest(int cgroup_fd, struct connect_ping *skel,
+		    int family, int do_bind)
+{
+	struct sockaddr_in sa4 = {
+		.sin_family = AF_INET,
+		.sin_addr.s_addr = htonl(INADDR_LOOPBACK),
+	};
+	struct sockaddr_in6 sa6 = {
+		.sin6_family = AF_INET6,
+		.sin6_addr = IN6ADDR_LOOPBACK_INIT,
+	};
+	struct sockaddr *sa;
+	socklen_t sa_len;
+	int protocol;
+	int sock_fd;
+
+	switch (family) {
+	case AF_INET:
+		sa = (struct sockaddr *)&sa4;
+		sa_len = sizeof(sa4);
+		protocol = IPPROTO_ICMP;
+		break;
+	case AF_INET6:
+		sa = (struct sockaddr *)&sa6;
+		sa_len = sizeof(sa6);
+		protocol = IPPROTO_ICMPV6;
+		break;
+	}
+
+	memset(skel->bss, 0, sizeof(*skel->bss));
+	skel->bss->do_bind = do_bind;
+
+	sock_fd = socket(family, SOCK_DGRAM, protocol);
+	if (!ASSERT_GE(sock_fd, 0, "sock-create"))
+		return;
+
+	if (!ASSERT_OK(connect(sock_fd, sa, sa_len), "connect"))
+		goto close_sock;
+
+	if (!ASSERT_EQ(skel->bss->invocations_v4, family == AF_INET ? 1 : 0,
+		       "invocations_v4"))
+		goto close_sock;
+	if (!ASSERT_EQ(skel->bss->invocations_v6, family == AF_INET6 ? 1 : 0,
+		       "invocations_v6"))
+		goto close_sock;
+	if (!ASSERT_EQ(skel->bss->has_error, 0, "has_error"))
+		goto close_sock;
+
+	if (!ASSERT_OK(getsockname(sock_fd, sa, &sa_len),
+		       "getsockname"))
+		goto close_sock;
+
+	switch (family) {
+	case AF_INET:
+		if (!ASSERT_EQ(sa4.sin_family, family, "sin_family"))
+			goto close_sock;
+		if (!ASSERT_EQ(sa4.sin_addr.s_addr,
+			       htonl(do_bind ? 0x01010101 : INADDR_LOOPBACK),
+			       "sin_addr"))
+			goto close_sock;
+		break;
+	case AF_INET6:
+		if (!ASSERT_EQ(sa6.sin6_family, AF_INET6, "sin6_family"))
+			goto close_sock;
+		if (!ASSERT_EQ(memcmp(&sa6.sin6_addr,
+				      do_bind ? &bindaddr_v6 : &in6addr_loopback,
+				      sizeof(sa6.sin6_addr)),
+			       0, "sin6_addr"))
+			goto close_sock;
+		break;
+	}
+
+close_sock:
+	close(sock_fd);
+}
+
+void test_connect_ping(void)
+{
+	struct connect_ping *skel;
+	int cgroup_fd;
+
+	if (!ASSERT_OK(unshare(CLONE_NEWNET | CLONE_NEWNS), "unshare"))
+		return;
+
+	/* overmount sysfs, and making original sysfs private so overmount
+	 * does not propagate to other mntns.
+	 */
+	if (!ASSERT_OK(mount("none", "/sys", NULL, MS_PRIVATE, NULL),
+		       "remount-private-sys"))
+		return;
+	if (!ASSERT_OK(mount("sysfs", "/sys", "sysfs", 0, NULL),
+		       "mount-sys"))
+		return;
+	if (!ASSERT_OK(mount("bpffs", "/sys/fs/bpf", "bpf", 0, NULL),
+		       "mount-bpf"))
+		goto clean_mount;
+
+	if (!ASSERT_OK(system("ip link set dev lo up"), "lo-up"))
+		goto clean_mount;
+	if (!ASSERT_OK(system("ip addr add 1.1.1.1 dev lo"), "lo-addr-v4"))
+		goto clean_mount;
+	if (!ASSERT_OK(system("ip -6 addr add 2001:db8::1 dev lo"), "lo-addr-v6"))
+		goto clean_mount;
+	if (write_sysctl("/proc/sys/net/ipv4/ping_group_range", "0 0"))
+		goto clean_mount;
+
+	cgroup_fd = test__join_cgroup("/connect_ping");
+	if (!ASSERT_GE(cgroup_fd, 0, "cg-create"))
+		goto clean_mount;
+
+	skel = connect_ping__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel-load"))
+		goto close_cgroup;
+	skel->links.connect_v4_prog =
+		bpf_program__attach_cgroup(skel->progs.connect_v4_prog, cgroup_fd);
+	if (!ASSERT_OK_PTR(skel->links.connect_v4_prog, "cg-attach-v4"))
+		goto skel_destroy;
+	skel->links.connect_v6_prog =
+		bpf_program__attach_cgroup(skel->progs.connect_v6_prog, cgroup_fd);
+	if (!ASSERT_OK_PTR(skel->links.connect_v6_prog, "cg-attach-v6"))
+		goto skel_destroy;
+
+	/* Connect a v4 ping socket to localhost, assert that only v4 is called,
+	 * and called exactly once, and that the socket's bound address is
+	 * original loopback address.
+	 */
+	if (test__start_subtest("ipv4"))
+		subtest(cgroup_fd, skel, AF_INET, 0);
+
+	/* Connect a v4 ping socket to localhost, assert that only v4 is called,
+	 * and called exactly once, and that the socket's bound address is
+	 * address we explicitly bound.
+	 */
+	if (test__start_subtest("ipv4-bind"))
+		subtest(cgroup_fd, skel, AF_INET, 1);
+
+	/* Connect a v6 ping socket to localhost, assert that only v6 is called,
+	 * and called exactly once, and that the socket's bound address is
+	 * original loopback address.
+	 */
+	if (test__start_subtest("ipv6"))
+		subtest(cgroup_fd, skel, AF_INET6, 0);
+
+	/* Connect a v6 ping socket to localhost, assert that only v6 is called,
+	 * and called exactly once, and that the socket's bound address is
+	 * address we explicitly bound.
+	 */
+	if (test__start_subtest("ipv6-bind"))
+		subtest(cgroup_fd, skel, AF_INET6, 1);
+
+skel_destroy:
+	connect_ping__destroy(skel);
+
+close_cgroup:
+	close(cgroup_fd);
+
+clean_mount:
+	umount2("/sys", MNT_DETACH);
+}
diff --git a/tools/testing/selftests/bpf/progs/connect_ping.c b/tools/testing/selftests/bpf/progs/connect_ping.c
new file mode 100644
index 000000000000..60178192b672
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/connect_ping.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2022 Google LLC.
+ */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#include <netinet/in.h>
+#include <sys/socket.h>
+
+/* 2001:db8::1 */
+#define BINDADDR_V6 { { { 0x20,0x01,0x0d,0xb8,0,0,0,0,0,0,0,0,0,0,0,1 } } }
+
+__u32 do_bind = 0;
+__u32 has_error = 0;
+__u32 invocations_v4 = 0;
+__u32 invocations_v6 = 0;
+
+SEC("cgroup/connect4")
+int connect_v4_prog(struct bpf_sock_addr *ctx)
+{
+	struct sockaddr_in sa = {
+		.sin_family = AF_INET,
+		.sin_addr.s_addr = bpf_htonl(0x01010101),
+	};
+
+	__sync_fetch_and_add(&invocations_v4, 1);
+
+	if (do_bind && bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)))
+		has_error = 1;
+
+	return 1;
+}
+
+SEC("cgroup/connect6")
+int connect_v6_prog(struct bpf_sock_addr *ctx)
+{
+	struct sockaddr_in6 sa = {
+		.sin6_family = AF_INET6,
+		.sin6_addr = BINDADDR_V6,
+	};
+
+	__sync_fetch_and_add(&invocations_v6, 1);
+
+	if (do_bind && bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)))
+		has_error = 1;
+
+	return 1;
+}
+
+char _license[] SEC("license") = "GPL";
-- 
cgit v1.2.3


From e2d75e954c0a277b8fa0ddf666ddd4f9b73195f7 Mon Sep 17 00:00:00 2001
From: Daniel Xu <dxu@dxuuu.xyz>
Date: Wed, 7 Sep 2022 10:40:41 -0600
Subject: selftests/bpf: Add tests for writing to nf_conn:mark

Add a simple extension to the existing selftest to write to
nf_conn:mark. Also add a failure test for writing to an unsupported
field.

Signed-off-by: Daniel Xu <dxu@dxuuu.xyz>
Link: https://lore.kernel.org/r/f78966b81b9349d2b8ebb4cee2caf15cb6b38ee2.1662568410.git.dxu@dxuuu.xyz
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/prog_tests/bpf_nf.c      |  2 ++
 tools/testing/selftests/bpf/progs/test_bpf_nf.c      |  9 +++++++--
 tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c | 14 ++++++++++++++
 3 files changed, 23 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
index 544bf90ac2a7..ab9117ae7545 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
@@ -17,6 +17,7 @@ struct {
 	{ "set_status_after_insert", "kernel function bpf_ct_set_status args#0 expected pointer to STRUCT nf_conn___init but" },
 	{ "change_timeout_after_alloc", "kernel function bpf_ct_change_timeout args#0 expected pointer to STRUCT nf_conn but" },
 	{ "change_status_after_alloc", "kernel function bpf_ct_change_status args#0 expected pointer to STRUCT nf_conn but" },
+	{ "write_not_allowlisted_field", "no write support to nf_conn at off" },
 };
 
 enum {
@@ -113,6 +114,7 @@ static void test_bpf_nf_ct(int mode)
 	ASSERT_LE(skel->bss->test_delta_timeout, 10, "Test for max ct timeout update");
 	/* expected status is IPS_SEEN_REPLY */
 	ASSERT_EQ(skel->bss->test_status, 2, "Test for ct status update ");
+	ASSERT_EQ(skel->bss->test_insert_lookup_mark, 77, "Test for insert and lookup mark value");
 	ASSERT_EQ(skel->data->test_exist_lookup, 0, "Test existing connection lookup");
 	ASSERT_EQ(skel->bss->test_exist_lookup_mark, 43, "Test existing connection lookup ctmark");
 end:
diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf.c b/tools/testing/selftests/bpf/progs/test_bpf_nf.c
index 2722441850cc..b5e7079701e8 100644
--- a/tools/testing/selftests/bpf/progs/test_bpf_nf.c
+++ b/tools/testing/selftests/bpf/progs/test_bpf_nf.c
@@ -23,6 +23,7 @@ int test_insert_entry = -EAFNOSUPPORT;
 int test_succ_lookup = -ENOENT;
 u32 test_delta_timeout = 0;
 u32 test_status = 0;
+u32 test_insert_lookup_mark = 0;
 __be32 saddr = 0;
 __be16 sport = 0;
 __be32 daddr = 0;
@@ -144,6 +145,7 @@ nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32,
 
 		bpf_ct_set_timeout(ct, 10000);
 		bpf_ct_set_status(ct, IPS_CONFIRMED);
+		ct->mark = 77;
 
 		ct_ins = bpf_ct_insert_entry(ct);
 		if (ct_ins) {
@@ -157,6 +159,7 @@ nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32,
 				test_delta_timeout = ct_lk->timeout - bpf_jiffies64();
 				test_delta_timeout /= CONFIG_HZ;
 				test_status = IPS_SEEN_REPLY;
+				test_insert_lookup_mark = ct_lk->mark;
 				bpf_ct_change_status(ct_lk, IPS_SEEN_REPLY);
 				bpf_ct_release(ct_lk);
 				test_succ_lookup = 0;
@@ -175,8 +178,10 @@ nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32,
 		       sizeof(opts_def));
 	if (ct) {
 		test_exist_lookup = 0;
-		if (ct->mark == 42)
-			test_exist_lookup_mark = 43;
+		if (ct->mark == 42) {
+			ct->mark++;
+			test_exist_lookup_mark = ct->mark;
+		}
 		bpf_ct_release(ct);
 	} else {
 		test_exist_lookup = opts_def.error;
diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c b/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c
index bf79af15c808..0e4759ab38ff 100644
--- a/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c
+++ b/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c
@@ -69,6 +69,20 @@ int lookup_insert(struct __sk_buff *ctx)
 	return 0;
 }
 
+SEC("?tc")
+int write_not_allowlisted_field(struct __sk_buff *ctx)
+{
+	struct bpf_ct_opts___local opts = {};
+	struct bpf_sock_tuple tup = {};
+	struct nf_conn *ct;
+
+	ct = bpf_skb_ct_lookup(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts));
+	if (!ct)
+		return 0;
+	ct->status = 0xF00;
+	return 0;
+}
+
 SEC("?tc")
 int set_timeout_after_insert(struct __sk_buff *ctx)
 {
-- 
cgit v1.2.3


From f7c946f288e32fd8b5fd69825683420d473672bd Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Thu, 8 Sep 2022 10:06:12 +0200
Subject: selftests/bpf: fix ct status check in bpf_nf selftests

Properly check the connection tracking entry status configured by
running the bpf_ct_change_status kfunc.
Remove the unnecessary IPS_CONFIRMED status configuration since it is
already done during entry allocation.

Fixes: 6eb7fba007a7 ("selftests/bpf: Add tests for new nf_conntrack kfuncs")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Acked-by: Song Liu <song@kernel.org>
Link: https://lore.kernel.org/r/813a5161a71911378dfac8770ec890428e4998aa.1662623574.git.lorenzo@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/prog_tests/bpf_nf.c | 5 +++--
 tools/testing/selftests/bpf/progs/test_bpf_nf.c | 7 ++++---
 2 files changed, 7 insertions(+), 5 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
index ab9117ae7545..0677a51694c9 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
 #include <network_helpers.h>
+#include <linux/netfilter/nf_conntrack_common.h>
 #include "test_bpf_nf.skel.h"
 #include "test_bpf_nf_fail.skel.h"
 
@@ -112,9 +113,9 @@ static void test_bpf_nf_ct(int mode)
 	/* allow some tolerance for test_delta_timeout value to avoid races. */
 	ASSERT_GT(skel->bss->test_delta_timeout, 8, "Test for min ct timeout update");
 	ASSERT_LE(skel->bss->test_delta_timeout, 10, "Test for max ct timeout update");
-	/* expected status is IPS_SEEN_REPLY */
-	ASSERT_EQ(skel->bss->test_status, 2, "Test for ct status update ");
 	ASSERT_EQ(skel->bss->test_insert_lookup_mark, 77, "Test for insert and lookup mark value");
+	ASSERT_EQ(skel->bss->test_status, IPS_CONFIRMED | IPS_SEEN_REPLY,
+		  "Test for ct status update ");
 	ASSERT_EQ(skel->data->test_exist_lookup, 0, "Test existing connection lookup");
 	ASSERT_EQ(skel->bss->test_exist_lookup_mark, 43, "Test existing connection lookup ctmark");
 end:
diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf.c b/tools/testing/selftests/bpf/progs/test_bpf_nf.c
index b5e7079701e8..88842da86ddc 100644
--- a/tools/testing/selftests/bpf/progs/test_bpf_nf.c
+++ b/tools/testing/selftests/bpf/progs/test_bpf_nf.c
@@ -144,7 +144,6 @@ nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32,
 		struct nf_conn *ct_ins;
 
 		bpf_ct_set_timeout(ct, 10000);
-		bpf_ct_set_status(ct, IPS_CONFIRMED);
 		ct->mark = 77;
 
 		ct_ins = bpf_ct_insert_entry(ct);
@@ -158,9 +157,11 @@ nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32,
 				bpf_ct_change_timeout(ct_lk, 10000);
 				test_delta_timeout = ct_lk->timeout - bpf_jiffies64();
 				test_delta_timeout /= CONFIG_HZ;
-				test_status = IPS_SEEN_REPLY;
 				test_insert_lookup_mark = ct_lk->mark;
-				bpf_ct_change_status(ct_lk, IPS_SEEN_REPLY);
+				bpf_ct_change_status(ct_lk,
+						     IPS_CONFIRMED | IPS_SEEN_REPLY);
+				test_status = ct_lk->status;
+
 				bpf_ct_release(ct_lk);
 				test_succ_lookup = 0;
 			}
-- 
cgit v1.2.3


From a1c3bdd9c5dfcba3b4550a28890724f519434e15 Mon Sep 17 00:00:00 2001
From: Geliang Tang <geliang.tang@suse.com>
Date: Tue, 6 Sep 2022 22:55:40 +0200
Subject: selftests: mptcp: move prefix tests of addr_nr_ns2 together

Move the fullmesh prefix test of addr_nr_ns2 together with its other
prefix tests.

Signed-off-by: Geliang Tang <geliang.tang@suse.com>
Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 tools/testing/selftests/net/mptcp/mptcp_join.sh | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index ff83ef426df5..2957fe414639 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -706,6 +706,7 @@ do_transfer()
 		addr_nr_ns1=${addr_nr_ns1:10}
 	fi
 
+	local flags="subflow"
 	if [[ "${addr_nr_ns2}" = "fastclose_"* ]]; then
 		# disconnect
 		extra_args="$extra_args -I ${addr_nr_ns2:10}"
@@ -713,6 +714,9 @@ do_transfer()
 	elif [[ "${addr_nr_ns2}" = "userspace_"* ]]; then
 		userspace_pm=1
 		addr_nr_ns2=${addr_nr_ns2:10}
+	elif [[ "${addr_nr_ns2}" = "fullmesh_"* ]]; then
+		flags="${flags},fullmesh"
+		addr_nr_ns2=${addr_nr_ns2:9}
 	fi
 
 	if [ $userspace_pm -eq 1 ]; then
@@ -832,12 +836,6 @@ do_transfer()
 		fi
 	fi
 
-	local flags="subflow"
-	if [[ "${addr_nr_ns2}" = "fullmesh_"* ]]; then
-		flags="${flags},fullmesh"
-		addr_nr_ns2=${addr_nr_ns2:9}
-	fi
-
 	# if newly added endpoints must be deleted, give the background msk
 	# some time to created them
 	[ $addr_nr_ns1 -gt 0 ] && [ $addr_nr_ns2 -lt 0 ] && sleep 1
-- 
cgit v1.2.3


From 77cba3879f1bc9259b3683ea5862b5ca5ff98f43 Mon Sep 17 00:00:00 2001
From: Zhengchao Shao <shaozhengchao@huawei.com>
Date: Fri, 9 Sep 2022 09:29:29 +0800
Subject: selftests/tc-testings: add selftests for ctinfo action

Test c826: Add ctinfo action with default setting
Test 0286: Add ctinfo action with dscp
Test 4938: Add ctinfo action with valid cpmark and zone
Test 7593: Add ctinfo action with drop control
Test 2961: Replace ctinfo action zone and action control
Test e567: Delete ctinfo action with valid index
Test 6a91: Delete ctinfo action with invalid index
Test 5232: List ctinfo actions
Test 7702: Flush ctinfo actions
Test 3201: Add ctinfo action with duplicate index
Test 8295: Add ctinfo action with invalid index
Test 3964: Replace ctinfo action with invalid goto_chain control

Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../tc-testing/tc-tests/actions/ctinfo.json        | 316 +++++++++++++++++++++
 1 file changed, 316 insertions(+)
 create mode 100644 tools/testing/selftests/tc-testing/tc-tests/actions/ctinfo.json

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/ctinfo.json b/tools/testing/selftests/tc-testing/tc-tests/actions/ctinfo.json
new file mode 100644
index 000000000000..d9710c067eb7
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/ctinfo.json
@@ -0,0 +1,316 @@
+[
+    {
+        "id": "c826",
+        "name": "Add ctinfo action with default setting",
+        "category": [
+            "actions",
+            "ctinfo"
+        ],
+        "setup": [
+            [
+                "$TC action flush action ctinfo",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC action add action ctinfo index 10",
+        "expExitCode": "0",
+        "verifyCmd": "$TC action get action ctinfo index 10",
+        "matchPattern": "action order [0-9]*: ctinfo zone 0 pipe.*index 10 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC action flush action ctinfo"
+        ]
+    },
+    {
+        "id": "0286",
+        "name": "Add ctinfo action with dscp",
+        "category": [
+            "actions",
+            "ctinfo"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ctinfo",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC action add action ctinfo dscp 0xfc000000 0x01000000 index 100",
+        "expExitCode": "0",
+        "verifyCmd": "$TC action ls action ctinfo",
+        "matchPattern": "action order [0-9]*: ctinfo zone 0 pipe.*index 100 ref.*dscp 0xfc000000 0x01000000",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ctinfo"
+        ]
+    },
+    {
+        "id": "4938",
+        "name": "Add ctinfo action with valid cpmark and zone",
+        "category": [
+            "actions",
+            "ctinfo"
+        ],
+        "setup": [
+            [
+                "$TC action flush action ctinfo",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC action add action ctinfo cpmark 0x01000000 zone 1 index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC action get action ctinfo index 1",
+        "matchPattern": "action order [0-9]*: ctinfo zone 1 pipe.*index 1 ref.*cpmark 0x01000000",
+        "matchCount": "1",
+        "teardown": [
+            "$TC action flush action ctinfo"
+        ]
+    },
+    {
+        "id": "7593",
+        "name": "Add ctinfo action with drop control",
+        "category": [
+            "actions",
+            "ctinfo"
+        ],
+        "setup": [
+            [
+                "$TC action flush action ctinfo",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC action add action ctinfo drop index 1000",
+        "expExitCode": "0",
+        "verifyCmd": "$TC action ls action ctinfo",
+        "matchPattern": "action order [0-9]*: ctinfo zone 0 drop.*index 1000 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC action flush action ctinfo"
+        ]
+    },
+    {
+        "id": "2961",
+        "name": "Replace ctinfo action zone and action control",
+        "category": [
+            "actions",
+            "ctinfo"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ctinfo",
+                0,
+                1,
+                255
+            ],
+            [
+                "$TC action add action ctinfo zone 1 drop index 1",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC action replace action ctinfo zone 200 pass index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC action get action ctinfo index 1",
+        "matchPattern": "action order [0-9]*: ctinfo zone 200 pass.*index 1 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC action flush action ctinfo"
+        ]
+    },
+    {
+        "id": "e567",
+        "name": "Delete ctinfo action with valid index",
+        "category": [
+            "actions",
+            "ctinfo"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ctinfo",
+                0,
+                1,
+                255
+            ],
+            [
+                "$TC action add action ctinfo zone 200 pass index 1",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC action delete action ctinfo index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC action get action ctinfo index 1",
+        "matchPattern": "action order [0-9]*: ctinfo zone 200 pass.*index 1 ref",
+        "matchCount": "0",
+        "teardown": [
+            "$TC action flush action ctinfo"
+        ]
+    },
+    {
+        "id": "6a91",
+        "name": "Delete ctinfo action with invalid index",
+        "category": [
+            "actions",
+            "ctinfo"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ctinfo",
+                0,
+                1,
+                255
+            ],
+            [
+                "$TC action add action ctinfo zone 200 pass index 1",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC action delete action ctinfo index 333",
+        "expExitCode": "255",
+        "verifyCmd": "$TC action get action ctinfo index 1",
+        "matchPattern": "action order [0-9]*: ctinfo zone 200 pass.*index 1 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC action flush action ctinfo"
+        ]
+    },
+    {
+        "id": "5232",
+        "name": "List ctinfo actions",
+        "category": [
+            "actions",
+            "ctinfo"
+        ],
+        "setup": [
+            [
+                "$TC action flush action ctinfo",
+                0,
+                1,
+                255
+            ],
+            "$TC action add action ctinfo zone 20 pass index 101",
+            "$TC action add action ctinfo cpmark 0x02000000 drop index 102",
+            "$TC action add action ctinfo continue index 103"
+        ],
+        "cmdUnderTest": "$TC action list action ctinfo",
+        "expExitCode": "0",
+        "verifyCmd": "$TC action list action ctinfo",
+        "matchPattern": "action order [0-9]*: ctinfo",
+        "matchCount": "3",
+        "teardown": [
+            "$TC actions flush action ctinfo"
+        ]
+    },
+    {
+        "id": "7702",
+        "name": "Flush ctinfo actions",
+        "category": [
+            "actions",
+            "ctinfo"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ctinfo",
+                0,
+                1,
+                255
+            ],
+	    "$TC action add action ctinfo zone 20 pass index 101",
+            "$TC action add action ctinfo cpmark 0x02000000 drop index 102",
+            "$TC action add action ctinfo continue index 103"
+        ],
+        "cmdUnderTest": "$TC action flush action ctinfo",
+        "expExitCode": "0",
+        "verifyCmd": "$TC action list action ctinfo",
+        "matchPattern": "action order [0-9]*: ctinfo",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action ctinfo"
+        ]
+    },
+    {
+        "id": "3201",
+        "name": "Add ctinfo action with duplicate index",
+        "category": [
+            "actions",
+            "ctinfo"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ctinfo",
+                0,
+                1,
+                255
+            ],
+            "$TC action add action ctinfo zone 20 pass index 101"
+        ],
+        "cmdUnderTest": "$TC action add action ctinfo cpmark 0x02000000 drop index 101",
+        "expExitCode": "255",
+        "verifyCmd": "$TC action get action ctinfo index 101",
+        "matchPattern": "action order [0-9]*: ctinfo zone 20 pass.*index 101",
+        "matchCount": "1",
+        "teardown": [
+            "$TC action flush action ctinfo"
+        ]
+    },
+    {
+        "id": "8295",
+        "name": "Add ctinfo action with invalid index",
+        "category": [
+            "actions",
+            "ctinfo"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ctinfo",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC action add action ctinfo zone 20 index 4294967296",
+        "expExitCode": "255",
+        "verifyCmd": "$TC action ls action ctinfo",
+        "matchPattern": "action order [0-9]*: ctinfo",
+        "matchCount": "0",
+        "teardown": [
+            "$TC action flush action ctinfo"
+        ]
+    },
+    {
+        "id": "3964",
+        "name": "Replace ctinfo action with invalid goto_chain control",
+        "category": [
+            "actions",
+            "ctinfo"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ctinfo",
+                0,
+                1,
+                255
+            ],
+            "$TC action add action ctinfo pass index 90"
+        ],
+        "cmdUnderTest": "$TC action replace action ctinfo goto chain 42 index 90",
+        "expExitCode": "255",
+        "verifyCmd": "$TC action list action ctinfo",
+        "matchPattern": "action order [0-9]*: ctinfo.*pass.*index 90",
+        "matchCount": "1",
+        "teardown": [
+            "$TC action flush action ctinfo"
+        ]
+    }
+]
-- 
cgit v1.2.3


From 4a1db5251cfac32290fa006afb7415222b8ff8f3 Mon Sep 17 00:00:00 2001
From: Zhengchao Shao <shaozhengchao@huawei.com>
Date: Fri, 9 Sep 2022 09:29:30 +0800
Subject: selftests/tc-testings: add selftests for gate action

Test 5153: Add gate action with priority and sched-entry
Test 7189: Add gate action with base-time
Test a721: Add gate action with cycle-time
Test c029: Add gate action with cycle-time-ext
Test 3719: Replace gate base-time action
Test d821: Delete gate action with valid index
Test 3128: Delete gate action with invalid index
Test 7837: List gate actions
Test 9273: Flush gate actions
Test c829: Add gate action with duplicate index
Test 3043: Add gate action with invalid index
Test 2930: Add gate action with cookie

Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../tc-testing/tc-tests/actions/gate.json          | 315 +++++++++++++++++++++
 1 file changed, 315 insertions(+)
 create mode 100644 tools/testing/selftests/tc-testing/tc-tests/actions/gate.json

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/gate.json b/tools/testing/selftests/tc-testing/tc-tests/actions/gate.json
new file mode 100644
index 000000000000..e16a4963fdd2
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/gate.json
@@ -0,0 +1,315 @@
+[
+    {
+        "id": "5153",
+        "name": "Add gate action with priority and sched-entry",
+        "category": [
+            "actions",
+            "gate"
+        ],
+        "setup": [
+            [
+                "$TC action flush action gate",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC action add action gate priority 1 sched-entry close 100000000ns index 100",
+        "expExitCode": "0",
+        "verifyCmd": "$TC action get action gate index 100",
+        "matchPattern": "action order [0-9]*: .*priority 1.*index 100 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC action flush action gate"
+        ]
+    },
+    {
+        "id": "7189",
+        "name": "Add gate action with base-time",
+        "category": [
+            "actions",
+            "gate"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action gate",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC action add action gate base-time 200000000000ns sched-entry close 100000000ns index 10",
+        "expExitCode": "0",
+        "verifyCmd": "$TC action ls action gate",
+        "matchPattern": "action order [0-9]*: .*base-time 200s.*index 10 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action gate"
+        ]
+    },
+    {
+        "id": "a721",
+        "name": "Add gate action with cycle-time",
+        "category": [
+            "actions",
+            "gate"
+        ],
+        "setup": [
+            [
+                "$TC action flush action gate",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC action add action gate cycle-time 200000000000ns sched-entry close 100000000ns index 1000",
+        "expExitCode": "0",
+        "verifyCmd": "$TC action ls action gate",
+        "matchPattern": "action order [0-9]*: .*cycle-time 200s.*index 1000 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC action flush action gate"
+        ]
+    },
+    {
+        "id": "c029",
+        "name": "Add gate action with cycle-time-ext",
+        "category": [
+            "actions",
+            "gate"
+        ],
+        "setup": [
+            [
+                "$TC action flush action gate",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC action add action gate cycle-time-ext 20000000000ns sched-entry close 100000000ns index 1000",
+        "expExitCode": "0",
+        "verifyCmd": "$TC action get action gate index 1000",
+        "matchPattern": "action order [0-9]*: .*cycle-time-ext 20s.*index 1000 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC action flush action gate"
+        ]
+    },
+    {
+        "id": "3719",
+        "name": "Replace gate base-time action",
+        "category": [
+            "actions",
+            "gate"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action gate",
+                0,
+                1,
+                255
+            ],
+            [
+                "$TC action add action gate base-time 200000000000ns sched-entry open 200000000ns -1 8000000b index 20",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC action replace action gate base-time 400000000000ns index 20",
+        "expExitCode": "0",
+        "verifyCmd": "$TC action get action gate index 20",
+        "matchPattern": "action order [0-9]*: .*base-time 400s.*index 20 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC action flush action gate"
+        ]
+    },
+    {
+        "id": "d821",
+        "name": "Delete gate action with valid index",
+        "category": [
+            "actions",
+            "gate"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action gate",
+                0,
+                1,
+                255
+            ],
+            [
+                "$TC action add action gate base-time 200000000000ns sched-entry open 200000000ns -1 8000000b index 302",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC action delete action gate index 302",
+        "expExitCode": "0",
+        "verifyCmd": "$TC action get action gate index 302",
+        "matchPattern": "action order [0-9]*: .*base-time 200s.*index 302 ref",
+        "matchCount": "0",
+        "teardown": [
+            "$TC action flush action gate"
+        ]
+    },
+    {
+        "id": "3128",
+        "name": "Delete gate action with invalid index",
+        "category": [
+            "actions",
+            "gate"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action gate",
+                0,
+                1,
+                255
+            ],
+            [
+                "$TC action add action gate base-time 600000000000ns sched-entry open 200000000ns -1 8000000b index 999",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC action delete action gate index 333",
+        "expExitCode": "255",
+        "verifyCmd": "$TC action get action gate index 999",
+        "matchPattern": "action order [0-9]*: .*base-time 600s.*index 999 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC action flush action gate"
+        ]
+    },
+    {
+        "id": "7837",
+        "name": "List gate actions",
+        "category": [
+            "actions",
+            "gate"
+        ],
+        "setup": [
+            [
+                "$TC action flush action gate",
+                0,
+                1,
+                255
+            ],
+            "$TC action add action gate base-time 600000000000ns sched-entry open 200000000ns -1 8000000b index 101",
+            "$TC action add action gate cycle-time 600000000000ns sched-entry open 600000000ns -1 8000000b index 102",
+            "$TC action add action gate cycle-time-ext 400000000000ns sched-entry close 100000000ns index 103"
+        ],
+        "cmdUnderTest": "$TC action list action gate",
+        "expExitCode": "0",
+        "verifyCmd": "$TC action list action gate",
+        "matchPattern": "action order [0-9]*:",
+        "matchCount": "3",
+        "teardown": [
+            "$TC actions flush action gate"
+        ]
+    },
+    {
+        "id": "9273",
+        "name": "Flush gate actions",
+        "category": [
+            "actions",
+            "gate"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action gate",
+                0,
+                1,
+                255
+            ],
+            "$TC action add action gate base-time 600000000000ns sched-entry open 200000000ns -1 8000000b index 101",
+            "$TC action add action gate cycle-time 600000000000ns sched-entry open 600000000ns -1 8000000b index 102",
+            "$TC action add action gate cycle-time-ext 400000000000ns sched-entry close 100000000ns index 103"
+        ],
+        "cmdUnderTest": "$TC action flush action gate",
+        "expExitCode": "0",
+        "verifyCmd": "$TC action list action gate",
+        "matchPattern": "action order [0-9]*: .*priority",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action gate"
+        ]
+    },
+    {
+        "id": "c829",
+        "name": "Add gate action with duplicate index",
+        "category": [
+            "actions",
+            "gate"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action gate",
+                0,
+                1,
+                255
+            ],
+            "$TC action add action gate cycle-time 600000000000ns sched-entry open 600000000ns -1 8000000b index 4294967295"
+        ],
+        "cmdUnderTest": "$TC action add action gate cycle-time 600000000000ns sched-entry open 600000000ns -1 8000000b index 4294967295",
+        "expExitCode": "255",
+        "verifyCmd": "$TC action get action gate index 4294967295",
+        "matchPattern": "action order [0-9]*: .*index 4294967295",
+        "matchCount": "1",
+        "teardown": [
+            "$TC action flush action gate"
+        ]
+    },
+    {
+        "id": "3043",
+        "name": "Add gate action with invalid index",
+        "category": [
+            "actions",
+            "gate"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action gate",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC action add action gate cycle-time-ext 400000000000ns sched-entry close 100000000ns index 4294967296",
+        "expExitCode": "255",
+        "verifyCmd": "$TC action ls action gate",
+        "matchPattern": "action order [0-9]*:",
+        "matchCount": "0",
+        "teardown": [
+            "$TC action flush action gate"
+        ]
+    },
+    {
+        "id": "2930",
+        "name": "Add gate action with cookie",
+        "category": [
+            "actions",
+            "gate"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action gate",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC action add action gate cycle-time-ext 400000000000ns sched-entry close 100000000ns index 4294 cookie d0d0d0d0d0d0d0d0",
+        "expExitCode": "0",
+        "verifyCmd": "$TC action list action gate",
+        "matchPattern": "action order [0-9]*: .*cookie d0d0d0d0d0d0d0d0",
+        "matchCount": "1",
+        "teardown": [
+            "$TC action flush action gate"
+        ]
+    }
+]
-- 
cgit v1.2.3


From 910d504bc1879f2f89ac54b288924b0af1664287 Mon Sep 17 00:00:00 2001
From: Zhengchao Shao <shaozhengchao@huawei.com>
Date: Fri, 9 Sep 2022 09:29:31 +0800
Subject: selftests/tc-testings: add selftests for xt action

Test 2029: Add xt action with log-prefix
Test 3562: Replace xt action log-prefix
Test 8291: Delete xt action with valid index
Test 5169: Delete xt action with invalid index
Test 7284: List xt actions
Test 5010: Flush xt actions
Test 8437: Add xt action with duplicate index
Test 2837: Add xt action with invalid index

Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../selftests/tc-testing/tc-tests/actions/xt.json  | 219 +++++++++++++++++++++
 1 file changed, 219 insertions(+)
 create mode 100644 tools/testing/selftests/tc-testing/tc-tests/actions/xt.json

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/xt.json b/tools/testing/selftests/tc-testing/tc-tests/actions/xt.json
new file mode 100644
index 000000000000..c9f002aea6d4
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/xt.json
@@ -0,0 +1,219 @@
+[
+    {
+        "id": "2029",
+        "name": "Add xt action with log-prefix",
+        "category": [
+            "actions",
+            "xt"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action xt",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC action add action xt -j LOG --log-prefix PONG index 100",
+        "expExitCode": "0",
+        "verifyCmd": "$TC action ls action xt",
+        "matchPattern": "action order [0-9]*:.*target  LOG level warning prefix \"PONG\".*index 100 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action xt"
+        ]
+    },
+    {
+        "id": "3562",
+        "name": "Replace xt action log-prefix",
+        "category": [
+            "actions",
+            "xt"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action xt",
+                0,
+                1,
+                255
+            ],
+            [
+                "$TC action add action xt -j LOG --log-prefix PONG index 1",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC action replace action xt -j LOG --log-prefix WIN index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC action get action xt index 1",
+        "matchPattern": "action order [0-9]*:.*target  LOG level warning prefix \"WIN\".*index 1 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC action flush action xt"
+        ]
+    },
+    {
+        "id": "8291",
+        "name": "Delete xt action with valid index",
+        "category": [
+            "actions",
+            "xt"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action xt",
+                0,
+                1,
+                255
+            ],
+            [
+                "$TC action add action xt -j LOG --log-prefix PONG index 1000",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC action delete action xt index 1000",
+        "expExitCode": "0",
+        "verifyCmd": "$TC action get action xt index 1000",
+        "matchPattern": "action order [0-9]*:.*target  LOG level warning prefix \"PONG\".*index 1000 ref",
+        "matchCount": "0",
+        "teardown": [
+            "$TC action flush action xt"
+        ]
+    },
+    {
+        "id": "5169",
+        "name": "Delete xt action with invalid index",
+        "category": [
+            "actions",
+            "xt"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action xt",
+                0,
+                1,
+                255
+            ],
+            [
+                "$TC action add action xt -j LOG --log-prefix PONG index 1000",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC action delete action xt index 333",
+        "expExitCode": "255",
+        "verifyCmd": "$TC action get action xt index 1000",
+        "matchPattern": "action order [0-9]*:.*target  LOG level warning prefix \"PONG\".*index 1000 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC action flush action xt"
+        ]
+    },
+    {
+        "id": "7284",
+        "name": "List xt actions",
+        "category": [
+            "actions",
+            "xt"
+        ],
+        "setup": [
+            [
+                "$TC action flush action xt",
+                0,
+                1,
+                255
+            ],
+            "$TC action add action xt -j LOG --log-prefix PONG index 1001",
+            "$TC action add action xt -j LOG --log-prefix WIN index 1002",
+            "$TC action add action xt -j LOG --log-prefix LOSE index 1003"
+        ],
+        "cmdUnderTest": "$TC action list action xt",
+        "expExitCode": "0",
+        "verifyCmd": "$TC action list action xt",
+        "matchPattern": "action order [0-9]*: tablename:",
+        "matchCount": "3",
+        "teardown": [
+            "$TC actions flush action xt"
+        ]
+    },
+    {
+        "id": "5010",
+        "name": "Flush xt actions",
+        "category": [
+            "actions",
+            "xt"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action xt",
+                0,
+                1,
+                255
+            ],
+            "$TC action add action xt -j LOG --log-prefix PONG index 1001",
+            "$TC action add action xt -j LOG --log-prefix WIN index 1002",
+            "$TC action add action xt -j LOG --log-prefix LOSE index 1003"
+        ],
+        "cmdUnderTest": "$TC action flush action xt",
+        "expExitCode": "0",
+        "verifyCmd": "$TC action list action xt",
+        "matchPattern": "action order [0-9]*: tablename:",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action xt"
+        ]
+    },
+    {
+        "id": "8437",
+        "name": "Add xt action with duplicate index",
+        "category": [
+            "actions",
+            "xt"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action xt",
+                0,
+                1,
+                255
+            ],
+            "$TC action add action xt -j LOG --log-prefix PONG index 101"
+        ],
+        "cmdUnderTest": "$TC action add action xt -j LOG --log-prefix WIN index 101",
+        "expExitCode": "255",
+        "verifyCmd": "$TC action get action xt index 101",
+        "matchPattern": "action order [0-9]*:.*target  LOG level warning prefix \"PONG\".*index 101",
+        "matchCount": "1",
+        "teardown": [
+            "$TC action flush action xt"
+        ]
+    },
+    {
+        "id": "2837",
+        "name": "Add xt action with invalid index",
+        "category": [
+            "actions",
+            "xt"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action xt",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC action add action xt -j LOG --log-prefix WIN index 4294967296",
+        "expExitCode": "255",
+        "verifyCmd": "$TC action ls action xt",
+        "matchPattern": "action order [0-9]*:.*target  LOG level warning prefix \"WIN\"",
+        "matchCount": "0",
+        "teardown": [
+            "$TC action flush action xt"
+        ]
+    }
+]
-- 
cgit v1.2.3


From 0fc8674663f6327fdeddc40f9f1abe26726341ab Mon Sep 17 00:00:00 2001
From: Zhengchao Shao <shaozhengchao@huawei.com>
Date: Fri, 9 Sep 2022 09:29:32 +0800
Subject: selftests/tc-testings: add connmark action deleting test case

Test 6571: Delete connmark action with valid index
Test 3426: Delete connmark action with invalid index

Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../tc-testing/tc-tests/actions/connmark.json      | 50 ++++++++++++++++++++++
 1 file changed, 50 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/connmark.json b/tools/testing/selftests/tc-testing/tc-tests/actions/connmark.json
index cadde8f41fcd..0de2f79ea329 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/connmark.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/connmark.json
@@ -312,5 +312,55 @@
         "teardown": [
             "$TC actions flush action connmark"
         ]
+    },
+    {
+        "id": "6571",
+        "name": "Delete connmark action with valid index",
+        "category": [
+            "actions",
+            "connmark"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action connmark",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action connmark pass index 20"
+        ],
+        "cmdUnderTest": "$TC actions del action connmark index 20",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action connmark index 20",
+        "matchPattern": "action order [0-9]+: connmark zone 0 pass.*index 20 ref",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action connmark"
+        ]
+    },
+    {
+        "id": "3426",
+        "name": "Delete connmark action with invalid index",
+        "category": [
+            "actions",
+            "connmark"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action connmark",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action connmark pass index 20"
+        ],
+        "cmdUnderTest": "$TC actions del action connmark index 1",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action connmark index 20",
+        "matchPattern": "action order [0-9]+: connmark zone 0 pass.*index 20 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action connmark"
+        ]
     }
 ]
-- 
cgit v1.2.3


From af649e7a6a53fe5f2e21d930d1d1cc3b19310f11 Mon Sep 17 00:00:00 2001
From: Zhengchao Shao <shaozhengchao@huawei.com>
Date: Fri, 9 Sep 2022 09:29:33 +0800
Subject: selftests/tc-testings: add ife action deleting test case

Test a972: Delete ife encode action with valid index
Test 1272: Delete ife encode action with invalid index

Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../selftests/tc-testing/tc-tests/actions/ife.json | 50 ++++++++++++++++++++++
 1 file changed, 50 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json b/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json
index c13a68b98fc7..459bcad35810 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json
@@ -1085,5 +1085,55 @@
         "teardown": [
             "$TC actions flush action ife"
         ]
+    },
+    {
+        "id": "a972",
+        "name": "Delete ife encode action with valid index",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action ife encode allow mark pass index 20"
+        ],
+        "cmdUnderTest": "$TC actions del action ife index 20",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions ls action ife index 20",
+        "matchPattern": "action order [0-9]*: ife encode action pass.*type 0[xX]ED3E.*allow mark.*index 20",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
+    },
+    {
+        "id": "1272",
+        "name": "Delete ife encode action with invalid index",
+        "category": [
+            "actions",
+            "ife"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action ife",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action ife encode allow mark pass index 20"
+        ],
+        "cmdUnderTest": "$TC actions del action ife index 10",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions ls action ife index 20",
+        "matchPattern": "action order [0-9]*: ife encode action pass.*type 0[xX]ED3E.*allow mark.*index 20",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action ife"
+        ]
     }
 ]
-- 
cgit v1.2.3


From 043b16435f3d9b32e26df528c0528052b053869e Mon Sep 17 00:00:00 2001
From: Zhengchao Shao <shaozhengchao@huawei.com>
Date: Fri, 9 Sep 2022 09:29:34 +0800
Subject: selftests/tc-testings: add nat action deleting test case

Test b811: Delete nat action with valid index
Test a521: Delete nat action with invalid index

Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../selftests/tc-testing/tc-tests/actions/nat.json | 50 ++++++++++++++++++++++
 1 file changed, 50 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/nat.json b/tools/testing/selftests/tc-testing/tc-tests/actions/nat.json
index bc12c1ccad30..0a3c491edbc5 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/nat.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/nat.json
@@ -614,5 +614,55 @@
         "teardown": [
             "$TC actions flush action nat"
         ]
+    },
+    {
+        "id": "b811",
+        "name": "Delete nat action with valid index",
+        "category": [
+            "actions",
+            "nat"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action nat",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action nat ingress 1.1.1.1 2.2.2.2 drop index 20"
+        ],
+        "cmdUnderTest": "$TC actions del action nat index 20",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions ls action nat index 20",
+        "matchPattern": "action order [0-9]+:  nat ingress 1.1.1.1/32 2.2.2.2 drop.*index 20 ref",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action nat"
+        ]
+    },
+    {
+        "id": "a521",
+        "name": "Delete nat action with invalid index",
+        "category": [
+            "actions",
+            "nat"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action nat",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action nat ingress 1.1.1.1 2.2.2.2 drop index 20"
+        ],
+        "cmdUnderTest": "$TC actions del action nat index 10",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions ls action nat index 20",
+        "matchPattern": "action order [0-9]+:  nat ingress 1.1.1.1/32 2.2.2.2 drop.*index 20 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action nat"
+        ]
     }
 ]
-- 
cgit v1.2.3


From a32a4fa447f58edda2ee2eb1be9ce1260cfd6a40 Mon Sep 17 00:00:00 2001
From: Zhengchao Shao <shaozhengchao@huawei.com>
Date: Fri, 9 Sep 2022 09:29:35 +0800
Subject: selftests/tc-testings: add sample action deleting test case

Test 3872: Delete sample action with valid index
Test a394: Delete sample action with invalid index

Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../tc-testing/tc-tests/actions/sample.json        | 50 ++++++++++++++++++++++
 1 file changed, 50 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json b/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json
index ddabb160a11b..148d8bcb8606 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json
@@ -633,5 +633,55 @@
         "teardown": [
             "$TC actions flush action sample"
         ]
+    },
+    {
+        "id": "3872",
+        "name": "Delete sample action with valid index",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action sample rate 10 group 1 index 20"
+        ],
+        "cmdUnderTest": "$TC actions del action sample index 20",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions get action sample index 20",
+        "matchPattern": "action order [0-9]+: sample rate 1/10 group 1.*index 20 ref",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
+    },
+    {
+        "id": "a394",
+        "name": "Delete sample action with invalid index",
+        "category": [
+            "actions",
+            "sample"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action sample",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action sample rate 10 group 1 index 20"
+        ],
+        "cmdUnderTest": "$TC actions del action sample index 10",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions get action sample index 20",
+        "matchPattern": "action order [0-9]+: sample rate 1/10 group 1.*index 20 ref",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action sample"
+        ]
     }
 ]
-- 
cgit v1.2.3


From eed791d3ca9514656421d8b657031d80433eeda4 Mon Sep 17 00:00:00 2001
From: Zhengchao Shao <shaozhengchao@huawei.com>
Date: Fri, 9 Sep 2022 09:29:36 +0800
Subject: selftests/tc-testings: add tunnel_key action deleting test case

Test 3671: Delete tunnel_key set action with valid index
Test 8597: Delete tunnel_key set action with invalid index

Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../tc-testing/tc-tests/actions/tunnel_key.json    | 50 ++++++++++++++++++++++
 1 file changed, 50 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json b/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json
index d06346968bcb..b40ee602918a 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json
@@ -933,5 +933,55 @@
         "teardown": [
             "$TC actions flush action tunnel_key"
         ]
+    },
+    {
+        "id": "3671",
+        "name": "Delete tunnel_key set action with valid index",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 index 1"
+        ],
+        "cmdUnderTest": "$TC actions del action tunnel_key index 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action tunnel_key",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*index 1",
+        "matchCount": "0",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
+    },
+    {
+        "id": "8597",
+        "name": "Delete tunnel_key set action with invalid index",
+        "category": [
+            "actions",
+            "tunnel_key"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action tunnel_key",
+                0,
+                1,
+                255
+            ],
+            "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 index 1"
+        ],
+        "cmdUnderTest": "$TC actions del action tunnel_key index 10",
+        "expExitCode": "255",
+        "verifyCmd": "$TC actions list action tunnel_key",
+        "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*index 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action tunnel_key"
+        ]
     }
 ]
-- 
cgit v1.2.3


From 9440155ccb948f8e3ce5308907a2e7378799be60 Mon Sep 17 00:00:00 2001
From: "Peter Zijlstra (Intel)" <peterz@infradead.org>
Date: Sat, 3 Sep 2022 15:11:53 +0200
Subject: ftrace: Add HAVE_DYNAMIC_FTRACE_NO_PATCHABLE

x86 will shortly start using -fpatchable-function-entry for purposes
other than ftrace, make sure the __patchable_function_entry section
isn't merged in the mcount_loc section.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20220903131154.420467-2-jolsa@kernel.org
---
 include/asm-generic/vmlinux.lds.h | 11 ++++++++++-
 kernel/trace/Kconfig              |  6 ++++++
 tools/objtool/check.c             |  3 ++-
 3 files changed, 18 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index 7515a465ec03..13b197ef0d63 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -154,6 +154,14 @@
 #define MEM_DISCARD(sec) *(.mem##sec)
 #endif
 
+#ifndef CONFIG_HAVE_DYNAMIC_FTRACE_NO_PATCHABLE
+#define KEEP_PATCHABLE		KEEP(*(__patchable_function_entries))
+#define PATCHABLE_DISCARDS
+#else
+#define KEEP_PATCHABLE
+#define PATCHABLE_DISCARDS	*(__patchable_function_entries)
+#endif
+
 #ifdef CONFIG_FTRACE_MCOUNT_RECORD
 /*
  * The ftrace call sites are logged to a section whose name depends on the
@@ -172,7 +180,7 @@
 #define MCOUNT_REC()	. = ALIGN(8);				\
 			__start_mcount_loc = .;			\
 			KEEP(*(__mcount_loc))			\
-			KEEP(*(__patchable_function_entries))	\
+			KEEP_PATCHABLE				\
 			__stop_mcount_loc = .;			\
 			ftrace_stub_graph = ftrace_stub;	\
 			ftrace_ops_list_func = arch_ftrace_ops_list_func;
@@ -1024,6 +1032,7 @@
 
 #define COMMON_DISCARDS							\
 	SANITIZER_DISCARDS						\
+	PATCHABLE_DISCARDS						\
 	*(.discard)							\
 	*(.discard.*)							\
 	*(.modinfo)							\
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 1052126bdca2..e9e95c790b8e 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -51,6 +51,12 @@ config HAVE_DYNAMIC_FTRACE_WITH_ARGS
 	 This allows for use of regs_get_kernel_argument() and
 	 kernel_stack_pointer().
 
+config HAVE_DYNAMIC_FTRACE_NO_PATCHABLE
+	bool
+	help
+	  If the architecture generates __patchable_function_entries sections
+	  but does not want them included in the ftrace locations.
+
 config HAVE_FTRACE_MCOUNT_RECORD
 	bool
 	help
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index e55fdf952a3a..9216060c3408 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -4113,7 +4113,8 @@ static int validate_ibt(struct objtool_file *file)
 		    !strcmp(sec->name, "__bug_table")			||
 		    !strcmp(sec->name, "__ex_table")			||
 		    !strcmp(sec->name, "__jump_table")			||
-		    !strcmp(sec->name, "__mcount_loc"))
+		    !strcmp(sec->name, "__mcount_loc")			||
+		    strstr(sec->name, "__patchable_function_entries"))
 			continue;
 
 		list_for_each_entry(reloc, &sec->reloc->reloc_list, list)
-- 
cgit v1.2.3


From cf060c2c399fa457569123bb9806b455ff53e64c Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Fri, 9 Sep 2022 12:30:51 -0700
Subject: selftests/bpf: Fix test_verif_scale{1,3} SEC() annotations

Use proper SEC("tc") for test_verif_scale{1,3} programs. It's not
a problem for selftests right now because we manually set type
programmatically, but not having correct SEC() definitions makes it
harder to generically load BPF object files.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20220909193053.577111-2-andrii@kernel.org
---
 tools/testing/selftests/bpf/progs/test_verif_scale1.c | 2 +-
 tools/testing/selftests/bpf/progs/test_verif_scale3.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/progs/test_verif_scale1.c b/tools/testing/selftests/bpf/progs/test_verif_scale1.c
index d38153dab3dd..ac6135d9374c 100644
--- a/tools/testing/selftests/bpf/progs/test_verif_scale1.c
+++ b/tools/testing/selftests/bpf/progs/test_verif_scale1.c
@@ -5,7 +5,7 @@
 #define ATTR __attribute__((noinline))
 #include "test_jhash.h"
 
-SEC("scale90_noinline")
+SEC("tc")
 int balancer_ingress(struct __sk_buff *ctx)
 {
 	void *data_end = (void *)(long)ctx->data_end;
diff --git a/tools/testing/selftests/bpf/progs/test_verif_scale3.c b/tools/testing/selftests/bpf/progs/test_verif_scale3.c
index 9beb5bf80373..ca33a9b711c4 100644
--- a/tools/testing/selftests/bpf/progs/test_verif_scale3.c
+++ b/tools/testing/selftests/bpf/progs/test_verif_scale3.c
@@ -5,7 +5,7 @@
 #define ATTR __attribute__((noinline))
 #include "test_jhash.h"
 
-SEC("scale90_noinline32")
+SEC("tc")
 int balancer_ingress(struct __sk_buff *ctx)
 {
 	void *data_end = (void *)(long)ctx->data_end;
-- 
cgit v1.2.3


From 749c202cb6ea40f4d7ac95c4a1217a7b506f43a8 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Fri, 9 Sep 2022 12:30:52 -0700
Subject: libbpf: Fix crash if SEC("freplace") programs don't have
 attach_prog_fd set

Fix SIGSEGV caused by libbpf trying to find attach type in vmlinux BTF
for freplace programs. It's wrong to search in vmlinux BTF and libbpf
doesn't even mark vmlinux BTF as required for freplace programs. So
trying to search anything in obj->vmlinux_btf might cause NULL
dereference if nothing else in BPF object requires vmlinux BTF.

Instead, error out if freplace (EXT) program doesn't specify
attach_prog_fd at load time.

Fixes: 91abb4a6d79d ("libbpf: Support attachment of BPF tracing programs to kernel modules")
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20220909193053.577111-3-andrii@kernel.org
---
 tools/lib/bpf/libbpf.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 3ad139285fad..2ca30ccc774c 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -9084,11 +9084,15 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attac
 	int err = 0;
 
 	/* BPF program's BTF ID */
-	if (attach_prog_fd) {
+	if (prog->type == BPF_PROG_TYPE_EXT || attach_prog_fd) {
+		if (!attach_prog_fd) {
+			pr_warn("prog '%s': attach program FD is not set\n", prog->name);
+			return -EINVAL;
+		}
 		err = libbpf_find_prog_btf_id(attach_name, attach_prog_fd);
 		if (err < 0) {
-			pr_warn("failed to find BPF program (FD %d) BTF ID for '%s': %d\n",
-				 attach_prog_fd, attach_name, err);
+			pr_warn("prog '%s': failed to find BPF program (FD %d) BTF ID for '%s': %d\n",
+				 prog->name, attach_prog_fd, attach_name, err);
 			return err;
 		}
 		*btf_obj_fd = 0;
@@ -9105,7 +9109,8 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attac
 		err = find_kernel_btf_id(prog->obj, attach_name, attach_type, btf_obj_fd, btf_type_id);
 	}
 	if (err) {
-		pr_warn("failed to find kernel BTF type ID of '%s': %d\n", attach_name, err);
+		pr_warn("prog '%s': failed to find kernel BTF type ID of '%s': %d\n",
+			prog->name, attach_name, err);
 		return err;
 	}
 	return 0;
-- 
cgit v1.2.3


From c8bc5e0509767e51b35ae2f4af6ff90fa6a5f27f Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Fri, 9 Sep 2022 12:30:53 -0700
Subject: selftests/bpf: Add veristat tool for mass-verifying BPF object files

Add a small tool, veristat, that allows mass-verification of
a set of *libbpf-compatible* BPF ELF object files. For each such object
file, veristat will attempt to verify each BPF program *individually*.
Regardless of success or failure, it parses BPF verifier stats and
outputs them in human-readable table format. In the future we can also
add CSV and JSON output for more scriptable post-processing, if necessary.

veristat allows specifying a set of stats that should be output and
the ordering between multiple objects and files (e.g., so that one can
easily order by total instructions processed, instead of default file
name, prog name, verdict, total instructions order).

This tool should be useful for validating various BPF verifier changes
or even validating different kernel versions for regressions.

Here's an example for some of the heaviest selftests/bpf BPF object
files:

  $ sudo ./veristat -s insns,file,prog {pyperf,loop,test_verif_scale,strobemeta,test_cls_redirect,profiler}*.linked3.o
  File                                  Program                               Verdict  Duration, us  Total insns  Total states  Peak states
  ------------------------------------  ------------------------------------  -------  ------------  -----------  ------------  -----------
  loop3.linked3.o                       while_true                            failure        350990      1000001          9663         9663
  test_verif_scale3.linked3.o           balancer_ingress                      success        115244       845499          8636         2141
  test_verif_scale2.linked3.o           balancer_ingress                      success         77688       773445          3048          788
  pyperf600.linked3.o                   on_event                              success       2079872       624585         30335        30241
  pyperf600_nounroll.linked3.o          on_event                              success        353972       568128         37101         2115
  strobemeta.linked3.o                  on_event                              success        455230       557149         15915        13537
  test_verif_scale1.linked3.o           balancer_ingress                      success         89880       554754          8636         2141
  strobemeta_nounroll2.linked3.o        on_event                              success        433906       501725         17087         1912
  loop6.linked3.o                       trace_virtqueue_add_sgs               success        282205       398057          8717          919
  loop1.linked3.o                       nested_loops                          success        125630       361349          5504         5504
  pyperf180.linked3.o                   on_event                              success       2511740       160398         11470        11446
  pyperf100.linked3.o                   on_event                              success        744329        87681          6213         6191
  test_cls_redirect.linked3.o           cls_redirect                          success         54087        78925          4782          903
  strobemeta_subprogs.linked3.o         on_event                              success         57898        65420          1954          403
  test_cls_redirect_subprogs.linked3.o  cls_redirect                          success         54522        64965          4619          958
  strobemeta_nounroll1.linked3.o        on_event                              success         43313        57240          1757          382
  pyperf50.linked3.o                    on_event                              success        194355        46378          3263         3241
  profiler2.linked3.o                   tracepoint__syscalls__sys_enter_kill  success         23869        43372          1423          542
  pyperf_subprogs.linked3.o             on_event                              success         29179        36358          2499         2499
  profiler1.linked3.o                   tracepoint__syscalls__sys_enter_kill  success         13052        27036          1946          936
  profiler3.linked3.o                   tracepoint__syscalls__sys_enter_kill  success         21023        26016          2186          915
  profiler2.linked3.o                   kprobe__vfs_link                      success          5255        13896           303          271
  profiler1.linked3.o                   kprobe__vfs_link                      success          7792        12687          1042         1041
  profiler3.linked3.o                   kprobe__vfs_link                      success          7332        10601           865          865
  profiler2.linked3.o                   kprobe_ret__do_filp_open              success          3417         8900           216          199
  profiler2.linked3.o                   kprobe__vfs_symlink                   success          3548         8775           203          186
  pyperf_global.linked3.o               on_event                              success         10007         7563           520          520
  profiler3.linked3.o                   kprobe_ret__do_filp_open              success          4708         6464           532          532
  profiler1.linked3.o                   kprobe_ret__do_filp_open              success          3090         6445           508          508
  profiler3.linked3.o                   kprobe__vfs_symlink                   success          4477         6358           521          521
  profiler1.linked3.o                   kprobe__vfs_symlink                   success          3381         6347           507          507
  profiler2.linked3.o                   raw_tracepoint__sched_process_exec    success          2464         5874           292          189
  profiler3.linked3.o                   raw_tracepoint__sched_process_exec    success          2677         4363           397          283
  profiler2.linked3.o                   kprobe__proc_sys_write                success          1800         4355           143          138
  profiler1.linked3.o                   raw_tracepoint__sched_process_exec    success          1649         4019           333          240
  pyperf600_bpf_loop.linked3.o          on_event                              success          2711         3966           306          306
  profiler2.linked3.o                   raw_tracepoint__sched_process_exit    success          1234         3138            83           66
  profiler3.linked3.o                   kprobe__proc_sys_write                success          1755         2623           223          223
  profiler1.linked3.o                   kprobe__proc_sys_write                success          1222         2456           193          193
  loop2.linked3.o                       while_true                            success           608         1783            57           30
  profiler3.linked3.o                   raw_tracepoint__sched_process_exit    success           789         1680           146          146
  profiler1.linked3.o                   raw_tracepoint__sched_process_exit    success           592         1526           133          133
  strobemeta_bpf_loop.linked3.o         on_event                              success          1015         1512           106          106
  loop4.linked3.o                       combinations                          success           165          524            18           17
  profiler3.linked3.o                   raw_tracepoint__sched_process_fork    success           196          299            25           25
  profiler1.linked3.o                   raw_tracepoint__sched_process_fork    success           109          265            19           19
  profiler2.linked3.o                   raw_tracepoint__sched_process_fork    success           111          265            19           19
  loop5.linked3.o                       while_true                            success            47           84             9            9
  ------------------------------------  ------------------------------------  -------  ------------  -----------  ------------  -----------

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20220909193053.577111-4-andrii@kernel.org
---
 tools/testing/selftests/bpf/.gitignore |   1 +
 tools/testing/selftests/bpf/Makefile   |   7 +-
 tools/testing/selftests/bpf/veristat.c | 537 +++++++++++++++++++++++++++++++++
 3 files changed, 544 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/veristat.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 3a8cb2404ea6..3b288562963e 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -39,6 +39,7 @@ test_cpp
 /tools
 /runqslower
 /bench
+/veristat
 *.ko
 *.tmp
 xskxceiver
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 6cd327f1f216..1a0296bd744a 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -82,7 +82,7 @@ TEST_PROGS_EXTENDED := with_addr.sh \
 TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \
 	flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \
 	test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko \
-	xskxceiver xdp_redirect_multi xdp_synproxy
+	xskxceiver xdp_redirect_multi xdp_synproxy veristat
 
 TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read
 
@@ -595,6 +595,11 @@ $(OUTPUT)/bench: $(OUTPUT)/bench.o \
 	$(call msg,BINARY,,$@)
 	$(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@
 
+$(OUTPUT)/veristat.o: $(BPFOBJ)
+$(OUTPUT)/veristat: $(OUTPUT)/veristat.o
+	$(call msg,BINARY,,$@)
+	$(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@
+
 EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) $(HOST_SCRATCH_DIR)	\
 	prog_tests/tests.h map_tests/tests.h verifier/tests.h		\
 	feature bpftool							\
diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c
new file mode 100644
index 000000000000..39e6dc41e504
--- /dev/null
+++ b/tools/testing/selftests/bpf/veristat.c
@@ -0,0 +1,537 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#define _GNU_SOURCE
+#include <argp.h>
+#include <string.h>
+#include <stdlib.h>
+#include <linux/compiler.h>
+#include <sched.h>
+#include <pthread.h>
+#include <dirent.h>
+#include <signal.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <sys/sysinfo.h>
+#include <sys/stat.h>
+#include <bpf/libbpf.h>
+
+enum stat_id {
+	VERDICT,
+	DURATION,
+	TOTAL_INSNS,
+	TOTAL_STATES,
+	PEAK_STATES,
+	MAX_STATES_PER_INSN,
+	MARK_READ_MAX_LEN,
+
+	FILE_NAME,
+	PROG_NAME,
+
+	ALL_STATS_CNT,
+	NUM_STATS_CNT = FILE_NAME - VERDICT,
+};
+
+struct verif_stats {
+	char *file_name;
+	char *prog_name;
+
+	long stats[NUM_STATS_CNT];
+};
+
+struct stat_specs {
+	int spec_cnt;
+	enum stat_id ids[ALL_STATS_CNT];
+	bool asc[ALL_STATS_CNT];
+	int lens[ALL_STATS_CNT];
+};
+
+static struct env {
+	char **filenames;
+	int filename_cnt;
+	bool verbose;
+
+	struct verif_stats *prog_stats;
+	int prog_stat_cnt;
+
+	struct stat_specs output_spec;
+	struct stat_specs sort_spec;
+} env;
+
+static int libbpf_print_fn(enum libbpf_print_level level,
+		    const char *format, va_list args)
+{
+	if (!env.verbose)
+		return 0;
+	if (level == LIBBPF_DEBUG /* && !env.verbose */)
+		return 0;
+	return vfprintf(stderr, format, args);
+}
+
+const char *argp_program_version = "veristat";
+const char *argp_program_bug_address = "<bpf@vger.kernel.org>";
+const char argp_program_doc[] =
+"veristat    BPF verifier stats collection tool.\n"
+"\n"
+"USAGE: veristat <obj-file> [<obj-file>...]\n";
+
+static const struct argp_option opts[] = {
+	{ NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" },
+	{ "verbose", 'v', NULL, 0, "Verbose mode" },
+	{ "output", 'o', "SPEC", 0, "Specify output stats" },
+	{ "sort", 's', "SPEC", 0, "Specify sort order" },
+	{},
+};
+
+static int parse_stats(const char *stats_str, struct stat_specs *specs);
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+	void *tmp;
+	int err;
+
+	switch (key) {
+	case 'h':
+		argp_state_help(state, stderr, ARGP_HELP_STD_HELP);
+		break;
+	case 'v':
+		env.verbose = true;
+		break;
+	case 'o':
+		err = parse_stats(arg, &env.output_spec);
+		if (err)
+			return err;
+		break;
+	case 's':
+		err = parse_stats(arg, &env.sort_spec);
+		if (err)
+			return err;
+		break;
+	case ARGP_KEY_ARG:
+		tmp = realloc(env.filenames, (env.filename_cnt + 1) * sizeof(*env.filenames));
+		if (!tmp)
+			return -ENOMEM;
+		env.filenames = tmp;
+		env.filenames[env.filename_cnt] = strdup(arg);
+		if (!env.filenames[env.filename_cnt])
+			return -ENOMEM;
+		env.filename_cnt++;
+		break;
+	default:
+		return ARGP_ERR_UNKNOWN;
+	}
+	return 0;
+}
+
+static const struct stat_specs default_output_spec = {
+	.spec_cnt = 7,
+	.ids = {
+		FILE_NAME, PROG_NAME, VERDICT, DURATION,
+		TOTAL_INSNS, TOTAL_STATES, PEAK_STATES,
+	},
+};
+
+static const struct stat_specs default_sort_spec = {
+	.spec_cnt = 2,
+	.ids = {
+		FILE_NAME, PROG_NAME,
+	},
+	.asc = { true, true, },
+};
+
+static struct stat_def {
+	const char *header;
+	const char *names[4];
+	bool asc_by_default;
+} stat_defs[] = {
+	[FILE_NAME] = { "File", {"file_name", "filename", "file"}, true /* asc */ },
+	[PROG_NAME] = { "Program", {"prog_name", "progname", "prog"}, true /* asc */ },
+	[VERDICT] = { "Verdict", {"verdict"}, true /* asc: failure, success */ },
+	[DURATION] = { "Duration, us", {"duration", "dur"}, },
+	[TOTAL_INSNS] = { "Total insns", {"total_insns", "insns"}, },
+	[TOTAL_STATES] = { "Total states", {"total_states", "states"}, },
+	[PEAK_STATES] = { "Peak states", {"peak_states"}, },
+	[MAX_STATES_PER_INSN] = { "Max states per insn", {"max_states_per_insn"}, },
+	[MARK_READ_MAX_LEN] = { "Max mark read length", {"max_mark_read_len", "mark_read"}, },
+};
+
+static int parse_stat(const char *stat_name, struct stat_specs *specs)
+{
+	int id, i;
+
+	if (specs->spec_cnt >= ARRAY_SIZE(specs->ids)) {
+		fprintf(stderr, "Can't specify more than %zd stats\n", ARRAY_SIZE(specs->ids));
+		return -E2BIG;
+	}
+
+	for (id = 0; id < ARRAY_SIZE(stat_defs); id++) {
+		struct stat_def *def = &stat_defs[id];
+
+		for (i = 0; i < ARRAY_SIZE(stat_defs[id].names); i++) {
+			if (!def->names[i] || strcmp(def->names[i], stat_name) != 0)
+				continue;
+
+			specs->ids[specs->spec_cnt] = id;
+			specs->asc[specs->spec_cnt] = def->asc_by_default;
+			specs->spec_cnt++;
+
+			return 0;
+		}
+	}
+
+	fprintf(stderr, "Unrecognized stat name '%s'\n", stat_name);
+	return -ESRCH;
+}
+
+static int parse_stats(const char *stats_str, struct stat_specs *specs)
+{
+	char *input, *state = NULL, *next;
+	int err;
+
+	input = strdup(stats_str);
+	if (!input)
+		return -ENOMEM;
+
+	while ((next = strtok_r(state ? NULL : input, ",", &state))) {
+		err = parse_stat(next, specs);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static char verif_log_buf[64 * 1024];
+
+static int parse_verif_log(const char *buf, size_t buf_sz, struct verif_stats *s)
+{
+	const char *next;
+	int pos;
+
+	for (pos = 0; buf[0]; buf = next) {
+		if (buf[0] == '\n')
+			buf++;
+		next = strchrnul(&buf[pos], '\n');
+
+		if (1 == sscanf(buf, "verification time %ld usec\n", &s->stats[DURATION]))
+			continue;
+		if (6 == sscanf(buf, "processed %ld insns (limit %*d) max_states_per_insn %ld total_states %ld peak_states %ld mark_read %ld",
+				&s->stats[TOTAL_INSNS],
+				&s->stats[MAX_STATES_PER_INSN],
+				&s->stats[TOTAL_STATES],
+				&s->stats[PEAK_STATES],
+				&s->stats[MARK_READ_MAX_LEN]))
+			continue;
+	}
+
+	return 0;
+}
+
+static int process_prog(const char *filename, struct bpf_object *obj, struct bpf_program *prog)
+{
+	const char *prog_name = bpf_program__name(prog);
+	size_t buf_sz = sizeof(verif_log_buf);
+	char *buf = verif_log_buf;
+	struct verif_stats *stats;
+	int err = 0;
+	void *tmp;
+
+	tmp = realloc(env.prog_stats, (env.prog_stat_cnt + 1) * sizeof(*env.prog_stats));
+	if (!tmp)
+		return -ENOMEM;
+	env.prog_stats = tmp;
+	stats = &env.prog_stats[env.prog_stat_cnt++];
+	memset(stats, 0, sizeof(*stats));
+
+	if (env.verbose) {
+		buf_sz = 16 * 1024 * 1024;
+		buf = malloc(buf_sz);
+		if (!buf)
+			return -ENOMEM;
+		bpf_program__set_log_buf(prog, buf, buf_sz);
+		bpf_program__set_log_level(prog, 1 | 4); /* stats + log */
+	} else {
+		bpf_program__set_log_buf(prog, buf, buf_sz);
+		bpf_program__set_log_level(prog, 4); /* only verifier stats */
+	}
+	verif_log_buf[0] = '\0';
+
+	err = bpf_object__load(obj);
+
+	stats->file_name = strdup(basename(filename));
+	stats->prog_name = strdup(bpf_program__name(prog));
+	stats->stats[VERDICT] = err == 0; /* 1 - success, 0 - failure */
+	parse_verif_log(buf, buf_sz, stats);
+
+	if (env.verbose) {
+		printf("PROCESSING %s/%s, DURATION US: %ld, VERDICT: %s, VERIFIER LOG:\n%s\n",
+		       filename, prog_name, stats->stats[DURATION],
+		       err ? "failure" : "success", buf);
+	}
+
+	if (verif_log_buf != buf)
+		free(buf);
+
+	return 0;
+};
+
+static int process_obj(const char *filename)
+{
+	struct bpf_object *obj = NULL, *tobj;
+	struct bpf_program *prog, *tprog, *lprog;
+	libbpf_print_fn_t old_libbpf_print_fn;
+	LIBBPF_OPTS(bpf_object_open_opts, opts);
+	int err = 0, prog_cnt = 0;
+
+	old_libbpf_print_fn = libbpf_set_print(libbpf_print_fn);
+
+	obj = bpf_object__open_file(filename, &opts);
+	if (!obj) {
+		err = -errno;
+		fprintf(stderr, "Failed to open '%s': %d\n", filename, err);
+		goto cleanup;
+	}
+
+	bpf_object__for_each_program(prog, obj) {
+		prog_cnt++;
+	}
+
+	if (prog_cnt == 1) {
+		prog = bpf_object__next_program(obj, NULL);
+		bpf_program__set_autoload(prog, true);
+		process_prog(filename, obj, prog);
+		bpf_object__close(obj);
+		goto cleanup;
+	}
+
+	bpf_object__for_each_program(prog, obj) {
+		const char *prog_name = bpf_program__name(prog);
+
+		tobj = bpf_object__open_file(filename, &opts);
+		if (!tobj) {
+			err = -errno;
+			fprintf(stderr, "Failed to open '%s': %d\n", filename, err);
+			goto cleanup;
+		}
+
+		bpf_object__for_each_program(tprog, tobj) {
+			const char *tprog_name = bpf_program__name(tprog);
+
+			if (strcmp(prog_name, tprog_name) == 0) {
+				bpf_program__set_autoload(tprog, true);
+				lprog = tprog;
+			} else {
+				bpf_program__set_autoload(tprog, false);
+			}
+		}
+
+		process_prog(filename, tobj, lprog);
+		bpf_object__close(tobj);
+	}
+
+cleanup:
+	bpf_object__close(obj);
+	libbpf_set_print(old_libbpf_print_fn);
+	return err;
+}
+
+static int cmp_stat(const struct verif_stats *s1, const struct verif_stats *s2,
+		    enum stat_id id, bool asc)
+{
+	int cmp = 0;
+
+	switch (id) {
+	case FILE_NAME:
+		cmp = strcmp(s1->file_name, s2->file_name);
+		break;
+	case PROG_NAME:
+		cmp = strcmp(s1->prog_name, s2->prog_name);
+		break;
+	case VERDICT:
+	case DURATION:
+	case TOTAL_INSNS:
+	case TOTAL_STATES:
+	case PEAK_STATES:
+	case MAX_STATES_PER_INSN:
+	case MARK_READ_MAX_LEN: {
+		long v1 = s1->stats[id];
+		long v2 = s2->stats[id];
+
+		if (v1 != v2)
+			cmp = v1 < v2 ? -1 : 1;
+		break;
+	}
+	default:
+		fprintf(stderr, "Unrecognized stat #%d\n", id);
+		exit(1);
+	}
+
+	return asc ? cmp : -cmp;
+}
+
+static int cmp_prog_stats(const void *v1, const void *v2)
+{
+	const struct verif_stats *s1 = v1, *s2 = v2;
+	int i, cmp;
+
+	for (i = 0; i < env.sort_spec.spec_cnt; i++) {
+		cmp = cmp_stat(s1, s2, env.sort_spec.ids[i], env.sort_spec.asc[i]);
+		if (cmp != 0)
+			return cmp;
+	}
+
+	return 0;
+}
+
+#define HEADER_CHAR '-'
+#define COLUMN_SEP "  "
+
+static void output_headers(bool calc_len)
+{
+	int i, len;
+
+	for (i = 0; i < env.output_spec.spec_cnt; i++) {
+		int id = env.output_spec.ids[i];
+		int *max_len = &env.output_spec.lens[i];
+
+		if (calc_len) {
+			len = snprintf(NULL, 0, "%s", stat_defs[id].header);
+			if (len > *max_len)
+				*max_len = len;
+		} else {
+			printf("%s%-*s", i == 0 ? "" : COLUMN_SEP,  *max_len, stat_defs[id].header);
+		}
+	}
+
+	if (!calc_len)
+		printf("\n");
+}
+
+static void output_header_underlines(void)
+{
+	int i, j, len;
+
+	for (i = 0; i < env.output_spec.spec_cnt; i++) {
+		len = env.output_spec.lens[i];
+
+		printf("%s", i == 0 ? "" : COLUMN_SEP);
+		for (j = 0; j < len; j++)
+			printf("%c", HEADER_CHAR);
+	}
+	printf("\n");
+}
+
+static void output_stats(const struct verif_stats *s, bool calc_len)
+{
+	int i;
+
+	for (i = 0; i < env.output_spec.spec_cnt; i++) {
+		int id = env.output_spec.ids[i];
+		int *max_len = &env.output_spec.lens[i], len;
+		const char *str = NULL;
+		long val = 0;
+
+		switch (id) {
+		case FILE_NAME:
+			str = s->file_name;
+			break;
+		case PROG_NAME:
+			str = s->prog_name;
+			break;
+		case VERDICT:
+			str = s->stats[VERDICT] ? "success" : "failure";
+			break;
+		case DURATION:
+		case TOTAL_INSNS:
+		case TOTAL_STATES:
+		case PEAK_STATES:
+		case MAX_STATES_PER_INSN:
+		case MARK_READ_MAX_LEN:
+			val = s->stats[id];
+			break;
+		default:
+			fprintf(stderr, "Unrecognized stat #%d\n", id);
+			exit(1);
+		}
+
+		if (calc_len) {
+			if (str)
+				len = snprintf(NULL, 0, "%s", str);
+			else
+				len = snprintf(NULL, 0, "%ld", val);
+			if (len > *max_len)
+				*max_len = len;
+		} else {
+			if (str)
+				printf("%s%-*s", i == 0 ? "" : COLUMN_SEP, *max_len, str);
+			else
+				printf("%s%*ld", i == 0 ? "" : COLUMN_SEP,  *max_len, val);
+		}
+	}
+
+	if (!calc_len)
+		printf("\n");
+}
+
+int main(int argc, char **argv)
+{
+	static const struct argp argp = {
+		.options = opts,
+		.parser = parse_arg,
+		.doc = argp_program_doc,
+	};
+	int err = 0, i;
+
+	if (argp_parse(&argp, argc, argv, 0, NULL, NULL))
+		return 1;
+
+	if (env.filename_cnt == 0) {
+		fprintf(stderr, "Please provide path to BPF object file!\n");
+		argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
+		return 1;
+	}
+
+	if (env.output_spec.spec_cnt == 0)
+		env.output_spec = default_output_spec;
+	if (env.sort_spec.spec_cnt == 0)
+		env.sort_spec = default_sort_spec;
+
+	for (i = 0; i < env.filename_cnt; i++) {
+		err = process_obj(env.filenames[i]);
+		if (err) {
+			fprintf(stderr, "Failed to process '%s': %d\n", env.filenames[i], err);
+			goto cleanup;
+		}
+	}
+
+	qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats);
+
+	/* calculate column widths */
+	output_headers(true);
+	for (i = 0; i < env.prog_stat_cnt; i++) {
+		output_stats(&env.prog_stats[i], true);
+	}
+
+	/* actually output the table */
+	output_headers(false);
+	output_header_underlines();
+	for (i = 0; i < env.prog_stat_cnt; i++) {
+		output_stats(&env.prog_stats[i], false);
+	}
+	output_header_underlines();
+	printf("\n");
+
+	printf("Done. Processed %d object files, %d programs.\n",
+	       env.filename_cnt, env.prog_stat_cnt);
+
+cleanup:
+	for (i = 0; i < env.prog_stat_cnt; i++) {
+		free(env.prog_stats[i].file_name);
+		free(env.prog_stats[i].prog_name);
+	}
+	free(env.prog_stats);
+	for (i = 0; i < env.filename_cnt; i++)
+		free(env.filenames[i]);
+	free(env.filenames);
+	return -err;
+}
-- 
cgit v1.2.3


From dc567045f1590f6460d3e9a6ea6ad5e600b58b84 Mon Sep 17 00:00:00 2001
From: Xin Liu <liuxin350@huawei.com>
Date: Tue, 13 Sep 2022 15:36:43 +0800
Subject: libbpf: Clean up legacy bpf maps declaration in bpf_helpers

Legacy BPF map declarations are no longer supported in libbpf v1.0 [0].
Only BTF-defined maps are supported starting from v1.0, so it is time to
remove the definition of bpf_map_def in bpf_helpers.h.

  [0] https://github.com/libbpf/libbpf/wiki/Libbpf:-the-road-to-v1.0

Signed-off-by: Xin Liu <liuxin350@huawei.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Song Liu <song@kernel.org>
Link: https://lore.kernel.org/bpf/20220913073643.19960-1-liuxin350@huawei.com
---
 tools/lib/bpf/bpf_helpers.h | 12 ------------
 1 file changed, 12 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index 7349b16b8e2f..d37c4fe2849d 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -160,18 +160,6 @@ bpf_tail_call_static(void *ctx, const void *map, const __u32 slot)
 }
 #endif
 
-/*
- * Helper structure used by eBPF C program
- * to describe BPF map attributes to libbpf loader
- */
-struct bpf_map_def {
-	unsigned int type;
-	unsigned int key_size;
-	unsigned int value_size;
-	unsigned int max_entries;
-	unsigned int map_flags;
-} __attribute__((deprecated("use BTF-defined maps in .maps section")));
-
 enum libbpf_pin_type {
 	LIBBPF_PIN_NONE,
 	/* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */
-- 
cgit v1.2.3


From a7e85406bdbd0c376f3997e571f7073b9527272e Mon Sep 17 00:00:00 2001
From: Hou Tao <houtao1@huawei.com>
Date: Mon, 19 Sep 2022 11:57:14 +0800
Subject: selftests/bpf: Add test result messages for
 test_task_storage_map_stress_lookup

Add test result message when test_task_storage_map_stress_lookup()
succeeds or is skipped. The test case can be skipped due to the choice
of preemption model in kernel config, so export skips in test_maps.c and
increment it when needed.

The following is the output of test_maps when the test case succeeds or
is skipped:

  test_task_storage_map_stress_lookup:PASS
  test_maps: OK, 0 SKIPPED

  test_task_storage_map_stress_lookup SKIP (no CONFIG_PREEMPT)
  test_maps: OK, 1 SKIPPED

Fixes: 73b97bc78b32 ("selftests/bpf: Test concurrent updates on bpf_task_storage_busy")
Signed-off-by: Hou Tao <houtao1@huawei.com>
Link: https://lore.kernel.org/r/20220919035714.2195144-1-houtao@huaweicloud.com
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
---
 tools/testing/selftests/bpf/map_tests/task_storage_map.c | 6 +++++-
 tools/testing/selftests/bpf/test_maps.c                  | 2 +-
 tools/testing/selftests/bpf/test_maps.h                  | 2 ++
 3 files changed, 8 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/map_tests/task_storage_map.c b/tools/testing/selftests/bpf/map_tests/task_storage_map.c
index 1adc9c292eb2..aac08c85240b 100644
--- a/tools/testing/selftests/bpf/map_tests/task_storage_map.c
+++ b/tools/testing/selftests/bpf/map_tests/task_storage_map.c
@@ -77,8 +77,11 @@ void test_task_storage_map_stress_lookup(void)
 	CHECK(err, "open_and_load", "error %d\n", err);
 
 	/* Only for a fully preemptible kernel */
-	if (!skel->kconfig->CONFIG_PREEMPT)
+	if (!skel->kconfig->CONFIG_PREEMPT) {
+		printf("%s SKIP (no CONFIG_PREEMPT)\n", __func__);
+		skips++;
 		return;
+	}
 
 	/* Save the old affinity setting */
 	sched_getaffinity(getpid(), sizeof(old), &old);
@@ -119,4 +122,5 @@ out:
 	read_bpf_task_storage_busy__destroy(skel);
 	/* Restore affinity setting */
 	sched_setaffinity(getpid(), sizeof(old), &old);
+	printf("%s:PASS\n", __func__);
 }
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 00b9cc305e58..289ff310e283 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -30,7 +30,7 @@
 #define ENOTSUPP 524
 #endif
 
-static int skips;
+int skips;
 
 static struct bpf_map_create_opts map_opts = { .sz = sizeof(map_opts) };
 
diff --git a/tools/testing/selftests/bpf/test_maps.h b/tools/testing/selftests/bpf/test_maps.h
index 77d8587ac4ed..f6fbca761732 100644
--- a/tools/testing/selftests/bpf/test_maps.h
+++ b/tools/testing/selftests/bpf/test_maps.h
@@ -14,4 +14,6 @@
 	}								\
 })
 
+extern int skips;
+
 #endif
-- 
cgit v1.2.3


From 9e7aaa7c65f170039501c4d4b24d99640e2d519a Mon Sep 17 00:00:00 2001
From: Amit Cohen <amcohen@nvidia.com>
Date: Wed, 14 Sep 2022 13:21:48 +0200
Subject: selftests: mlxsw: Use shapers in QOS tests instead of forcing speed

QOS tests create congestion and verify the switch behavior. To create
congestion, they need to have more traffic than the port can handle, so
some of them force 1Gbps speed.

The tests assume that 1Gbps speed is supported, otherwise, they will fail.
Spectrum-4 ASIC will not support this speed in all ports, so to be able
to run QOS tests there, some adjustments are required. Use shapers to
limit the traffic instead of forcing speed. Note that for several ports,
the speed configuration is there just because of autoneg issues, so no
shaper is needed in its place.

In tests that already use shapers, set the existing shaper to be a child of
a new TBF shaper which is added as a root qdisc and acts as a port shaper.

Signed-off-by: Amit Cohen <amcohen@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh |  5 +++--
 tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh |  9 +++++----
 tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh      | 15 ++++++++-------
 3 files changed, 16 insertions(+), 13 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh
index e9f8718af979..690d8daa71b4 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh
@@ -130,7 +130,8 @@ switch_create()
 
 	ip link set dev $swp3 up
 	mtu_set $swp3 10000
-	ethtool -s $swp3 speed 1000 autoneg off
+	tc qdisc replace dev $swp3 root handle 101: tbf rate 1gbit \
+		burst 128K limit 1G
 
 	vlan_create $swp1 111
 	vlan_create $swp2 222
@@ -193,7 +194,7 @@ switch_destroy()
 	vlan_destroy $swp2 222
 	vlan_destroy $swp1 111
 
-	ethtool -s $swp3 autoneg on
+	tc qdisc del dev $swp3 root handle 101:
 	mtu_restore $swp3
 	ip link set dev $swp3 down
 	lldptool -T -i $swp3 -V ETS-CFG up2tc=0:0,1:0,2:0,3:0,4:0,5:0,6:0,7:0
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
index 8f164c80e215..c8e55fa91660 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
@@ -129,9 +129,10 @@ switch_create()
 	vlan_create $swp2 111
 	vlan_create $swp3 111
 
-	ethtool -s $swp3 speed 1000 autoneg off
-	tc qdisc replace dev $swp3 root handle 3: \
-	   prio bands 8 priomap 7 7 7 7 7 7 7 7
+	tc qdisc replace dev $swp3 root handle 3: tbf rate 1gbit \
+		burst 128K limit 1G
+	tc qdisc replace dev $swp3 parent 3:3 handle 33: \
+		prio bands 8 priomap 7 7 7 7 7 7 7 7
 
 	ip link add name br1 type bridge vlan_filtering 0
 	ip link set dev br1 up
@@ -172,8 +173,8 @@ switch_destroy()
 	ip link del dev br111
 	ip link del dev br1
 
+	tc qdisc del dev $swp3 parent 3:3 handle 33:
 	tc qdisc del dev $swp3 root handle 3:
-	ethtool -s $swp3 autoneg on
 
 	vlan_destroy $swp3 111
 	vlan_destroy $swp2 111
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh
index af64bc9ea8ab..ceaa76b17a43 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh
@@ -15,13 +15,15 @@ ALL_TESTS="
 	ets_test_dwrr
 "
 
+PARENT="parent 3:3"
+
 switch_create()
 {
-	ets_switch_create
-
 	# Create a bottleneck so that the DWRR process can kick in.
-	ethtool -s $h2 speed 1000 autoneg off
-	ethtool -s $swp2 speed 1000 autoneg off
+	tc qdisc replace dev $swp2 root handle 3: tbf rate 1gbit \
+		burst 128K limit 1G
+
+	ets_switch_create
 
 	# Set the ingress quota high and use the three egress TCs to limit the
 	# amount of traffic that is admitted to the shared buffers. This makes
@@ -55,10 +57,9 @@ switch_destroy()
 	devlink_tc_bind_pool_th_restore $swp1 0 ingress
 	devlink_port_pool_th_restore $swp1 0
 
-	ethtool -s $swp2 autoneg on
-	ethtool -s $h2 autoneg on
-
 	ets_switch_destroy
+
+	tc qdisc del dev $swp2 root handle 3:
 }
 
 # Callback from sch_ets_tests.sh
-- 
cgit v1.2.3


From 61a00b196aaf5ba3d6ffb94e93c5d57bed449a32 Mon Sep 17 00:00:00 2001
From: Amit Cohen <amcohen@nvidia.com>
Date: Wed, 14 Sep 2022 13:21:49 +0200
Subject: selftests: mlxsw: Use shapers in QOS RED tests instead of forcing
 speed

QOS tests create congestion and verify the switch behavior. To create
congestion, they need to have more traffic than the port can handle, so
some of them force 1Gbps speed.

The tests assume that 1Gbps speed is supported, otherwise, they will fail.
Spectrum-4 ASIC will not support this speed in all ports, so to be able
to run the tests there, some adjustments are required. Use shapers to limit
the traffic instead of forcing speed. Note that for several ports, the
speed configuration is there just because of autoneg issues, so no shaper
is needed in its place.

The tests already use ETS qdisc as a root and RED qdiscs as children. Add
a new TBF shaper to limit the rate of traffic, and use it as a root qdisc,
then save the previous hierarchy of qdiscs under the new TBF root.

In some ASICs, the shapers do not limit the traffic as accurately as
forcing speed. To make the tests stable, allow the backlog size to be up to
+-10% of the threshold. The aim of the tests is to make sure that with
backlog << threshold, there are no drops, and that packets are dropped
somewhere in vicinity of the configured threshold.

Signed-off-by: Amit Cohen <amcohen@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/drivers/net/mlxsw/sch_red_core.sh    | 23 +++++++++++-----------
 .../selftests/drivers/net/mlxsw/sch_red_ets.sh     |  4 ++--
 .../selftests/drivers/net/mlxsw/sch_red_root.sh    |  4 ++--
 3 files changed, 15 insertions(+), 16 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh
index f260f01db0e8..45b41b8f3232 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh
@@ -135,14 +135,16 @@ h2_create()
 	# cause packets to fail to queue up at $swp3 due to shared buffer
 	# quotas, and the test to spuriously fail.
 	#
-	# Prevent this by setting the speed of $h2 to 1Gbps.
+	# Prevent this by adding a shaper which limits the traffic in $h2 to
+	# 1Gbps.
 
-	ethtool -s $h2 speed 1000 autoneg off
+	tc qdisc replace dev $h2 root handle 10: tbf rate 1gbit \
+		burst 128K limit 1G
 }
 
 h2_destroy()
 {
-	ethtool -s $h2 autoneg on
+	tc qdisc del dev $h2 root handle 10:
 	tc qdisc del dev $h2 clsact
 	host_destroy $h2
 }
@@ -150,12 +152,10 @@ h2_destroy()
 h3_create()
 {
 	host_create $h3 3
-	ethtool -s $h3 speed 1000 autoneg off
 }
 
 h3_destroy()
 {
-	ethtool -s $h3 autoneg on
 	host_destroy $h3
 }
 
@@ -199,8 +199,9 @@ switch_create()
 		done
 	done
 
-	for intf in $swp2 $swp3 $swp4 $swp5; do
-		ethtool -s $intf speed 1000 autoneg off
+	for intf in $swp3 $swp4; do
+		tc qdisc replace dev $intf root handle 1: tbf rate 1gbit \
+			burst 128K limit 1G
 	done
 
 	ip link set dev br1_10 up
@@ -220,15 +221,13 @@ switch_destroy()
 
 	devlink_port_pool_th_restore $swp3 8
 
-	tc qdisc del dev $swp3 root 2>/dev/null
-
 	ip link set dev br2_11 down
 	ip link set dev br2_10 down
 	ip link set dev br1_11 down
 	ip link set dev br1_10 down
 
-	for intf in $swp5 $swp4 $swp3 $swp2; do
-		ethtool -s $intf autoneg on
+	for intf in $swp4 $swp3; do
+		tc qdisc del dev $intf root handle 1:
 	done
 
 	for intf in $swp5 $swp3 $swp2 $swp4 $swp1; do
@@ -536,7 +535,7 @@ do_red_test()
 	check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0."
 	local diff=$((limit - backlog))
 	pct=$((100 * diff / limit))
-	((0 <= pct && pct <= 10))
+	((-10 <= pct && pct <= 10))
 	check_err $? "backlog $backlog / $limit expected <= 10% distance"
 	log_test "TC $((vlan - 10)): RED backlog > limit"
 
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh
index 7a73057206cd..0d01c7cd82a1 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh
@@ -25,7 +25,7 @@ BACKLOG2=500000
 
 install_root_qdisc()
 {
-	tc qdisc add dev $swp3 root handle 10: $QDISC \
+	tc qdisc add dev $swp3 parent 1: handle 10: $QDISC \
 	   bands 8 priomap 7 6 5 4 3 2 1 0
 }
 
@@ -67,7 +67,7 @@ uninstall_qdisc_tc1()
 
 uninstall_root_qdisc()
 {
-	tc qdisc del dev $swp3 root
+	tc qdisc del dev $swp3 parent 1:
 }
 
 uninstall_qdisc()
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh
index 501d192529ac..860205338e6f 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh
@@ -18,7 +18,7 @@ install_qdisc()
 {
 	local -a args=("$@")
 
-	tc qdisc add dev $swp3 root handle 108: red \
+	tc qdisc add dev $swp3 parent 1: handle 108: red \
 	   limit 1000000 min $BACKLOG max $((BACKLOG + 1)) \
 	   probability 1.0 avpkt 8000 burst 38 "${args[@]}"
 	sleep 1
@@ -26,7 +26,7 @@ install_qdisc()
 
 uninstall_qdisc()
 {
-	tc qdisc del dev $swp3 root
+	tc qdisc del dev $swp3 parent 1:
 }
 
 ecn_test()
-- 
cgit v1.2.3


From bd3f7850720c2f1086718546034742783dc3a371 Mon Sep 17 00:00:00 2001
From: Amit Cohen <amcohen@nvidia.com>
Date: Wed, 14 Sep 2022 13:21:50 +0200
Subject: selftests: devlink_lib: Add function for querying maximum pool size

The maximum pool size is exposed via 'devlink sb' command. The next
patch will add a test which increases some pools to the maximum size.

Add a function to query the value.

Signed-off-by: Amit Cohen <amcohen@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/forwarding/devlink_lib.sh | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh
index de9944d42027..601990c6881b 100644
--- a/tools/testing/selftests/net/forwarding/devlink_lib.sh
+++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh
@@ -584,3 +584,8 @@ devlink_cell_size_get()
 	devlink sb pool show "$DEVLINK_DEV" pool 0 -j \
 	    | jq '.pool[][].cell_size'
 }
+
+devlink_pool_size_get()
+{
+	devlink sb show "$DEVLINK_DEV" -j | jq '.[][][]["size"]'
+}
-- 
cgit v1.2.3


From 5ab0cf142bb7242b37ab678b09886a2aa65e8bfb Mon Sep 17 00:00:00 2001
From: Amit Cohen <amcohen@nvidia.com>
Date: Wed, 14 Sep 2022 13:21:51 +0200
Subject: selftests: mlxsw: Add QOS test for maximum use of descriptors

Add a test equivalent to qos_burst. The test's purpose is the same, but
the new test uses a simpler topology and does not require forcing low
speed. In addition, it can be run on Spectrum-2 and not only on
Spectrum-3+. The idea is to use a shaper in order to limit the traffic
and create congestion.

The qos_burst test uses a small pool, sends many small packets, and
verifies that packets are not dropped, which means that many descriptors
can be handled. This test is meant to check the change introduced by
commit c864769add96 ("mlxsw: Configure descriptor buffers").

Instead, the new test tries to use more than 85% of the maximum supported
descriptors. The idea is to use a big pool (as big as the ASIC supports),
such that the pool size does not limit the traffic, then send many small
packets, which means that many descriptors are used, and check how many
packets the switch can handle.

Using a shaper allows the test to run on all ASICs, regardless of the
CPU's abilities, as it is able to create congestion with a low rate of
packets.

Signed-off-by: Amit Cohen <amcohen@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/drivers/net/mlxsw/mlxsw_lib.sh       |  14 +
 .../drivers/net/mlxsw/qos_max_descriptors.sh       | 282 +++++++++++++++++++++
 2 files changed, 296 insertions(+)
 create mode 100755 tools/testing/selftests/drivers/net/mlxsw/qos_max_descriptors.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh b/tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh
index a95856aafd2a..6369927e9c37 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh
@@ -61,3 +61,17 @@ mlxsw_only_on_spectrum()
 
 	return 1
 }
+
+mlxsw_max_descriptors_get()
+{
+	local spectrum_rev=$MLXSW_SPECTRUM_REV
+
+	case $spectrum_rev in
+	1) echo 81920 ;;
+	2) echo 136960 ;;
+	3) echo 204800 ;;
+	4) echo 220000 ;;
+	*) echo "Unknown max descriptors for chip revision." > /dev/stderr
+	   return 1 ;;
+	esac
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_max_descriptors.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_max_descriptors.sh
new file mode 100755
index 000000000000..5ac4f795e333
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_max_descriptors.sh
@@ -0,0 +1,282 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test sends many small packets (size is less than cell size) through the
+# switch. A shaper is used in $swp2, so the traffic is limited there. Packets
+# are queued till they will be sent.
+#
+# The idea is to verify that the switch can handle at least 85% of maximum
+# supported descriptors by hardware. Then, we verify that the driver configures
+# firmware to allow infinite size of egress descriptor pool, and does not use a
+# lower limitation. Increase the size of the relevant pools such that the pool's
+# size does not limit the traffic.
+
+# +-----------------------+
+# | H1                    |
+# |   + $h1.111           |
+# |   | 192.0.2.33/28     |
+# |   |                   |
+# |   + $h1               |
+# +---|-------------------+
+#     |
+# +---|-----------------------------+
+# |   + $swp1                       |
+# |   | iPOOL1                      |
+# |   |                             |
+# | +-|------------------------+    |
+# | | + $swp1.111              |    |
+# | |                          |    |
+# | | BR1                      |    |
+# | |                          |    |
+# | | + $swp2.111              |    |
+# | +-|------------------------+    |
+# |   |                             |
+# |   + $swp2                       |
+# |   | ePOOL6                      |
+# |   | 1mbit                       |
+# +---+-----------------------------+
+#     |
+# +---|-------------------+
+# |   + $h2            H2 |
+# |   |                   |
+# |   + $h2.111           |
+# |     192.0.2.34/28     |
+# +-----------------------+
+#
+
+ALL_TESTS="
+	ping_ipv4
+	max_descriptors
+"
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+NUM_NETIFS=4
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+source mlxsw_lib.sh
+
+MAX_POOL_SIZE=$(devlink_pool_size_get)
+SHAPER_RATE=1mbit
+
+# The current TBF qdisc interface does not allow us to configure the shaper to
+# flat zero. The ASIC shaper is guaranteed to work with a granularity of
+# 200Mbps. On Spectrum-2, writing a value close to zero instead of zero works
+# well, but the performance on Spectrum-1 is unpredictable. Thus, do not run the
+# test on Spectrum-1.
+mlxsw_only_on_spectrum 2+ || exit
+
+h1_create()
+{
+	simple_if_init $h1
+
+	vlan_create $h1 111 v$h1 192.0.2.33/28
+	ip link set dev $h1.111 type vlan egress-qos-map 0:1
+}
+
+h1_destroy()
+{
+	vlan_destroy $h1 111
+
+	simple_if_fini $h1
+}
+
+h2_create()
+{
+	simple_if_init $h2
+
+	vlan_create $h2 111 v$h2 192.0.2.34/28
+}
+
+h2_destroy()
+{
+	vlan_destroy $h2 111
+
+	simple_if_fini $h2
+}
+
+switch_create()
+{
+	# pools
+	# -----
+
+	devlink_pool_size_thtype_save 1
+	devlink_pool_size_thtype_save 6
+
+	devlink_port_pool_th_save $swp1 1
+	devlink_port_pool_th_save $swp2 6
+
+	devlink_tc_bind_pool_th_save $swp1 1 ingress
+	devlink_tc_bind_pool_th_save $swp2 1 egress
+
+	devlink_pool_size_thtype_set 1 dynamic $MAX_POOL_SIZE
+	devlink_pool_size_thtype_set 6 static $MAX_POOL_SIZE
+
+	# $swp1
+	# -----
+
+	ip link set dev $swp1 up
+	vlan_create $swp1 111
+	ip link set dev $swp1.111 type vlan ingress-qos-map 0:0 1:1
+
+	devlink_port_pool_th_set $swp1 1 16
+	devlink_tc_bind_pool_th_set $swp1 1 ingress 1 16
+
+	tc qdisc replace dev $swp1 root handle 1: \
+	   ets bands 8 strict 8 priomap 7 6
+	dcb buffer set dev $swp1 prio-buffer all:0 1:1
+
+	# $swp2
+	# -----
+
+	ip link set dev $swp2 up
+	vlan_create $swp2 111
+	ip link set dev $swp2.111 type vlan egress-qos-map 0:0 1:1
+
+	devlink_port_pool_th_set $swp2 6 $MAX_POOL_SIZE
+	devlink_tc_bind_pool_th_set $swp2 1 egress 6 $MAX_POOL_SIZE
+
+	tc qdisc replace dev $swp2 root handle 1: tbf rate $SHAPER_RATE \
+		burst 128K limit 500M
+	tc qdisc replace dev $swp2 parent 1:1 handle 11: \
+		ets bands 8 strict 8 priomap 7 6
+
+	# bridge
+	# ------
+
+	ip link add name br1 type bridge vlan_filtering 0
+	ip link set dev $swp1.111 master br1
+	ip link set dev br1 up
+
+	ip link set dev $swp2.111 master br1
+}
+
+switch_destroy()
+{
+	# Do this first so that we can reset the limits to values that are only
+	# valid for the original static / dynamic setting.
+	devlink_pool_size_thtype_restore 6
+	devlink_pool_size_thtype_restore 1
+
+	# bridge
+	# ------
+
+	ip link set dev $swp2.111 nomaster
+
+	ip link set dev br1 down
+	ip link set dev $swp1.111 nomaster
+	ip link del dev br1
+
+	# $swp2
+	# -----
+
+	tc qdisc del dev $swp2 parent 1:1 handle 11:
+	tc qdisc del dev $swp2 root
+
+	devlink_tc_bind_pool_th_restore $swp2 1 egress
+	devlink_port_pool_th_restore $swp2 6
+
+	vlan_destroy $swp2 111
+	ip link set dev $swp2 down
+
+	# $swp1
+	# -----
+
+	dcb buffer set dev $swp1 prio-buffer all:0
+	tc qdisc del dev $swp1 root
+
+	devlink_tc_bind_pool_th_restore $swp1 1 ingress
+	devlink_port_pool_th_restore $swp1 1
+
+	vlan_destroy $swp1 111
+	ip link set dev $swp1 down
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	h2mac=$(mac_get $h2)
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test $h1 192.0.2.34 " h1->h2"
+}
+
+percentage_used()
+{
+	local num_packets=$1; shift
+	local max_packets=$1; shift
+
+	bc <<< "
+	    scale=2
+	    100 * $num_packets / $max_packets
+	"
+}
+
+max_descriptors()
+{
+	local cell_size=$(devlink_cell_size_get)
+	local exp_perc_used=85
+	local max_descriptors
+	local pktsize=30
+
+	RET=0
+
+	max_descriptors=$(mlxsw_max_descriptors_get) || exit 1
+
+	local d0=$(ethtool_stats_get $swp2 tc_no_buffer_discard_uc_tc_1)
+
+	log_info "Send many small packets, packet size = $pktsize bytes"
+	start_traffic_pktsize $pktsize $h1.111 192.0.2.33 192.0.2.34 $h2mac
+
+	# Sleep to wait for congestion.
+	sleep 5
+
+	local d1=$(ethtool_stats_get $swp2 tc_no_buffer_discard_uc_tc_1)
+	((d1 == d0))
+	check_err $? "Drops seen on egress port: $d0 -> $d1 ($((d1 - d0)))"
+
+	# Check how many packets the switch can handle, the limitation is
+	# maximum descriptors.
+	local pkts_bytes=$(ethtool_stats_get $swp2 tc_transmit_queue_tc_1)
+	local pkts_num=$((pkts_bytes / cell_size))
+	local perc_used=$(percentage_used $pkts_num $max_descriptors)
+
+	check_err $(bc <<< "$perc_used < $exp_perc_used") \
+		"Expected > $exp_perc_used% of descriptors, handle $perc_used%"
+
+	stop_traffic
+	sleep 1
+
+	log_test "Maximum descriptors usage. The percentage used is $perc_used%"
+}
+
+trap cleanup EXIT
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
-- 
cgit v1.2.3


From 72981ef2d196d03ebab2bf9e4578a4c17e8078dd Mon Sep 17 00:00:00 2001
From: Amit Cohen <amcohen@nvidia.com>
Date: Wed, 14 Sep 2022 13:21:52 +0200
Subject: selftests: mlxsw: Remove qos_burst test

The previous patch added a test which can be used instead of qos_burst.sh.
Remove this test.

Signed-off-by: Amit Cohen <amcohen@nvidia.com>
Reviewed-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Petr Machata <petrm@nvidia.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/drivers/net/mlxsw/qos_burst.sh       | 480 ---------------------
 1 file changed, 480 deletions(-)
 delete mode 100755 tools/testing/selftests/drivers/net/mlxsw/qos_burst.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_burst.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_burst.sh
deleted file mode 100755
index 82a47b903f92..000000000000
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_burst.sh
+++ /dev/null
@@ -1,480 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# This test sends 1Gbps of traffic through the switch, into which it then
-# injects a burst of traffic and tests that there are no drops.
-#
-# The 1Gbps stream is created by sending >1Gbps stream from H1. This stream
-# ingresses through $swp1, and is forwarded thtrough a small temporary pool to a
-# 1Gbps $swp3.
-#
-# Thus a 1Gbps stream enters $swp4, and is forwarded through a large pool to
-# $swp2, and eventually to H2. Since $swp2 is a 1Gbps port as well, no backlog
-# is generated.
-#
-# At this point, a burst of traffic is forwarded from H3. This enters $swp5, is
-# forwarded to $swp2, which is fully subscribed by the 1Gbps stream. The
-# expectation is that the burst is wholly absorbed by the large pool and no
-# drops are caused. After the burst, there should be a backlog that is hard to
-# get rid of, because $sw2 is fully subscribed. But because each individual
-# packet is scheduled soon after getting enqueued, SLL and HLL do not impact the
-# test.
-#
-# +-----------------------+                           +-----------------------+
-# | H1                    |			      | H3                    |
-# |   + $h1.111           |			      |          $h3.111 +    |
-# |   | 192.0.2.33/28     |			      |    192.0.2.35/28 |    |
-# |   |                   |			      |                  |    |
-# |   + $h1               |			      |              $h3 +    |
-# +---|-------------------+  +--------------------+   +------------------|----+
-#     |                      |                    |       		 |
-# +---|----------------------|--------------------|----------------------|----+
-# |   + $swp1          $swp3 +                    + $swp4          $swp5 |    |
-# |   | iPOOL1        iPOOL0 |                    | iPOOL2        iPOOL2 |    |
-# |   | ePOOL4        ePOOL5 |                    | ePOOL4        ePOOL4 |    |
-# |   |                1Gbps |                    | 1Gbps                |    |
-# | +-|----------------------|-+                +-|----------------------|-+  |
-# | | + $swp1.111  $swp3.111 + |                | + $swp4.111  $swp5.111 + |  |
-# | |                          |                |                          |  |
-# | | BR1                      |                | BR2                      |  |
-# | |                          |                |                          |  |
-# | |                          |                |         + $swp2.111      |  |
-# | +--------------------------+                +---------|----------------+  |
-# |                                                       |                   |
-# | iPOOL0: 500KB dynamic                                 |                   |
-# | iPOOL1: 500KB dynamic                                 |                   |
-# | iPOOL2: 10MB dynamic                                  + $swp2             |
-# | ePOOL4: 500KB dynamic                                 | iPOOL0            |
-# | ePOOL5: 500KB dnamic                                  | ePOOL6            |
-# | ePOOL6: 10MB dynamic                                  | 1Gbps             |
-# +-------------------------------------------------------|-------------------+
-#                                                         |
-#                                                     +---|-------------------+
-#                                                     |   + $h2            H2 |
-#                                                     |   | 1Gbps             |
-#                                                     |   |                   |
-#                                                     |   + $h2.111           |
-#                                                     |     192.0.2.34/28     |
-#                                                     +-----------------------+
-#
-# iPOOL0+ePOOL4 are helper pools for control traffic etc.
-# iPOOL1+ePOOL5 are helper pools for modeling the 1Gbps stream
-# iPOOL2+ePOOL6 are pools for soaking the burst traffic
-
-ALL_TESTS="
-	ping_ipv4
-	test_8K
-	test_800
-"
-
-lib_dir=$(dirname $0)/../../../net/forwarding
-
-NUM_NETIFS=8
-source $lib_dir/lib.sh
-source $lib_dir/devlink_lib.sh
-source qos_lib.sh
-source mlxsw_lib.sh
-
-_1KB=1000
-_500KB=$((500 * _1KB))
-_1MB=$((1000 * _1KB))
-
-# The failure mode that this specifically tests is exhaustion of descriptor
-# buffer. The point is to produce a burst that shared buffer should be able
-# to accommodate, but produce it with small enough packets that the machine
-# runs out of the descriptor buffer space with default configuration.
-#
-# The machine therefore needs to be able to produce line rate with as small
-# packets as possible, and at the same time have large enough buffer that
-# when filled with these small packets, it runs out of descriptors.
-# Spectrum-2 is very close, but cannot perform this test. Therefore use
-# Spectrum-3 as a minimum, and permit larger burst size, and therefore
-# larger packets, to reduce spurious failures.
-#
-mlxsw_only_on_spectrum 3+ || exit
-
-BURST_SIZE=$((50000000))
-POOL_SIZE=$BURST_SIZE
-
-h1_create()
-{
-	simple_if_init $h1
-	mtu_set $h1 10000
-
-	vlan_create $h1 111 v$h1 192.0.2.33/28
-	ip link set dev $h1.111 type vlan egress-qos-map 0:1
-}
-
-h1_destroy()
-{
-	vlan_destroy $h1 111
-
-	mtu_restore $h1
-	simple_if_fini $h1
-}
-
-h2_create()
-{
-	simple_if_init $h2
-	mtu_set $h2 10000
-	ethtool -s $h2 speed 1000 autoneg off
-
-	vlan_create $h2 111 v$h2 192.0.2.34/28
-}
-
-h2_destroy()
-{
-	vlan_destroy $h2 111
-
-	ethtool -s $h2 autoneg on
-	mtu_restore $h2
-	simple_if_fini $h2
-}
-
-h3_create()
-{
-	simple_if_init $h3
-	mtu_set $h3 10000
-
-	vlan_create $h3 111 v$h3 192.0.2.35/28
-}
-
-h3_destroy()
-{
-	vlan_destroy $h3 111
-
-	mtu_restore $h3
-	simple_if_fini $h3
-}
-
-switch_create()
-{
-	# pools
-	# -----
-
-	devlink_pool_size_thtype_save 0
-	devlink_pool_size_thtype_save 4
-	devlink_pool_size_thtype_save 1
-	devlink_pool_size_thtype_save 5
-	devlink_pool_size_thtype_save 2
-	devlink_pool_size_thtype_save 6
-
-	devlink_port_pool_th_save $swp1 1
-	devlink_port_pool_th_save $swp2 6
-	devlink_port_pool_th_save $swp3 5
-	devlink_port_pool_th_save $swp4 2
-	devlink_port_pool_th_save $swp5 2
-
-	devlink_tc_bind_pool_th_save $swp1 1 ingress
-	devlink_tc_bind_pool_th_save $swp2 1 egress
-	devlink_tc_bind_pool_th_save $swp3 1 egress
-	devlink_tc_bind_pool_th_save $swp4 1 ingress
-	devlink_tc_bind_pool_th_save $swp5 1 ingress
-
-	# Control traffic pools. Just reduce the size.
-	devlink_pool_size_thtype_set 0 dynamic $_500KB
-	devlink_pool_size_thtype_set 4 dynamic $_500KB
-
-	# Stream modeling pools.
-	devlink_pool_size_thtype_set 1 dynamic $_500KB
-	devlink_pool_size_thtype_set 5 dynamic $_500KB
-
-	# Burst soak pools.
-	devlink_pool_size_thtype_set 2 static $POOL_SIZE
-	devlink_pool_size_thtype_set 6 static $POOL_SIZE
-
-	# $swp1
-	# -----
-
-	ip link set dev $swp1 up
-	mtu_set $swp1 10000
-	vlan_create $swp1 111
-	ip link set dev $swp1.111 type vlan ingress-qos-map 0:0 1:1
-
-	devlink_port_pool_th_set $swp1 1 16
-	devlink_tc_bind_pool_th_set $swp1 1 ingress 1 16
-
-	# Configure qdisc...
-	tc qdisc replace dev $swp1 root handle 1: \
-	   ets bands 8 strict 8 priomap 7 6
-	# ... so that we can assign prio1 traffic to PG1.
-	dcb buffer set dev $swp1 prio-buffer all:0 1:1
-
-	# $swp2
-	# -----
-
-	ip link set dev $swp2 up
-	mtu_set $swp2 10000
-	ethtool -s $swp2 speed 1000 autoneg off
-	vlan_create $swp2 111
-	ip link set dev $swp2.111 type vlan egress-qos-map 0:0 1:1
-
-	devlink_port_pool_th_set $swp2 6 $POOL_SIZE
-	devlink_tc_bind_pool_th_set $swp2 1 egress 6 $POOL_SIZE
-
-	# prio 0->TC0 (band 7), 1->TC1 (band 6)
-	tc qdisc replace dev $swp2 root handle 1: \
-	   ets bands 8 strict 8 priomap 7 6
-
-	# $swp3
-	# -----
-
-	ip link set dev $swp3 up
-	mtu_set $swp3 10000
-	ethtool -s $swp3 speed 1000 autoneg off
-	vlan_create $swp3 111
-	ip link set dev $swp3.111 type vlan egress-qos-map 0:0 1:1
-
-	devlink_port_pool_th_set $swp3 5 16
-	devlink_tc_bind_pool_th_set $swp3 1 egress 5 16
-
-	# prio 0->TC0 (band 7), 1->TC1 (band 6)
-	tc qdisc replace dev $swp3 root handle 1: \
-	   ets bands 8 strict 8 priomap 7 6
-
-	# $swp4
-	# -----
-
-	ip link set dev $swp4 up
-	mtu_set $swp4 10000
-	ethtool -s $swp4 speed 1000 autoneg off
-	vlan_create $swp4 111
-	ip link set dev $swp4.111 type vlan ingress-qos-map 0:0 1:1
-
-	devlink_port_pool_th_set $swp4 2 $POOL_SIZE
-	devlink_tc_bind_pool_th_set $swp4 1 ingress 2 $POOL_SIZE
-
-	# Configure qdisc...
-	tc qdisc replace dev $swp4 root handle 1: \
-	   ets bands 8 strict 8 priomap 7 6
-	# ... so that we can assign prio1 traffic to PG1.
-	dcb buffer set dev $swp4 prio-buffer all:0 1:1
-
-	# $swp5
-	# -----
-
-	ip link set dev $swp5 up
-	mtu_set $swp5 10000
-	vlan_create $swp5 111
-	ip link set dev $swp5.111 type vlan ingress-qos-map 0:0 1:1
-
-	devlink_port_pool_th_set $swp5 2 $POOL_SIZE
-	devlink_tc_bind_pool_th_set $swp5 1 ingress 2 $POOL_SIZE
-
-	# Configure qdisc...
-	tc qdisc replace dev $swp5 root handle 1: \
-	   ets bands 8 strict 8 priomap 7 6
-	# ... so that we can assign prio1 traffic to PG1.
-	dcb buffer set dev $swp5 prio-buffer all:0 1:1
-
-	# bridges
-	# -------
-
-	ip link add name br1 type bridge vlan_filtering 0
-	ip link set dev $swp1.111 master br1
-	ip link set dev $swp3.111 master br1
-	ip link set dev br1 up
-
-	ip link add name br2 type bridge vlan_filtering 0
-	ip link set dev $swp2.111 master br2
-	ip link set dev $swp4.111 master br2
-	ip link set dev $swp5.111 master br2
-	ip link set dev br2 up
-}
-
-switch_destroy()
-{
-	# Do this first so that we can reset the limits to values that are only
-	# valid for the original static / dynamic setting.
-	devlink_pool_size_thtype_restore 6
-	devlink_pool_size_thtype_restore 5
-	devlink_pool_size_thtype_restore 4
-	devlink_pool_size_thtype_restore 2
-	devlink_pool_size_thtype_restore 1
-	devlink_pool_size_thtype_restore 0
-
-	# bridges
-	# -------
-
-	ip link set dev br2 down
-	ip link set dev $swp5.111 nomaster
-	ip link set dev $swp4.111 nomaster
-	ip link set dev $swp2.111 nomaster
-	ip link del dev br2
-
-	ip link set dev br1 down
-	ip link set dev $swp3.111 nomaster
-	ip link set dev $swp1.111 nomaster
-	ip link del dev br1
-
-	# $swp5
-	# -----
-
-	dcb buffer set dev $swp5 prio-buffer all:0
-	tc qdisc del dev $swp5 root
-
-	devlink_tc_bind_pool_th_restore $swp5 1 ingress
-	devlink_port_pool_th_restore $swp5 2
-
-	vlan_destroy $swp5 111
-	mtu_restore $swp5
-	ip link set dev $swp5 down
-
-	# $swp4
-	# -----
-
-	dcb buffer set dev $swp4 prio-buffer all:0
-	tc qdisc del dev $swp4 root
-
-	devlink_tc_bind_pool_th_restore $swp4 1 ingress
-	devlink_port_pool_th_restore $swp4 2
-
-	vlan_destroy $swp4 111
-	ethtool -s $swp4 autoneg on
-	mtu_restore $swp4
-	ip link set dev $swp4 down
-
-	# $swp3
-	# -----
-
-	tc qdisc del dev $swp3 root
-
-	devlink_tc_bind_pool_th_restore $swp3 1 egress
-	devlink_port_pool_th_restore $swp3 5
-
-	vlan_destroy $swp3 111
-	ethtool -s $swp3 autoneg on
-	mtu_restore $swp3
-	ip link set dev $swp3 down
-
-	# $swp2
-	# -----
-
-	tc qdisc del dev $swp2 root
-
-	devlink_tc_bind_pool_th_restore $swp2 1 egress
-	devlink_port_pool_th_restore $swp2 6
-
-	vlan_destroy $swp2 111
-	ethtool -s $swp2 autoneg on
-	mtu_restore $swp2
-	ip link set dev $swp2 down
-
-	# $swp1
-	# -----
-
-	dcb buffer set dev $swp1 prio-buffer all:0
-	tc qdisc del dev $swp1 root
-
-	devlink_tc_bind_pool_th_restore $swp1 1 ingress
-	devlink_port_pool_th_restore $swp1 1
-
-	vlan_destroy $swp1 111
-	mtu_restore $swp1
-	ip link set dev $swp1 down
-}
-
-setup_prepare()
-{
-	h1=${NETIFS[p1]}
-	swp1=${NETIFS[p2]}
-
-	swp2=${NETIFS[p3]}
-	h2=${NETIFS[p4]}
-
-	swp3=${NETIFS[p5]}
-	swp4=${NETIFS[p6]}
-
-	swp5=${NETIFS[p7]}
-	h3=${NETIFS[p8]}
-
-	h2mac=$(mac_get $h2)
-
-	vrf_prepare
-
-	h1_create
-	h2_create
-	h3_create
-	switch_create
-}
-
-cleanup()
-{
-	pre_cleanup
-
-	switch_destroy
-	h3_destroy
-	h2_destroy
-	h1_destroy
-
-	vrf_cleanup
-}
-
-ping_ipv4()
-{
-	ping_test $h1 192.0.2.34 " h1->h2"
-	ping_test $h3 192.0.2.34 " h3->h2"
-}
-
-__test_qos_burst()
-{
-	local pktsize=$1; shift
-
-	RET=0
-
-	start_traffic_pktsize $pktsize $h1.111 192.0.2.33 192.0.2.34 $h2mac
-	sleep 1
-
-	local q0=$(ethtool_stats_get $swp2 tc_transmit_queue_tc_1)
-	((q0 == 0))
-	check_err $? "Transmit queue non-zero?"
-
-	local d0=$(ethtool_stats_get $swp2 tc_no_buffer_discard_uc_tc_1)
-
-	local cell_size=$(devlink_cell_size_get)
-	local cells=$((BURST_SIZE / cell_size))
-	# Each packet is $pktsize of payload + headers.
-	local pkt_cells=$(((pktsize + 50 + cell_size - 1)  / cell_size))
-	# How many packets can we admit:
-	local pkts=$((cells / pkt_cells))
-
-	$MZ $h3 -p $pktsize -Q 1:111 -A 192.0.2.35 -B 192.0.2.34 \
-		-a own -b $h2mac -c $pkts -t udp -q
-	sleep 1
-
-	local d1=$(ethtool_stats_get $swp2 tc_no_buffer_discard_uc_tc_1)
-	((d1 == d0))
-	check_err $? "Drops seen on egress port: $d0 -> $d1 ($((d1 - d0)))"
-
-	# Check that the queue is somewhat close to the burst size This
-	# makes sure that the lack of drops above was not due to port
-	# undersubscribtion.
-	local q0=$(ethtool_stats_get $swp2 tc_transmit_queue_tc_1)
-	local qe=$((90 * BURST_SIZE / 100))
-	((q0 > qe))
-	check_err $? "Queue size expected >$qe, got $q0"
-
-	stop_traffic
-	sleep 2
-
-	log_test "Burst: absorb $pkts ${pktsize}-B packets"
-}
-
-test_8K()
-{
-	__test_qos_burst 8000
-}
-
-test_800()
-{
-	__test_qos_burst 800
-}
-
-bail_on_lldpad
-
-trap cleanup EXIT
-setup_prepare
-setup_wait
-tests_run
-
-exit $EXIT_STATUS
-- 
cgit v1.2.3


From 19d6356ab3f0ee40756115fffa01244235f7f400 Mon Sep 17 00:00:00 2001
From: Andrea Mayer <andrea.mayer@uniroma2.it>
Date: Mon, 12 Sep 2022 19:16:19 +0200
Subject: selftests: seg6: add selftest for NEXT-C-SID flavor in SRv6 End
 behavior

This selftest is designed for testing the support of NEXT-C-SID flavor
for SRv6 End behavior. It instantiates a virtual network composed of
several nodes: hosts and SRv6 routers. Each node is realized using a
network namespace that is properly interconnected to others through veth
pairs.
The test considers SRv6 routers implementing IPv4/IPv6 L3 VPNs leveraged
by hosts for communicating with each other. Such routers i) apply
different SRv6 Policies to the traffic received from connected hosts,
considering the IPv4 or IPv6 protocols; ii) use the NEXT-C-SID
compression mechanism for encoding several SRv6 segments within a single
128-bit SID address, referred to as a Compressed SID (C-SID) container.

The NEXT-C-SID is provided as a "flavor" of the SRv6 End behavior,
enabling it to properly process the C-SID containers. The correct
execution of the enabled NEXT-C-SID SRv6 End behavior is verified
through reachability tests carried out between hosts belonging to the
same VPN.

Signed-off-by: Andrea Mayer <andrea.mayer@uniroma2.it>
Acked-by: David Ahern <dsahern@kernel.org>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
 tools/testing/selftests/net/Makefile               |    1 +
 .../selftests/net/srv6_end_next_csid_l3vpn_test.sh | 1145 ++++++++++++++++++++
 2 files changed, 1146 insertions(+)
 create mode 100755 tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index f5ac1433c301..d87e8739bb30 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -37,6 +37,7 @@ TEST_PROGS += srv6_end_dt4_l3vpn_test.sh
 TEST_PROGS += srv6_end_dt6_l3vpn_test.sh
 TEST_PROGS += srv6_hencap_red_l3vpn_test.sh
 TEST_PROGS += srv6_hl2encap_red_l2vpn_test.sh
+TEST_PROGS += srv6_end_next_csid_l3vpn_test.sh
 TEST_PROGS += vrf_strict_mode_test.sh
 TEST_PROGS += arp_ndisc_evict_nocarrier.sh
 TEST_PROGS += ndisc_unsolicited_na_test.sh
diff --git a/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh
new file mode 100755
index 000000000000..87e414cc417c
--- /dev/null
+++ b/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh
@@ -0,0 +1,1145 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# author: Andrea Mayer <andrea.mayer@uniroma2.it>
+#
+# This script is designed for testing the support of NEXT-C-SID flavor for SRv6
+# End behavior.
+# A basic knowledge of SRv6 architecture [1] and of the compressed SID approach
+# [2] is assumed for the reader.
+#
+# The network topology used in the selftest is depicted hereafter, composed of
+# two hosts and four routers. Hosts hs-1 and hs-2 are connected through an
+# IPv4/IPv6 L3 VPN service, offered by routers rt-1, rt-2, rt-3 and rt-4 using
+# the NEXT-C-SID flavor. The key components for such VPNs are:
+#
+#    i) The SRv6 H.Encaps/H.Encaps.Red behaviors [1] apply SRv6 Policies on
+#       traffic received by connected hosts, initiating the VPN tunnel;
+#
+#   ii) The SRv6 End behavior [1] advances the active SID in the SID List
+#       carried by the SRH;
+#
+#  iii) The NEXT-C-SID mechanism [2] offers the possibility of encoding several
+#       SRv6 segments within a single 128-bit SID address, referred to as a
+#       Compressed SID (C-SID) container. In this way, the length of the SID
+#       List can be drastically reduced.
+#       The NEXT-C-SID is provided as a "flavor" of the SRv6 End behavior
+#       which advances the current C-SID (i.e. the Locator-Node Function defined
+#       in [2]) with the next one carried in the Argument, if available.
+#       When no more C-SIDs are available in the Argument, the SRv6 End behavior
+#       will apply the End function selecting the next SID in the SID List.
+#
+#   iv) The SRv6 End.DT46 behavior [1] is used for removing the SRv6 Policy and,
+#       thus, it terminates the VPN tunnel. Such a behavior is capable of
+#       handling, at the same time, both tunneled IPv4 and IPv6 traffic.
+#
+# [1] https://datatracker.ietf.org/doc/html/rfc8986
+# [2] https://datatracker.ietf.org/doc/html/draft-ietf-spring-srv6-srh-compression
+#
+#
+#               cafe::1                      cafe::2
+#              10.0.0.1                     10.0.0.2
+#             +--------+                   +--------+
+#             |        |                   |        |
+#             |  hs-1  |                   |  hs-2  |
+#             |        |                   |        |
+#             +---+----+                   +----+---+
+#    cafe::/64    |                             |      cafe::/64
+#  10.0.0.0/24    |                             |    10.0.0.0/24
+#             +---+----+                   +----+---+
+#             |        |  fcf0:0:1:2::/64  |        |
+#             |  rt-1  +-------------------+  rt-2  |
+#             |        |                   |        |
+#             +---+----+                   +----+---+
+#                 |      .               .      |
+#                 |  fcf0:0:1:3::/64   .        |
+#                 |          .       .          |
+#                 |            .   .            |
+# fcf0:0:1:4::/64 |              .              | fcf0:0:2:3::/64
+#                 |            .   .            |
+#                 |          .       .          |
+#                 |  fcf0:0:2:4::/64   .        |
+#                 |      .               .      |
+#             +---+----+                   +----+---+
+#             |        |                   |        |
+#             |  rt-4  +-------------------+  rt-3  |
+#             |        |  fcf0:0:3:4::/64  |        |
+#             +---+----+                   +----+---+
+#
+# Every fcf0:0:x:y::/64 network interconnects the SRv6 routers rt-x with rt-y in
+# the selftest network.
+#
+# Local SID/C-SID table
+# =====================
+#
+# Each SRv6 router is configured with a Local SID/C-SID table in which
+# SIDs/C-SIDs are stored. Considering an SRv6 router rt-x, SIDs/C-SIDs are
+# configured in the Local SID/C-SIDs table as follows:
+#
+#   Local SID/C-SID table for SRv6 router rt-x
+#   +-----------------------------------------------------------+
+#   |fcff:x::d46 is associated with the non-compressed SRv6     |
+#   |   End.DT46 behavior                                       |
+#   +-----------------------------------------------------------+
+#   |fcbb:0:0x00::/48 is associated with the NEXT-C-SID flavor  |
+#   |   of SRv6 End behavior                                    |
+#   +-----------------------------------------------------------+
+#   |fcbb:0:0x00:d46::/64 is associated with the SRv6 End.DT46  |
+#   |   behavior when NEXT-C-SID compression is turned on       |
+#   +-----------------------------------------------------------+
+#
+# The fcff::/16 prefix is reserved for implementing SRv6 services with regular
+# (non compressed) SIDs. Reachability of SIDs is ensured by proper configuration
+# of the IPv6 routing tables in the routers.
+# Similarly, the fcbb:0::/32 prefix is reserved for implementing SRv6 VPN
+# services leveraging the NEXT-C-SID compression mechanism. Indeed, the
+# fcbb:0::/32 is used for encoding the Locator-Block while the Locator-Node
+# Function is encoded with 16 bits.
+#
+# Incoming traffic classification and application of SRv6 Policies
+# ================================================================
+#
+# An SRv6 ingress router applies different SRv6 Policies to the traffic received
+# from a connected host, considering the IPv4 or IPv6 destination address.
+# SRv6 policy enforcement consists of encapsulating the received traffic into a
+# new IPv6 packet with a given SID List contained in the SRH.
+# When the SID List contains only one SID, the SRH could be omitted completely
+# and that SID is stored directly in the IPv6 Destination Address (DA) (this is
+# called "reduced" encapsulation).
+#
+# Test cases for NEXT-C-SID
+# =========================
+#
+# We consider two test cases for NEXT-C-SID: i) single SID and ii) double SID.
+#
+# In the single SID test case we have a number of segments that are all
+# contained in a single Compressed SID (C-SID) container. Therefore the
+# resulting SID List has only one SID. Using the reduced encapsulation format
+# this will result in a packet with no SRH.
+#
+# In the double SID test case we have one segment carried in a Compressed SID
+# (C-SID) container, followed by a regular (non compressed) SID. The resulting
+# SID List has two segments and it is possible to test the advance to the next
+# SID when all the C-SIDs in a C-SID container have been processed. Using the
+# reduced encapsulation format this will result in a packet with an SRH
+# containing 1 segment.
+#
+# For the single SID test case, we use the IPv6 addresses of hs-1 and hs-2, for
+# the double SID test case, we use their IPv4 addresses. This is only done to
+# simplify the test setup and avoid adding other hosts or multiple addresses on
+# the same interface of a host.
+#
+# Traffic from hs-1 to hs-2
+# -------------------------
+#
+# Packets generated from hs-1 and directed towards hs-2 are handled by rt-1
+# which applies the SRv6 Policies as follows:
+#
+#   i) IPv6 DA=cafe::2, H.Encaps.Red with SID List=fcbb:0:0400:0300:0200:d46::
+#  ii) IPv4 DA=10.0.0.2, H.Encaps.Red with SID List=fcbb:0:0300::,fcff:2::d46
+#
+# ### i) single SID
+#
+# The router rt-1 is configured to enforce the given Policy through the SRv6
+# H.Encaps.Red behavior which avoids the presence of the SRH at all, since it
+# pushes the single SID directly in the IPv6 DA. Such a SID encodes a whole
+# C-SID container carrying several C-SIDs (e.g. 0400, 0300, etc).
+#
+# As the packet reaches the router rt-4, the enabled NEXT-C-SID SRv6 End
+# behavior (associated with fcbb:0:0400::/48) is triggered. This behavior
+# analyzes the IPv6 DA and checks whether the Argument of the C-SID container
+# is zero or not. In this case, the Argument is *NOT* zero and the IPv6 DA is
+# updated as follows:
+#
+# +---------------------------------------------------------------+
+# | Before applying the rt-4 enabled NEXT-C-SID SRv6 End behavior |
+# +---------------------------------------------------------------+
+# |                            +---------- Argument               |
+# |                     vvvvvvvvvvvvvvvv                          |
+# | IPv6 DA fcbb:0:0400:0300:0200:d46::                           |
+# |                ^^^^    <-- shifting                           |
+# |                  |                                            |
+# |          Locator-Node Function                                |
+# +---------------------------------------------------------------+
+# | After applying the rt-4 enabled NEXT-C-SID SRv6 End behavior  |
+# +---------------------------------------------------------------+
+# |                          +---------- Argument                 |
+# |                    vvvvvvvvvvvv                               |
+# | IPv6 DA fcbb:0:0300:0200:d46::                                |
+# |                ^^^^                                           |
+# |                  |                                            |
+# |          Locator-Node Function                                |
+# +---------------------------------------------------------------+
+#
+# After having applied the enabled NEXT-C-SID SRv6 End behavior, the packet is
+# sent to the next node, i.e. rt-3.
+#
+# The enabled NEXT-C-SID SRv6 End behavior on rt-3 is executed as the packet is
+# received. This behavior processes the packet and updates the IPv6 DA with
+# fcbb:0:0200:d46::, since the Argument is *NOT* zero. Then, the packet is sent
+# to the router rt-2.
+#
+# The router rt-2 is configured for decapsulating the inner IPv6 packet and,
+# for this reason, it applies the SRv6 End.DT46 behavior on the received
+# packet. It is worth noting that the SRv6 End.DT46 behavior does not require
+# the presence of the SRH: it is fully capable of operating properly on
+# IPv4/IPv6-in-IPv6 encapsulations.
+# At the end of the decap operation, the packet is sent to the
+# host hs-2.
+#
+# ### ii) double SID
+#
+# The router rt-1 is configured to enforce the given Policy through the SRv6
+# H.Encaps.Red. As a result, the first SID fcbb:0:0300:: is stored into the
+# IPv6 DA, while the SRH pushed into the packet is made of only one SID, i.e.
+# fcff:2::d46. Hence, the packet sent by hs-1 to hs-2 is encapsulated in an
+# outer IPv6 header plus the SRH.
+#
+# As the packet reaches the node rt-3, the router applies the enabled NEXT-C-SID
+# SRv6 End behavior.
+#
+# +---------------------------------------------------------------+
+# | Before applying the rt-3 enabled NEXT-C-SID SRv6 End behavior |
+# +---------------------------------------------------------------+
+# |                            +---------- Argument               |
+# |                      vvvv (Argument is all filled with zeros) |
+# | IPv6 DA fcbb:0:0300::                                         |
+# |                ^^^^                                           |
+# |                  |                                            |
+# |          Locator-Node Function                                |
+# +---------------------------------------------------------------+
+# | After applying the rt-3 enabled NEXT-C-SID SRv6 End behavior  |
+# +---------------------------------------------------------------+
+# |                                                               |
+# | IPv6 DA fcff:2::d46                                           |
+# |         ^^^^^^^^^^^                                           |
+# |              |                                                |
+# |        SID copied from the SID List contained in the SRH      |
+# +---------------------------------------------------------------+
+#
+# Since the Argument of the C-SID container is zero, the behavior can not
+# update the Locator-Node function with the next C-SID carried in the Argument
+# itself. Thus, the enabled NEXT-C-SID SRv6 End behavior operates as the
+# traditional End behavior: it updates the IPv6 DA by copying the next
+# available SID in the SID List carried by the SRH. After that, the packet is
+# sent to the node rt-2.
+#
+# Once the packet is received by rt-2, the router decapsulates the inner IPv4
+# packet using the SRv6 End.DT46 behavior (associated with the SID fcff:2::d46)
+# and sends it to the host hs-2.
+#
+# Traffic from hs-2 to hs-1
+# -------------------------
+#
+# Packets generated from hs-2 and directed towards hs-1 are handled by rt-2
+# which applies the SRv6 Policies as follows:
+#
+#   i) IPv6 DA=cafe::1, SID List=fcbb:0:0300:0400:0100:d46::
+#  ii) IPv4 DA=10.0.0.1, SID List=fcbb:0:0300::,fcff:1::d46
+#
+# For simplicity, such SRv6 Policies were chosen so that, in both use cases (i)
+# and (ii), the network paths crossed by traffic from hs-2 to hs-1 are the same
+# as those taken by traffic from hs-1 to hs-2.
+# In this way, traffic from hs-2 to hs-1 is processed similarly to traffic from
+# hs-1 to hs-2. So, the traffic processing scheme turns out to be the same as
+# that adopted in the use cases already examined (of course, it is necessary to
+# consider the different SIDs/C-SIDs).
+
+# Kselftest framework requirement - SKIP code is 4.
+readonly ksft_skip=4
+
+readonly RDMSUFF="$(mktemp -u XXXXXXXX)"
+readonly DUMMY_DEVNAME="dum0"
+readonly VRF_TID=100
+readonly VRF_DEVNAME="vrf-${VRF_TID}"
+readonly RT2HS_DEVNAME="veth-t${VRF_TID}"
+readonly LOCALSID_TABLE_ID=90
+readonly IPv6_RT_NETWORK=fcf0:0
+readonly IPv6_HS_NETWORK=cafe
+readonly IPv4_HS_NETWORK=10.0.0
+readonly VPN_LOCATOR_SERVICE=fcff
+readonly DT46_FUNC=0d46
+readonly HEADEND_ENCAP="encap.red"
+
+# do not add ':' as separator
+readonly LCBLOCK_ADDR=fcbb0000
+readonly LCBLOCK_BLEN=32
+# do not add ':' as separator
+readonly LCNODEFUNC_FMT="0%d00"
+readonly LCNODEFUNC_BLEN=16
+
+readonly LCBLOCK_NODEFUNC_BLEN=$((LCBLOCK_BLEN + LCNODEFUNC_BLEN))
+
+readonly CSID_CNTR_PREFIX="dead:beaf::/32"
+# ID of the router used for testing the C-SID container cfgs
+readonly CSID_CNTR_RT_ID_TEST=1
+# Routing table used for testing the C-SID container cfgs
+readonly CSID_CNTR_RT_TABLE=91
+
+# C-SID container configurations to be tested
+#
+# An entry of the array is defined as "a,b,c" where:
+# - 'a' and 'b' elements represent respectively the Locator-Block length
+#   (lblen) in bits and the Locator-Node Function length (nflen) in bits.
+#   'a' and 'b' can be set to default values using the placeholder "d" which
+#   indicates the default kernel values (32 for lblen and 16 for nflen);
+#   otherwise, any numeric value is accepted;
+# - 'c' indicates whether the C-SID configuration provided by the values 'a'
+#   and 'b' should be considered valid ("y") or invalid ("n").
+declare -ra CSID_CONTAINER_CFGS=(
+	"d,d,y"
+	"d,16,y"
+	"16,d,y"
+	"16,32,y"
+	"32,16,y"
+	"48,8,y"
+	"8,48,y"
+	"d,0,n"
+	"0,d,n"
+	"32,0,n"
+	"0,32,n"
+	"17,d,n"
+	"d,17,n"
+	"120,16,n"
+	"16,120,n"
+	"0,128,n"
+	"128,0,n"
+	"130,0,n"
+	"0,130,n"
+	"0,0,n"
+)
+
+PING_TIMEOUT_SEC=4
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+
+# IDs of routers and hosts are initialized during the setup of the testing
+# network
+ROUTERS=''
+HOSTS=''
+
+SETUP_ERR=1
+
+ret=${ksft_skip}
+nsuccess=0
+nfail=0
+
+log_test()
+{
+	local rc="$1"
+	local expected="$2"
+	local msg="$3"
+
+	if [ "${rc}" -eq "${expected}" ]; then
+		nsuccess=$((nsuccess+1))
+		printf "\n    TEST: %-60s  [ OK ]\n" "${msg}"
+	else
+		ret=1
+		nfail=$((nfail+1))
+		printf "\n    TEST: %-60s  [FAIL]\n" "${msg}"
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+			echo
+			echo "hit enter to continue, 'q' to quit"
+			read a
+			[ "$a" = "q" ] && exit 1
+		fi
+	fi
+}
+
+print_log_test_results()
+{
+	printf "\nTests passed: %3d\n" "${nsuccess}"
+	printf "Tests failed: %3d\n"   "${nfail}"
+
+	# when a test fails, the value of 'ret' is set to 1 (error code).
+	# Conversely, when all tests are passed successfully, the 'ret' value
+	# is set to 0 (success code).
+	if [ "${ret}" -ne 1 ]; then
+		ret=0
+	fi
+}
+
+log_section()
+{
+	echo
+	echo "################################################################################"
+	echo "TEST SECTION: $*"
+	echo "################################################################################"
+}
+
+test_command_or_ksft_skip()
+{
+	local cmd="$1"
+
+	if [ ! -x "$(command -v "${cmd}")" ]; then
+		echo "SKIP: Could not run test without \"${cmd}\" tool";
+		exit "${ksft_skip}"
+	fi
+}
+
+get_nodename()
+{
+	local name="$1"
+
+	echo "${name}-${RDMSUFF}"
+}
+
+get_rtname()
+{
+	local rtid="$1"
+
+	get_nodename "rt-${rtid}"
+}
+
+get_hsname()
+{
+	local hsid="$1"
+
+	get_nodename "hs-${hsid}"
+}
+
+__create_namespace()
+{
+	local name="$1"
+
+	ip netns add "${name}"
+}
+
+create_router()
+{
+	local rtid="$1"
+	local nsname
+
+	nsname="$(get_rtname "${rtid}")"
+
+	__create_namespace "${nsname}"
+}
+
+create_host()
+{
+	local hsid="$1"
+	local nsname
+
+	nsname="$(get_hsname "${hsid}")"
+
+	__create_namespace "${nsname}"
+}
+
+cleanup()
+{
+	local nsname
+	local i
+
+	# destroy routers
+	for i in ${ROUTERS}; do
+		nsname="$(get_rtname "${i}")"
+
+		ip netns del "${nsname}" &>/dev/null || true
+	done
+
+	# destroy hosts
+	for i in ${HOSTS}; do
+		nsname="$(get_hsname "${i}")"
+
+		ip netns del "${nsname}" &>/dev/null || true
+	done
+
+	# check whether the setup phase was completed successfully or not. In
+	# case of an error during the setup phase of the testing environment,
+	# the selftest is considered as "skipped".
+	if [ "${SETUP_ERR}" -ne 0 ]; then
+		echo "SKIP: Setting up the testing environment failed"
+		exit "${ksft_skip}"
+	fi
+
+	exit "${ret}"
+}
+
+add_link_rt_pairs()
+{
+	local rt="$1"
+	local rt_neighs="$2"
+	local neigh
+	local nsname
+	local neigh_nsname
+
+	nsname="$(get_rtname "${rt}")"
+
+	for neigh in ${rt_neighs}; do
+		neigh_nsname="$(get_rtname "${neigh}")"
+
+		ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \
+			type veth peer name "veth-rt-${neigh}-${rt}" \
+			netns "${neigh_nsname}"
+	done
+}
+
+get_network_prefix()
+{
+	local rt="$1"
+	local neigh="$2"
+	local p="${rt}"
+	local q="${neigh}"
+
+	if [ "${p}" -gt "${q}" ]; then
+		p="${q}"; q="${rt}"
+	fi
+
+	echo "${IPv6_RT_NETWORK}:${p}:${q}"
+}
+
+# Setup the basic networking for the routers
+setup_rt_networking()
+{
+	local rt="$1"
+	local rt_neighs="$2"
+	local nsname
+	local net_prefix
+	local devname
+	local neigh
+
+	nsname="$(get_rtname "${rt}")"
+
+	for neigh in ${rt_neighs}; do
+		devname="veth-rt-${rt}-${neigh}"
+
+		net_prefix="$(get_network_prefix "${rt}" "${neigh}")"
+
+		ip -netns "${nsname}" addr \
+			add "${net_prefix}::${rt}/64" dev "${devname}" nodad
+
+		ip -netns "${nsname}" link set "${devname}" up
+	done
+
+        ip -netns "${nsname}" link add "${DUMMY_DEVNAME}" type dummy
+
+        ip -netns "${nsname}" link set "${DUMMY_DEVNAME}" up
+	ip -netns "${nsname}" link set lo up
+
+	ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
+	ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
+	ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1
+
+	ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0
+	ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0
+	ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1
+}
+
+# build an ipv6 prefix/address based on the input string
+# Note that the input string does not contain ':' and '::' which are considered
+# to be implicit.
+# e.g.:
+#  - input:  fbcc000004000300
+#  - output: fbcc:0000:0400:0300:0000:0000:0000:0000
+#                                ^^^^^^^^^^^^^^^^^^^
+#                              fill the address with 0s
+build_ipv6_addr()
+{
+	local addr="$1"
+	local out=""
+	local strlen="${#addr}"
+	local padn
+	local i
+
+	# add ":" every 4 digits (16 bits)
+	for (( i = 0; i < strlen; i++ )); do
+		if (( i > 0 && i < 32 && (i % 4) == 0 )); then
+			out="${out}:"
+		fi
+
+		out="${out}${addr:$i:1}"
+	done
+
+	# fill the remaining bits of the address with 0s
+	padn=$((32 - strlen))
+	for (( i = padn; i > 0; i-- )); do
+		if (( i > 0 && i < 32 && (i % 4) == 0 )); then
+			out="${out}:"
+		fi
+
+		out="${out}0"
+	done
+
+	printf "${out}"
+}
+
+build_csid()
+{
+	local nodeid="$1"
+
+	printf "${LCNODEFUNC_FMT}" "${nodeid}"
+}
+
+build_lcnode_func_prefix()
+{
+	local nodeid="$1"
+	local lcnodefunc
+	local prefix
+	local out
+
+	lcnodefunc="$(build_csid "${nodeid}")"
+	prefix="$(build_ipv6_addr "${LCBLOCK_ADDR}${lcnodefunc}")"
+
+	out="${prefix}/${LCBLOCK_NODEFUNC_BLEN}"
+
+	echo "${out}"
+}
+
+# Setup local SIDs for an SRv6 router
+setup_rt_local_sids()
+{
+	local rt="$1"
+	local rt_neighs="$2"
+	local net_prefix
+	local devname
+	local nsname
+	local neigh
+	local lcnode_func_prefix
+	local lcblock_prefix
+
+	nsname="$(get_rtname "${rt}")"
+
+	for neigh in ${rt_neighs}; do
+		devname="veth-rt-${rt}-${neigh}"
+
+		net_prefix="$(get_network_prefix "${rt}" "${neigh}")"
+
+		# set underlay network routes for SIDs reachability
+		ip -netns "${nsname}" -6 route \
+			add "${VPN_LOCATOR_SERVICE}:${neigh}::/32" \
+			table "${LOCALSID_TABLE_ID}" \
+			via "${net_prefix}::${neigh}" dev "${devname}"
+
+		# set the underlay network for C-SIDs reachability
+		lcnode_func_prefix="$(build_lcnode_func_prefix "${neigh}")"
+
+		ip -netns "${nsname}" -6 route \
+			add "${lcnode_func_prefix}" \
+			table "${LOCALSID_TABLE_ID}" \
+			via "${net_prefix}::${neigh}" dev "${devname}"
+	done
+
+	lcnode_func_prefix="$(build_lcnode_func_prefix "${rt}")"
+
+	# enabled NEXT-C-SID SRv6 End behavior (note that "dev" is the dummy
+	# dum0 device chosen for the sake of simplicity).
+	ip -netns "${nsname}" -6 route \
+		add "${lcnode_func_prefix}" \
+		table "${LOCALSID_TABLE_ID}" \
+		encap seg6local action End flavors next-csid \
+		lblen "${LCBLOCK_BLEN}" nflen "${LCNODEFUNC_BLEN}" \
+		dev "${DUMMY_DEVNAME}"
+
+	# all SIDs for VPNs start with a common locator. Routes and SRv6
+	# Endpoint behavior instances are grouped together in the 'localsid'
+	# table.
+	ip -netns "${nsname}" -6 rule \
+		add to "${VPN_LOCATOR_SERVICE}::/16" \
+		lookup "${LOCALSID_TABLE_ID}" prio 999
+
+	# common locator block for NEXT-C-SIDS compression mechanism.
+	lcblock_prefix="$(build_ipv6_addr "${LCBLOCK_ADDR}")"
+	ip -netns "${nsname}" -6 rule \
+		add to "${lcblock_prefix}/${LCBLOCK_BLEN}" \
+		lookup "${LOCALSID_TABLE_ID}" prio 999
+}
+
+# build and install the SRv6 policy into the ingress SRv6 router as well as the
+# decap SID in the egress one.
+# args:
+#  $1 - src host (evaluate automatically the ingress router)
+#  $2 - dst host (evaluate automatically the egress router)
+#  $3 - SRv6 routers configured for steering traffic (End behaviors)
+#  $4 - single SID or double SID
+#  $5 - traffic type (IPv6 or IPv4)
+__setup_l3vpn()
+{
+	local src="$1"
+	local dst="$2"
+	local end_rts="$3"
+	local mode="$4"
+	local traffic="$5"
+	local nsname
+	local policy
+	local container
+	local decapsid
+	local lcnfunc
+	local dt
+	local n
+	local rtsrc_nsname
+	local rtdst_nsname
+
+	rtsrc_nsname="$(get_rtname "${src}")"
+	rtdst_nsname="$(get_rtname "${dst}")"
+
+	container="${LCBLOCK_ADDR}"
+
+	# build first SID (C-SID container)
+	for n in ${end_rts}; do
+		lcnfunc="$(build_csid "${n}")"
+
+		container="${container}${lcnfunc}"
+	done
+
+	if [ "${mode}" -eq 1 ]; then
+		# single SID policy
+		dt="$(build_csid "${dst}")${DT46_FUNC}"
+		container="${container}${dt}"
+		# build the full ipv6 address for the container
+		policy="$(build_ipv6_addr "${container}")"
+
+		# build the decap SID used in the decap node
+		container="${LCBLOCK_ADDR}${dt}"
+		decapsid="$(build_ipv6_addr "${container}")"
+	else
+		# double SID policy
+		decapsid="${VPN_LOCATOR_SERVICE}:${dst}::${DT46_FUNC}"
+
+		policy="$(build_ipv6_addr "${container}"),${decapsid}"
+	fi
+
+	# apply encap policy
+	if [ "${traffic}" -eq 6 ]; then
+		ip -netns "${rtsrc_nsname}" -6 route \
+			add "${IPv6_HS_NETWORK}::${dst}" vrf "${VRF_DEVNAME}" \
+			encap seg6 mode "${HEADEND_ENCAP}" segs "${policy}" \
+			dev "${VRF_DEVNAME}"
+
+		ip -netns "${rtsrc_nsname}" -6 neigh \
+			add proxy "${IPv6_HS_NETWORK}::${dst}" \
+			dev "${RT2HS_DEVNAME}"
+	else
+		# "dev" must be different from the one where the packet is
+		# received, otherwise the proxy arp does not work.
+		ip -netns "${rtsrc_nsname}" -4 route \
+			add "${IPv4_HS_NETWORK}.${dst}" vrf "${VRF_DEVNAME}" \
+			encap seg6 mode "${HEADEND_ENCAP}" segs "${policy}" \
+			dev "${VRF_DEVNAME}"
+	fi
+
+	# apply decap
+	# Local End.DT46 behavior (decap)
+	ip -netns "${rtdst_nsname}" -6 route \
+		add "${decapsid}" \
+		table "${LOCALSID_TABLE_ID}" \
+		encap seg6local action End.DT46 vrftable "${VRF_TID}" \
+		dev "${VRF_DEVNAME}"
+}
+
+# see __setup_l3vpn()
+setup_ipv4_vpn_2sids()
+{
+	__setup_l3vpn "$1" "$2" "$3" 2 4
+}
+
+# see __setup_l3vpn()
+setup_ipv6_vpn_1sid()
+{
+	__setup_l3vpn "$1" "$2" "$3" 1 6
+}
+
+setup_hs()
+{
+	local hs="$1"
+	local rt="$2"
+	local hsname
+	local rtname
+
+	hsname="$(get_hsname "${hs}")"
+	rtname="$(get_rtname "${rt}")"
+
+	ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
+	ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+	ip -netns "${hsname}" link add veth0 type veth \
+		peer name "${RT2HS_DEVNAME}" netns "${rtname}"
+
+	ip -netns "${hsname}" addr \
+		add "${IPv6_HS_NETWORK}::${hs}/64" dev veth0 nodad
+	ip -netns "${hsname}" addr add "${IPv4_HS_NETWORK}.${hs}/24" dev veth0
+
+	ip -netns "${hsname}" link set veth0 up
+	ip -netns "${hsname}" link set lo up
+
+	# configure the VRF on the router which is directly connected to the
+	# source host.
+	ip -netns "${rtname}" link \
+		add "${VRF_DEVNAME}" type vrf table "${VRF_TID}"
+	ip -netns "${rtname}" link set "${VRF_DEVNAME}" up
+
+	# enslave the veth interface connecting the router with the host to the
+	# VRF in the access router
+	ip -netns "${rtname}" link \
+		set "${RT2HS_DEVNAME}" master "${VRF_DEVNAME}"
+
+	# set default routes to unreachable for both ipv6 and ipv4
+	ip -netns "${rtname}" -6 route \
+		add unreachable default metric 4278198272 \
+		vrf "${VRF_DEVNAME}"
+	ip -netns "${rtname}" -4 route \
+		add unreachable default metric 4278198272 \
+		vrf "${VRF_DEVNAME}"
+
+	ip -netns "${rtname}" addr \
+		add "${IPv6_HS_NETWORK}::254/64" dev "${RT2HS_DEVNAME}" nodad
+	ip -netns "${rtname}" addr \
+		add "${IPv4_HS_NETWORK}.254/24" dev "${RT2HS_DEVNAME}"
+
+	ip -netns "${rtname}" link set "${RT2HS_DEVNAME}" up
+
+	ip netns exec "${rtname}" \
+		sysctl -wq net.ipv6.conf."${RT2HS_DEVNAME}".proxy_ndp=1
+	ip netns exec "${rtname}" \
+		sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".proxy_arp=1
+
+	# disable the rp_filter otherwise the kernel gets confused about how
+	# to route decap ipv4 packets.
+	ip netns exec "${rtname}" \
+		sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0
+
+	ip netns exec "${rtname}" sh -c "echo 1 > /proc/sys/net/vrf/strict_mode"
+}
+
+setup()
+{
+	local i
+
+	# create routers
+	ROUTERS="1 2 3 4"; readonly ROUTERS
+	for i in ${ROUTERS}; do
+		create_router "${i}"
+	done
+
+	# create hosts
+	HOSTS="1 2"; readonly HOSTS
+	for i in ${HOSTS}; do
+		create_host "${i}"
+	done
+
+	# set up the links for connecting routers
+	add_link_rt_pairs 1 "2 3 4"
+	add_link_rt_pairs 2 "3 4"
+	add_link_rt_pairs 3 "4"
+
+	# set up the basic connectivity of routers and routes required for
+	# reachability of SIDs.
+	setup_rt_networking 1 "2 3 4"
+	setup_rt_networking 2 "1 3 4"
+	setup_rt_networking 3 "1 2 4"
+	setup_rt_networking 4 "1 2 3"
+
+	# set up the hosts connected to routers
+	setup_hs 1 1
+	setup_hs 2 2
+
+	# set up default SRv6 Endpoints (i.e. SRv6 End and SRv6 End.DT46)
+	setup_rt_local_sids 1 "2 3 4"
+	setup_rt_local_sids 2 "1 3 4"
+	setup_rt_local_sids 3 "1 2 4"
+	setup_rt_local_sids 4 "1 2 3"
+
+	# set up SRv6 Policies
+
+	# create an IPv6 VPN between hosts hs-1 and hs-2.
+	#
+	# Direction hs-1 -> hs-2
+	# - rt-1 encap (H.Encaps.Red)
+	# - rt-4 SRv6 End behavior (NEXT-C-SID flavor)
+	# - rt-3 SRv6 End behavior (NEXT-C-SID flavor)
+	# - rt-2 SRv6 End.DT46 behavior
+	setup_ipv6_vpn_1sid 1 2 "4 3"
+
+	# Direction hs-2 -> hs-1
+	# - rt-2 encap (H.Encaps.Red)
+	# - rt-3 SRv6 End behavior (NEXT-C-SID flavor)
+	# - rt-4 SRv6 End behavior (NEXT-C-SID flavor)
+	# - rt-1 SRv6 End.DT46 behavior
+	setup_ipv6_vpn_1sid 2 1 "3 4"
+
+	# create an IPv4 VPN between hosts hs-1 and hs-2
+	#
+	# Direction hs-1 -> hs-2
+	# - rt-1 encap (H.Encaps.Red)
+	# - rt-3 SRv6 End behavior (NEXT-C-SID flavor)
+	# - rt-2 SRv6 End.DT46 behavior
+	setup_ipv4_vpn_2sids 1 2 "3"
+
+	# Direction hs-2 -> hs-1
+	# - rt-2 encap (H.Encaps.Red)
+	# - rt-3 SRv6 End behavior (NEXT-C-SID flavor)
+	# - rt-1 SRv6 End.DT46 behavior
+	setup_ipv4_vpn_2sids 2 1 "3"
+
+	# testing environment was set up successfully
+	SETUP_ERR=0
+}
+
+check_rt_connectivity()
+{
+	local rtsrc="$1"
+	local rtdst="$2"
+	local prefix
+	local rtsrc_nsname
+
+	rtsrc_nsname="$(get_rtname "${rtsrc}")"
+
+	prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")"
+
+	ip netns exec "${rtsrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
+		"${prefix}::${rtdst}" >/dev/null 2>&1
+}
+
+check_and_log_rt_connectivity()
+{
+	local rtsrc="$1"
+	local rtdst="$2"
+
+	check_rt_connectivity "${rtsrc}" "${rtdst}"
+	log_test $? 0 "Routers connectivity: rt-${rtsrc} -> rt-${rtdst}"
+}
+
+check_hs_ipv6_connectivity()
+{
+	local hssrc="$1"
+	local hsdst="$2"
+	local hssrc_nsname
+
+	hssrc_nsname="$(get_hsname "${hssrc}")"
+
+	ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
+		"${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1
+}
+
+check_hs_ipv4_connectivity()
+{
+	local hssrc="$1"
+	local hsdst="$2"
+	local hssrc_nsname
+
+	hssrc_nsname="$(get_hsname "${hssrc}")"
+
+	ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
+		"${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1
+}
+
+check_and_log_hs2gw_connectivity()
+{
+	local hssrc="$1"
+
+	check_hs_ipv6_connectivity "${hssrc}" 254
+	log_test $? 0 "IPv6 Hosts connectivity: hs-${hssrc} -> gw"
+
+	check_hs_ipv4_connectivity "${hssrc}" 254
+	log_test $? 0 "IPv4 Hosts connectivity: hs-${hssrc} -> gw"
+}
+
+check_and_log_hs_ipv6_connectivity()
+{
+	local hssrc="$1"
+	local hsdst="$2"
+
+	check_hs_ipv6_connectivity "${hssrc}" "${hsdst}"
+	log_test $? 0 "IPv6 Hosts connectivity: hs-${hssrc} -> hs-${hsdst}"
+}
+
+check_and_log_hs_ipv4_connectivity()
+{
+	local hssrc="$1"
+	local hsdst="$2"
+
+	check_hs_ipv4_connectivity "${hssrc}" "${hsdst}"
+	log_test $? 0 "IPv4 Hosts connectivity: hs-${hssrc} -> hs-${hsdst}"
+}
+
+router_tests()
+{
+	local i
+	local j
+
+	log_section "IPv6 routers connectivity test"
+
+	for i in ${ROUTERS}; do
+		for j in ${ROUTERS}; do
+			if [ "${i}" -eq "${j}" ]; then
+				continue
+			fi
+
+			check_and_log_rt_connectivity "${i}" "${j}"
+		done
+	done
+}
+
+host2gateway_tests()
+{
+	local hs
+
+	log_section "IPv4/IPv6 connectivity test among hosts and gateways"
+
+	for hs in ${HOSTS}; do
+		check_and_log_hs2gw_connectivity "${hs}"
+	done
+}
+
+host_vpn_tests()
+{
+	log_section "SRv6 VPN connectivity test hosts (h1 <-> h2, IPv6)"
+
+	check_and_log_hs_ipv6_connectivity 1 2
+	check_and_log_hs_ipv6_connectivity 2 1
+
+	log_section "SRv6 VPN connectivity test hosts (h1 <-> h2, IPv4)"
+
+	check_and_log_hs_ipv4_connectivity 1 2
+	check_and_log_hs_ipv4_connectivity 2 1
+}
+
+__nextcsid_end_behavior_test()
+{
+	local nsname="$1"
+	local cmd="$2"
+	local blen="$3"
+	local flen="$4"
+	local layout=""
+
+	if [ "${blen}" != "d" ]; then
+		layout="${layout} lblen ${blen}"
+	fi
+
+	if [ "${flen}" != "d" ]; then
+		layout="${layout} nflen ${flen}"
+	fi
+
+	ip -netns "${nsname}" -6 route \
+		"${cmd}" "${CSID_CNTR_PREFIX}" \
+		table "${CSID_CNTR_RT_TABLE}" \
+		encap seg6local action End flavors next-csid ${layout} \
+		dev "${DUMMY_DEVNAME}" &>/dev/null
+
+	return "$?"
+}
+
+rt_x_nextcsid_end_behavior_test()
+{
+	local rt="$1"
+	local blen="$2"
+	local flen="$3"
+	local nsname
+	local ret
+
+	nsname="$(get_rtname "${rt}")"
+
+	__nextcsid_end_behavior_test "${nsname}" "add" "${blen}" "${flen}"
+	ret="$?"
+	__nextcsid_end_behavior_test "${nsname}" "del" "${blen}" "${flen}"
+
+	return "${ret}"
+}
+
+__parse_csid_container_cfg()
+{
+	local cfg="$1"
+	local index="$2"
+	local out
+
+	echo "${cfg}" | cut -d',' -f"${index}"
+}
+
+csid_container_cfg_tests()
+{
+	local valid
+	local blen
+	local flen
+	local cfg
+	local ret
+
+	log_section "C-SID Container config tests (legend: d='kernel default')"
+
+	for cfg in "${CSID_CONTAINER_CFGS[@]}"; do
+		blen="$(__parse_csid_container_cfg "${cfg}" 1)"
+		flen="$(__parse_csid_container_cfg "${cfg}" 2)"
+		valid="$(__parse_csid_container_cfg "${cfg}" 3)"
+
+		rt_x_nextcsid_end_behavior_test \
+			"${CSID_CNTR_RT_ID_TEST}" \
+			"${blen}" \
+			"${flen}"
+		ret="$?"
+
+		if [ "${valid}" == "y" ]; then
+			log_test "${ret}" 0 \
+				"Accept valid C-SID container cfg (lblen=${blen}, nflen=${flen})"
+		else
+			log_test "${ret}" 2 \
+				"Reject invalid C-SID container cfg (lblen=${blen}, nflen=${flen})"
+		fi
+	done
+}
+
+test_iproute2_supp_or_ksft_skip()
+{
+	if ! ip route help 2>&1 | grep -qo "next-csid"; then
+		echo "SKIP: Missing SRv6 NEXT-C-SID flavor support in iproute2"
+		exit "${ksft_skip}"
+	fi
+}
+
+test_dummy_dev_or_ksft_skip()
+{
+        local test_netns
+
+        test_netns="dummy-$(mktemp -u XXXXXXXX)"
+
+        if ! ip netns add "${test_netns}"; then
+                echo "SKIP: Cannot set up netns for testing dummy dev support"
+                exit "${ksft_skip}"
+        fi
+
+        modprobe dummy &>/dev/null || true
+        if ! ip -netns "${test_netns}" link \
+                add "${DUMMY_DEVNAME}" type dummy; then
+                echo "SKIP: dummy dev not supported"
+
+                ip netns del "${test_netns}"
+                exit "${ksft_skip}"
+        fi
+
+        ip netns del "${test_netns}"
+}
+
+test_vrf_or_ksft_skip()
+{
+	modprobe vrf &>/dev/null || true
+	if [ ! -e /proc/sys/net/vrf/strict_mode ]; then
+		echo "SKIP: vrf sysctl does not exist"
+		exit "${ksft_skip}"
+	fi
+}
+
+if [ "$(id -u)" -ne 0 ]; then
+	echo "SKIP: Need root privileges"
+	exit "${ksft_skip}"
+fi
+
+# required programs to carry out this selftest
+test_command_or_ksft_skip ip
+test_command_or_ksft_skip ping
+test_command_or_ksft_skip sysctl
+test_command_or_ksft_skip grep
+test_command_or_ksft_skip cut
+
+test_iproute2_supp_or_ksft_skip
+test_dummy_dev_or_ksft_skip
+test_vrf_or_ksft_skip
+
+set -e
+trap cleanup EXIT
+
+setup
+set +e
+
+csid_container_cfg_tests
+
+router_tests
+host2gateway_tests
+host_vpn_tests
+
+print_log_test_results
-- 
cgit v1.2.3


From 152e8ec7764056bc667d0291a2dd1ef4204e216c Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Tue, 20 Sep 2022 11:30:47 +0800
Subject: selftests/bonding: add a test for bonding lladdr target

This is a regression test for commit 592335a4164c ("bonding: accept
unsolicited NA message") and commit b7f14132bf58 ("bonding: use unspecified
address if no available link local address"). When the bond interface is
up and has no available link-local address, the unspecified address (::) is
used to send the NS message. The unsolicited NA message should also be
accepted for validation.

Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Acked-by: Jonathan Toppins <jtoppins@redhat.com>
Link: https://lore.kernel.org/r/20220920033047.173244-1-liuhangbin@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../testing/selftests/drivers/net/bonding/Makefile |  1 +
 .../drivers/net/bonding/bond-lladdr-target.sh      | 65 ++++++++++++++++++++++
 2 files changed, 66 insertions(+)
 create mode 100755 tools/testing/selftests/drivers/net/bonding/bond-lladdr-target.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/drivers/net/bonding/Makefile b/tools/testing/selftests/drivers/net/bonding/Makefile
index ab6c54b12098..d209f7a98b6c 100644
--- a/tools/testing/selftests/drivers/net/bonding/Makefile
+++ b/tools/testing/selftests/drivers/net/bonding/Makefile
@@ -2,5 +2,6 @@
 # Makefile for net selftests
 
 TEST_PROGS := bond-break-lacpdu-tx.sh
+TEST_PROGS += bond-lladdr-target.sh
 
 include ../../../lib.mk
diff --git a/tools/testing/selftests/drivers/net/bonding/bond-lladdr-target.sh b/tools/testing/selftests/drivers/net/bonding/bond-lladdr-target.sh
new file mode 100755
index 000000000000..89af402fabbe
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/bond-lladdr-target.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Regression Test:
+#   Verify the bond interface can come up when an IPv6 link-local address target is set.
+#
+#  +----------------+
+#  |      br0       |
+#  |       |        |    sw
+#  | veth0   veth1  |
+#  +---+-------+----+
+#      |       |
+#  +---+-------+----+
+#  | veth0   veth1  |
+#  |       |        |    host
+#  |     bond0      |
+#  +----------------+
+#
+# We use veths instead of physical interfaces
+sw="sw-$(mktemp -u XXXXXX)"
+host="ns-$(mktemp -u XXXXXX)"
+
+cleanup()
+{
+	ip netns del $sw
+	ip netns del $host
+}
+
+trap cleanup 0 1 2
+
+ip netns add $sw
+ip netns add $host
+
+ip -n $host link add veth0 type veth peer name veth0 netns $sw
+ip -n $host link add veth1 type veth peer name veth1 netns $sw
+
+ip -n $sw link add br0 type bridge
+ip -n $sw link set br0 up
+sw_lladdr=$(ip -n $sw addr show br0 | awk '/fe80/{print $2}' | cut -d'/' -f1)
+# sleep for a while to make sure the bridge lladdr passes DAD
+sleep 2
+
+ip -n $host link add bond0 type bond mode 1 ns_ip6_target ${sw_lladdr} \
+	arp_validate 3 arp_interval 1000
+# add a lladdr for bond to make sure there is a route to target
+ip -n $host addr add fe80::beef/64 dev bond0
+ip -n $host link set bond0 up
+ip -n $host link set veth0 master bond0
+ip -n $host link set veth1 master bond0
+
+ip -n $sw link set veth0 master br0
+ip -n $sw link set veth1 master br0
+ip -n $sw link set veth0 up
+ip -n $sw link set veth1 up
+
+sleep 5
+
+rc=0
+if ip -n $host link show bond0 | grep -q LOWER_UP; then
+	echo "PASS"
+else
+	echo "FAIL"
+	rc=1
+fi
+exit $rc
-- 
cgit v1.2.3


From 93f3f2eaa4c945dc733110ebf4e49b0c12a89e99 Mon Sep 17 00:00:00 2001
From: Zhengchao Shao <shaozhengchao@huawei.com>
Date: Fri, 16 Sep 2022 10:02:45 +0800
Subject: selftests/tc-testings: add selftests for bpf filter

Test 23c3: Add cBPF filter with valid bytecode
Test 1563: Add cBPF filter with invalid bytecode
Test 2334: Add eBPF filter with valid object-file
Test 2373: Add eBPF filter with invalid object-file
Test 4423: Replace cBPF bytecode
Test 5122: Delete cBPF filter
Test e0a9: List cBPF filters

Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
Reviewed-by: Jamal Hadi Salim <jhs@mojatatu.com>
Reviewed-by: Victor Nogueira <victor@mojatatu.com>
Tested-by: Victor Nogueira <victor@mojatatu.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/tc-testing/tc-tests/filters/bpf.json | 171 +++++++++++++++++++++
 1 file changed, 171 insertions(+)
 create mode 100644 tools/testing/selftests/tc-testing/tc-tests/filters/bpf.json

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/bpf.json b/tools/testing/selftests/tc-testing/tc-tests/filters/bpf.json
new file mode 100644
index 000000000000..1f0cae474db2
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/bpf.json
@@ -0,0 +1,171 @@
+[
+    {
+        "id": "23c3",
+        "name": "Add cBPF filter with valid bytecode",
+        "category": [
+            "filter",
+            "bpf-filter"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0'",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf",
+        "matchPattern": "filter parent ffff: protocol ip pref 100 bpf chain [0-9]+ handle 0x1.*bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0'",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "1563",
+        "name": "Add cBPF filter with invalid bytecode",
+        "category": [
+            "filter",
+            "bpf-filter"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf bytecode '4,40 0 0 12,31 0 1 2048,6 0 0 262144,6 0 0 0'",
+        "expExitCode": "2",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf",
+        "matchPattern": "filter parent ffff: protocol ip pref 100 bpf chain [0-9]+ handle 0x1.*bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0'",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "2334",
+        "name": "Add eBPF filter with valid object-file",
+        "category": [
+            "filter",
+            "bpf-filter"
+        ],
+        "plugins": {
+            "requires": "buildebpfPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf object-file $EBPFDIR/action.o section action-ok",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf",
+        "matchPattern": "filter parent ffff: protocol ip pref 100 bpf chain [0-9]+ handle 0x1 action.o:\\[action-ok\\].*tag [0-9a-f]{16}( jited)?",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "2373",
+        "name": "Add eBPF filter with invalid object-file",
+        "category": [
+            "filter",
+            "bpf-filter"
+        ],
+        "plugins": {
+            "requires": "buildebpfPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf object-file $EBPFDIR/action.o section action-ko",
+        "expExitCode": "1",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf",
+        "matchPattern": "filter parent ffff: protocol ip pref 100 bpf chain [0-9]+ handle 0x1 action.o:\\[action-ko\\].*tag [0-9a-f]{16}( jited)?",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "4423",
+        "name": "Replace cBPF bytecode",
+        "category": [
+            "filter",
+            "bpf-filter"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            [
+                "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0'",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC filter replace dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf bytecode '4,40 0 0 12,21 0 1 2054,6 0 0 262144,6 0 0 0'",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf",
+        "matchPattern": "filter parent ffff: protocol ip pref 100 bpf chain [0-9]+ handle 0x1.*bytecode '4,40 0 0 12,21 0 1 2054,6 0 0 262144,6 0 0 0'",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "5122",
+        "name": "Delete cBPF filter",
+        "category": [
+            "filter",
+            "bpf-filter"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            [
+                "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0'",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC filter del dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf",
+        "matchPattern": "filter parent ffff: protocol ip pref 100 bpf chain [0-9]+ handle 0x1.*bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0'",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "e0a9",
+        "name": "List cBPF filters",
+        "category": [
+            "filter",
+            "bpf-filter"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0'",
+            "$TC filter add dev $DEV1 parent ffff: handle 2 protocol ip prio 100 bpf bytecode '4,40 0 0 12,21 0 1 2054,6 0 0 262144,6 0 0 0'",
+            "$TC filter add dev $DEV1 parent ffff: handle 100 protocol ip prio 100 bpf bytecode '4,40 0 0 12,21 0 1 33024,6 0 0 262144,6 0 0 0'"
+        ],
+        "cmdUnderTest": "$TC filter show dev $DEV1 parent ffff:",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "filter protocol ip pref 100 bpf chain [0-9]+ handle",
+        "matchCount": "3",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    }
+]
-- 
cgit v1.2.3


From 33c4119276152096fae44868ff5f4c71965a6905 Mon Sep 17 00:00:00 2001
From: Zhengchao Shao <shaozhengchao@huawei.com>
Date: Fri, 16 Sep 2022 10:02:46 +0800
Subject: selftests/tc-testings: add selftests for cgroup filter

Test 6273: Add cgroup filter with cmp ematch u8/link layer and drop action
Test 4721: Add cgroup filter with cmp ematch u8/link layer with trans
flag and pass action
Test d392: Add cgroup filter with cmp ematch u16/link layer and pipe action
Test 0234: Add cgroup filter with cmp ematch u32/link layer and multiple
actions
Test 8499: Add cgroup filter with cmp ematch u8/network layer and pass
action
Test b273: Add cgroup filter with cmp ematch u8/network layer with trans
flag and drop action
Test 1934: Add cgroup filter with cmp ematch u16/network layer and pipe
action
Test 2733: Add cgroup filter with cmp ematch u32/network layer and
multiple actions
Test 3271: Add cgroup filter with NOT cmp ematch rule and pass action
Test 2362: Add cgroup filter with two ANDed cmp ematch rules and single
action
Test 9993: Add cgroup filter with two ORed cmp ematch rules and single
action
Test 2331: Add cgroup filter with two ANDed cmp ematch rules and one ORed
ematch rule and single action
Test 3645: Add cgroup filter with two ANDed cmp ematch rules and one NOT
ORed ematch rule and single action
Test b124: Add cgroup filter with u32 ematch u8/zero offset and drop
action
Test 7381: Add cgroup filter with u32 ematch u8/zero offset and invalid
value >0xFF
Test 2231: Add cgroup filter with u32 ematch u8/positive offset and drop
action
Test 1882: Add cgroup filter with u32 ematch u8/invalid mask >0xFF
Test 1237: Add cgroup filter with u32 ematch u8/missing offset
Test 3812: Add cgroup filter with u32 ematch u8/missing AT keyword
Test 1112: Add cgroup filter with u32 ematch u8/missing value
Test 3241: Add cgroup filter with u32 ematch u8/non-numeric value
Test e231: Add cgroup filter with u32 ematch u8/non-numeric mask
Test 4652: Add cgroup filter with u32 ematch u8/negative offset and pass
Test 1331: Add cgroup filter with u32 ematch u16/zero offset and pipe
action
Test e354: Add cgroup filter with u32 ematch u16/zero offset and invalid
value >0xFFFF
Test 3538: Add cgroup filter with u32 ematch u16/positive offset and drop
action
Test 4576: Add cgroup filter with u32 ematch u16/invalid mask >0xFFFF
Test b842: Add cgroup filter with u32 ematch u16/missing offset
Test c924: Add cgroup filter with u32 ematch u16/missing AT keyword
Test cc93: Add cgroup filter with u32 ematch u16/missing value
Test 123c: Add cgroup filter with u32 ematch u16/non-numeric value
Test 3675: Add cgroup filter with u32 ematch u16/non-numeric mask
Test 1123: Add cgroup filter with u32 ematch u16/negative offset and drop
action
Test 4234: Add cgroup filter with u32 ematch u16/nexthdr+ offset and pass
action
Test e912: Add cgroup filter with u32 ematch u32/zero offset and pipe
action
Test 1435: Add cgroup filter with u32 ematch u32/positive offset and drop
action
Test 1282: Add cgroup filter with u32 ematch u32/missing offset
Test 6456: Add cgroup filter with u32 ematch u32/missing AT keyword
Test 4231: Add cgroup filter with u32 ematch u32/missing value
Test 2131: Add cgroup filter with u32 ematch u32/non-numeric value
Test f125: Add cgroup filter with u32 ematch u32/non-numeric mask
Test 4316: Add cgroup filter with u32 ematch u32/negative offset and drop
action
Test 23ae: Add cgroup filter with u32 ematch u32/nexthdr+ offset and pipe
action
Test 23a1: Add cgroup filter with canid ematch and single SFF
Test 324f: Add cgroup filter with canid ematch and single SFF with mask
Test 2576: Add cgroup filter with canid ematch and multiple SFF
Test 4839: Add cgroup filter with canid ematch and multiple SFF with masks
Test 6713: Add cgroup filter with canid ematch and single EFF
Test ab9d: Add cgroup filter with canid ematch and multiple EFF with masks
Test 5349: Add cgroup filter with canid ematch and a combination of
SFF/EFF
Test c934: Add cgroup filter with canid ematch and a combination of
SFF/EFF with masks
Test 4319: Replace cgroup filter with different match
Test 4636: Delete cgroup filter

Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
Reviewed-by: Jamal Hadi Salim <jhs@mojatatu.com>
Reviewed-by: Victor Nogueira <victor@mojatatu.com>
Tested-by: Victor Nogueira <victor@mojatatu.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../tc-testing/tc-tests/filters/cgroup.json        | 1236 ++++++++++++++++++++
 1 file changed, 1236 insertions(+)
 create mode 100644 tools/testing/selftests/tc-testing/tc-tests/filters/cgroup.json

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/cgroup.json b/tools/testing/selftests/tc-testing/tc-tests/filters/cgroup.json
new file mode 100644
index 000000000000..03723cf84379
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/cgroup.json
@@ -0,0 +1,1236 @@
+[
+    {
+        "id": "6273",
+        "name": "Add cgroup filter with cmp ematch u8/link layer and drop action",
+        "category": [
+            "filter",
+            "cgroup"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'cmp(u8 at 0 layer link mask 0xff gt 10)' action drop",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref [0-9]+ cgroup chain [0-9]+.*handle 0x1.*cmp\\(u8 at 0 layer 0 mask 0xff gt 10\\)",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "4721",
+        "name": "Add cgroup filter with cmp ematch u8/link layer with trans flag and pass action",
+        "category": [
+            "filter",
+            "cgroup"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'cmp(u8 at 0 layer link mask 0xff trans gt 10)' action pass",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref [0-9]+ cgroup chain [0-9]+.*handle 0x1.*cmp\\(u8 at 0 layer 0 mask 0xff trans gt 10\\)",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "d392",
+        "name": "Add cgroup filter with cmp ematch u16/link layer and pipe action",
+        "category": [
+            "filter",
+            "cgroup"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'cmp(u16 at 0 layer 0 mask 0xff00 lt 3)' action pipe",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref [0-9]+ cgroup chain [0-9]+.*handle 0x1.*cmp\\(u16 at 0 layer 0 mask 0xff00 lt 3\\).*action pipe",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "0234",
+        "name": "Add cgroup filter with cmp ematch u32/link layer and multiple actions",
+        "category": [
+            "filter",
+            "cgroup"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'cmp(u32 at 4 layer link mask 0xff00ff00 eq 3)' action skbedit mark 7 pipe action gact drop",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref [0-9]+ cgroup chain [0-9]+.*handle 0x1.*cmp\\(u32 at 4 layer 0 mask 0xff00ff00 eq 3\\).*action.*skbedit.*mark 7 pipe.*action.*gact action drop",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "8499",
+        "name": "Add cgroup filter with cmp ematch u8/network layer and pass action",
+        "category": [
+            "filter",
+            "cgroup"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 0xab protocol ip prio 11 cgroup match 'cmp(u8 at 0 layer 1 mask 0xff gt 10)' action pass",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show  dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref 11 cgroup.*handle 0xab.*cmp\\(u8 at 0 layer 1 mask 0xff gt 10\\)",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "b273",
+        "name": "Add cgroup filter with cmp ematch u8/network layer with trans flag and drop action",
+        "category": [
+            "filter",
+            "cgroup"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 0xab protocol ip prio 11 cgroup match 'cmp(u8 at 0 layer 1 mask 0xff trans gt 10)' action drop",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref 11 cgroup.*handle 0xab.*cmp\\(u8 at 0 layer 1 mask 0xff trans gt 10\\)",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "1934",
+        "name": "Add cgroup filter with cmp ematch u16/network layer and pipe action",
+        "category": [
+            "filter",
+            "cgroup"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 0x100 protocol ip prio 100 cgroup match 'cmp(u16 at 0 layer network mask 0xff00 lt 3)' action pipe",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "filter protocol ip pref 100 cgroup.*handle 0x100..*cmp\\(u16 at 0 layer 1 mask 0xff00 lt 3\\)",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "2733",
+        "name": "Add cgroup filter with cmp ematch u32/network layer and multiple actions",
+        "category": [
+            "filter",
+            "cgroup"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 0x112233 protocol ip prio 7 cgroup match 'cmp(u32 at 4 layer network mask 0xff00ff00 eq 3)' action skbedit mark 7 pipe action gact drop",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref 7 cgroup.*handle 0x112233.*cmp\\(u32 at 4 layer 1 mask 0xff00ff00 eq 3\\).*action.*skbedit.*mark 7 pipe.*action.*gact action drop",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "3271",
+        "name": "Add cgroup filter with NOT cmp ematch rule and pass action",
+        "category": [
+            "filter",
+            "cgroup"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'not cmp(u8 at 0 layer link mask 0xff eq 3)' action pass",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*NOT cmp\\(u8 at 0 layer 0 mask 0xff eq 3\\)",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "2362",
+        "name": "Add cgroup filter with two ANDed cmp ematch rules and single action",
+        "category": [
+            "filter",
+            "cgroup"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'cmp(u8 at 0 layer link mask 0xff eq 3) and cmp(u16 at 8 layer link mask 0x00ff gt 7)' action gact drop",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*cmp\\(u8 at 0 layer 0 mask 0xff eq 3\\).*AND cmp\\(u16 at 8 layer 0 mask 0xff gt 7\\).*action.*gact action drop",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "9993",
+        "name": "Add cgroup filter with two ORed cmp ematch rules and single action",
+        "category": [
+            "filter",
+            "cgroup"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'cmp(u8 at 0 layer link mask 0xff eq 3) or cmp(u16 at 8 layer link mask 0x00ff gt 7)' action gact drop",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*cmp\\(u8 at 0 layer 0 mask 0xff eq 3\\).*OR cmp\\(u16 at 8 layer 0 mask 0xff gt 7\\).*action.*gact action drop",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "2331",
+        "name": "Add cgroup filter with two ANDed cmp ematch rules and one ORed ematch rule and single action",
+        "category": [
+            "filter",
+            "cgroup"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'cmp(u8 at 0 layer link mask 0xff eq 3) and cmp(u16 at 8 layer link mask 0x00ff gt 7) or cmp(u32 at 4 layer network mask 0xa0a0 lt 3)' action gact drop",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*cmp\\(u8 at 0 layer 0 mask 0xff eq 3\\).*AND cmp\\(u16 at 8 layer 0 mask 0xff gt 7\\).*OR cmp\\(u32 at 4 layer 1 mask 0xa0a0 lt 3\\).*action.*gact action drop",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "3645",
+        "name": "Add cgroup filter with two ANDed cmp ematch rules and one NOT ORed ematch rule and single action",
+        "category": [
+            "filter",
+            "cgroup"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'cmp(u8 at 0 layer link mask 0xff eq 3) and cmp(u16 at 8 layer link mask 0x00ff gt 7) or not cmp(u32 at 4 layer network mask 0xa0a0 lt 3)' action gact drop",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*cmp\\(u8 at 0 layer 0 mask 0xff eq 3\\).*AND cmp\\(u16 at 8 layer 0 mask 0xff gt 7\\).*OR NOT cmp\\(u32 at 4 layer 1 mask 0xa0a0 lt 3\\).*action.*gact action drop",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "b124",
+        "name": "Add cgroup filter with u32 ematch u8/zero offset and drop action",
+        "category": [
+            "filter",
+            "cgroup"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u8 0x11 0x0f at 0)' action drop",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*u32\\(01000000/0f000000 at 0\\)",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "7381",
+        "name": "Add cgroup filter with u32 ematch u8/zero offset and invalid value >0xFF",
+        "category": [
+            "filter",
+            "cgroup"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u8 0x1122 0x0f at 0)' action pass",
+        "expExitCode": "1",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*u32\\(11220000/0f000000 at 0\\)",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "2231",
+        "name": "Add cgroup filter with u32 ematch u8/positive offset and drop action",
+        "category": [
+            "filter",
+            "cgroup"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u8 0x77 0x1f at 12)' action drop",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*u32\\(17000000/1f000000 at 12\\)",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "1882",
+        "name": "Add cgroup filter with u32 ematch u8/invalid mask >0xFF",
+        "category": [
+            "filter",
+            "cgroup"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u8 0x77 0xff00 at 12)' action drop",
+        "expExitCode": "1",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*u32\\(77000000/ff000000 at 12\\)",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "1237",
+        "name": "Add cgroup filter with u32 ematch u8/missing offset",
+        "category": [
+            "filter",
+            "cgroup"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u8 0x77 0xff at)' action pipe",
+        "expExitCode": "1",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*u32\\(77000000 at 12\\)",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "3812",
+        "name": "Add cgroup filter with u32 ematch u8/missing AT keyword",
+        "category": [
+            "filter",
+            "cgroup"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u8 0x77 0xff 0)' action pass",
+        "expExitCode": "1",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*u32\\(77000000 at 12\\)",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "1112",
+        "name": "Add cgroup filter with u32 ematch u8/missing value",
+        "category": [
+            "filter",
+            "cgroup"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u8 at 12)' action drop",
+        "expExitCode": "1",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*u32\\(at 12\\)",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "3241",
+        "name": "Add cgroup filter with u32 ematch u8/non-numeric value",
+        "category": [
+            "filter",
+            "cgroup"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u8 zero 0xff at 0)' action pipe",
+        "expExitCode": "1",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1 flowid 1:1.*u32\\(00000000/ff000000 at 0\\)",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "e231",
+        "name": "Add cgroup filter with u32 ematch u8/non-numeric mask",
+        "category": [
+            "filter",
+            "cgroup"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u8 0x11 mask at 0)' action pass",
+        "expExitCode": "1",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*u32\\(11000000/00000000 at 0\\)",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "4652",
+        "name": "Add cgroup filter with u32 ematch u8/negative offset and pass action",
+        "category": [
+            "filter",
+            "cgroup"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u8 0xaa 0xf0 at -14)' action pass",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*u32\\(0000a000/0000f000 at -16\\)",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "7566",
+        "name": "Add cgroup filter with u32 ematch u8/nexthdr+ offset and drop action",
+        "category": [
+            "filter",
+            "cgroup"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u8 0xaa 0xf0 at nexthdr+0)' action drop",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*u32\\(a0000000/f0000000 at nexthdr\\+0\\)",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "1331",
+        "name": "Add cgroup filter with u32 ematch u16/zero offset and pipe action",
+        "category": [
+            "filter",
+            "cgroup"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u16 0x1122 0xffff at 0)' action pipe",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*u32\\(11220000/ffff0000 at 0\\)",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "e354",
+        "name": "Add cgroup filter with u32 ematch u16/zero offset and invalid value >0xFFFF",
+        "category": [
+            "filter",
+            "cgroup"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u16 0x112233 0xffff at 0)'",
+        "expExitCode": "1",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*u32\\(11223300/ffff0000 at 0\\)",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "3538",
+        "name": "Add cgroup filter with u32 ematch u16/positive offset and drop action",
+        "category": [
+            "filter",
+            "cgroup"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u16 0x7788 0x1fff at 12)' action drop",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*u32\\(17880000/1fff0000 at 12\\)",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "4576",
+        "name": "Add cgroup filter with u32 ematch u16/invalid mask >0xFFFF",
+        "category": [
+            "filter",
+            "cgroup"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u16 0x7788 0xffffffff at 12)' action pass",
+        "expExitCode": "1",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*u32\\(77880000/ffffffff at 12\\)",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "b842",
+        "name": "Add cgroup filter with u32 ematch u16/missing offset",
+        "category": [
+            "filter",
+            "cgroup"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u16 0x7788 0xffff at)' action pass",
+        "expExitCode": "1",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*u32\\(77880000 at 12\\)",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "c924",
+        "name": "Add cgroup filter with u32 ematch u16/missing AT keyword",
+        "category": [
+            "filter",
+            "cgroup"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u16 0x7788 0xffff 0)' action pass",
+        "expExitCode": "1",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*u32\\(77880000/ffff0000 at 0\\)",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "cc93",
+        "name": "Add cgroup filter with u32 ematch u16/missing value",
+        "category": [
+            "filter",
+            "cgroup"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u16 at 12)' action pass",
+        "expExitCode": "1",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*u32\\(at 12\\)",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "123c",
+        "name": "Add cgroup filter with u32 ematch u16/non-numeric value",
+        "category": [
+            "filter",
+            "cgroup"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u16 zero 0xffff at 0)' action pass",
+        "expExitCode": "1",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*u32\\(00000000/ffff0000 at 0\\)",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "3675",
+        "name": "Add cgroup filter with u32 ematch u16/non-numeric mask",
+        "category": [
+            "filter",
+            "cgroup"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u16 0x1122 mask at 0)' action pass",
+        "expExitCode": "1",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*u32\\(11220000/00000000 at 0\\)",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "1123",
+        "name": "Add cgroup filter with u32 ematch u16/negative offset and drop action",
+        "category": [
+            "filter",
+            "cgroup"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u16 0xaabb 0xffff at -12)' action drop",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*u32\\(aabb0000/ffff0000 at -12\\)",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "4234",
+        "name": "Add cgroup filter with u32 ematch u16/nexthdr+ offset and pass action",
+        "category": [
+            "filter",
+            "cgroup"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u16 0xaabb 0xf0f0 at nexthdr+0)' action pass",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*u32\\(a0b00000/f0f00000 at nexthdr\\+0\\)",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "e912",
+        "name": "Add cgroup filter with u32 ematch u32/zero offset and pipe action",
+        "category": [
+            "filter",
+            "cgroup"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u32 0xaabbccdd 0xffffffff at 0)' action pipe",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*u32\\(aabbccdd/ffffffff at 0\\)",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "1435",
+        "name": "Add cgroup filter with u32 ematch u32/positive offset and drop action",
+        "category": [
+            "filter",
+            "cgroup"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u32 0x11227788 0x1ffff0f0 at 12)' action drop",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*u32\\(11227080/1ffff0f0 at 12\\)",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "1282",
+        "name": "Add cgroup filter with u32 ematch u32/missing offset",
+        "category": [
+            "filter",
+            "cgroup"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u32 0x11227788 0xffffffff at)' action pass",
+        "expExitCode": "1",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*u32\\(11227788/ffffffff at 12\\)",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "6456",
+        "name": "Add cgroup filter with u32 ematch u32/missing AT keyword",
+        "category": [
+            "filter",
+            "cgroup"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u32 0x77889900 0xfffff0f0 0)' action pipe",
+        "expExitCode": "1",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*u32\\(77889900/fffff0f0 at 0\\)",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "4231",
+        "name": "Add cgroup filter with u32 ematch u32/missing value",
+        "category": [
+            "filter",
+            "cgroup"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u32 at 12)' action pipe",
+        "expExitCode": "1",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*u32\\(at 12\\)",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "2131",
+        "name": "Add cgroup filter with u32 ematch u32/non-numeric value",
+        "category": [
+            "filter",
+            "cgroup"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u32 zero 0xffff at 0)' action pipe",
+        "expExitCode": "1",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*u32\\(00000000/ffff0000 at 0\\)",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "f125",
+        "name": "Add cgroup filter with u32 ematch u32/non-numeric mask",
+        "category": [
+            "filter",
+            "cgroup"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u32 0x11223344 mask at 0)' action pass",
+        "expExitCode": "1",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*u32\\(11223344/00000000 at 0\\)",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "4316",
+        "name": "Add cgroup filter with u32 ematch u32/negative offset and drop action",
+        "category": [
+            "filter",
+            "cgroup"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u32 0xaabbccdd 0xff00ff00 at -12)' action drop",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*u32\\(aa00cc00/ff00ff00 at -12\\)",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "23ae",
+        "name": "Add cgroup filter with u32 ematch u32/nexthdr+ offset and pipe action",
+        "category": [
+            "filter",
+            "cgroup"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u32 0xaabbccdd 0xffffffff at nexthdr+0)' action pipe",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*u32\\(aabbccdd/ffffffff at nexthdr\\+0\\)",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "23a1",
+        "name": "Add cgroup filter with canid ematch and single SFF",
+        "category": [
+            "filter",
+            "cgroup"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'canid(sff 1)' action pass",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*canid\\(sff 0x1\\)",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "324f",
+        "name": "Add cgroup filter with canid ematch and single SFF with mask",
+        "category": [
+            "filter",
+            "cgroup"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'canid(sff 0xaabb:0x00ff)' action pass",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*canid\\(sff 0x2BB:0xFF\\)",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "2576",
+        "name": "Add cgroup filter with canid ematch and multiple SFF",
+        "category": [
+            "filter",
+            "cgroup"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'canid(sff 1 sff 2 sff 3)' action pass",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*canid\\(sff 0x1 sff 0x2 sff 0x3\\)",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "4839",
+        "name": "Add cgroup filter with canid ematch and multiple SFF with masks",
+        "category": [
+            "filter",
+            "cgroup"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'canid(sff 0xaa:0x01 sff 0xbb:0x02 sff 0xcc:0x03)' action drop",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*canid\\(sff 0xAA:0x1 sff 0xBB:0x2 sff 0xCC:0x3\\)",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "6713",
+        "name": "Add cgroup filter with canid ematch and single EFF",
+        "category": [
+            "filter",
+            "cgroup"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'canid(eff 1)' action pipe",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*canid\\(eff 0x1\\)",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "4572",
+        "name": "Add cgroup filter with canid ematch and single EFF with mask",
+        "category": [
+            "filter",
+            "cgroup"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'canid(eff 0xaabb:0xf1f1)' action pass",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*canid\\(eff 0xAABB:0xF1F1\\)",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "8031",
+        "name": "Add cgroup filter with canid ematch and multiple EFF",
+        "category": [
+            "filter",
+            "cgroup"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'canid(eff 1 eff 2 eff 3)' action pass",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*canid\\(eff 0x1 eff 0x2 eff 0x3\\)",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "ab9d",
+        "name": "Add cgroup filter with canid ematch and multiple EFF with masks",
+        "category": [
+            "filter",
+            "cgroup"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'canid(eff 0xaa:0x01 eff 0xbb:0x02 eff 0xcc:0x03)' action pass",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*canid\\(eff 0xAA:0x1 eff 0xBB:0x2 eff 0xCC:0x3\\)",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "5349",
+        "name": "Add cgroup filter with canid ematch and a combination of SFF/EFF",
+        "category": [
+            "filter",
+            "cgroup"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'canid(sff 0x01 eff 0x02)' action pass",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*canid\\(eff 0x2 sff 0x1\\)",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "c934",
+        "name": "Add cgroup filter with canid ematch and a combination of SFF/EFF with masks",
+        "category": [
+            "filter",
+            "cgroup"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'canid(sff 0x01:0xf eff 0x02:0xf)' action drop",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*canid\\(eff 0x2:0xF sff 0x1:0xF\\)",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "4319",
+        "name": "Replace cgroup filter with different match",
+        "category": [
+            "filter",
+            "cgroup"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'cmp(u8 at 0 layer link mask 0xff gt 10)' action pass"
+        ],
+        "cmdUnderTest": "$TC filter replace dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'cmp(u8 at 0 layer link mask 0xff gt 8)' action pass",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "cmp\\(u8 at 0 layer 0 mask 0xff gt 8\\)",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "4636",
+        "name": "Delete cgroup filter",
+        "category": [
+            "filter",
+            "cgroup"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'cmp(u8 at 0 layer link mask 0xff gt 10)' action pass"
+        ],
+        "cmdUnderTest": "$TC filter delete dev $DEV1 parent ffff: protocol ip prio 1 cgroup match 'cmp(u8 at 0 layer link mask 0xff gt 10)' action pass",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "cmp\\(u8 at 0 layer 0 mask 0xff gt 8\\)",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    }
+]
-- 
cgit v1.2.3


From 58f82b3a0b05c9cfced8ee65729fb0c6386403c7 Mon Sep 17 00:00:00 2001
From: Zhengchao Shao <shaozhengchao@huawei.com>
Date: Fri, 16 Sep 2022 10:02:47 +0800
Subject: selftests/tc-testings: add selftests for flow filter

Test 5294: Add flow filter with map key and ops
Test 3514: Add flow filter with map key or ops
Test 7534: Add flow filter with map key xor ops
Test 4524: Add flow filter with map key rshift ops
Test 0230: Add flow filter with map key addend ops
Test 2344: Add flow filter with src map key
Test 9304: Add flow filter with proto map key
Test 9038: Add flow filter with proto-src map key
Test 2a03: Add flow filter with proto-dst map key
Test a073: Add flow filter with iif map key
Test 3b20: Add flow filter with priority map key
Test 8945: Add flow filter with mark map key
Test c034: Add flow filter with nfct map key
Test 0205: Add flow filter with nfct-dst map key
Test 5315: Add flow filter with nfct-src map key
Test 7849: Add flow filter with nfct-proto-src map key
Test 9902: Add flow filter with nfct-proto-dst map key
Test 6742: Add flow filter with rt-classid map key
Test 5432: Add flow filter with sk-uid map key
Test 4134: Add flow filter with sk-gid map key
Test 4522: Add flow filter with vlan-tag map key
Test 4253: Add flow filter with rxhash map key
Test 4452: Add flow filter with hash key list
Test 4341: Add flow filter with multiple ops
Test 4392: List flow filters
Test 4322: Change flow filter with map key num
Test 2320: Replace flow filter with map key num
Test 3213: Delete flow filter with map key num

Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
Reviewed-by: Jamal Hadi Salim <jhs@mojatatu.com>
Reviewed-by: Victor Nogueira <victor@mojatatu.com>
Tested-by: Victor Nogueira <victor@mojatatu.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../tc-testing/tc-tests/filters/flow.json          | 623 +++++++++++++++++++++
 1 file changed, 623 insertions(+)
 create mode 100644 tools/testing/selftests/tc-testing/tc-tests/filters/flow.json

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/flow.json b/tools/testing/selftests/tc-testing/tc-tests/filters/flow.json
new file mode 100644
index 000000000000..58189327f644
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/flow.json
@@ -0,0 +1,623 @@
+[
+    {
+        "id": "5294",
+        "name": "Add flow filter with map key and ops",
+        "category": [
+            "filter",
+            "flow"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key dst and 0xff",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 1 flow",
+        "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys dst and 0x000000ff baseclass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "3514",
+        "name": "Add flow filter with map key or ops",
+        "category": [
+            "filter",
+            "flow"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key dst or 0xff",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 1 flow",
+        "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys dst.*or 0x000000ff baseclass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "7534",
+        "name": "Add flow filter with map key xor ops",
+        "category": [
+            "filter",
+            "flow"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key dst xor 0xff",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 1 flow",
+        "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys dst xor 0x000000ff baseclass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "4524",
+        "name": "Add flow filter with map key rshift ops",
+        "category": [
+            "filter",
+            "flow"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key dst rshift 0xff",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 1 flow",
+        "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys dst rshift 255 baseclass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "0230",
+        "name": "Add flow filter with map key addend ops",
+        "category": [
+            "filter",
+            "flow"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key dst addend 0xff",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 1 flow",
+        "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys dst addend 0xff baseclass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "2344",
+        "name": "Add flow filter with src map key",
+        "category": [
+            "filter",
+            "flow"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key src addend 0xff",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 1 flow",
+        "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys src addend 0xff baseclass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "9304",
+        "name": "Add flow filter with proto map key",
+        "category": [
+            "filter",
+            "flow"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key proto addend 0xff",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 1 flow",
+        "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys proto addend 0xff baseclass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "9038",
+        "name": "Add flow filter with proto-src map key",
+        "category": [
+            "filter",
+            "flow"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key proto-src addend 0xff",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 1 flow",
+        "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys proto-src addend 0xff baseclass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "2a03",
+        "name": "Add flow filter with proto-dst map key",
+        "category": [
+            "filter",
+            "flow"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key proto-dst addend 0xff",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 1 flow",
+        "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys proto-dst addend 0xff baseclass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "a073",
+        "name": "Add flow filter with iif map key",
+        "category": [
+            "filter",
+            "flow"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key iif addend 0xff",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 1 flow",
+        "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys iif addend 0xff baseclass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "3b20",
+        "name": "Add flow filter with priority map key",
+        "category": [
+            "filter",
+            "flow"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key priority addend 0xff",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 1 flow",
+        "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys priority addend 0xff baseclass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "8945",
+        "name": "Add flow filter with mark map key",
+        "category": [
+            "filter",
+            "flow"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key mark addend 0xff",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 1 flow",
+        "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys mark addend 0xff baseclass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "c034",
+        "name": "Add flow filter with nfct map key",
+        "category": [
+            "filter",
+            "flow"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key nfct addend 0xff",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 1 flow",
+        "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys nfct addend 0xff baseclass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "0205",
+        "name": "Add flow filter with nfct-dst map key",
+        "category": [
+            "filter",
+            "flow"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key nfct-dst addend 0xff",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 1 flow",
+        "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys nfct-dst addend 0xff baseclass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "5315",
+        "name": "Add flow filter with nfct-src map key",
+        "category": [
+            "filter",
+            "flow"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key nfct-src addend 0xff",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 1 flow",
+        "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys nfct-src addend 0xff baseclass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "7849",
+        "name": "Add flow filter with nfct-proto-src map key",
+        "category": [
+            "filter",
+            "flow"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key nfct-proto-src addend 0xff",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 1 flow",
+        "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys nfct-proto-src addend 0xff baseclass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "9902",
+        "name": "Add flow filter with nfct-proto-dst map key",
+        "category": [
+            "filter",
+            "flow"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key nfct-proto-dst addend 0xff",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 1 flow",
+        "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys nfct-proto-dst addend 0xff baseclass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "6742",
+        "name": "Add flow filter with rt-classid map key",
+        "category": [
+            "filter",
+            "flow"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key rt-classid addend 0xff",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 1 flow",
+        "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys rt-classid addend 0xff baseclass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "5432",
+        "name": "Add flow filter with sk-uid map key",
+        "category": [
+            "filter",
+            "flow"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key sk-uid addend 0xff",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 1 flow",
+        "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys sk-uid addend 0xff baseclass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "4134",
+        "name": "Add flow filter with sk-gid map key",
+        "category": [
+            "filter",
+            "flow"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key sk-gid addend 0xff",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 1 flow",
+        "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys sk-gid addend 0xff baseclass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "4522",
+        "name": "Add flow filter with vlan-tag map key",
+        "category": [
+            "filter",
+            "flow"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key vlan-tag addend 0xff",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 1 flow",
+        "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys vlan-tag addend 0xff baseclass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "4253",
+        "name": "Add flow filter with rxhash map key",
+        "category": [
+            "filter",
+            "flow"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key rxhash addend 0xff",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 1 flow",
+        "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys rxhash addend 0xff baseclass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "4452",
+        "name": "Add flow filter with hash key list",
+        "category": [
+            "filter",
+            "flow"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow hash keys src",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 1 flow",
+        "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 hash keys src baseclass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "4341",
+        "name": "Add flow filter with multiple ops",
+        "category": [
+            "filter",
+            "flow"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow hash keys src divisor 1024 baseclass 1:1 match 'cmp(u8 at 0 layer link mask 0xff gt 10)' action drop",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 1 flow",
+        "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 hash keys src divisor 1024 baseclass 1:1.*cmp\\(u8 at 0 layer 0 mask 0xff gt 10\\)",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "4392",
+        "name": "List flow filters",
+        "category": [
+            "filter",
+            "flow"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key rxhash addend 0xff",
+            "$TC filter add dev $DEV1 parent ffff: handle 2 prio 1 protocol ip flow map key rxhash or 0xff"
+        ],
+        "cmdUnderTest": "$TC filter show dev $DEV1 parent ffff:",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "filter protocol ip pref 1 flow chain 0 handle 0x[0-9]+ map keys rxhash",
+        "matchCount": "2",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "4322",
+        "name": "Change flow filter with map key num",
+        "category": [
+            "filter",
+            "flow"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key rxhash addend 0xff"
+        ],
+        "cmdUnderTest": "$TC filter change dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key rxhash addend 0x22",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow",
+        "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys rxhash addend 0x22 baseclass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "2320",
+        "name": "Replace flow filter with map key num",
+        "category": [
+            "filter",
+            "flow"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key rxhash addend 0xff"
+        ],
+        "cmdUnderTest": "$TC filter replace dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key rxhash addend 0x88",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow",
+        "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys rxhash addend 0x88 baseclass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "3213",
+        "name": "Delete flow filter with map key num",
+        "category": [
+            "filter",
+            "flow"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key rxhash addend 0xff"
+        ],
+        "cmdUnderTest": "$TC filter delete dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key rxhash addend 0xff",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow",
+        "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys rxhash addend 0xff baseclass",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    }
+]
-- 
cgit v1.2.3


From 67107e7fcfbeda945ba2b828cf6462361ee7771d Mon Sep 17 00:00:00 2001
From: Zhengchao Shao <shaozhengchao@huawei.com>
Date: Fri, 16 Sep 2022 10:02:48 +0800
Subject: selftests/tc-testings: add selftests for route filter

Test e122: Add route filter with from and to tag
Test 6573: Add route filter with fromif and to tag
Test 1362: Add route filter with to flag and reclassify action
Test 4720: Add route filter with from flag and continue actions
Test 2812: Add route filter with from tag and pipe action
Test 7994: Add route filter with multiple actions
Test 4312: List route filters
Test 2634: Delete route filter with pipe action

Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
Reviewed-by: Jamal Hadi Salim <jhs@mojatatu.com>
Reviewed-by: Victor Nogueira <victor@mojatatu.com>
Tested-by: Victor Nogueira <victor@mojatatu.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../tc-testing/tc-tests/filters/route.json         | 181 +++++++++++++++++++++
 1 file changed, 181 insertions(+)
 create mode 100644 tools/testing/selftests/tc-testing/tc-tests/filters/route.json

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/route.json b/tools/testing/selftests/tc-testing/tc-tests/filters/route.json
new file mode 100644
index 000000000000..1f6f19f02997
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/route.json
@@ -0,0 +1,181 @@
+[
+    {
+        "id": "e122",
+        "name": "Add route filter with from and to tag",
+        "category": [
+            "filter",
+            "route"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol ip prio 100 route from 1 to 10 classid 1:10",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter ls dev $DEV1 parent ffff:",
+        "matchPattern": "flowid 1:10 to 10 from 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "6573",
+        "name": "Add route filter with fromif and to tag",
+        "category": [
+            "filter",
+            "route"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol ip prio 100 route fromif $DEV1 to 10 classid 1:10",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter ls dev $DEV1 parent ffff:",
+        "matchPattern": "flowid 1:10 to 10 fromif",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "1362",
+        "name": "Add route filter with to flag and reclassify action",
+        "category": [
+            "filter",
+            "route"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol ip prio 2 route to 10 classid 1:20 action reclassify",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter ls dev $DEV1 parent ffff:",
+        "matchPattern": "filter protocol ip pref.*route chain [0-9]+.*flowid 1:20 to 10.*action order [0-9]+: gact action reclassify",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "4720",
+        "name": "Add route filter with from flag and continue actions",
+        "category": [
+            "filter",
+            "route"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol ip prio 2 route from 10 classid 1:100 action continue",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter ls dev $DEV1 parent ffff:",
+        "matchPattern": "filter protocol ip pref.*route chain [0-9]+.*flowid 1:100 from 10.*action continue",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "2812",
+        "name": "Add route filter with from tag and pipe action",
+        "category": [
+            "filter",
+            "route"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol ip prio 2 route from 10 to 2 classid 1:1 action pipe",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter ls dev $DEV1 parent ffff:",
+        "matchPattern": "filter protocol ip pref.*route chain [0-9]+.*flowid 1:1 to 2 from 10.*action pipe",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "7994",
+        "name": "Add route filter with multiple actions",
+        "category": [
+            "filter",
+            "route"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol ip prio 2 route from 10 to 2 classid 1:1 action skbedit mark 7 pipe action gact drop",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter ls dev $DEV1 parent ffff:",
+        "matchPattern": "filter protocol ip pref.*route chain [0-9]+.*flowid 1:1 to 2 from 10.*action order [0-9]+: skbedit  mark 7 pipe.*action order [0-9]+: gact action drop",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "4312",
+        "name": "List route filters",
+        "category": [
+            "filter",
+            "route"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: protocol ip prio 2 route from 10 to 2 classid 1:1 action pipe",
+            "$TC filter add dev $DEV1 parent ffff: protocol ip prio 2 route from 20 to 1 classid 1:20 action pipe"
+        ],
+        "cmdUnderTest": "$TC filter show dev $DEV1 parent ffff:",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "action order [0-9]+: gact action pipe",
+        "matchCount": "2",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "2634",
+        "name": "Delete route filter with pipe action",
+        "category": [
+            "filter",
+            "route"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: protocol ip prio 2 route from 10 to 2 classid 1:1 action pipe"
+        ],
+        "cmdUnderTest": "$TC filter del dev $DEV1 parent ffff: protocol ip prio 2 route from 10 to 2 classid 1:1 action pipe",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "filter protocol ip pref.*route chain [0-9]+.*flowid 1:1 to 2 from 10.*action pipe",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    }
+]
-- 
cgit v1.2.3


From 23020350eb6abf2df9c5c6363d64f61058567a5a Mon Sep 17 00:00:00 2001
From: Zhengchao Shao <shaozhengchao@huawei.com>
Date: Fri, 16 Sep 2022 10:02:49 +0800
Subject: selftests/tc-testings: add selftests for rsvp filter

Test 2141: Add rsvp filter with tcp proto and specific IP address
Test 5267: Add rsvp filter with udp proto and specific IP address
Test 2819: Add rsvp filter with src ip and src port
Test c967: Add rsvp filter with tunnelid and continue action
Test 5463: Add rsvp filter with tunnel and pipe action
Test 2332: Add rsvp filter with multiple actions
Test 8879: Add rsvp filter with tunnel and skip flag
Test 8261: List rsvp filters
Test 8989: Delete rsvp filter

Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
Reviewed-by: Jamal Hadi Salim <jhs@mojatatu.com>
Reviewed-by: Victor Nogueira <victor@mojatatu.com>
Tested-by: Victor Nogueira <victor@mojatatu.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../tc-testing/tc-tests/filters/rsvp.json          | 203 +++++++++++++++++++++
 1 file changed, 203 insertions(+)
 create mode 100644 tools/testing/selftests/tc-testing/tc-tests/filters/rsvp.json

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/rsvp.json b/tools/testing/selftests/tc-testing/tc-tests/filters/rsvp.json
new file mode 100644
index 000000000000..bdcbaa4c5663
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/rsvp.json
@@ -0,0 +1,203 @@
+[
+    {
+        "id": "2141",
+        "name": "Add rsvp filter with tcp proto and specific IP address",
+        "category": [
+            "filter",
+            "rsvp"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol ip prio 1 rsvp ipproto tcp session 198.168.10.64",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref [0-9]+ rsvp chain [0-9]+ fh 0x.*session 198.168.10.64 ipproto tcp",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "5267",
+        "name": "Add rsvp filter with udp proto and specific IP address",
+        "category": [
+            "filter",
+            "rsvp"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol ip prio 1 rsvp ipproto udp session 1.1.1.1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref [0-9]+ rsvp chain [0-9]+ fh 0x.*session 1.1.1.1 ipproto udp",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "2819",
+        "name": "Add rsvp filter with src ip and src port",
+        "category": [
+            "filter",
+            "rsvp"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol ip prio 1 rsvp ipproto udp session 1.1.1.1 sender 2.2.2.2/5021 classid 1:1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref [0-9]+ rsvp chain [0-9]+ fh 0x.*flowid 1:1 session 1.1.1.1 ipproto udp sender  2.2.2.2/5021",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "c967",
+        "name": "Add rsvp filter with tunnelid and continue action",
+        "category": [
+            "filter",
+            "rsvp"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol ip prio 1 rsvp ipproto udp session 1.1.1.1 tunnelid 2 classid 1:1 action continue",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref [0-9]+ rsvp chain [0-9]+ fh 0x.*flowid 1:1 session 1.1.1.1 ipproto udp tunnelid 2.*action order [0-9]+: gact action continue",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "5463",
+        "name": "Add rsvp filter with tunnel and pipe action",
+        "category": [
+            "filter",
+            "rsvp"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol ip prio 1 rsvp ipproto udp session 1.1.1.1 tunnel 2 skip 1 action pipe",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref [0-9]+ rsvp chain [0-9]+ fh 0x.*tunnel 2 skip 1 session 1.1.1.1 ipproto udp.*action order [0-9]+: gact action pipe",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "2332",
+        "name": "Add rsvp filter with multiple actions",
+        "category": [
+            "filter",
+            "rsvp"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol ip prio 7 rsvp ipproto udp session 1.1.1.1 classid 1:1 action skbedit mark 7 pipe action gact drop",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref [0-9]+ rsvp chain [0-9]+ fh 0x.*flowid 1:1 session 1.1.1.1 ipproto udp.*action order [0-9]+: skbedit  mark 7 pipe.*action order [0-9]+: gact action drop",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "8879",
+        "name": "Add rsvp filter with tunnel and skip flag",
+        "category": [
+            "filter",
+            "rsvp"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol ip prio 1 rsvp ipproto udp session 1.1.1.1 tunnel 2 skip 1 action pipe",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref [0-9]+ rsvp chain [0-9]+ fh 0x.*tunnel 2 skip 1 session 1.1.1.1 ipproto udp.*action order [0-9]+: gact action pipe",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "8261",
+        "name": "List rsvp filters",
+        "category": [
+            "filter",
+            "rsvp"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: protocol ip prio 1 rsvp ipproto udp session 1.1.1.1/1234 classid 1:1",
+            "$TC filter add dev $DEV1 parent ffff: protocol ip prio 1 rsvp ipproto tcp session 2.2.2.2/1234 classid 2:1"
+        ],
+        "cmdUnderTest": "$TC filter show dev $DEV1 parent ffff:",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref [0-9]+ rsvp chain [0-9]+ fh",
+        "matchCount": "2",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "8989",
+        "name": "Delete rsvp filter",
+        "category": [
+            "filter",
+            "rsvp"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: protocol ip prio 1 rsvp ipproto udp session 1.1.1.1/1234 tunnelid 9 classid 2:1"
+        ],
+        "cmdUnderTest": "$TC filter del dev $DEV1 parent ffff: protocol ip prio 1 rsvp ipproto udp session 1.1.1.1/1234 tunnelid 9 classid 2:1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "filter protocol ip pref [0-9]+ rsvp chain [0-9]+ fh 0x.*flowid 2:1 session 1.1.1.1/1234 ipproto udp tunnelid 9",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    }
+]
-- 
cgit v1.2.3


From fa8dfba59e78a7ba2f523a1b7e5beb5451a84d44 Mon Sep 17 00:00:00 2001
From: Zhengchao Shao <shaozhengchao@huawei.com>
Date: Fri, 16 Sep 2022 10:02:50 +0800
Subject: selftests/tc-testings: add selftests for tcindex filter

Test 8293: Add tcindex filter with default action
Test 7281: Add tcindex filter with hash size and pass action
Test b294: Add tcindex filter with mask shift and reclassify action
Test 0532: Add tcindex filter with pass_on and continue actions
Test d473: Add tcindex filter with pipe action
Test 2940: Add tcindex filter with multiple actions
Test 1893: List tcindex filters
Test 2041: Change tcindex filter with pass action
Test 9203: Replace tcindex filter with pass action
Test 7957: Delete tcindex filter with drop action

Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
Reviewed-by: Jamal Hadi Salim <jhs@mojatatu.com>
Reviewed-by: Victor Nogueira <victor@mojatatu.com>
Tested-by: Victor Nogueira <victor@mojatatu.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../tc-testing/tc-tests/filters/tcindex.json       | 227 +++++++++++++++++++++
 1 file changed, 227 insertions(+)
 create mode 100644 tools/testing/selftests/tc-testing/tc-tests/filters/tcindex.json

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/tcindex.json b/tools/testing/selftests/tc-testing/tc-tests/filters/tcindex.json
new file mode 100644
index 000000000000..44901db70376
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/tcindex.json
@@ -0,0 +1,227 @@
+[
+    {
+        "id": "8293",
+        "name": "Add tcindex filter with default action",
+        "category": [
+            "filter",
+            "tcindex"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 tcindex classid 1:1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip tcindex",
+        "matchPattern": "^filter parent ffff: protocol ip pref 1 tcindex chain 0 handle 0x0001 classid 1:1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "7281",
+        "name": "Add tcindex filter with hash size and pass action",
+        "category": [
+            "filter",
+            "tcindex"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 tcindex hash 32 fall_through classid 1:1 action pass",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip tcindex",
+        "matchPattern": "^filter parent ffff: protocol ip pref.*tcindex chain [0-9]+ handle 0x0001 classid 1:1.*action order [0-9]+: gact action pass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "b294",
+        "name": "Add tcindex filter with mask shift and reclassify action",
+        "category": [
+            "filter",
+            "tcindex"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 tcindex hash 32 mask 1 shift 2 fall_through classid 1:1 action reclassify",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip tcindex",
+        "matchPattern": "^filter parent ffff: protocol ip pref.*tcindex chain [0-9]+ handle 0x0001 classid 1:1.*action order [0-9]+: gact action reclassify",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "0532",
+        "name": "Add tcindex filter with pass_on and continue actions",
+        "category": [
+            "filter",
+            "tcindex"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 tcindex hash 32 mask 1 shift 2 pass_on classid 1:1 action continue",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip tcindex",
+        "matchPattern": "^filter parent ffff: protocol ip pref.*tcindex chain [0-9]+ handle 0x0001 classid 1:1.*action order [0-9]+: gact action continue",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "d473",
+        "name": "Add tcindex filter with pipe action",
+        "category": [
+            "filter",
+            "tcindex"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 tcindex hash 32 mask 1 shift 2 fall_through classid 1:1 action pipe",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip tcindex",
+        "matchPattern": "^filter parent ffff: protocol ip pref.*tcindex chain [0-9]+ handle 0x0001 classid 1:1.*action order [0-9]+: gact action pipe",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "2940",
+        "name": "Add tcindex filter with multiple actions",
+        "category": [
+            "filter",
+            "tcindex"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 7 tcindex hash 32 mask 1 shift 2 fall_through classid 1:1 action skbedit mark 7 pipe action gact drop",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 7 protocol ip tcindex",
+        "matchPattern": "^filter parent ffff: protocol ip pref 7 tcindex.*handle 0x0001.*action.*skbedit.*mark 7 pipe.*action.*gact action drop",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "1893",
+        "name": "List tcindex filters",
+        "category": [
+            "filter",
+            "tcindex"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 tcindex classid 1:1",
+            "$TC filter add dev $DEV1 parent ffff: handle 2 protocol ip prio 1 tcindex classid 1:1"
+        ],
+        "cmdUnderTest": "$TC filter show dev $DEV1 parent ffff:",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "handle 0x000[0-9]+ classid 1:1",
+        "matchCount": "2",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "2041",
+        "name": "Change tcindex filter with pass action",
+        "category": [
+            "filter",
+            "tcindex"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 tcindex classid 1:1 action drop"
+        ],
+        "cmdUnderTest": "$TC filter change dev $DEV1 parent ffff: handle 1 protocol ip prio 1 tcindex classid 1:1 action pass",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip tcindex",
+        "matchPattern": "handle 0x0001 classid 1:1.*action order [0-9]+: gact action pass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "9203",
+        "name": "Replace tcindex filter with pass action",
+        "category": [
+            "filter",
+            "tcindex"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 tcindex classid 1:1 action drop"
+        ],
+        "cmdUnderTest": "$TC filter replace dev $DEV1 parent ffff: handle 1 protocol ip prio 1 tcindex classid 1:1 action pass",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip tcindex",
+        "matchPattern": "handle 0x0001 classid 1:1.*action order [0-9]+: gact action pass",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "7957",
+        "name": "Delete tcindex filter with drop action",
+        "category": [
+            "filter",
+            "tcindex"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 tcindex classid 1:1 action drop"
+        ],
+        "cmdUnderTest": "$TC filter del dev $DEV1 parent ffff: handle 1 protocol ip prio 1 tcindex classid 1:1 action drop",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip tcindex",
+        "matchPattern": "handle 0x0001 classid 1:1.*action order [0-9]+: gact action drop",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    }
+]
-- 
cgit v1.2.3


From 972e886112402b73f062f82d885928387dee4cff Mon Sep 17 00:00:00 2001
From: Zhengchao Shao <shaozhengchao@huawei.com>
Date: Fri, 16 Sep 2022 10:02:51 +0800
Subject: selftests/tc-testings: add list case for basic filter

Test 0811: Add multiple basic filter with cmp ematch u8/link layer and
default action and dump them
Test 5129: List basic filters

Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
Reviewed-by: Jamal Hadi Salim <jhs@mojatatu.com>
Reviewed-by: Victor Nogueira <victor@mojatatu.com>
Tested-by: Victor Nogueira <victor@mojatatu.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../tc-testing/tc-tests/filters/basic.json         | 47 ++++++++++++++++++++++
 1 file changed, 47 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json b/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json
index e788c114a484..d1278de8ebc3 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json
@@ -1274,5 +1274,52 @@
         "teardown": [
             "$TC qdisc del dev $DEV1 ingress"
         ]
+    },
+    {
+        "id": "0811",
+        "name": "Add multiple basic filter with cmp ematch u8/link layer and default action and dump them",
+        "category": [
+            "filter",
+            "basic"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'cmp(u8 at 0 layer link mask 0xff gt 10)' classid 1:1"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 2 protocol ip prio 1 basic match 'cmp(u8 at 0 layer link mask 0xff gt 10)' classid 1:1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "^filter protocol ip pref 1 basic",
+        "matchCount": "3",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
+    },
+    {
+        "id": "5129",
+        "name": "List basic filters",
+        "category": [
+            "filter",
+            "basic"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$TC qdisc add dev $DEV1 ingress",
+            "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'cmp(u8 at 0 layer link mask 0xff gt 10)' classid 1:1",
+            "$TC filter add dev $DEV1 parent ffff: handle 2 protocol ip prio 1 basic match 'cmp(u8 at 0 layer link mask 0xff gt 10)' classid 1:1"
+        ],
+        "cmdUnderTest": "$TC filter show dev $DEV1 parent ffff:",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+        "matchPattern": "cmp\\(u8 at 0 layer 0 mask 0xff gt 10\\)",
+        "matchCount": "2",
+        "teardown": [
+            "$TC qdisc del dev $DEV1 ingress"
+        ]
     }
 ]
-- 
cgit v1.2.3


From 7620bffbf72cd66a5d18e444a143b5b5989efa87 Mon Sep 17 00:00:00 2001
From: Xin Liu <liuxin350@huawei.com>
Date: Sat, 17 Sep 2022 16:48:09 +0800
Subject: libbpf: Fix NULL pointer exception in API btf_dump__dump_type_data

We found that function btf_dump__dump_type_data can be called by the
user as an API, but in this function, the `opts` parameter may be used
as a null pointer. This causes `opts->indent_str` to trigger a NULL
pointer exception.

Fixes: 2ce8450ef5a3 ("libbpf: add bpf_object__open_{file, mem} w/ extensible opts")
Signed-off-by: Xin Liu <liuxin350@huawei.com>
Signed-off-by: Weibin Kong <kongweibin2@huawei.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220917084809.30770-1-liuxin350@huawei.com
---
 tools/lib/bpf/btf_dump.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
index 627edb5bb6de..4221f73a74d0 100644
--- a/tools/lib/bpf/btf_dump.c
+++ b/tools/lib/bpf/btf_dump.c
@@ -2385,7 +2385,7 @@ int btf_dump__dump_type_data(struct btf_dump *d, __u32 id,
 	d->typed_dump->indent_lvl = OPTS_GET(opts, indent_level, 0);
 
 	/* default indent string is a tab */
-	if (!opts->indent_str)
+	if (!OPTS_GET(opts, indent_str, NULL))
 		d->typed_dump->indent_str[0] = '\t';
 	else
 		libbpf_strlcpy(d->typed_dump->indent_str, opts->indent_str,
-- 
cgit v1.2.3


From 0e426a3ae030a9e891899370229e117158b35de6 Mon Sep 17 00:00:00 2001
From: Pu Lehui <pulehui@huawei.com>
Date: Wed, 21 Sep 2022 10:46:02 +0000
Subject: bpf, cgroup: Reject prog_attach_flags array when effective query

Attach flags are only valid for progs attached at this cgroup layer,
not for effective progs. When querying with the EFFECTIVE flag,
exporting attach flags does not make sense. So for an effective query,
we reject the prog_attach_flags array and don't need to populate it.
Also, attach_flags is always output as 0 during an effective query.

Fixes: b79c9fc9551b ("bpf: implement BPF_PROG_QUERY for BPF_LSM_CGROUP")
Signed-off-by: Pu Lehui <pulehui@huawei.com>
Link: https://lore.kernel.org/r/20220921104604.2340580-2-pulehui@huaweicloud.com
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
---
 include/uapi/linux/bpf.h       |  7 +++++--
 kernel/bpf/cgroup.c            | 28 ++++++++++++++++++----------
 tools/include/uapi/linux/bpf.h |  7 +++++--
 3 files changed, 28 insertions(+), 14 deletions(-)

(limited to 'tools')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 59a217ca2dfd..4eff7fc7ae58 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1233,7 +1233,7 @@ enum {
 
 /* Query effective (directly attached + inherited from ancestor cgroups)
  * programs that will be executed for events within a cgroup.
- * attach_flags with this flag are returned only for directly attached programs.
+ * attach_flags with this flag are always returned 0.
  */
 #define BPF_F_QUERY_EFFECTIVE	(1U << 0)
 
@@ -1432,7 +1432,10 @@ union bpf_attr {
 		__u32		attach_flags;
 		__aligned_u64	prog_ids;
 		__u32		prog_cnt;
-		__aligned_u64	prog_attach_flags; /* output: per-program attach_flags */
+		/* output: per-program attach_flags.
+		 * not allowed to be set during effective query.
+		 */
+		__aligned_u64	prog_attach_flags;
 	} query;
 
 	struct { /* anonymous struct used by BPF_RAW_TRACEPOINT_OPEN command */
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 4a400cd63731..22888aaa68b6 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -1020,6 +1020,7 @@ static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
 			      union bpf_attr __user *uattr)
 {
 	__u32 __user *prog_attach_flags = u64_to_user_ptr(attr->query.prog_attach_flags);
+	bool effective_query = attr->query.query_flags & BPF_F_QUERY_EFFECTIVE;
 	__u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
 	enum bpf_attach_type type = attr->query.attach_type;
 	enum cgroup_bpf_attach_type from_atype, to_atype;
@@ -1029,8 +1030,12 @@ static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
 	int total_cnt = 0;
 	u32 flags;
 
+	if (effective_query && prog_attach_flags)
+		return -EINVAL;
+
 	if (type == BPF_LSM_CGROUP) {
-		if (attr->query.prog_cnt && prog_ids && !prog_attach_flags)
+		if (!effective_query && attr->query.prog_cnt &&
+		    prog_ids && !prog_attach_flags)
 			return -EINVAL;
 
 		from_atype = CGROUP_LSM_START;
@@ -1045,7 +1050,7 @@ static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
 	}
 
 	for (atype = from_atype; atype <= to_atype; atype++) {
-		if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) {
+		if (effective_query) {
 			effective = rcu_dereference_protected(cgrp->bpf.effective[atype],
 							      lockdep_is_held(&cgroup_mutex));
 			total_cnt += bpf_prog_array_length(effective);
@@ -1054,6 +1059,8 @@ static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
 		}
 	}
 
+	/* always output uattr->query.attach_flags as 0 during effective query */
+	flags = effective_query ? 0 : flags;
 	if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)))
 		return -EFAULT;
 	if (copy_to_user(&uattr->query.prog_cnt, &total_cnt, sizeof(total_cnt)))
@@ -1068,7 +1075,7 @@ static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
 	}
 
 	for (atype = from_atype; atype <= to_atype && total_cnt; atype++) {
-		if (attr->query.query_flags & BPF_F_QUERY_EFFECTIVE) {
+		if (effective_query) {
 			effective = rcu_dereference_protected(cgrp->bpf.effective[atype],
 							      lockdep_is_held(&cgroup_mutex));
 			cnt = min_t(int, bpf_prog_array_length(effective), total_cnt);
@@ -1090,15 +1097,16 @@ static int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
 				if (++i == cnt)
 					break;
 			}
-		}
 
-		if (prog_attach_flags) {
-			flags = cgrp->bpf.flags[atype];
+			if (prog_attach_flags) {
+				flags = cgrp->bpf.flags[atype];
 
-			for (i = 0; i < cnt; i++)
-				if (copy_to_user(prog_attach_flags + i, &flags, sizeof(flags)))
-					return -EFAULT;
-			prog_attach_flags += cnt;
+				for (i = 0; i < cnt; i++)
+					if (copy_to_user(prog_attach_flags + i,
+							 &flags, sizeof(flags)))
+						return -EFAULT;
+				prog_attach_flags += cnt;
+			}
 		}
 
 		prog_ids += cnt;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 59a217ca2dfd..4eff7fc7ae58 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1233,7 +1233,7 @@ enum {
 
 /* Query effective (directly attached + inherited from ancestor cgroups)
  * programs that will be executed for events within a cgroup.
- * attach_flags with this flag are returned only for directly attached programs.
+ * attach_flags with this flag are always returned 0.
  */
 #define BPF_F_QUERY_EFFECTIVE	(1U << 0)
 
@@ -1432,7 +1432,10 @@ union bpf_attr {
 		__u32		attach_flags;
 		__aligned_u64	prog_ids;
 		__u32		prog_cnt;
-		__aligned_u64	prog_attach_flags; /* output: per-program attach_flags */
+		/* output: per-program attach_flags.
+		 * not allowed to be set during effective query.
+		 */
+		__aligned_u64	prog_attach_flags;
 	} query;
 
 	struct { /* anonymous struct used by BPF_RAW_TRACEPOINT_OPEN command */
-- 
cgit v1.2.3


From bdcee1b0b0834d031c76a12209840afe949b048a Mon Sep 17 00:00:00 2001
From: Pu Lehui <pulehui@huawei.com>
Date: Wed, 21 Sep 2022 10:46:03 +0000
Subject: bpftool: Fix wrong cgroup attach flags being assigned to effective
 progs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When the root cgroup attaches multiple progs and a sub-cgroup attaches an
override prog, bpftool displays incorrect attach flags for the sub-cgroup’s
effective progs:

$ bpftool cgroup tree /sys/fs/cgroup effective
CgroupPath
ID       AttachType      AttachFlags     Name
/sys/fs/cgroup
6        cgroup_sysctl   multi           sysctl_tcp_mem
13       cgroup_sysctl   multi           sysctl_tcp_mem
/sys/fs/cgroup/cg1
20       cgroup_sysctl   override        sysctl_tcp_mem
6        cgroup_sysctl   override        sysctl_tcp_mem <- wrong
13       cgroup_sysctl   override        sysctl_tcp_mem <- wrong
/sys/fs/cgroup/cg1/cg2
20       cgroup_sysctl                   sysctl_tcp_mem
6        cgroup_sysctl                   sysctl_tcp_mem
13       cgroup_sysctl                   sysctl_tcp_mem

Attach flags are only valid for progs attached at this cgroup layer,
not for effective progs. When querying with the EFFECTIVE flag,
exporting attach flags does not make sense. So let's remove the
AttachFlags field and the associated logic. After this patch, the
above effective cgroup tree will be shown as below:

$ bpftool cgroup tree /sys/fs/cgroup effective
CgroupPath
ID       AttachType      Name
/sys/fs/cgroup
6        cgroup_sysctl   sysctl_tcp_mem
13       cgroup_sysctl   sysctl_tcp_mem
/sys/fs/cgroup/cg1
20       cgroup_sysctl   sysctl_tcp_mem
6        cgroup_sysctl   sysctl_tcp_mem
13       cgroup_sysctl   sysctl_tcp_mem
/sys/fs/cgroup/cg1/cg2
20       cgroup_sysctl   sysctl_tcp_mem
6        cgroup_sysctl   sysctl_tcp_mem
13       cgroup_sysctl   sysctl_tcp_mem

Fixes: b79c9fc9551b ("bpf: implement BPF_PROG_QUERY for BPF_LSM_CGROUP")
Fixes: a98bf57391a2 ("tools: bpftool: add support for reporting the effective cgroup progs")
Signed-off-by: Pu Lehui <pulehui@huawei.com>
Link: https://lore.kernel.org/r/20220921104604.2340580-3-pulehui@huaweicloud.com
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
---
 tools/bpf/bpftool/cgroup.c | 54 +++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 49 insertions(+), 5 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c
index cced668fb2a3..b46a998d8f8d 100644
--- a/tools/bpf/bpftool/cgroup.c
+++ b/tools/bpf/bpftool/cgroup.c
@@ -136,8 +136,8 @@ static int show_bpf_prog(int id, enum bpf_attach_type attach_type,
 			jsonw_string_field(json_wtr, "attach_type", attach_type_str);
 		else
 			jsonw_uint_field(json_wtr, "attach_type", attach_type);
-		jsonw_string_field(json_wtr, "attach_flags",
-				   attach_flags_str);
+		if (!(query_flags & BPF_F_QUERY_EFFECTIVE))
+			jsonw_string_field(json_wtr, "attach_flags", attach_flags_str);
 		jsonw_string_field(json_wtr, "name", prog_name);
 		if (attach_btf_name)
 			jsonw_string_field(json_wtr, "attach_btf_name", attach_btf_name);
@@ -150,7 +150,10 @@ static int show_bpf_prog(int id, enum bpf_attach_type attach_type,
 			printf("%-15s", attach_type_str);
 		else
 			printf("type %-10u", attach_type);
-		printf(" %-15s %-15s", attach_flags_str, prog_name);
+		if (query_flags & BPF_F_QUERY_EFFECTIVE)
+			printf(" %-15s", prog_name);
+		else
+			printf(" %-15s %-15s", attach_flags_str, prog_name);
 		if (attach_btf_name)
 			printf(" %-15s", attach_btf_name);
 		else if (info.attach_btf_id)
@@ -195,6 +198,32 @@ static int cgroup_has_attached_progs(int cgroup_fd)
 
 	return no_prog ? 0 : 1;
 }
+
+static int show_effective_bpf_progs(int cgroup_fd, enum bpf_attach_type type,
+				    int level)
+{
+	LIBBPF_OPTS(bpf_prog_query_opts, p);
+	__u32 prog_ids[1024] = {0};
+	__u32 iter;
+	int ret;
+
+	p.query_flags = query_flags;
+	p.prog_cnt = ARRAY_SIZE(prog_ids);
+	p.prog_ids = prog_ids;
+
+	ret = bpf_prog_query_opts(cgroup_fd, type, &p);
+	if (ret)
+		return ret;
+
+	if (p.prog_cnt == 0)
+		return 0;
+
+	for (iter = 0; iter < p.prog_cnt; iter++)
+		show_bpf_prog(prog_ids[iter], type, NULL, level);
+
+	return 0;
+}
+
 static int show_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type,
 				   int level)
 {
@@ -245,6 +274,14 @@ static int show_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type,
 	return 0;
 }
 
+static int show_bpf_progs(int cgroup_fd, enum bpf_attach_type type,
+			  int level)
+{
+	return query_flags & BPF_F_QUERY_EFFECTIVE ?
+	       show_effective_bpf_progs(cgroup_fd, type, level) :
+	       show_attached_bpf_progs(cgroup_fd, type, level);
+}
+
 static int do_show(int argc, char **argv)
 {
 	enum bpf_attach_type type;
@@ -292,6 +329,8 @@ static int do_show(int argc, char **argv)
 
 	if (json_output)
 		jsonw_start_array(json_wtr);
+	else if (query_flags & BPF_F_QUERY_EFFECTIVE)
+		printf("%-8s %-15s %-15s\n", "ID", "AttachType", "Name");
 	else
 		printf("%-8s %-15s %-15s %-15s\n", "ID", "AttachType",
 		       "AttachFlags", "Name");
@@ -304,7 +343,7 @@ static int do_show(int argc, char **argv)
 		 * If we were able to get the show for at least one
 		 * attach type, let's return 0.
 		 */
-		if (show_attached_bpf_progs(cgroup_fd, type, 0) == 0)
+		if (show_bpf_progs(cgroup_fd, type, 0) == 0)
 			ret = 0;
 	}
 
@@ -362,7 +401,7 @@ static int do_show_tree_fn(const char *fpath, const struct stat *sb,
 
 	btf_vmlinux = libbpf_find_kernel_btf();
 	for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++)
-		show_attached_bpf_progs(cgroup_fd, type, ftw->level);
+		show_bpf_progs(cgroup_fd, type, ftw->level);
 
 	if (errno == EINVAL)
 		/* Last attach type does not support query.
@@ -436,6 +475,11 @@ static int do_show_tree(int argc, char **argv)
 
 	if (json_output)
 		jsonw_start_array(json_wtr);
+	else if (query_flags & BPF_F_QUERY_EFFECTIVE)
+		printf("%s\n"
+		       "%-8s %-15s %-15s\n",
+		       "CgroupPath",
+		       "ID", "AttachType", "Name");
 	else
 		printf("%s\n"
 		       "%-8s %-15s %-15s %-15s\n",
-- 
cgit v1.2.3


From d2aa993b7d9de6deeb1df6c9a6b9b6193c337cc6 Mon Sep 17 00:00:00 2001
From: Pu Lehui <pulehui@huawei.com>
Date: Wed, 21 Sep 2022 10:46:04 +0000
Subject: selftests/bpf: Adapt cgroup effective query uapi change

The attach flags are meaningless for an effective query and
their value is always set to 0 during an effective query.
The root cg's effective progs are always its attached progs,
so we use a non-effective query to get its prog count and
attach flags. And we don't need the remaining attach flags
checks.

Fixes: b79c9fc9551b ("bpf: implement BPF_PROG_QUERY for BPF_LSM_CGROUP")
Signed-off-by: Pu Lehui <pulehui@huawei.com>
Link: https://lore.kernel.org/r/20220921104604.2340580-4-pulehui@huaweicloud.com
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
---
 tools/testing/selftests/bpf/prog_tests/cgroup_link.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_link.c b/tools/testing/selftests/bpf/prog_tests/cgroup_link.c
index 9e6e6aad347c..15093a69510e 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_link.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_link.c
@@ -71,10 +71,9 @@ void serial_test_cgroup_link(void)
 
 	ping_and_check(cg_nr, 0);
 
-	/* query the number of effective progs and attach flags in root cg */
+	/* query the number of attached progs and attach flags in root cg */
 	err = bpf_prog_query(cgs[0].fd, BPF_CGROUP_INET_EGRESS,
-			     BPF_F_QUERY_EFFECTIVE, &attach_flags, NULL,
-			     &prog_cnt);
+			     0, &attach_flags, NULL, &prog_cnt);
 	CHECK_FAIL(err);
 	CHECK_FAIL(attach_flags != BPF_F_ALLOW_MULTI);
 	if (CHECK(prog_cnt != 1, "effect_cnt", "exp %d, got %d\n", 1, prog_cnt))
@@ -85,17 +84,15 @@ void serial_test_cgroup_link(void)
 			     BPF_F_QUERY_EFFECTIVE, NULL, NULL,
 			     &prog_cnt);
 	CHECK_FAIL(err);
-	CHECK_FAIL(attach_flags != BPF_F_ALLOW_MULTI);
 	if (CHECK(prog_cnt != cg_nr, "effect_cnt", "exp %d, got %d\n",
 		  cg_nr, prog_cnt))
 		goto cleanup;
 
 	/* query the effective prog IDs in last cg */
 	err = bpf_prog_query(cgs[last_cg].fd, BPF_CGROUP_INET_EGRESS,
-			     BPF_F_QUERY_EFFECTIVE, &attach_flags,
-			     prog_ids, &prog_cnt);
+			     BPF_F_QUERY_EFFECTIVE, NULL, prog_ids,
+			     &prog_cnt);
 	CHECK_FAIL(err);
-	CHECK_FAIL(attach_flags != BPF_F_ALLOW_MULTI);
 	if (CHECK(prog_cnt != cg_nr, "effect_cnt", "exp %d, got %d\n",
 		  cg_nr, prog_cnt))
 		goto cleanup;
-- 
cgit v1.2.3


From 583c1f420173f7d84413a1a1fbf5109d798b4faa Mon Sep 17 00:00:00 2001
From: David Vernet <void@manifault.com>
Date: Mon, 19 Sep 2022 19:00:57 -0500
Subject: bpf: Define new BPF_MAP_TYPE_USER_RINGBUF map type

We want to support a ringbuf map type where samples are published from
user-space, to be consumed by BPF programs. BPF currently supports a
kernel -> user-space circular ring buffer via the BPF_MAP_TYPE_RINGBUF
map type.  We'll need to define a new map type for user-space -> kernel,
as none of the helpers exported for BPF_MAP_TYPE_RINGBUF will apply
to a user-space producer ring buffer, and we'll want to add one or
more helper functions that would not apply for a kernel-producer
ring buffer.

This patch therefore adds a new BPF_MAP_TYPE_USER_RINGBUF map type
definition. The map type is useless in its current form, as there is no
way to access or use it for anything until we add one or more BPF helpers. A
follow-on patch will therefore add a new helper function that allows BPF
programs to run callbacks on samples that are published to the ring
buffer.

Signed-off-by: David Vernet <void@manifault.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220920000100.477320-2-void@manifault.com
---
 include/linux/bpf_types.h                       |  1 +
 include/uapi/linux/bpf.h                        |  1 +
 kernel/bpf/ringbuf.c                            | 62 ++++++++++++++++++++++---
 kernel/bpf/verifier.c                           |  3 ++
 tools/bpf/bpftool/Documentation/bpftool-map.rst |  2 +-
 tools/bpf/bpftool/map.c                         |  2 +-
 tools/include/uapi/linux/bpf.h                  |  1 +
 tools/lib/bpf/libbpf.c                          |  1 +
 8 files changed, 65 insertions(+), 8 deletions(-)

(limited to 'tools')

diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 2b9112b80171..2c6a4f2562a7 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -126,6 +126,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_STRUCT_OPS, bpf_struct_ops_map_ops)
 #endif
 BPF_MAP_TYPE(BPF_MAP_TYPE_RINGBUF, ringbuf_map_ops)
 BPF_MAP_TYPE(BPF_MAP_TYPE_BLOOM_FILTER, bloom_filter_map_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_USER_RINGBUF, user_ringbuf_map_ops)
 
 BPF_LINK_TYPE(BPF_LINK_TYPE_RAW_TRACEPOINT, raw_tracepoint)
 BPF_LINK_TYPE(BPF_LINK_TYPE_TRACING, tracing)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 3df78c56c1bf..e18c85324db6 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -928,6 +928,7 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_INODE_STORAGE,
 	BPF_MAP_TYPE_TASK_STORAGE,
 	BPF_MAP_TYPE_BLOOM_FILTER,
+	BPF_MAP_TYPE_USER_RINGBUF,
 };
 
 /* Note that tracing related programs such as
diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c
index b483aea35f41..754e915748fb 100644
--- a/kernel/bpf/ringbuf.c
+++ b/kernel/bpf/ringbuf.c
@@ -38,10 +38,27 @@ struct bpf_ringbuf {
 	struct page **pages;
 	int nr_pages;
 	spinlock_t spinlock ____cacheline_aligned_in_smp;
-	/* Consumer and producer counters are put into separate pages to allow
-	 * mapping consumer page as r/w, but restrict producer page to r/o.
-	 * This protects producer position from being modified by user-space
-	 * application and ruining in-kernel position tracking.
+	/* Consumer and producer counters are put into separate pages to
+	 * allow each position to be mapped with different permissions.
+	 * This prevents a user-space application from modifying the
+	 * position and ruining in-kernel tracking. The permissions of the
+	 * pages depend on who is producing samples: user-space or the
+	 * kernel.
+	 *
+	 * Kernel-producer
+	 * ---------------
+	 * The producer position and data pages are mapped as r/o in
+	 * userspace. For this approach, bits in the header of samples are
+	 * used to signal to user-space, and to other producers, whether a
+	 * sample is currently being written.
+	 *
+	 * User-space producer
+	 * -------------------
+	 * Only the page containing the consumer position is mapped r/o in
+	 * user-space. User-space producers also use bits of the header to
+	 * communicate to the kernel, but the kernel must carefully check and
+	 * validate each sample to ensure that they're correctly formatted, and
+	 * fully contained within the ring buffer.
 	 */
 	unsigned long consumer_pos __aligned(PAGE_SIZE);
 	unsigned long producer_pos __aligned(PAGE_SIZE);
@@ -224,7 +241,7 @@ static int ringbuf_map_get_next_key(struct bpf_map *map, void *key,
 	return -ENOTSUPP;
 }
 
-static int ringbuf_map_mmap(struct bpf_map *map, struct vm_area_struct *vma)
+static int ringbuf_map_mmap_kern(struct bpf_map *map, struct vm_area_struct *vma)
 {
 	struct bpf_ringbuf_map *rb_map;
 
@@ -242,6 +259,26 @@ static int ringbuf_map_mmap(struct bpf_map *map, struct vm_area_struct *vma)
 				   vma->vm_pgoff + RINGBUF_PGOFF);
 }
 
+static int ringbuf_map_mmap_user(struct bpf_map *map, struct vm_area_struct *vma)
+{
+	struct bpf_ringbuf_map *rb_map;
+
+	rb_map = container_of(map, struct bpf_ringbuf_map, map);
+
+	if (vma->vm_flags & VM_WRITE) {
+		if (vma->vm_pgoff == 0)
+			/* Disallow writable mappings to the consumer pointer,
+			 * and allow writable mappings to both the producer
+			 * position, and the ring buffer data itself.
+			 */
+			return -EPERM;
+	} else {
+		vma->vm_flags &= ~VM_MAYWRITE;
+	}
+	/* remap_vmalloc_range() checks size and offset constraints */
+	return remap_vmalloc_range(vma, rb_map->rb, vma->vm_pgoff + RINGBUF_PGOFF);
+}
+
 static unsigned long ringbuf_avail_data_sz(struct bpf_ringbuf *rb)
 {
 	unsigned long cons_pos, prod_pos;
@@ -269,7 +306,7 @@ const struct bpf_map_ops ringbuf_map_ops = {
 	.map_meta_equal = bpf_map_meta_equal,
 	.map_alloc = ringbuf_map_alloc,
 	.map_free = ringbuf_map_free,
-	.map_mmap = ringbuf_map_mmap,
+	.map_mmap = ringbuf_map_mmap_kern,
 	.map_poll = ringbuf_map_poll,
 	.map_lookup_elem = ringbuf_map_lookup_elem,
 	.map_update_elem = ringbuf_map_update_elem,
@@ -278,6 +315,19 @@ const struct bpf_map_ops ringbuf_map_ops = {
 	.map_btf_id = &ringbuf_map_btf_ids[0],
 };
 
+BTF_ID_LIST_SINGLE(user_ringbuf_map_btf_ids, struct, bpf_ringbuf_map)
+const struct bpf_map_ops user_ringbuf_map_ops = {
+	.map_meta_equal = bpf_map_meta_equal,
+	.map_alloc = ringbuf_map_alloc,
+	.map_free = ringbuf_map_free,
+	.map_mmap = ringbuf_map_mmap_user,
+	.map_lookup_elem = ringbuf_map_lookup_elem,
+	.map_update_elem = ringbuf_map_update_elem,
+	.map_delete_elem = ringbuf_map_delete_elem,
+	.map_get_next_key = ringbuf_map_get_next_key,
+	.map_btf_id = &user_ringbuf_map_btf_ids[0],
+};
+
 /* Given pointer to ring buffer record metadata and struct bpf_ringbuf itself,
  * calculate offset from record metadata to ring buffer in pages, rounded
  * down. This page offset is stored as part of record metadata and allows to
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 8c6fbcd0afaf..83710b60e708 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -6240,6 +6240,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 		    func_id != BPF_FUNC_ringbuf_discard_dynptr)
 			goto error;
 		break;
+	case BPF_MAP_TYPE_USER_RINGBUF:
+		goto error;
 	case BPF_MAP_TYPE_STACK_TRACE:
 		if (func_id != BPF_FUNC_get_stackid)
 			goto error;
@@ -12635,6 +12637,7 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
 		case BPF_MAP_TYPE_ARRAY_OF_MAPS:
 		case BPF_MAP_TYPE_HASH_OF_MAPS:
 		case BPF_MAP_TYPE_RINGBUF:
+		case BPF_MAP_TYPE_USER_RINGBUF:
 		case BPF_MAP_TYPE_INODE_STORAGE:
 		case BPF_MAP_TYPE_SK_STORAGE:
 		case BPF_MAP_TYPE_TASK_STORAGE:
diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst
index 7c188a598444..7f3b67a8b48f 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-map.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst
@@ -55,7 +55,7 @@ MAP COMMANDS
 |		| **devmap** | **devmap_hash** | **sockmap** | **cpumap** | **xskmap** | **sockhash**
 |		| **cgroup_storage** | **reuseport_sockarray** | **percpu_cgroup_storage**
 |		| **queue** | **stack** | **sk_storage** | **struct_ops** | **ringbuf** | **inode_storage**
-|		| **task_storage** | **bloom_filter** }
+|		| **task_storage** | **bloom_filter** | **user_ringbuf** }
 
 DESCRIPTION
 ===========
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index 38b6bc9c26c3..9a6ca9f31133 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -1459,7 +1459,7 @@ static int do_help(int argc, char **argv)
 		"                 devmap | devmap_hash | sockmap | cpumap | xskmap | sockhash |\n"
 		"                 cgroup_storage | reuseport_sockarray | percpu_cgroup_storage |\n"
 		"                 queue | stack | sk_storage | struct_ops | ringbuf | inode_storage |\n"
-		"                 task_storage | bloom_filter }\n"
+		"                 task_storage | bloom_filter | user_ringbuf }\n"
 		"       " HELP_SPEC_OPTIONS " |\n"
 		"                    {-f|--bpffs} | {-n|--nomount} }\n"
 		"",
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 3df78c56c1bf..e18c85324db6 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -928,6 +928,7 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_INODE_STORAGE,
 	BPF_MAP_TYPE_TASK_STORAGE,
 	BPF_MAP_TYPE_BLOOM_FILTER,
+	BPF_MAP_TYPE_USER_RINGBUF,
 };
 
 /* Note that tracing related programs such as
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 2ca30ccc774c..d480da05b6de 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -163,6 +163,7 @@ static const char * const map_type_name[] = {
 	[BPF_MAP_TYPE_INODE_STORAGE]		= "inode_storage",
 	[BPF_MAP_TYPE_TASK_STORAGE]		= "task_storage",
 	[BPF_MAP_TYPE_BLOOM_FILTER]		= "bloom_filter",
+	[BPF_MAP_TYPE_USER_RINGBUF]             = "user_ringbuf",
 };
 
 static const char * const prog_type_name[] = {
-- 
cgit v1.2.3


From 20571567384428dfc9fe5cf9f2e942e1df13c2dd Mon Sep 17 00:00:00 2001
From: David Vernet <void@manifault.com>
Date: Mon, 19 Sep 2022 19:00:58 -0500
Subject: bpf: Add bpf_user_ringbuf_drain() helper

In a prior change, we added a new BPF_MAP_TYPE_USER_RINGBUF map type which
will allow user-space applications to publish messages to a ring buffer
that is consumed by a BPF program in kernel-space. In order for this
map-type to be useful, it will require a BPF helper function that BPF
programs can invoke to drain samples from the ring buffer, and invoke
callbacks on those samples. This change adds that capability via a new BPF
helper function:

bpf_user_ringbuf_drain(struct bpf_map *map, void *callback_fn, void *ctx,
                       u64 flags)

BPF programs may invoke this function to run callback_fn() on a series of
samples in the ring buffer. callback_fn() has the following signature:

long callback_fn(struct bpf_dynptr *dynptr, void *context);

Samples are provided to the callback in the form of struct bpf_dynptr *'s,
which the program can read using BPF helper functions for querying
struct bpf_dynptr's.

In order to support bpf_user_ringbuf_drain(), a new PTR_TO_DYNPTR register
type is added to the verifier to reflect a dynptr that was allocated by
a helper function and passed to a BPF program. Unlike PTR_TO_STACK
dynptrs which are allocated on the stack by a BPF program, PTR_TO_DYNPTR
dynptrs need not use reference tracking, as the BPF helper is trusted to
properly free the dynptr before returning. The verifier currently only
supports PTR_TO_DYNPTR registers that are also DYNPTR_TYPE_LOCAL.

Note that while the corresponding user-space libbpf logic will be added
in a subsequent patch, this patch does contain an implementation of the
.map_poll() callback for BPF_MAP_TYPE_USER_RINGBUF maps. This
.map_poll() callback guarantees that an epoll-waiting user-space
producer will receive at least one event notification whenever at least
one sample is drained in an invocation of bpf_user_ringbuf_drain(),
provided that the function is not invoked with the BPF_RB_NO_WAKEUP
flag. If the BPF_RB_FORCE_WAKEUP flag is provided, a wakeup
notification is sent even if no sample was drained.

Signed-off-by: David Vernet <void@manifault.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220920000100.477320-3-void@manifault.com
---
 include/linux/bpf.h            |  11 ++-
 include/uapi/linux/bpf.h       |  38 +++++++++
 kernel/bpf/helpers.c           |   2 +
 kernel/bpf/ringbuf.c           | 181 +++++++++++++++++++++++++++++++++++++++--
 kernel/bpf/verifier.c          |  61 +++++++++++++-
 tools/include/uapi/linux/bpf.h |  38 +++++++++
 6 files changed, 320 insertions(+), 11 deletions(-)

(limited to 'tools')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index e0dbe0c0a17e..33e543b86e1a 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -451,7 +451,7 @@ enum bpf_type_flag {
 	/* DYNPTR points to memory local to the bpf program. */
 	DYNPTR_TYPE_LOCAL	= BIT(8 + BPF_BASE_TYPE_BITS),
 
-	/* DYNPTR points to a ringbuf record. */
+	/* DYNPTR points to a kernel-produced ringbuf record. */
 	DYNPTR_TYPE_RINGBUF	= BIT(9 + BPF_BASE_TYPE_BITS),
 
 	/* Size is known at compile time. */
@@ -656,6 +656,7 @@ enum bpf_reg_type {
 	PTR_TO_MEM,		 /* reg points to valid memory region */
 	PTR_TO_BUF,		 /* reg points to a read/write buffer */
 	PTR_TO_FUNC,		 /* reg points to a bpf program function */
+	PTR_TO_DYNPTR,		 /* reg points to a dynptr */
 	__BPF_REG_TYPE_MAX,
 
 	/* Extended reg_types. */
@@ -1394,6 +1395,11 @@ struct bpf_array {
 #define BPF_MAP_CAN_READ	BIT(0)
 #define BPF_MAP_CAN_WRITE	BIT(1)
 
+/* Maximum number of user-producer ring buffer samples that can be drained in
+ * a call to bpf_user_ringbuf_drain().
+ */
+#define BPF_MAX_USER_RINGBUF_SAMPLES (128 * 1024)
+
 static inline u32 bpf_map_flags_to_cap(struct bpf_map *map)
 {
 	u32 access_flags = map->map_flags & (BPF_F_RDONLY_PROG | BPF_F_WRONLY_PROG);
@@ -2495,6 +2501,7 @@ extern const struct bpf_func_proto bpf_loop_proto;
 extern const struct bpf_func_proto bpf_copy_from_user_task_proto;
 extern const struct bpf_func_proto bpf_set_retval_proto;
 extern const struct bpf_func_proto bpf_get_retval_proto;
+extern const struct bpf_func_proto bpf_user_ringbuf_drain_proto;
 
 const struct bpf_func_proto *tracing_prog_func_proto(
   enum bpf_func_id func_id, const struct bpf_prog *prog);
@@ -2639,7 +2646,7 @@ enum bpf_dynptr_type {
 	BPF_DYNPTR_TYPE_INVALID,
 	/* Points to memory that is local to the bpf program */
 	BPF_DYNPTR_TYPE_LOCAL,
-	/* Underlying data is a ringbuf record */
+	/* Underlying data is a kernel-produced ringbuf record */
 	BPF_DYNPTR_TYPE_RINGBUF,
 };
 
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index e18c85324db6..ead35f39f185 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -5388,6 +5388,43 @@ union bpf_attr {
  *	Return
  *		Current *ktime*.
  *
+ * long bpf_user_ringbuf_drain(struct bpf_map *map, void *callback_fn, void *ctx, u64 flags)
+ *	Description
+ *		Drain samples from the specified user ring buffer, and invoke
+ *		the provided callback for each such sample:
+ *
+ *		long (\*callback_fn)(struct bpf_dynptr \*dynptr, void \*ctx);
+ *
+ *		If **callback_fn** returns 0, the helper will continue to try
+ *		and drain the next sample, up to a maximum of
+ *		BPF_MAX_USER_RINGBUF_SAMPLES samples. If the return value is 1,
+ *		the helper will skip the rest of the samples and return. Other
+ *		return values are not used now, and will be rejected by the
+ *		verifier.
+ *	Return
+ *		The number of drained samples if no error was encountered while
+ *		draining samples, or 0 if no samples were present in the ring
+ *		buffer. If a user-space producer was epoll-waiting on this map,
+ *		and at least one sample was drained, they will receive an event
+ *		notification notifying them of available space in the ring
+ *		buffer. If the BPF_RB_NO_WAKEUP flag is passed to this
+ *		function, no wakeup notification will be sent. If the
+ *		BPF_RB_FORCE_WAKEUP flag is passed, a wakeup notification will
+ *		be sent even if no sample was drained.
+ *
+ *		On failure, the returned value is one of the following:
+ *
+ *		**-EBUSY** if the ring buffer is contended, and another calling
+ *		context was concurrently draining the ring buffer.
+ *
+ *		**-EINVAL** if user-space is not properly tracking the ring
+ *		buffer due to the producer position not being aligned to 8
+ *		bytes, a sample not being aligned to 8 bytes, or the producer
+ *		position not matching the advertised length of a sample.
+ *
+ *		**-E2BIG** if user-space has tried to publish a sample which is
+ *		larger than the size of the ring buffer, or which cannot fit
+ *		within a struct bpf_dynptr.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5599,6 +5636,7 @@ union bpf_attr {
 	FN(tcp_raw_check_syncookie_ipv4),	\
 	FN(tcp_raw_check_syncookie_ipv6),	\
 	FN(ktime_get_tai_ns),		\
+	FN(user_ringbuf_drain),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 41aeaf3862ec..cb5564c77482 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1659,6 +1659,8 @@ bpf_base_func_proto(enum bpf_func_id func_id)
 		return &bpf_for_each_map_elem_proto;
 	case BPF_FUNC_loop:
 		return &bpf_loop_proto;
+	case BPF_FUNC_user_ringbuf_drain:
+		return &bpf_user_ringbuf_drain_proto;
 	default:
 		break;
 	}
diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c
index 754e915748fb..9e832acf4692 100644
--- a/kernel/bpf/ringbuf.c
+++ b/kernel/bpf/ringbuf.c
@@ -38,6 +38,22 @@ struct bpf_ringbuf {
 	struct page **pages;
 	int nr_pages;
 	spinlock_t spinlock ____cacheline_aligned_in_smp;
+	/* For user-space producer ring buffers, an atomic_t busy bit is used
+	 * to synchronize access to the ring buffers in the kernel, rather than
+	 * the spinlock that is used for kernel-producer ring buffers. This is
+	 * done because the ring buffer must hold a lock across a BPF program's
+	 * callback:
+	 *
+	 *    __bpf_user_ringbuf_peek() // lock acquired
+	 * -> program callback_fn()
+	 * -> __bpf_user_ringbuf_sample_release() // lock released
+	 *
+	 * It is unsafe and incorrect to hold an IRQ spinlock across what could
+	 * be a long execution window, so we instead simply disallow concurrent
+	 * access to the ring buffer by kernel consumers, and return -EBUSY from
+	 * __bpf_user_ringbuf_peek() if the busy bit is held by another task.
+	 */
+	atomic_t busy ____cacheline_aligned_in_smp;
 	/* Consumer and producer counters are put into separate pages to
 	 * allow each position to be mapped with different permissions.
 	 * This prevents a user-space application from modifying the
@@ -153,6 +169,7 @@ static struct bpf_ringbuf *bpf_ringbuf_alloc(size_t data_sz, int numa_node)
 		return NULL;
 
 	spin_lock_init(&rb->spinlock);
+	atomic_set(&rb->busy, 0);
 	init_waitqueue_head(&rb->waitq);
 	init_irq_work(&rb->work, bpf_ringbuf_notify);
 
@@ -288,8 +305,13 @@ static unsigned long ringbuf_avail_data_sz(struct bpf_ringbuf *rb)
 	return prod_pos - cons_pos;
 }
 
-static __poll_t ringbuf_map_poll(struct bpf_map *map, struct file *filp,
-				 struct poll_table_struct *pts)
+static u32 ringbuf_total_data_sz(const struct bpf_ringbuf *rb)
+{
+	return rb->mask + 1;
+}
+
+static __poll_t ringbuf_map_poll_kern(struct bpf_map *map, struct file *filp,
+				      struct poll_table_struct *pts)
 {
 	struct bpf_ringbuf_map *rb_map;
 
@@ -301,13 +323,26 @@ static __poll_t ringbuf_map_poll(struct bpf_map *map, struct file *filp,
 	return 0;
 }
 
+static __poll_t ringbuf_map_poll_user(struct bpf_map *map, struct file *filp,
+				      struct poll_table_struct *pts)
+{
+	struct bpf_ringbuf_map *rb_map;
+
+	rb_map = container_of(map, struct bpf_ringbuf_map, map);
+	poll_wait(filp, &rb_map->rb->waitq, pts);
+
+	if (ringbuf_avail_data_sz(rb_map->rb) < ringbuf_total_data_sz(rb_map->rb))
+		return EPOLLOUT | EPOLLWRNORM;
+	return 0;
+}
+
 BTF_ID_LIST_SINGLE(ringbuf_map_btf_ids, struct, bpf_ringbuf_map)
 const struct bpf_map_ops ringbuf_map_ops = {
 	.map_meta_equal = bpf_map_meta_equal,
 	.map_alloc = ringbuf_map_alloc,
 	.map_free = ringbuf_map_free,
 	.map_mmap = ringbuf_map_mmap_kern,
-	.map_poll = ringbuf_map_poll,
+	.map_poll = ringbuf_map_poll_kern,
 	.map_lookup_elem = ringbuf_map_lookup_elem,
 	.map_update_elem = ringbuf_map_update_elem,
 	.map_delete_elem = ringbuf_map_delete_elem,
@@ -321,6 +356,7 @@ const struct bpf_map_ops user_ringbuf_map_ops = {
 	.map_alloc = ringbuf_map_alloc,
 	.map_free = ringbuf_map_free,
 	.map_mmap = ringbuf_map_mmap_user,
+	.map_poll = ringbuf_map_poll_user,
 	.map_lookup_elem = ringbuf_map_lookup_elem,
 	.map_update_elem = ringbuf_map_update_elem,
 	.map_delete_elem = ringbuf_map_delete_elem,
@@ -362,7 +398,7 @@ static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size)
 		return NULL;
 
 	len = round_up(size + BPF_RINGBUF_HDR_SZ, 8);
-	if (len > rb->mask + 1)
+	if (len > ringbuf_total_data_sz(rb))
 		return NULL;
 
 	cons_pos = smp_load_acquire(&rb->consumer_pos);
@@ -509,7 +545,7 @@ BPF_CALL_2(bpf_ringbuf_query, struct bpf_map *, map, u64, flags)
 	case BPF_RB_AVAIL_DATA:
 		return ringbuf_avail_data_sz(rb);
 	case BPF_RB_RING_SIZE:
-		return rb->mask + 1;
+		return ringbuf_total_data_sz(rb);
 	case BPF_RB_CONS_POS:
 		return smp_load_acquire(&rb->consumer_pos);
 	case BPF_RB_PROD_POS:
@@ -603,3 +639,138 @@ const struct bpf_func_proto bpf_ringbuf_discard_dynptr_proto = {
 	.arg1_type	= ARG_PTR_TO_DYNPTR | DYNPTR_TYPE_RINGBUF | OBJ_RELEASE,
 	.arg2_type	= ARG_ANYTHING,
 };
+
+static int __bpf_user_ringbuf_peek(struct bpf_ringbuf *rb, void **sample, u32 *size)
+{
+	int err;
+	u32 hdr_len, sample_len, total_len, flags, *hdr;
+	u64 cons_pos, prod_pos;
+
+	/* Synchronizes with smp_store_release() in user-space producer. */
+	prod_pos = smp_load_acquire(&rb->producer_pos);
+	if (prod_pos % 8)
+		return -EINVAL;
+
+	/* Synchronizes with smp_store_release() in __bpf_user_ringbuf_sample_release() */
+	cons_pos = smp_load_acquire(&rb->consumer_pos);
+	if (cons_pos >= prod_pos)
+		return -ENODATA;
+
+	hdr = (u32 *)((uintptr_t)rb->data + (uintptr_t)(cons_pos & rb->mask));
+	/* Synchronizes with smp_store_release() in user-space producer. */
+	hdr_len = smp_load_acquire(hdr);
+	flags = hdr_len & (BPF_RINGBUF_BUSY_BIT | BPF_RINGBUF_DISCARD_BIT);
+	sample_len = hdr_len & ~flags;
+	total_len = round_up(sample_len + BPF_RINGBUF_HDR_SZ, 8);
+
+	/* The sample must fit within the region advertised by the producer position. */
+	if (total_len > prod_pos - cons_pos)
+		return -EINVAL;
+
+	/* The sample must fit within the data region of the ring buffer. */
+	if (total_len > ringbuf_total_data_sz(rb))
+		return -E2BIG;
+
+	/* The sample must fit into a struct bpf_dynptr. */
+	err = bpf_dynptr_check_size(sample_len);
+	if (err)
+		return -E2BIG;
+
+	if (flags & BPF_RINGBUF_DISCARD_BIT) {
+		/* If the discard bit is set, the sample should be skipped.
+		 *
+		 * Update the consumer pos, and return -EAGAIN so the caller
+		 * knows to skip this sample and try to read the next one.
+		 */
+		smp_store_release(&rb->consumer_pos, cons_pos + total_len);
+		return -EAGAIN;
+	}
+
+	if (flags & BPF_RINGBUF_BUSY_BIT)
+		return -ENODATA;
+
+	*sample = (void *)((uintptr_t)rb->data +
+			   (uintptr_t)((cons_pos + BPF_RINGBUF_HDR_SZ) & rb->mask));
+	*size = sample_len;
+	return 0;
+}
+
+static void __bpf_user_ringbuf_sample_release(struct bpf_ringbuf *rb, size_t size, u64 flags)
+{
+	u64 consumer_pos;
+	u32 rounded_size = round_up(size + BPF_RINGBUF_HDR_SZ, 8);
+
+	/* Using smp_load_acquire() is unnecessary here, as the busy-bit
+	 * prevents another task from writing to consumer_pos after it was read
+	 * by this task with smp_load_acquire() in __bpf_user_ringbuf_peek().
+	 */
+	consumer_pos = rb->consumer_pos;
+	 /* Synchronizes with smp_load_acquire() in user-space producer. */
+	smp_store_release(&rb->consumer_pos, consumer_pos + rounded_size);
+}
+
+BPF_CALL_4(bpf_user_ringbuf_drain, struct bpf_map *, map,
+	   void *, callback_fn, void *, callback_ctx, u64, flags)
+{
+	struct bpf_ringbuf *rb;
+	long samples, discarded_samples = 0, ret = 0;
+	bpf_callback_t callback = (bpf_callback_t)callback_fn;
+	u64 wakeup_flags = BPF_RB_NO_WAKEUP | BPF_RB_FORCE_WAKEUP;
+	int busy = 0;
+
+	if (unlikely(flags & ~wakeup_flags))
+		return -EINVAL;
+
+	rb = container_of(map, struct bpf_ringbuf_map, map)->rb;
+
+	/* If another consumer is already draining the ring buffer, fail with -EBUSY. */
+	if (!atomic_try_cmpxchg(&rb->busy, &busy, 1))
+		return -EBUSY;
+
+	for (samples = 0; samples < BPF_MAX_USER_RINGBUF_SAMPLES && ret == 0; samples++) {
+		int err;
+		u32 size;
+		void *sample;
+		struct bpf_dynptr_kern dynptr;
+
+		err = __bpf_user_ringbuf_peek(rb, &sample, &size);
+		if (err) {
+			if (err == -ENODATA) {
+				break;
+			} else if (err == -EAGAIN) {
+				discarded_samples++;
+				continue;
+			} else {
+				ret = err;
+				goto schedule_work_return;
+			}
+		}
+
+		bpf_dynptr_init(&dynptr, sample, BPF_DYNPTR_TYPE_LOCAL, 0, size);
+		ret = callback((uintptr_t)&dynptr, (uintptr_t)callback_ctx, 0, 0, 0);
+		__bpf_user_ringbuf_sample_release(rb, size, flags);
+	}
+	ret = samples - discarded_samples;
+
+schedule_work_return:
+	/* Prevent the clearing of the busy-bit from being reordered before the
+	 * storing of any rb consumer or producer positions.
+	 */
+	smp_mb__before_atomic();
+	atomic_set(&rb->busy, 0);
+
+	if (flags & BPF_RB_FORCE_WAKEUP)
+		irq_work_queue(&rb->work);
+	else if (!(flags & BPF_RB_NO_WAKEUP) && samples > 0)
+		irq_work_queue(&rb->work);
+	return ret;
+}
+
+const struct bpf_func_proto bpf_user_ringbuf_drain_proto = {
+	.func		= bpf_user_ringbuf_drain,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_CONST_MAP_PTR,
+	.arg2_type	= ARG_PTR_TO_FUNC,
+	.arg3_type	= ARG_PTR_TO_STACK_OR_NULL,
+	.arg4_type	= ARG_ANYTHING,
+};
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 83710b60e708..c76fa45a5906 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -563,6 +563,7 @@ static const char *reg_type_str(struct bpf_verifier_env *env,
 		[PTR_TO_BUF]		= "buf",
 		[PTR_TO_FUNC]		= "func",
 		[PTR_TO_MAP_KEY]	= "map_key",
+		[PTR_TO_DYNPTR]		= "dynptr_ptr",
 	};
 
 	if (type & PTR_MAYBE_NULL) {
@@ -5688,6 +5689,12 @@ static const struct bpf_reg_types stack_ptr_types = { .types = { PTR_TO_STACK }
 static const struct bpf_reg_types const_str_ptr_types = { .types = { PTR_TO_MAP_VALUE } };
 static const struct bpf_reg_types timer_types = { .types = { PTR_TO_MAP_VALUE } };
 static const struct bpf_reg_types kptr_types = { .types = { PTR_TO_MAP_VALUE } };
+static const struct bpf_reg_types dynptr_types = {
+	.types = {
+		PTR_TO_STACK,
+		PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL,
+	}
+};
 
 static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
 	[ARG_PTR_TO_MAP_KEY]		= &map_key_value_types,
@@ -5714,7 +5721,7 @@ static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
 	[ARG_PTR_TO_CONST_STR]		= &const_str_ptr_types,
 	[ARG_PTR_TO_TIMER]		= &timer_types,
 	[ARG_PTR_TO_KPTR]		= &kptr_types,
-	[ARG_PTR_TO_DYNPTR]		= &stack_ptr_types,
+	[ARG_PTR_TO_DYNPTR]		= &dynptr_types,
 };
 
 static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
@@ -6066,6 +6073,13 @@ skip_type_check:
 		err = check_mem_size_reg(env, reg, regno, true, meta);
 		break;
 	case ARG_PTR_TO_DYNPTR:
+		/* We only need to check for initialized / uninitialized helper
+		 * dynptr args if the dynptr is not PTR_TO_DYNPTR, as the
+		 * assumption is that if it is, that a helper function
+		 * initialized the dynptr on behalf of the BPF program.
+		 */
+		if (base_type(reg->type) == PTR_TO_DYNPTR)
+			break;
 		if (arg_type & MEM_UNINIT) {
 			if (!is_dynptr_reg_valid_uninit(env, reg)) {
 				verbose(env, "Dynptr has to be an uninitialized dynptr\n");
@@ -6241,7 +6255,9 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 			goto error;
 		break;
 	case BPF_MAP_TYPE_USER_RINGBUF:
-		goto error;
+		if (func_id != BPF_FUNC_user_ringbuf_drain)
+			goto error;
+		break;
 	case BPF_MAP_TYPE_STACK_TRACE:
 		if (func_id != BPF_FUNC_get_stackid)
 			goto error;
@@ -6361,6 +6377,10 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 		if (map->map_type != BPF_MAP_TYPE_RINGBUF)
 			goto error;
 		break;
+	case BPF_FUNC_user_ringbuf_drain:
+		if (map->map_type != BPF_MAP_TYPE_USER_RINGBUF)
+			goto error;
+		break;
 	case BPF_FUNC_get_stackid:
 		if (map->map_type != BPF_MAP_TYPE_STACK_TRACE)
 			goto error;
@@ -6887,6 +6907,29 @@ static int set_find_vma_callback_state(struct bpf_verifier_env *env,
 	return 0;
 }
 
+static int set_user_ringbuf_callback_state(struct bpf_verifier_env *env,
+					   struct bpf_func_state *caller,
+					   struct bpf_func_state *callee,
+					   int insn_idx)
+{
+	/* bpf_user_ringbuf_drain(struct bpf_map *map, void *callback_fn,
+	 *			  void *callback_ctx, u64 flags);
+	 * callback_fn(struct bpf_dynptr *dynptr, void *callback_ctx);
+	 */
+	__mark_reg_not_init(env, &callee->regs[BPF_REG_0]);
+	callee->regs[BPF_REG_1].type = PTR_TO_DYNPTR | DYNPTR_TYPE_LOCAL;
+	__mark_reg_known_zero(&callee->regs[BPF_REG_1]);
+	callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
+
+	/* unused */
+	__mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
+	__mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
+	__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
+
+	callee->in_callback_fn = true;
+	return 0;
+}
+
 static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
 {
 	struct bpf_verifier_state *state = env->cur_state;
@@ -7346,12 +7389,18 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
 	case BPF_FUNC_dynptr_data:
 		for (i = 0; i < MAX_BPF_FUNC_REG_ARGS; i++) {
 			if (arg_type_is_dynptr(fn->arg_type[i])) {
+				struct bpf_reg_state *reg = &regs[BPF_REG_1 + i];
+
 				if (meta.ref_obj_id) {
 					verbose(env, "verifier internal error: meta.ref_obj_id already set\n");
 					return -EFAULT;
 				}
-				/* Find the id of the dynptr we're tracking the reference of */
-				meta.ref_obj_id = stack_slot_get_id(env, &regs[BPF_REG_1 + i]);
+
+				if (base_type(reg->type) != PTR_TO_DYNPTR)
+					/* Find the id of the dynptr we're
+					 * tracking the reference of
+					 */
+					meta.ref_obj_id = stack_slot_get_id(env, reg);
 				break;
 			}
 		}
@@ -7360,6 +7409,10 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
 			return -EFAULT;
 		}
 		break;
+	case BPF_FUNC_user_ringbuf_drain:
+		err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
+					set_user_ringbuf_callback_state);
+		break;
 	}
 
 	if (err)
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index e18c85324db6..ead35f39f185 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -5388,6 +5388,43 @@ union bpf_attr {
  *	Return
  *		Current *ktime*.
  *
+ * long bpf_user_ringbuf_drain(struct bpf_map *map, void *callback_fn, void *ctx, u64 flags)
+ *	Description
+ *		Drain samples from the specified user ring buffer, and invoke
+ *		the provided callback for each such sample:
+ *
+ *		long (\*callback_fn)(struct bpf_dynptr \*dynptr, void \*ctx);
+ *
+ *		If **callback_fn** returns 0, the helper will continue to try
+ *		and drain the next sample, up to a maximum of
+ *		BPF_MAX_USER_RINGBUF_SAMPLES samples. If the return value is 1,
+ *		the helper will skip the rest of the samples and return. Other
+ *		return values are not used now, and will be rejected by the
+ *		verifier.
+ *	Return
+ *		The number of drained samples if no error was encountered while
+ *		draining samples, or 0 if no samples were present in the ring
+ *		buffer. If a user-space producer was epoll-waiting on this map,
+ *		and at least one sample was drained, they will receive an event
+ *		notification notifying them of available space in the ring
+ *		buffer. If the BPF_RB_NO_WAKEUP flag is passed to this
+ *		function, no wakeup notification will be sent. If the
+ *		BPF_RB_FORCE_WAKEUP flag is passed, a wakeup notification will
+ *		be sent even if no sample was drained.
+ *
+ *		On failure, the returned value is one of the following:
+ *
+ *		**-EBUSY** if the ring buffer is contended, and another calling
+ *		context was concurrently draining the ring buffer.
+ *
+ *		**-EINVAL** if user-space is not properly tracking the ring
+ *		buffer due to the producer position not being aligned to 8
+ *		bytes, a sample not being aligned to 8 bytes, or the producer
+ *		position not matching the advertised length of a sample.
+ *
+ *		**-E2BIG** if user-space has tried to publish a sample which is
+ *		larger than the size of the ring buffer, or which cannot fit
+ *		within a struct bpf_dynptr.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -5599,6 +5636,7 @@ union bpf_attr {
 	FN(tcp_raw_check_syncookie_ipv4),	\
 	FN(tcp_raw_check_syncookie_ipv6),	\
 	FN(ktime_get_tai_ns),		\
+	FN(user_ringbuf_drain),		\
 	/* */
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
-- 
cgit v1.2.3


From b66ccae01f1ddce47fe2c7f393a3a5c5ab3d7f06 Mon Sep 17 00:00:00 2001
From: David Vernet <void@manifault.com>
Date: Mon, 19 Sep 2022 19:00:59 -0500
Subject: bpf: Add libbpf logic for user-space ring buffer

Now that all of the logic is in place in the kernel to support user-space
produced ring buffers, we can add the user-space logic to libbpf. This
patch therefore adds the following public symbols to libbpf:

struct user_ring_buffer *
user_ring_buffer__new(int map_fd,
		      const struct user_ring_buffer_opts *opts);
void *user_ring_buffer__reserve(struct user_ring_buffer *rb, __u32 size);
void *user_ring_buffer__reserve_blocking(struct user_ring_buffer *rb,
                                         __u32 size, int timeout_ms);
void user_ring_buffer__submit(struct user_ring_buffer *rb, void *sample);
void user_ring_buffer__discard(struct user_ring_buffer *rb, void *sample);
void user_ring_buffer__free(struct user_ring_buffer *rb);

A user-space producer must first create a struct user_ring_buffer * object
with user_ring_buffer__new(), and can then reserve samples in the
ring buffer using one of the following two symbols:

void *user_ring_buffer__reserve(struct user_ring_buffer *rb, __u32 size);
void *user_ring_buffer__reserve_blocking(struct user_ring_buffer *rb,
                                         __u32 size, int timeout_ms);

With user_ring_buffer__reserve(), a pointer to a 'size' region of the ring
buffer will be returned if sufficient space is available in the buffer.
user_ring_buffer__reserve_blocking() provides similar semantics, but will
block for up to 'timeout_ms' in epoll_wait if there is insufficient space
in the buffer. This function has the guarantee from the kernel that it will
receive at least one event-notification per invocation to
bpf_user_ringbuf_drain(), provided that at least one sample is drained, and
the BPF program did not pass the BPF_RB_NO_WAKEUP flag to
bpf_user_ringbuf_drain().

Once a sample is reserved, it must either be committed to the ring buffer
with user_ring_buffer__submit(), or discarded with
user_ring_buffer__discard().

Signed-off-by: David Vernet <void@manifault.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220920000100.477320-4-void@manifault.com
---
 tools/lib/bpf/libbpf.c         |  10 +-
 tools/lib/bpf/libbpf.h         | 107 ++++++++++++++++
 tools/lib/bpf/libbpf.map       |  10 ++
 tools/lib/bpf/libbpf_probes.c  |   1 +
 tools/lib/bpf/libbpf_version.h |   2 +-
 tools/lib/bpf/ringbuf.c        | 271 +++++++++++++++++++++++++++++++++++++++++
 6 files changed, 398 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index d480da05b6de..67bc18506150 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -2373,6 +2373,12 @@ static size_t adjust_ringbuf_sz(size_t sz)
 	return sz;
 }
 
+static bool map_is_ringbuf(const struct bpf_map *map)
+{
+	return map->def.type == BPF_MAP_TYPE_RINGBUF ||
+	       map->def.type == BPF_MAP_TYPE_USER_RINGBUF;
+}
+
 static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def)
 {
 	map->def.type = def->map_type;
@@ -2387,7 +2393,7 @@ static void fill_map_from_def(struct bpf_map *map, const struct btf_map_def *def
 	map->btf_value_type_id = def->value_type_id;
 
 	/* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */
-	if (map->def.type == BPF_MAP_TYPE_RINGBUF)
+	if (map_is_ringbuf(map))
 		map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries);
 
 	if (def->parts & MAP_DEF_MAP_TYPE)
@@ -4370,7 +4376,7 @@ int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries)
 	map->def.max_entries = max_entries;
 
 	/* auto-adjust BPF ringbuf map max_entries to be a multiple of page size */
-	if (map->def.type == BPF_MAP_TYPE_RINGBUF)
+	if (map_is_ringbuf(map))
 		map->def.max_entries = adjust_ringbuf_sz(map->def.max_entries);
 
 	return 0;
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 88a1ac34b12a..e2d8c17f2e85 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -1011,6 +1011,7 @@ LIBBPF_API int bpf_tc_query(const struct bpf_tc_hook *hook,
 
 /* Ring buffer APIs */
 struct ring_buffer;
+struct user_ring_buffer;
 
 typedef int (*ring_buffer_sample_fn)(void *ctx, void *data, size_t size);
 
@@ -1030,6 +1031,112 @@ LIBBPF_API int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms);
 LIBBPF_API int ring_buffer__consume(struct ring_buffer *rb);
 LIBBPF_API int ring_buffer__epoll_fd(const struct ring_buffer *rb);
 
+struct user_ring_buffer_opts {
+	size_t sz; /* size of this struct, for forward/backward compatibility */
+};
+
+#define user_ring_buffer_opts__last_field sz
+
+/* @brief **user_ring_buffer__new()** creates a new instance of a user ring
+ * buffer.
+ *
+ * @param map_fd A file descriptor to a BPF_MAP_TYPE_USER_RINGBUF map.
+ * @param opts Options for how the ring buffer should be created.
+ * @return A user ring buffer on success; NULL and errno being set on a
+ * failure.
+ */
+LIBBPF_API struct user_ring_buffer *
+user_ring_buffer__new(int map_fd, const struct user_ring_buffer_opts *opts);
+
+/* @brief **user_ring_buffer__reserve()** reserves a pointer to a sample in the
+ * user ring buffer.
+ * @param rb A pointer to a user ring buffer.
+ * @param size The size of the sample, in bytes.
+ * @return A pointer to an 8-byte aligned reserved region of the user ring
+ * buffer; NULL, and errno being set if a sample could not be reserved.
+ *
+ * This function is *not* thread safe, and callers must synchronize accessing
+ * this function if there are multiple producers.  If a size is requested that
+ * is larger than the size of the entire ring buffer, errno will be set to
+ * E2BIG and NULL is returned. If the ring buffer could accommodate the size,
+ * but currently does not have enough space, errno is set to ENOSPC and NULL is
+ * returned.
+ *
+ * After initializing the sample, callers must invoke
+ * **user_ring_buffer__submit()** to post the sample to the kernel. Otherwise,
+ * the sample must be freed with **user_ring_buffer__discard()**.
+ */
+LIBBPF_API void *user_ring_buffer__reserve(struct user_ring_buffer *rb, __u32 size);
+
+/* @brief **user_ring_buffer__reserve_blocking()** reserves a record in the
+ * ring buffer, possibly blocking for up to @timeout_ms until a sample becomes
+ * available.
+ * @param rb The user ring buffer.
+ * @param size The size of the sample, in bytes.
+ * @param timeout_ms The amount of time, in milliseconds, for which the caller
+ * should block when waiting for a sample. -1 causes the caller to block
+ * indefinitely.
+ * @return A pointer to an 8-byte aligned reserved region of the user ring
+ * buffer; NULL, and errno being set if a sample could not be reserved.
+ *
+ * This function is *not* thread safe, and callers must synchronize
+ * accessing this function if there are multiple producers.
+ *
+ * If **timeout_ms** is -1, the function will block indefinitely until a sample
+ * becomes available. Otherwise, **timeout_ms** must be non-negative, or errno
+ * is set to EINVAL, and NULL is returned. If **timeout_ms** is 0, no blocking
+ * will occur and the function will return immediately after attempting to
+ * reserve a sample.
+ *
+ * If **size** is larger than the size of the entire ring buffer, errno is set
+ * to E2BIG and NULL is returned. If the ring buffer could accommodate
+ * **size**, but currently does not have enough space, the caller will block
+ * until at most **timeout_ms** has elapsed. If insufficient space is available
+ * at that time, errno is set to ENOSPC, and NULL is returned.
+ *
+ * The kernel guarantees that it will wake up this thread to check if
+ * sufficient space is available in the ring buffer at least once per
+ * invocation of the **bpf_user_ringbuf_drain()** helper function, provided that at
+ * least one sample is consumed, and the BPF program did not invoke the
+ * function with BPF_RB_NO_WAKEUP. A wakeup may occur sooner than that, but the
+ * kernel does not guarantee this. If the helper function is invoked with
+ * BPF_RB_FORCE_WAKEUP, a wakeup event will be sent even if no sample is
+ * consumed.
+ *
+ * When a sample of size **size** is found within **timeout_ms**, a pointer to
+ * the sample is returned. After initializing the sample, callers must invoke
+ * **user_ring_buffer__submit()** to post the sample to the ring buffer.
+ * Otherwise, the sample must be freed with **user_ring_buffer__discard()**.
+ */
+LIBBPF_API void *user_ring_buffer__reserve_blocking(struct user_ring_buffer *rb,
+						    __u32 size,
+						    int timeout_ms);
+
+/* @brief **user_ring_buffer__submit()** submits a previously reserved sample
+ * into the ring buffer.
+ * @param rb The user ring buffer.
+ * @param sample A reserved sample.
+ *
+ * It is not necessary to synchronize amongst multiple producers when invoking
+ * this function.
+ */
+LIBBPF_API void user_ring_buffer__submit(struct user_ring_buffer *rb, void *sample);
+
+/* @brief **user_ring_buffer__discard()** discards a previously reserved sample.
+ * @param rb The user ring buffer.
+ * @param sample A reserved sample.
+ *
+ * It is not necessary to synchronize amongst multiple producers when invoking
+ * this function.
+ */
+LIBBPF_API void user_ring_buffer__discard(struct user_ring_buffer *rb, void *sample);
+
+/* @brief **user_ring_buffer__free()** frees a ring buffer that was previously
+ * created with **user_ring_buffer__new()**.
+ * @param rb The user ring buffer being freed.
+ */
+LIBBPF_API void user_ring_buffer__free(struct user_ring_buffer *rb);
+
 /* Perf buffer APIs */
 struct perf_buffer;
 
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 2b928dc21af0..c1d6aa7c82b6 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -368,3 +368,13 @@ LIBBPF_1.0.0 {
 		libbpf_bpf_prog_type_str;
 		perf_buffer__buffer;
 };
+
+LIBBPF_1.1.0 {
+	global:
+		user_ring_buffer__discard;
+		user_ring_buffer__free;
+		user_ring_buffer__new;
+		user_ring_buffer__reserve;
+		user_ring_buffer__reserve_blocking;
+		user_ring_buffer__submit;
+} LIBBPF_1.0.0;
diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c
index 6d495656f554..f3a8e8e74eb8 100644
--- a/tools/lib/bpf/libbpf_probes.c
+++ b/tools/lib/bpf/libbpf_probes.c
@@ -231,6 +231,7 @@ static int probe_map_create(enum bpf_map_type map_type)
 			return btf_fd;
 		break;
 	case BPF_MAP_TYPE_RINGBUF:
+	case BPF_MAP_TYPE_USER_RINGBUF:
 		key_size = 0;
 		value_size = 0;
 		max_entries = 4096;
diff --git a/tools/lib/bpf/libbpf_version.h b/tools/lib/bpf/libbpf_version.h
index 2fb2f4290080..e944f5bce728 100644
--- a/tools/lib/bpf/libbpf_version.h
+++ b/tools/lib/bpf/libbpf_version.h
@@ -4,6 +4,6 @@
 #define __LIBBPF_VERSION_H
 
 #define LIBBPF_MAJOR_VERSION 1
-#define LIBBPF_MINOR_VERSION 0
+#define LIBBPF_MINOR_VERSION 1
 
 #endif /* __LIBBPF_VERSION_H */
diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c
index 8bc117bcc7bc..d285171d4b69 100644
--- a/tools/lib/bpf/ringbuf.c
+++ b/tools/lib/bpf/ringbuf.c
@@ -16,6 +16,7 @@
 #include <asm/barrier.h>
 #include <sys/mman.h>
 #include <sys/epoll.h>
+#include <time.h>
 
 #include "libbpf.h"
 #include "libbpf_internal.h"
@@ -39,6 +40,23 @@ struct ring_buffer {
 	int ring_cnt;
 };
 
+struct user_ring_buffer {
+	struct epoll_event event;
+	unsigned long *consumer_pos;
+	unsigned long *producer_pos;
+	void *data;
+	unsigned long mask;
+	size_t page_size;
+	int map_fd;
+	int epoll_fd;
+};
+
+/* 8-byte ring buffer header structure */
+struct ringbuf_hdr {
+	__u32 len;
+	__u32 pad;
+};
+
 static void ringbuf_unmap_ring(struct ring_buffer *rb, struct ring *r)
 {
 	if (r->consumer_pos) {
@@ -300,3 +318,256 @@ int ring_buffer__epoll_fd(const struct ring_buffer *rb)
 {
 	return rb->epoll_fd;
 }
+
+static void user_ringbuf_unmap_ring(struct user_ring_buffer *rb)
+{
+	if (rb->consumer_pos) {
+		munmap(rb->consumer_pos, rb->page_size);
+		rb->consumer_pos = NULL;
+	}
+	if (rb->producer_pos) {
+		munmap(rb->producer_pos, rb->page_size + 2 * (rb->mask + 1));
+		rb->producer_pos = NULL;
+	}
+}
+
+void user_ring_buffer__free(struct user_ring_buffer *rb)
+{
+	if (!rb)
+		return;
+
+	user_ringbuf_unmap_ring(rb);
+
+	if (rb->epoll_fd >= 0)
+		close(rb->epoll_fd);
+
+	free(rb);
+}
+
+/* Look up the map behind @map_fd, mmap() its consumer, producer and data pages
+ * into @rb and add the fd to @rb's epoll instance. Returns 0 or -errno.
+ */
+static int user_ringbuf_map(struct user_ring_buffer *rb, int map_fd)
+{
+	struct bpf_map_info info;
+	__u32 len = sizeof(info);
+	void *tmp;
+	int err;
+
+	memset(&info, 0, sizeof(info));
+
+	if (bpf_obj_get_info_by_fd(map_fd, &info, &len)) {
+		err = -errno;
+		pr_warn("user ringbuf: failed to get map info for fd=%d: %d\n", map_fd, err);
+		return err;
+	}
+
+	if (info.type != BPF_MAP_TYPE_USER_RINGBUF) {
+		pr_warn("user ringbuf: map fd=%d is not BPF_MAP_TYPE_USER_RINGBUF\n", map_fd);
+		return -EINVAL;
+	}
+
+	rb->map_fd = map_fd;
+	rb->mask = info.max_entries - 1;
+
+	/* Map read-only consumer page */
+	tmp = mmap(NULL, rb->page_size, PROT_READ, MAP_SHARED, map_fd, 0);
+	if (tmp == MAP_FAILED) {
+		err = -errno;
+		pr_warn("user ringbuf: failed to mmap consumer page for map fd=%d: %d\n",
+			map_fd, err);
+		return err;
+	}
+	rb->consumer_pos = tmp;
+
+	/* Map read-write the producer page and data pages. The data region is
+	 * mapped as twice the ring buffer size so that samples wrapping around
+	 * the end of the buffer can be read and written simply (see the kernel
+	 * implementation). Widen before doubling: max_entries is 32-bit.
+	 */
+	tmp = mmap(NULL, rb->page_size + 2 * (size_t)info.max_entries,
+		   PROT_READ | PROT_WRITE, MAP_SHARED, map_fd, rb->page_size);
+	if (tmp == MAP_FAILED) {
+		err = -errno;
+		pr_warn("user ringbuf: failed to mmap data pages for map fd=%d: %d\n",
+			map_fd, err);
+		return err;
+	}
+
+	rb->producer_pos = tmp;
+	rb->data = tmp + rb->page_size;
+
+	rb->event.events = EPOLLOUT;
+	if (epoll_ctl(rb->epoll_fd, EPOLL_CTL_ADD, map_fd, &rb->event) < 0) {
+		err = -errno;
+		pr_warn("user ringbuf: failed to epoll add map fd=%d: %d\n", map_fd, err);
+		return err;
+	}
+
+	return 0;
+}
+
+struct user_ring_buffer *
+user_ring_buffer__new(int map_fd, const struct user_ring_buffer_opts *opts)
+{
+	struct user_ring_buffer *rb;
+	int err;
+
+	if (!OPTS_VALID(opts, user_ring_buffer_opts))
+		return errno = EINVAL, NULL;
+
+	rb = calloc(1, sizeof(*rb));
+	if (!rb)
+		return errno = ENOMEM, NULL;
+
+	rb->page_size = getpagesize();
+
+	rb->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
+	if (rb->epoll_fd < 0) {
+		err = -errno;
+		pr_warn("user ringbuf: failed to create epoll instance: %d\n", err);
+		goto err_out;
+	}
+
+	err = user_ringbuf_map(rb, map_fd);
+	if (err)
+		goto err_out;
+
+	return rb;
+
+err_out:
+	user_ring_buffer__free(rb);
+	return errno = -err, NULL;
+}
+
+static void user_ringbuf_commit(struct user_ring_buffer *rb, void *sample, bool discard)
+{
+	__u32 new_len;
+	struct ringbuf_hdr *hdr;
+	uintptr_t hdr_offset;
+
+	hdr_offset = rb->mask + 1 + (sample - rb->data) - BPF_RINGBUF_HDR_SZ;
+	hdr = rb->data + (hdr_offset & rb->mask);
+
+	new_len = hdr->len & ~BPF_RINGBUF_BUSY_BIT;
+	if (discard)
+		new_len |= BPF_RINGBUF_DISCARD_BIT;
+
+	/* Synchronizes with smp_load_acquire() in __bpf_user_ringbuf_peek() in
+	 * the kernel.
+	 */
+	__atomic_exchange_n(&hdr->len, new_len, __ATOMIC_ACQ_REL);
+}
+
+void user_ring_buffer__discard(struct user_ring_buffer *rb, void *sample)
+{
+	user_ringbuf_commit(rb, sample, true);
+}
+
+void user_ring_buffer__submit(struct user_ring_buffer *rb, void *sample)
+{
+	user_ringbuf_commit(rb, sample, false);
+}
+
+/* Reserve @size bytes in the ring buffer without blocking. Returns the sample,
+ * or NULL with errno set to E2BIG (can never fit) or ENOSPC (no room now).
+ */
+void *user_ring_buffer__reserve(struct user_ring_buffer *rb, __u32 size)
+{
+	__u32 avail_size, total_size, max_size = rb->mask + 1;
+	/* 64-bit to avoid overflow in case of extreme application behavior */
+	__u64 cons_pos, prod_pos;
+	struct ringbuf_hdr *hdr;
+
+	/* Top two bits of len are flags; also keeps the rounding below from wrapping. */
+	if (size & (BPF_RINGBUF_BUSY_BIT | BPF_RINGBUF_DISCARD_BIT))
+		return errno = E2BIG, NULL;
+
+	/* Pairs with smp_store_release() in the kernel's __bpf_user_ringbuf_peek(). */
+	cons_pos = smp_load_acquire(rb->consumer_pos);
+	/* Pairs with the smp_store_release() of producer_pos at the end of this function. */
+	prod_pos = smp_load_acquire(rb->producer_pos);
+
+	avail_size = max_size - (prod_pos - cons_pos);
+	/* Round up total size to a multiple of 8. */
+	total_size = (size + BPF_RINGBUF_HDR_SZ + 7) / 8 * 8;
+
+	if (total_size > max_size)
+		return errno = E2BIG, NULL;
+	if (avail_size < total_size)
+		return errno = ENOSPC, NULL;
+
+	hdr = rb->data + (prod_pos & rb->mask);
+	hdr->len = size | BPF_RINGBUF_BUSY_BIT;
+	hdr->pad = 0;
+	/* Pairs with smp_load_acquire() in the kernel's __bpf_user_ringbuf_peek(). */
+	smp_store_release(rb->producer_pos, prod_pos + total_size);
+
+	return (void *)rb->data + ((prod_pos + BPF_RINGBUF_HDR_SZ) & rb->mask);
+}
+
+static __u64 ns_elapsed_timespec(const struct timespec *start, const struct timespec *end)
+{
+	__u64 start_ns, end_ns, ns_per_s = 1000000000;
+
+	start_ns = (__u64)start->tv_sec * ns_per_s + start->tv_nsec;
+	end_ns = (__u64)end->tv_sec * ns_per_s + end->tv_nsec;
+
+	return end_ns - start_ns;
+}
+
+/* Reserve a @size byte sample, waiting on the ring buffer's epoll fd whenever
+ * the reservation fails with ENOSPC. @timeout_ms bounds the total wait in
+ * milliseconds; -1 retries until the reservation succeeds or fails with another
+ * error. Returns the reserved sample, or NULL on failure.
+ */
+void *user_ring_buffer__reserve_blocking(struct user_ring_buffer *rb, __u32 size, int timeout_ms)
+{
+	void *sample;
+	int ms_remaining = timeout_ms;
+	struct timespec start;
+
+	if (timeout_ms < 0 && timeout_ms != -1)
+		return errno = EINVAL, NULL;
+
+	if (timeout_ms != -1 && clock_gettime(CLOCK_MONOTONIC, &start))
+		return NULL;
+
+	do {
+		int ms_elapsed;
+		struct timespec curr;
+		__u64 ns_per_ms = 1000000;
+
+		sample = user_ring_buffer__reserve(rb, size);
+		if (sample)
+			return sample;
+		else if (errno != ENOSPC)
+			return NULL;
+
+		/* The kernel guarantees one event notification for every
+		 * bpf_user_ringbuf_drain() invocation that drains at least
+		 * one sample, unless the BPF program passed BPF_RB_NO_WAKEUP.
+		 * Additional events may be delivered at any time, and
+		 * BPF_RB_FORCE_WAKEUP delivers a wakeup event even if no
+		 * samples are drained.
+		 */
+		if (epoll_wait(rb->epoll_fd, &rb->event, 1, ms_remaining) < 0)
+			return NULL;
+
+		/* An indefinite wait has no deadline to update. "continue"
+		 * in a do-while still evaluates the loop condition, so the
+		 * condition below must accept timeout_ms == -1 explicitly.
+		 */
+		if (timeout_ms == -1)
+			continue;
+
+		if (clock_gettime(CLOCK_MONOTONIC, &curr))
+			return NULL;
+
+		ms_elapsed = ns_elapsed_timespec(&start, &curr) / ns_per_ms;
+		ms_remaining = timeout_ms - ms_elapsed;
+	} while (timeout_ms == -1 || ms_remaining > 0);
+
+	/* Try one more time to reserve a sample after the specified timeout has elapsed. */
+	return user_ring_buffer__reserve(rb, size);
+}
-- 
cgit v1.2.3


From e5a9df51c74671cfe15af1d50e5f508bd3efddab Mon Sep 17 00:00:00 2001
From: David Vernet <void@manifault.com>
Date: Mon, 19 Sep 2022 19:01:00 -0500
Subject: selftests/bpf: Add selftests validating the user ringbuf

This change includes selftests that validate the expected behavior and
APIs of the new BPF_MAP_TYPE_USER_RINGBUF map type.

Signed-off-by: David Vernet <void@manifault.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220920000100.477320-5-void@manifault.com
---
 tools/testing/selftests/bpf/DENYLIST.s390x         |   1 +
 .../selftests/bpf/prog_tests/user_ringbuf.c        | 754 +++++++++++++++++++++
 .../selftests/bpf/progs/test_user_ringbuf.h        |  35 +
 .../selftests/bpf/progs/user_ringbuf_fail.c        | 177 +++++
 .../selftests/bpf/progs/user_ringbuf_success.c     | 218 ++++++
 5 files changed, 1185 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/user_ringbuf.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_user_ringbuf.h
 create mode 100644 tools/testing/selftests/bpf/progs/user_ringbuf_fail.c
 create mode 100644 tools/testing/selftests/bpf/progs/user_ringbuf_success.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x
index 168c5b287b5c..981c2be922f4 100644
--- a/tools/testing/selftests/bpf/DENYLIST.s390x
+++ b/tools/testing/selftests/bpf/DENYLIST.s390x
@@ -71,3 +71,4 @@ cb_refs                                  # expected error message unexpected err
 cgroup_hierarchical_stats                # JIT does not support calling kernel function                                (kfunc)
 htab_update                              # failed to attach: ERROR: strerror_r(-524)=22                                (trampoline)
 tracing_struct                           # failed to auto-attach: -524                                                 (trampoline)
+user_ringbuf                             # failed to find kernel BTF type ID of '__s390x_sys_prctl': -3                (?)
diff --git a/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c b/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c
new file mode 100644
index 000000000000..02b18d018b36
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c
@@ -0,0 +1,754 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#define _GNU_SOURCE
+#include <linux/compiler.h>
+#include <linux/ring_buffer.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <sys/sysinfo.h>
+#include <test_progs.h>
+#include <uapi/linux/bpf.h>
+#include <unistd.h>
+
+#include "user_ringbuf_fail.skel.h"
+#include "user_ringbuf_success.skel.h"
+
+#include "../progs/test_user_ringbuf.h"
+
+static size_t log_buf_sz = 1 << 20; /* 1 MB */
+static char obj_log_buf[1048576];
+static const long c_sample_size = sizeof(struct sample) + BPF_RINGBUF_HDR_SZ;
+static const long c_ringbuf_size = 1 << 12; /* 1 small page */
+static const long c_max_entries = c_ringbuf_size / c_sample_size;
+
+static void drain_current_samples(void)
+{
+	syscall(__NR_getpgid);
+}
+
+static int write_samples(struct user_ring_buffer *ringbuf, uint32_t num_samples)
+{
+	int i, err = 0;
+
+	/* Write some number of samples to the ring buffer. */
+	for (i = 0; i < num_samples; i++) {
+		struct sample *entry;
+		int read;
+
+		entry = user_ring_buffer__reserve(ringbuf, sizeof(*entry));
+		if (!entry) {
+			err = -errno;
+			goto done;
+		}
+
+		entry->pid = getpid();
+		entry->seq = i;
+		entry->value = i * i;
+
+		read = snprintf(entry->comm, sizeof(entry->comm), "%u", i);
+		if (read <= 0) {
+			/* Assert on the error path to avoid spamming logs with
+			 * mostly success messages.
+			 */
+			ASSERT_GT(read, 0, "snprintf_comm");
+			err = read;
+			user_ring_buffer__discard(ringbuf, entry);
+			goto done;
+		}
+
+		user_ring_buffer__submit(ringbuf, entry);
+	}
+
+done:
+	drain_current_samples();
+
+	return err;
+}
+
+static struct user_ringbuf_success *open_load_ringbuf_skel(void)
+{
+	struct user_ringbuf_success *skel;
+	int err;
+
+	skel = user_ringbuf_success__open();
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		return NULL;
+
+	err = bpf_map__set_max_entries(skel->maps.user_ringbuf, c_ringbuf_size);
+	if (!ASSERT_OK(err, "set_max_entries"))
+		goto cleanup;
+
+	err = bpf_map__set_max_entries(skel->maps.kernel_ringbuf, c_ringbuf_size);
+	if (!ASSERT_OK(err, "set_max_entries"))
+		goto cleanup;
+
+	err = user_ringbuf_success__load(skel);
+	if (!ASSERT_OK(err, "skel_load"))
+		goto cleanup;
+
+	return skel;
+
+cleanup:
+	user_ringbuf_success__destroy(skel);
+	return NULL;
+}
+
+static void test_user_ringbuf_mappings(void)
+{
+	int err, rb_fd;
+	int page_size = getpagesize();
+	void *mmap_ptr;
+	struct user_ringbuf_success *skel;
+
+	skel = open_load_ringbuf_skel();
+	if (!skel)
+		return;
+
+	rb_fd = bpf_map__fd(skel->maps.user_ringbuf);
+	/* cons_pos can be mapped R/O, can't add +X with mprotect. */
+	mmap_ptr = mmap(NULL, page_size, PROT_READ, MAP_SHARED, rb_fd, 0);
+	ASSERT_OK_PTR(mmap_ptr, "ro_cons_pos");
+	ASSERT_ERR(mprotect(mmap_ptr, page_size, PROT_WRITE), "write_cons_pos_protect");
+	ASSERT_ERR(mprotect(mmap_ptr, page_size, PROT_EXEC), "exec_cons_pos_protect");
+	ASSERT_ERR_PTR(mremap(mmap_ptr, 0, 4 * page_size, MREMAP_MAYMOVE), "wr_prod_pos");
+	err = -errno;
+	ASSERT_ERR(err, "wr_prod_pos_err");
+	ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_ro_cons");
+
+	/* prod_pos can be mapped RW, can't add +X with mprotect. */
+	mmap_ptr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
+			rb_fd, page_size);
+	ASSERT_OK_PTR(mmap_ptr, "rw_prod_pos");
+	ASSERT_ERR(mprotect(mmap_ptr, page_size, PROT_EXEC), "exec_prod_pos_protect");
+	err = -errno;
+	ASSERT_ERR(err, "wr_prod_pos_err");
+	ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_rw_prod");
+
+	/* data pages can be mapped RW, can't add +X with mprotect. */
+	mmap_ptr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, rb_fd,
+			2 * page_size);
+	ASSERT_OK_PTR(mmap_ptr, "rw_data");
+	ASSERT_ERR(mprotect(mmap_ptr, page_size, PROT_EXEC), "exec_data_protect");
+	err = -errno;
+	ASSERT_ERR(err, "exec_data_err");
+	ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_rw_data");
+
+	user_ringbuf_success__destroy(skel);
+}
+
+static int load_skel_create_ringbufs(struct user_ringbuf_success **skel_out,
+				     struct ring_buffer **kern_ringbuf_out,
+				     ring_buffer_sample_fn callback,
+				     struct user_ring_buffer **user_ringbuf_out)
+{
+	struct user_ringbuf_success *skel;
+	struct ring_buffer *kern_ringbuf = NULL;
+	struct user_ring_buffer *user_ringbuf = NULL;
+	int err = -ENOMEM, rb_fd;
+
+	skel = open_load_ringbuf_skel();
+	if (!skel)
+		return err;
+
+	/* only trigger BPF program for current process */
+	skel->bss->pid = getpid();
+
+	if (kern_ringbuf_out) {
+		rb_fd = bpf_map__fd(skel->maps.kernel_ringbuf);
+		kern_ringbuf = ring_buffer__new(rb_fd, callback, skel, NULL);
+		if (!ASSERT_OK_PTR(kern_ringbuf, "kern_ringbuf_create"))
+			goto cleanup;
+
+		*kern_ringbuf_out = kern_ringbuf;
+	}
+
+	if (user_ringbuf_out) {
+		rb_fd = bpf_map__fd(skel->maps.user_ringbuf);
+		user_ringbuf = user_ring_buffer__new(rb_fd, NULL);
+		if (!ASSERT_OK_PTR(user_ringbuf, "user_ringbuf_create"))
+			goto cleanup;
+
+		*user_ringbuf_out = user_ringbuf;
+		ASSERT_EQ(skel->bss->read, 0, "no_reads_after_load");
+	}
+
+	err = user_ringbuf_success__attach(skel);
+	if (!ASSERT_OK(err, "skel_attach"))
+		goto cleanup;
+
+	*skel_out = skel;
+	return 0;
+
+cleanup:
+	if (kern_ringbuf_out)
+		*kern_ringbuf_out = NULL;
+	if (user_ringbuf_out)
+		*user_ringbuf_out = NULL;
+	ring_buffer__free(kern_ringbuf);
+	user_ring_buffer__free(user_ringbuf);
+	user_ringbuf_success__destroy(skel);
+	return err;
+}
+
+static int load_skel_create_user_ringbuf(struct user_ringbuf_success **skel_out,
+					 struct user_ring_buffer **ringbuf_out)
+{
+	return load_skel_create_ringbufs(skel_out, NULL, NULL, ringbuf_out);
+}
+
+static void manually_write_test_invalid_sample(struct user_ringbuf_success *skel,
+					       __u32 size, __u64 producer_pos, int err)
+{
+	void *data_ptr;
+	__u64 *producer_pos_ptr;
+	int rb_fd, page_size = getpagesize();
+
+	rb_fd = bpf_map__fd(skel->maps.user_ringbuf);
+
+	ASSERT_EQ(skel->bss->read, 0, "num_samples_before_bad_sample");
+
+	/* Map the producer_pos as RW. */
+	producer_pos_ptr = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
+				MAP_SHARED, rb_fd, page_size);
+	ASSERT_OK_PTR(producer_pos_ptr, "producer_pos_ptr");
+
+	/* Map the data pages as RW. */
+	data_ptr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, rb_fd, 2 * page_size);
+	ASSERT_OK_PTR(data_ptr, "rw_data");
+
+	memset(data_ptr, 0, BPF_RINGBUF_HDR_SZ);
+	*(__u32 *)data_ptr = size;
+
+	/* Synchronizes with smp_load_acquire() in __bpf_user_ringbuf_peek() in the kernel. */
+	smp_store_release(producer_pos_ptr, producer_pos + BPF_RINGBUF_HDR_SZ);
+
+	drain_current_samples();
+	ASSERT_EQ(skel->bss->read, 0, "num_samples_after_bad_sample");
+	ASSERT_EQ(skel->bss->err, err, "err_after_bad_sample");
+
+	ASSERT_OK(munmap(producer_pos_ptr, page_size), "unmap_producer_pos");
+	ASSERT_OK(munmap(data_ptr, page_size), "unmap_data_ptr");
+}
+
+static void test_user_ringbuf_post_misaligned(void)
+{
+	struct user_ringbuf_success *skel;
+	struct user_ring_buffer *ringbuf;
+	int err;
+	__u32 size = (1 << 5) + 7;
+
+	err = load_skel_create_user_ringbuf(&skel, &ringbuf);
+	if (!ASSERT_OK(err, "misaligned_skel"))
+		return;
+
+	manually_write_test_invalid_sample(skel, size, size, -EINVAL);
+	user_ring_buffer__free(ringbuf);
+	user_ringbuf_success__destroy(skel);
+}
+
+static void test_user_ringbuf_post_producer_wrong_offset(void)
+{
+	struct user_ringbuf_success *skel;
+	struct user_ring_buffer *ringbuf;
+	int err;
+	__u32 size = (1 << 5);
+
+	err = load_skel_create_user_ringbuf(&skel, &ringbuf);
+	if (!ASSERT_OK(err, "wrong_offset_skel"))
+		return;
+
+	manually_write_test_invalid_sample(skel, size, size - 8, -EINVAL);
+	user_ring_buffer__free(ringbuf);
+	user_ringbuf_success__destroy(skel);
+}
+
+static void test_user_ringbuf_post_larger_than_ringbuf_sz(void)
+{
+	struct user_ringbuf_success *skel;
+	struct user_ring_buffer *ringbuf;
+	int err;
+	__u32 size = c_ringbuf_size;
+
+	err = load_skel_create_user_ringbuf(&skel, &ringbuf);
+	if (!ASSERT_OK(err, "huge_sample_skel"))
+		return;
+
+	manually_write_test_invalid_sample(skel, size, size, -E2BIG);
+	user_ring_buffer__free(ringbuf);
+	user_ringbuf_success__destroy(skel);
+}
+
+static void test_user_ringbuf_basic(void)
+{
+	struct user_ringbuf_success *skel;
+	struct user_ring_buffer *ringbuf;
+	int err;
+
+	err = load_skel_create_user_ringbuf(&skel, &ringbuf);
+	if (!ASSERT_OK(err, "ringbuf_basic_skel"))
+		return;
+
+	ASSERT_EQ(skel->bss->read, 0, "num_samples_read_before");
+
+	err = write_samples(ringbuf, 2);
+	if (!ASSERT_OK(err, "write_samples"))
+		goto cleanup;
+
+	ASSERT_EQ(skel->bss->read, 2, "num_samples_read_after");
+
+cleanup:
+	user_ring_buffer__free(ringbuf);
+	user_ringbuf_success__destroy(skel);
+}
+
+static void test_user_ringbuf_sample_full_ring_buffer(void)
+{
+	struct user_ringbuf_success *skel;
+	struct user_ring_buffer *ringbuf;
+	int err;
+	void *sample;
+
+	err = load_skel_create_user_ringbuf(&skel, &ringbuf);
+	if (!ASSERT_OK(err, "ringbuf_full_sample_skel"))
+		return;
+
+	sample = user_ring_buffer__reserve(ringbuf, c_ringbuf_size - BPF_RINGBUF_HDR_SZ);
+	if (!ASSERT_OK_PTR(sample, "full_sample"))
+		goto cleanup;
+
+	user_ring_buffer__submit(ringbuf, sample);
+	ASSERT_EQ(skel->bss->read, 0, "num_samples_read_before");
+	drain_current_samples();
+	ASSERT_EQ(skel->bss->read, 1, "num_samples_read_after");
+
+cleanup:
+	user_ring_buffer__free(ringbuf);
+	user_ringbuf_success__destroy(skel);
+}
+
+static void test_user_ringbuf_post_alignment_autoadjust(void)
+{
+	struct user_ringbuf_success *skel;
+	struct user_ring_buffer *ringbuf;
+	struct sample *sample;
+	int err;
+
+	err = load_skel_create_user_ringbuf(&skel, &ringbuf);
+	if (!ASSERT_OK(err, "ringbuf_align_autoadjust_skel"))
+		return;
+
+	/* libbpf should automatically round any sample up to an 8-byte alignment. */
+	sample = user_ring_buffer__reserve(ringbuf, sizeof(*sample) + 1);
+	ASSERT_OK_PTR(sample, "reserve_autoaligned");
+	user_ring_buffer__submit(ringbuf, sample);
+
+	ASSERT_EQ(skel->bss->read, 0, "num_samples_read_before");
+	drain_current_samples();
+	ASSERT_EQ(skel->bss->read, 1, "num_samples_read_after");
+
+	user_ring_buffer__free(ringbuf);
+	user_ringbuf_success__destroy(skel);
+}
+
+static void test_user_ringbuf_overfill(void)
+{
+	struct user_ringbuf_success *skel;
+	struct user_ring_buffer *ringbuf;
+	int err;
+
+	err = load_skel_create_user_ringbuf(&skel, &ringbuf);
+	if (err)
+		return;
+
+	err = write_samples(ringbuf, c_max_entries * 5);
+	ASSERT_ERR(err, "write_samples");
+	ASSERT_EQ(skel->bss->read, c_max_entries, "max_entries");
+
+	user_ring_buffer__free(ringbuf);
+	user_ringbuf_success__destroy(skel);
+}
+
+static void test_user_ringbuf_discards_properly_ignored(void)
+{
+	struct user_ringbuf_success *skel;
+	struct user_ring_buffer *ringbuf;
+	int err, num_discarded = 0;
+	__u64 *token;
+
+	err = load_skel_create_user_ringbuf(&skel, &ringbuf);
+	if (err)
+		return;
+
+	ASSERT_EQ(skel->bss->read, 0, "num_samples_read_before");
+
+	while (1) {
+		/* Write samples until the buffer is full. */
+		token = user_ring_buffer__reserve(ringbuf, sizeof(*token));
+		if (!token)
+			break;
+
+		user_ring_buffer__discard(ringbuf, token);
+		num_discarded++;
+	}
+
+	if (!ASSERT_GE(num_discarded, 0, "num_discarded"))
+		goto cleanup;
+
+	/* Should not read any samples, as they are all discarded. */
+	ASSERT_EQ(skel->bss->read, 0, "num_pre_kick");
+	drain_current_samples();
+	ASSERT_EQ(skel->bss->read, 0, "num_post_kick");
+
+	/* Now that the ring buffer has been drained, we should be able to
+	 * reserve another token.
+	 */
+	token = user_ring_buffer__reserve(ringbuf, sizeof(*token));
+
+	if (!ASSERT_OK_PTR(token, "new_token"))
+		goto cleanup;
+
+	user_ring_buffer__discard(ringbuf, token);
+cleanup:
+	user_ring_buffer__free(ringbuf);
+	user_ringbuf_success__destroy(skel);
+}
+
+static void test_user_ringbuf_loop(void)
+{
+	struct user_ringbuf_success *skel;
+	struct user_ring_buffer *ringbuf;
+	uint32_t total_samples = 8192;
+	uint32_t remaining_samples = total_samples;
+	int err;
+
+	BUILD_BUG_ON(total_samples <= c_max_entries);
+	err = load_skel_create_user_ringbuf(&skel, &ringbuf);
+	if (err)
+		return;
+
+	do  {
+		uint32_t curr_samples;
+
+		curr_samples = remaining_samples > c_max_entries
+			? c_max_entries : remaining_samples;
+		err = write_samples(ringbuf, curr_samples);
+		if (err != 0) {
+			/* Assert inside of if statement to avoid flooding logs
+			 * on the success path.
+			 */
+			ASSERT_OK(err, "write_samples");
+			goto cleanup;
+		}
+
+		remaining_samples -= curr_samples;
+		ASSERT_EQ(skel->bss->read, total_samples - remaining_samples,
+			  "current_batched_entries");
+	} while (remaining_samples > 0);
+	ASSERT_EQ(skel->bss->read, total_samples, "total_batched_entries");
+
+cleanup:
+	user_ring_buffer__free(ringbuf);
+	user_ringbuf_success__destroy(skel);
+}
+
+static int send_test_message(struct user_ring_buffer *ringbuf,
+			     enum test_msg_op op, s64 operand_64,
+			     s32 operand_32)
+{
+	struct test_msg *msg;
+
+	msg = user_ring_buffer__reserve(ringbuf, sizeof(*msg));
+	if (!msg) {
+		/* Assert on the error path to avoid spamming logs with mostly
+		 * success messages.
+		 */
+		ASSERT_OK_PTR(msg, "reserve_msg");
+		return -ENOMEM;
+	}
+
+	msg->msg_op = op;
+
+	switch (op) {
+	case TEST_MSG_OP_INC64:
+	case TEST_MSG_OP_MUL64:
+		msg->operand_64 = operand_64;
+		break;
+	case TEST_MSG_OP_INC32:
+	case TEST_MSG_OP_MUL32:
+		msg->operand_32 = operand_32;
+		break;
+	default:
+		PRINT_FAIL("Invalid operand %d\n", op);
+		user_ring_buffer__discard(ringbuf, msg);
+		return -EINVAL;
+	}
+
+	user_ring_buffer__submit(ringbuf, msg);
+
+	return 0;
+}
+
+static void kick_kernel_read_messages(void)
+{
+	syscall(__NR_prctl);
+}
+
+static int handle_kernel_msg(void *ctx, void *data, size_t len)
+{
+	struct user_ringbuf_success *skel = ctx;
+	struct test_msg *msg = data;
+
+	switch (msg->msg_op) {
+	case TEST_MSG_OP_INC64:
+		skel->bss->user_mutated += msg->operand_64;
+		return 0;
+	case TEST_MSG_OP_INC32:
+		skel->bss->user_mutated += msg->operand_32;
+		return 0;
+	case TEST_MSG_OP_MUL64:
+		skel->bss->user_mutated *= msg->operand_64;
+		return 0;
+	case TEST_MSG_OP_MUL32:
+		skel->bss->user_mutated *= msg->operand_32;
+		return 0;
+	default:
+		fprintf(stderr, "Invalid operand %d\n", msg->msg_op);
+		return -EINVAL;
+	}
+}
+
+static void drain_kernel_messages_buffer(struct ring_buffer *kern_ringbuf,
+					 struct user_ringbuf_success *skel)
+{
+	int cnt;
+
+	cnt = ring_buffer__consume(kern_ringbuf);
+	ASSERT_EQ(cnt, 8, "consume_kern_ringbuf");
+	ASSERT_OK(skel->bss->err, "consume_kern_ringbuf_err");
+}
+
+static void test_user_ringbuf_msg_protocol(void)
+{
+	struct user_ringbuf_success *skel;
+	struct user_ring_buffer *user_ringbuf;
+	struct ring_buffer *kern_ringbuf;
+	int err, i;
+	__u64 expected_kern = 0;
+
+	err = load_skel_create_ringbufs(&skel, &kern_ringbuf, handle_kernel_msg, &user_ringbuf);
+	if (!ASSERT_OK(err, "create_ringbufs"))
+		return;
+
+	for (i = 0; i < 64; i++) {
+		enum test_msg_op op = i % TEST_MSG_OP_NUM_OPS;
+		__u64 operand_64 = TEST_OP_64;
+		__u32 operand_32 = TEST_OP_32;
+
+		err = send_test_message(user_ringbuf, op, operand_64, operand_32);
+		if (err) {
+			/* Only assert on a failure to avoid spamming success logs. */
+			ASSERT_OK(err, "send_test_message");
+			goto cleanup;
+		}
+
+		switch (op) {
+		case TEST_MSG_OP_INC64:
+			expected_kern += operand_64;
+			break;
+		case TEST_MSG_OP_INC32:
+			expected_kern += operand_32;
+			break;
+		case TEST_MSG_OP_MUL64:
+			expected_kern *= operand_64;
+			break;
+		case TEST_MSG_OP_MUL32:
+			expected_kern *= operand_32;
+			break;
+		default:
+			PRINT_FAIL("Unexpected op %d\n", op);
+			goto cleanup;
+		}
+
+		if (i % 8 == 0) {
+			kick_kernel_read_messages();
+			ASSERT_EQ(skel->bss->kern_mutated, expected_kern, "expected_kern");
+			ASSERT_EQ(skel->bss->err, 0, "bpf_prog_err");
+			drain_kernel_messages_buffer(kern_ringbuf, skel);
+		}
+	}
+
+cleanup:
+	ring_buffer__free(kern_ringbuf);
+	user_ring_buffer__free(user_ringbuf);
+	user_ringbuf_success__destroy(skel);
+}
+
+static void *kick_kernel_cb(void *arg)
+{
+	/* Kick the kernel, causing it to drain the ring buffer and then wake
+	 * up the test thread waiting on epoll.
+	 */
+	syscall(__NR_getrlimit);
+
+	return NULL;
+}
+
+static int spawn_kick_thread_for_poll(void)
+{
+	pthread_t thread;
+
+	return pthread_create(&thread, NULL, kick_kernel_cb, NULL);
+}
+
+static void test_user_ringbuf_blocking_reserve(void)
+{
+	struct user_ringbuf_success *skel;
+	struct user_ring_buffer *ringbuf;
+	int err, num_written = 0;
+	__u64 *token;
+
+	err = load_skel_create_user_ringbuf(&skel, &ringbuf);
+	if (err)
+		return;
+
+	ASSERT_EQ(skel->bss->read, 0, "num_samples_read_before");
+
+	while (1) {
+		/* Write samples until the buffer is full. */
+		token = user_ring_buffer__reserve(ringbuf, sizeof(*token));
+		if (!token)
+			break;
+
+		*token = 0xdeadbeef;
+
+		user_ring_buffer__submit(ringbuf, token);
+		num_written++;
+	}
+
+	if (!ASSERT_GE(num_written, 0, "num_written"))
+		goto cleanup;
+
+	/* Should not have read any samples until the kernel is kicked. */
+	ASSERT_EQ(skel->bss->read, 0, "num_pre_kick");
+
+	/* We correctly time out after 1 second, without a sample. */
+	token = user_ring_buffer__reserve_blocking(ringbuf, sizeof(*token), 1000);
+	if (!ASSERT_EQ(token, NULL, "pre_kick_timeout_token"))
+		goto cleanup;
+
+	err = spawn_kick_thread_for_poll();
+	if (!ASSERT_EQ(err, 0, "deferred_kick_thread\n"))
+		goto cleanup;
+
+	/* After spawning another thread that asynchronously kicks the kernel to
+	 * drain the messages, we're able to block and successfully get a
+	 * sample once we receive an event notification.
+	 */
+	token = user_ring_buffer__reserve_blocking(ringbuf, sizeof(*token), 10000);
+
+	if (!ASSERT_OK_PTR(token, "block_token"))
+		goto cleanup;
+
+	ASSERT_GT(skel->bss->read, 0, "num_post_kill");
+	ASSERT_LE(skel->bss->read, num_written, "num_post_kill");
+	ASSERT_EQ(skel->bss->err, 0, "err_post_poll");
+	user_ring_buffer__discard(ringbuf, token);
+
+cleanup:
+	user_ring_buffer__free(ringbuf);
+	user_ringbuf_success__destroy(skel);
+}
+
+static struct {
+	const char *prog_name;
+	const char *expected_err_msg;
+} failure_tests[] = {
+	/* failure cases */
+	{"user_ringbuf_callback_bad_access1", "negative offset dynptr_ptr ptr"},
+	{"user_ringbuf_callback_bad_access2", "dereference of modified dynptr_ptr ptr"},
+	{"user_ringbuf_callback_write_forbidden", "invalid mem access 'dynptr_ptr'"},
+	{"user_ringbuf_callback_null_context_write", "invalid mem access 'scalar'"},
+	{"user_ringbuf_callback_null_context_read", "invalid mem access 'scalar'"},
+	{"user_ringbuf_callback_discard_dynptr", "arg 1 is an unacquired reference"},
+	{"user_ringbuf_callback_submit_dynptr", "arg 1 is an unacquired reference"},
+	{"user_ringbuf_callback_invalid_return", "At callback return the register R0 has value"},
+};
+
+#define SUCCESS_TEST(_func) { _func, #_func }
+
+static struct {
+	void (*test_callback)(void);
+	const char *test_name;
+} success_tests[] = {
+	SUCCESS_TEST(test_user_ringbuf_mappings),
+	SUCCESS_TEST(test_user_ringbuf_post_misaligned),
+	SUCCESS_TEST(test_user_ringbuf_post_producer_wrong_offset),
+	SUCCESS_TEST(test_user_ringbuf_post_larger_than_ringbuf_sz),
+	SUCCESS_TEST(test_user_ringbuf_basic),
+	SUCCESS_TEST(test_user_ringbuf_sample_full_ring_buffer),
+	SUCCESS_TEST(test_user_ringbuf_post_alignment_autoadjust),
+	SUCCESS_TEST(test_user_ringbuf_overfill),
+	SUCCESS_TEST(test_user_ringbuf_discards_properly_ignored),
+	SUCCESS_TEST(test_user_ringbuf_loop),
+	SUCCESS_TEST(test_user_ringbuf_msg_protocol),
+	SUCCESS_TEST(test_user_ringbuf_blocking_reserve),
+};
+
+static void verify_fail(const char *prog_name, const char *expected_err_msg)
+{
+	LIBBPF_OPTS(bpf_object_open_opts, opts);
+	struct bpf_program *prog;
+	struct user_ringbuf_fail *skel;
+	int err;
+
+	opts.kernel_log_buf = obj_log_buf;
+	opts.kernel_log_size = log_buf_sz;
+	opts.kernel_log_level = 1;
+
+	skel = user_ringbuf_fail__open_opts(&opts);
+	if (!ASSERT_OK_PTR(skel, "dynptr_fail__open_opts"))
+		goto cleanup;
+
+	prog = bpf_object__find_program_by_name(skel->obj, prog_name);
+	if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+		goto cleanup;
+
+	bpf_program__set_autoload(prog, true);
+
+	bpf_map__set_max_entries(skel->maps.user_ringbuf, getpagesize());
+
+	err = user_ringbuf_fail__load(skel);
+	if (!ASSERT_ERR(err, "unexpected load success"))
+		goto cleanup;
+
+	if (!ASSERT_OK_PTR(strstr(obj_log_buf, expected_err_msg), "expected_err_msg")) {
+		fprintf(stderr, "Expected err_msg: %s\n", expected_err_msg);
+		fprintf(stderr, "Verifier output: %s\n", obj_log_buf);
+	}
+
+cleanup:
+	user_ringbuf_fail__destroy(skel);
+}
+
+void test_user_ringbuf(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(success_tests); i++) {
+		if (!test__start_subtest(success_tests[i].test_name))
+			continue;
+
+		success_tests[i].test_callback();
+	}
+
+	for (i = 0; i < ARRAY_SIZE(failure_tests); i++) {
+		if (!test__start_subtest(failure_tests[i].prog_name))
+			continue;
+
+		verify_fail(failure_tests[i].prog_name, failure_tests[i].expected_err_msg);
+	}
+}
diff --git a/tools/testing/selftests/bpf/progs/test_user_ringbuf.h b/tools/testing/selftests/bpf/progs/test_user_ringbuf.h
new file mode 100644
index 000000000000..1643b4d59ba7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_user_ringbuf.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#ifndef _TEST_USER_RINGBUF_H
+#define _TEST_USER_RINGBUF_H
+
+#define TEST_OP_64 4
+#define TEST_OP_32 2
+
+enum test_msg_op {
+	TEST_MSG_OP_INC64,
+	TEST_MSG_OP_INC32,
+	TEST_MSG_OP_MUL64,
+	TEST_MSG_OP_MUL32,
+
+	// Must come last.
+	TEST_MSG_OP_NUM_OPS,
+};
+
+struct test_msg {
+	enum test_msg_op msg_op;
+	union {
+		__s64 operand_64;
+		__s32 operand_32;
+	};
+};
+
+struct sample {
+	int pid;
+	int seq;
+	long value;
+	char comm[16];
+};
+
+#endif /* _TEST_USER_RINGBUF_H */
diff --git a/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c b/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c
new file mode 100644
index 000000000000..82aba4529aa9
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c
@@ -0,0 +1,177 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct sample {
+	int pid;
+	int seq;
+	long value;
+	char comm[16];
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_USER_RINGBUF);
+} user_ringbuf SEC(".maps");
+
+static long
+bad_access1(struct bpf_dynptr *dynptr, void *context)
+{
+	const struct sample *sample;
+
+	sample = bpf_dynptr_data(dynptr - 1, 0, sizeof(*sample));
+	bpf_printk("Was able to pass bad pointer %lx\n", (__u64)dynptr - 1);
+
+	return 0;
+}
+
+/* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should
+ * not be able to read before the pointer.
+ */
+SEC("?raw_tp/sys_nanosleep")
+int user_ringbuf_callback_bad_access1(void *ctx)
+{
+	bpf_user_ringbuf_drain(&user_ringbuf, bad_access1, NULL, 0);
+
+	return 0;
+}
+
+static long
+bad_access2(struct bpf_dynptr *dynptr, void *context)
+{
+	const struct sample *sample;
+
+	sample = bpf_dynptr_data(dynptr + 1, 0, sizeof(*sample));
+	bpf_printk("Was able to pass bad pointer %lx\n", (__u64)dynptr + 1);
+
+	return 0;
+}
+
+/* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should
+ * not be able to read past the end of the pointer.
+ */
+SEC("?raw_tp/sys_nanosleep")
+int user_ringbuf_callback_bad_access2(void *ctx)
+{
+	bpf_user_ringbuf_drain(&user_ringbuf, bad_access2, NULL, 0);
+
+	return 0;
+}
+
+static long
+write_forbidden(struct bpf_dynptr *dynptr, void *context)
+{
+	*((long *)dynptr) = 0;
+
+	return 0;
+}
+
+/* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should
+ * not be able to write to that pointer.
+ */
+SEC("?raw_tp/sys_nanosleep")
+int user_ringbuf_callback_write_forbidden(void *ctx)
+{
+	bpf_user_ringbuf_drain(&user_ringbuf, write_forbidden, NULL, 0);
+
+	return 0;
+}
+
+static long
+null_context_write(struct bpf_dynptr *dynptr, void *context)
+{
+	*((__u64 *)context) = 0;
+
+	return 0;
+}
+
+/* A callback passed a NULL context by bpf_user_ringbuf_drain should not be
+ * able to write through that context pointer.
+ */
+SEC("?raw_tp/sys_nanosleep")
+int user_ringbuf_callback_null_context_write(void *ctx)
+{
+	bpf_user_ringbuf_drain(&user_ringbuf, null_context_write, NULL, 0);
+
+	return 0;
+}
+
+static long
+null_context_read(struct bpf_dynptr *dynptr, void *context)
+{
+	__u64 id = *((__u64 *)context);
+
+	bpf_printk("Read id %lu\n", id);
+
+	return 0;
+}
+
+/* A callback passed a NULL context by bpf_user_ringbuf_drain should not be
+ * able to read through that context pointer.
+ */
+SEC("?raw_tp/sys_nanosleep")
+int user_ringbuf_callback_null_context_read(void *ctx)
+{
+	bpf_user_ringbuf_drain(&user_ringbuf, null_context_read, NULL, 0);
+
+	return 0;
+}
+
+static long
+try_discard_dynptr(struct bpf_dynptr *dynptr, void *context)
+{
+	bpf_ringbuf_discard_dynptr(dynptr, 0);
+
+	return 0;
+}
+
+/* A bpf_user_ringbuf_drain callback should not be able to pass the dynptr it
+ * is given to bpf_ringbuf_discard_dynptr().
+ */
+SEC("?raw_tp/sys_nanosleep")
+int user_ringbuf_callback_discard_dynptr(void *ctx)
+{
+	bpf_user_ringbuf_drain(&user_ringbuf, try_discard_dynptr, NULL, 0);
+
+	return 0;
+}
+
+static long
+try_submit_dynptr(struct bpf_dynptr *dynptr, void *context)
+{
+	bpf_ringbuf_submit_dynptr(dynptr, 0);
+
+	return 0;
+}
+
+/* A bpf_user_ringbuf_drain callback should not be able to pass the dynptr it
+ * is given to bpf_ringbuf_submit_dynptr().
+ */
+SEC("?raw_tp/sys_nanosleep")
+int user_ringbuf_callback_submit_dynptr(void *ctx)
+{
+	bpf_user_ringbuf_drain(&user_ringbuf, try_submit_dynptr, NULL, 0);
+
+	return 0;
+}
+
+static long
+invalid_drain_callback_return(struct bpf_dynptr *dynptr, void *context)
+{
+	return 2;
+}
+
+/* A bpf_user_ringbuf_drain callback should not be able to return an invalid
+ * value (here, 2).
+ */
+SEC("?raw_tp/sys_nanosleep")
+int user_ringbuf_callback_invalid_return(void *ctx)
+{
+	bpf_user_ringbuf_drain(&user_ringbuf, invalid_drain_callback_return, NULL, 0);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/user_ringbuf_success.c b/tools/testing/selftests/bpf/progs/user_ringbuf_success.c
new file mode 100644
index 000000000000..099c23d9aa21
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/user_ringbuf_success.c
@@ -0,0 +1,218 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "test_user_ringbuf.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+	__uint(type, BPF_MAP_TYPE_USER_RINGBUF);
+} user_ringbuf SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_RINGBUF);
+} kernel_ringbuf SEC(".maps");
+
+/* inputs */
+int pid, err, val;
+
+int read = 0;
+
+/* Counters used for end-to-end protocol test */
+__u64 kern_mutated = 0;
+__u64 user_mutated = 0;
+__u64 expected_user_mutated = 0;
+
+static int
+is_test_process(void)
+{
+	int cur_pid = bpf_get_current_pid_tgid() >> 32;
+
+	return cur_pid == pid;
+}
+
+static long
+record_sample(struct bpf_dynptr *dynptr, void *context)
+{
+	const struct sample *sample = NULL;
+	struct sample stack_sample;
+	int status;
+	static int num_calls;
+
+	if (num_calls++ % 2 == 0) {
+		status = bpf_dynptr_read(&stack_sample, sizeof(stack_sample), dynptr, 0, 0);
+		if (status) {
+			bpf_printk("bpf_dynptr_read() failed: %d\n", status);
+			err = 1;
+			return 0;
+		}
+	} else {
+		sample = bpf_dynptr_data(dynptr, 0, sizeof(*sample));
+		if (!sample) {
+			bpf_printk("Unexpectedly failed to get sample\n");
+			err = 2;
+			return 0;
+		}
+		stack_sample = *sample;
+	}
+
+	__sync_fetch_and_add(&read, 1);
+	return 0;
+}
+
+static void
+handle_sample_msg(const struct test_msg *msg)
+{
+	switch (msg->msg_op) {
+	case TEST_MSG_OP_INC64:
+		kern_mutated += msg->operand_64;
+		break;
+	case TEST_MSG_OP_INC32:
+		kern_mutated += msg->operand_32;
+		break;
+	case TEST_MSG_OP_MUL64:
+		kern_mutated *= msg->operand_64;
+		break;
+	case TEST_MSG_OP_MUL32:
+		kern_mutated *= msg->operand_32;
+		break;
+	default:
+		bpf_printk("Unrecognized op %d\n", msg->msg_op);
+		err = 2;
+	}
+}
+
+static long
+read_protocol_msg(struct bpf_dynptr *dynptr, void *context)
+{
+	const struct test_msg *msg = NULL;
+
+	msg = bpf_dynptr_data(dynptr, 0, sizeof(*msg));
+	if (!msg) {
+		err = 1;
+		bpf_printk("Unexpectedly failed to get msg\n");
+		return 0;
+	}
+
+	handle_sample_msg(msg);
+
+	return 0;
+}
+
+static int publish_next_kern_msg(__u32 index, void *context)
+{
+	struct test_msg *msg = NULL;
+	int operand_64 = TEST_OP_64;
+	int operand_32 = TEST_OP_32;
+
+	msg = bpf_ringbuf_reserve(&kernel_ringbuf, sizeof(*msg), 0);
+	if (!msg) {
+		err = 4;
+		return 1;
+	}
+
+	switch (index % TEST_MSG_OP_NUM_OPS) {
+	case TEST_MSG_OP_INC64:
+		msg->operand_64 = operand_64;
+		msg->msg_op = TEST_MSG_OP_INC64;
+		expected_user_mutated += operand_64;
+		break;
+	case TEST_MSG_OP_INC32:
+		msg->operand_32 = operand_32;
+		msg->msg_op = TEST_MSG_OP_INC32;
+		expected_user_mutated += operand_32;
+		break;
+	case TEST_MSG_OP_MUL64:
+		msg->operand_64 = operand_64;
+		msg->msg_op = TEST_MSG_OP_MUL64;
+		expected_user_mutated *= operand_64;
+		break;
+	case TEST_MSG_OP_MUL32:
+		msg->operand_32 = operand_32;
+		msg->msg_op = TEST_MSG_OP_MUL32;
+		expected_user_mutated *= operand_32;
+		break;
+	default:
+		bpf_ringbuf_discard(msg, 0);
+		err = 5;
+		return 1;
+	}
+
+	bpf_ringbuf_submit(msg, 0);
+
+	return 0;
+}
+
+static void
+publish_kern_messages(void)
+{
+	if (expected_user_mutated != user_mutated) {
+		bpf_printk("%lu != %lu\n", expected_user_mutated, user_mutated);
+		err = 3;
+		return;
+	}
+
+	bpf_loop(8, publish_next_kern_msg, NULL, 0);
+}
+
+SEC("fentry/" SYS_PREFIX "sys_prctl")
+int test_user_ringbuf_protocol(void *ctx)
+{
+	long status = 0;
+	struct sample *sample = NULL;
+	struct bpf_dynptr ptr;
+
+	if (!is_test_process())
+		return 0;
+
+	status = bpf_user_ringbuf_drain(&user_ringbuf, read_protocol_msg, NULL, 0);
+	if (status < 0) {
+		bpf_printk("Drain returned: %ld\n", status);
+		err = 1;
+		return 0;
+	}
+
+	publish_kern_messages();
+
+	return 0;
+}
+
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
+int test_user_ringbuf(void *ctx)
+{
+	int status = 0;
+	struct sample *sample = NULL;
+	struct bpf_dynptr ptr;
+
+	if (!is_test_process())
+		return 0;
+
+	err = bpf_user_ringbuf_drain(&user_ringbuf, record_sample, NULL, 0);
+
+	return 0;
+}
+
+static long
+do_nothing_cb(struct bpf_dynptr *dynptr, void *context)
+{
+	__sync_fetch_and_add(&read, 1);
+	return 0;
+}
+
+SEC("fentry/" SYS_PREFIX "sys_getrlimit")
+int test_user_ringbuf_epoll(void *ctx)
+{
+	long num_samples;
+
+	if (!is_test_process())
+		return 0;
+
+	num_samples = bpf_user_ringbuf_drain(&user_ringbuf, do_nothing_cb, NULL, 0);
+	if (num_samples <= 0)
+		err = 1;
+
+	return 0;
+}
-- 
cgit v1.2.3


From 9f2f5d7830ddfeeca147595f473e14eadbeb3db1 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Fri, 9 Sep 2022 19:52:14 -0700
Subject: libbpf: Improve BPF_PROG2 macro code quality and description

Commit 34586d29f8df ("libbpf: Add new BPF_PROG2 macro") added BPF_PROG2
macro for trampoline based programs with struct arguments. Andrii
made a few suggestions to improve code quality and description.
This patch implements these suggestions, including a better internal
macro name, a consistent usage pattern for __builtin_choose_expr(), a
simpler macro definition for always-inline func arguments, and a
better macro description.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Stanislav Fomichev <sdf@google.com>
Link: https://lore.kernel.org/bpf/20220910025214.1536510-1-yhs@fb.com
---
 tools/lib/bpf/bpf_tracing.h | 154 ++++++++++++++++++++++++++------------------
 1 file changed, 91 insertions(+), 63 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h
index 8d4bdd18cb3d..2972dc25ff72 100644
--- a/tools/lib/bpf/bpf_tracing.h
+++ b/tools/lib/bpf/bpf_tracing.h
@@ -438,84 +438,112 @@ typeof(name(0)) name(unsigned long long *ctx)				    \
 static __always_inline typeof(name(0))					    \
 ____##name(unsigned long long *ctx, ##args)
 
-#ifndef ____bpf_nth
-#define ____bpf_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, N, ...) N
+#ifndef ___bpf_nth2
+#define ___bpf_nth2(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13,	\
+		    _14, _15, _16, _17, _18, _19, _20, _21, _22, _23, _24, N, ...) N
 #endif
-#ifndef ____bpf_narg
-#define ____bpf_narg(...) ____bpf_nth(_, ##__VA_ARGS__, 12, 12, 11, 11, 10, 10, 9, 9, 8, 8, 7, 7, 6, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0)
+#ifndef ___bpf_narg2
+#define ___bpf_narg2(...)	\
+	___bpf_nth2(_, ##__VA_ARGS__, 12, 12, 11, 11, 10, 10, 9, 9, 8, 8, 7, 7,	\
+		    6, 6, 5, 5, 4, 4, 3, 3, 2, 2, 1, 1, 0)
 #endif
 
-#define BPF_REG_CNT(t) \
-	(__builtin_choose_expr(sizeof(t) == 1 || sizeof(t) == 2 || sizeof(t) == 4 || sizeof(t) == 8, 1,	\
-	 __builtin_choose_expr(sizeof(t) == 16, 2,							\
-			       (void)0)))
-
-#define ____bpf_reg_cnt0()			(0)
-#define ____bpf_reg_cnt1(t, x)			(____bpf_reg_cnt0() + BPF_REG_CNT(t))
-#define ____bpf_reg_cnt2(t, x, args...)		(____bpf_reg_cnt1(args) + BPF_REG_CNT(t))
-#define ____bpf_reg_cnt3(t, x, args...)		(____bpf_reg_cnt2(args) + BPF_REG_CNT(t))
-#define ____bpf_reg_cnt4(t, x, args...)		(____bpf_reg_cnt3(args) + BPF_REG_CNT(t))
-#define ____bpf_reg_cnt5(t, x, args...)		(____bpf_reg_cnt4(args) + BPF_REG_CNT(t))
-#define ____bpf_reg_cnt6(t, x, args...)		(____bpf_reg_cnt5(args) + BPF_REG_CNT(t))
-#define ____bpf_reg_cnt7(t, x, args...)		(____bpf_reg_cnt6(args) + BPF_REG_CNT(t))
-#define ____bpf_reg_cnt8(t, x, args...)		(____bpf_reg_cnt7(args) + BPF_REG_CNT(t))
-#define ____bpf_reg_cnt9(t, x, args...)		(____bpf_reg_cnt8(args) + BPF_REG_CNT(t))
-#define ____bpf_reg_cnt10(t, x, args...)	(____bpf_reg_cnt9(args) + BPF_REG_CNT(t))
-#define ____bpf_reg_cnt11(t, x, args...)	(____bpf_reg_cnt10(args) + BPF_REG_CNT(t))
-#define ____bpf_reg_cnt12(t, x, args...)	(____bpf_reg_cnt11(args) + BPF_REG_CNT(t))
-#define ____bpf_reg_cnt(args...)	 ___bpf_apply(____bpf_reg_cnt, ____bpf_narg(args))(args)
-
-#define ____bpf_union_arg(t, x, n) \
-	__builtin_choose_expr(sizeof(t) == 1, ({ union { struct { __u8 x; } ___z; t x; } ___tmp = { .___z = {ctx[n]}}; ___tmp.x; }), \
-	__builtin_choose_expr(sizeof(t) == 2, ({ union { struct { __u16 x; } ___z; t x; } ___tmp = { .___z = {ctx[n]} }; ___tmp.x; }), \
-	__builtin_choose_expr(sizeof(t) == 4, ({ union { struct { __u32 x; } ___z; t x; } ___tmp = { .___z = {ctx[n]} }; ___tmp.x; }), \
-	__builtin_choose_expr(sizeof(t) == 8, ({ union { struct { __u64 x; } ___z; t x; } ___tmp = {.___z = {ctx[n]} }; ___tmp.x; }), \
-	__builtin_choose_expr(sizeof(t) == 16, ({ union { struct { __u64 x, y; } ___z; t x; } ___tmp = {.___z = {ctx[n], ctx[n + 1]} }; ___tmp.x; }), \
+#define ___bpf_treg_cnt(t) \
+	__builtin_choose_expr(sizeof(t) == 1, 1,	\
+	__builtin_choose_expr(sizeof(t) == 2, 1,	\
+	__builtin_choose_expr(sizeof(t) == 4, 1,	\
+	__builtin_choose_expr(sizeof(t) == 8, 1,	\
+	__builtin_choose_expr(sizeof(t) == 16, 2,	\
 			      (void)0)))))
 
-#define ____bpf_ctx_arg0(n, args...)
-#define ____bpf_ctx_arg1(n, t, x)		, ____bpf_union_arg(t, x, n - ____bpf_reg_cnt1(t, x))
-#define ____bpf_ctx_arg2(n, t, x, args...)	, ____bpf_union_arg(t, x, n - ____bpf_reg_cnt2(t, x, args)) ____bpf_ctx_arg1(n, args)
-#define ____bpf_ctx_arg3(n, t, x, args...)	, ____bpf_union_arg(t, x, n - ____bpf_reg_cnt3(t, x, args)) ____bpf_ctx_arg2(n, args)
-#define ____bpf_ctx_arg4(n, t, x, args...)	, ____bpf_union_arg(t, x, n - ____bpf_reg_cnt4(t, x, args)) ____bpf_ctx_arg3(n, args)
-#define ____bpf_ctx_arg5(n, t, x, args...)	, ____bpf_union_arg(t, x, n - ____bpf_reg_cnt5(t, x, args)) ____bpf_ctx_arg4(n, args)
-#define ____bpf_ctx_arg6(n, t, x, args...)	, ____bpf_union_arg(t, x, n - ____bpf_reg_cnt6(t, x, args)) ____bpf_ctx_arg5(n, args)
-#define ____bpf_ctx_arg7(n, t, x, args...)	, ____bpf_union_arg(t, x, n - ____bpf_reg_cnt7(t, x, args)) ____bpf_ctx_arg6(n, args)
-#define ____bpf_ctx_arg8(n, t, x, args...)	, ____bpf_union_arg(t, x, n - ____bpf_reg_cnt8(t, x, args)) ____bpf_ctx_arg7(n, args)
-#define ____bpf_ctx_arg9(n, t, x, args...)	, ____bpf_union_arg(t, x, n - ____bpf_reg_cnt9(t, x, args)) ____bpf_ctx_arg8(n, args)
-#define ____bpf_ctx_arg10(n, t, x, args...)	, ____bpf_union_arg(t, x, n - ____bpf_reg_cnt10(t, x, args)) ____bpf_ctx_arg9(n, args)
-#define ____bpf_ctx_arg11(n, t, x, args...)	, ____bpf_union_arg(t, x, n - ____bpf_reg_cnt11(t, x, args)) ____bpf_ctx_arg10(n, args)
-#define ____bpf_ctx_arg12(n, t, x, args...)	, ____bpf_union_arg(t, x, n - ____bpf_reg_cnt12(t, x, args)) ____bpf_ctx_arg11(n, args)
-#define ____bpf_ctx_arg(n, args...)	___bpf_apply(____bpf_ctx_arg, ____bpf_narg(args))(n, args)
-
-#define ____bpf_ctx_decl0()
-#define ____bpf_ctx_decl1(t, x)			, t x
-#define ____bpf_ctx_decl2(t, x, args...)	, t x ____bpf_ctx_decl1(args)
-#define ____bpf_ctx_decl3(t, x, args...)	, t x ____bpf_ctx_decl2(args)
-#define ____bpf_ctx_decl4(t, x, args...)	, t x ____bpf_ctx_decl3(args)
-#define ____bpf_ctx_decl5(t, x, args...)	, t x ____bpf_ctx_decl4(args)
-#define ____bpf_ctx_decl6(t, x, args...)	, t x ____bpf_ctx_decl5(args)
-#define ____bpf_ctx_decl7(t, x, args...)	, t x ____bpf_ctx_decl6(args)
-#define ____bpf_ctx_decl8(t, x, args...)	, t x ____bpf_ctx_decl7(args)
-#define ____bpf_ctx_decl9(t, x, args...)	, t x ____bpf_ctx_decl8(args)
-#define ____bpf_ctx_decl10(t, x, args...)	, t x ____bpf_ctx_decl9(args)
-#define ____bpf_ctx_decl11(t, x, args...)	, t x ____bpf_ctx_decl10(args)
-#define ____bpf_ctx_decl12(t, x, args...)	, t x ____bpf_ctx_decl11(args)
-#define ____bpf_ctx_decl(args...)	___bpf_apply(____bpf_ctx_decl, ____bpf_narg(args))(args)
+#define ___bpf_reg_cnt0()		(0)
+#define ___bpf_reg_cnt1(t, x)		(___bpf_reg_cnt0() + ___bpf_treg_cnt(t))
+#define ___bpf_reg_cnt2(t, x, args...)	(___bpf_reg_cnt1(args) + ___bpf_treg_cnt(t))
+#define ___bpf_reg_cnt3(t, x, args...)	(___bpf_reg_cnt2(args) + ___bpf_treg_cnt(t))
+#define ___bpf_reg_cnt4(t, x, args...)	(___bpf_reg_cnt3(args) + ___bpf_treg_cnt(t))
+#define ___bpf_reg_cnt5(t, x, args...)	(___bpf_reg_cnt4(args) + ___bpf_treg_cnt(t))
+#define ___bpf_reg_cnt6(t, x, args...)	(___bpf_reg_cnt5(args) + ___bpf_treg_cnt(t))
+#define ___bpf_reg_cnt7(t, x, args...)	(___bpf_reg_cnt6(args) + ___bpf_treg_cnt(t))
+#define ___bpf_reg_cnt8(t, x, args...)	(___bpf_reg_cnt7(args) + ___bpf_treg_cnt(t))
+#define ___bpf_reg_cnt9(t, x, args...)	(___bpf_reg_cnt8(args) + ___bpf_treg_cnt(t))
+#define ___bpf_reg_cnt10(t, x, args...)	(___bpf_reg_cnt9(args) + ___bpf_treg_cnt(t))
+#define ___bpf_reg_cnt11(t, x, args...)	(___bpf_reg_cnt10(args) + ___bpf_treg_cnt(t))
+#define ___bpf_reg_cnt12(t, x, args...)	(___bpf_reg_cnt11(args) + ___bpf_treg_cnt(t))
+#define ___bpf_reg_cnt(args...)	 ___bpf_apply(___bpf_reg_cnt, ___bpf_narg2(args))(args)
+
+#define ___bpf_union_arg(t, x, n) \
+	__builtin_choose_expr(sizeof(t) == 1, ({ union { __u8 z[1]; t x; } ___t = { .z = {ctx[n]}}; ___t.x; }), \
+	__builtin_choose_expr(sizeof(t) == 2, ({ union { __u16 z[1]; t x; } ___t = { .z = {ctx[n]} }; ___t.x; }), \
+	__builtin_choose_expr(sizeof(t) == 4, ({ union { __u32 z[1]; t x; } ___t = { .z = {ctx[n]} }; ___t.x; }), \
+	__builtin_choose_expr(sizeof(t) == 8, ({ union { __u64 z[1]; t x; } ___t = {.z = {ctx[n]} }; ___t.x; }), \
+	__builtin_choose_expr(sizeof(t) == 16, ({ union { __u64 z[2]; t x; } ___t = {.z = {ctx[n], ctx[n + 1]} }; ___t.x; }), \
+			      (void)0)))))
+
+#define ___bpf_ctx_arg0(n, args...)
+#define ___bpf_ctx_arg1(n, t, x)		, ___bpf_union_arg(t, x, n - ___bpf_reg_cnt1(t, x))
+#define ___bpf_ctx_arg2(n, t, x, args...)	, ___bpf_union_arg(t, x, n - ___bpf_reg_cnt2(t, x, args)) ___bpf_ctx_arg1(n, args)
+#define ___bpf_ctx_arg3(n, t, x, args...)	, ___bpf_union_arg(t, x, n - ___bpf_reg_cnt3(t, x, args)) ___bpf_ctx_arg2(n, args)
+#define ___bpf_ctx_arg4(n, t, x, args...)	, ___bpf_union_arg(t, x, n - ___bpf_reg_cnt4(t, x, args)) ___bpf_ctx_arg3(n, args)
+#define ___bpf_ctx_arg5(n, t, x, args...)	, ___bpf_union_arg(t, x, n - ___bpf_reg_cnt5(t, x, args)) ___bpf_ctx_arg4(n, args)
+#define ___bpf_ctx_arg6(n, t, x, args...)	, ___bpf_union_arg(t, x, n - ___bpf_reg_cnt6(t, x, args)) ___bpf_ctx_arg5(n, args)
+#define ___bpf_ctx_arg7(n, t, x, args...)	, ___bpf_union_arg(t, x, n - ___bpf_reg_cnt7(t, x, args)) ___bpf_ctx_arg6(n, args)
+#define ___bpf_ctx_arg8(n, t, x, args...)	, ___bpf_union_arg(t, x, n - ___bpf_reg_cnt8(t, x, args)) ___bpf_ctx_arg7(n, args)
+#define ___bpf_ctx_arg9(n, t, x, args...)	, ___bpf_union_arg(t, x, n - ___bpf_reg_cnt9(t, x, args)) ___bpf_ctx_arg8(n, args)
+#define ___bpf_ctx_arg10(n, t, x, args...)	, ___bpf_union_arg(t, x, n - ___bpf_reg_cnt10(t, x, args)) ___bpf_ctx_arg9(n, args)
+#define ___bpf_ctx_arg11(n, t, x, args...)	, ___bpf_union_arg(t, x, n - ___bpf_reg_cnt11(t, x, args)) ___bpf_ctx_arg10(n, args)
+#define ___bpf_ctx_arg12(n, t, x, args...)	, ___bpf_union_arg(t, x, n - ___bpf_reg_cnt12(t, x, args)) ___bpf_ctx_arg11(n, args)
+#define ___bpf_ctx_arg(args...)	___bpf_apply(___bpf_ctx_arg, ___bpf_narg2(args))(___bpf_reg_cnt(args), args)
+
+#define ___bpf_ctx_decl0()
+#define ___bpf_ctx_decl1(t, x)			, t x
+#define ___bpf_ctx_decl2(t, x, args...)		, t x ___bpf_ctx_decl1(args)
+#define ___bpf_ctx_decl3(t, x, args...)		, t x ___bpf_ctx_decl2(args)
+#define ___bpf_ctx_decl4(t, x, args...)		, t x ___bpf_ctx_decl3(args)
+#define ___bpf_ctx_decl5(t, x, args...)		, t x ___bpf_ctx_decl4(args)
+#define ___bpf_ctx_decl6(t, x, args...)		, t x ___bpf_ctx_decl5(args)
+#define ___bpf_ctx_decl7(t, x, args...)		, t x ___bpf_ctx_decl6(args)
+#define ___bpf_ctx_decl8(t, x, args...)		, t x ___bpf_ctx_decl7(args)
+#define ___bpf_ctx_decl9(t, x, args...)		, t x ___bpf_ctx_decl8(args)
+#define ___bpf_ctx_decl10(t, x, args...)	, t x ___bpf_ctx_decl9(args)
+#define ___bpf_ctx_decl11(t, x, args...)	, t x ___bpf_ctx_decl10(args)
+#define ___bpf_ctx_decl12(t, x, args...)	, t x ___bpf_ctx_decl11(args)
+#define ___bpf_ctx_decl(args...)	___bpf_apply(___bpf_ctx_decl, ___bpf_narg2(args))(args)
 
 /*
- * BPF_PROG2 can handle struct arguments.
+ * BPF_PROG2 is an enhanced version of BPF_PROG in order to handle struct
+ * arguments. Since each struct argument might take one or two u64 values
+ * in the trampoline stack, argument type size is needed to place proper number
+ * of u64 values for each argument. Therefore, BPF_PROG2 has different
+ * syntax from BPF_PROG. For example, for the following BPF_PROG syntax:
+ *
+ *   int BPF_PROG(test2, int a, int b) { ... }
+ *
+ * the corresponding BPF_PROG2 syntax is:
+ *
+ *   int BPF_PROG2(test2, int, a, int, b) { ... }
+ *
+ * where type and the corresponding argument name are separated by comma.
+ *
+ * Use BPF_PROG2 macro if one of the arguments might be a struct/union larger
+ * than 8 bytes:
+ *
+ *   int BPF_PROG2(test_struct_arg, struct bpf_testmod_struct_arg_1, a, int, b,
+ *		   int, c, int, d, struct bpf_testmod_struct_arg_2, e, int, ret)
+ *   {
+ *        // access a, b, c, d, e, and ret directly
+ *        ...
+ *   }
  */
 #define BPF_PROG2(name, args...)						\
 name(unsigned long long *ctx);							\
 static __always_inline typeof(name(0))						\
-____##name(unsigned long long *ctx ____bpf_ctx_decl(args));			\
+____##name(unsigned long long *ctx ___bpf_ctx_decl(args));			\
 typeof(name(0)) name(unsigned long long *ctx)					\
 {										\
-	return ____##name(ctx ____bpf_ctx_arg(____bpf_reg_cnt(args), args));	\
+	return ____##name(ctx ___bpf_ctx_arg(args));				\
 }										\
 static __always_inline typeof(name(0))						\
-____##name(unsigned long long *ctx ____bpf_ctx_decl(args))
+____##name(unsigned long long *ctx ___bpf_ctx_decl(args))
 
 struct pt_regs;
 
-- 
cgit v1.2.3


From 272d1f4cfa3c75d4828b62ef33ccb207da3b7350 Mon Sep 17 00:00:00 2001
From: Yauheni Kaliuta <ykaliuta@redhat.com>
Date: Thu, 8 Sep 2022 15:01:46 +0300
Subject: selftests: bpf: test_kmod.sh: Pass parameters to the module

It's possible to specify particular tests for test_bpf.ko with
module parameters. Make it possible to pass the module parameters,
example:

test_kmod.sh test_range=1,3

Since the magnitude tests take a long time, it can be reasonable to skip
them.

Signed-off-by: Yauheni Kaliuta <ykaliuta@redhat.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220908120146.381218-1-ykaliuta@redhat.com
---
 tools/testing/selftests/bpf/test_kmod.sh | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_kmod.sh b/tools/testing/selftests/bpf/test_kmod.sh
index 4f6444bcd53f..d4a4279c0181 100755
--- a/tools/testing/selftests/bpf/test_kmod.sh
+++ b/tools/testing/selftests/bpf/test_kmod.sh
@@ -1,6 +1,11 @@
 #!/bin/sh
 # SPDX-License-Identifier: GPL-2.0
 
+# Usage:
+# ./test_kmod.sh [module_param]...
+# Ex.: ./test_kmod.sh test_range=1,3
+# All the parameters are passed to the kernel module.
+
 # Kselftest framework requirement - SKIP code is 4.
 ksft_skip=4
 
@@ -26,15 +31,15 @@ test_run()
 	echo "[ JIT enabled:$1 hardened:$2 ]"
 	dmesg -C
 	if [ -f ${OUTPUT}/lib/test_bpf.ko ]; then
-		insmod ${OUTPUT}/lib/test_bpf.ko 2> /dev/null
+		insmod ${OUTPUT}/lib/test_bpf.ko "$@" 2> /dev/null
 		if [ $? -ne 0 ]; then
 			rc=1
 		fi
 	else
 		# Use modprobe dry run to check for missing test_bpf module
-		if ! /sbin/modprobe -q -n test_bpf; then
+		if ! /sbin/modprobe -q -n test_bpf "$@"; then
 			echo "test_bpf: [SKIP]"
-		elif /sbin/modprobe -q test_bpf; then
+		elif /sbin/modprobe -q test_bpf "$@"; then
 			echo "test_bpf: ok"
 		else
 			echo "test_bpf: [FAIL]"
-- 
cgit v1.2.3


From 01f2e36c959c813a532ae836db49b2ac9de46de4 Mon Sep 17 00:00:00 2001
From: Tao Chen <chentao.kernel@linux.alibaba.com>
Date: Tue, 13 Sep 2022 00:43:00 +0800
Subject: libbpf: Support raw BTF placed in the default search path

Currently, the default vmlinux files at '/boot/vmlinux-*',
'/lib/modules/*/vmlinux-*' etc. are parsed with 'btf__parse_elf()' to
extract BTF. It is possible that these files are actually raw BTF files
similar to /sys/kernel/btf/vmlinux. So parse these files with
'btf__parse' which tries both raw format and ELF format.

This might be useful in some scenarios where users put their custom BTF
into known locations and don't want to specify btf_custom_path option.

Signed-off-by: Tao Chen <chentao.kernel@linux.alibaba.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/3f59fb5a345d2e4f10e16fe9e35fbc4c03ecaa3e.1662999860.git.chentao.kernel@linux.alibaba.com
---
 tools/lib/bpf/btf.c | 30 ++++++++++++------------------
 1 file changed, 12 insertions(+), 18 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index 361131518d63..b4d9a96c3c1b 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -4642,20 +4642,17 @@ static int btf_dedup_remap_types(struct btf_dedup *d)
  */
 struct btf *btf__load_vmlinux_btf(void)
 {
-	struct {
-		const char *path_fmt;
-		bool raw_btf;
-	} locations[] = {
+	const char *locations[] = {
 		/* try canonical vmlinux BTF through sysfs first */
-		{ "/sys/kernel/btf/vmlinux", true /* raw BTF */ },
-		/* fall back to trying to find vmlinux ELF on disk otherwise */
-		{ "/boot/vmlinux-%1$s" },
-		{ "/lib/modules/%1$s/vmlinux-%1$s" },
-		{ "/lib/modules/%1$s/build/vmlinux" },
-		{ "/usr/lib/modules/%1$s/kernel/vmlinux" },
-		{ "/usr/lib/debug/boot/vmlinux-%1$s" },
-		{ "/usr/lib/debug/boot/vmlinux-%1$s.debug" },
-		{ "/usr/lib/debug/lib/modules/%1$s/vmlinux" },
+		"/sys/kernel/btf/vmlinux",
+		/* fall back to trying to find vmlinux on disk otherwise */
+		"/boot/vmlinux-%1$s",
+		"/lib/modules/%1$s/vmlinux-%1$s",
+		"/lib/modules/%1$s/build/vmlinux",
+		"/usr/lib/modules/%1$s/kernel/vmlinux",
+		"/usr/lib/debug/boot/vmlinux-%1$s",
+		"/usr/lib/debug/boot/vmlinux-%1$s.debug",
+		"/usr/lib/debug/lib/modules/%1$s/vmlinux",
 	};
 	char path[PATH_MAX + 1];
 	struct utsname buf;
@@ -4665,15 +4662,12 @@ struct btf *btf__load_vmlinux_btf(void)
 	uname(&buf);
 
 	for (i = 0; i < ARRAY_SIZE(locations); i++) {
-		snprintf(path, PATH_MAX, locations[i].path_fmt, buf.release);
+		snprintf(path, PATH_MAX, locations[i], buf.release);
 
 		if (access(path, R_OK))
 			continue;
 
-		if (locations[i].raw_btf)
-			btf = btf__parse_raw(path);
-		else
-			btf = btf__parse_elf(path, NULL);
+		btf = btf__parse(path, NULL);
 		err = libbpf_get_error(btf);
 		pr_debug("loading kernel BTF '%s': %d\n", path, err);
 		if (err)
-- 
cgit v1.2.3


From e9e315b4a5de32d0482b92f482517095d5d844e4 Mon Sep 17 00:00:00 2001
From: Roberto Sassu <roberto.sassu@huawei.com>
Date: Tue, 20 Sep 2022 09:59:41 +0200
Subject: bpf: Move dynptr type check to is_dynptr_type_expected()

Move dynptr type check to is_dynptr_type_expected() from
is_dynptr_reg_valid_init(), so that callers can better determine the cause
of a negative result (dynamic pointer not valid/initialized, dynamic
pointer of the wrong type). It will be useful for example for BTF, to
restrict which dynamic pointer types can be passed to kfuncs, as initially
only the local type will be supported.

Also, splitting makes the code more readable, since checking the dynamic
pointer type is not necessarily related to validity and initialization.

Split the validity/initialization and dynamic pointer type check also in
the verifier, and adjust the expected error message in the test (a test for
an unexpected dynptr type passed to a helper cannot be added due to missing
suitable helpers, but this case has been tested manually).

Cc: Joanne Koong <joannelkoong@gmail.com>
Cc: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
Acked-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220920075951.929132-4-roberto.sassu@huaweicloud.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/verifier.c                           | 35 +++++++++++++++++++------
 tools/testing/selftests/bpf/prog_tests/dynptr.c |  2 +-
 2 files changed, 28 insertions(+), 9 deletions(-)

(limited to 'tools')

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index c76fa45a5906..c08dde19eb67 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -782,8 +782,8 @@ static bool is_dynptr_reg_valid_uninit(struct bpf_verifier_env *env, struct bpf_
 	return true;
 }
 
-static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
-				     enum bpf_arg_type arg_type)
+static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env,
+				     struct bpf_reg_state *reg)
 {
 	struct bpf_func_state *state = func(env, reg);
 	int spi = get_spi(reg->off);
@@ -799,11 +799,24 @@ static bool is_dynptr_reg_valid_init(struct bpf_verifier_env *env, struct bpf_re
 			return false;
 	}
 
+	return true;
+}
+
+static bool is_dynptr_type_expected(struct bpf_verifier_env *env,
+				    struct bpf_reg_state *reg,
+				    enum bpf_arg_type arg_type)
+{
+	struct bpf_func_state *state = func(env, reg);
+	enum bpf_dynptr_type dynptr_type;
+	int spi = get_spi(reg->off);
+
 	/* ARG_PTR_TO_DYNPTR takes any type of dynptr */
 	if (arg_type == ARG_PTR_TO_DYNPTR)
 		return true;
 
-	return state->stack[spi].spilled_ptr.dynptr.type == arg_to_dynptr_type(arg_type);
+	dynptr_type = arg_to_dynptr_type(arg_type);
+
+	return state->stack[spi].spilled_ptr.dynptr.type == dynptr_type;
 }
 
 /* The reg state of a pointer or a bounded scalar was saved when
@@ -6095,21 +6108,27 @@ skip_type_check:
 			}
 
 			meta->uninit_dynptr_regno = regno;
-		} else if (!is_dynptr_reg_valid_init(env, reg, arg_type)) {
+		} else if (!is_dynptr_reg_valid_init(env, reg)) {
+			verbose(env,
+				"Expected an initialized dynptr as arg #%d\n",
+				arg + 1);
+			return -EINVAL;
+		} else if (!is_dynptr_type_expected(env, reg, arg_type)) {
 			const char *err_extra = "";
 
 			switch (arg_type & DYNPTR_TYPE_FLAG_MASK) {
 			case DYNPTR_TYPE_LOCAL:
-				err_extra = "local ";
+				err_extra = "local";
 				break;
 			case DYNPTR_TYPE_RINGBUF:
-				err_extra = "ringbuf ";
+				err_extra = "ringbuf";
 				break;
 			default:
+				err_extra = "<unknown>";
 				break;
 			}
-
-			verbose(env, "Expected an initialized %sdynptr as arg #%d\n",
+			verbose(env,
+				"Expected a dynptr of type %s as arg #%d\n",
 				err_extra, arg + 1);
 			return -EINVAL;
 		}
diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c
index bcf80b9f7c27..8fc4e6c02bfd 100644
--- a/tools/testing/selftests/bpf/prog_tests/dynptr.c
+++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c
@@ -30,7 +30,7 @@ static struct {
 	{"invalid_helper2", "Expected an initialized dynptr as arg #3"},
 	{"invalid_write1", "Expected an initialized dynptr as arg #1"},
 	{"invalid_write2", "Expected an initialized dynptr as arg #3"},
-	{"invalid_write3", "Expected an initialized ringbuf dynptr as arg #1"},
+	{"invalid_write3", "Expected an initialized dynptr as arg #1"},
 	{"invalid_write4", "arg 1 is an unacquired reference"},
 	{"invalid_read1", "invalid read from stack"},
 	{"invalid_read2", "cannot pass in dynptr at an offset"},
-- 
cgit v1.2.3


From 94fd7420faa0bc85341c0a9cbe5e5240ef4f123d Mon Sep 17 00:00:00 2001
From: Roberto Sassu <roberto.sassu@huawei.com>
Date: Tue, 20 Sep 2022 09:59:47 +0200
Subject: selftests/bpf: Compile kernel with everything as built-in
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since the eBPF CI does not support kernel modules, change the kernel config
to compile everything as built-in.

Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
Acked-by: Daniel Müller <deso@posteo.net>
Acked-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220920075951.929132-10-roberto.sassu@huaweicloud.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/config        | 26 +++++++++++++-------------
 tools/testing/selftests/bpf/config.x86_64 |  2 +-
 2 files changed, 14 insertions(+), 14 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 3fc46f9cfb22..0fdd11e6b742 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -7,9 +7,9 @@ CONFIG_BPF_LSM=y
 CONFIG_BPF_STREAM_PARSER=y
 CONFIG_BPF_SYSCALL=y
 CONFIG_CGROUP_BPF=y
-CONFIG_CRYPTO_HMAC=m
-CONFIG_CRYPTO_SHA256=m
-CONFIG_CRYPTO_USER_API_HASH=m
+CONFIG_CRYPTO_HMAC=y
+CONFIG_CRYPTO_SHA256=y
+CONFIG_CRYPTO_USER_API_HASH=y
 CONFIG_DYNAMIC_FTRACE=y
 CONFIG_FPROBE=y
 CONFIG_FTRACE_SYSCALLS=y
@@ -24,30 +24,30 @@ CONFIG_IP_NF_FILTER=y
 CONFIG_IP_NF_RAW=y
 CONFIG_IP_NF_TARGET_SYNPROXY=y
 CONFIG_IPV6=y
-CONFIG_IPV6_FOU=m
-CONFIG_IPV6_FOU_TUNNEL=m
+CONFIG_IPV6_FOU=y
+CONFIG_IPV6_FOU_TUNNEL=y
 CONFIG_IPV6_GRE=y
 CONFIG_IPV6_SEG6_BPF=y
-CONFIG_IPV6_SIT=m
+CONFIG_IPV6_SIT=y
 CONFIG_IPV6_TUNNEL=y
 CONFIG_LIRC=y
 CONFIG_LWTUNNEL=y
 CONFIG_MPLS=y
-CONFIG_MPLS_IPTUNNEL=m
-CONFIG_MPLS_ROUTING=m
+CONFIG_MPLS_IPTUNNEL=y
+CONFIG_MPLS_ROUTING=y
 CONFIG_MPTCP=y
 CONFIG_NET_CLS_ACT=y
 CONFIG_NET_CLS_BPF=y
-CONFIG_NET_CLS_FLOWER=m
-CONFIG_NET_FOU=m
+CONFIG_NET_CLS_FLOWER=y
+CONFIG_NET_FOU=y
 CONFIG_NET_FOU_IP_TUNNELS=y
 CONFIG_NET_IPGRE=y
 CONFIG_NET_IPGRE_DEMUX=y
 CONFIG_NET_IPIP=y
-CONFIG_NET_MPLS_GSO=m
+CONFIG_NET_MPLS_GSO=y
 CONFIG_NET_SCH_INGRESS=y
 CONFIG_NET_SCHED=y
-CONFIG_NETDEVSIM=m
+CONFIG_NETDEVSIM=y
 CONFIG_NETFILTER=y
 CONFIG_NETFILTER_SYNPROXY=y
 CONFIG_NETFILTER_XT_CONNMARK=y
@@ -60,7 +60,7 @@ CONFIG_NF_DEFRAG_IPV6=y
 CONFIG_RC_CORE=y
 CONFIG_SECURITY=y
 CONFIG_SECURITYFS=y
-CONFIG_TEST_BPF=m
+CONFIG_TEST_BPF=y
 CONFIG_USERFAULTFD=y
 CONFIG_VXLAN=y
 CONFIG_XDP_SOCKETS=y
diff --git a/tools/testing/selftests/bpf/config.x86_64 b/tools/testing/selftests/bpf/config.x86_64
index f0859a1d37ab..ce70c9509204 100644
--- a/tools/testing/selftests/bpf/config.x86_64
+++ b/tools/testing/selftests/bpf/config.x86_64
@@ -47,7 +47,7 @@ CONFIG_CPU_IDLE_GOV_LADDER=y
 CONFIG_CPUSETS=y
 CONFIG_CRC_T10DIF=y
 CONFIG_CRYPTO_BLAKE2B=y
-CONFIG_CRYPTO_DEV_VIRTIO=m
+CONFIG_CRYPTO_DEV_VIRTIO=y
 CONFIG_CRYPTO_SEQIV=y
 CONFIG_CRYPTO_XXHASH=y
 CONFIG_DCB=y
-- 
cgit v1.2.3


From 7c036ed9e0065e852fb1886d9ea97ceb35680e3f Mon Sep 17 00:00:00 2001
From: Roberto Sassu <roberto.sassu@huawei.com>
Date: Tue, 20 Sep 2022 09:59:48 +0200
Subject: selftests/bpf: Add verifier tests for bpf_lookup_*_key() and
 bpf_key_put()

Add verifier tests for bpf_lookup_*_key() and bpf_key_put(), to ensure that
acquired key references stored in the bpf_key structure are released, that
a non-NULL bpf_key pointer is passed to bpf_key_put(), and that key
references are not leaked.

Also, slightly modify test_verifier.c, to find the BTF ID of the attach
point for the LSM program type (currently, it is done only for TRACING).

Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
Acked-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220920075951.929132-11-roberto.sassu@huaweicloud.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/config                 |   1 +
 tools/testing/selftests/bpf/test_verifier.c        |   3 +-
 .../testing/selftests/bpf/verifier/ref_tracking.c  | 139 +++++++++++++++++++++
 3 files changed, 142 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 0fdd11e6b742..add5a5a919b4 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -30,6 +30,7 @@ CONFIG_IPV6_GRE=y
 CONFIG_IPV6_SEG6_BPF=y
 CONFIG_IPV6_SIT=y
 CONFIG_IPV6_TUNNEL=y
+CONFIG_KEYS=y
 CONFIG_LIRC=y
 CONFIG_LWTUNNEL=y
 CONFIG_MPLS=y
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index f9d553fbf68a..2dbcbf363c18 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -1498,7 +1498,8 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 		opts.log_level = DEFAULT_LIBBPF_LOG_LEVEL;
 	opts.prog_flags = pflags;
 
-	if (prog_type == BPF_PROG_TYPE_TRACING && test->kfunc) {
+	if ((prog_type == BPF_PROG_TYPE_TRACING ||
+	     prog_type == BPF_PROG_TYPE_LSM) && test->kfunc) {
 		int attach_btf_id;
 
 		attach_btf_id = libbpf_find_vmlinux_btf_id(test->kfunc,
diff --git a/tools/testing/selftests/bpf/verifier/ref_tracking.c b/tools/testing/selftests/bpf/verifier/ref_tracking.c
index 57a83d763ec1..f18ce867271f 100644
--- a/tools/testing/selftests/bpf/verifier/ref_tracking.c
+++ b/tools/testing/selftests/bpf/verifier/ref_tracking.c
@@ -84,6 +84,145 @@
 	.errstr = "Unreleased reference",
 	.result = REJECT,
 },
+{
+	"reference tracking: acquire/release user key reference",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_1, -3),
+	BPF_MOV64_IMM(BPF_REG_2, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_LSM,
+	.kfunc = "bpf",
+	.expected_attach_type = BPF_LSM_MAC,
+	.flags = BPF_F_SLEEPABLE,
+	.fixup_kfunc_btf_id = {
+		{ "bpf_lookup_user_key", 2 },
+		{ "bpf_key_put", 5 },
+	},
+	.result = ACCEPT,
+},
+{
+	"reference tracking: acquire/release system key reference",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_1, 1),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_LSM,
+	.kfunc = "bpf",
+	.expected_attach_type = BPF_LSM_MAC,
+	.flags = BPF_F_SLEEPABLE,
+	.fixup_kfunc_btf_id = {
+		{ "bpf_lookup_system_key", 1 },
+		{ "bpf_key_put", 4 },
+	},
+	.result = ACCEPT,
+},
+{
+	"reference tracking: release user key reference without check",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_1, -3),
+	BPF_MOV64_IMM(BPF_REG_2, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_LSM,
+	.kfunc = "bpf",
+	.expected_attach_type = BPF_LSM_MAC,
+	.flags = BPF_F_SLEEPABLE,
+	.errstr = "arg#0 pointer type STRUCT bpf_key must point to scalar, or struct with scalar",
+	.fixup_kfunc_btf_id = {
+		{ "bpf_lookup_user_key", 2 },
+		{ "bpf_key_put", 4 },
+	},
+	.result = REJECT,
+},
+{
+	"reference tracking: release system key reference without check",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_1, 1),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_LSM,
+	.kfunc = "bpf",
+	.expected_attach_type = BPF_LSM_MAC,
+	.flags = BPF_F_SLEEPABLE,
+	.errstr = "arg#0 pointer type STRUCT bpf_key must point to scalar, or struct with scalar",
+	.fixup_kfunc_btf_id = {
+		{ "bpf_lookup_system_key", 1 },
+		{ "bpf_key_put", 3 },
+	},
+	.result = REJECT,
+},
+{
+	"reference tracking: release with NULL key pointer",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_1, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_LSM,
+	.kfunc = "bpf",
+	.expected_attach_type = BPF_LSM_MAC,
+	.flags = BPF_F_SLEEPABLE,
+	.errstr = "arg#0 pointer type STRUCT bpf_key must point to scalar, or struct with scalar",
+	.fixup_kfunc_btf_id = {
+		{ "bpf_key_put", 1 },
+	},
+	.result = REJECT,
+},
+{
+	"reference tracking: leak potential reference to user key",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_1, -3),
+	BPF_MOV64_IMM(BPF_REG_2, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_LSM,
+	.kfunc = "bpf",
+	.expected_attach_type = BPF_LSM_MAC,
+	.flags = BPF_F_SLEEPABLE,
+	.errstr = "Unreleased reference",
+	.fixup_kfunc_btf_id = {
+		{ "bpf_lookup_user_key", 2 },
+	},
+	.result = REJECT,
+},
+{
+	"reference tracking: leak potential reference to system key",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_1, 1),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_LSM,
+	.kfunc = "bpf",
+	.expected_attach_type = BPF_LSM_MAC,
+	.flags = BPF_F_SLEEPABLE,
+	.errstr = "Unreleased reference",
+	.fixup_kfunc_btf_id = {
+		{ "bpf_lookup_system_key", 1 },
+	},
+	.result = REJECT,
+},
 {
 	"reference tracking: release reference without check",
 	.insns = {
-- 
cgit v1.2.3


From ecce368d6e6d76168be5d8d34b411c69ec367859 Mon Sep 17 00:00:00 2001
From: Roberto Sassu <roberto.sassu@huawei.com>
Date: Tue, 20 Sep 2022 09:59:49 +0200
Subject: selftests/bpf: Add additional tests for bpf_lookup_*_key()

Add a test to ensure that bpf_lookup_user_key() creates a referenced
special keyring when the KEY_LOOKUP_CREATE flag is passed to this function.

Ensure that the kfunc rejects invalid flags.

Ensure that a keyring can be obtained from bpf_lookup_system_key() when one
of the pre-determined keyring IDs is provided.

The test is currently in the deny list for s390x (JIT does not support
calling kernel functions).

Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
Link: https://lore.kernel.org/r/20220920075951.929132-12-roberto.sassu@huaweicloud.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/DENYLIST.s390x         |   1 +
 .../testing/selftests/bpf/prog_tests/lookup_key.c  | 112 +++++++++++++++++++++
 .../testing/selftests/bpf/progs/test_lookup_key.c  |  46 +++++++++
 3 files changed, 159 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/lookup_key.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_lookup_key.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x
index 981c2be922f4..a6ac5dce7856 100644
--- a/tools/testing/selftests/bpf/DENYLIST.s390x
+++ b/tools/testing/selftests/bpf/DENYLIST.s390x
@@ -72,3 +72,4 @@ cgroup_hierarchical_stats                # JIT does not support calling kernel f
 htab_update                              # failed to attach: ERROR: strerror_r(-524)=22                                (trampoline)
 tracing_struct                           # failed to auto-attach: -524                                                 (trampoline)
 user_ringbuf                             # failed to find kernel BTF type ID of '__s390x_sys_prctl': -3                (?)
+lookup_key                               # JIT does not support calling kernel function                                (kfunc)
diff --git a/tools/testing/selftests/bpf/prog_tests/lookup_key.c b/tools/testing/selftests/bpf/prog_tests/lookup_key.c
new file mode 100644
index 000000000000..68025e88f352
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/lookup_key.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (C) 2022 Huawei Technologies Duesseldorf GmbH
+ *
+ * Author: Roberto Sassu <roberto.sassu@huawei.com>
+ */
+
+#include <linux/keyctl.h>
+#include <test_progs.h>
+
+#include "test_lookup_key.skel.h"
+
+#define KEY_LOOKUP_CREATE	0x01
+#define KEY_LOOKUP_PARTIAL	0x02
+
+static bool kfunc_not_supported;
+
+static int libbpf_print_cb(enum libbpf_print_level level, const char *fmt,
+			   va_list args)
+{
+	char *func;
+
+	if (strcmp(fmt, "libbpf: extern (func ksym) '%s': not found in kernel or module BTFs\n"))
+		return 0;
+
+	func = va_arg(args, char *);
+
+	if (strcmp(func, "bpf_lookup_user_key") && strcmp(func, "bpf_key_put") &&
+	    strcmp(func, "bpf_lookup_system_key"))
+		return 0;
+
+	kfunc_not_supported = true;
+	return 0;
+}
+
+void test_lookup_key(void)
+{
+	libbpf_print_fn_t old_print_cb;
+	struct test_lookup_key *skel;
+	__u32 next_id;
+	int ret;
+
+	skel = test_lookup_key__open();
+	if (!ASSERT_OK_PTR(skel, "test_lookup_key__open"))
+		return;
+
+	old_print_cb = libbpf_set_print(libbpf_print_cb);
+	ret = test_lookup_key__load(skel);
+	libbpf_set_print(old_print_cb);
+
+	if (ret < 0 && kfunc_not_supported) {
+		printf("%s:SKIP:bpf_lookup_*_key(), bpf_key_put() kfuncs not supported\n",
+		       __func__);
+		test__skip();
+		goto close_prog;
+	}
+
+	if (!ASSERT_OK(ret, "test_lookup_key__load"))
+		goto close_prog;
+
+	ret = test_lookup_key__attach(skel);
+	if (!ASSERT_OK(ret, "test_lookup_key__attach"))
+		goto close_prog;
+
+	skel->bss->monitored_pid = getpid();
+	skel->bss->key_serial = KEY_SPEC_THREAD_KEYRING;
+
+	/* The thread-specific keyring does not exist, this test fails. */
+	skel->bss->flags = 0;
+
+	ret = bpf_prog_get_next_id(0, &next_id);
+	if (!ASSERT_LT(ret, 0, "bpf_prog_get_next_id"))
+		goto close_prog;
+
+	/* Force creation of the thread-specific keyring, this test succeeds. */
+	skel->bss->flags = KEY_LOOKUP_CREATE;
+
+	ret = bpf_prog_get_next_id(0, &next_id);
+	if (!ASSERT_OK(ret, "bpf_prog_get_next_id"))
+		goto close_prog;
+
+	/* Pass both lookup flags for parameter validation. */
+	skel->bss->flags = KEY_LOOKUP_CREATE | KEY_LOOKUP_PARTIAL;
+
+	ret = bpf_prog_get_next_id(0, &next_id);
+	if (!ASSERT_OK(ret, "bpf_prog_get_next_id"))
+		goto close_prog;
+
+	/* Pass invalid flags. */
+	skel->bss->flags = UINT64_MAX;
+
+	ret = bpf_prog_get_next_id(0, &next_id);
+	if (!ASSERT_LT(ret, 0, "bpf_prog_get_next_id"))
+		goto close_prog;
+
+	skel->bss->key_serial = 0;
+	skel->bss->key_id = 1;
+
+	ret = bpf_prog_get_next_id(0, &next_id);
+	if (!ASSERT_OK(ret, "bpf_prog_get_next_id"))
+		goto close_prog;
+
+	skel->bss->key_id = UINT32_MAX;
+
+	ret = bpf_prog_get_next_id(0, &next_id);
+	ASSERT_LT(ret, 0, "bpf_prog_get_next_id");
+
+close_prog:
+	skel->bss->monitored_pid = 0;
+	test_lookup_key__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_lookup_key.c b/tools/testing/selftests/bpf/progs/test_lookup_key.c
new file mode 100644
index 000000000000..c73776990ae3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_lookup_key.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (C) 2022 Huawei Technologies Duesseldorf GmbH
+ *
+ * Author: Roberto Sassu <roberto.sassu@huawei.com>
+ */
+
+#include "vmlinux.h"
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+__u32 monitored_pid;
+__u32 key_serial;
+__u32 key_id;
+__u64 flags;
+
+extern struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym;
+extern struct bpf_key *bpf_lookup_system_key(__u64 id) __ksym;
+extern void bpf_key_put(struct bpf_key *key) __ksym;
+
+SEC("lsm.s/bpf")
+int BPF_PROG(bpf, int cmd, union bpf_attr *attr, unsigned int size)
+{
+	struct bpf_key *bkey;
+	__u32 pid;
+
+	pid = bpf_get_current_pid_tgid() >> 32;
+	if (pid != monitored_pid)
+		return 0;
+
+	if (key_serial)
+		bkey = bpf_lookup_user_key(key_serial, flags);
+	else
+		bkey = bpf_lookup_system_key(key_id);
+
+	if (!bkey)
+		return -ENOENT;
+
+	bpf_key_put(bkey);
+
+	return 0;
+}
-- 
cgit v1.2.3


From fc97590668ae60b94ad8bc4d9e85958f10cb3567 Mon Sep 17 00:00:00 2001
From: Roberto Sassu <roberto.sassu@huawei.com>
Date: Tue, 20 Sep 2022 09:59:50 +0200
Subject: selftests/bpf: Add test for bpf_verify_pkcs7_signature() kfunc

Perform several tests to ensure the correct implementation of the
bpf_verify_pkcs7_signature() kfunc.

Do the tests with data signed with a generated testing key (by using
sign-file from scripts/) and with the tcp_bic.ko kernel module if it is
found in the system. The test does not fail if tcp_bic.ko is not found.

First, perform an unsuccessful signature verification without data.

Second, perform a successful signature verification with the session
keyring and a new one created for testing.

Then, ensure that permission and validation checks are done properly on the
keyring provided to bpf_verify_pkcs7_signature(), even though those checks
were deferred at the time the keyring was retrieved with bpf_lookup_user_key().
The tests expect to encounter an error if the Search permission is removed
from the keyring, or the keyring is expired.

Finally, perform a successful and unsuccessful signature verification with
the keyrings with pre-determined IDs (the last test fails because the key
is not in the platform keyring).

The test is currently in the deny list for s390x (JIT does not support
calling kernel function).

Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
Link: https://lore.kernel.org/r/20220920075951.929132-13-roberto.sassu@huaweicloud.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/DENYLIST.s390x         |   1 +
 tools/testing/selftests/bpf/Makefile               |  14 +-
 tools/testing/selftests/bpf/config                 |   5 +
 tools/testing/selftests/bpf/config.x86_64          |   5 -
 .../selftests/bpf/prog_tests/verify_pkcs7_sig.c    | 399 +++++++++++++++++++++
 .../selftests/bpf/progs/test_verify_pkcs7_sig.c    |  90 +++++
 tools/testing/selftests/bpf/verify_sig_setup.sh    | 104 ++++++
 7 files changed, 610 insertions(+), 8 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c
 create mode 100755 tools/testing/selftests/bpf/verify_sig_setup.sh

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x
index a6ac5dce7856..a992fd978c1e 100644
--- a/tools/testing/selftests/bpf/DENYLIST.s390x
+++ b/tools/testing/selftests/bpf/DENYLIST.s390x
@@ -73,3 +73,4 @@ htab_update                              # failed to attach: ERROR: strerror_r(-
 tracing_struct                           # failed to auto-attach: -524                                                 (trampoline)
 user_ringbuf                             # failed to find kernel BTF type ID of '__s390x_sys_prctl': -3                (?)
 lookup_key                               # JIT does not support calling kernel function                                (kfunc)
+verify_pkcs7_sig                         # JIT does not support calling kernel function                                (kfunc)
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 1a0296bd744a..5898d3828b82 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -14,6 +14,7 @@ BPFTOOLDIR := $(TOOLSDIR)/bpf/bpftool
 APIDIR := $(TOOLSINCDIR)/uapi
 GENDIR := $(abspath ../../../../include/generated)
 GENHDR := $(GENDIR)/autoconf.h
+HOSTPKG_CONFIG := pkg-config
 
 ifneq ($(wildcard $(GENHDR)),)
   GENFLAGS := -DHAVE_GENHDR
@@ -75,7 +76,7 @@ TEST_PROGS := test_kmod.sh \
 	test_xsk.sh
 
 TEST_PROGS_EXTENDED := with_addr.sh \
-	with_tunnels.sh ima_setup.sh \
+	with_tunnels.sh ima_setup.sh verify_sig_setup.sh \
 	test_xdp_vlan.sh test_bpftool.py
 
 # Compile but not part of 'make run_tests'
@@ -84,7 +85,7 @@ TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \
 	test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko \
 	xskxceiver xdp_redirect_multi xdp_synproxy veristat
 
-TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read
+TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read $(OUTPUT)/sign-file
 
 # Emit succinct information message describing current building step
 # $1 - generic step name (e.g., CC, LINK, etc);
@@ -189,6 +190,12 @@ $(OUTPUT)/urandom_read: urandom_read.c urandom_read_aux.c $(OUTPUT)/liburandom_r
 		     -fuse-ld=$(LLD) -Wl,-znoseparate-code		       \
 		     -Wl,-rpath=. -Wl,--build-id=sha1 -o $@
 
+$(OUTPUT)/sign-file: ../../../../scripts/sign-file.c
+	$(call msg,SIGN-FILE,,$@)
+	$(Q)$(CC) $(shell $(HOSTPKG_CONFIG) --cflags libcrypto 2> /dev/null) \
+		  $< -o $@ \
+		  $(shell $(HOSTPKG_CONFIG) --libs libcrypto 2> /dev/null || echo -lcrypto)
+
 $(OUTPUT)/bpf_testmod.ko: $(VMLINUX_BTF) $(wildcard bpf_testmod/Makefile bpf_testmod/*.[ch])
 	$(call msg,MOD,,$@)
 	$(Q)$(RM) bpf_testmod/bpf_testmod.ko # force re-compilation
@@ -516,7 +523,8 @@ TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c	\
 TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko	\
 		       $(OUTPUT)/liburandom_read.so			\
 		       $(OUTPUT)/xdp_synproxy				\
-		       ima_setup.sh					\
+		       $(OUTPUT)/sign-file				\
+		       ima_setup.sh verify_sig_setup.sh			\
 		       $(wildcard progs/btf_dump_test_case_*.c)
 TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE
 TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS) -DENABLE_ATOMICS_TESTS
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index add5a5a919b4..905a9be8d0a2 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -33,6 +33,11 @@ CONFIG_IPV6_TUNNEL=y
 CONFIG_KEYS=y
 CONFIG_LIRC=y
 CONFIG_LWTUNNEL=y
+CONFIG_MODULE_SIG=y
+CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULES=y
+CONFIG_MODVERSIONS=y
 CONFIG_MPLS=y
 CONFIG_MPLS_IPTUNNEL=y
 CONFIG_MPLS_ROUTING=y
diff --git a/tools/testing/selftests/bpf/config.x86_64 b/tools/testing/selftests/bpf/config.x86_64
index ce70c9509204..21ce5ea4304e 100644
--- a/tools/testing/selftests/bpf/config.x86_64
+++ b/tools/testing/selftests/bpf/config.x86_64
@@ -145,11 +145,6 @@ CONFIG_MCORE2=y
 CONFIG_MEMCG=y
 CONFIG_MEMORY_FAILURE=y
 CONFIG_MINIX_SUBPARTITION=y
-CONFIG_MODULE_SIG=y
-CONFIG_MODULE_SRCVERSION_ALL=y
-CONFIG_MODULE_UNLOAD=y
-CONFIG_MODULES=y
-CONFIG_MODVERSIONS=y
 CONFIG_NAMESPACES=y
 CONFIG_NET=y
 CONFIG_NET_9P=y
diff --git a/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c b/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c
new file mode 100644
index 000000000000..579d6ee83ce0
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c
@@ -0,0 +1,399 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (C) 2022 Huawei Technologies Duesseldorf GmbH
+ *
+ * Author: Roberto Sassu <roberto.sassu@huawei.com>
+ */
+
+#include <stdio.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <endian.h>
+#include <limits.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <sys/mman.h>
+#include <linux/keyctl.h>
+#include <test_progs.h>
+
+#include "test_verify_pkcs7_sig.skel.h"
+
+#define MAX_DATA_SIZE (1024 * 1024)
+#define MAX_SIG_SIZE 1024
+
+#define VERIFY_USE_SECONDARY_KEYRING (1UL)
+#define VERIFY_USE_PLATFORM_KEYRING  (2UL)
+
+/* In stripped ARM and x86-64 modules, ~ is surprisingly rare. */
+#define MODULE_SIG_STRING "~Module signature appended~\n"
+
+/*
+ * Module signature information block.
+ *
+ * The constituents of the signature section are, in order:
+ *
+ *	- Signer's name
+ *	- Key identifier
+ *	- Signature data
+ *	- Information block
+ */
+struct module_signature {
+	__u8	algo;		/* Public-key crypto algorithm [0] */
+	__u8	hash;		/* Digest algorithm [0] */
+	__u8	id_type;	/* Key identifier type [PKEY_ID_PKCS7] */
+	__u8	signer_len;	/* Length of signer's name [0] */
+	__u8	key_id_len;	/* Length of key identifier [0] */
+	__u8	__pad[3];
+	__be32	sig_len;	/* Length of signature data */
+};
+
+struct data {
+	__u8 data[MAX_DATA_SIZE];
+	__u32 data_len;
+	__u8 sig[MAX_SIG_SIZE];
+	__u32 sig_len;
+};
+
+static bool kfunc_not_supported;
+
+static int libbpf_print_cb(enum libbpf_print_level level, const char *fmt,
+			   va_list args)
+{
+	if (strcmp(fmt, "libbpf: extern (func ksym) '%s': not found in kernel or module BTFs\n"))
+		return 0;
+
+	if (strcmp(va_arg(args, char *), "bpf_verify_pkcs7_signature"))
+		return 0;
+
+	kfunc_not_supported = true;
+	return 0;
+}
+
+static int _run_setup_process(const char *setup_dir, const char *cmd)
+{
+	int child_pid, child_status;
+
+	child_pid = fork();
+	if (child_pid == 0) {
+		execlp("./verify_sig_setup.sh", "./verify_sig_setup.sh", cmd,
+		       setup_dir, NULL);
+		exit(errno);
+
+	} else if (child_pid > 0) {
+		waitpid(child_pid, &child_status, 0);
+		return WEXITSTATUS(child_status);
+	}
+
+	return -EINVAL;
+}
+
+/* Sign "test" with tmp_dir/signing_key.pem via ./sign-file; fill data_item. */
+static int populate_data_item_str(const char *tmp_dir, struct data *data_item)
+{
+	struct stat st;
+	char data_template[] = "/tmp/dataXXXXXX";
+	char path[PATH_MAX];
+	int ret, fd, child_status, child_pid;
+
+	data_item->data_len = 4;
+	memcpy(data_item->data, "test", data_item->data_len);
+
+	fd = mkstemp(data_template);
+	if (fd == -1)
+		return -errno;
+
+	ret = write(fd, data_item->data, data_item->data_len);
+
+	close(fd);
+
+	if (ret != data_item->data_len) {
+		ret = -EIO;
+		goto out;
+	}
+
+	child_pid = fork();
+	if (child_pid == -1) {
+		ret = -errno;
+		goto out;
+	}
+
+	if (child_pid == 0) {
+		snprintf(path, sizeof(path), "%s/signing_key.pem", tmp_dir);
+		execlp("./sign-file", "./sign-file", "-d", "sha256",
+		       path, path, data_template, NULL);
+		exit(errno); /* exec failed: never resume the test in the child */
+	}
+
+	waitpid(child_pid, &child_status, 0);
+
+	ret = WEXITSTATUS(child_status);
+	if (ret)
+		goto out;
+
+	snprintf(path, sizeof(path), "%s.p7s", data_template);
+
+	ret = stat(path, &st);
+	if (ret == -1) {
+		ret = -errno;
+		goto out;
+	}
+
+	if (st.st_size > sizeof(data_item->sig)) {
+		ret = -EINVAL;
+		goto out_sig;
+	}
+
+	data_item->sig_len = st.st_size;
+
+	fd = open(path, O_RDONLY);
+	if (fd == -1) {
+		ret = -errno;
+		goto out_sig;
+	}
+
+	ret = read(fd, data_item->sig, data_item->sig_len);
+
+	close(fd);
+
+	if (ret != data_item->sig_len) {
+		ret = -EIO;
+		goto out_sig;
+	}
+
+	ret = 0;
+out_sig:
+	unlink(path);
+out:
+	unlink(data_template);
+	return ret;
+}
+
+static int populate_data_item_mod(struct data *data_item)
+{
+	char mod_path[PATH_MAX], *mod_path_ptr;
+	struct stat st;
+	void *mod;
+	FILE *fp;
+	struct module_signature ms;
+	int ret, fd, modlen, marker_len, sig_len;
+
+	data_item->data_len = 0;
+
+	if (stat("/lib/modules", &st) == -1)
+		return 0;
+
+	/* Requires CONFIG_TCP_CONG_BIC=m. */
+	fp = popen("find /lib/modules/$(uname -r) -name tcp_bic.ko", "r");
+	if (!fp)
+		return 0;
+
+	mod_path_ptr = fgets(mod_path, sizeof(mod_path), fp);
+	pclose(fp);
+
+	if (!mod_path_ptr)
+		return 0;
+
+	mod_path_ptr = strchr(mod_path, '\n');
+	if (!mod_path_ptr)
+		return 0;
+
+	*mod_path_ptr = '\0';
+
+	if (stat(mod_path, &st) == -1)
+		return 0;
+
+	modlen = st.st_size;
+	marker_len = sizeof(MODULE_SIG_STRING) - 1;
+
+	fd = open(mod_path, O_RDONLY);
+	if (fd == -1)
+		return -errno;
+
+	mod = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+
+	close(fd);
+
+	if (mod == MAP_FAILED)
+		return -errno;
+
+	if (strncmp(mod + modlen - marker_len, MODULE_SIG_STRING, marker_len)) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	modlen -= marker_len;
+
+	memcpy(&ms, mod + (modlen - sizeof(ms)), sizeof(ms));
+
+	sig_len = __be32_to_cpu(ms.sig_len);
+	modlen -= sig_len + sizeof(ms);
+
+	if (modlen > sizeof(data_item->data)) {
+		ret = -E2BIG;
+		goto out;
+	}
+
+	memcpy(data_item->data, mod, modlen);
+	data_item->data_len = modlen;
+
+	if (sig_len > sizeof(data_item->sig)) {
+		ret = -E2BIG;
+		goto out;
+	}
+
+	memcpy(data_item->sig, mod + modlen, sig_len);
+	data_item->sig_len = sig_len;
+	ret = 0;
+out:
+	munmap(mod, st.st_size);
+	return ret;
+}
+
+void test_verify_pkcs7_sig(void)
+{
+	libbpf_print_fn_t old_print_cb;
+	char tmp_dir_template[] = "/tmp/verify_sigXXXXXX";
+	char *tmp_dir;
+	struct test_verify_pkcs7_sig *skel = NULL;
+	struct bpf_map *map;
+	struct data data = {};	/* zeroed: the first update carries no data/sig */
+	int ret, zero = 0;
+
+	/* Trigger creation of session keyring. */
+	syscall(__NR_request_key, "keyring", "_uid.0", NULL,
+		KEY_SPEC_SESSION_KEYRING);
+
+	tmp_dir = mkdtemp(tmp_dir_template);
+	if (!ASSERT_OK_PTR(tmp_dir, "mkdtemp"))
+		return;
+
+	ret = _run_setup_process(tmp_dir, "setup");
+	if (!ASSERT_OK(ret, "_run_setup_process"))
+		goto close_prog;
+
+	skel = test_verify_pkcs7_sig__open();
+	if (!ASSERT_OK_PTR(skel, "test_verify_pkcs7_sig__open"))
+		goto close_prog;
+
+	old_print_cb = libbpf_set_print(libbpf_print_cb);
+	ret = test_verify_pkcs7_sig__load(skel);
+	libbpf_set_print(old_print_cb);
+
+	if (ret < 0 && kfunc_not_supported) {
+		printf(
+		  "%s:SKIP:bpf_verify_pkcs7_signature() kfunc not supported\n",
+		  __func__);
+		test__skip();
+		goto close_prog;
+	}
+
+	if (!ASSERT_OK(ret, "test_verify_pkcs7_sig__load"))
+		goto close_prog;
+
+	ret = test_verify_pkcs7_sig__attach(skel);
+	if (!ASSERT_OK(ret, "test_verify_pkcs7_sig__attach"))
+		goto close_prog;
+
+	map = bpf_object__find_map_by_name(skel->obj, "data_input");
+	if (!ASSERT_OK_PTR(map, "data_input not found"))
+		goto close_prog;
+
+	skel->bss->monitored_pid = getpid();
+
+	/* Test without data and signature. */
+	skel->bss->user_keyring_serial = KEY_SPEC_SESSION_KEYRING;
+
+	ret = bpf_map_update_elem(bpf_map__fd(map), &zero, &data, BPF_ANY);
+	if (!ASSERT_LT(ret, 0, "bpf_map_update_elem data_input"))
+		goto close_prog;
+
+	/* Test successful signature verification with session keyring. */
+	ret = populate_data_item_str(tmp_dir, &data);
+	if (!ASSERT_OK(ret, "populate_data_item_str"))
+		goto close_prog;
+
+	ret = bpf_map_update_elem(bpf_map__fd(map), &zero, &data, BPF_ANY);
+	if (!ASSERT_OK(ret, "bpf_map_update_elem data_input"))
+		goto close_prog;
+
+	/* Test successful signature verification with testing keyring. */
+	skel->bss->user_keyring_serial = syscall(__NR_request_key, "keyring",
+						 "ebpf_testing_keyring", NULL,
+						 KEY_SPEC_SESSION_KEYRING);
+
+	ret = bpf_map_update_elem(bpf_map__fd(map), &zero, &data, BPF_ANY);
+	if (!ASSERT_OK(ret, "bpf_map_update_elem data_input"))
+		goto close_prog;
+
+	/*
+	 * Ensure key_task_permission() is called and rejects the keyring
+	 * (no Search permission).
+	 */
+	syscall(__NR_keyctl, KEYCTL_SETPERM, skel->bss->user_keyring_serial,
+		0x37373737);
+
+	ret = bpf_map_update_elem(bpf_map__fd(map), &zero, &data, BPF_ANY);
+	if (!ASSERT_LT(ret, 0, "bpf_map_update_elem data_input"))
+		goto close_prog;
+
+	syscall(__NR_keyctl, KEYCTL_SETPERM, skel->bss->user_keyring_serial,
+		0x3f3f3f3f);
+
+	/*
+	 * Ensure key_validate() is called and rejects the keyring (key expired)
+	 */
+	syscall(__NR_keyctl, KEYCTL_SET_TIMEOUT,
+		skel->bss->user_keyring_serial, 1);
+	sleep(1);
+
+	ret = bpf_map_update_elem(bpf_map__fd(map), &zero, &data, BPF_ANY);
+	if (!ASSERT_LT(ret, 0, "bpf_map_update_elem data_input"))
+		goto close_prog;
+
+	skel->bss->user_keyring_serial = KEY_SPEC_SESSION_KEYRING;
+
+	/* Test with corrupted data (signature verification should fail). */
+	data.data[0] = 'a';
+	ret = bpf_map_update_elem(bpf_map__fd(map), &zero, &data, BPF_ANY);
+	if (!ASSERT_LT(ret, 0, "bpf_map_update_elem data_input"))
+		goto close_prog;
+
+	ret = populate_data_item_mod(&data);
+	if (!ASSERT_OK(ret, "populate_data_item_mod"))
+		goto close_prog;
+
+	/* Test signature verification with system keyrings. */
+	if (data.data_len) {
+		skel->bss->user_keyring_serial = 0;
+		skel->bss->system_keyring_id = 0;
+
+		ret = bpf_map_update_elem(bpf_map__fd(map), &zero, &data,
+					  BPF_ANY);
+		if (!ASSERT_OK(ret, "bpf_map_update_elem data_input"))
+			goto close_prog;
+
+		skel->bss->system_keyring_id = VERIFY_USE_SECONDARY_KEYRING;
+
+		ret = bpf_map_update_elem(bpf_map__fd(map), &zero, &data,
+					  BPF_ANY);
+		if (!ASSERT_OK(ret, "bpf_map_update_elem data_input"))
+			goto close_prog;
+
+		skel->bss->system_keyring_id = VERIFY_USE_PLATFORM_KEYRING;
+
+		ret = bpf_map_update_elem(bpf_map__fd(map), &zero, &data,
+					  BPF_ANY);
+		ASSERT_LT(ret, 0, "bpf_map_update_elem data_input");
+	}
+
+close_prog:
+	_run_setup_process(tmp_dir, "cleanup");
+
+	if (!skel)
+		return;
+
+	skel->bss->monitored_pid = 0;
+	test_verify_pkcs7_sig__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c b/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c
new file mode 100644
index 000000000000..ce419304ff1f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (C) 2022 Huawei Technologies Duesseldorf GmbH
+ *
+ * Author: Roberto Sassu <roberto.sassu@huawei.com>
+ */
+
+#include "vmlinux.h"
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#define MAX_DATA_SIZE (1024 * 1024)
+#define MAX_SIG_SIZE 1024
+
+extern struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym;
+extern struct bpf_key *bpf_lookup_system_key(__u64 id) __ksym;
+extern void bpf_key_put(struct bpf_key *key) __ksym;
+extern int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_ptr,
+				      struct bpf_dynptr *sig_ptr,
+				      struct bpf_key *trusted_keyring) __ksym;
+
+__u32 monitored_pid;
+__u32 user_keyring_serial;
+__u64 system_keyring_id;
+
+struct data {
+	__u8 data[MAX_DATA_SIZE];
+	__u32 data_len;
+	__u8 sig[MAX_SIG_SIZE];
+	__u32 sig_len;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, struct data);
+} data_input SEC(".maps");
+
+char _license[] SEC("license") = "GPL";
+
+SEC("lsm.s/bpf")
+int BPF_PROG(bpf, int cmd, union bpf_attr *attr, unsigned int size)
+{
+	struct bpf_dynptr data_ptr, sig_ptr;
+	struct data *data_val;
+	struct bpf_key *trusted_keyring;
+	__u32 pid;
+	__u64 value;
+	int ret, zero = 0;
+
+	pid = bpf_get_current_pid_tgid() >> 32;
+	if (pid != monitored_pid)
+		return 0;
+
+	data_val = bpf_map_lookup_elem(&data_input, &zero);
+	if (!data_val)
+		return 0;
+
+	bpf_probe_read(&value, sizeof(value), &attr->value);
+
+	bpf_copy_from_user(data_val, sizeof(struct data),
+			   (void *)(unsigned long)value);
+
+	if (data_val->data_len > sizeof(data_val->data))
+		return -EINVAL;
+
+	bpf_dynptr_from_mem(data_val->data, data_val->data_len, 0, &data_ptr);
+
+	if (data_val->sig_len > sizeof(data_val->sig))
+		return -EINVAL;
+
+	bpf_dynptr_from_mem(data_val->sig, data_val->sig_len, 0, &sig_ptr);
+
+	if (user_keyring_serial)
+		trusted_keyring = bpf_lookup_user_key(user_keyring_serial, 0);
+	else
+		trusted_keyring = bpf_lookup_system_key(system_keyring_id);
+
+	if (!trusted_keyring)
+		return -ENOENT;
+
+	ret = bpf_verify_pkcs7_signature(&data_ptr, &sig_ptr, trusted_keyring);
+
+	bpf_key_put(trusted_keyring);
+
+	return ret;
+}
diff --git a/tools/testing/selftests/bpf/verify_sig_setup.sh b/tools/testing/selftests/bpf/verify_sig_setup.sh
new file mode 100755
index 000000000000..ba08922b4a27
--- /dev/null
+++ b/tools/testing/selftests/bpf/verify_sig_setup.sh
@@ -0,0 +1,104 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+set -u
+set -o pipefail
+
+VERBOSE="${SELFTESTS_VERBOSE:=0}"
+LOG_FILE="$(mktemp /tmp/verify_sig_setup.log.XXXXXX)"
+
+x509_genkey_content="\
+[ req ]
+default_bits = 2048
+distinguished_name = req_distinguished_name
+prompt = no
+string_mask = utf8only
+x509_extensions = myexts
+
+[ req_distinguished_name ]
+CN = eBPF Signature Verification Testing Key
+
+[ myexts ]
+basicConstraints=critical,CA:FALSE
+keyUsage=digitalSignature
+subjectKeyIdentifier=hash
+authorityKeyIdentifier=keyid
+"
+
+usage()
+{
+	echo "Usage: $0 <setup|cleanup <existing_tmp_dir>"
+	exit 1
+}
+
+setup()
+{
+	local tmp_dir="$1"
+
+	echo "${x509_genkey_content}" > ${tmp_dir}/x509.genkey
+
+	openssl req -new -nodes -utf8 -sha256 -days 36500 \
+			-batch -x509 -config ${tmp_dir}/x509.genkey \
+			-outform PEM -out ${tmp_dir}/signing_key.pem \
+			-keyout ${tmp_dir}/signing_key.pem 2>&1
+
+	openssl x509 -in ${tmp_dir}/signing_key.pem -out \
+		${tmp_dir}/signing_key.der -outform der
+
+	key_id=$(cat ${tmp_dir}/signing_key.der | keyctl padd asymmetric ebpf_testing_key @s)
+
+	keyring_id=$(keyctl newring ebpf_testing_keyring @s)
+	keyctl link $key_id $keyring_id
+}
+
+cleanup() {
+	local tmp_dir="$1"
+
+	keyctl unlink $(keyctl search @s asymmetric ebpf_testing_key) @s
+	keyctl unlink $(keyctl search @s keyring ebpf_testing_keyring) @s
+	rm -rf ${tmp_dir}
+}
+
+catch()
+{
+	local exit_code="$1"
+	local log_file="$2"
+
+	if [[ "${exit_code}" -ne 0 ]]; then
+		cat "${log_file}" >&3
+	fi
+
+	rm -f "${log_file}"
+	exit ${exit_code}
+}
+
+main()
+{
+	[[ $# -ne 2 ]] && usage
+
+	local action="$1"
+	local tmp_dir="$2"
+
+	[[ ! -d "${tmp_dir}" ]] && echo "Directory ${tmp_dir} doesn't exist" && exit 1
+
+	if [[ "${action}" == "setup" ]]; then
+		setup "${tmp_dir}"
+	elif [[ "${action}" == "cleanup" ]]; then
+		cleanup "${tmp_dir}"
+	else
+		echo "Unknown action: ${action}"
+		exit 1
+	fi
+}
+
+trap 'catch "$?" "${LOG_FILE}"' EXIT
+
+if [[ "${VERBOSE}" -eq 0 ]]; then
+	# Save the stderr to 3 so that we can output back to
+	# it incase of an error.
+	exec 3>&2 1>"${LOG_FILE}" 2>&1
+fi
+
+main "$@"
+rm -f "${LOG_FILE}"
-- 
cgit v1.2.3


From b94fa9f9dcf99730eabd8febc4c95e44342bfb59 Mon Sep 17 00:00:00 2001
From: Roberto Sassu <roberto.sassu@huawei.com>
Date: Tue, 20 Sep 2022 09:59:51 +0200
Subject: selftests/bpf: Add tests for dynamic pointers parameters in kfuncs

Add tests to ensure that only supported dynamic pointer types are accepted,
that the passed argument is actually a dynamic pointer, that the passed
argument is a pointer to the stack, and that bpf_verify_pkcs7_signature()
correctly handles dynamic pointers with data set to NULL.

The tests are currently in the deny list for s390x (JIT does not support
calling kernel function).

Signed-off-by: Roberto Sassu <roberto.sassu@huawei.com>
Acked-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
Link: https://lore.kernel.org/r/20220920075951.929132-14-roberto.sassu@huaweicloud.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/DENYLIST.s390x         |   1 +
 .../selftests/bpf/prog_tests/kfunc_dynptr_param.c  | 164 +++++++++++++++++++++
 .../selftests/bpf/progs/test_kfunc_dynptr_param.c  |  94 ++++++++++++
 3 files changed, 259 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x
index a992fd978c1e..17e074eb42b8 100644
--- a/tools/testing/selftests/bpf/DENYLIST.s390x
+++ b/tools/testing/selftests/bpf/DENYLIST.s390x
@@ -74,3 +74,4 @@ tracing_struct                           # failed to auto-attach: -524
 user_ringbuf                             # failed to find kernel BTF type ID of '__s390x_sys_prctl': -3                (?)
 lookup_key                               # JIT does not support calling kernel function                                (kfunc)
 verify_pkcs7_sig                         # JIT does not support calling kernel function                                (kfunc)
+kfunc_dynptr_param                       # JIT does not support calling kernel function                                (kfunc)
diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c b/tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c
new file mode 100644
index 000000000000..c210657d4d0a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c
@@ -0,0 +1,164 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (c) 2022 Facebook
+ * Copyright (C) 2022 Huawei Technologies Duesseldorf GmbH
+ *
+ * Author: Roberto Sassu <roberto.sassu@huawei.com>
+ */
+
+#include <test_progs.h>
+#include "test_kfunc_dynptr_param.skel.h"
+
+static size_t log_buf_sz = 1048576; /* 1 MB */
+static char obj_log_buf[1048576];
+
+static struct {
+	const char *prog_name;
+	const char *expected_verifier_err_msg;
+	int expected_runtime_err;
+} kfunc_dynptr_tests[] = {
+	{"dynptr_type_not_supp",
+	 "arg#0 pointer type STRUCT bpf_dynptr_kern points to unsupported dynamic pointer type", 0},
+	{"not_valid_dynptr",
+	 "arg#0 pointer type STRUCT bpf_dynptr_kern must be valid and initialized", 0},
+	{"not_ptr_to_stack", "arg#0 pointer type STRUCT bpf_dynptr_kern not to stack", 0},
+	{"dynptr_data_null", NULL, -EBADMSG},
+};
+
+static bool kfunc_not_supported;
+
+static int libbpf_print_cb(enum libbpf_print_level level, const char *fmt,
+			   va_list args)
+{
+	if (strcmp(fmt, "libbpf: extern (func ksym) '%s': not found in kernel or module BTFs\n"))
+		return 0;
+
+	if (strcmp(va_arg(args, char *), "bpf_verify_pkcs7_signature"))
+		return 0;
+
+	kfunc_not_supported = true;
+	return 0;
+}
+
+static void verify_fail(const char *prog_name, const char *expected_err_msg)
+{
+	struct test_kfunc_dynptr_param *skel;
+	LIBBPF_OPTS(bpf_object_open_opts, opts);
+	libbpf_print_fn_t old_print_cb;
+	struct bpf_program *prog;
+	int err;
+
+	opts.kernel_log_buf = obj_log_buf;
+	opts.kernel_log_size = log_buf_sz;
+	opts.kernel_log_level = 1;
+
+	skel = test_kfunc_dynptr_param__open_opts(&opts);
+	if (!ASSERT_OK_PTR(skel, "test_kfunc_dynptr_param__open_opts"))
+		goto cleanup;
+
+	prog = bpf_object__find_program_by_name(skel->obj, prog_name);
+	if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+		goto cleanup;
+
+	bpf_program__set_autoload(prog, true);
+
+	bpf_map__set_max_entries(skel->maps.ringbuf, getpagesize());
+
+	kfunc_not_supported = false;
+
+	old_print_cb = libbpf_set_print(libbpf_print_cb);
+	err = test_kfunc_dynptr_param__load(skel);
+	libbpf_set_print(old_print_cb);
+
+	if (err < 0 && kfunc_not_supported) {
+		fprintf(stderr,
+		  "%s:SKIP:bpf_verify_pkcs7_signature() kfunc not supported\n",
+		  __func__);
+		test__skip();
+		goto cleanup;
+	}
+
+	if (!ASSERT_ERR(err, "unexpected load success"))
+		goto cleanup;
+
+	if (!ASSERT_OK_PTR(strstr(obj_log_buf, expected_err_msg), "expected_err_msg")) {
+		fprintf(stderr, "Expected err_msg: %s\n", expected_err_msg);
+		fprintf(stderr, "Verifier output: %s\n", obj_log_buf);
+	}
+
+cleanup:
+	test_kfunc_dynptr_param__destroy(skel);
+}
+
+static void verify_success(const char *prog_name, int expected_runtime_err)
+{
+	struct test_kfunc_dynptr_param *skel;
+	libbpf_print_fn_t old_print_cb;
+	struct bpf_program *prog;
+	struct bpf_link *link;
+	__u32 next_id;
+	int err;
+
+	skel = test_kfunc_dynptr_param__open();
+	if (!ASSERT_OK_PTR(skel, "test_kfunc_dynptr_param__open"))
+		return;
+
+	skel->bss->pid = getpid();
+
+	bpf_map__set_max_entries(skel->maps.ringbuf, getpagesize());
+
+	kfunc_not_supported = false;
+
+	old_print_cb = libbpf_set_print(libbpf_print_cb);
+	err = test_kfunc_dynptr_param__load(skel);
+	libbpf_set_print(old_print_cb);
+
+	if (err < 0 && kfunc_not_supported) {
+		fprintf(stderr,
+		  "%s:SKIP:bpf_verify_pkcs7_signature() kfunc not supported\n",
+		  __func__);
+		test__skip();
+		goto cleanup;
+	}
+
+	if (!ASSERT_OK(err, "test_kfunc_dynptr_param__load"))
+		goto cleanup;
+
+	prog = bpf_object__find_program_by_name(skel->obj, prog_name);
+	if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+		goto cleanup;
+
+	link = bpf_program__attach(prog);
+	if (!ASSERT_OK_PTR(link, "bpf_program__attach"))
+		goto cleanup;
+
+	err = bpf_prog_get_next_id(0, &next_id);
+
+	bpf_link__destroy(link);
+
+	if (!ASSERT_OK(err, "bpf_prog_get_next_id"))
+		goto cleanup;
+
+	ASSERT_EQ(skel->bss->err, expected_runtime_err, "err");
+
+cleanup:
+	test_kfunc_dynptr_param__destroy(skel);
+}
+
+void test_kfunc_dynptr_param(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(kfunc_dynptr_tests); i++) {
+		if (!test__start_subtest(kfunc_dynptr_tests[i].prog_name))
+			continue;
+
+		if (kfunc_dynptr_tests[i].expected_verifier_err_msg)
+			verify_fail(kfunc_dynptr_tests[i].prog_name,
+			  kfunc_dynptr_tests[i].expected_verifier_err_msg);
+		else
+			verify_success(kfunc_dynptr_tests[i].prog_name,
+				kfunc_dynptr_tests[i].expected_runtime_err);
+	}
+}
diff --git a/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c
new file mode 100644
index 000000000000..ce39d096bba3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (C) 2022 Huawei Technologies Duesseldorf GmbH
+ *
+ * Author: Roberto Sassu <roberto.sassu@huawei.com>
+ */
+
+#include "vmlinux.h"
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+extern struct bpf_key *bpf_lookup_system_key(__u64 id) __ksym;
+extern void bpf_key_put(struct bpf_key *key) __ksym;
+extern int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_ptr,
+				      struct bpf_dynptr *sig_ptr,
+				      struct bpf_key *trusted_keyring) __ksym;
+
+struct {
+	__uint(type, BPF_MAP_TYPE_RINGBUF);
+} ringbuf SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u32);
+} array_map SEC(".maps");
+
+int err, pid;
+
+char _license[] SEC("license") = "GPL";
+
+SEC("?lsm.s/bpf")
+int BPF_PROG(dynptr_type_not_supp, int cmd, union bpf_attr *attr,
+	     unsigned int size)
+{
+	char write_data[64] = "hello there, world!!";
+	struct bpf_dynptr ptr;
+
+	bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(write_data), 0, &ptr);
+
+	return bpf_verify_pkcs7_signature(&ptr, &ptr, NULL);
+}
+
+SEC("?lsm.s/bpf")
+int BPF_PROG(not_valid_dynptr, int cmd, union bpf_attr *attr, unsigned int size)
+{
+	unsigned long val;
+
+	return bpf_verify_pkcs7_signature((struct bpf_dynptr *)&val,
+					  (struct bpf_dynptr *)&val, NULL);
+}
+
+SEC("?lsm.s/bpf")
+int BPF_PROG(not_ptr_to_stack, int cmd, union bpf_attr *attr, unsigned int size)
+{
+	unsigned long val;
+
+	return bpf_verify_pkcs7_signature((struct bpf_dynptr *)val,
+					  (struct bpf_dynptr *)val, NULL);
+}
+
+SEC("lsm.s/bpf")
+int BPF_PROG(dynptr_data_null, int cmd, union bpf_attr *attr, unsigned int size)
+{
+	struct bpf_key *trusted_keyring;
+	struct bpf_dynptr ptr;
+	__u32 *value;
+	int ret, zero = 0;
+
+	if (bpf_get_current_pid_tgid() >> 32 != pid)
+		return 0;
+
+	value = bpf_map_lookup_elem(&array_map, &zero);
+	if (!value)
+		return 0;
+
+	/* Pass invalid flags. */
+	ret = bpf_dynptr_from_mem(value, sizeof(*value), ((__u64)~0ULL), &ptr);
+	if (ret != -EINVAL)
+		return 0;
+
+	trusted_keyring = bpf_lookup_system_key(0);
+	if (!trusted_keyring)
+		return 0;
+
+	err = bpf_verify_pkcs7_signature(&ptr, &ptr, trusted_keyring);
+
+	bpf_key_put(trusted_keyring);
+
+	return 0;
+}
-- 
cgit v1.2.3


From b06b45e82b59b69f5ac6b3916ac5dbd0294efc95 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Wed, 21 Sep 2022 18:48:27 +0200
Subject: selftests/bpf: add tests for bpf_ct_set_nat_info kfunc

Introduce self-tests for bpf_ct_set_nat_info kfunc used to set the
source or destination nat addresses/ports.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Link: https://lore.kernel.org/r/803e33294e247744d466943105879414344d3235.1663778601.git.lorenzo@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/config              |  1 +
 tools/testing/selftests/bpf/prog_tests/bpf_nf.c | 10 ++++++---
 tools/testing/selftests/bpf/progs/test_bpf_nf.c | 27 +++++++++++++++++++++++++
 3 files changed, 35 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 905a9be8d0a2..9213565c0311 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -63,6 +63,7 @@ CONFIG_NF_CONNTRACK=y
 CONFIG_NF_CONNTRACK_MARK=y
 CONFIG_NF_DEFRAG_IPV4=y
 CONFIG_NF_DEFRAG_IPV6=y
+CONFIG_NF_NAT=y
 CONFIG_RC_CORE=y
 CONFIG_SECURITY=y
 CONFIG_SECURITYFS=y
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
index 0677a51694c9..8a838ea8bdf3 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
@@ -26,7 +26,10 @@ enum {
 	TEST_TC_BPF,
 };
 
-#define TIMEOUT_MS 3000
+#define TIMEOUT_MS		3000
+#define IPS_STATUS_MASK		(IPS_CONFIRMED | IPS_SEEN_REPLY | \
+				 IPS_SRC_NAT_DONE | IPS_DST_NAT_DONE | \
+				 IPS_SRC_NAT | IPS_DST_NAT)
 
 static int connect_to_server(int srv_fd)
 {
@@ -114,10 +117,11 @@ static void test_bpf_nf_ct(int mode)
 	ASSERT_GT(skel->bss->test_delta_timeout, 8, "Test for min ct timeout update");
 	ASSERT_LE(skel->bss->test_delta_timeout, 10, "Test for max ct timeout update");
 	ASSERT_EQ(skel->bss->test_insert_lookup_mark, 77, "Test for insert and lookup mark value");
-	ASSERT_EQ(skel->bss->test_status, IPS_CONFIRMED | IPS_SEEN_REPLY,
-		  "Test for ct status update ");
+	ASSERT_EQ(skel->bss->test_status, IPS_STATUS_MASK, "Test for ct status update ");
 	ASSERT_EQ(skel->data->test_exist_lookup, 0, "Test existing connection lookup");
 	ASSERT_EQ(skel->bss->test_exist_lookup_mark, 43, "Test existing connection lookup ctmark");
+	ASSERT_EQ(skel->data->test_snat_addr, 0, "Test for source natting");
+	ASSERT_EQ(skel->data->test_dnat_addr, 0, "Test for destination natting");
 end:
 	if (srv_client_fd != -1)
 		close(srv_client_fd);
diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf.c b/tools/testing/selftests/bpf/progs/test_bpf_nf.c
index 88842da86ddc..227e85e85dda 100644
--- a/tools/testing/selftests/bpf/progs/test_bpf_nf.c
+++ b/tools/testing/selftests/bpf/progs/test_bpf_nf.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <vmlinux.h>
 #include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
 
 #define EAFNOSUPPORT 97
 #define EPROTO 71
@@ -24,6 +25,8 @@ int test_succ_lookup = -ENOENT;
 u32 test_delta_timeout = 0;
 u32 test_status = 0;
 u32 test_insert_lookup_mark = 0;
+int test_snat_addr = -EINVAL;
+int test_dnat_addr = -EINVAL;
 __be32 saddr = 0;
 __be16 sport = 0;
 __be32 daddr = 0;
@@ -54,6 +57,8 @@ void bpf_ct_set_timeout(struct nf_conn *, u32) __ksym;
 int bpf_ct_change_timeout(struct nf_conn *, u32) __ksym;
 int bpf_ct_set_status(struct nf_conn *, u32) __ksym;
 int bpf_ct_change_status(struct nf_conn *, u32) __ksym;
+int bpf_ct_set_nat_info(struct nf_conn *, union nf_inet_addr *,
+			int port, enum nf_nat_manip_type) __ksym;
 
 static __always_inline void
 nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32,
@@ -141,11 +146,22 @@ nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32,
 	ct = alloc_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
 		      sizeof(opts_def));
 	if (ct) {
+		__u16 sport = bpf_get_prandom_u32();
+		__u16 dport = bpf_get_prandom_u32();
+		union nf_inet_addr saddr = {};
+		union nf_inet_addr daddr = {};
 		struct nf_conn *ct_ins;
 
 		bpf_ct_set_timeout(ct, 10000);
 		ct->mark = 77;
 
+		/* snat */
+		saddr.ip = bpf_get_prandom_u32();
+		bpf_ct_set_nat_info(ct, &saddr, sport, NF_NAT_MANIP_SRC);
+		/* dnat */
+		daddr.ip = bpf_get_prandom_u32();
+		bpf_ct_set_nat_info(ct, &daddr, dport, NF_NAT_MANIP_DST);
+
 		ct_ins = bpf_ct_insert_entry(ct);
 		if (ct_ins) {
 			struct nf_conn *ct_lk;
@@ -153,6 +169,17 @@ nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32,
 			ct_lk = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4),
 					  &opts_def, sizeof(opts_def));
 			if (ct_lk) {
+				struct nf_conntrack_tuple *tuple;
+
+				/* check snat and dnat addresses */
+				tuple = &ct_lk->tuplehash[IP_CT_DIR_REPLY].tuple;
+				if (tuple->dst.u3.ip == saddr.ip &&
+				    tuple->dst.u.all == bpf_htons(sport))
+					test_snat_addr = 0;
+				if (tuple->src.u3.ip == daddr.ip &&
+				    tuple->src.u.all == bpf_htons(dport))
+					test_dnat_addr = 0;
+
 				/* update ct entry timeout */
 				bpf_ct_change_timeout(ct_lk, 10000);
 				test_delta_timeout = ct_lk->timeout - bpf_jiffies64();
-- 
cgit v1.2.3


From f338ac9105679df504c3809784f0716c25e87b31 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Wed, 21 Sep 2022 09:42:51 -0700
Subject: selftests/bpf: fix double bpf_object__close() in veristat

bpf_object__close(obj) is called twice for BPF object files with a single
BPF program in them. This causes a crash. Fix this by not calling
bpf_object__close() unnecessarily.

Fixes: c8bc5e050976 ("selftests/bpf: Add veristat tool for mass-verifying BPF object files")
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/r/20220921164254.3630690-2-andrii@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/veristat.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c
index 39e6dc41e504..c0c8a65cda52 100644
--- a/tools/testing/selftests/bpf/veristat.c
+++ b/tools/testing/selftests/bpf/veristat.c
@@ -300,7 +300,6 @@ static int process_obj(const char *filename)
 		prog = bpf_object__next_program(obj, NULL);
 		bpf_program__set_autoload(prog, true);
 		process_prog(filename, obj, prog);
-		bpf_object__close(obj);
 		goto cleanup;
 	}
 
-- 
cgit v1.2.3


From e5eb08d8fe469c0da8643042893a0b7481807443 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Wed, 21 Sep 2022 09:42:52 -0700
Subject: selftests/bpf: add CSV output mode for veristat

Teach veristat to output results as CSV table for easier programmatic
processing. Change what was --output/-o argument to now be --emit/-e.
And then use --output-format/-o <fmt> to specify output format.
Currently "table" and "csv" are supported, with table being the default.

For CSV output mode veristat is using spec identifiers as column names.
E.g., instead of "Total states" veristat uses "total_states" as a CSV
header name.

Internally veristat recognizes three formats, one of them
(RESFMT_TABLE_CALCLEN) is a special format instructing veristat to
calculate column widths for table output. This felt a bit cleaner and
more uniform than creating separate functions just for this.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/r/20220921164254.3630690-3-andrii@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/veristat.c | 111 ++++++++++++++++++++++-----------
 1 file changed, 75 insertions(+), 36 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c
index c0c8a65cda52..0472bfae3c9d 100644
--- a/tools/testing/selftests/bpf/veristat.c
+++ b/tools/testing/selftests/bpf/veristat.c
@@ -46,10 +46,17 @@ struct stat_specs {
 	int lens[ALL_STATS_CNT];
 };
 
+enum resfmt {
+	RESFMT_TABLE,
+	RESFMT_TABLE_CALCLEN, /* fake format to pre-calculate table's column widths */
+	RESFMT_CSV,
+};
+
 static struct env {
 	char **filenames;
 	int filename_cnt;
 	bool verbose;
+	enum resfmt out_fmt;
 
 	struct verif_stats *prog_stats;
 	int prog_stat_cnt;
@@ -78,8 +85,9 @@ const char argp_program_doc[] =
 static const struct argp_option opts[] = {
 	{ NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" },
 	{ "verbose", 'v', NULL, 0, "Verbose mode" },
-	{ "output", 'o', "SPEC", 0, "Specify output stats" },
+	{ "emit", 'e', "SPEC", 0, "Specify stats to be emitted" },
 	{ "sort", 's', "SPEC", 0, "Specify sort order" },
+	{ "output-format", 'o', "FMT", 0, "Result output format (table, csv), default is table." },
 	{},
 };
 
@@ -97,7 +105,7 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
 	case 'v':
 		env.verbose = true;
 		break;
-	case 'o':
+	case 'e':
 		err = parse_stats(arg, &env.output_spec);
 		if (err)
 			return err;
@@ -107,6 +115,16 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
 		if (err)
 			return err;
 		break;
+	case 'o':
+		if (strcmp(arg, "table") == 0) {
+			env.out_fmt = RESFMT_TABLE;
+		} else if (strcmp(arg, "csv") == 0) {
+			env.out_fmt = RESFMT_CSV;
+		} else {
+			fprintf(stderr, "Unrecognized output format '%s'\n", arg);
+			return -EINVAL;
+		}
+		break;
 	case ARGP_KEY_ARG:
 		tmp = realloc(env.filenames, (env.filename_cnt + 1) * sizeof(*env.filenames));
 		if (!tmp)
@@ -147,7 +165,7 @@ static struct stat_def {
 	[FILE_NAME] = { "File", {"file_name", "filename", "file"}, true /* asc */ },
 	[PROG_NAME] = { "Program", {"prog_name", "progname", "prog"}, true /* asc */ },
 	[VERDICT] = { "Verdict", {"verdict"}, true /* asc: failure, success */ },
-	[DURATION] = { "Duration, us", {"duration", "dur"}, },
+	[DURATION] = { "Duration (us)", {"duration", "dur"}, },
 	[TOTAL_INSNS] = { "Total insns", {"total_insns", "insns"}, },
 	[TOTAL_STATES] = { "Total states", {"total_states", "states"}, },
 	[PEAK_STATES] = { "Peak states", {"peak_states"}, },
@@ -385,7 +403,21 @@ static int cmp_prog_stats(const void *v1, const void *v2)
 #define HEADER_CHAR '-'
 #define COLUMN_SEP "  "
 
-static void output_headers(bool calc_len)
+static void output_header_underlines(void)
+{
+	int i, j, len;
+
+	for (i = 0; i < env.output_spec.spec_cnt; i++) {
+		len = env.output_spec.lens[i];
+
+		printf("%s", i == 0 ? "" : COLUMN_SEP);
+		for (j = 0; j < len; j++)
+			printf("%c", HEADER_CHAR);
+	}
+	printf("\n");
+}
+
+static void output_headers(enum resfmt fmt)
 {
 	int i, len;
 
@@ -393,34 +425,30 @@ static void output_headers(bool calc_len)
 		int id = env.output_spec.ids[i];
 		int *max_len = &env.output_spec.lens[i];
 
-		if (calc_len) {
+		switch (fmt) {
+		case RESFMT_TABLE_CALCLEN:
 			len = snprintf(NULL, 0, "%s", stat_defs[id].header);
 			if (len > *max_len)
 				*max_len = len;
-		} else {
+			break;
+		case RESFMT_TABLE:
 			printf("%s%-*s", i == 0 ? "" : COLUMN_SEP,  *max_len, stat_defs[id].header);
+			if (i == env.output_spec.spec_cnt - 1)
+				printf("\n");
+			break;
+		case RESFMT_CSV:
+			printf("%s%s", i == 0 ? "" : ",", stat_defs[id].names[0]);
+			if (i == env.output_spec.spec_cnt - 1)
+				printf("\n");
+			break;
 		}
 	}
 
-	if (!calc_len)
-		printf("\n");
+	if (fmt == RESFMT_TABLE)
+		output_header_underlines();
 }
 
-static void output_header_underlines(void)
-{
-	int i, j, len;
-
-	for (i = 0; i < env.output_spec.spec_cnt; i++) {
-		len = env.output_spec.lens[i];
-
-		printf("%s", i == 0 ? "" : COLUMN_SEP);
-		for (j = 0; j < len; j++)
-			printf("%c", HEADER_CHAR);
-	}
-	printf("\n");
-}
-
-static void output_stats(const struct verif_stats *s, bool calc_len)
+static void output_stats(const struct verif_stats *s, enum resfmt fmt, bool last)
 {
 	int i;
 
@@ -453,23 +481,36 @@ static void output_stats(const struct verif_stats *s, bool calc_len)
 			exit(1);
 		}
 
-		if (calc_len) {
+		switch (fmt) {
+		case RESFMT_TABLE_CALCLEN:
 			if (str)
 				len = snprintf(NULL, 0, "%s", str);
 			else
 				len = snprintf(NULL, 0, "%ld", val);
 			if (len > *max_len)
 				*max_len = len;
-		} else {
+			break;
+		case RESFMT_TABLE:
 			if (str)
 				printf("%s%-*s", i == 0 ? "" : COLUMN_SEP, *max_len, str);
 			else
 				printf("%s%*ld", i == 0 ? "" : COLUMN_SEP,  *max_len, val);
+			if (i == env.output_spec.spec_cnt - 1)
+				printf("\n");
+			break;
+		case RESFMT_CSV:
+			if (str)
+				printf("%s%s", i == 0 ? "" : ",", str);
+			else
+				printf("%s%ld", i == 0 ? "" : ",", val);
+			if (i == env.output_spec.spec_cnt - 1)
+				printf("\n");
+			break;
 		}
 	}
 
-	if (!calc_len)
-		printf("\n");
+	if (last && fmt == RESFMT_TABLE)
+		output_header_underlines();
 }
 
 int main(int argc, char **argv)
@@ -505,20 +546,18 @@ int main(int argc, char **argv)
 
 	qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats);
 
-	/* calculate column widths */
-	output_headers(true);
-	for (i = 0; i < env.prog_stat_cnt; i++) {
-		output_stats(&env.prog_stats[i], true);
+	if (env.out_fmt == RESFMT_TABLE) {
+		/* calculate column widths */
+		output_headers(RESFMT_TABLE_CALCLEN);
+		for (i = 0; i < env.prog_stat_cnt; i++)
+			output_stats(&env.prog_stats[i], RESFMT_TABLE_CALCLEN, false);
 	}
 
 	/* actually output the table */
-	output_headers(false);
-	output_header_underlines();
+	output_headers(env.out_fmt);
 	for (i = 0; i < env.prog_stat_cnt; i++) {
-		output_stats(&env.prog_stats[i], false);
+		output_stats(&env.prog_stats[i], env.out_fmt, i == env.prog_stat_cnt - 1);
 	}
-	output_header_underlines();
-	printf("\n");
 
 	printf("Done. Processed %d object files, %d programs.\n",
 	       env.filename_cnt, env.prog_stat_cnt);
-- 
cgit v1.2.3


From 394169b079b558cf91a9c23ffb6b55c14cd927e1 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Wed, 21 Sep 2022 09:42:53 -0700
Subject: selftests/bpf: add comparison mode to veristat

Add ability to compare and contrast two veristat runs, previously
recorded with veristat using CSV output format.

When veristat is called with -C (--compare) flag, veristat expects
exactly two input files specified, both should be in CSV format.
Expectation is that it's output from previous veristat runs, but as long
as column names and formats match, it should just work. The first CSV
file is designated as the "baseline", and the second one is the
comparison (experiment) data set. Establishing the baseline matters later
when calculating difference percentages, see below.

Veristat parses these two CSV files and "reconstructs" verifier stats
(it could be just a subset of all possible stats). File and program
names are mandatory as they are used as joining key (these two "stats"
are designated as "key stats" in the code).

Veristat currently enforces that the set of stats recorded in both CSV
has to exactly match, down to exact order. This is just a simplifying
condition which can be lifted with a bit of additional pre-processing to
reorder stat specs internally, which I didn't bother doing, yet.

For all the non-key stats, veristat will output three columns: one for
baseline data, one for comparison data, and one with an absolute and
relative percentage difference. If either baseline or comparison values
are missing (that is, respective CSV file doesn't have a row with
*exactly* matching file and program name), those values are assumed to
be empty or zero. In such case relative percentages are forced to +100%
or -100% output, for consistency with a typical case.

Veristat's -e (--emit) and -s (--sort) specs still apply, so even if CSV
contains lots of stats, user can request to compare only a subset of
them (and specify desired column order as well). Similarly, both CSV and
human-readable table output are honored. Note that input is currently
always expected to be CSV.

Here's an example shell session, recording data for biosnoop tool on two
different kernels and comparing them afterwards, outputting data in table
format.

  # on slightly older production kernel
  $ sudo ./veristat biosnoop_bpf.o
  File            Program                   Verdict  Duration (us)  Total insns  Total states  Peak states
  --------------  ------------------------  -------  -------------  -----------  ------------  -----------
  biosnoop_bpf.o  blk_account_io_merge_bio  success             37           24             1            1
  biosnoop_bpf.o  blk_account_io_start      failure              0            0             0            0
  biosnoop_bpf.o  block_rq_complete         success             76          104             6            6
  biosnoop_bpf.o  block_rq_insert           success             83           85             7            7
  biosnoop_bpf.o  block_rq_issue            success             79           85             7            7
  --------------  ------------------------  -------  -------------  -----------  ------------  -----------
  Done. Processed 1 object files, 5 programs.
  $ sudo ./veristat ~/local/tmp/fbcode-bpf-objs/biosnoop_bpf.o -o csv > baseline.csv
  $ cat baseline.csv
  file_name,prog_name,verdict,duration,total_insns,total_states,peak_states
  biosnoop_bpf.o,blk_account_io_merge_bio,success,36,24,1,1
  biosnoop_bpf.o,blk_account_io_start,failure,0,0,0,0
  biosnoop_bpf.o,block_rq_complete,success,82,104,6,6
  biosnoop_bpf.o,block_rq_insert,success,78,85,7,7
  biosnoop_bpf.o,block_rq_issue,success,74,85,7,7

  # on latest bpf-next kernel
  $ sudo ./veristat biosnoop_bpf.o
  File            Program                   Verdict  Duration (us)  Total insns  Total states  Peak states
  --------------  ------------------------  -------  -------------  -----------  ------------  -----------
  biosnoop_bpf.o  blk_account_io_merge_bio  success             31           24             1            1
  biosnoop_bpf.o  blk_account_io_start      failure              0            0             0            0
  biosnoop_bpf.o  block_rq_complete         success             76          104             6            6
  biosnoop_bpf.o  block_rq_insert           success             83           91             7            7
  biosnoop_bpf.o  block_rq_issue            success             74           91             7            7
  --------------  ------------------------  -------  -------------  -----------  ------------  -----------
  Done. Processed 1 object files, 5 programs.
  $ sudo ./veristat biosnoop_bpf.o -o csv > comparison.csv
  $ cat comparison.csv
  file_name,prog_name,verdict,duration,total_insns,total_states,peak_states
  biosnoop_bpf.o,blk_account_io_merge_bio,success,71,24,1,1
  biosnoop_bpf.o,blk_account_io_start,failure,0,0,0,0
  biosnoop_bpf.o,block_rq_complete,success,82,104,6,6
  biosnoop_bpf.o,block_rq_insert,success,83,91,7,7
  biosnoop_bpf.o,block_rq_issue,success,87,91,7,7

  # now let's compare with human-readable output (note that no sudo needed)
  # we also ignore verification duration in this case to shorten output
  $ ./veristat -C baseline.csv comparison.csv -e file,prog,verdict,insns
  File            Program                   Verdict (A)  Verdict (B)  Verdict (DIFF)  Total insns (A)  Total insns (B)  Total insns (DIFF)
  --------------  ------------------------  -----------  -----------  --------------  ---------------  ---------------  ------------------
  biosnoop_bpf.o  blk_account_io_merge_bio  success      success      MATCH                        24               24         +0 (+0.00%)
  biosnoop_bpf.o  blk_account_io_start      failure      failure      MATCH                         0                0       +0 (+100.00%)
  biosnoop_bpf.o  block_rq_complete         success      success      MATCH                       104              104         +0 (+0.00%)
  biosnoop_bpf.o  block_rq_insert           success      success      MATCH                        91               85         -6 (-6.59%)
  biosnoop_bpf.o  block_rq_issue            success      success      MATCH                        91               85         -6 (-6.59%)
  --------------  ------------------------  -----------  -----------  --------------  ---------------  ---------------  ------------------

While not a particularly exciting example (it turned out to be kind of hard
to quickly find a nice example with a significant difference just because of
a kernel version bump), it should demonstrate the main features.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/r/20220921164254.3630690-4-andrii@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/veristat.c | 543 +++++++++++++++++++++++++++++----
 1 file changed, 492 insertions(+), 51 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c
index 0472bfae3c9d..c6837bac357f 100644
--- a/tools/testing/selftests/bpf/veristat.c
+++ b/tools/testing/selftests/bpf/veristat.c
@@ -43,7 +43,7 @@ struct stat_specs {
 	int spec_cnt;
 	enum stat_id ids[ALL_STATS_CNT];
 	bool asc[ALL_STATS_CNT];
-	int lens[ALL_STATS_CNT];
+	int lens[ALL_STATS_CNT * 3]; /* 3x for comparison mode */
 };
 
 enum resfmt {
@@ -57,16 +57,20 @@ static struct env {
 	int filename_cnt;
 	bool verbose;
 	enum resfmt out_fmt;
+	bool comparison_mode;
 
 	struct verif_stats *prog_stats;
 	int prog_stat_cnt;
 
+	/* baseline_stats is allocated and used only in comparsion mode */
+	struct verif_stats *baseline_stats;
+	int baseline_stat_cnt;
+
 	struct stat_specs output_spec;
 	struct stat_specs sort_spec;
 } env;
 
-static int libbpf_print_fn(enum libbpf_print_level level,
-		    const char *format, va_list args)
+static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
 {
 	if (!env.verbose)
 		return 0;
@@ -78,9 +82,10 @@ static int libbpf_print_fn(enum libbpf_print_level level,
 const char *argp_program_version = "veristat";
 const char *argp_program_bug_address = "<bpf@vger.kernel.org>";
 const char argp_program_doc[] =
-"veristat    BPF verifier stats collection tool.\n"
+"veristat    BPF verifier stats collection and comparison tool.\n"
 "\n"
-"USAGE: veristat <obj-file> [<obj-file>...]\n";
+"USAGE: veristat <obj-file> [<obj-file>...]\n"
+"   OR: veristat -C <baseline.csv> <comparison.csv>\n";
 
 static const struct argp_option opts[] = {
 	{ NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" },
@@ -88,6 +93,7 @@ static const struct argp_option opts[] = {
 	{ "emit", 'e', "SPEC", 0, "Specify stats to be emitted" },
 	{ "sort", 's', "SPEC", 0, "Specify sort order" },
 	{ "output-format", 'o', "FMT", 0, "Result output format (table, csv), default is table." },
+	{ "compare", 'C', NULL, 0, "Comparison mode" },
 	{},
 };
 
@@ -125,6 +131,9 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
 			return -EINVAL;
 		}
 		break;
+	case 'C':
+		env.comparison_mode = true;
+		break;
 	case ARGP_KEY_ARG:
 		tmp = realloc(env.filenames, (env.filename_cnt + 1) * sizeof(*env.filenames));
 		if (!tmp)
@@ -141,6 +150,12 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
 	return 0;
 }
 
+static const struct argp argp = {
+	.options = opts,
+	.parser = parse_arg,
+	.doc = argp_program_doc,
+};
+
 static const struct stat_specs default_output_spec = {
 	.spec_cnt = 7,
 	.ids = {
@@ -219,6 +234,20 @@ static int parse_stats(const char *stats_str, struct stat_specs *specs)
 	return 0;
 }
 
+static void free_verif_stats(struct verif_stats *stats, size_t stat_cnt)
+{
+	int i;
+
+	if (!stats)
+		return;
+
+	for (i = 0; i < stat_cnt; i++) {
+		free(stats[i].file_name);
+		free(stats[i].prog_name);
+	}
+	free(stats);
+}
+
 static char verif_log_buf[64 * 1024];
 
 static int parse_verif_log(const char *buf, size_t buf_sz, struct verif_stats *s)
@@ -448,6 +477,33 @@ static void output_headers(enum resfmt fmt)
 		output_header_underlines();
 }
 
+static void prepare_value(const struct verif_stats *s, enum stat_id id,
+			  const char **str, long *val)
+{
+	switch (id) {
+	case FILE_NAME:
+		*str = s->file_name;
+		break;
+	case PROG_NAME:
+		*str = s->prog_name;
+		break;
+	case VERDICT:
+		*str = s->stats[VERDICT] ? "success" : "failure";
+		break;
+	case DURATION:
+	case TOTAL_INSNS:
+	case TOTAL_STATES:
+	case PEAK_STATES:
+	case MAX_STATES_PER_INSN:
+	case MARK_READ_MAX_LEN:
+		*val = s->stats[id];
+		break;
+	default:
+		fprintf(stderr, "Unrecognized stat #%d\n", id);
+		exit(1);
+	}
+}
+
 static void output_stats(const struct verif_stats *s, enum resfmt fmt, bool last)
 {
 	int i;
@@ -458,28 +514,7 @@ static void output_stats(const struct verif_stats *s, enum resfmt fmt, bool last
 		const char *str = NULL;
 		long val = 0;
 
-		switch (id) {
-		case FILE_NAME:
-			str = s->file_name;
-			break;
-		case PROG_NAME:
-			str = s->prog_name;
-			break;
-		case VERDICT:
-			str = s->stats[VERDICT] ? "success" : "failure";
-			break;
-		case DURATION:
-		case TOTAL_INSNS:
-		case TOTAL_STATES:
-		case PEAK_STATES:
-		case MAX_STATES_PER_INSN:
-		case MARK_READ_MAX_LEN:
-			val = s->stats[id];
-			break;
-		default:
-			fprintf(stderr, "Unrecognized stat #%d\n", id);
-			exit(1);
-		}
+		prepare_value(s, id, &str, &val);
 
 		switch (fmt) {
 		case RESFMT_TABLE_CALCLEN:
@@ -509,38 +544,28 @@ static void output_stats(const struct verif_stats *s, enum resfmt fmt, bool last
 		}
 	}
 
-	if (last && fmt == RESFMT_TABLE)
+	if (last && fmt == RESFMT_TABLE) {
 		output_header_underlines();
+		printf("Done. Processed %d object files, %d programs.\n",
+		       env.filename_cnt, env.prog_stat_cnt);
+	}
 }
 
-int main(int argc, char **argv)
+static int handle_verif_mode(void)
 {
-	static const struct argp argp = {
-		.options = opts,
-		.parser = parse_arg,
-		.doc = argp_program_doc,
-	};
-	int err = 0, i;
-
-	if (argp_parse(&argp, argc, argv, 0, NULL, NULL))
-		return 1;
+	int i, err;
 
 	if (env.filename_cnt == 0) {
 		fprintf(stderr, "Please provide path to BPF object file!\n");
 		argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
-		return 1;
+		return -EINVAL;
 	}
 
-	if (env.output_spec.spec_cnt == 0)
-		env.output_spec = default_output_spec;
-	if (env.sort_spec.spec_cnt == 0)
-		env.sort_spec = default_sort_spec;
-
 	for (i = 0; i < env.filename_cnt; i++) {
 		err = process_obj(env.filenames[i]);
 		if (err) {
 			fprintf(stderr, "Failed to process '%s': %d\n", env.filenames[i], err);
-			goto cleanup;
+			return err;
 		}
 	}
 
@@ -559,15 +584,431 @@ int main(int argc, char **argv)
 		output_stats(&env.prog_stats[i], env.out_fmt, i == env.prog_stat_cnt - 1);
 	}
 
-	printf("Done. Processed %d object files, %d programs.\n",
-	       env.filename_cnt, env.prog_stat_cnt);
+	return 0;
+}
+
+static int parse_stat_value(const char *str, enum stat_id id, struct verif_stats *st)
+{
+	switch (id) {
+	case FILE_NAME:
+		st->file_name = strdup(str);
+		if (!st->file_name)
+			return -ENOMEM;
+		break;
+	case PROG_NAME:
+		st->prog_name = strdup(str);
+		if (!st->prog_name)
+			return -ENOMEM;
+		break;
+	case VERDICT:
+		if (strcmp(str, "success") == 0) {
+			st->stats[VERDICT] = true;
+		} else if (strcmp(str, "failure") == 0) {
+			st->stats[VERDICT] = false;
+		} else {
+			fprintf(stderr, "Unrecognized verification verdict '%s'\n", str);
+			return -EINVAL;
+		}
+		break;
+	case DURATION:
+	case TOTAL_INSNS:
+	case TOTAL_STATES:
+	case PEAK_STATES:
+	case MAX_STATES_PER_INSN:
+	case MARK_READ_MAX_LEN: {
+		long val;
+		int err, n;
+
+		if (sscanf(str, "%ld %n", &val, &n) != 1 || n != strlen(str)) {
+			err = -errno;
+			fprintf(stderr, "Failed to parse '%s' as integer\n", str);
+			return err;
+		}
+
+		st->stats[id] = val;
+		break;
+	}
+	default:
+		fprintf(stderr, "Unrecognized stat #%d\n", id);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+static int parse_stats_csv(const char *filename, struct stat_specs *specs,
+			   struct verif_stats **statsp, int *stat_cntp)
+{
+	char line[4096];
+	FILE *f;
+	int err = 0;
+	bool header = true;
+
+	f = fopen(filename, "r");
+	if (!f) {
+		err = -errno;
+		fprintf(stderr, "Failed to open '%s': %d\n", filename, err);
+		return err;
+	}
+
+	*stat_cntp = 0;
+
+	while (fgets(line, sizeof(line), f)) {
+		char *input = line, *state = NULL, *next;
+		struct verif_stats *st = NULL;
+		int col = 0;
+
+		if (!header) {
+			void *tmp;
+
+			tmp = realloc(*statsp, (*stat_cntp + 1) * sizeof(**statsp));
+			if (!tmp) {
+				err = -ENOMEM;
+				goto cleanup;
+			}
+			*statsp = tmp;
+			st = &(*statsp)[*stat_cntp];
+			*stat_cntp += 1;
+		}
+
+		while ((next = strtok_r(state ? NULL : input, ",\n", &state))) {
+			if (header) {
+				/* for the first line, set up spec stats */
+				err = parse_stat(next, specs);
+				if (err)
+					goto cleanup;
+				continue;
+			}
+
+			/* for all other lines, parse values based on spec */
+			if (col >= specs->spec_cnt) {
+				fprintf(stderr, "Found extraneous column #%d in row #%d of '%s'\n",
+					col, *stat_cntp, filename);
+				err = -EINVAL;
+				goto cleanup;
+			}
+			err = parse_stat_value(next, specs->ids[col], st);
+			if (err)
+				goto cleanup;
+			col++;
+		}
+
+		if (!header && col < specs->spec_cnt) {
+			fprintf(stderr, "Not enough columns in row #%d in '%s'\n",
+				*stat_cntp, filename);
+			err = -EINVAL;
+			goto cleanup;
+		}
+
+		header = false;
+	}
+
+	if (!feof(f)) {
+		err = -errno;
+		fprintf(stderr, "Failed I/O for '%s': %d\n", filename, err);
+	}
 
 cleanup:
-	for (i = 0; i < env.prog_stat_cnt; i++) {
-		free(env.prog_stats[i].file_name);
-		free(env.prog_stats[i].prog_name);
+	fclose(f);
+	return err;
+}
+
+/* empty/zero stats for mismatched rows */
+static const struct verif_stats fallback_stats = { .file_name = "", .prog_name = "" };
+
+static bool is_key_stat(enum stat_id id)
+{
+	return id == FILE_NAME || id == PROG_NAME;
+}
+
+static void output_comp_header_underlines(void)
+{
+	int i, j, k;
+
+	for (i = 0; i < env.output_spec.spec_cnt; i++) {
+		int id = env.output_spec.ids[i];
+		int max_j = is_key_stat(id) ? 1 : 3;
+
+		for (j = 0; j < max_j; j++) {
+			int len = env.output_spec.lens[3 * i + j];
+
+			printf("%s", i + j == 0 ? "" : COLUMN_SEP);
+
+			for (k = 0; k < len; k++)
+				printf("%c", HEADER_CHAR);
+		}
+	}
+	printf("\n");
+}
+
+static void output_comp_headers(enum resfmt fmt)
+{
+	static const char *table_sfxs[3] = {" (A)", " (B)", " (DIFF)"};
+	static const char *name_sfxs[3] = {"_base", "_comp", "_diff"};
+	int i, j, len;
+
+	for (i = 0; i < env.output_spec.spec_cnt; i++) {
+		int id = env.output_spec.ids[i];
+		/* key stats don't have A/B/DIFF columns, they are common for both data sets */
+		int max_j = is_key_stat(id) ? 1 : 3;
+
+		for (j = 0; j < max_j; j++) {
+			int *max_len = &env.output_spec.lens[3 * i + j];
+			bool last = (i == env.output_spec.spec_cnt - 1) && (j == max_j - 1);
+			const char *sfx;
+
+			switch (fmt) {
+			case RESFMT_TABLE_CALCLEN:
+				sfx = is_key_stat(id) ? "" : table_sfxs[j];
+				len = snprintf(NULL, 0, "%s%s", stat_defs[id].header, sfx);
+				if (len > *max_len)
+					*max_len = len;
+				break;
+			case RESFMT_TABLE:
+				sfx = is_key_stat(id) ? "" : table_sfxs[j];
+				printf("%s%-*s%s", i + j == 0 ? "" : COLUMN_SEP,
+				       *max_len - (int)strlen(sfx), stat_defs[id].header, sfx);
+				if (last)
+					printf("\n");
+				break;
+			case RESFMT_CSV:
+				sfx = is_key_stat(id) ? "" : name_sfxs[j];
+				printf("%s%s%s", i + j == 0 ? "" : ",", stat_defs[id].names[0], sfx);
+				if (last)
+					printf("\n");
+				break;
+			}
+		}
+	}
+
+	if (fmt == RESFMT_TABLE)
+		output_comp_header_underlines();
+}
+
+static void output_comp_stats(const struct verif_stats *base, const struct verif_stats *comp,
+			      enum resfmt fmt, bool last)
+{
+	char base_buf[1024] = {}, comp_buf[1024] = {}, diff_buf[1024] = {};
+	int i;
+
+	for (i = 0; i < env.output_spec.spec_cnt; i++) {
+		int id = env.output_spec.ids[i], len;
+		int *max_len_base = &env.output_spec.lens[3 * i + 0];
+		int *max_len_comp = &env.output_spec.lens[3 * i + 1];
+		int *max_len_diff = &env.output_spec.lens[3 * i + 2];
+		const char *base_str = NULL, *comp_str = NULL;
+		long base_val = 0, comp_val = 0, diff_val = 0;
+
+		prepare_value(base, id, &base_str, &base_val);
+		prepare_value(comp, id, &comp_str, &comp_val);
+
+		/* normalize all the outputs to be in string buffers for simplicity */
+		if (is_key_stat(id)) {
+			/* key stats (file and program name) are always strings */
+			if (base != &fallback_stats)
+				snprintf(base_buf, sizeof(base_buf), "%s", base_str);
+			else
+				snprintf(base_buf, sizeof(base_buf), "%s", comp_str);
+		} else if (base_str) {
+			snprintf(base_buf, sizeof(base_buf), "%s", base_str);
+			snprintf(comp_buf, sizeof(comp_buf), "%s", comp_str);
+			if (strcmp(base_str, comp_str) == 0)
+				snprintf(diff_buf, sizeof(diff_buf), "%s", "MATCH");
+			else
+				snprintf(diff_buf, sizeof(diff_buf), "%s", "MISMATCH");
+		} else {
+			snprintf(base_buf, sizeof(base_buf), "%ld", base_val);
+			snprintf(comp_buf, sizeof(comp_buf), "%ld", comp_val);
+
+			diff_val = comp_val - base_val;
+			if (base == &fallback_stats || comp == &fallback_stats || base_val == 0) {
+				snprintf(diff_buf, sizeof(diff_buf), "%+ld (%+.2lf%%)",
+					 diff_val, comp_val < base_val ? -100.0 : 100.0);
+			} else {
+				snprintf(diff_buf, sizeof(diff_buf), "%+ld (%+.2lf%%)",
+					 diff_val, diff_val * 100.0 / base_val);
+			}
+		}
+
+		switch (fmt) {
+		case RESFMT_TABLE_CALCLEN:
+			len = strlen(base_buf);
+			if (len > *max_len_base)
+				*max_len_base = len;
+			if (!is_key_stat(id)) {
+				len = strlen(comp_buf);
+				if (len > *max_len_comp)
+					*max_len_comp = len;
+				len = strlen(diff_buf);
+				if (len > *max_len_diff)
+					*max_len_diff = len;
+			}
+			break;
+		case RESFMT_TABLE: {
+			/* string outputs are left-aligned, number outputs are right-aligned */
+			const char *fmt = base_str ? "%s%-*s" : "%s%*s";
+
+			printf(fmt, i == 0 ? "" : COLUMN_SEP, *max_len_base, base_buf);
+			if (!is_key_stat(id)) {
+				printf(fmt, COLUMN_SEP, *max_len_comp, comp_buf);
+				printf(fmt, COLUMN_SEP, *max_len_diff, diff_buf);
+			}
+			if (i == env.output_spec.spec_cnt - 1)
+				printf("\n");
+			break;
+		}
+		case RESFMT_CSV:
+			printf("%s%s", i == 0 ? "" : ",", base_buf);
+			if (!is_key_stat(id)) {
+				printf("%s%s", i == 0 ? "" : ",", comp_buf);
+				printf("%s%s", i == 0 ? "" : ",", diff_buf);
+			}
+			if (i == env.output_spec.spec_cnt - 1)
+				printf("\n");
+			break;
+		}
+	}
+
+	if (last && fmt == RESFMT_TABLE)
+		output_comp_header_underlines();
+}
+
+static int cmp_stats_key(const struct verif_stats *base, const struct verif_stats *comp)
+{
+	int r;
+
+	r = strcmp(base->file_name, comp->file_name);
+	if (r != 0)
+		return r;
+	return strcmp(base->prog_name, comp->prog_name);
+}
+
+static int handle_comparison_mode(void)
+{
+	struct stat_specs base_specs = {}, comp_specs = {};
+	enum resfmt cur_fmt;
+	int err, i, j;
+
+	if (env.filename_cnt != 2) {
+		fprintf(stderr, "Comparison mode expects exactly two input CSV files!\n");
+		argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
+		return -EINVAL;
+	}
+
+	err = parse_stats_csv(env.filenames[0], &base_specs,
+			      &env.baseline_stats, &env.baseline_stat_cnt);
+	if (err) {
+		fprintf(stderr, "Failed to parse stats from '%s': %d\n", env.filenames[0], err);
+		return err;
+	}
+	err = parse_stats_csv(env.filenames[1], &comp_specs,
+			      &env.prog_stats, &env.prog_stat_cnt);
+	if (err) {
+		fprintf(stderr, "Failed to parse stats from '%s': %d\n", env.filenames[1], err);
+		return err;
 	}
-	free(env.prog_stats);
+
+	/* To keep it simple we validate that the set and order of stats in
+	 * both CSVs are exactly the same. This can be lifted with a bit more
+	 * pre-processing later.
+	 */
+	if (base_specs.spec_cnt != comp_specs.spec_cnt) {
+		fprintf(stderr, "Number of stats in '%s' and '%s' differs (%d != %d)!\n",
+			env.filenames[0], env.filenames[1],
+			base_specs.spec_cnt, comp_specs.spec_cnt);
+		return -EINVAL;
+	}
+	for (i = 0; i < base_specs.spec_cnt; i++) {
+		if (base_specs.ids[i] != comp_specs.ids[i]) {
+			fprintf(stderr, "Stats composition differs between '%s' and '%s' (%s != %s)!\n",
+				env.filenames[0], env.filenames[1],
+				stat_defs[base_specs.ids[i]].names[0],
+				stat_defs[comp_specs.ids[i]].names[0]);
+			return -EINVAL;
+		}
+	}
+
+	qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats);
+	qsort(env.baseline_stats, env.baseline_stat_cnt, sizeof(*env.baseline_stats), cmp_prog_stats);
+
+	/* for human-readable table output we need to do extra pass to
+	 * calculate column widths, so we substitute current output format
+	 * with RESFMT_TABLE_CALCLEN and later revert it back to RESFMT_TABLE
+	 * and do everything again.
+	 */
+	if (env.out_fmt == RESFMT_TABLE)
+		cur_fmt = RESFMT_TABLE_CALCLEN;
+	else
+		cur_fmt = env.out_fmt;
+
+one_more_time:
+	output_comp_headers(cur_fmt);
+
+	/* If baseline and comparison datasets have different subset of rows
+	 * (we match by 'object + prog' as a unique key) then assume
+	 * empty/missing/zero value for rows that are missing in the opposite
+	 * data set
+	 */
+	i = j = 0;
+	while (i < env.baseline_stat_cnt || j < env.prog_stat_cnt) {
+		bool last = (i == env.baseline_stat_cnt - 1) || (j == env.prog_stat_cnt - 1);
+		const struct verif_stats *base, *comp;
+		int r;
+
+		base = i < env.baseline_stat_cnt ? &env.baseline_stats[i] : &fallback_stats;
+		comp = j < env.prog_stat_cnt ? &env.prog_stats[j] : &fallback_stats;
+
+		if (!base->file_name || !base->prog_name) {
+			fprintf(stderr, "Entry #%d in '%s' doesn't have file and/or program name specified!\n",
+				i, env.filenames[0]);
+			return -EINVAL;
+		}
+		if (!comp->file_name || !comp->prog_name) {
+			fprintf(stderr, "Entry #%d in '%s' doesn't have file and/or program name specified!\n",
+				j, env.filenames[1]);
+			return -EINVAL;
+		}
+
+		r = cmp_stats_key(base, comp);
+		if (r == 0) {
+			output_comp_stats(base, comp, cur_fmt, last);
+			i++;
+			j++;
+		} else if (comp == &fallback_stats || r < 0) {
+			output_comp_stats(base, &fallback_stats, cur_fmt, last);
+			i++;
+		} else {
+			output_comp_stats(&fallback_stats, comp, cur_fmt, last);
+			j++;
+		}
+	}
+
+	if (cur_fmt == RESFMT_TABLE_CALCLEN) {
+		cur_fmt = RESFMT_TABLE;
+		goto one_more_time; /* ... this time with feeling */
+	}
+
+	return 0;
+}
+
+int main(int argc, char **argv)
+{
+	int err = 0, i;
+
+	if (argp_parse(&argp, argc, argv, 0, NULL, NULL))
+		return 1;
+
+	if (env.output_spec.spec_cnt == 0)
+		env.output_spec = default_output_spec;
+	if (env.sort_spec.spec_cnt == 0)
+		env.sort_spec = default_sort_spec;
+
+	if (env.comparison_mode)
+		err = handle_comparison_mode();
+	else
+		err = handle_verif_mode();
+
+	free_verif_stats(env.prog_stats, env.prog_stat_cnt);
+	free_verif_stats(env.baseline_stats, env.baseline_stat_cnt);
 	for (i = 0; i < env.filename_cnt; i++)
 		free(env.filenames[i]);
 	free(env.filenames);
-- 
cgit v1.2.3


From bde4a96cdcadc1f9c92cc2715a0022545bfb3201 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Wed, 21 Sep 2022 09:42:54 -0700
Subject: selftests/bpf: add ability to filter programs in veristat

Add -f (--filter) argument which accepts glob-based filters for
narrowing down what BPF object files and programs within them should be
processed by veristat. This filtering applies both to comparison and
main (verification) mode.

Filter can be of two forms:
  - file (object) filter: 'strobemeta*'; in this case all the programs
    within matching files are implicitly allowed (or denied, depending
    on whether it's a positive or negative rule, see below);
  - file and prog filter: 'strobemeta*/*unroll*' will further filter
    programs within matching files to only allow those program names that
    match '*unroll*' glob.

As mentioned, filters can be positive (allowlisting) and negative
(denylisting). Negative filters should start with '!': '!strobemeta*'
will deny any filename whose basename starts with "strobemeta".

Further, one extra special syntax is supported to allow more convenient
use in practice. Instead of specifying rules on the command line,
veristat allows specifying a file that contains rules, both positive and
negative, one filter per line. This is achieved with -f @<filepath>,
where <filepath> points to a text file containing rules (negative
and positive rules can be mixed). For convenience, empty lines and lines
starting with '#' are ignored. This feature is useful for keeping a
pre-canned list of object files and program names that are tested
repeatedly, making it possible to check in a list of rules and quickly
specify them on the command line.

As a demonstration (and a short cut for nearest future), create a small
list of "interesting" BPF object files from selftests/bpf and commit it
as veristat.cfg. It currently includes 73 programs, most of which are
the most complex and largest BPF programs in selftests, as judged by
total verified instruction count and verifier states total.

If there is overlap between positive and negative filters, the negative
filter takes precedence (denylisting is stronger than allowlisting). If
no allow filter is specified, veristat implicitly assumes a '*/*' rule. If
no deny rule is specified, veristat (logically) assumes no negative
filters.

Also note that -f (just like -e and -s) can be specified multiple times
and their effect is cumulative.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/r/20220921164254.3630690-5-andrii@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/veristat.c   | 212 ++++++++++++++++++++++++++++++-
 tools/testing/selftests/bpf/veristat.cfg |  17 +++
 2 files changed, 227 insertions(+), 2 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/veristat.cfg

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c
index c6837bac357f..51030234b60a 100644
--- a/tools/testing/selftests/bpf/veristat.c
+++ b/tools/testing/selftests/bpf/veristat.c
@@ -52,6 +52,11 @@ enum resfmt {
 	RESFMT_CSV,
 };
 
+struct filter {
+	char *file_glob;
+	char *prog_glob;
+};
+
 static struct env {
 	char **filenames;
 	int filename_cnt;
@@ -68,6 +73,11 @@ static struct env {
 
 	struct stat_specs output_spec;
 	struct stat_specs sort_spec;
+
+	struct filter *allow_filters;
+	struct filter *deny_filters;
+	int allow_filter_cnt;
+	int deny_filter_cnt;
 } env;
 
 static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
@@ -94,10 +104,13 @@ static const struct argp_option opts[] = {
 	{ "sort", 's', "SPEC", 0, "Specify sort order" },
 	{ "output-format", 'o', "FMT", 0, "Result output format (table, csv), default is table." },
 	{ "compare", 'C', NULL, 0, "Comparison mode" },
+	{ "filter", 'f', "FILTER", 0, "Filter expressions (or @filename for file with expressions)." },
 	{},
 };
 
 static int parse_stats(const char *stats_str, struct stat_specs *specs);
+static int append_filter(struct filter **filters, int *cnt, const char *str);
+static int append_filter_file(const char *path);
 
 static error_t parse_arg(int key, char *arg, struct argp_state *state)
 {
@@ -134,6 +147,18 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
 	case 'C':
 		env.comparison_mode = true;
 		break;
+	case 'f':
+		if (arg[0] == '@')
+			err = append_filter_file(arg + 1);
+		else if (arg[0] == '!')
+			err = append_filter(&env.deny_filters, &env.deny_filter_cnt, arg + 1);
+		else
+			err = append_filter(&env.allow_filters, &env.allow_filter_cnt, arg);
+		if (err) {
+			fprintf(stderr, "Failed to collect program filter expressions: %d\n", err);
+			return err;
+		}
+		break;
 	case ARGP_KEY_ARG:
 		tmp = realloc(env.filenames, (env.filename_cnt + 1) * sizeof(*env.filenames));
 		if (!tmp)
@@ -156,6 +181,150 @@ static const struct argp argp = {
 	.doc = argp_program_doc,
 };
 
+
+/* Adapted from perf/util/string.c */
+static bool glob_matches(const char *str, const char *pat)
+{
+	while (*str && *pat && *pat != '*') {
+		if (*str != *pat)
+			return false;
+		str++;
+		pat++;
+	}
+	/* Check wild card */
+	if (*pat == '*') {
+		while (*pat == '*')
+			pat++;
+		if (!*pat) /* Tail wild card matches all */
+			return true;
+		while (*str)
+			if (glob_matches(str++, pat))
+				return true;
+	}
+	return !*str && !*pat;
+}
+
+static bool should_process_file(const char *filename)
+{
+	int i;
+
+	if (env.deny_filter_cnt > 0) {
+		for (i = 0; i < env.deny_filter_cnt; i++) {
+			if (glob_matches(filename, env.deny_filters[i].file_glob))
+				return false;
+		}
+	}
+
+	if (env.allow_filter_cnt == 0)
+		return true;
+
+	for (i = 0; i < env.allow_filter_cnt; i++) {
+		if (glob_matches(filename, env.allow_filters[i].file_glob))
+			return true;
+	}
+
+	return false;
+}
+
+static bool should_process_prog(const char *filename, const char *prog_name)
+{
+	int i;
+
+	if (env.deny_filter_cnt > 0) {
+		for (i = 0; i < env.deny_filter_cnt; i++) {
+			if (glob_matches(filename, env.deny_filters[i].file_glob))
+				return false;
+			if (!env.deny_filters[i].prog_glob)
+				continue;
+			if (glob_matches(prog_name, env.deny_filters[i].prog_glob))
+				return false;
+		}
+	}
+
+	if (env.allow_filter_cnt == 0)
+		return true;
+
+	for (i = 0; i < env.allow_filter_cnt; i++) {
+		if (!glob_matches(filename, env.allow_filters[i].file_glob))
+			continue;
+		/* if filter specifies only filename glob part, it implicitly
+		 * allows all progs within that file
+		 */
+		if (!env.allow_filters[i].prog_glob)
+			return true;
+		if (glob_matches(prog_name, env.allow_filters[i].prog_glob))
+			return true;
+	}
+
+	return false;
+}
+
+static int append_filter(struct filter **filters, int *cnt, const char *str)
+{
+	struct filter *f;
+	void *tmp;
+	const char *p;
+
+	tmp = realloc(*filters, (*cnt + 1) * sizeof(**filters));
+	if (!tmp)
+		return -ENOMEM;
+	*filters = tmp;
+
+	f = &(*filters)[*cnt];
+	f->file_glob = f->prog_glob = NULL;
+
+	/* filter can be specified either as "<obj-glob>" or "<obj-glob>/<prog-glob>" */
+	p = strchr(str, '/');
+	if (!p) {
+		f->file_glob = strdup(str);
+		if (!f->file_glob)
+			return -ENOMEM;
+	} else {
+		f->file_glob = strndup(str, p - str);
+		f->prog_glob = strdup(p + 1);
+		if (!f->file_glob || !f->prog_glob) {
+			free(f->file_glob);
+			free(f->prog_glob);
+			f->file_glob = f->prog_glob = NULL;
+			return -ENOMEM;
+		}
+	}
+
+	*cnt = *cnt + 1;
+	return 0;
+}
+
+static int append_filter_file(const char *path)
+{
+	char buf[1024];
+	FILE *f;
+	int err = 0;
+
+	f = fopen(path, "r");
+	if (!f) {
+		err = -errno;
+		fprintf(stderr, "Failed to open '%s': %d\n", path, err);
+		return err;
+	}
+
+	while (fscanf(f, " %1023[^\n]\n", buf) == 1) {
+		/* lines starting with # are comments, skip them */
+		if (buf[0] == '\0' || buf[0] == '#')
+			continue;
+		/* lines starting with ! are negative match filters */
+		if (buf[0] == '!')
+			err = append_filter(&env.deny_filters, &env.deny_filter_cnt, buf + 1);
+		else
+			err = append_filter(&env.allow_filters, &env.allow_filter_cnt, buf);
+		if (err)
+			goto cleanup;
+	}
+
+cleanup:
+	fclose(f);
+	return err;
+}
+
 static const struct stat_specs default_output_spec = {
 	.spec_cnt = 7,
 	.ids = {
@@ -283,6 +452,9 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf
 	int err = 0;
 	void *tmp;
 
+	if (!should_process_prog(basename(filename), bpf_program__name(prog)))
+		return 0;
+
 	tmp = realloc(env.prog_stats, (env.prog_stat_cnt + 1) * sizeof(*env.prog_stats));
 	if (!tmp)
 		return -ENOMEM;
@@ -330,6 +502,9 @@ static int process_obj(const char *filename)
 	LIBBPF_OPTS(bpf_object_open_opts, opts);
 	int err = 0, prog_cnt = 0;
 
+	if (!should_process_file(basename(filename)))
+		return 0;
+
 	old_libbpf_print_fn = libbpf_set_print(libbpf_print_fn);
 
 	obj = bpf_object__open_file(filename, &opts);
@@ -666,7 +841,10 @@ static int parse_stats_csv(const char *filename, struct stat_specs *specs,
 				goto cleanup;
 			}
 			*statsp = tmp;
+
 			st = &(*statsp)[*stat_cntp];
+			memset(st, 0, sizeof(*st));
+
 			*stat_cntp += 1;
 		}
 
@@ -692,14 +870,34 @@ static int parse_stats_csv(const char *filename, struct stat_specs *specs,
 			col++;
 		}
 
-		if (!header && col < specs->spec_cnt) {
+		if (header) {
+			header = false;
+			continue;
+		}
+
+		if (col < specs->spec_cnt) {
 			fprintf(stderr, "Not enough columns in row #%d in '%s'\n",
 				*stat_cntp, filename);
 			err = -EINVAL;
 			goto cleanup;
 		}
 
-		header = false;
+		if (!st->file_name || !st->prog_name) {
+			fprintf(stderr, "Row #%d in '%s' is missing file and/or program name\n",
+				*stat_cntp, filename);
+			err = -EINVAL;
+			goto cleanup;
+		}
+
+		/* in comparison mode we can only check filters after we
+		 * parsed entire line; if row should be ignored we pretend we
+		 * never parsed it
+		 */
+		if (!should_process_prog(st->file_name, st->prog_name)) {
+			free(st->file_name);
+			free(st->prog_name);
+			*stat_cntp -= 1;
+		}
 	}
 
 	if (!feof(f)) {
@@ -1012,5 +1210,15 @@ int main(int argc, char **argv)
 	for (i = 0; i < env.filename_cnt; i++)
 		free(env.filenames[i]);
 	free(env.filenames);
+	for (i = 0; i < env.allow_filter_cnt; i++) {
+		free(env.allow_filters[i].file_glob);
+		free(env.allow_filters[i].prog_glob);
+	}
+	free(env.allow_filters);
+	for (i = 0; i < env.deny_filter_cnt; i++) {
+		free(env.deny_filters[i].file_glob);
+		free(env.deny_filters[i].prog_glob);
+	}
+	free(env.deny_filters);
 	return -err;
 }
diff --git a/tools/testing/selftests/bpf/veristat.cfg b/tools/testing/selftests/bpf/veristat.cfg
new file mode 100644
index 000000000000..1a385061618d
--- /dev/null
+++ b/tools/testing/selftests/bpf/veristat.cfg
@@ -0,0 +1,17 @@
+# pre-canned list of rather complex selftests/bpf BPF object files to monitor
+# BPF verifier's performance on
+bpf_flow*
+bpf_loop_bench*
+loop*
+netif_receive_skb*
+profiler*
+pyperf*
+strobemeta*
+test_cls_redirect*
+test_l4lb
+test_sysctl*
+test_tcp_hdr_*
+test_usdt*
+test_verif_scale*
+test_xdp_noinline*
+xdp_synproxy*
-- 
cgit v1.2.3


From b780d1671cf933caa3f67160f73261f10750f1a9 Mon Sep 17 00:00:00 2001
From: Yauheni Kaliuta <ykaliuta@redhat.com>
Date: Tue, 20 Sep 2022 19:14:09 +0300
Subject: selftests/bpf: Add liburandom_read.so to TEST_GEN_FILES

The added urandom_read shared lib is missing from the list of installed
files, which leaves the urandom_read test broken after `make install` or
`make gen_tar`.

Add the library to TEST_GEN_FILES. The names in the list do not
contain $(OUTPUT) since it's added by lib.mk code.

Fixes: 00a0fa2d7d49 ("selftests/bpf: Add urandom_read shared lib and USDTs")
Signed-off-by: Yauheni Kaliuta <ykaliuta@redhat.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220920161409.129953-1-ykaliuta@redhat.com
---
 tools/testing/selftests/bpf/Makefile | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 5898d3828b82..e6cf21fad69f 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -86,6 +86,7 @@ TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \
 	xskxceiver xdp_redirect_multi xdp_synproxy veristat
 
 TEST_CUSTOM_PROGS = $(OUTPUT)/urandom_read $(OUTPUT)/sign-file
+TEST_GEN_FILES += liburandom_read.so
 
 # Emit succinct information message describing current building step
 # $1 - generic step name (e.g., CC, LINK, etc);
-- 
cgit v1.2.3


From f5eb23b91c41a7ffc7ca7fe14f3c512360f02937 Mon Sep 17 00:00:00 2001
From: Hou Tao <houtao1@huawei.com>
Date: Wed, 21 Sep 2022 15:00:34 +0800
Subject: selftests/bpf: Destroy the skeleton when CONFIG_PREEMPT is off

Destroy the created skeleton when CONFIG_PREEMPT is off, otherwise there
will be a resource leak.

Fixes: 73b97bc78b32 ("selftests/bpf: Test concurrent updates on bpf_task_storage_busy")
Signed-off-by: Hou Tao <houtao1@huawei.com>
Link: https://lore.kernel.org/r/20220921070035.2016413-2-houtao@huaweicloud.com
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
---
 tools/testing/selftests/bpf/map_tests/task_storage_map.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/map_tests/task_storage_map.c b/tools/testing/selftests/bpf/map_tests/task_storage_map.c
index aac08c85240b..7d050364efca 100644
--- a/tools/testing/selftests/bpf/map_tests/task_storage_map.c
+++ b/tools/testing/selftests/bpf/map_tests/task_storage_map.c
@@ -79,6 +79,7 @@ void test_task_storage_map_stress_lookup(void)
 	/* Only for a fully preemptible kernel */
 	if (!skel->kconfig->CONFIG_PREEMPT) {
 		printf("%s SKIP (no CONFIG_PREEMPT)\n", __func__);
+		read_bpf_task_storage_busy__destroy(skel);
 		skips++;
 		return;
 	}
-- 
cgit v1.2.3


From 103d002fb7d548fb1187e350f2b73788558128b9 Mon Sep 17 00:00:00 2001
From: Hou Tao <houtao1@huawei.com>
Date: Wed, 21 Sep 2022 15:00:35 +0800
Subject: selftests/bpf: Free the allocated resources after test case succeeds

Free the created fd or allocated bpf_object after the test case succeeds,
otherwise there will be resource leaks.

Spotted by using address sanitizer and checking the content of
/proc/$pid/fd directory.

Signed-off-by: Hou Tao <houtao1@huawei.com>
Link: https://lore.kernel.org/r/20220921070035.2016413-3-houtao@huaweicloud.com
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
---
 .../selftests/bpf/map_tests/array_map_batch_ops.c  |  2 ++
 .../selftests/bpf/map_tests/htab_map_batch_ops.c   |  2 ++
 .../bpf/map_tests/lpm_trie_map_batch_ops.c         |  2 ++
 tools/testing/selftests/bpf/test_maps.c            | 24 ++++++++++++++--------
 4 files changed, 21 insertions(+), 9 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c
index 78c76496b14a..b595556315bc 100644
--- a/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c
+++ b/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c
@@ -3,6 +3,7 @@
 #include <stdio.h>
 #include <errno.h>
 #include <string.h>
+#include <unistd.h>
 
 #include <bpf/bpf.h>
 #include <bpf/libbpf.h>
@@ -137,6 +138,7 @@ static void __test_map_lookup_and_update_batch(bool is_pcpu)
 	free(keys);
 	free(values);
 	free(visited);
+	close(map_fd);
 }
 
 static void array_map_batch_ops(void)
diff --git a/tools/testing/selftests/bpf/map_tests/htab_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/htab_map_batch_ops.c
index f807d53fd8dd..1230ccf90128 100644
--- a/tools/testing/selftests/bpf/map_tests/htab_map_batch_ops.c
+++ b/tools/testing/selftests/bpf/map_tests/htab_map_batch_ops.c
@@ -3,6 +3,7 @@
 #include <stdio.h>
 #include <errno.h>
 #include <string.h>
+#include <unistd.h>
 
 #include <bpf/bpf.h>
 #include <bpf/libbpf.h>
@@ -255,6 +256,7 @@ void __test_map_lookup_and_delete_batch(bool is_pcpu)
 	free(visited);
 	if (!is_pcpu)
 		free(values);
+	close(map_fd);
 }
 
 void htab_map_batch_ops(void)
diff --git a/tools/testing/selftests/bpf/map_tests/lpm_trie_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/lpm_trie_map_batch_ops.c
index 87d07b596e17..b66d56ddb7ef 100644
--- a/tools/testing/selftests/bpf/map_tests/lpm_trie_map_batch_ops.c
+++ b/tools/testing/selftests/bpf/map_tests/lpm_trie_map_batch_ops.c
@@ -7,6 +7,7 @@
 #include <errno.h>
 #include <string.h>
 #include <stdlib.h>
+#include <unistd.h>
 
 #include <bpf/bpf.h>
 #include <bpf/libbpf.h>
@@ -150,4 +151,5 @@ void test_lpm_trie_map_batch_ops(void)
 	free(keys);
 	free(values);
 	free(visited);
+	close(map_fd);
 }
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 289ff310e283..b73152822aa2 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -659,13 +659,13 @@ static void test_sockmap(unsigned int tasks, void *data)
 {
 	struct bpf_map *bpf_map_rx, *bpf_map_tx, *bpf_map_msg, *bpf_map_break;
 	int map_fd_msg = 0, map_fd_rx = 0, map_fd_tx = 0, map_fd_break;
+	struct bpf_object *parse_obj, *verdict_obj, *msg_obj;
 	int ports[] = {50200, 50201, 50202, 50204};
 	int err, i, fd, udp, sfd[6] = {0xdeadbeef};
 	u8 buf[20] = {0x0, 0x5, 0x3, 0x2, 0x1, 0x0};
 	int parse_prog, verdict_prog, msg_prog;
 	struct sockaddr_in addr;
 	int one = 1, s, sc, rc;
-	struct bpf_object *obj;
 	struct timeval to;
 	__u32 key, value;
 	pid_t pid[tasks];
@@ -761,6 +761,7 @@ static void test_sockmap(unsigned int tasks, void *data)
 		       i, udp);
 		goto out_sockmap;
 	}
+	close(udp);
 
 	/* Test update without programs */
 	for (i = 0; i < 6; i++) {
@@ -823,27 +824,27 @@ static void test_sockmap(unsigned int tasks, void *data)
 
 	/* Load SK_SKB program and Attach */
 	err = bpf_prog_test_load(SOCKMAP_PARSE_PROG,
-			    BPF_PROG_TYPE_SK_SKB, &obj, &parse_prog);
+			    BPF_PROG_TYPE_SK_SKB, &parse_obj, &parse_prog);
 	if (err) {
 		printf("Failed to load SK_SKB parse prog\n");
 		goto out_sockmap;
 	}
 
 	err = bpf_prog_test_load(SOCKMAP_TCP_MSG_PROG,
-			    BPF_PROG_TYPE_SK_MSG, &obj, &msg_prog);
+			    BPF_PROG_TYPE_SK_MSG, &msg_obj, &msg_prog);
 	if (err) {
 		printf("Failed to load SK_SKB msg prog\n");
 		goto out_sockmap;
 	}
 
 	err = bpf_prog_test_load(SOCKMAP_VERDICT_PROG,
-			    BPF_PROG_TYPE_SK_SKB, &obj, &verdict_prog);
+			    BPF_PROG_TYPE_SK_SKB, &verdict_obj, &verdict_prog);
 	if (err) {
 		printf("Failed to load SK_SKB verdict prog\n");
 		goto out_sockmap;
 	}
 
-	bpf_map_rx = bpf_object__find_map_by_name(obj, "sock_map_rx");
+	bpf_map_rx = bpf_object__find_map_by_name(verdict_obj, "sock_map_rx");
 	if (!bpf_map_rx) {
 		printf("Failed to load map rx from verdict prog\n");
 		goto out_sockmap;
@@ -855,7 +856,7 @@ static void test_sockmap(unsigned int tasks, void *data)
 		goto out_sockmap;
 	}
 
-	bpf_map_tx = bpf_object__find_map_by_name(obj, "sock_map_tx");
+	bpf_map_tx = bpf_object__find_map_by_name(verdict_obj, "sock_map_tx");
 	if (!bpf_map_tx) {
 		printf("Failed to load map tx from verdict prog\n");
 		goto out_sockmap;
@@ -867,7 +868,7 @@ static void test_sockmap(unsigned int tasks, void *data)
 		goto out_sockmap;
 	}
 
-	bpf_map_msg = bpf_object__find_map_by_name(obj, "sock_map_msg");
+	bpf_map_msg = bpf_object__find_map_by_name(verdict_obj, "sock_map_msg");
 	if (!bpf_map_msg) {
 		printf("Failed to load map msg from msg_verdict prog\n");
 		goto out_sockmap;
@@ -879,7 +880,7 @@ static void test_sockmap(unsigned int tasks, void *data)
 		goto out_sockmap;
 	}
 
-	bpf_map_break = bpf_object__find_map_by_name(obj, "sock_map_break");
+	bpf_map_break = bpf_object__find_map_by_name(verdict_obj, "sock_map_break");
 	if (!bpf_map_break) {
 		printf("Failed to load map tx from verdict prog\n");
 		goto out_sockmap;
@@ -1125,7 +1126,9 @@ static void test_sockmap(unsigned int tasks, void *data)
 	}
 	close(fd);
 	close(map_fd_rx);
-	bpf_object__close(obj);
+	bpf_object__close(parse_obj);
+	bpf_object__close(msg_obj);
+	bpf_object__close(verdict_obj);
 	return;
 out:
 	for (i = 0; i < 6; i++)
@@ -1283,8 +1286,11 @@ static void test_map_in_map(void)
 			printf("Inner map mim.inner was not destroyed\n");
 			goto out_map_in_map;
 		}
+
+		close(fd);
 	}
 
+	bpf_object__close(obj);
 	return;
 
 out_map_in_map:
-- 
cgit v1.2.3


From b68d9c330eef9c860211ce65b86b90db32e0cef7 Mon Sep 17 00:00:00 2001
From: Zhengchao Shao <shaozhengchao@huawei.com>
Date: Wed, 21 Sep 2022 10:41:31 +0800
Subject: selftests/tc-testing: add selftests for cake qdisc
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Test 1212: Create CAKE with default setting
Test 3281: Create CAKE with bandwidth limit
Test c940: Create CAKE with autorate-ingress flag
Test 2310: Create CAKE with rtt time
Test 2385: Create CAKE with besteffort flag
Test a032: Create CAKE with diffserv8 flag
Test 2349: Create CAKE with diffserv4 flag
Test 8472: Create CAKE with flowblind flag
Test 2341: Create CAKE with dsthost and nat flag
Test 5134: Create CAKE with wash flag
Test 2302: Create CAKE with flowblind and no-split-gso flag
Test 0768: Create CAKE with dual-srchost and ack-filter flag
Test 0238: Create CAKE with dual-dsthost and ack-filter-aggressive flag
Test 6572: Create CAKE with memlimit and ptm flag
Test 2436: Create CAKE with fwmark and atm flag
Test 3984: Create CAKE with overhead and mpu
Test 5421: Create CAKE with conservative and ingress flag
Test 6854: Delete CAKE with conservative and ingress flag
Test 2342: Replace CAKE with mpu
Test 2313: Change CAKE with mpu
Test 4365: Show CAKE class

Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
Reviewed-by: Victor Nogueira <victor@mojatatu.com>
Tested-by: Victor Nogueira <victor@mojatatu.com>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/tc-testing/tc-tests/qdiscs/cake.json | 487 +++++++++++++++++++++
 1 file changed, 487 insertions(+)
 create mode 100644 tools/testing/selftests/tc-testing/tc-tests/qdiscs/cake.json

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/cake.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/cake.json
new file mode 100644
index 000000000000..1134b72d281d
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/cake.json
@@ -0,0 +1,487 @@
+[
+    {
+        "id": "1212",
+        "name": "Create CAKE with default setting",
+        "category": [
+            "qdisc",
+            "cake"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cake",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc cake 1: root refcnt [0-9]+ bandwidth unlimited diffserv3 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms raw overhead",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "3281",
+        "name": "Create CAKE with bandwidth limit",
+        "category": [
+            "qdisc",
+            "cake"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cake bandwidth 1000",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc cake 1: root refcnt [0-9]+ bandwidth 1Kbit diffserv3 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms raw overhead",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "c940",
+        "name": "Create CAKE with autorate-ingress flag",
+        "category": [
+            "qdisc",
+            "cake"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cake autorate-ingress",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc cake 1: root refcnt [0-9]+ bandwidth unlimited autorate-ingress diffserv3 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms raw overhead",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "2310",
+        "name": "Create CAKE with rtt time",
+        "category": [
+            "qdisc",
+            "cake"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cake rtt 200",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc cake 1: root refcnt [0-9]+ bandwidth unlimited diffserv3 triple-isolate nonat nowash no-ack-filter split-gso rtt 200us raw overhead",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "2385",
+        "name": "Create CAKE with besteffort flag",
+        "category": [
+            "qdisc",
+            "cake"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cake besteffort",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc cake 1: root refcnt [0-9]+ bandwidth unlimited besteffort triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms raw overhead",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "a032",
+        "name": "Create CAKE with diffserv8 flag",
+        "category": [
+            "qdisc",
+            "cake"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cake diffserv8",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc cake 1: root refcnt [0-9]+ bandwidth unlimited diffserv8 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms raw overhead",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "2349",
+        "name": "Create CAKE with diffserv4 flag",
+        "category": [
+            "qdisc",
+            "cake"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cake diffserv4",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc cake 1: root refcnt [0-9]+ bandwidth unlimited diffserv4 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms raw overhead",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "8472",
+        "name": "Create CAKE with flowblind flag",
+        "category": [
+            "qdisc",
+            "cake"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cake flowblind",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc cake 1: root refcnt [0-9]+ bandwidth unlimited diffserv3 flowblind nonat nowash no-ack-filter split-gso rtt 100ms raw overhead",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "2341",
+        "name": "Create CAKE with dsthost and nat flag",
+        "category": [
+            "qdisc",
+            "cake"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cake dsthost nat",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc cake 1: root refcnt [0-9]+ bandwidth unlimited diffserv3 dsthost nat nowash no-ack-filter split-gso rtt 100ms raw overhead",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "5134",
+        "name": "Create CAKE with wash flag",
+        "category": [
+            "qdisc",
+            "cake"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cake hosts wash",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc cake 1: root refcnt [0-9]+ bandwidth unlimited diffserv3 hosts nonat wash no-ack-filter split-gso rtt 100ms raw overhead",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "2302",
+        "name": "Create CAKE with flowblind and no-split-gso flag",
+        "category": [
+            "qdisc",
+            "cake"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cake flowblind no-split-gso",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc cake 1: root refcnt [0-9]+ bandwidth unlimited diffserv3 flowblind nonat nowash no-ack-filter no-split-gso rtt 100ms raw overhead",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "0768",
+        "name": "Create CAKE with dual-srchost and ack-filter flag",
+        "category": [
+            "qdisc",
+            "cake"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cake dual-srchost ack-filter",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc cake 1: root refcnt [0-9]+ bandwidth unlimited diffserv3 dual-srchost nonat nowash ack-filter split-gso rtt 100ms raw overhead",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "0238",
+        "name": "Create CAKE with dual-dsthost and ack-filter-aggressive flag",
+        "category": [
+            "qdisc",
+            "cake"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cake dual-dsthost ack-filter-aggressive",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc cake 1: root refcnt [0-9]+ bandwidth unlimited diffserv3 dual-dsthost nonat nowash ack-filter-aggressive split-gso rtt 100ms raw overhead",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "6572",
+        "name": "Create CAKE with memlimit and ptm flag",
+        "category": [
+            "qdisc",
+            "cake"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cake memlimit 10000 ptm",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc cake 1: root refcnt [0-9]+ bandwidth unlimited diffserv3 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms raw ptm overhead 0 memlimit 10000b",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "2436",
+        "name": "Create CAKE with fwmark and atm flag",
+        "category": [
+            "qdisc",
+            "cake"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cake fwmark 8 atm",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc cake 1: root refcnt [0-9]+ bandwidth unlimited diffserv3 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms raw atm overhead 0 fwmark 0x8",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "3984",
+        "name": "Create CAKE with overhead and mpu",
+        "category": [
+            "qdisc",
+            "cake"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cake overhead 128 mpu 256",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc cake 1: root refcnt [0-9]+ bandwidth unlimited diffserv3 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms noatm overhead 128 mpu 256",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "5421",
+        "name": "Create CAKE with conservative and ingress flag",
+        "category": [
+            "qdisc",
+            "cake"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cake conservative ingress",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc cake 1: root refcnt [0-9]+ bandwidth unlimited diffserv3 triple-isolate nonat nowash ingress no-ack-filter split-gso rtt 100ms atm overhead 48",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "6854",
+        "name": "Delete CAKE with conservative and ingress flag",
+        "category": [
+            "qdisc",
+            "cake"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true",
+            "$TC qdisc add dev $DUMMY handle 1: root cake conservative ingress"
+        ],
+        "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 1: root",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc cake 1: root refcnt [0-9]+ bandwidth unlimited diffserv3 triple-isolate nonat nowash ingress no-ack-filter split-gso rtt 100ms atm overhead 48",
+        "matchCount": "0",
+        "teardown": [
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "2342",
+        "name": "Replace CAKE with mpu",
+        "category": [
+            "qdisc",
+            "cake"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true",
+            "$TC qdisc add dev $DUMMY handle 1: root cake overhead 128 mpu 256"
+        ],
+        "cmdUnderTest": "$TC qdisc replace dev $DUMMY handle 1: root cake mpu 128",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc cake 1: root refcnt [0-9]+ bandwidth unlimited diffserv3 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms noatm overhead 128 mpu 128",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "2313",
+        "name": "Change CAKE with mpu",
+        "category": [
+            "qdisc",
+            "cake"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true",
+            "$TC qdisc add dev $DUMMY handle 1: root cake overhead 128 mpu 256"
+        ],
+        "cmdUnderTest": "$TC qdisc change dev $DUMMY handle 1: root cake mpu 128",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc cake 1: root refcnt [0-9]+ bandwidth unlimited diffserv3 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms noatm overhead 128 mpu 128",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "4365",
+        "name": "Show CAKE class",
+        "category": [
+            "qdisc",
+            "cake"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cake",
+        "expExitCode": "0",
+        "verifyCmd": "$TC class show dev $DUMMY",
+        "matchPattern": "class cake",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    }
+]
-- 
cgit v1.2.3


From 6c1ef8f00f9a383e1c0a48c059d3801270495c53 Mon Sep 17 00:00:00 2001
From: Zhengchao Shao <shaozhengchao@huawei.com>
Date: Wed, 21 Sep 2022 10:41:55 +0800
Subject: selftests/tc-testing: add selftests for cbq qdisc

Test 3460: Create CBQ with default setting
Test 0592: Create CBQ with mpu
Test 4684: Create CBQ with valid cell num
Test 4345: Create CBQ with invalid cell num
Test 4525: Create CBQ with valid ewma
Test 6784: Create CBQ with invalid ewma
Test 5468: Delete CBQ with handle
Test 492a: Show CBQ class

Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
Reviewed-by: Victor Nogueira <victor@mojatatu.com>
Tested-by: Victor Nogueira <victor@mojatatu.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/tc-testing/tc-tests/qdiscs/cbq.json  | 184 +++++++++++++++++++++
 1 file changed, 184 insertions(+)
 create mode 100644 tools/testing/selftests/tc-testing/tc-tests/qdiscs/cbq.json

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/cbq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/cbq.json
new file mode 100644
index 000000000000..1ab21c83a122
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/cbq.json
@@ -0,0 +1,184 @@
+[
+    {
+        "id": "3460",
+        "name": "Create CBQ with default setting",
+        "category": [
+            "qdisc",
+            "cbq"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc cbq 1: root refcnt [0-9]+ rate 10Kbit \\(bounded,isolated\\) prio no-transmit",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "0592",
+        "name": "Create CBQ with mpu",
+        "category": [
+            "qdisc",
+            "cbq"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000 mpu 1000",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc cbq 1: root refcnt [0-9]+ rate 10Kbit \\(bounded,isolated\\) prio no-transmit",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "4684",
+        "name": "Create CBQ with valid cell num",
+        "category": [
+            "qdisc",
+            "cbq"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000 cell 128",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc cbq 1: root refcnt [0-9]+ rate 10Kbit \\(bounded,isolated\\) prio no-transmit",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "4345",
+        "name": "Create CBQ with invalid cell num",
+        "category": [
+            "qdisc",
+            "cbq"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000 cell 100",
+        "expExitCode": "1",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc cbq 1: root refcnt [0-9]+ rate 10Kbit \\(bounded,isolated\\) prio no-transmit",
+        "matchCount": "0",
+        "teardown": [
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "4525",
+        "name": "Create CBQ with valid ewma",
+        "category": [
+            "qdisc",
+            "cbq"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000 ewma 16",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc cbq 1: root refcnt [0-9]+ rate 10Kbit \\(bounded,isolated\\) prio no-transmit",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "6784",
+        "name": "Create CBQ with invalid ewma",
+        "category": [
+            "qdisc",
+            "cbq"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000 ewma 128",
+        "expExitCode": "1",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc cbq 1: root refcnt [0-9]+ rate 10Kbit \\(bounded,isolated\\) prio no-transmit",
+        "matchCount": "0",
+        "teardown": [
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "5468",
+        "name": "Delete CBQ with handle",
+        "category": [
+            "qdisc",
+            "cbq"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true",
+            "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000"
+        ],
+        "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 1: root",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc cbq 1: root refcnt [0-9]+ rate 10Kbit \\(bounded,isolated\\) prio no-transmit",
+        "matchCount": "0",
+        "teardown": [
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "492a",
+        "name": "Show CBQ class",
+        "category": [
+            "qdisc",
+            "cbq"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbq bandwidth 10000 avpkt 9000",
+        "expExitCode": "0",
+        "verifyCmd": "$TC class show dev $DUMMY",
+        "matchPattern": "class cbq 1: root rate 10Kbit \\(bounded,isolated\\) prio no-transmit",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    }
+]
-- 
cgit v1.2.3


From 3bec7e2910b87636c4ecf5dffab87f616bf36020 Mon Sep 17 00:00:00 2001
From: Zhengchao Shao <shaozhengchao@huawei.com>
Date: Wed, 21 Sep 2022 10:42:05 +0800
Subject: selftests/tc-testing: add selftests for cbs qdisc

Test 1820: Create CBS with default setting
Test 1532: Create CBS with hicredit setting
Test 2078: Create CBS with locredit setting
Test 9271: Create CBS with sendslope setting
Test 0482: Create CBS with idleslope setting
Test e8f3: Create CBS with multiple setting
Test 23c9: Replace CBS with sendslope setting
Test a07a: Change CBS with idleslope setting
Test 43b3: Delete CBS with handle
Test 9472: Show CBS class

Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
Reviewed-by: Victor Nogueira <victor@mojatatu.com>
Tested-by: Victor Nogueira <victor@mojatatu.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/tc-testing/tc-tests/qdiscs/cbs.json  | 234 +++++++++++++++++++++
 1 file changed, 234 insertions(+)
 create mode 100644 tools/testing/selftests/tc-testing/tc-tests/qdiscs/cbs.json

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/cbs.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/cbs.json
new file mode 100644
index 000000000000..a46bf5ff8277
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/cbs.json
@@ -0,0 +1,234 @@
+[
+    {
+        "id": "1820",
+        "name": "Create CBS with default setting",
+        "category": [
+            "qdisc",
+            "cbs"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbs",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc cbs 1: root refcnt [0-9]+ hicredit 0 locredit 0 sendslope 0 idleslope 0 offload 0.*qdisc pfifo 0: parent 1: limit 1000p",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "1532",
+        "name": "Create CBS with hicredit setting",
+        "category": [
+            "qdisc",
+            "cbs"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbs hicredit 64",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc cbs 1: root refcnt [0-9]+ hicredit 64 locredit 0 sendslope 0 idleslope 0 offload 0.*qdisc pfifo 0: parent 1: limit 1000p",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "2078",
+        "name": "Create CBS with locredit setting",
+        "category": [
+            "qdisc",
+            "cbs"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbs locredit 10",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc cbs 1: root refcnt [0-9]+ hicredit 0 locredit 10 sendslope 0 idleslope 0 offload 0.*qdisc pfifo 0: parent 1: limit 1000p",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "9271",
+        "name": "Create CBS with sendslope setting",
+        "category": [
+            "qdisc",
+            "cbs"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbs sendslope 888",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc cbs 1: root refcnt [0-9]+ hicredit 0 locredit 0 sendslope 888 idleslope 0 offload 0.*qdisc pfifo 0: parent 1: limit 1000p",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "0482",
+        "name": "Create CBS with idleslope setting",
+        "category": [
+            "qdisc",
+            "cbs"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbs idleslope 666",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc cbs 1: root refcnt [0-9]+ hicredit 0 locredit 0 sendslope 0 idleslope 666 offload 0.*qdisc pfifo 0: parent 1: limit 1000p",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "e8f3",
+        "name": "Create CBS with multiple setting",
+        "category": [
+            "qdisc",
+            "cbs"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbs hicredit 10 locredit 75 sendslope 2 idleslope 666",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc cbs 1: root refcnt [0-9]+ hicredit 10 locredit 75 sendslope 2 idleslope 666 offload 0.*qdisc pfifo 0: parent 1: limit 1000p",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "23c9",
+        "name": "Replace CBS with sendslope setting",
+        "category": [
+            "qdisc",
+            "cbs"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true",
+            "$TC qdisc add dev $DUMMY handle 1: root cbs idleslope 666"
+        ],
+        "cmdUnderTest": "$TC qdisc replace dev $DUMMY handle 1: root cbs sendslope 10",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc cbs 1: root refcnt [0-9]+ hicredit 0 locredit 0 sendslope 10 idleslope 0 offload 0.*qdisc pfifo 0: parent 1: limit 1000p",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "a07a",
+        "name": "Change CBS with idleslope setting",
+        "category": [
+            "qdisc",
+            "cbs"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true",
+            "$TC qdisc add dev $DUMMY handle 1: root cbs idleslope 666"
+        ],
+        "cmdUnderTest": "$TC qdisc change dev $DUMMY handle 1: root cbs idleslope 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc cbs 1: root refcnt [0-9]+ hicredit 0 locredit 0 sendslope 0 idleslope 1 offload 0.*qdisc pfifo 0: parent 1: limit 1000p",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "43b3",
+        "name": "Delete CBS with handle",
+        "category": [
+            "qdisc",
+            "cbs"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true",
+            "$TC qdisc add dev $DUMMY handle 1: root cbs idleslope 666"
+        ],
+        "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 1: root",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc cbs 1: root refcnt [0-9]+ hicredit 0 locredit 0 sendslope 0 idleslope 666 offload 0.*qdisc pfifo 0: parent 1: limit 1000p",
+        "matchCount": "0",
+        "teardown": [
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "9472",
+        "name": "Show CBS class",
+        "category": [
+            "qdisc",
+            "cbs"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbs",
+        "expExitCode": "0",
+        "verifyCmd": "$TC class show dev $DUMMY",
+        "matchPattern": "class cbs 1:[0-9]+ parent 1:",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    }
+]
-- 
cgit v1.2.3


From 9b1edbc1c58fb211976f6eaa8a25da04bfe59a42 Mon Sep 17 00:00:00 2001
From: Zhengchao Shao <shaozhengchao@huawei.com>
Date: Wed, 21 Sep 2022 10:42:15 +0800
Subject: selftests/tc-testing: add selftests for drr qdisc

Test 0385: Create DRR with default setting
Test 2375: Delete DRR with handle
Test 3092: Show DRR class

Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
Reviewed-by: Victor Nogueira <victor@mojatatu.com>
Tested-by: Victor Nogueira <victor@mojatatu.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/tc-testing/tc-tests/qdiscs/drr.json  | 71 ++++++++++++++++++++++
 1 file changed, 71 insertions(+)
 create mode 100644 tools/testing/selftests/tc-testing/tc-tests/qdiscs/drr.json

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/drr.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/drr.json
new file mode 100644
index 000000000000..486a425b3c1c
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/drr.json
@@ -0,0 +1,71 @@
+[
+    {
+        "id": "0385",
+        "name": "Create DRR with default setting",
+        "category": [
+            "qdisc",
+            "drr"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root drr",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc drr 1: root refcnt [0-9]+",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "2375",
+        "name": "Delete DRR with handle",
+        "category": [
+            "qdisc",
+            "drr"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true",
+            "$TC qdisc add dev $DUMMY handle 1: root drr"
+        ],
+        "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 1: root",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc drr 1: root refcnt [0-9]+",
+        "matchCount": "0",
+        "teardown": [
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "3092",
+        "name": "Show DRR class",
+        "category": [
+            "qdisc",
+            "drr"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root drr",
+        "expExitCode": "0",
+        "verifyCmd": "$TC class show dev $DUMMY",
+        "matchPattern": "class drr 1:",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    }
+]
-- 
cgit v1.2.3


From 5d93f04d681d60f6d0e3ecb2c8df97c2d9b28980 Mon Sep 17 00:00:00 2001
From: Zhengchao Shao <shaozhengchao@huawei.com>
Date: Wed, 21 Sep 2022 10:42:25 +0800
Subject: selftests/tc-testing: add selftests for dsmark qdisc

Test 6345: Create DSMARK with default setting
Test 3462: Create DSMARK with default_index setting
Test ca95: Create DSMARK with set_tc_index flag
Test a950: Create DSMARK with multiple setting
Test 4092: Delete DSMARK with handle
Test 5930: Show DSMARK class

Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
Reviewed-by: Victor Nogueira <victor@mojatatu.com>
Tested-by: Victor Nogueira <victor@mojatatu.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../tc-testing/tc-tests/qdiscs/dsmark.json         | 140 +++++++++++++++++++++
 1 file changed, 140 insertions(+)
 create mode 100644 tools/testing/selftests/tc-testing/tc-tests/qdiscs/dsmark.json

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/dsmark.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/dsmark.json
new file mode 100644
index 000000000000..c030795f9c37
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/dsmark.json
@@ -0,0 +1,140 @@
+[
+    {
+        "id": "6345",
+        "name": "Create DSMARK with default setting",
+        "category": [
+            "qdisc",
+            "dsmark"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dsmark indices 1024",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc dsmark 1: root refcnt [0-9]+ indices 0x0400",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "3462",
+        "name": "Create DSMARK with default_index setting",
+        "category": [
+            "qdisc",
+            "dsmark"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dsmark indices 1024 default_index 512",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc dsmark 1: root refcnt [0-9]+ indices 0x0400 default_index 0x0200",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "ca95",
+        "name": "Create DSMARK with set_tc_index flag",
+        "category": [
+            "qdisc",
+            "dsmark"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dsmark indices 1024 set_tc_index",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc dsmark 1: root refcnt [0-9]+ indices 0x0400 set_tc_index",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "a950",
+        "name": "Create DSMARK with multiple setting",
+        "category": [
+            "qdisc",
+            "dsmark"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dsmark indices 1024 default_index 1024 set_tc_index",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc dsmark 1: root refcnt [0-9]+ indices 0x0400 default_index 0x0400 set_tc_index",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "4092",
+        "name": "Delete DSMARK with handle",
+        "category": [
+            "qdisc",
+            "dsmark"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true",
+            "$TC qdisc add dev $DUMMY handle 1: root dsmark indices 1024 default_index 1024"
+        ],
+        "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 1: root",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc dsmark 1: root refcnt [0-9]+ indices 0x0400",
+        "matchCount": "0",
+        "teardown": [
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "5930",
+        "name": "Show DSMARK class",
+        "category": [
+            "qdisc",
+            "dsmark"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root dsmark indices 1024",
+        "expExitCode": "0",
+        "verifyCmd": "$TC class show dev $DUMMY",
+        "matchPattern": "class dsmark 1:",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    }
+]
-- 
cgit v1.2.3


From 965a25e3455018b380e12eea179144b564a52053 Mon Sep 17 00:00:00 2001
From: Zhengchao Shao <shaozhengchao@huawei.com>
Date: Wed, 21 Sep 2022 10:42:39 +0800
Subject: selftests/tc-testing: add selftests for fq_codel qdisc

Test 4957: Create FQ_CODEL with default setting
Test 7621: Create FQ_CODEL with limit setting
Test 6871: Create FQ_CODEL with memory_limit setting
Test 5636: Create FQ_CODEL with target setting
Test 630a: Create FQ_CODEL with interval setting
Test 4324: Create FQ_CODEL with quantum setting
Test b190: Create FQ_CODEL with noecn flag
Test 5381: Create FQ_CODEL with ce_threshold setting
Test c9d2: Create FQ_CODEL with drop_batch setting
Test 523b: Create FQ_CODEL with multiple setting
Test 9283: Replace FQ_CODEL with noecn setting
Test 3459: Change FQ_CODEL with limit setting
Test 0128: Delete FQ_CODEL with handle
Test 0435: Show FQ_CODEL class

Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
Reviewed-by: Victor Nogueira <victor@mojatatu.com>
Tested-by: Victor Nogueira <victor@mojatatu.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../tc-testing/tc-tests/qdiscs/fq_codel.json       | 326 +++++++++++++++++++++
 1 file changed, 326 insertions(+)
 create mode 100644 tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_codel.json

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_codel.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_codel.json
new file mode 100644
index 000000000000..a65266357a9a
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_codel.json
@@ -0,0 +1,326 @@
+[
+    {
+        "id": "4957",
+        "name": "Create FQ_CODEL with default setting",
+        "category": [
+            "qdisc",
+            "fq_codel"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq_codel",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc fq_codel 1: root refcnt [0-9]+ limit 10240p flows 1024 quantum.*target 5ms interval 100ms memory_limit 32Mb ecn drop_batch 64",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "7621",
+        "name": "Create FQ_CODEL with limit setting",
+        "category": [
+            "qdisc",
+            "fq_codel"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq_codel limit 1000",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc fq_codel 1: root refcnt [0-9]+ limit 1000p flows 1024 quantum.*target 5ms interval 100ms memory_limit 32Mb ecn drop_batch 64",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "6871",
+        "name": "Create FQ_CODEL with memory_limit setting",
+        "category": [
+            "qdisc",
+            "fq_codel"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq_codel memory_limit 100000",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc fq_codel 1: root refcnt [0-9]+ limit 10240p flows 1024 quantum.*target 5ms interval 100ms memory_limit 100000b ecn drop_batch 64",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "5636",
+        "name": "Create FQ_CODEL with target setting",
+        "category": [
+            "qdisc",
+            "fq_codel"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq_codel target 2000",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc fq_codel 1: root refcnt [0-9]+ limit 10240p flows 1024 quantum.*target 2ms interval 100ms memory_limit 32Mb ecn drop_batch 64",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "630a",
+        "name": "Create FQ_CODEL with interval setting",
+        "category": [
+            "qdisc",
+            "fq_codel"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq_codel interval 5000",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc fq_codel 1: root refcnt [0-9]+ limit 10240p flows 1024 quantum.*target 5ms interval 5ms memory_limit 32Mb ecn drop_batch 64",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "4324",
+        "name": "Create FQ_CODEL with quantum setting",
+        "category": [
+            "qdisc",
+            "fq_codel"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq_codel quantum 9000",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc fq_codel 1: root refcnt [0-9]+ limit 10240p flows 1024 quantum 9000 target 5ms interval 100ms memory_limit 32Mb ecn drop_batch 64",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "b190",
+        "name": "Create FQ_CODEL with noecn flag",
+        "category": [
+            "qdisc",
+            "fq_codel"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq_codel noecn",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc fq_codel 1: root refcnt [0-9]+ limit 10240p flows 1024 quantum.*target 5ms interval 100ms memory_limit 32Mb drop_batch 64",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "5381",
+        "name": "Create FQ_CODEL with ce_threshold setting",
+        "category": [
+            "qdisc",
+            "fq_codel"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq_codel ce_threshold 1024000",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc fq_codel 1: root refcnt [0-9]+ limit 10240p flows 1024 quantum.*target 5ms ce_threshold 1.02s interval 100ms memory_limit 32Mb ecn drop_batch 64",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "c9d2",
+        "name": "Create FQ_CODEL with drop_batch setting",
+        "category": [
+            "qdisc",
+            "fq_codel"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq_codel drop_batch 100",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc fq_codel 1: root refcnt [0-9]+ limit 10240p flows 1024 quantum.*target 5ms interval 100ms memory_limit 32Mb ecn drop_batch 100",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "523b",
+        "name": "Create FQ_CODEL with multiple setting",
+        "category": [
+            "qdisc",
+            "fq_codel"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq_codel limit 1000 flows 256 drop_batch 100",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc fq_codel 1: root refcnt [0-9]+ limit 1000p flows 256 quantum.*target 5ms interval 100ms memory_limit 32Mb ecn drop_batch 100",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "9283",
+        "name": "Replace FQ_CODEL with noecn setting",
+        "category": [
+            "qdisc",
+            "fq_codel"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true",
+            "$TC qdisc add dev $DUMMY handle 1: root fq_codel limit 1000 flows 256 drop_batch 100"
+        ],
+        "cmdUnderTest": "$TC qdisc replace dev $DUMMY handle 1: root fq_codel noecn",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc fq_codel 1: root refcnt [0-9]+ limit 1000p flows 256 quantum.*target 5ms interval 100ms memory_limit 32Mb drop_batch 100",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "3459",
+        "name": "Change FQ_CODEL with limit setting",
+        "category": [
+            "qdisc",
+            "fq_codel"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true",
+            "$TC qdisc add dev $DUMMY handle 1: root fq_codel limit 1000 flows 256 drop_batch 100"
+        ],
+        "cmdUnderTest": "$TC qdisc change dev $DUMMY handle 1: root fq_codel limit 2000",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc fq_codel 1: root refcnt [0-9]+ limit 2000p flows 256 quantum.*target 5ms interval 100ms memory_limit 32Mb ecn drop_batch 100",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "0128",
+        "name": "Delete FQ_CODEL with handle",
+        "category": [
+            "qdisc",
+            "fq_codel"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true",
+            "$TC qdisc add dev $DUMMY handle 1: root fq_codel limit 1000 flows 256 drop_batch 100"
+        ],
+        "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 1: root",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc fq_codel 1: root refcnt [0-9]+ limit 1000p flows 256 quantum.*target 5ms interval 100ms memory_limit 32Mb ecn drop_batch 100",
+        "matchCount": "0",
+        "teardown": [
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "0435",
+        "name": "Show FQ_CODEL class",
+        "category": [
+            "qdisc",
+            "fq_codel"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq_codel",
+        "expExitCode": "0",
+        "verifyCmd": "$TC class show dev $DUMMY",
+        "matchPattern": "class fq_codel 1:",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    }
+]
-- 
cgit v1.2.3


From 265b9adcc4c6f9b5e2d0795cd8a82a5c8351e99f Mon Sep 17 00:00:00 2001
From: Zhengchao Shao <shaozhengchao@huawei.com>
Date: Wed, 21 Sep 2022 10:42:51 +0800
Subject: selftests/tc-testing: add selftests for hfsc qdisc

Test 3254: Create HFSC with default setting
Test 0289: Create HFSC with class sc and ul rate setting
Test 846a: Create HFSC with class sc umax and dmax setting
Test 5413: Create HFSC with class rt and ls rate setting
Test 9312: Create HFSC with class rt umax and dmax setting
Test 6931: Delete HFSC with handle
Test 8436: Show HFSC class

Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
Reviewed-by: Victor Nogueira <victor@mojatatu.com>
Tested-by: Victor Nogueira <victor@mojatatu.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/tc-testing/tc-tests/qdiscs/hfsc.json | 167 +++++++++++++++++++++
 1 file changed, 167 insertions(+)
 create mode 100644 tools/testing/selftests/tc-testing/tc-tests/qdiscs/hfsc.json

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/hfsc.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/hfsc.json
new file mode 100644
index 000000000000..af27b2c20e17
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/hfsc.json
@@ -0,0 +1,167 @@
+[
+    {
+        "id": "3254",
+        "name": "Create HFSC with default setting",
+        "category": [
+            "qdisc",
+            "hfsc"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root hfsc",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc hfsc 1: root refcnt [0-9]+",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "0289",
+        "name": "Create HFSC with class sc and ul rate setting",
+        "category": [
+            "qdisc",
+            "hfsc"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true",
+            "$TC qdisc add dev $DUMMY handle 1: root hfsc default 11"
+        ],
+        "cmdUnderTest": "$TC class add dev $DUMMY parent 1: classid 1:1 hfsc sc rate 20000 ul rate 10000",
+        "expExitCode": "0",
+        "verifyCmd": "$TC class show dev $DUMMY",
+        "matchPattern": "class hfsc 1:1 parent 1: sc m1 0bit d 0us m2 20Kbit ul m1 0bit d 0us m2 10Kbit",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "846a",
+        "name": "Create HFSC with class sc umax and dmax setting",
+        "category": [
+            "qdisc",
+            "hfsc"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true",
+            "$TC qdisc add dev $DUMMY handle 1: root hfsc default 11"
+        ],
+        "cmdUnderTest": "$TC class add dev $DUMMY parent 1: classid 1:1 hfsc sc umax 1540 dmax 5ms rate 10000 ul rate 10000",
+        "expExitCode": "0",
+        "verifyCmd": "$TC class show dev $DUMMY",
+        "matchPattern": "class hfsc 1:1 parent 1: sc m1 2464Kbit d 5ms m2 10Kbit ul m1 0bit d 0us m2 10Kbit",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "5413",
+        "name": "Create HFSC with class rt and ls rate setting",
+        "category": [
+            "qdisc",
+            "hfsc"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true",
+            "$TC qdisc add dev $DUMMY handle 1: root hfsc default 11"
+        ],
+        "cmdUnderTest": "$TC class add dev $DUMMY parent 1: classid 1:1 hfsc rt rate 20000 ls rate 10000",
+        "expExitCode": "0",
+        "verifyCmd": "$TC class show dev $DUMMY",
+        "matchPattern": "class hfsc 1:1 parent 1: rt m1 0bit d 0us m2 20Kbit ls m1 0bit d 0us m2 10Kbit",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "9312",
+        "name": "Create HFSC with class rt umax and dmax setting",
+        "category": [
+            "qdisc",
+            "hfsc"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true",
+            "$TC qdisc add dev $DUMMY handle 1: root hfsc default 11"
+        ],
+        "cmdUnderTest": "$TC class add dev $DUMMY parent 1: classid 1:1 hfsc rt umax 1540 dmax 5ms rate 10000 ls rate 10000",
+        "expExitCode": "0",
+        "verifyCmd": "$TC class show dev $DUMMY",
+        "matchPattern": "class hfsc 1:1 parent 1: rt m1 2464Kbit d 5ms m2 10Kbit ls m1 0bit d 0us m2 10Kbit",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "6931",
+        "name": "Delete HFSC with handle",
+        "category": [
+            "qdisc",
+            "hfsc"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true",
+            "$TC qdisc add dev $DUMMY handle 1: root hfsc default 11"
+        ],
+        "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 1: root",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc hfsc 1: root refcnt [0-9]+",
+        "matchCount": "0",
+        "teardown": [
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "8436",
+        "name": "Show HFSC class",
+        "category": [
+            "qdisc",
+            "hfsc"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root hfsc",
+        "expExitCode": "0",
+        "verifyCmd": "$TC class show dev $DUMMY",
+        "matchPattern": "class hfsc 1: root",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    }
+]
-- 
cgit v1.2.3


From 68135f6362181bd093be7ff45362acec79c2cc1b Mon Sep 17 00:00:00 2001
From: Zhengchao Shao <shaozhengchao@huawei.com>
Date: Wed, 21 Sep 2022 10:43:02 +0800
Subject: selftests/tc-testing: add selftests for htb qdisc

Test 0904: Create HTB with default setting
Test 3906: Create HTB with default-N setting
Test 8492: Create HTB with r2q setting
Test 9502: Create HTB with direct_qlen setting
Test b924: Create HTB with class rate and burst setting
Test 4359: Create HTB with class mpu setting
Test 9048: Create HTB with class prio setting
Test 4994: Create HTB with class ceil setting
Test 9523: Create HTB with class cburst setting
Test 5353: Create HTB with class mtu setting
Test 346a: Create HTB with class quantum setting
Test 303a: Delete HTB with handle

Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
Reviewed-by: Victor Nogueira <victor@mojatatu.com>
Tested-by: Victor Nogueira <victor@mojatatu.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/tc-testing/tc-tests/qdiscs/htb.json  | 285 +++++++++++++++++++++
 1 file changed, 285 insertions(+)
 create mode 100644 tools/testing/selftests/tc-testing/tc-tests/qdiscs/htb.json

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/htb.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/htb.json
new file mode 100644
index 000000000000..9529899482e0
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/htb.json
@@ -0,0 +1,285 @@
+[
+    {
+        "id": "0904",
+        "name": "Create HTB with default setting",
+        "category": [
+            "qdisc",
+            "htb"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root htb",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc htb 1: root refcnt [0-9]+ r2q 10 default 0 direct_packets_stat.*direct_qlen",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "3906",
+        "name": "Create HTB with default-N setting",
+        "category": [
+            "qdisc",
+            "htb"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root htb default 10",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc htb 1: root refcnt [0-9]+ r2q 10 default 0x10 direct_packets_stat.* direct_qlen",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "8492",
+        "name": "Create HTB with r2q setting",
+        "category": [
+            "qdisc",
+            "htb"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root htb r2q 5",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc htb 1: root refcnt [0-9]+ r2q 5 default 0 direct_packets_stat.*direct_qlen",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "9502",
+        "name": "Create HTB with direct_qlen setting",
+        "category": [
+            "qdisc",
+            "htb"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root htb direct_qlen 1024",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc htb 1: root refcnt [0-9]+ r2q 10 default 0 direct_packets_stat.*direct_qlen 1024",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "b924",
+        "name": "Create HTB with class rate and burst setting",
+        "category": [
+            "qdisc",
+            "htb"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true",
+            "$TC qdisc add dev $DUMMY handle 1: root htb"
+        ],
+        "cmdUnderTest": "$TC class add dev $DUMMY parent 1: classid 1:1 htb rate 20kbit burst 1000",
+        "expExitCode": "0",
+        "verifyCmd": "$TC class show dev $DUMMY",
+        "matchPattern": "class htb 1:1 root prio 0 rate 20Kbit ceil 20Kbit burst 1000b cburst 1600b",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "4359",
+        "name": "Create HTB with class mpu setting",
+        "category": [
+            "qdisc",
+            "htb"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true",
+            "$TC qdisc add dev $DUMMY handle 1: root htb"
+        ],
+        "cmdUnderTest": "$TC class add dev $DUMMY parent 1: classid 1:1 htb rate 20Kbit mpu 64",
+        "expExitCode": "0",
+        "verifyCmd": "$TC class show dev $DUMMY",
+        "matchPattern": "class htb 1:1 root prio 0 rate 20Kbit ceil 20Kbit burst 1600b cburst 1600b",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "9048",
+        "name": "Create HTB with class prio setting",
+        "category": [
+            "qdisc",
+            "htb"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true",
+            "$TC qdisc add dev $DUMMY handle 1: root htb"
+        ],
+        "cmdUnderTest": "$TC class add dev $DUMMY parent 1: classid 1:1 htb rate 20Kbit prio 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC class show dev $DUMMY",
+        "matchPattern": "class htb 1:1 root prio 1 rate 20Kbit ceil 20Kbit burst 1600b cburst 1600b",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "4994",
+        "name": "Create HTB with class ceil setting",
+        "category": [
+            "qdisc",
+            "htb"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true",
+            "$TC qdisc add dev $DUMMY handle 1: root htb"
+        ],
+        "cmdUnderTest": "$TC class add dev $DUMMY parent 1: classid 1:1 htb rate 20Kbit ceil 10Kbit",
+        "expExitCode": "0",
+        "verifyCmd": "$TC class show dev $DUMMY",
+        "matchPattern": "class htb 1:1 root prio 0 rate 20Kbit ceil 10Kbit burst 1600b cburst 1600b",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "9523",
+        "name": "Create HTB with class cburst setting",
+        "category": [
+            "qdisc",
+            "htb"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true",
+            "$TC qdisc add dev $DUMMY handle 1: root htb"
+        ],
+        "cmdUnderTest": "$TC class add dev $DUMMY parent 1: classid 1:1 htb rate 20Kbit cburst 2000",
+        "expExitCode": "0",
+        "verifyCmd": "$TC class show dev $DUMMY",
+        "matchPattern": "class htb 1:1 root prio 0 rate 20Kbit ceil 20Kbit burst 1600b cburst 2000b",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "5353",
+        "name": "Create HTB with class mtu setting",
+        "category": [
+            "qdisc",
+            "htb"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true",
+            "$TC qdisc add dev $DUMMY handle 1: root htb"
+        ],
+        "cmdUnderTest": "$TC class add dev $DUMMY parent 1: classid 1:1 htb rate 20Kbit mtu 2048",
+        "expExitCode": "0",
+        "verifyCmd": "$TC class show dev $DUMMY",
+        "matchPattern": "class htb 1:1 root prio 0 rate 20Kbit ceil 20Kbit burst 2Kb cburst 2Kb",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "346a",
+        "name": "Create HTB with class quantum setting",
+        "category": [
+            "qdisc",
+            "htb"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true",
+            "$TC qdisc add dev $DUMMY handle 1: root htb"
+        ],
+        "cmdUnderTest": "$TC class add dev $DUMMY parent 1: classid 1:1 htb rate 20Kbit quantum 2048",
+        "expExitCode": "0",
+        "verifyCmd": "$TC class show dev $DUMMY",
+        "matchPattern": "class htb 1:1 root prio 0 rate 20Kbit ceil 20Kbit burst 1600b cburst 1600b",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "303a",
+        "name": "Delete HTB with handle",
+        "category": [
+            "qdisc",
+            "htb"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true",
+            "$TC qdisc add dev $DUMMY handle 1: root htb r2q 5"
+        ],
+        "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 1: root",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc htb 1: root refcnt [0-9]+",
+        "matchCount": "0",
+        "teardown": [
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    }
+]
-- 
cgit v1.2.3


From 8ab00f8b5e294c5c5206ab10ac2a7d94ecba9892 Mon Sep 17 00:00:00 2001
From: Zhengchao Shao <shaozhengchao@huawei.com>
Date: Wed, 21 Sep 2022 10:43:12 +0800
Subject: selftests/tc-testing: add selftests for mqprio qdisc

Test 9903: Add mqprio Qdisc to multi-queue device (8 queues)
Test 453a: Delete nonexistent mqprio Qdisc
Test 5292: Delete mqprio Qdisc twice
Test 45a9: Add mqprio Qdisc to single-queue device
Test 2ba9: Show mqprio class

Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
Reviewed-by: Victor Nogueira <victor@mojatatu.com>
Tested-by: Victor Nogueira <victor@mojatatu.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../tc-testing/tc-tests/qdiscs/mqprio.json         | 114 +++++++++++++++++++++
 1 file changed, 114 insertions(+)
 create mode 100644 tools/testing/selftests/tc-testing/tc-tests/qdiscs/mqprio.json

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mqprio.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mqprio.json
new file mode 100644
index 000000000000..6e1973f731e9
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mqprio.json
@@ -0,0 +1,114 @@
+[
+    {
+        "id": "9903",
+        "name": "Add mqprio Qdisc to multi-queue device (8 queues)",
+        "category": [
+            "qdisc",
+            "mqprio"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "echo \"1 1 8\" > /sys/bus/netdevsim/new_device"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mqprio num_tc 8 map 0 1 2 3 4 5 6 7 queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 hw 0",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $ETH",
+        "matchPattern": "qdisc mqprio 1: root tc 8 map 0 1 2 3 4 5 6 7 0 0 0 0 0 0 0 0.*queues:\\(0:0\\) \\(1:1\\) \\(2:2\\) \\(3:3\\) \\(4:4\\) \\(5:5\\) \\(6:6\\) \\(7:7\\)",
+        "matchCount": "1",
+        "teardown": [
+            "echo \"1\" > /sys/bus/netdevsim/del_device"
+        ]
+    },
+    {
+        "id": "453a",
+        "name": "Delete nonexistent mqprio Qdisc",
+        "category": [
+            "qdisc",
+            "mqprio"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "echo \"1 1 4\" > /sys/bus/netdevsim/new_device"
+        ],
+        "cmdUnderTest": "$TC qdisc del dev $ETH root handle 1: mqprio",
+        "expExitCode": "2",
+        "verifyCmd": "$TC qdisc show dev $ETH",
+        "matchPattern": "qdisc mqprio 1: root",
+        "matchCount": "0",
+        "teardown": [
+            "echo \"1\" > /sys/bus/netdevsim/del_device"
+        ]
+    },
+    {
+        "id": "5292",
+        "name": "Delete mqprio Qdisc twice",
+        "category": [
+            "qdisc",
+            "mqprio"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "echo \"1 1 8\" > /sys/bus/netdevsim/new_device",
+            "$TC qdisc add dev $ETH root handle 1: mqprio num_tc 8 map 0 1 2 3 4 5 6 7 queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 hw 0",
+            "$TC qdisc del dev $ETH root handle 1:"
+        ],
+        "cmdUnderTest": "$TC qdisc del dev $ETH root handle 1:",
+        "expExitCode": "2",
+        "verifyCmd": "$TC qdisc show dev $ETH",
+        "matchPattern": "qdisc mqprio 1: root",
+        "matchCount": "0",
+        "teardown": [
+            "echo \"1\" > /sys/bus/netdevsim/del_device"
+        ]
+    },
+    {
+        "id": "45a9",
+        "name": "Add mqprio Qdisc to single-queue device",
+        "category": [
+            "qdisc",
+            "mqprio"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "echo \"1 1\" > /sys/bus/netdevsim/new_device"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mqprio num_tc 8 map 0 1 2 3 4 5 6 7 queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 hw 0",
+        "expExitCode": "2",
+        "verifyCmd": "$TC qdisc show dev $ETH",
+        "matchPattern": "qdisc mqprio 1: root",
+        "matchCount": "0",
+        "teardown": [
+            "echo \"1\" > /sys/bus/netdevsim/del_device"
+        ]
+    },
+    {
+        "id": "2ba9",
+        "name": "Show mqprio class",
+        "category": [
+            "qdisc",
+            "mqprio"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "echo \"1 1 8\" > /sys/bus/netdevsim/new_device"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mqprio num_tc 8 map 0 1 2 3 4 5 6 7 queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 hw 0",
+        "expExitCode": "0",
+        "verifyCmd": "$TC class show dev $ETH",
+        "matchPattern": "class mqprio 1:",
+        "matchCount": "16",
+        "teardown": [
+            "echo \"1\" > /sys/bus/netdevsim/del_device"
+        ]
+    }
+]
-- 
cgit v1.2.3


From e4c4bcb0e4eef5f35b39f814c54787fa3bef5400 Mon Sep 17 00:00:00 2001
From: Zhengchao Shao <shaozhengchao@huawei.com>
Date: Wed, 21 Sep 2022 10:43:21 +0800
Subject: selftests/tc-testing: add selftests for multiq qdisc

Test 20ba: Add multiq Qdisc to multi-queue device (8 queues)
Test 4301: List multiq Class
Test 7832: Delete nonexistent multiq Qdisc
Test 2891: Delete multiq Qdisc twice
Test 1329: Add multiq Qdisc to single-queue device

Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
Reviewed-by: Victor Nogueira <victor@mojatatu.com>
Tested-by: Victor Nogueira <victor@mojatatu.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../tc-testing/tc-tests/qdiscs/multiq.json         | 114 +++++++++++++++++++++
 1 file changed, 114 insertions(+)
 create mode 100644 tools/testing/selftests/tc-testing/tc-tests/qdiscs/multiq.json

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/multiq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/multiq.json
new file mode 100644
index 000000000000..12c0af7a145d
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/multiq.json
@@ -0,0 +1,114 @@
+[
+    {
+        "id": "20ba",
+        "name": "Add multiq Qdisc to multi-queue device (8 queues)",
+        "category": [
+            "qdisc",
+            "multiq"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "echo \"1 1 8\" > /sys/bus/netdevsim/new_device"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: multiq",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $ETH",
+        "matchPattern": "qdisc multiq 1: root refcnt [0-9]+ bands 8",
+        "matchCount": "1",
+        "teardown": [
+            "echo \"1\" > /sys/bus/netdevsim/del_device"
+        ]
+    },
+    {
+        "id": "4301",
+        "name": "List multiq Class",
+        "category": [
+            "qdisc",
+            "multiq"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "echo \"1 1 8\" > /sys/bus/netdevsim/new_device"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: multiq",
+        "expExitCode": "0",
+        "verifyCmd": "$TC class show dev $ETH",
+        "matchPattern": "class multiq 1:[0-9]+ parent 1:",
+        "matchCount": "8",
+        "teardown": [
+            "echo \"1\" > /sys/bus/netdevsim/del_device"
+        ]
+    },
+    {
+        "id": "7832",
+        "name": "Delete nonexistent multiq Qdisc",
+        "category": [
+            "qdisc",
+            "multiq"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "echo \"1 1 4\" > /sys/bus/netdevsim/new_device"
+        ],
+        "cmdUnderTest": "$TC qdisc del dev $ETH root handle 1: multiq",
+        "expExitCode": "2",
+        "verifyCmd": "$TC qdisc show dev $ETH",
+        "matchPattern": "qdisc multiq 1: root",
+        "matchCount": "0",
+        "teardown": [
+            "echo \"1\" > /sys/bus/netdevsim/del_device"
+        ]
+    },
+    {
+        "id": "2891",
+        "name": "Delete multiq Qdisc twice",
+        "category": [
+            "qdisc",
+            "multiq"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "echo \"1 1 8\" > /sys/bus/netdevsim/new_device",
+            "$TC qdisc add dev $ETH root handle 1: multiq",
+            "$TC qdisc del dev $ETH root handle 1:"
+        ],
+        "cmdUnderTest": "$TC qdisc del dev $ETH root handle 1:",
+        "expExitCode": "2",
+        "verifyCmd": "$TC qdisc show dev $ETH",
+        "matchPattern": "qdisc multiq 1: root",
+        "matchCount": "0",
+        "teardown": [
+            "echo \"1\" > /sys/bus/netdevsim/del_device"
+        ]
+    },
+    {
+        "id": "1329",
+        "name": "Add multiq Qdisc to single-queue device",
+        "category": [
+            "qdisc",
+            "multiq"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "echo \"1 1\" > /sys/bus/netdevsim/new_device"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: multiq",
+        "expExitCode": "2",
+        "verifyCmd": "$TC qdisc show dev $ETH",
+        "matchPattern": "qdisc multiq 1: root",
+        "matchCount": "0",
+        "teardown": [
+            "echo \"1\" > /sys/bus/netdevsim/del_device"
+        ]
+    }
+]
-- 
cgit v1.2.3


From 779f966f16db428005b533871c3b22b262b145aa Mon Sep 17 00:00:00 2001
From: Zhengchao Shao <shaozhengchao@huawei.com>
Date: Wed, 21 Sep 2022 10:43:33 +0800
Subject: selftests/tc-testing: add selftests for netem qdisc

Test cb28: Create NETEM with default setting
Test a089: Create NETEM with limit flag
Test 3449: Create NETEM with delay time
Test 3782: Create NETEM with distribution and corrupt flag
Test 2b82: Create NETEM with distribution and duplicate flag
Test a932: Create NETEM with distribution and loss flag
Test e01a: Create NETEM with distribution and loss state flag
Test ba29: Create NETEM with loss gemodel flag
Test 0492: Create NETEM with reorder flag
Test 7862: Create NETEM with rate limit
Test 7235: Create NETEM with multiple slot rate
Test 5439: Create NETEM with multiple slot setting
Test 5029: Change NETEM with loss state
Test 3785: Replace NETEM with delay time
Test 4502: Delete NETEM with handle
Test 0785: Show NETEM class

Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
Reviewed-by: Victor Nogueira <victor@mojatatu.com>
Tested-by: Victor Nogueira <victor@mojatatu.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../tc-testing/tc-tests/qdiscs/netem.json          | 372 +++++++++++++++++++++
 1 file changed, 372 insertions(+)
 create mode 100644 tools/testing/selftests/tc-testing/tc-tests/qdiscs/netem.json

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/netem.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/netem.json
new file mode 100644
index 000000000000..7e41f548f8e8
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/netem.json
@@ -0,0 +1,372 @@
+[
+    {
+        "id": "cb28",
+        "name": "Create NETEM with default setting",
+        "category": [
+            "qdisc",
+            "netem"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root netem",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc netem 1: root refcnt [0-9]+ limit",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "a089",
+        "name": "Create NETEM with limit flag",
+        "category": [
+            "qdisc",
+            "netem"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root netem limit 200",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc netem 1: root refcnt [0-9]+ limit 200",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "3449",
+        "name": "Create NETEM with delay time",
+        "category": [
+            "qdisc",
+            "netem"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root netem delay 100ms",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc netem 1: root refcnt [0-9]+ .*delay 100ms",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "3782",
+        "name": "Create NETEM with distribution and corrupt flag",
+        "category": [
+            "qdisc",
+            "netem"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root netem delay 100ms 10ms distribution normal corrupt 1%",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc netem 1: root refcnt [0-9]+ .*delay 100ms  10ms corrupt 1%",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "2b82",
+        "name": "Create NETEM with distribution and duplicate flag",
+        "category": [
+            "qdisc",
+            "netem"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root netem delay 100ms 10ms distribution normal duplicate 1%",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc netem 1: root refcnt [0-9]+ .*delay 100ms  10ms duplicate 1%",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "a932",
+        "name": "Create NETEM with distribution and loss flag",
+        "category": [
+            "qdisc",
+            "netem"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root netem delay 100ms 10ms distribution pareto loss 1%",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc netem 1: root refcnt [0-9]+ .*delay 100ms  10ms loss 1%",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "e01a",
+        "name": "Create NETEM with distribution and loss state flag",
+        "category": [
+            "qdisc",
+            "netem"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root netem delay 100ms 10ms distribution paretonormal loss state 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc netem 1: root refcnt [0-9]+ .*delay 100ms  10ms loss state p13 1% p31 99% p32 0% p23 100% p14 0%",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "ba29",
+        "name": "Create NETEM with loss gemodel flag",
+        "category": [
+            "qdisc",
+            "netem"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root netem loss gemodel 1%",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc netem 1: root refcnt [0-9]+ .*loss gemodel p 1%",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "0492",
+        "name": "Create NETEM with reorder flag",
+        "category": [
+            "qdisc",
+            "netem"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root netem delay 100ms 10ms reorder 2% gap 100",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc netem 1: root refcnt [0-9]+ .*reorder 2%",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "7862",
+        "name": "Create NETEM with rate limit",
+        "category": [
+            "qdisc",
+            "netem"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root netem rate 20000",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc netem 1: root refcnt [0-9]+ .*rate 20Kbit",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "7235",
+        "name": "Create NETEM with multiple slot rate",
+        "category": [
+            "qdisc",
+            "netem"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root netem slot 10 200 packets 2000 bytes 9000",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc netem 1: root refcnt [0-9]+ .*slot 10ns 200ns packets 2000 bytes 9000",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "5439",
+        "name": "Create NETEM with multiple slot setting",
+        "category": [
+            "qdisc",
+            "netem"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root netem slot distribution pareto 1ms 0.1ms",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc netem 1: root refcnt [0-9]+ .*slot distribution 1ms 100us",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "5029",
+        "name": "Change NETEM with loss state",
+        "category": [
+            "qdisc",
+            "netem"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true",
+            "$TC qdisc add dev $DUMMY handle 1: root netem delay 100ms 10ms distribution normal loss 1%"
+        ],
+        "cmdUnderTest": "$TC qdisc change dev $DUMMY handle 1: root netem delay 100ms 10ms distribution normal loss 2%",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc netem 1: root refcnt [0-9]+ .*loss 2%",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "3785",
+        "name": "Replace NETEM with delay time",
+        "category": [
+            "qdisc",
+            "netem"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true",
+            "$TC qdisc add dev $DUMMY handle 1: root netem delay 100ms 10ms distribution normal loss 1%"
+        ],
+        "cmdUnderTest": "$TC qdisc replace dev $DUMMY handle 1: root netem delay 200ms 10ms",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc netem 1: root refcnt [0-9]+ .*delay 200ms  10ms",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "4502",
+        "name": "Delete NETEM with handle",
+        "category": [
+            "qdisc",
+            "netem"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true",
+            "$TC qdisc add dev $DUMMY handle 1: root netem delay 100ms 10ms distribution normal"
+        ],
+        "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 1: root",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc netem 1: root refcnt [0-9]+ .*delay 100ms  10ms",
+        "matchCount": "0",
+        "teardown": [
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "0785",
+        "name": "Show NETEM class",
+        "category": [
+            "qdisc",
+            "netem"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root netem",
+        "expExitCode": "0",
+        "verifyCmd": "$TC class show dev $DUMMY",
+        "matchPattern": "class netem 1:",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    }
+]
-- 
cgit v1.2.3


From 856359c0d067654c83b8db09b413361846737100 Mon Sep 17 00:00:00 2001
From: Zhengchao Shao <shaozhengchao@huawei.com>
Date: Wed, 21 Sep 2022 10:43:44 +0800
Subject: selftests/tc-testing: add selftests for qfq qdisc

Test 0582: Create QFQ with default setting
Test c9a3: Create QFQ with class weight setting
Test 8452: Create QFQ with class maxpkt setting
Test d920: Create QFQ with multiple class setting
Test 0548: Delete QFQ with handle
Test 5901: Show QFQ class

Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
Reviewed-by: Victor Nogueira <victor@mojatatu.com>
Tested-by: Victor Nogueira <victor@mojatatu.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/tc-testing/tc-tests/qdiscs/qfq.json  | 145 +++++++++++++++++++++
 1 file changed, 145 insertions(+)
 create mode 100644 tools/testing/selftests/tc-testing/tc-tests/qdiscs/qfq.json

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/qfq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/qfq.json
new file mode 100644
index 000000000000..330f1a25e0ab
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/qfq.json
@@ -0,0 +1,145 @@
+[
+    {
+        "id": "0582",
+        "name": "Create QFQ with default setting",
+        "category": [
+            "qdisc",
+            "qfq"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root qfq",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc qfq 1: root refcnt [0-9]+",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "c9a3",
+        "name": "Create QFQ with class weight setting",
+        "category": [
+            "qdisc",
+            "qfq"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true",
+            "$TC qdisc add dev $DUMMY handle 1: root qfq"
+        ],
+        "cmdUnderTest": "$TC class add dev $DUMMY parent 1: classid 1:1 qfq weight 100",
+        "expExitCode": "0",
+        "verifyCmd": "$TC class show dev $DUMMY",
+        "matchPattern": "class qfq 1:1 root weight 100 maxpkt",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "8452",
+        "name": "Create QFQ with class maxpkt setting",
+        "category": [
+            "qdisc",
+            "qfq"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true",
+            "$TC qdisc add dev $DUMMY handle 1: root qfq"
+        ],
+        "cmdUnderTest": "$TC class add dev $DUMMY parent 1: classid 1:1 qfq maxpkt 2000",
+        "expExitCode": "0",
+        "verifyCmd": "$TC class show dev $DUMMY",
+        "matchPattern": "class qfq 1:1 root weight 1 maxpkt 2000",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "d920",
+        "name": "Create QFQ with multiple class setting",
+        "category": [
+            "qdisc",
+            "qfq"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true",
+            "$TC qdisc add dev $DUMMY handle 1: root qfq",
+            "$TC class add dev $DUMMY parent 1: classid 1:1 qfq weight 100"
+        ],
+        "cmdUnderTest": "$TC class add dev $DUMMY parent 1: classid 1:2 qfq weight 200",
+        "expExitCode": "0",
+        "verifyCmd": "$TC class show dev $DUMMY",
+        "matchPattern": "class qfq 1:[0-9]+ root weight [0-9]+00 maxpkt",
+        "matchCount": "2",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "0548",
+        "name": "Delete QFQ with handle",
+        "category": [
+            "qdisc",
+            "qfq"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true",
+            "$TC qdisc add dev $DUMMY handle 1: root qfq",
+            "$TC class add dev $DUMMY parent 1: classid 1:1 qfq weight 100"
+        ],
+        "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 1: root",
+        "expExitCode": "0",
+        "verifyCmd": "$TC class show dev $DUMMY",
+        "matchPattern": "qdisc qfq 1: root refcnt [0-9]+",
+        "matchCount": "0",
+        "teardown": [
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "5901",
+        "name": "Show QFQ class",
+        "category": [
+            "qdisc",
+            "qfq"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root qfq",
+        "expExitCode": "0",
+        "verifyCmd": "$TC class show dev $DUMMY",
+        "matchPattern": "class qfq 1:",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    }
+]
-- 
cgit v1.2.3


From 5ca72fbeabede09ca5d8b703defdc8ccdfbe5892 Mon Sep 17 00:00:00 2001
From: Zhengchao Shao <shaozhengchao@huawei.com>
Date: Wed, 21 Sep 2022 10:43:54 +0800
Subject: selftests/tc-testing: add show class case for ingress qdisc

Test 0521: Show ingress class

Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
Reviewed-by: Victor Nogueira <victor@mojatatu.com>
Tested-by: Victor Nogueira <victor@mojatatu.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../tc-testing/tc-tests/qdiscs/ingress.json          | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/ingress.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/ingress.json
index d99dba6e2b1a..11d33362408c 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/ingress.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/ingress.json
@@ -98,5 +98,25 @@
         "teardown": [
             "$IP link del dev $DUMMY type dummy"
         ]
+    },
+    {
+        "id": "0521",
+        "name": "Show ingress class",
+        "category": [
+            "qdisc",
+            "ingress"
+        ],
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY ingress",
+        "expExitCode": "0",
+        "verifyCmd": "$TC class show dev $DUMMY",
+        "matchPattern": "class ingress",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY ingress",
+            "$IP link del dev $DUMMY type dummy"
+        ]
     }
 ]
-- 
cgit v1.2.3


From dfbadd7f9945429068f10343eee49d635adad0cd Mon Sep 17 00:00:00 2001
From: Zhengchao Shao <shaozhengchao@huawei.com>
Date: Wed, 21 Sep 2022 10:44:04 +0800
Subject: selftests/tc-testing: add show class case for mq qdisc

Test 1023: Show mq class

Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
Reviewed-by: Victor Nogueira <victor@mojatatu.com>
Tested-by: Victor Nogueira <victor@mojatatu.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/tc-testing/tc-tests/qdiscs/mq.json   | 24 +++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json
index c6046096d9db..44fbfc6caec7 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json
@@ -133,5 +133,27 @@
 	    "teardown": [
 		    "echo \"1\" > /sys/bus/netdevsim/del_device"
 	    ]
-	}
+	},
+        {
+            "id": "1023",
+            "name": "Show mq class",
+            "category": [
+                "qdisc",
+                "mq"
+            ],
+            "plugins": {
+                "requires": "nsPlugin"
+            },
+            "setup": [
+                "echo \"1 1 4\" > /sys/bus/netdevsim/new_device"
+            ],
+            "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq",
+            "expExitCode": "0",
+            "verifyCmd": "$TC class show dev $ETH",
+            "matchPattern": "class mq 1:[0-9]+ root",
+            "matchCount": "4",
+            "teardown": [
+                "echo \"1\" > /sys/bus/netdevsim/del_device"
+            ]
+        }
 ]
-- 
cgit v1.2.3


From 1c15eb2a03c679d8188b22dd4f07ae1bfdb9ac1b Mon Sep 17 00:00:00 2001
From: Zhengchao Shao <shaozhengchao@huawei.com>
Date: Wed, 21 Sep 2022 10:44:15 +0800
Subject: selftests/tc-testing: add show class case for prio qdisc

Test 2410: Show prio class

Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
Reviewed-by: Victor Nogueira <victor@mojatatu.com>
Tested-by: Victor Nogueira <victor@mojatatu.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/tc-testing/tc-tests/qdiscs/prio.json   | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/prio.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/prio.json
index 3076c02d08d6..8186de2f0dcf 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/prio.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/prio.json
@@ -272,5 +272,25 @@
         "teardown": [
             "$IP link del dev $DUMMY type dummy"
         ]
+    },
+    {
+        "id": "2410",
+        "name": "Show prio class",
+        "category": [
+            "qdisc",
+            "prio"
+        ],
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root prio",
+        "expExitCode": "0",
+        "verifyCmd": "$TC class show dev $DUMMY",
+        "matchPattern": "class prio 1:[0-9]+ parent 1:",
+        "matchCount": "3",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root prio",
+            "$IP link del dev $DUMMY type dummy"
+        ]
     }
 ]
-- 
cgit v1.2.3


From d3f832547bb2981a96f498e007c100703398de53 Mon Sep 17 00:00:00 2001
From: Zhengchao Shao <shaozhengchao@huawei.com>
Date: Wed, 21 Sep 2022 10:44:25 +0800
Subject: selftests/tc-testing: add show class case for red qdisc

Test 290a: Show RED class

Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
Reviewed-by: Victor Nogueira <victor@mojatatu.com>
Tested-by: Victor Nogueira <victor@mojatatu.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/tc-testing/tc-tests/qdiscs/red.json  | 23 ++++++++++++++++++++++
 1 file changed, 23 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/red.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/red.json
index 0703a2a255eb..4b3e449857f2 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/red.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/red.json
@@ -181,5 +181,28 @@
             "$TC qdisc del dev $DUMMY handle 1: root",
             "$IP link del dev $DUMMY type dummy"
         ]
+    },
+    {
+        "id": "290a",
+        "name": "Show RED class",
+        "category": [
+            "qdisc",
+            "red"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root red limit 1M avpkt 1500 min 100K max 300K",
+        "expExitCode": "0",
+        "verifyCmd": "$TC class show dev $DUMMY",
+        "matchPattern": "class red 1:[0-9]+ parent 1:",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
     }
 ]
-- 
cgit v1.2.3


From aacdecda9eb4c65e5ed02c088f8fa4fd50bd327d Mon Sep 17 00:00:00 2001
From: Matthieu Baerts <matthieu.baerts@tessares.net>
Date: Fri, 23 Sep 2022 10:23:06 +0200
Subject: selftests/bonding: re-add lladdr target test

It looks like this test has been accidentally dropped when resolving
conflicts in this Makefile.

Most probably because there were 3 different patches modifying this file
in parallel:

  commit 152e8ec77640 ("selftests/bonding: add a test for bonding lladdr target")
  commit bbb774d921e2 ("net: Add tests for bonding and team address list management")
  commit 2ffd57327ff1 ("selftests: bonding: cause oops in bond_rr_gen_slave_id")

The first one was applied in 'net-next' while the two other ones were
recently applied in the 'net' tree.

But that's alright, easy to fix by re-adding the missing one!

Fixes: 0140a7168f8b ("Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net")
Signed-off-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Link: https://lore.kernel.org/r/20220923082306.2468081-1-matthieu.baerts@tessares.net
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/drivers/net/bonding/Makefile | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/drivers/net/bonding/Makefile b/tools/testing/selftests/drivers/net/bonding/Makefile
index d14846fcf3d1..e9dab5f9d773 100644
--- a/tools/testing/selftests/drivers/net/bonding/Makefile
+++ b/tools/testing/selftests/drivers/net/bonding/Makefile
@@ -4,6 +4,7 @@
 TEST_PROGS := \
 	bond-arp-interval-causes-panic.sh \
 	bond-break-lacpdu-tx.sh \
+	bond-lladdr-target.sh \
 	dev_addr_lists.sh
 
 TEST_FILES := lag_lib.sh
-- 
cgit v1.2.3


From e0401dce5e28fb7118dbfd055c77d94433778a85 Mon Sep 17 00:00:00 2001
From: Yosry Ahmed <yosryahmed@google.com>
Date: Mon, 19 Sep 2022 17:53:30 +0000
Subject: selftests/bpf: Simplify cgroup_hierarchical_stats selftest

The cgroup_hierarchical_stats selftest is complicated. It has to be,
because it tests an entire workflow of recording, aggregating, and
dumping cgroup stats. However, some of the complexity is unnecessary.
The test currently enables the memory controller in a cgroup hierarchy,
invokes reclaim, measures the reclaim time, THEN uses that reclaim time to
test the stats collection and aggregation. We don't need to use such a
complicated stat, as the context in which the stat is collected is
orthogonal.

Simplify the test by replacing reclaim time with a simpler stat: the
total number of times a process has ever entered a cgroup. This makes
the test simpler and removes the dependency on the memory controller and
the memory reclaim interface.

Signed-off-by: Yosry Ahmed <yosryahmed@google.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: KP Singh <kpsingh@kernel.org>
Link: https://lore.kernel.org/bpf/20220919175330.890793-1-yosryahmed@google.com
---
 .../bpf/prog_tests/cgroup_hierarchical_stats.c     | 170 +++++++++----------
 .../bpf/progs/cgroup_hierarchical_stats.c          | 181 +++++++--------------
 2 files changed, 131 insertions(+), 220 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_hierarchical_stats.c b/tools/testing/selftests/bpf/prog_tests/cgroup_hierarchical_stats.c
index bed1661596f7..3bd27d2ea668 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_hierarchical_stats.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_hierarchical_stats.c
@@ -1,6 +1,22 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * Functions to manage eBPF programs attached to cgroup subsystems
+ * This test makes sure BPF stats collection using rstat works correctly.
+ * The test uses 3 BPF progs:
+ * (a) counter: This BPF prog is invoked every time we attach a process to a
+ *              cgroup and locklessly increments a percpu counter.
+ *              The program then calls cgroup_rstat_updated() to inform rstat
+ *              of an update on the (cpu, cgroup) pair.
+ *
+ * (b) flusher: This BPF prog is invoked when an rstat flush is ongoing. It
+ *              aggregates all percpu counters to a total counter, and also
+ *              propagates the changes to the ancestor cgroups.
+ *
+ * (c) dumper: This BPF prog is a cgroup_iter. It is used to output the total
+ *             counter of a cgroup through reading a file in userspace.
+ *
+ * The test sets up a cgroup hierarchy, and the above programs. It spawns a few
+ * processes in the leaf cgroups and makes sure all the counters are aggregated
+ * correctly.
  *
  * Copyright 2022 Google LLC.
  */
@@ -21,8 +37,10 @@
 #define PAGE_SIZE 4096
 #define MB(x) (x << 20)
 
+#define PROCESSES_PER_CGROUP 3
+
 #define BPFFS_ROOT "/sys/fs/bpf/"
-#define BPFFS_VMSCAN BPFFS_ROOT"vmscan/"
+#define BPFFS_ATTACH_COUNTERS BPFFS_ROOT "attach_counters/"
 
 #define CG_ROOT_NAME "root"
 #define CG_ROOT_ID 1
@@ -79,7 +97,7 @@ static int setup_bpffs(void)
 		return err;
 
 	/* Create a directory to contain stat files in bpffs */
-	err = mkdir(BPFFS_VMSCAN, 0755);
+	err = mkdir(BPFFS_ATTACH_COUNTERS, 0755);
 	if (!ASSERT_OK(err, "mkdir"))
 		return err;
 
@@ -89,7 +107,7 @@ static int setup_bpffs(void)
 static void cleanup_bpffs(void)
 {
 	/* Remove created directory in bpffs */
-	ASSERT_OK(rmdir(BPFFS_VMSCAN), "rmdir "BPFFS_VMSCAN);
+	ASSERT_OK(rmdir(BPFFS_ATTACH_COUNTERS), "rmdir "BPFFS_ATTACH_COUNTERS);
 
 	/* Unmount bpffs, if it wasn't already mounted when we started */
 	if (mounted_bpffs)
@@ -118,18 +136,6 @@ static int setup_cgroups(void)
 
 		cgroups[i].fd = fd;
 		cgroups[i].id = get_cgroup_id(cgroups[i].path);
-
-		/*
-		 * Enable memcg controller for the entire hierarchy.
-		 * Note that stats are collected for all cgroups in a hierarchy
-		 * with memcg enabled anyway, but are only exposed for cgroups
-		 * that have memcg enabled.
-		 */
-		if (i < N_NON_LEAF_CGROUPS) {
-			err = enable_controllers(cgroups[i].path, "memory");
-			if (!ASSERT_OK(err, "enable_controllers"))
-				return err;
-		}
 	}
 	return 0;
 }
@@ -154,109 +160,85 @@ static void destroy_hierarchy(void)
 	cleanup_bpffs();
 }
 
-static int reclaimer(const char *cgroup_path, size_t size)
-{
-	static char size_buf[128];
-	char *buf, *ptr;
-	int err;
-
-	/* Join cgroup in the parent process workdir */
-	if (join_parent_cgroup(cgroup_path))
-		return EACCES;
-
-	/* Allocate memory */
-	buf = malloc(size);
-	if (!buf)
-		return ENOMEM;
-
-	/* Write to memory to make sure it's actually allocated */
-	for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
-		*ptr = 1;
-
-	/* Try to reclaim memory */
-	snprintf(size_buf, 128, "%lu", size);
-	err = write_cgroup_file_parent(cgroup_path, "memory.reclaim", size_buf);
-
-	free(buf);
-	/* memory.reclaim returns EAGAIN if the amount is not fully reclaimed */
-	if (err && errno != EAGAIN)
-		return errno;
-
-	return 0;
-}
-
-static int induce_vmscan(void)
+static int attach_processes(void)
 {
-	int i, status;
+	int i, j, status;
 
-	/*
-	 * In every leaf cgroup, run a child process that allocates some memory
-	 * and attempts to reclaim some of it.
-	 */
+	/* In every leaf cgroup, attach 3 processes */
 	for (i = N_NON_LEAF_CGROUPS; i < N_CGROUPS; i++) {
-		pid_t pid;
-
-		/* Create reclaimer child */
-		pid = fork();
-		if (pid == 0) {
-			status = reclaimer(cgroups[i].path, MB(5));
-			exit(status);
+		for (j = 0; j < PROCESSES_PER_CGROUP; j++) {
+			pid_t pid;
+
+			/* Create child and attach to cgroup */
+			pid = fork();
+			if (pid == 0) {
+				if (join_parent_cgroup(cgroups[i].path))
+					exit(EACCES);
+				exit(0);
+			}
+
+			/* Cleanup child */
+			waitpid(pid, &status, 0);
+			if (!ASSERT_TRUE(WIFEXITED(status), "child process exited"))
+				return 1;
+			if (!ASSERT_EQ(WEXITSTATUS(status), 0,
+				       "child process exit code"))
+				return 1;
 		}
-
-		/* Cleanup reclaimer child */
-		waitpid(pid, &status, 0);
-		ASSERT_TRUE(WIFEXITED(status), "reclaimer exited");
-		ASSERT_EQ(WEXITSTATUS(status), 0, "reclaim exit code");
 	}
 	return 0;
 }
 
 static unsigned long long
-get_cgroup_vmscan_delay(unsigned long long cgroup_id, const char *file_name)
+get_attach_counter(unsigned long long cgroup_id, const char *file_name)
 {
-	unsigned long long vmscan = 0, id = 0;
+	unsigned long long attach_counter = 0, id = 0;
 	static char buf[128], path[128];
 
 	/* For every cgroup, read the file generated by cgroup_iter */
-	snprintf(path, 128, "%s%s", BPFFS_VMSCAN, file_name);
+	snprintf(path, 128, "%s%s", BPFFS_ATTACH_COUNTERS, file_name);
 	if (!ASSERT_OK(read_from_file(path, buf, 128), "read cgroup_iter"))
 		return 0;
 
 	/* Check the output file formatting */
-	ASSERT_EQ(sscanf(buf, "cg_id: %llu, total_vmscan_delay: %llu\n",
-			 &id, &vmscan), 2, "output format");
+	ASSERT_EQ(sscanf(buf, "cg_id: %llu, attach_counter: %llu\n",
+			 &id, &attach_counter), 2, "output format");
 
 	/* Check that the cgroup_id is displayed correctly */
 	ASSERT_EQ(id, cgroup_id, "cgroup_id");
-	/* Check that the vmscan reading is non-zero */
-	ASSERT_GT(vmscan, 0, "vmscan_reading");
-	return vmscan;
+	/* Check that the counter is non-zero */
+	ASSERT_GT(attach_counter, 0, "attach counter non-zero");
+	return attach_counter;
 }
 
-static void check_vmscan_stats(void)
+static void check_attach_counters(void)
 {
-	unsigned long long vmscan_readings[N_CGROUPS], vmscan_root;
+	unsigned long long attach_counters[N_CGROUPS], root_attach_counter;
 	int i;
 
-	for (i = 0; i < N_CGROUPS; i++) {
-		vmscan_readings[i] = get_cgroup_vmscan_delay(cgroups[i].id,
-							     cgroups[i].name);
-	}
+	for (i = 0; i < N_CGROUPS; i++)
+		attach_counters[i] = get_attach_counter(cgroups[i].id,
+							cgroups[i].name);
 
 	/* Read stats for root too */
-	vmscan_root = get_cgroup_vmscan_delay(CG_ROOT_ID, CG_ROOT_NAME);
+	root_attach_counter = get_attach_counter(CG_ROOT_ID, CG_ROOT_NAME);
+
+	/* Check that all leafs cgroups have an attach counter of 3 */
+	for (i = N_NON_LEAF_CGROUPS; i < N_CGROUPS; i++)
+		ASSERT_EQ(attach_counters[i], PROCESSES_PER_CGROUP,
+			  "leaf cgroup attach counter");
 
 	/* Check that child1 == child1_1 + child1_2 */
-	ASSERT_EQ(vmscan_readings[1], vmscan_readings[3] + vmscan_readings[4],
-		  "child1_vmscan");
+	ASSERT_EQ(attach_counters[1], attach_counters[3] + attach_counters[4],
+		  "child1_counter");
 	/* Check that child2 == child2_1 + child2_2 */
-	ASSERT_EQ(vmscan_readings[2], vmscan_readings[5] + vmscan_readings[6],
-		  "child2_vmscan");
+	ASSERT_EQ(attach_counters[2], attach_counters[5] + attach_counters[6],
+		  "child2_counter");
 	/* Check that test == child1 + child2 */
-	ASSERT_EQ(vmscan_readings[0], vmscan_readings[1] + vmscan_readings[2],
-		  "test_vmscan");
+	ASSERT_EQ(attach_counters[0], attach_counters[1] + attach_counters[2],
+		  "test_counter");
 	/* Check that root >= test */
-	ASSERT_GE(vmscan_root, vmscan_readings[1], "root_vmscan");
+	ASSERT_GE(root_attach_counter, attach_counters[1], "root_counter");
 }
 
 /* Creates iter link and pins in bpffs, returns 0 on success, -errno on failure.
@@ -278,12 +260,12 @@ static int setup_cgroup_iter(struct cgroup_hierarchical_stats *obj,
 	linfo.cgroup.order = BPF_CGROUP_ITER_SELF_ONLY;
 	opts.link_info = &linfo;
 	opts.link_info_len = sizeof(linfo);
-	link = bpf_program__attach_iter(obj->progs.dump_vmscan, &opts);
+	link = bpf_program__attach_iter(obj->progs.dumper, &opts);
 	if (!ASSERT_OK_PTR(link, "attach_iter"))
 		return -EFAULT;
 
 	/* Pin the link to a bpffs file */
-	snprintf(path, 128, "%s%s", BPFFS_VMSCAN, file_name);
+	snprintf(path, 128, "%s%s", BPFFS_ATTACH_COUNTERS, file_name);
 	err = bpf_link__pin(link, path);
 	ASSERT_OK(err, "pin cgroup_iter");
 
@@ -313,7 +295,7 @@ static int setup_progs(struct cgroup_hierarchical_stats **skel)
 	if (!ASSERT_OK(err, "setup_cgroup_iter"))
 		return err;
 
-	bpf_program__set_autoattach((*skel)->progs.dump_vmscan, false);
+	bpf_program__set_autoattach((*skel)->progs.dumper, false);
 	err = cgroup_hierarchical_stats__attach(*skel);
 	if (!ASSERT_OK(err, "attach"))
 		return err;
@@ -328,13 +310,13 @@ static void destroy_progs(struct cgroup_hierarchical_stats *skel)
 
 	for (i = 0; i < N_CGROUPS; i++) {
 		/* Delete files in bpffs that cgroup_iters are pinned in */
-		snprintf(path, 128, "%s%s", BPFFS_VMSCAN,
+		snprintf(path, 128, "%s%s", BPFFS_ATTACH_COUNTERS,
 			 cgroups[i].name);
 		ASSERT_OK(remove(path), "remove cgroup_iter pin");
 	}
 
 	/* Delete root file in bpffs */
-	snprintf(path, 128, "%s%s", BPFFS_VMSCAN, CG_ROOT_NAME);
+	snprintf(path, 128, "%s%s", BPFFS_ATTACH_COUNTERS, CG_ROOT_NAME);
 	ASSERT_OK(remove(path), "remove cgroup_iter root pin");
 	cgroup_hierarchical_stats__destroy(skel);
 }
@@ -347,9 +329,9 @@ void test_cgroup_hierarchical_stats(void)
 		goto hierarchy_cleanup;
 	if (setup_progs(&skel))
 		goto cleanup;
-	if (induce_vmscan())
+	if (attach_processes())
 		goto cleanup;
-	check_vmscan_stats();
+	check_attach_counters();
 cleanup:
 	destroy_progs(skel);
 hierarchy_cleanup:
diff --git a/tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c b/tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c
index 8ab4253a1592..c74362854948 100644
--- a/tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c
+++ b/tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c
@@ -1,7 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * Functions to manage eBPF programs attached to cgroup subsystems
- *
  * Copyright 2022 Google LLC.
  */
 #include "vmlinux.h"
@@ -11,25 +9,14 @@
 
 char _license[] SEC("license") = "GPL";
 
-/*
- * Start times are stored per-task, not per-cgroup, as multiple tasks in one
- * cgroup can perform reclaim concurrently.
- */
-struct {
-	__uint(type, BPF_MAP_TYPE_TASK_STORAGE);
-	__uint(map_flags, BPF_F_NO_PREALLOC);
-	__type(key, int);
-	__type(value, __u64);
-} vmscan_start_time SEC(".maps");
-
-struct vmscan_percpu {
+struct percpu_attach_counter {
 	/* Previous percpu state, to figure out if we have new updates */
 	__u64 prev;
 	/* Current percpu state */
 	__u64 state;
 };
 
-struct vmscan {
+struct attach_counter {
 	/* State propagated through children, pending aggregation */
 	__u64 pending;
 	/* Total state, including all cpus and all children */
@@ -38,147 +25,94 @@ struct vmscan {
 
 struct {
 	__uint(type, BPF_MAP_TYPE_PERCPU_HASH);
-	__uint(max_entries, 100);
+	__uint(max_entries, 1024);
 	__type(key, __u64);
-	__type(value, struct vmscan_percpu);
-} pcpu_cgroup_vmscan_elapsed SEC(".maps");
+	__type(value, struct percpu_attach_counter);
+} percpu_attach_counters SEC(".maps");
 
 struct {
 	__uint(type, BPF_MAP_TYPE_HASH);
-	__uint(max_entries, 100);
+	__uint(max_entries, 1024);
 	__type(key, __u64);
-	__type(value, struct vmscan);
-} cgroup_vmscan_elapsed SEC(".maps");
+	__type(value, struct attach_counter);
+} attach_counters SEC(".maps");
 
 extern void cgroup_rstat_updated(struct cgroup *cgrp, int cpu) __ksym;
 extern void cgroup_rstat_flush(struct cgroup *cgrp) __ksym;
 
-static struct cgroup *task_memcg(struct task_struct *task)
-{
-	int cgrp_id;
-
-#if __has_builtin(__builtin_preserve_enum_value)
-	cgrp_id = bpf_core_enum_value(enum cgroup_subsys_id, memory_cgrp_id);
-#else
-	cgrp_id = memory_cgrp_id;
-#endif
-	return task->cgroups->subsys[cgrp_id]->cgroup;
-}
-
 static uint64_t cgroup_id(struct cgroup *cgrp)
 {
 	return cgrp->kn->id;
 }
 
-static int create_vmscan_percpu_elem(__u64 cg_id, __u64 state)
+static int create_percpu_attach_counter(__u64 cg_id, __u64 state)
 {
-	struct vmscan_percpu pcpu_init = {.state = state, .prev = 0};
+	struct percpu_attach_counter pcpu_init = {.state = state, .prev = 0};
 
-	return bpf_map_update_elem(&pcpu_cgroup_vmscan_elapsed, &cg_id,
+	return bpf_map_update_elem(&percpu_attach_counters, &cg_id,
 				   &pcpu_init, BPF_NOEXIST);
 }
 
-static int create_vmscan_elem(__u64 cg_id, __u64 state, __u64 pending)
+static int create_attach_counter(__u64 cg_id, __u64 state, __u64 pending)
 {
-	struct vmscan init = {.state = state, .pending = pending};
+	struct attach_counter init = {.state = state, .pending = pending};
 
-	return bpf_map_update_elem(&cgroup_vmscan_elapsed, &cg_id,
+	return bpf_map_update_elem(&attach_counters, &cg_id,
 				   &init, BPF_NOEXIST);
 }
 
-SEC("tp_btf/mm_vmscan_memcg_reclaim_begin")
-int BPF_PROG(vmscan_start, int order, gfp_t gfp_flags)
+SEC("fentry/cgroup_attach_task")
+int BPF_PROG(counter, struct cgroup *dst_cgrp, struct task_struct *leader,
+	     bool threadgroup)
 {
-	struct task_struct *task = bpf_get_current_task_btf();
-	__u64 *start_time_ptr;
-
-	start_time_ptr = bpf_task_storage_get(&vmscan_start_time, task, 0,
-					      BPF_LOCAL_STORAGE_GET_F_CREATE);
-	if (start_time_ptr)
-		*start_time_ptr = bpf_ktime_get_ns();
-	return 0;
-}
-
-SEC("tp_btf/mm_vmscan_memcg_reclaim_end")
-int BPF_PROG(vmscan_end, unsigned long nr_reclaimed)
-{
-	struct vmscan_percpu *pcpu_stat;
-	struct task_struct *current = bpf_get_current_task_btf();
-	struct cgroup *cgrp;
-	__u64 *start_time_ptr;
-	__u64 current_elapsed, cg_id;
-	__u64 end_time = bpf_ktime_get_ns();
-
-	/*
-	 * cgrp is the first parent cgroup of current that has memcg enabled in
-	 * its subtree_control, or NULL if memcg is disabled in the entire tree.
-	 * In a cgroup hierarchy like this:
-	 *                               a
-	 *                              / \
-	 *                             b   c
-	 *  If "a" has memcg enabled, while "b" doesn't, then processes in "b"
-	 *  will accumulate their stats directly to "a". This makes sure that no
-	 *  stats are lost from processes in leaf cgroups that don't have memcg
-	 *  enabled, but only exposes stats for cgroups that have memcg enabled.
-	 */
-	cgrp = task_memcg(current);
-	if (!cgrp)
+	__u64 cg_id = cgroup_id(dst_cgrp);
+	struct percpu_attach_counter *pcpu_counter = bpf_map_lookup_elem(
+			&percpu_attach_counters,
+			&cg_id);
+
+	if (pcpu_counter)
+		pcpu_counter->state += 1;
+	else if (create_percpu_attach_counter(cg_id, 1))
 		return 0;
 
-	cg_id = cgroup_id(cgrp);
-	start_time_ptr = bpf_task_storage_get(&vmscan_start_time, current, 0,
-					      BPF_LOCAL_STORAGE_GET_F_CREATE);
-	if (!start_time_ptr)
-		return 0;
-
-	current_elapsed = end_time - *start_time_ptr;
-	pcpu_stat = bpf_map_lookup_elem(&pcpu_cgroup_vmscan_elapsed,
-					&cg_id);
-	if (pcpu_stat)
-		pcpu_stat->state += current_elapsed;
-	else if (create_vmscan_percpu_elem(cg_id, current_elapsed))
-		return 0;
-
-	cgroup_rstat_updated(cgrp, bpf_get_smp_processor_id());
+	cgroup_rstat_updated(dst_cgrp, bpf_get_smp_processor_id());
 	return 0;
 }
 
 SEC("fentry/bpf_rstat_flush")
-int BPF_PROG(vmscan_flush, struct cgroup *cgrp, struct cgroup *parent, int cpu)
+int BPF_PROG(flusher, struct cgroup *cgrp, struct cgroup *parent, int cpu)
 {
-	struct vmscan_percpu *pcpu_stat;
-	struct vmscan *total_stat, *parent_stat;
+	struct percpu_attach_counter *pcpu_counter;
+	struct attach_counter *total_counter, *parent_counter;
 	__u64 cg_id = cgroup_id(cgrp);
 	__u64 parent_cg_id = parent ? cgroup_id(parent) : 0;
-	__u64 *pcpu_vmscan;
 	__u64 state;
 	__u64 delta = 0;
 
 	/* Add CPU changes on this level since the last flush */
-	pcpu_stat = bpf_map_lookup_percpu_elem(&pcpu_cgroup_vmscan_elapsed,
-					       &cg_id, cpu);
-	if (pcpu_stat) {
-		state = pcpu_stat->state;
-		delta += state - pcpu_stat->prev;
-		pcpu_stat->prev = state;
+	pcpu_counter = bpf_map_lookup_percpu_elem(&percpu_attach_counters,
+						  &cg_id, cpu);
+	if (pcpu_counter) {
+		state = pcpu_counter->state;
+		delta += state - pcpu_counter->prev;
+		pcpu_counter->prev = state;
 	}
 
-	total_stat = bpf_map_lookup_elem(&cgroup_vmscan_elapsed, &cg_id);
-	if (!total_stat) {
-		if (create_vmscan_elem(cg_id, delta, 0))
+	total_counter = bpf_map_lookup_elem(&attach_counters, &cg_id);
+	if (!total_counter) {
+		if (create_attach_counter(cg_id, delta, 0))
 			return 0;
-
 		goto update_parent;
 	}
 
 	/* Collect pending stats from subtree */
-	if (total_stat->pending) {
-		delta += total_stat->pending;
-		total_stat->pending = 0;
+	if (total_counter->pending) {
+		delta += total_counter->pending;
+		total_counter->pending = 0;
 	}
 
 	/* Propagate changes to this cgroup's total */
-	total_stat->state += delta;
+	total_counter->state += delta;
 
 update_parent:
 	/* Skip if there are no changes to propagate, or no parent */
@@ -186,20 +120,20 @@ update_parent:
 		return 0;
 
 	/* Propagate changes to cgroup's parent */
-	parent_stat = bpf_map_lookup_elem(&cgroup_vmscan_elapsed,
-					  &parent_cg_id);
-	if (parent_stat)
-		parent_stat->pending += delta;
+	parent_counter = bpf_map_lookup_elem(&attach_counters,
+					     &parent_cg_id);
+	if (parent_counter)
+		parent_counter->pending += delta;
 	else
-		create_vmscan_elem(parent_cg_id, 0, delta);
+		create_attach_counter(parent_cg_id, 0, delta);
 	return 0;
 }
 
 SEC("iter.s/cgroup")
-int BPF_PROG(dump_vmscan, struct bpf_iter_meta *meta, struct cgroup *cgrp)
+int BPF_PROG(dumper, struct bpf_iter_meta *meta, struct cgroup *cgrp)
 {
 	struct seq_file *seq = meta->seq;
-	struct vmscan *total_stat;
+	struct attach_counter *total_counter;
 	__u64 cg_id = cgrp ? cgroup_id(cgrp) : 0;
 
 	/* Do nothing for the terminal call */
@@ -209,18 +143,13 @@ int BPF_PROG(dump_vmscan, struct bpf_iter_meta *meta, struct cgroup *cgrp)
 	/* Flush the stats to make sure we get the most updated numbers */
 	cgroup_rstat_flush(cgrp);
 
-	total_stat = bpf_map_lookup_elem(&cgroup_vmscan_elapsed, &cg_id);
-	if (!total_stat) {
-		BPF_SEQ_PRINTF(seq, "cg_id: %llu, total_vmscan_delay: 0\n",
+	total_counter = bpf_map_lookup_elem(&attach_counters, &cg_id);
+	if (!total_counter) {
+		BPF_SEQ_PRINTF(seq, "cg_id: %llu, attach_counter: 0\n",
 			       cg_id);
 	} else {
-		BPF_SEQ_PRINTF(seq, "cg_id: %llu, total_vmscan_delay: %llu\n",
-			       cg_id, total_stat->state);
+		BPF_SEQ_PRINTF(seq, "cg_id: %llu, attach_counter: %llu\n",
+			       cg_id, total_counter->state);
 	}
-
-	/*
-	 * We only dump stats for one cgroup here, so return 1 to stop
-	 * iteration after the first cgroup.
-	 */
-	return 1;
+	return 0;
 }
-- 
cgit v1.2.3


From e588c116df6ca64a295017571151992c76d03132 Mon Sep 17 00:00:00 2001
From: Wang Yufen <wangyufen@huawei.com>
Date: Thu, 22 Sep 2022 14:28:44 +0800
Subject: libbpf: Add pathname_concat() helper

Move snprintf and len check to common helper pathname_concat() to make the
code simpler.

Signed-off-by: Wang Yufen <wangyufen@huawei.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/1663828124-10437-1-git-send-email-wangyufen@huawei.com
---
 tools/lib/bpf/libbpf.c | 76 +++++++++++++++++++-------------------------------
 1 file changed, 29 insertions(+), 47 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 67bc18506150..e691f08a297f 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -2097,19 +2097,30 @@ static bool get_map_field_int(const char *map_name, const struct btf *btf,
 	return true;
 }
 
+static int pathname_concat(char *buf, size_t buf_sz, const char *path, const char *name)
+{
+	int len;
+
+	len = snprintf(buf, buf_sz, "%s/%s", path, name);
+	if (len < 0)
+		return -EINVAL;
+	if (len >= buf_sz)
+		return -ENAMETOOLONG;
+
+	return 0;
+}
+
 static int build_map_pin_path(struct bpf_map *map, const char *path)
 {
 	char buf[PATH_MAX];
-	int len;
+	int err;
 
 	if (!path)
 		path = "/sys/fs/bpf";
 
-	len = snprintf(buf, PATH_MAX, "%s/%s", path, bpf_map__name(map));
-	if (len < 0)
-		return -EINVAL;
-	else if (len >= PATH_MAX)
-		return -ENAMETOOLONG;
+	err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map));
+	if (err)
+		return err;
 
 	return bpf_map__set_pin_path(map, buf);
 }
@@ -7968,17 +7979,9 @@ int bpf_object__pin_maps(struct bpf_object *obj, const char *path)
 			continue;
 
 		if (path) {
-			int len;
-
-			len = snprintf(buf, PATH_MAX, "%s/%s", path,
-				       bpf_map__name(map));
-			if (len < 0) {
-				err = -EINVAL;
-				goto err_unpin_maps;
-			} else if (len >= PATH_MAX) {
-				err = -ENAMETOOLONG;
+			err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map));
+			if (err)
 				goto err_unpin_maps;
-			}
 			sanitize_pin_path(buf);
 			pin_path = buf;
 		} else if (!map->pin_path) {
@@ -8016,14 +8019,9 @@ int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
 		char buf[PATH_MAX];
 
 		if (path) {
-			int len;
-
-			len = snprintf(buf, PATH_MAX, "%s/%s", path,
-				       bpf_map__name(map));
-			if (len < 0)
-				return libbpf_err(-EINVAL);
-			else if (len >= PATH_MAX)
-				return libbpf_err(-ENAMETOOLONG);
+			err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map));
+			if (err)
+				return libbpf_err(err);
 			sanitize_pin_path(buf);
 			pin_path = buf;
 		} else if (!map->pin_path) {
@@ -8041,6 +8039,7 @@ int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)
 int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
 {
 	struct bpf_program *prog;
+	char buf[PATH_MAX];
 	int err;
 
 	if (!obj)
@@ -8052,17 +8051,9 @@ int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
 	}
 
 	bpf_object__for_each_program(prog, obj) {
-		char buf[PATH_MAX];
-		int len;
-
-		len = snprintf(buf, PATH_MAX, "%s/%s", path, prog->name);
-		if (len < 0) {
-			err = -EINVAL;
-			goto err_unpin_programs;
-		} else if (len >= PATH_MAX) {
-			err = -ENAMETOOLONG;
+		err = pathname_concat(buf, sizeof(buf), path, prog->name);
+		if (err)
 			goto err_unpin_programs;
-		}
 
 		err = bpf_program__pin(prog, buf);
 		if (err)
@@ -8073,13 +8064,7 @@ int bpf_object__pin_programs(struct bpf_object *obj, const char *path)
 
 err_unpin_programs:
 	while ((prog = bpf_object__prev_program(obj, prog))) {
-		char buf[PATH_MAX];
-		int len;
-
-		len = snprintf(buf, PATH_MAX, "%s/%s", path, prog->name);
-		if (len < 0)
-			continue;
-		else if (len >= PATH_MAX)
+		if (pathname_concat(buf, sizeof(buf), path, prog->name))
 			continue;
 
 		bpf_program__unpin(prog, buf);
@@ -8098,13 +8083,10 @@ int bpf_object__unpin_programs(struct bpf_object *obj, const char *path)
 
 	bpf_object__for_each_program(prog, obj) {
 		char buf[PATH_MAX];
-		int len;
 
-		len = snprintf(buf, PATH_MAX, "%s/%s", path, prog->name);
-		if (len < 0)
-			return libbpf_err(-EINVAL);
-		else if (len >= PATH_MAX)
-			return libbpf_err(-ENAMETOOLONG);
+		err = pathname_concat(buf, sizeof(buf), path, prog->name);
+		if (err)
+			return libbpf_err(err);
 
 		err = bpf_program__unpin(prog, buf);
 		if (err)
-- 
cgit v1.2.3


From dbdea9b36fb61da3b9a1be0dd63542e2bfd3e5d7 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Fri, 23 Sep 2022 16:05:59 -0700
Subject: libbpf: restore memory layout of bpf_object_open_opts

When the attach_prog_fd field was removed in libbpf 1.0 and replaced with
a `long: 0` placeholder, it actually shifted all the subsequent fields by
8 bytes. This is due to `long: 0` promising to adjust the next field's
offset to a long-aligned offset. But in this case we were already
long-aligned, as pin_root_path is a pointer. So `long: 0` had no effect,
and thus didn't fill the gap created by the removed attach_prog_fd.

Non-zero bitfield should have been used instead. I validated using
pahole. Originally kconfig field was at offset 40. With `long: 0` it's
at offset 32, which is wrong. With this change it's back at offset 40.

Technically, libbpf 1.0 is allowed to break backwards compatibility,
and applications should have been recompiled against libbpf 1.0
headers. But given how trivial it is to preserve the memory layout,
let's fix this.

Reported-by: Grant Seltzer Richman <grantseltzer@gmail.com>
Fixes: 146bf811f5ac ("libbpf: remove most other deprecated high-level APIs")
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/r/20220923230559.666608-1-andrii@kernel.org
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
---
 tools/lib/bpf/libbpf.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index e2d8c17f2e85..eee883f007f9 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -118,7 +118,9 @@ struct bpf_object_open_opts {
 	 * auto-pinned to that path on load; defaults to "/sys/fs/bpf".
 	 */
 	const char *pin_root_path;
-	long :0;
+
+	__u32 :32; /* stub out now removed attach_prog_fd */
+
 	/* Additional kernel config content that augments and overrides
 	 * system Kconfig for CONFIG_xxx externs.
 	 */
-- 
cgit v1.2.3


From 067f4f291c2063d86abe0a526ef211e03a4f1258 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Fri, 23 Sep 2022 10:59:09 -0700
Subject: selftests/bpf: add sign-file to .gitignore

Add sign-file to .gitignore to avoid accidentally checking it in.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/r/20220923175913.3272430-2-andrii@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/.gitignore | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 3b288562963e..07d2d0a8c5cb 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -40,6 +40,7 @@ test_cpp
 /runqslower
 /bench
 /veristat
+/sign-file
 *.ko
 *.tmp
 xskxceiver
-- 
cgit v1.2.3


From c2488d70ceee352611e55943c25abf30117e3b67 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Fri, 23 Sep 2022 10:59:10 -0700
Subject: selftests/bpf: make veristat's verifier log parsing faster and more
 robust

Make sure veristat doesn't spend a ridiculous amount of time parsing
verifier stats from the verifier log, especially for very large logs or
truncated logs (e.g., when the verifier returns -ENOSPC due to a too
small buffer). For this, parse lines from the end of the log and make
sure we parse at most the last 100 lines, where stats should be, if at all.

Suggested-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/r/20220923175913.3272430-3-andrii@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/veristat.c | 29 ++++++++++++++++++++---------
 1 file changed, 20 insertions(+), 9 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c
index 51030234b60a..77bdfd6fe302 100644
--- a/tools/testing/selftests/bpf/veristat.c
+++ b/tools/testing/selftests/bpf/veristat.c
@@ -419,19 +419,30 @@ static void free_verif_stats(struct verif_stats *stats, size_t stat_cnt)
 
 static char verif_log_buf[64 * 1024];
 
-static int parse_verif_log(const char *buf, size_t buf_sz, struct verif_stats *s)
+#define MAX_PARSED_LOG_LINES 100
+
+static int parse_verif_log(char * const buf, size_t buf_sz, struct verif_stats *s)
 {
-	const char *next;
-	int pos;
+	const char *cur;
+	int pos, lines;
+
+	buf[buf_sz - 1] = '\0';
 
-	for (pos = 0; buf[0]; buf = next) {
-		if (buf[0] == '\n')
-			buf++;
-		next = strchrnul(&buf[pos], '\n');
+	for (pos = strlen(buf) - 1, lines = 0; pos >= 0 && lines < MAX_PARSED_LOG_LINES; lines++) {
+		/* find previous endline or otherwise take the start of log buf */
+		for (cur = &buf[pos]; cur > buf && cur[0] != '\n'; cur--, pos--) {
+		}
+		/* next time start from end of previous line (or pos goes to <0) */
+		pos--;
+		/* if we found endline, point right after endline symbol;
+		 * otherwise, stay at the beginning of log buf
+		 */
+		if (cur[0] == '\n')
+			cur++;
 
-		if (1 == sscanf(buf, "verification time %ld usec\n", &s->stats[DURATION]))
+		if (1 == sscanf(cur, "verification time %ld usec\n", &s->stats[DURATION]))
 			continue;
-		if (6 == sscanf(buf, "processed %ld insns (limit %*d) max_states_per_insn %ld total_states %ld peak_states %ld mark_read %ld",
+		if (6 == sscanf(cur, "processed %ld insns (limit %*d) max_states_per_insn %ld total_states %ld peak_states %ld mark_read %ld",
 				&s->stats[TOTAL_INSNS],
 				&s->stats[MAX_STATES_PER_INSN],
 				&s->stats[TOTAL_STATES],
-- 
cgit v1.2.3


From 518fee8bfaf2c628007909c0fc5336930b9b6ee4 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Fri, 23 Sep 2022 10:59:11 -0700
Subject: selftests/bpf: make veristat skip non-BPF and failing-to-open BPF
 objects

Make veristat ignore non-BPF object files. This allows simpler
mass-verification (e.g., `sudo ./veristat *.bpf.o` in selftests/bpf
directory). Note that `sudo ./veristat *.o` would also work, but with
selftests's multiple copies of BPF object files (.bpf.o and
.bpf.linked{1,2,3}.o) it's 4x slower.

Also, given that some BPF object files could be incomplete in the sense
that they are meant to be statically linked into a final BPF object file
(like linked_maps, linked_funcs, linked_vars), note such instances in
stderr, but proceed anyway. This seems like a better trade-off between
completely silently ignoring a BPF object file and aborting
mass-verification altogether.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/r/20220923175913.3272430-4-andrii@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/veristat.c | 78 ++++++++++++++++++++++++++++++----
 1 file changed, 70 insertions(+), 8 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c
index 77bdfd6fe302..f09dd143a8df 100644
--- a/tools/testing/selftests/bpf/veristat.c
+++ b/tools/testing/selftests/bpf/veristat.c
@@ -15,6 +15,8 @@
 #include <sys/sysinfo.h>
 #include <sys/stat.h>
 #include <bpf/libbpf.h>
+#include <libelf.h>
+#include <gelf.h>
 
 enum stat_id {
 	VERDICT,
@@ -78,6 +80,11 @@ static struct env {
 	struct filter *deny_filters;
 	int allow_filter_cnt;
 	int deny_filter_cnt;
+
+	int files_processed;
+	int files_skipped;
+	int progs_processed;
+	int progs_skipped;
 } env;
 
 static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
@@ -226,8 +233,41 @@ static bool should_process_file(const char *filename)
 	return false;
 }
 
-static bool should_process_prog(const char *filename, const char *prog_name)
+static bool is_bpf_obj_file(const char *path) {
+	Elf64_Ehdr *ehdr;
+	int fd, err = -EINVAL;
+	Elf *elf = NULL;
+
+	fd = open(path, O_RDONLY | O_CLOEXEC);
+	if (fd < 0)
+		return true; /* we'll fail later and propagate error */
+
+	/* ensure libelf is initialized */
+	(void)elf_version(EV_CURRENT);
+
+	elf = elf_begin(fd, ELF_C_READ, NULL);
+	if (!elf)
+		goto cleanup;
+
+	if (elf_kind(elf) != ELF_K_ELF || gelf_getclass(elf) != ELFCLASS64)
+		goto cleanup;
+
+	ehdr = elf64_getehdr(elf);
+	/* Old LLVM set e_machine to EM_NONE */
+	if (!ehdr || ehdr->e_type != ET_REL || (ehdr->e_machine && ehdr->e_machine != EM_BPF))
+		goto cleanup;
+
+	err = 0;
+cleanup:
+	if (elf)
+		elf_end(elf);
+	close(fd);
+	return err == 0;
+}
+
+static bool should_process_prog(const char *path, const char *prog_name)
 {
+	const char *filename = basename(path);
 	int i;
 
 	if (env.deny_filter_cnt > 0) {
@@ -303,7 +343,7 @@ static int append_filter_file(const char *path)
 	f = fopen(path, "r");
 	if (!f) {
 		err = -errno;
-		fprintf(stderr, "Failed to open '%s': %d\n", path, err);
+		fprintf(stderr, "Failed to open filters in '%s': %d\n", path, err);
 		return err;
 	}
 
@@ -463,8 +503,10 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf
 	int err = 0;
 	void *tmp;
 
-	if (!should_process_prog(basename(filename), bpf_program__name(prog)))
+	if (!should_process_prog(filename, bpf_program__name(prog))) {
+		env.progs_skipped++;
 		return 0;
+	}
 
 	tmp = realloc(env.prog_stats, (env.prog_stat_cnt + 1) * sizeof(*env.prog_stats));
 	if (!tmp)
@@ -487,6 +529,7 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf
 	verif_log_buf[0] = '\0';
 
 	err = bpf_object__load(obj);
+	env.progs_processed++;
 
 	stats->file_name = strdup(basename(filename));
 	stats->prog_name = strdup(bpf_program__name(prog));
@@ -513,18 +556,37 @@ static int process_obj(const char *filename)
 	LIBBPF_OPTS(bpf_object_open_opts, opts);
 	int err = 0, prog_cnt = 0;
 
-	if (!should_process_file(basename(filename)))
+	if (!should_process_file(basename(filename))) {
+		if (env.verbose)
+			printf("Skipping '%s' due to filters...\n", filename);
+		env.files_skipped++;
+		return 0;
+	}
+	if (!is_bpf_obj_file(filename)) {
+		if (env.verbose)
+			printf("Skipping '%s' as it's not a BPF object file...\n", filename);
+		env.files_skipped++;
 		return 0;
+	}
 
 	old_libbpf_print_fn = libbpf_set_print(libbpf_print_fn);
 
 	obj = bpf_object__open_file(filename, &opts);
 	if (!obj) {
-		err = -errno;
-		fprintf(stderr, "Failed to open '%s': %d\n", filename, err);
+		/* if libbpf can't open BPF object file, it could be because
+		 * that BPF object file is incomplete and has to be statically
+		 * linked into a final BPF object file; instead of bailing
+		 * out, report it into stderr, mark it as skipped, and
+		 * proceeed
+		 */
+		fprintf(stderr, "Failed to open '%s': %d\n", filename, -errno);
+		env.files_skipped++;
+		err = 0;
 		goto cleanup;
 	}
 
+	env.files_processed++;
+
 	bpf_object__for_each_program(prog, obj) {
 		prog_cnt++;
 	}
@@ -732,8 +794,8 @@ static void output_stats(const struct verif_stats *s, enum resfmt fmt, bool last
 
 	if (last && fmt == RESFMT_TABLE) {
 		output_header_underlines();
-		printf("Done. Processed %d object files, %d programs.\n",
-		       env.filename_cnt, env.prog_stat_cnt);
+		printf("Done. Processed %d files, %d programs. Skipped %d files, %d programs.\n",
+		       env.files_processed, env.files_skipped, env.progs_processed, env.progs_skipped);
 	}
 }
 
-- 
cgit v1.2.3


From c511d009ceb8cd980e4a823b7ca74abbdc7cdccc Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Fri, 23 Sep 2022 10:59:12 -0700
Subject: selftests/bpf: emit processing progress and add quiet mode to
 veristat

Emit "Processing <filepath>..." for each BPF object file to be
processed, to show progress. But also add -q (--quiet) flag to silence
such messages. Doing something more clever (like overwriting same output
line) is too cumbersome and easily breakable if there is any other
console output (e.g., errors from libbpf).

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/r/20220923175913.3272430-5-andrii@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/veristat.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c
index f09dd143a8df..85a77f1dd863 100644
--- a/tools/testing/selftests/bpf/veristat.c
+++ b/tools/testing/selftests/bpf/veristat.c
@@ -63,6 +63,7 @@ static struct env {
 	char **filenames;
 	int filename_cnt;
 	bool verbose;
+	bool quiet;
 	enum resfmt out_fmt;
 	bool comparison_mode;
 
@@ -107,6 +108,7 @@ const char argp_program_doc[] =
 static const struct argp_option opts[] = {
 	{ NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" },
 	{ "verbose", 'v', NULL, 0, "Verbose mode" },
+	{ "quiet", 'q', NULL, 0, "Quiet mode" },
 	{ "emit", 'e', "SPEC", 0, "Specify stats to be emitted" },
 	{ "sort", 's', "SPEC", 0, "Specify sort order" },
 	{ "output-format", 'o', "FMT", 0, "Result output format (table, csv), default is table." },
@@ -131,6 +133,9 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
 	case 'v':
 		env.verbose = true;
 		break;
+	case 'q':
+		env.quiet = true;
+		break;
 	case 'e':
 		err = parse_stats(arg, &env.output_spec);
 		if (err)
@@ -569,8 +574,10 @@ static int process_obj(const char *filename)
 		return 0;
 	}
 
-	old_libbpf_print_fn = libbpf_set_print(libbpf_print_fn);
+	if (!env.quiet && env.out_fmt == RESFMT_TABLE)
+		printf("Processing '%s'...\n", basename(filename));
 
+	old_libbpf_print_fn = libbpf_set_print(libbpf_print_fn);
 	obj = bpf_object__open_file(filename, &opts);
 	if (!obj) {
 		/* if libbpf can't open BPF object file, it could be because
@@ -1268,6 +1275,12 @@ int main(int argc, char **argv)
 	if (argp_parse(&argp, argc, argv, 0, NULL, NULL))
 		return 1;
 
+	if (env.verbose && env.quiet) {
+		fprintf(stderr, "Verbose and quiet modes are incompatible, please specify just one or neither!\n");
+		argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
+		return 1;
+	}
+
 	if (env.output_spec.spec_cnt == 0)
 		env.output_spec = default_output_spec;
 	if (env.sort_spec.spec_cnt == 0)
-- 
cgit v1.2.3


From e310efc5ddde04c41aa0501b5a7235b134c5fc6c Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Fri, 23 Sep 2022 10:59:13 -0700
Subject: selftests/bpf: allow to adjust BPF verifier log level in veristat

Add -l (--log-level) flag to override default BPF verifier log level.
This only matters in verbose mode, which is the mode in which veristat
emits verifier log for each processed BPF program.

This is important because for successfully verified BPF programs
log_level 1 is empty, as BPF verifier truncates all the successfully
verified paths. So -l2 is the only way to actually get BPF verifier log
in practice. It looks something like this:

  [vmuser@archvm bpf]$ sudo ./veristat xdp_tx.bpf.o -vl2
  Processing 'xdp_tx.bpf.o'...
  PROCESSING xdp_tx.bpf.o/xdp_tx, DURATION US: 19, VERDICT: success, VERIFIER LOG:
  func#0 @0
  0: R1=ctx(off=0,imm=0) R10=fp0
  ; return XDP_TX;
  0: (b4) w0 = 3                        ; R0_w=3
  1: (95) exit
  verification time 19 usec
  stack depth 0
  processed 2 insns (limit 1000000) max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0

  File          Program  Verdict  Duration (us)  Total insns  Total states  Peak states
  ------------  -------  -------  -------------  -----------  ------------  -----------
  xdp_tx.bpf.o  xdp_tx   success             19            2             0            0
  ------------  -------  -------  -------------  -----------  ------------  -----------
  Done. Processed 1 files, 0 programs. Skipped 1 files, 0 programs.

Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/r/20220923175913.3272430-6-andrii@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/veristat.c | 14 +++++++++++++-
 1 file changed, 13 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c
index 85a77f1dd863..b0d83a28e348 100644
--- a/tools/testing/selftests/bpf/veristat.c
+++ b/tools/testing/selftests/bpf/veristat.c
@@ -64,6 +64,7 @@ static struct env {
 	int filename_cnt;
 	bool verbose;
 	bool quiet;
+	int log_level;
 	enum resfmt out_fmt;
 	bool comparison_mode;
 
@@ -108,6 +109,7 @@ const char argp_program_doc[] =
 static const struct argp_option opts[] = {
 	{ NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" },
 	{ "verbose", 'v', NULL, 0, "Verbose mode" },
+	{ "log-level", 'l', "LEVEL", 0, "Verifier log level (default 0 for normal mode, 1 for verbose mode)" },
 	{ "quiet", 'q', NULL, 0, "Quiet mode" },
 	{ "emit", 'e', "SPEC", 0, "Specify stats to be emitted" },
 	{ "sort", 's', "SPEC", 0, "Specify sort order" },
@@ -156,6 +158,14 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
 			return -EINVAL;
 		}
 		break;
+	case 'l':
+		errno = 0;
+		env.log_level = strtol(arg, NULL, 10);
+		if (errno) {
+			fprintf(stderr, "invalid log level: %s\n", arg);
+			argp_usage(state);
+		}
+		break;
 	case 'C':
 		env.comparison_mode = true;
 		break;
@@ -526,7 +536,7 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf
 		if (!buf)
 			return -ENOMEM;
 		bpf_program__set_log_buf(prog, buf, buf_sz);
-		bpf_program__set_log_level(prog, 1 | 4); /* stats + log */
+		bpf_program__set_log_level(prog, env.log_level | 4); /* stats + log */
 	} else {
 		bpf_program__set_log_buf(prog, buf, buf_sz);
 		bpf_program__set_log_level(prog, 4); /* only verifier stats */
@@ -1280,6 +1290,8 @@ int main(int argc, char **argv)
 		argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
 		return 1;
 	}
+	if (env.verbose && env.log_level == 0)
+		env.log_level = 1;
 
 	if (env.output_spec.spec_cnt == 0)
 		env.output_spec = default_output_spec;
-- 
cgit v1.2.3


From 043a7356dbd0f44b2a2161649d89f4a43f3b0180 Mon Sep 17 00:00:00 2001
From: Liu Jian <liujian56@huawei.com>
Date: Tue, 23 Aug 2022 21:37:55 +0800
Subject: selftests/bpf: Add wait send memory test for sockmap redirect

Add a test for waiting on the redirect socket's send memory for sockmap.

Signed-off-by: Liu Jian <liujian56@huawei.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20220823133755.314697-3-liujian56@huawei.com
---
 tools/testing/selftests/bpf/test_sockmap.c | 42 ++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index dcb038e342d8..e768181a1bd7 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -138,6 +138,7 @@ struct sockmap_options {
 	bool data_test;
 	bool drop_expected;
 	bool check_recved_len;
+	bool tx_wait_mem;
 	int iov_count;
 	int iov_length;
 	int rate;
@@ -578,6 +579,10 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
 			sent = sendmsg(fd, &msg, flags);
 
 			if (!drop && sent < 0) {
+				if (opt->tx_wait_mem && errno == EACCES) {
+					errno = 0;
+					goto out_errno;
+				}
 				perror("sendmsg loop error");
 				goto out_errno;
 			} else if (drop && sent >= 0) {
@@ -644,6 +649,15 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
 				goto out_errno;
 			}
 
+			if (opt->tx_wait_mem) {
+				FD_ZERO(&w);
+				FD_SET(fd, &w);
+				slct = select(max_fd + 1, NULL, NULL, &w, &timeout);
+				errno = 0;
+				close(fd);
+				goto out_errno;
+			}
+
 			errno = 0;
 			if (peek_flag) {
 				flags |= MSG_PEEK;
@@ -752,6 +766,22 @@ static int sendmsg_test(struct sockmap_options *opt)
 			return err;
 	}
 
+	if (opt->tx_wait_mem) {
+		struct timeval timeout;
+		int rxtx_buf_len = 1024;
+
+		timeout.tv_sec = 3;
+		timeout.tv_usec = 0;
+
+		err = setsockopt(c2, SOL_SOCKET, SO_SNDTIMEO, &timeout, sizeof(struct timeval));
+		err |= setsockopt(c2, SOL_SOCKET, SO_SNDBUFFORCE, &rxtx_buf_len, sizeof(int));
+		err |= setsockopt(p2, SOL_SOCKET, SO_RCVBUFFORCE, &rxtx_buf_len, sizeof(int));
+		if (err) {
+			perror("setsockopt failed()");
+			return errno;
+		}
+	}
+
 	rxpid = fork();
 	if (rxpid == 0) {
 		if (txmsg_pop || txmsg_start_pop)
@@ -788,6 +818,9 @@ static int sendmsg_test(struct sockmap_options *opt)
 		return errno;
 	}
 
+	if (opt->tx_wait_mem)
+		close(c2);
+
 	txpid = fork();
 	if (txpid == 0) {
 		if (opt->sendpage)
@@ -1452,6 +1485,14 @@ static void test_txmsg_redir(int cgrp, struct sockmap_options *opt)
 	test_send(opt, cgrp);
 }
 
+static void test_txmsg_redir_wait_sndmem(int cgrp, struct sockmap_options *opt)
+{
+	txmsg_redir = 1;
+	opt->tx_wait_mem = true;
+	test_send_large(opt, cgrp);
+	opt->tx_wait_mem = false;
+}
+
 static void test_txmsg_drop(int cgrp, struct sockmap_options *opt)
 {
 	txmsg_drop = 1;
@@ -1800,6 +1841,7 @@ static int populate_progs(char *bpf_file)
 struct _test test[] = {
 	{"txmsg test passthrough", test_txmsg_pass},
 	{"txmsg test redirect", test_txmsg_redir},
+	{"txmsg test redirect wait send mem", test_txmsg_redir_wait_sndmem},
 	{"txmsg test drop", test_txmsg_drop},
 	{"txmsg test ingress redirect", test_txmsg_ingress_redir},
 	{"txmsg test skb", test_txmsg_skb},
-- 
cgit v1.2.3


From 7d45b5fd27b4ca53c19dba79d9d4936d6cb0f0ca Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Sat, 24 Sep 2022 00:00:12 +0300
Subject: selftests: net: tsn_lib: don't overwrite isochron receiver extra args
 with UDS

The extra_args argument ($3) of isochron_recv_start is overwritten with
uds ($2), if that argument exists.

This is currently not a problem, because the only TSN selftest
(ocelot/psfp.sh) omits remote sync so it does not specify to the
receiver a UNIX domain socket for ptp4l. So $uds is currently an empty
string.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Kurt Kanzenbach <kurt@linutronix.de>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/forwarding/tsn_lib.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/tsn_lib.sh b/tools/testing/selftests/net/forwarding/tsn_lib.sh
index 60a1423e8116..1c8e36c56f32 100644
--- a/tools/testing/selftests/net/forwarding/tsn_lib.sh
+++ b/tools/testing/selftests/net/forwarding/tsn_lib.sh
@@ -139,7 +139,7 @@ isochron_recv_start()
 	local extra_args=$3
 
 	if ! [ -z "${uds}" ]; then
-		extra_args="--unix-domain-socket ${uds}"
+		extra_args="${extra_args} --unix-domain-socket ${uds}"
 	fi
 
 	isochron rcv \
-- 
cgit v1.2.3


From 7ff9396ee82c84ad18b897f70e4486c9ad1693f8 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Sat, 24 Sep 2022 00:00:13 +0300
Subject: selftests: net: tsn_lib: allow running ptp4l on multiple interfaces

Switch ports will want to act as Boundary Clocks, which are configured
using ptp4l by specifying the "-i" argument multiple times.

Since we track a log file and a pid file for each ptp4l instance, and we
want to be compatible with the existing single-port callers of
ptp4l_start and ptp4l_stop, pass the interface list as a single string
of space-separated values. Based on this, we create a label for each
ptp4l instance, where the spaces are replaced with underscores
(ptp4l_start "eth0 eth1" generates "ptp4l_pid_eth0_eth1").

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Kurt Kanzenbach <kurt@linutronix.de>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/forwarding/tsn_lib.sh | 27 ++++++++++++++++-------
 1 file changed, 19 insertions(+), 8 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/tsn_lib.sh b/tools/testing/selftests/net/forwarding/tsn_lib.sh
index 1c8e36c56f32..ace9c4f06805 100644
--- a/tools/testing/selftests/net/forwarding/tsn_lib.sh
+++ b/tools/testing/selftests/net/forwarding/tsn_lib.sh
@@ -53,15 +53,27 @@ phc2sys_stop()
 	rm "${phc2sys_log}" 2> /dev/null
 }
 
+# Replace space separators from interface list with underscores
+if_names_to_label()
+{
+	local if_name_list="$1"
+
+	echo "${if_name_list/ /_}"
+}
+
 ptp4l_start()
 {
-	local if_name=$1
+	local if_names="$1"
 	local slave_only=$2
 	local uds_address=$3
-	local log="ptp4l_log_${if_name}"
-	local pid="ptp4l_pid_${if_name}"
+	local log="ptp4l_log_$(if_names_to_label ${if_names})"
+	local pid="ptp4l_pid_$(if_names_to_label ${if_names})"
 	local extra_args=""
 
+	for if_name in ${if_names}; do
+		extra_args="${extra_args} -i ${if_name}"
+	done
+
 	if [ "${slave_only}" = true ]; then
 		extra_args="${extra_args} -s"
 	fi
@@ -71,7 +83,6 @@ ptp4l_start()
 	declare -g "${log}=$(mktemp)"
 
 	chrt -f 10 ptp4l -m -2 -P \
-		-i ${if_name} \
 		--step_threshold 0.00002 \
 		--first_step_threshold 0.00002 \
 		--tx_timestamp_timeout 100 \
@@ -80,16 +91,16 @@ ptp4l_start()
 		> "${!log}" 2>&1 &
 	declare -g "${pid}=$!"
 
-	echo "ptp4l for interface ${if_name} logs to ${!log} and has pid ${!pid}"
+	echo "ptp4l for interfaces ${if_names} logs to ${!log} and has pid ${!pid}"
 
 	sleep 1
 }
 
 ptp4l_stop()
 {
-	local if_name=$1
-	local log="ptp4l_log_${if_name}"
-	local pid="ptp4l_pid_${if_name}"
+	local if_names="$1"
+	local log="ptp4l_log_$(if_names_to_label ${if_names})"
+	local pid="ptp4l_pid_$(if_names_to_label ${if_names})"
 
 	{ kill ${!pid} && wait ${!pid}; } 2> /dev/null
 	rm "${!log}" 2> /dev/null
-- 
cgit v1.2.3


From a7ce95ac837d9e74a66e5a2825595476722bb616 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Sat, 24 Sep 2022 00:00:14 +0300
Subject: selftests: net: tsn_lib: allow multiple isochron receivers

Move the PID variable for the isochron receiver into a separate
namespace per stats port, to allow multiple receivers (and/or
orchestration daemons) to be instantiated by the same script.

Preserve the existing behavior by making isochron_do() use the default
stats TCP port of 5000.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Kurt Kanzenbach <kurt@linutronix.de>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/forwarding/tsn_lib.sh | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/forwarding/tsn_lib.sh b/tools/testing/selftests/net/forwarding/tsn_lib.sh
index ace9c4f06805..20c2b411ba36 100644
--- a/tools/testing/selftests/net/forwarding/tsn_lib.sh
+++ b/tools/testing/selftests/net/forwarding/tsn_lib.sh
@@ -147,7 +147,9 @@ isochron_recv_start()
 {
 	local if_name=$1
 	local uds=$2
-	local extra_args=$3
+	local stats_port=$3
+	local extra_args=$4
+	local pid="isochron_pid_${stats_port}"
 
 	if ! [ -z "${uds}" ]; then
 		extra_args="${extra_args} --unix-domain-socket ${uds}"
@@ -158,16 +160,20 @@ isochron_recv_start()
 		--sched-priority 98 \
 		--sched-fifo \
 		--utc-tai-offset ${UTC_TAI_OFFSET} \
+		--stats-port ${stats_port} \
 		--quiet \
 		${extra_args} & \
-	isochron_pid=$!
+	declare -g "${pid}=$!"
 
 	sleep 1
 }
 
 isochron_recv_stop()
 {
-	{ kill ${isochron_pid} && wait ${isochron_pid}; } 2> /dev/null
+	local stats_port=$1
+	local pid="isochron_pid_${stats_port}"
+
+	{ kill ${!pid} && wait ${!pid}; } 2> /dev/null
 }
 
 isochron_do()
@@ -219,7 +225,7 @@ isochron_do()
 
 	cpufreq_max ${ISOCHRON_CPU}
 
-	isochron_recv_start "${h2}" "${receiver_uds}" "${receiver_extra_args}"
+	isochron_recv_start "${h2}" "${receiver_uds}" 5000 "${receiver_extra_args}"
 
 	isochron send \
 		--interface ${sender_if_name} \
@@ -240,7 +246,7 @@ isochron_do()
 		${extra_args} \
 		--quiet
 
-	isochron_recv_stop
+	isochron_recv_stop 5000
 
 	cpufreq_restore ${ISOCHRON_CPU}
 }
-- 
cgit v1.2.3


From 162d52dfee445c21d1fa5c4b1f55765946df4d3e Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Sat, 24 Sep 2022 00:00:15 +0300
Subject: selftests: net: tsn_lib: run phc2sys in automatic mode

We can make the phc2sys helper not only synchronize a PHC to
CLOCK_REALTIME, which is what it currently does, but also CLOCK_REALTIME
to a PHC, which is going to be needed in distributed TSN tests.

Instead of making the complexity of the arguments passed to
phc2sys_start() explode, we can let it figure out the sync direction
automatically, based on ptp4l's port states.

Towards that goal, pass just the path to the desired ptp4l instance's
UNIX domain socket, and remove the $if_name argument (from which it
derives the PHC). Also adapt the one caller from the ocelot psfp.sh
test. In the case of psfp.sh, phc2sys_start is able to properly figure
out that CLOCK_REALTIME is the source clock and swp1's PHC is the
destination, because of the way in which ptp4l_start for the
UDS_ADDRESS_SWP1 was called: with slave_only=false, so it will always
win the BMCA and always become the sync master between itself and $h1.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Kurt Kanzenbach <kurt@linutronix.de>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/drivers/net/ocelot/psfp.sh | 2 +-
 tools/testing/selftests/net/forwarding/tsn_lib.sh  | 7 ++-----
 2 files changed, 3 insertions(+), 6 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/drivers/net/ocelot/psfp.sh b/tools/testing/selftests/drivers/net/ocelot/psfp.sh
index 5a5cee92c665..bed748dde4b0 100755
--- a/tools/testing/selftests/drivers/net/ocelot/psfp.sh
+++ b/tools/testing/selftests/drivers/net/ocelot/psfp.sh
@@ -181,7 +181,7 @@ setup_prepare()
 
 	# Set up swp1 as a master PHC for h1, synchronized to the local
 	# CLOCK_REALTIME.
-	phc2sys_start ${swp1} ${UDS_ADDRESS_SWP1}
+	phc2sys_start ${UDS_ADDRESS_SWP1}
 
 	# Assumption true for LS1028A: h1 and h2 use the same PHC. So by
 	# synchronizing h1 to swp1 via PTP, h2 is also implicitly synchronized
diff --git a/tools/testing/selftests/net/forwarding/tsn_lib.sh b/tools/testing/selftests/net/forwarding/tsn_lib.sh
index 20c2b411ba36..b91bcd8008a9 100644
--- a/tools/testing/selftests/net/forwarding/tsn_lib.sh
+++ b/tools/testing/selftests/net/forwarding/tsn_lib.sh
@@ -22,8 +22,7 @@ fi
 
 phc2sys_start()
 {
-	local if_name=$1
-	local uds_address=$2
+	local uds_address=$1
 	local extra_args=""
 
 	if ! [ -z "${uds_address}" ]; then
@@ -33,9 +32,7 @@ phc2sys_start()
 	phc2sys_log="$(mktemp)"
 
 	chrt -f 10 phc2sys -m \
-		-c ${if_name} \
-		-s CLOCK_REALTIME \
-		-O ${UTC_TAI_OFFSET} \
+		-a -rr \
 		--step_threshold 0.00002 \
 		--first_step_threshold 0.00002 \
 		${extra_args} \
-- 
cgit v1.2.3


From c09eb2e578eb1668bbc84dc07e8d8bd6f04b9a02 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Mon, 26 Sep 2022 17:33:38 +0200
Subject: bpf: Adjust kprobe_multi entry_ip for CONFIG_X86_KERNEL_IBT

Martynas reported bpf_get_func_ip returning +4 address when
CONFIG_X86_KERNEL_IBT option is enabled.

When CONFIG_X86_KERNEL_IBT is enabled we'll have an endbr instruction
at the function entry, which breaks the return value of the
bpf_get_func_ip() helper that should return the function address.

There's a short-term workaround for the kprobe_multi bpf program made by
Alexei [1], but we need this fixup also for bpf_get_attach_cookie,
which returns the cookie based on the entry_ip value.

Moving the fixup in the fprobe handler, so both bpf_get_func_ip
and bpf_get_attach_cookie get expected function address when
CONFIG_X86_KERNEL_IBT option is enabled.

Also renaming kprobe_multi_link_handler entry_ip argument to fentry_ip
so it's clearer this is an ftrace __fentry__ ip.

[1] commit 7f0059b58f02 ("selftests/bpf: Fix kprobe_multi test.")

Cc: Peter Zijlstra <peterz@infradead.org>
Reported-by: Martynas Pumputis <m@lambda.lt>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Link: https://lore.kernel.org/r/20220926153340.1621984-5-jolsa@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/trace/bpf_trace.c                         | 20 ++++++++++++++++++--
 tools/testing/selftests/bpf/progs/kprobe_multi.c |  4 +---
 2 files changed, 19 insertions(+), 5 deletions(-)

(limited to 'tools')

diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index b05f0310dbd3..ebd1b348beb3 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1028,6 +1028,22 @@ static const struct bpf_func_proto bpf_get_func_ip_proto_tracing = {
 	.arg1_type	= ARG_PTR_TO_CTX,
 };
 
+#ifdef CONFIG_X86_KERNEL_IBT
+static unsigned long get_entry_ip(unsigned long fentry_ip)
+{
+	u32 instr;
+
+	/* Being extra safe in here in case entry ip is on the page-edge. */
+	if (get_kernel_nofault(instr, (u32 *) fentry_ip - 1))
+		return fentry_ip;
+	if (is_endbr(instr))
+		fentry_ip -= ENDBR_INSN_SIZE;
+	return fentry_ip;
+}
+#else
+#define get_entry_ip(fentry_ip) fentry_ip
+#endif
+
 BPF_CALL_1(bpf_get_func_ip_kprobe, struct pt_regs *, regs)
 {
 	struct kprobe *kp = kprobe_running();
@@ -2600,13 +2616,13 @@ kprobe_multi_link_prog_run(struct bpf_kprobe_multi_link *link,
 }
 
 static void
-kprobe_multi_link_handler(struct fprobe *fp, unsigned long entry_ip,
+kprobe_multi_link_handler(struct fprobe *fp, unsigned long fentry_ip,
 			  struct pt_regs *regs)
 {
 	struct bpf_kprobe_multi_link *link;
 
 	link = container_of(fp, struct bpf_kprobe_multi_link, fp);
-	kprobe_multi_link_prog_run(link, entry_ip, regs);
+	kprobe_multi_link_prog_run(link, get_entry_ip(fentry_ip), regs);
 }
 
 static int symbols_cmp_r(const void *a, const void *b, const void *priv)
diff --git a/tools/testing/selftests/bpf/progs/kprobe_multi.c b/tools/testing/selftests/bpf/progs/kprobe_multi.c
index 08f95a8155d1..98c3399e15c0 100644
--- a/tools/testing/selftests/bpf/progs/kprobe_multi.c
+++ b/tools/testing/selftests/bpf/progs/kprobe_multi.c
@@ -36,15 +36,13 @@ __u64 kretprobe_test6_result = 0;
 __u64 kretprobe_test7_result = 0;
 __u64 kretprobe_test8_result = 0;
 
-extern bool CONFIG_X86_KERNEL_IBT __kconfig __weak;
-
 static void kprobe_multi_check(void *ctx, bool is_return)
 {
 	if (bpf_get_current_pid_tgid() >> 32 != pid)
 		return;
 
 	__u64 cookie = test_cookie ? bpf_get_attach_cookie(ctx) : 0;
-	__u64 addr = bpf_get_func_ip(ctx) - (CONFIG_X86_KERNEL_IBT ? 4 : 0);
+	__u64 addr = bpf_get_func_ip(ctx);
 
 #define SET(__var, __addr, __cookie) ({			\
 	if (((const void *) addr == __addr) &&		\
-- 
cgit v1.2.3


From 0e253f7e558a3e250902ba2034091e0185448836 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Mon, 26 Sep 2022 17:33:39 +0200
Subject: bpf: Return value in kprobe get_func_ip only for entry address

Changing return value of kprobe's version of bpf_get_func_ip
to return zero if the attach address is not on the function's
entry point.

For kprobes attached in the middle of the function we can't easily
get to the function address especially now with the CONFIG_X86_KERNEL_IBT
support.

If user cares about current IP for kprobes attached within the
function body, they can get it with PT_REGS_IP(ctx).

Suggested-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Martynas Pumputis <m@lambda.lt>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Link: https://lore.kernel.org/r/20220926153340.1621984-6-jolsa@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/uapi/linux/bpf.h                             | 1 +
 kernel/trace/bpf_trace.c                             | 5 ++++-
 tools/include/uapi/linux/bpf.h                       | 1 +
 tools/testing/selftests/bpf/progs/get_func_ip_test.c | 4 ++--
 4 files changed, 8 insertions(+), 3 deletions(-)

(limited to 'tools')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index ead35f39f185..d6bd10759eaf 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -4951,6 +4951,7 @@ union bpf_attr {
  * 		Get address of the traced function (for tracing and kprobe programs).
  * 	Return
  * 		Address of the traced function.
+ * 		0 for kprobes placed within the function (not at the entry).
  *
  * u64 bpf_get_attach_cookie(void *ctx)
  * 	Description
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index ebd1b348beb3..688552df95ca 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1048,7 +1048,10 @@ BPF_CALL_1(bpf_get_func_ip_kprobe, struct pt_regs *, regs)
 {
 	struct kprobe *kp = kprobe_running();
 
-	return kp ? (uintptr_t)kp->addr : 0;
+	if (!kp || !(kp->flags & KPROBE_FLAG_ON_FUNC_ENTRY))
+		return 0;
+
+	return get_entry_ip((uintptr_t)kp->addr);
 }
 
 static const struct bpf_func_proto bpf_get_func_ip_proto_kprobe = {
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index ead35f39f185..d6bd10759eaf 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -4951,6 +4951,7 @@ union bpf_attr {
  * 		Get address of the traced function (for tracing and kprobe programs).
  * 	Return
  * 		Address of the traced function.
+ * 		0 for kprobes placed within the function (not at the entry).
  *
  * u64 bpf_get_attach_cookie(void *ctx)
  * 	Description
diff --git a/tools/testing/selftests/bpf/progs/get_func_ip_test.c b/tools/testing/selftests/bpf/progs/get_func_ip_test.c
index a587aeca5ae0..6db70757bc8b 100644
--- a/tools/testing/selftests/bpf/progs/get_func_ip_test.c
+++ b/tools/testing/selftests/bpf/progs/get_func_ip_test.c
@@ -69,7 +69,7 @@ int test6(struct pt_regs *ctx)
 {
 	__u64 addr = bpf_get_func_ip(ctx);
 
-	test6_result = (const void *) addr == &bpf_fentry_test6 + 5;
+	test6_result = (const void *) addr == 0;
 	return 0;
 }
 
@@ -79,6 +79,6 @@ int test7(struct pt_regs *ctx)
 {
 	__u64 addr = bpf_get_func_ip(ctx);
 
-	test7_result = (const void *) addr == &bpf_fentry_test7 + 5;
+	test7_result = (const void *) addr == 0;
 	return 0;
 }
-- 
cgit v1.2.3


From 738c345b74b8d11edd01b6cee5628c6b8368d8ea Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Mon, 26 Sep 2022 17:33:40 +0200
Subject: selftests/bpf: Fix get_func_ip offset test for CONFIG_X86_KERNEL_IBT

With CONFIG_X86_KERNEL_IBT enabled the test for kprobe with offset
won't work because of the extra endbr instruction.

As suggested by Andrii adding CONFIG_X86_KERNEL_IBT detection
and using appropriate offset value based on that.

Also removing test7 program, because it does the same as test6.

Suggested-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Link: https://lore.kernel.org/r/20220926153340.1621984-7-jolsa@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 .../selftests/bpf/prog_tests/get_func_ip_test.c    | 59 ++++++++++++++++++----
 .../testing/selftests/bpf/progs/get_func_ip_test.c | 23 +++++----
 2 files changed, 60 insertions(+), 22 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c b/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c
index 938dbd4d7c2f..fede8ef58b5b 100644
--- a/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c
@@ -2,7 +2,7 @@
 #include <test_progs.h>
 #include "get_func_ip_test.skel.h"
 
-void test_get_func_ip_test(void)
+static void test_function_entry(void)
 {
 	struct get_func_ip_test *skel = NULL;
 	int err, prog_fd;
@@ -12,14 +12,6 @@ void test_get_func_ip_test(void)
 	if (!ASSERT_OK_PTR(skel, "get_func_ip_test__open"))
 		return;
 
-	/* test6 is x86_64 specifc because of the instruction
-	 * offset, disabling it for all other archs
-	 */
-#ifndef __x86_64__
-	bpf_program__set_autoload(skel->progs.test6, false);
-	bpf_program__set_autoload(skel->progs.test7, false);
-#endif
-
 	err = get_func_ip_test__load(skel);
 	if (!ASSERT_OK(err, "get_func_ip_test__load"))
 		goto cleanup;
@@ -43,11 +35,56 @@ void test_get_func_ip_test(void)
 	ASSERT_EQ(skel->bss->test3_result, 1, "test3_result");
 	ASSERT_EQ(skel->bss->test4_result, 1, "test4_result");
 	ASSERT_EQ(skel->bss->test5_result, 1, "test5_result");
+
+cleanup:
+	get_func_ip_test__destroy(skel);
+}
+
+/* test6 is x86_64 specific because of the instruction
+ * offset, disabling it for all other archs
+ */
 #ifdef __x86_64__
+static void test_function_body(void)
+{
+	struct get_func_ip_test *skel = NULL;
+	LIBBPF_OPTS(bpf_test_run_opts, topts);
+	LIBBPF_OPTS(bpf_kprobe_opts, kopts);
+	struct bpf_link *link6 = NULL;
+	int err, prog_fd;
+
+	skel = get_func_ip_test__open();
+	if (!ASSERT_OK_PTR(skel, "get_func_ip_test__open"))
+		return;
+
+	bpf_program__set_autoload(skel->progs.test6, true);
+
+	err = get_func_ip_test__load(skel);
+	if (!ASSERT_OK(err, "get_func_ip_test__load"))
+		goto cleanup;
+
+	kopts.offset = skel->kconfig->CONFIG_X86_KERNEL_IBT ? 9 : 5;
+
+	link6 = bpf_program__attach_kprobe_opts(skel->progs.test6, "bpf_fentry_test6", &kopts);
+	if (!ASSERT_OK_PTR(link6, "link6"))
+		goto cleanup;
+
+	prog_fd = bpf_program__fd(skel->progs.test1);
+	err = bpf_prog_test_run_opts(prog_fd, &topts);
+	ASSERT_OK(err, "test_run");
+	ASSERT_EQ(topts.retval, 0, "test_run");
+
 	ASSERT_EQ(skel->bss->test6_result, 1, "test6_result");
-	ASSERT_EQ(skel->bss->test7_result, 1, "test7_result");
-#endif
 
 cleanup:
+	bpf_link__destroy(link6);
 	get_func_ip_test__destroy(skel);
 }
+#else
+#define test_function_body()
+#endif
+
+void test_get_func_ip_test(void)
+{
+	test_function_entry();
+	test_function_body();
+}
diff --git a/tools/testing/selftests/bpf/progs/get_func_ip_test.c b/tools/testing/selftests/bpf/progs/get_func_ip_test.c
index 6db70757bc8b..8559e698b40d 100644
--- a/tools/testing/selftests/bpf/progs/get_func_ip_test.c
+++ b/tools/testing/selftests/bpf/progs/get_func_ip_test.c
@@ -2,6 +2,7 @@
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
+#include <stdbool.h>
 
 char _license[] SEC("license") = "GPL";
 
@@ -13,6 +14,16 @@ extern const void bpf_modify_return_test __ksym;
 extern const void bpf_fentry_test6 __ksym;
 extern const void bpf_fentry_test7 __ksym;
 
+extern bool CONFIG_X86_KERNEL_IBT __kconfig __weak;
+
+/* This function is here to have CONFIG_X86_KERNEL_IBT
+ * used and added to object BTF.
+ */
+int unused(void)
+{
+	return CONFIG_X86_KERNEL_IBT ? 0 : 1;
+}
+
 __u64 test1_result = 0;
 SEC("fentry/bpf_fentry_test1")
 int BPF_PROG(test1, int a)
@@ -64,7 +75,7 @@ int BPF_PROG(test5, int a, int *b, int ret)
 }
 
 __u64 test6_result = 0;
-SEC("kprobe/bpf_fentry_test6+0x5")
+SEC("?kprobe")
 int test6(struct pt_regs *ctx)
 {
 	__u64 addr = bpf_get_func_ip(ctx);
@@ -72,13 +83,3 @@ int test6(struct pt_regs *ctx)
 	test6_result = (const void *) addr == 0;
 	return 0;
 }
-
-__u64 test7_result = 0;
-SEC("kprobe/bpf_fentry_test7+5")
-int test7(struct pt_regs *ctx)
-{
-	__u64 addr = bpf_get_func_ip(ctx);
-
-	test7_result = (const void *) addr == 0;
-	return 0;
-}
-- 
cgit v1.2.3


From 6a4ab8869d0bfcf83d7c5184561df8235553cf28 Mon Sep 17 00:00:00 2001
From: Jon Doron <jond@wiz.io>
Date: Sun, 25 Sep 2022 10:04:31 +0300
Subject: libbpf: Fix the case of running as non-root with capabilities

When running rootless with special capabilities like:
FOWNER / DAC_OVERRIDE / DAC_READ_SEARCH

The "access" API will not make the proper check if there is really
access to a file or not.

From the access man page:
"
The check is done using the calling process's real UID and GID, rather
than the effective IDs as is done when actually attempting an operation
(e.g., open(2)) on the file.  Similarly, for the root user, the check
uses the set of permitted capabilities  rather than the set of effective
capabilities; ***and for non-root users, the check uses an empty set of
capabilities.***
"

What that means is that for non-root user the access API will not do the
proper validation if the process really has permission to a file or not.

To resolve this, this patch replaces all the access API calls with
faccessat with the AT_EACCESS flag.

Signed-off-by: Jon Doron <jond@wiz.io>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220925070431.1313680-1-arilou@gmail.com
---
 tools/lib/bpf/btf.c    | 2 +-
 tools/lib/bpf/libbpf.c | 6 +++---
 tools/lib/bpf/usdt.c   | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c
index b4d9a96c3c1b..d88647da2c7f 100644
--- a/tools/lib/bpf/btf.c
+++ b/tools/lib/bpf/btf.c
@@ -4664,7 +4664,7 @@ struct btf *btf__load_vmlinux_btf(void)
 	for (i = 0; i < ARRAY_SIZE(locations); i++) {
 		snprintf(path, PATH_MAX, locations[i], buf.release);
 
-		if (access(path, R_OK))
+		if (faccessat(AT_FDCWD, path, R_OK, AT_EACCESS))
 			continue;
 
 		btf = btf__parse(path, NULL);
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index e691f08a297f..184ce1684dcd 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -884,7 +884,7 @@ __u32 get_kernel_version(void)
 	__u32 major, minor, patch;
 	struct utsname info;
 
-	if (access(ubuntu_kver_file, R_OK) == 0) {
+	if (faccessat(AT_FDCWD, ubuntu_kver_file, R_OK, AT_EACCESS) == 0) {
 		FILE *f;
 
 		f = fopen(ubuntu_kver_file, "r");
@@ -9904,7 +9904,7 @@ static bool use_debugfs(void)
 	static int has_debugfs = -1;
 
 	if (has_debugfs < 0)
-		has_debugfs = access(DEBUGFS, F_OK) == 0;
+		has_debugfs = faccessat(AT_FDCWD, DEBUGFS, F_OK, AT_EACCESS) == 0;
 
 	return has_debugfs == 1;
 }
@@ -10721,7 +10721,7 @@ static int resolve_full_path(const char *file, char *result, size_t result_sz)
 				continue;
 			snprintf(result, result_sz, "%.*s/%s", seg_len, s, file);
 			/* ensure it has required permissions */
-			if (access(result, perm) < 0)
+			if (faccessat(AT_FDCWD, result, perm, AT_EACCESS) < 0)
 				continue;
 			pr_debug("resolved '%s' to '%s'\n", file, result);
 			return 0;
diff --git a/tools/lib/bpf/usdt.c b/tools/lib/bpf/usdt.c
index d18e37982344..e83b497c2245 100644
--- a/tools/lib/bpf/usdt.c
+++ b/tools/lib/bpf/usdt.c
@@ -282,7 +282,7 @@ struct usdt_manager *usdt_manager_new(struct bpf_object *obj)
 	 * If this is not supported, USDTs with semaphores will not be supported.
 	 * Added in: a6ca88b241d5 ("trace_uprobe: support reference counter in fd-based uprobe")
 	 */
-	man->has_sema_refcnt = access(ref_ctr_sysfs_path, F_OK) == 0;
+	man->has_sema_refcnt = faccessat(AT_FDCWD, ref_ctr_sysfs_path, F_OK, AT_EACCESS) == 0;
 
 	return man;
 }
-- 
cgit v1.2.3


From 2702c789996d9001cb60ef1dac055aca84d3c51a Mon Sep 17 00:00:00 2001
From: Yauheni Kaliuta <ykaliuta@redhat.com>
Date: Mon, 26 Sep 2022 12:23:20 +0300
Subject: selftests/bpf: Fix passing arguments via function in test_kmod.sh

Since the tests are run in a function, $@ there actually contains the
function arguments, not the script ones.

Pass "$@" to the function as well.

Fixes: 272d1f4cfa3c ("selftests: bpf: test_kmod.sh: Pass parameters to the module")
Signed-off-by: Yauheni Kaliuta <ykaliuta@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20220926092320.564631-1-ykaliuta@redhat.com
---
 tools/testing/selftests/bpf/test_kmod.sh | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/test_kmod.sh b/tools/testing/selftests/bpf/test_kmod.sh
index d4a4279c0181..50dca53ac536 100755
--- a/tools/testing/selftests/bpf/test_kmod.sh
+++ b/tools/testing/selftests/bpf/test_kmod.sh
@@ -29,6 +29,7 @@ test_run()
 	sysctl -w net.core.bpf_jit_harden=$2 2>&1 > /dev/null
 
 	echo "[ JIT enabled:$1 hardened:$2 ]"
+	shift 2
 	dmesg -C
 	if [ -f ${OUTPUT}/lib/test_bpf.ko ]; then
 		insmod ${OUTPUT}/lib/test_bpf.ko "$@" 2> /dev/null
@@ -64,9 +65,9 @@ test_restore()
 
 rc=0
 test_save
-test_run 0 0
-test_run 1 0
-test_run 1 1
-test_run 1 2
+test_run 0 0 "$@"
+test_run 1 0 "$@"
+test_run 1 1 "$@"
+test_run 1 2 "$@"
 test_restore
 exit $rc
-- 
cgit v1.2.3


From 73dfe93ea1b319482e6d82a54fe06f953ceeeccb Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Thu, 22 Sep 2022 20:41:40 +0200
Subject: headers: Remove some left-over license text

Remove some left-over from commit e2be04c7f995 ("License cleanup: add SPDX
license identifier to uapi header files with a license")

When the SPDX-License-Identifier tag has been added, the corresponding
license text has not been removed.

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Acked-by: Alexander Duyck <alexanderduyck@fb.com>
Reviewed-by: Jiri Pirko <jiri@nvidia.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Link: https://lore.kernel.org/r/88410cddd31197ea26840d7dd71612bece8c6acf.1663871981.git.christophe.jaillet@wanadoo.fr
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/uapi/linux/tc_act/tc_bpf.h        |  5 -----
 include/uapi/linux/tc_act/tc_skbedit.h    | 13 -------------
 include/uapi/linux/tc_act/tc_skbmod.h     |  7 +------
 include/uapi/linux/tc_act/tc_tunnel_key.h |  5 -----
 include/uapi/linux/tc_act/tc_vlan.h       |  5 -----
 tools/include/uapi/linux/tc_act/tc_bpf.h  |  5 -----
 6 files changed, 1 insertion(+), 39 deletions(-)

(limited to 'tools')

diff --git a/include/uapi/linux/tc_act/tc_bpf.h b/include/uapi/linux/tc_act/tc_bpf.h
index 653c4f94f76e..fe6c8f8f3e8c 100644
--- a/include/uapi/linux/tc_act/tc_bpf.h
+++ b/include/uapi/linux/tc_act/tc_bpf.h
@@ -1,11 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
 /*
  * Copyright (c) 2015 Jiri Pirko <jiri@resnulli.us>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #ifndef __LINUX_TC_BPF_H
diff --git a/include/uapi/linux/tc_act/tc_skbedit.h b/include/uapi/linux/tc_act/tc_skbedit.h
index 6cb6101208d0..64032513cc4c 100644
--- a/include/uapi/linux/tc_act/tc_skbedit.h
+++ b/include/uapi/linux/tc_act/tc_skbedit.h
@@ -2,19 +2,6 @@
 /*
  * Copyright (c) 2008, Intel Corporation.
  *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms and conditions of the GNU General Public License,
- * version 2, as published by the Free Software Foundation.
- *
- * This program is distributed in the hope it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
- * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
- * more details.
- *
- * You should have received a copy of the GNU General Public License along with
- * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
- * Place - Suite 330, Boston, MA 02111-1307 USA.
- *
  * Author: Alexander Duyck <alexander.h.duyck@intel.com>
  */
 
diff --git a/include/uapi/linux/tc_act/tc_skbmod.h b/include/uapi/linux/tc_act/tc_skbmod.h
index af6ef2cfbf3d..ac62c9a993ea 100644
--- a/include/uapi/linux/tc_act/tc_skbmod.h
+++ b/include/uapi/linux/tc_act/tc_skbmod.h
@@ -1,12 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
 /*
  * Copyright (c) 2016, Jamal Hadi Salim
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
-*/
+ */
 
 #ifndef __LINUX_TC_SKBMOD_H
 #define __LINUX_TC_SKBMOD_H
diff --git a/include/uapi/linux/tc_act/tc_tunnel_key.h b/include/uapi/linux/tc_act/tc_tunnel_key.h
index 3f10dc4e7a4b..49ad4033951b 100644
--- a/include/uapi/linux/tc_act/tc_tunnel_key.h
+++ b/include/uapi/linux/tc_act/tc_tunnel_key.h
@@ -2,11 +2,6 @@
 /*
  * Copyright (c) 2016, Amir Vadai <amir@vadai.me>
  * Copyright (c) 2016, Mellanox Technologies. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #ifndef __LINUX_TC_TUNNEL_KEY_H
diff --git a/include/uapi/linux/tc_act/tc_vlan.h b/include/uapi/linux/tc_act/tc_vlan.h
index 5b306fe815cc..3e1f8e57cdd2 100644
--- a/include/uapi/linux/tc_act/tc_vlan.h
+++ b/include/uapi/linux/tc_act/tc_vlan.h
@@ -1,11 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
 /*
  * Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #ifndef __LINUX_TC_VLAN_H
diff --git a/tools/include/uapi/linux/tc_act/tc_bpf.h b/tools/include/uapi/linux/tc_act/tc_bpf.h
index 653c4f94f76e..fe6c8f8f3e8c 100644
--- a/tools/include/uapi/linux/tc_act/tc_bpf.h
+++ b/tools/include/uapi/linux/tc_act/tc_bpf.h
@@ -1,11 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
 /*
  * Copyright (c) 2015 Jiri Pirko <jiri@resnulli.us>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
  */
 
 #ifndef __LINUX_TC_BPF_H
-- 
cgit v1.2.3


From 7bcd9683e51575c72c9289c05213150245d1c186 Mon Sep 17 00:00:00 2001
From: Pavel Begunkov <asml.silence@gmail.com>
Date: Mon, 26 Sep 2022 11:35:36 +0100
Subject: selftests/net: enable io_uring sendzc testing

d8b6171bd58a5 ("selftests/io_uring: test zerocopy send") added io_uring
zerocopy tests but forgot to enable it in make runs. Add missing
io_uring_zerocopy_tx.sh into TEST_PROGS.

Signed-off-by: Pavel Begunkov <asml.silence@gmail.com>
Link: https://lore.kernel.org/r/28e743602cdd54ffc49f68bbcbcbafc59ba22dc2.1664142210.git.asml.silence@gmail.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/net/Makefile | 1 +
 1 file changed, 1 insertion(+)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index d87e8739bb30..2a6b0bc648c4 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -66,6 +66,7 @@ TEST_GEN_FILES += cmsg_sender
 TEST_GEN_FILES += stress_reuseport_listen
 TEST_PROGS += test_vxlan_vnifiltering.sh
 TEST_GEN_FILES += io_uring_zerocopy_tx
+TEST_PROGS += io_uring_zerocopy_tx.sh
 TEST_GEN_FILES += bind_bhash
 TEST_GEN_PROGS += sk_bind_sendto_listen
 TEST_GEN_PROGS += sk_connect_zero_addr
-- 
cgit v1.2.3


From 87dbdc230d162bf9ee1ac77c8ade178b6b1e199e Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andrii@kernel.org>
Date: Mon, 26 Sep 2022 21:29:39 -0700
Subject: libbpf: Don't require full struct enum64 in UAPI headers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Drop the requirement for system-wide kernel UAPI headers to provide full
struct btf_enum64 definition. This is an unexpected requirement that
slipped in libbpf 1.0 and put unnecessary pressure ([0]) on users to have
a bleeding-edge kernel UAPI header from unreleased Linux 6.0.

To achieve this, we forward declare struct btf_enum64. But that's not
enough as there is btf_enum64_value() helper that expects to know the
layout of struct btf_enum64. So we get a bit creative with
reinterpreting memory layout as array of __u32 and accessing lo32/hi32
fields as array elements. Alternative way would be to have a local
pointer variable for anonymous struct with exactly the same layout as
struct btf_enum64, but that gets us into C++ compiler errors complaining
about invalid type casts. So play it safe, if ugly.

  [0] Closes: https://github.com/libbpf/libbpf/issues/562

Fixes: d90ec262b35b ("libbpf: Add enum64 support for btf_dump")
Reported-by: Toke Høiland-Jørgensen <toke@toke.dk>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Toke Høiland-Jørgensen <toke@toke.dk>
Link: https://lore.kernel.org/bpf/20220927042940.147185-1-andrii@kernel.org
---
 tools/lib/bpf/btf.h | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h
index ae543144ee30..8e6880d91c84 100644
--- a/tools/lib/bpf/btf.h
+++ b/tools/lib/bpf/btf.h
@@ -486,6 +486,8 @@ static inline struct btf_enum *btf_enum(const struct btf_type *t)
 	return (struct btf_enum *)(t + 1);
 }
 
+struct btf_enum64;
+
 static inline struct btf_enum64 *btf_enum64(const struct btf_type *t)
 {
 	return (struct btf_enum64 *)(t + 1);
@@ -493,7 +495,28 @@ static inline struct btf_enum64 *btf_enum64(const struct btf_type *t)
 
 static inline __u64 btf_enum64_value(const struct btf_enum64 *e)
 {
-	return ((__u64)e->val_hi32 << 32) | e->val_lo32;
+	/* struct btf_enum64 is introduced in Linux 6.0, which is very
+	 * bleeding-edge. Here we are avoiding relying on struct btf_enum64
+	 * definition coming from kernel UAPI headers to support wider range
+	 * of system-wide kernel headers.
+	 *
+	 * Given this header can be also included from C++ applications, that
+	 * further restricts C tricks we can use (like using compatible
+	 * anonymous struct). So just treat struct btf_enum64 as
+	 * a three-element array of u32 and access second (lo32) and third
+	 * (hi32) elements directly.
+	 *
+	 * For reference, here is a struct btf_enum64 definition:
+	 *
+	 * const struct btf_enum64 {
+	 *	__u32	name_off;
+	 *	__u32	val_lo32;
+	 *	__u32	val_hi32;
+	 * };
+	 */
+	const __u32 *e64 = (const __u32 *)e;
+
+	return ((__u64)e64[2] << 32) | e64[1];
 }
 
 static inline struct btf_member *btf_members(const struct btf_type *t)
-- 
cgit v1.2.3


From 0335833b10cd0a7998ac1892c03802c1ec014f0d Mon Sep 17 00:00:00 2001
From: Zhengchao Shao <shaozhengchao@huawei.com>
Date: Sat, 24 Sep 2022 10:51:43 +0800
Subject: selftests/tc-testing: add selftests for atm qdisc

Test 7628: Create ATM with default setting
Test 390a: Delete ATM with valid handle
Test 32a0: Show ATM class
Test 6310: Dump ATM stats

Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/tc-testing/config          |  2 +
 .../selftests/tc-testing/tc-tests/qdiscs/atm.json  | 94 ++++++++++++++++++++++
 2 files changed, 96 insertions(+)
 create mode 100644 tools/testing/selftests/tc-testing/tc-tests/qdiscs/atm.json

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config
index a3239d5e40c7..711e9e6cef9b 100644
--- a/tools/testing/selftests/tc-testing/config
+++ b/tools/testing/selftests/tc-testing/config
@@ -12,6 +12,7 @@ CONFIG_NET_SCHED=y
 #
 # Queueing/Scheduling
 #
+CONFIG_NET_SCH_ATM=m
 CONFIG_NET_SCH_PRIO=m
 CONFIG_NET_SCH_INGRESS=m
 
@@ -67,3 +68,4 @@ CONFIG_NETDEVSIM=m
 ## Network testing
 #
 CONFIG_CAN=m
+CONFIG_ATM=y
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/atm.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/atm.json
new file mode 100644
index 000000000000..f5bc8670a67d
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/atm.json
@@ -0,0 +1,94 @@
+[
+    {
+        "id": "7628",
+        "name": "Create ATM with default setting",
+        "category": [
+            "qdisc",
+            "atm"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root atm",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc atm 1: root refcnt",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "390a",
+        "name": "Delete ATM with valid handle",
+        "category": [
+            "qdisc",
+            "atm"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true",
+            "$TC qdisc add dev $DUMMY handle 1: root atm"
+        ],
+        "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 1: root",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc atm 1: root refcnt",
+        "matchCount": "0",
+        "teardown": [
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "32a0",
+        "name": "Show ATM class",
+        "category": [
+            "qdisc",
+            "atm"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root atm",
+        "expExitCode": "0",
+        "verifyCmd": "$TC class show dev $DUMMY",
+        "matchPattern": "class atm 1: parent 1:",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "6310",
+        "name": "Dump ATM stats",
+        "category": [
+            "qdisc",
+            "atm"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root atm",
+        "expExitCode": "0",
+        "verifyCmd": "$TC -s qdisc show dev $DUMMY",
+        "matchPattern": "qdisc atm 1: root refcnt",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    }
+]
-- 
cgit v1.2.3


From 99e0f78d6bdd36b4d435fc14923430041eb9b051 Mon Sep 17 00:00:00 2001
From: Zhengchao Shao <shaozhengchao@huawei.com>
Date: Sat, 24 Sep 2022 10:51:44 +0800
Subject: selftests/tc-testing: add selftests for choke qdisc

Test 8937: Create CHOKE with default setting
Test 48c0: Create CHOKE with min packet setting
Test 38c1: Create CHOKE with max packet setting
Test 234a: Create CHOKE with ecn setting
Test 4380: Create CHOKE with burst setting
Test 48c7: Delete CHOKE with valid handle
Test 4398: Replace CHOKE with min setting
Test 0301: Change CHOKE with limit setting

Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/tc-testing/config          |   1 +
 .../tc-testing/tc-tests/qdiscs/choke.json          | 188 +++++++++++++++++++++
 2 files changed, 189 insertions(+)
 create mode 100644 tools/testing/selftests/tc-testing/tc-tests/qdiscs/choke.json

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config
index 711e9e6cef9b..e104e8ec30aa 100644
--- a/tools/testing/selftests/tc-testing/config
+++ b/tools/testing/selftests/tc-testing/config
@@ -13,6 +13,7 @@ CONFIG_NET_SCHED=y
 # Queueing/Scheduling
 #
 CONFIG_NET_SCH_ATM=m
+CONFIG_NET_SCH_CHOKE=m
 CONFIG_NET_SCH_PRIO=m
 CONFIG_NET_SCH_INGRESS=m
 
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/choke.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/choke.json
new file mode 100644
index 000000000000..31b7775d25fc
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/choke.json
@@ -0,0 +1,188 @@
+[
+    {
+        "id": "8937",
+        "name": "Create CHOKE with default setting",
+        "category": [
+            "qdisc",
+            "choke"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root choke limit 1000 bandwidth 10000",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc choke 1: root refcnt [0-9]+ limit 1000p min 83p max 250p",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "48c0",
+        "name": "Create CHOKE with min packet setting",
+        "category": [
+            "qdisc",
+            "choke"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root choke limit 1000 bandwidth 10000 min 100",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc choke 1: root refcnt [0-9]+ limit 1000p min 100p max 250p",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "38c1",
+        "name": "Create CHOKE with max packet setting",
+        "category": [
+            "qdisc",
+            "choke"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root choke limit 1000 bandwidth 10000 max 900",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc choke 1: root refcnt [0-9]+ limit 1000p min.*max 900p",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "234a",
+        "name": "Create CHOKE with ecn setting",
+        "category": [
+            "qdisc",
+            "choke"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root choke limit 1000 bandwidth 10000 ecn",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc choke 1: root refcnt [0-9]+ limit 1000p min 83p max 250p ecn",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "4380",
+        "name": "Create CHOKE with burst setting",
+        "category": [
+            "qdisc",
+            "choke"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root choke limit 1000 bandwidth 10000 burst 100",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc choke 1: root refcnt [0-9]+ limit 1000p min 83p max 250p",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "48c7",
+        "name": "Delete CHOKE with valid handle",
+        "category": [
+            "qdisc",
+            "choke"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true",
+            "$TC qdisc add dev $DUMMY handle 1: root choke limit 1000 bandwidth 10000"
+        ],
+        "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 1: root",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc choke 1: root refcnt [0-9]+ limit 1000p min 83p max 250p",
+        "matchCount": "0",
+        "teardown": [
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "4398",
+        "name": "Replace CHOKE with min setting",
+        "category": [
+            "qdisc",
+            "choke"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true",
+            "$TC qdisc add dev $DUMMY handle 1: root choke limit 1000 bandwidth 10000"
+        ],
+        "cmdUnderTest": "$TC qdisc replace dev $DUMMY handle 1: root choke limit 1000 bandwidth 10000 min 100",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc choke 1: root refcnt [0-9]+ limit 1000p min 100p max 250p",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "0301",
+        "name": "Change CHOKE with limit setting",
+        "category": [
+            "qdisc",
+            "choke"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true",
+            "$TC qdisc add dev $DUMMY handle 1: root choke limit 1000 bandwidth 10000"
+        ],
+        "cmdUnderTest": "$TC qdisc change dev $DUMMY handle 1: root choke limit 1000 bandwidth 10000 min 100",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc choke 1: root refcnt [0-9]+ limit 1000p min 100p max 250p",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    }
+]
-- 
cgit v1.2.3


From 412233b1f7e7b4ea6a4c325ae44602cbc152100a Mon Sep 17 00:00:00 2001
From: Zhengchao Shao <shaozhengchao@huawei.com>
Date: Sat, 24 Sep 2022 10:51:45 +0800
Subject: selftests/tc-testing: add selftests for codel qdisc

Test 983a: Create CODEL with default setting
Test 38aa: Create CODEL with limit packet setting
Test 9178: Create CODEL with target setting
Test 78d1: Create CODEL with interval setting
Test 238a: Create CODEL with ecn setting
Test 939c: Create CODEL with ce_threshold setting
Test 8380: Delete CODEL with valid handle
Test 289c: Replace CODEL with limit setting
Test 0648: Change CODEL with limit setting

Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/tc-testing/config          |   1 +
 .../tc-testing/tc-tests/qdiscs/codel.json          | 211 +++++++++++++++++++++
 2 files changed, 212 insertions(+)
 create mode 100644 tools/testing/selftests/tc-testing/tc-tests/qdiscs/codel.json

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config
index e104e8ec30aa..a02f959abe66 100644
--- a/tools/testing/selftests/tc-testing/config
+++ b/tools/testing/selftests/tc-testing/config
@@ -14,6 +14,7 @@ CONFIG_NET_SCHED=y
 #
 CONFIG_NET_SCH_ATM=m
 CONFIG_NET_SCH_CHOKE=m
+CONFIG_NET_SCH_CODEL=m
 CONFIG_NET_SCH_PRIO=m
 CONFIG_NET_SCH_INGRESS=m
 
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/codel.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/codel.json
new file mode 100644
index 000000000000..ea38099d48e5
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/codel.json
@@ -0,0 +1,211 @@
+[
+    {
+        "id": "983a",
+        "name": "Create CODEL with default setting",
+        "category": [
+            "qdisc",
+            "codel"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root codel",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc codel 1: root refcnt [0-9]+ limit 1000p target 5ms interval 100ms",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "38aa",
+        "name": "Create CODEL with limit packet setting",
+        "category": [
+            "qdisc",
+            "codel"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root codel limit 1500",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc codel 1: root refcnt [0-9]+ limit 1500p target 5ms interval 100ms",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "9178",
+        "name": "Create CODEL with target setting",
+        "category": [
+            "qdisc",
+            "codel"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root codel target 100ms",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc codel 1: root refcnt [0-9]+ limit 1000p target 100ms interval 100ms",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "78d1",
+        "name": "Create CODEL with interval setting",
+        "category": [
+            "qdisc",
+            "codel"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root codel interval 20ms",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc codel 1: root refcnt [0-9]+ limit 1000p target 5ms interval 20ms",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "238a",
+        "name": "Create CODEL with ecn setting",
+        "category": [
+            "qdisc",
+            "codel"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root codel ecn",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc codel 1: root refcnt [0-9]+ limit 1000p target 5ms interval 100ms ecn",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "939c",
+        "name": "Create CODEL with ce_threshold setting",
+        "category": [
+            "qdisc",
+            "codel"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root codel ce_threshold 20ms",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc codel 1: root refcnt [0-9]+ limit 1000p target 5ms ce_threshold 20ms interval 100ms",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "8380",
+        "name": "Delete CODEL with valid handle",
+        "category": [
+            "qdisc",
+            "codel"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true",
+            "$TC qdisc add dev $DUMMY handle 1: root codel"
+        ],
+        "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 1: root",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc codel 1: root refcnt [0-9]+ limit 1000p target 5ms interval 100ms",
+        "matchCount": "0",
+        "teardown": [
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "289c",
+        "name": "Replace CODEL with limit setting",
+        "category": [
+            "qdisc",
+            "codel"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true",
+            "$TC qdisc add dev $DUMMY handle 1: root codel"
+        ],
+        "cmdUnderTest": "$TC qdisc replace dev $DUMMY handle 1: root codel limit 5000",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc codel 1: root refcnt [0-9]+ limit 5000p target 5ms interval 100ms",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "0648",
+        "name": "Change CODEL with limit setting",
+        "category": [
+            "qdisc",
+            "codel"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true",
+            "$TC qdisc add dev $DUMMY handle 1: root codel"
+        ],
+        "cmdUnderTest": "$TC qdisc change dev $DUMMY handle 1: root codel limit 100",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc codel 1: root refcnt [0-9]+ limit 100p target 5ms interval 100ms",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    }
+]
-- 
cgit v1.2.3


From fa4b3e9f057b51e5bed86804c9f897f686d520ee Mon Sep 17 00:00:00 2001
From: Zhengchao Shao <shaozhengchao@huawei.com>
Date: Sat, 24 Sep 2022 10:51:46 +0800
Subject: selftests/tc-testing: add selftests for etf qdisc

Test 34ba: Create ETF with default setting
Test 438f: Create ETF with delta nanos setting
Test 9041: Create ETF with deadline_mode setting
Test 9a0c: Create ETF with skip_sock_check setting
Test 2093: Delete ETF with valid handle

Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/tc-testing/config          |   1 +
 .../selftests/tc-testing/tc-tests/qdiscs/etf.json  | 117 +++++++++++++++++++++
 2 files changed, 118 insertions(+)
 create mode 100644 tools/testing/selftests/tc-testing/tc-tests/qdiscs/etf.json

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config
index a02f959abe66..058ef0bf9dad 100644
--- a/tools/testing/selftests/tc-testing/config
+++ b/tools/testing/selftests/tc-testing/config
@@ -15,6 +15,7 @@ CONFIG_NET_SCHED=y
 CONFIG_NET_SCH_ATM=m
 CONFIG_NET_SCH_CHOKE=m
 CONFIG_NET_SCH_CODEL=m
+CONFIG_NET_SCH_ETF=m
 CONFIG_NET_SCH_PRIO=m
 CONFIG_NET_SCH_INGRESS=m
 
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/etf.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/etf.json
new file mode 100644
index 000000000000..0046d44bcd93
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/etf.json
@@ -0,0 +1,117 @@
+[
+    {
+        "id": "34ba",
+        "name": "Create ETF with default setting",
+        "category": [
+            "qdisc",
+            "etf"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root etf clockid CLOCK_TAI",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc etf 1: root refcnt [0-9]+ clockid TAI delta 0 offload off deadline_mode off skip_sock_check off",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "438f",
+        "name": "Create ETF with delta nanos setting",
+        "category": [
+            "qdisc",
+            "etf"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root etf delta 100 clockid CLOCK_TAI",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc etf 1: root refcnt [0-9]+ clockid TAI delta 100 offload off deadline_mode off skip_sock_check off",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "9041",
+        "name": "Create ETF with deadline_mode setting",
+        "category": [
+            "qdisc",
+            "etf"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root etf clockid CLOCK_TAI deadline_mode",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc etf 1: root refcnt [0-9]+ clockid TAI delta 0 offload off deadline_mode on skip_sock_check off",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "9a0c",
+        "name": "Create ETF with skip_sock_check setting",
+        "category": [
+            "qdisc",
+            "etf"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root etf clockid CLOCK_TAI skip_sock_check",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc etf 1: root refcnt [0-9]+ clockid TAI delta 0 offload off deadline_mode off skip_sock_check on",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "2093",
+        "name": "Delete ETF with valid handle",
+        "category": [
+            "qdisc",
+            "etf"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true",
+            "$TC qdisc add dev $DUMMY handle 1: root etf clockid CLOCK_TAI"
+        ],
+        "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 1: root",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc etf 1: root refcnt [0-9]+ clockid TAI delta 0 offload off deadline_mode off skip_sock_check off",
+        "matchCount": "0",
+        "teardown": [
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    }
+]
-- 
cgit v1.2.3


From 9e274718cc050874761ad4314d43cd82e7556128 Mon Sep 17 00:00:00 2001
From: Zhengchao Shao <shaozhengchao@huawei.com>
Date: Sat, 24 Sep 2022 10:51:47 +0800
Subject: selftests/tc-testing: add selftests for fq qdisc

Test 983b: Create FQ with default setting
Test 38a1: Create FQ with limit packet setting
Test 0a18: Create FQ with flow_limit setting
Test 2390: Create FQ with quantum setting
Test 845b: Create FQ with initial_quantum setting
Test 9398: Create FQ with maxrate setting
Test 342c: Create FQ with nopacing setting
Test 6391: Create FQ with refill_delay setting
Test 238b: Create FQ with low_rate_threshold setting
Test 7582: Create FQ with orphan_mask setting
Test 4894: Create FQ with timer_slack setting
Test 324c: Create FQ with ce_threshold setting
Test 424a: Create FQ with horizon time setting
Test 89e1: Create FQ with horizon_cap setting
Test 32e1: Delete FQ with valid handle
Test 49b0: Replace FQ with limit setting
Test 9478: Change FQ with limit setting

Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/tc-testing/config          |   1 +
 .../selftests/tc-testing/tc-tests/qdiscs/fq.json   | 395 +++++++++++++++++++++
 2 files changed, 396 insertions(+)
 create mode 100644 tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config
index 058ef0bf9dad..eea77f9d6ba1 100644
--- a/tools/testing/selftests/tc-testing/config
+++ b/tools/testing/selftests/tc-testing/config
@@ -16,6 +16,7 @@ CONFIG_NET_SCH_ATM=m
 CONFIG_NET_SCH_CHOKE=m
 CONFIG_NET_SCH_CODEL=m
 CONFIG_NET_SCH_ETF=m
+CONFIG_NET_SCH_FQ=m
 CONFIG_NET_SCH_PRIO=m
 CONFIG_NET_SCH_INGRESS=m
 
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json
new file mode 100644
index 000000000000..8acb904d1419
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json
@@ -0,0 +1,395 @@
+[
+    {
+        "id": "983b",
+        "name": "Create FQ with default setting",
+        "category": [
+            "qdisc",
+            "fq"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc fq 1: root refcnt [0-9]+ limit",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "38a1",
+        "name": "Create FQ with limit packet setting",
+        "category": [
+            "qdisc",
+            "fq"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq limit 3000",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc fq 1: root refcnt [0-9]+ limit 3000p",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "0a18",
+        "name": "Create FQ with flow_limit setting",
+        "category": [
+            "qdisc",
+            "fq"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq flow_limit 300",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc fq 1: root refcnt [0-9]+ limit 10000p flow_limit 300p",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "2390",
+        "name": "Create FQ with quantum setting",
+        "category": [
+            "qdisc",
+            "fq"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq quantum 9000",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc fq 1: root refcnt [0-9]+ limit 10000p flow_limit 100p buckets.*orphan_mask 1023 quantum 9000b",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "845b",
+        "name": "Create FQ with initial_quantum setting",
+        "category": [
+            "qdisc",
+            "fq"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq initial_quantum 900000",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc fq 1: root refcnt [0-9]+ limit 10000p flow_limit 100p buckets.*initial_quantum 900000b",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "9398",
+        "name": "Create FQ with maxrate setting",
+        "category": [
+            "qdisc",
+            "fq"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq maxrate 100000",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc fq 1: root refcnt [0-9]+ limit 10000p flow_limit 100p buckets.*maxrate 100Kbit",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "342c",
+        "name": "Create FQ with nopacing setting",
+        "category": [
+            "qdisc",
+            "fq"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq nopacing",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc fq 1: root refcnt [0-9]+ limit 10000p flow_limit 100p.*nopacing",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "6391",
+        "name": "Create FQ with refill_delay setting",
+        "category": [
+            "qdisc",
+            "fq"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq refill_delay 100ms",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc fq 1: root refcnt [0-9]+ limit 10000p flow_limit 100p.*refill_delay 100ms",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "238b",
+        "name": "Create FQ with low_rate_threshold setting",
+        "category": [
+            "qdisc",
+            "fq"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq low_rate_threshold 10000",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc fq 1: root refcnt [0-9]+ limit 10000p flow_limit 100p.*low_rate_threshold 10Kbit",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "7582",
+        "name": "Create FQ with orphan_mask setting",
+        "category": [
+            "qdisc",
+            "fq"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq orphan_mask 255",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc fq 1: root refcnt [0-9]+ limit 10000p flow_limit 100p.*orphan_mask 255",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "4894",
+        "name": "Create FQ with timer_slack setting",
+        "category": [
+            "qdisc",
+            "fq"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq timer_slack 100",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc fq 1: root refcnt [0-9]+ limit 10000p flow_limit 100p.*timer_slack 100ns",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "324c",
+        "name": "Create FQ with ce_threshold setting",
+        "category": [
+            "qdisc",
+            "fq"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq ce_threshold 100",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc fq 1: root refcnt [0-9]+ limit 10000p flow_limit 100p",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "424a",
+        "name": "Create FQ with horizon time setting",
+        "category": [
+            "qdisc",
+            "fq"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq horizon 100",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc fq 1: root refcnt [0-9]+ limit 10000p flow_limit 100p.*horizon 100us",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "89e1",
+        "name": "Create FQ with horizon_cap setting",
+        "category": [
+            "qdisc",
+            "fq"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq horizon_cap",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc fq 1: root refcnt [0-9]+ limit 10000p flow_limit 100p.*horizon_cap",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "32e1",
+        "name": "Delete FQ with valid handle",
+        "category": [
+            "qdisc",
+            "fq"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true",
+            "$TC qdisc add dev $DUMMY handle 1: root fq"
+        ],
+        "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 1: root",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc fq 1: root refcnt [0-9]+ limit 10000p",
+        "matchCount": "0",
+        "teardown": [
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "49b0",
+        "name": "Replace FQ with limit setting",
+        "category": [
+            "qdisc",
+            "fq"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true",
+            "$TC qdisc add dev $DUMMY handle 1: root fq"
+        ],
+        "cmdUnderTest": "$TC qdisc replace dev $DUMMY handle 1: root fq limit 5000",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc fq 1: root refcnt [0-9]+ limit 5000p",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "9478",
+        "name": "Change FQ with limit setting",
+        "category": [
+            "qdisc",
+            "fq"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true",
+            "$TC qdisc add dev $DUMMY handle 1: root fq"
+        ],
+        "cmdUnderTest": "$TC qdisc change dev $DUMMY handle 1: root fq limit 100",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc fq 1: root refcnt [0-9]+ limit 100p",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    }
+]
-- 
cgit v1.2.3


From a4a8d3562b07e7390708c9d6dc85932fceffe8af Mon Sep 17 00:00:00 2001
From: Zhengchao Shao <shaozhengchao@huawei.com>
Date: Sat, 24 Sep 2022 10:51:48 +0800
Subject: selftests/tc-testing: add selftests for gred qdisc

Test 8942: Create GRED with default setting
Test 5783: Create GRED with grio setting
Test 8a09: Create GRED with limit setting
Test 48cb: Create GRED with ecn setting
Test 763a: Change GRED setting
Test 8309: Show GRED class

Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/tc-testing/config          |   1 +
 .../selftests/tc-testing/tc-tests/qdiscs/gred.json | 164 +++++++++++++++++++++
 2 files changed, 165 insertions(+)
 create mode 100644 tools/testing/selftests/tc-testing/tc-tests/qdiscs/gred.json

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config
index eea77f9d6ba1..d8db68440395 100644
--- a/tools/testing/selftests/tc-testing/config
+++ b/tools/testing/selftests/tc-testing/config
@@ -17,6 +17,7 @@ CONFIG_NET_SCH_CHOKE=m
 CONFIG_NET_SCH_CODEL=m
 CONFIG_NET_SCH_ETF=m
 CONFIG_NET_SCH_FQ=m
+CONFIG_NET_SCH_GRED=m
 CONFIG_NET_SCH_PRIO=m
 CONFIG_NET_SCH_INGRESS=m
 
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/gred.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/gred.json
new file mode 100644
index 000000000000..013c8ee037a4
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/gred.json
@@ -0,0 +1,164 @@
+[
+    {
+        "id": "8942",
+        "name": "Create GRED with default setting",
+        "category": [
+            "qdisc",
+            "gred"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root gred setup vqs 10 default 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc gred 1: root refcnt [0-9]+ vqs 10 default 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "5783",
+        "name": "Create GRED with grio setting",
+        "category": [
+            "qdisc",
+            "gred"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root gred setup vqs 10 default 1 grio",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc gred 1: root refcnt [0-9]+ vqs 10 default 1.*grio",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "8a09",
+        "name": "Create GRED with limit setting",
+        "category": [
+            "qdisc",
+            "gred"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root gred setup vqs 10 default 1 limit 1000",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc gred 1: root refcnt [0-9]+ vqs 10 default 1 limit 1000b",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "48ca",
+        "name": "Create GRED with ecn setting",
+        "category": [
+            "qdisc",
+            "gred"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root gred setup vqs 10 default 2 ecn",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc gred 1: root refcnt [0-9]+ vqs 10 default 2.*ecn",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "48cb",
+        "name": "Create GRED with harddrop setting",
+        "category": [
+            "qdisc",
+            "gred"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root gred setup vqs 10 default 2 harddrop",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc gred 1: root refcnt [0-9]+ vqs 10 default 2.*harddrop",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "763a",
+        "name": "Change GRED setting",
+        "category": [
+            "qdisc",
+            "gred"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true",
+            "$TC qdisc add dev $DUMMY handle 1: root gred setup vqs 10 default 1"
+        ],
+        "cmdUnderTest": "$TC qdisc change dev $DUMMY handle 1: root gred limit 60KB min 15K max 25K burst 64 avpkt 1500 bandwidth 10Mbit DP 1 probability 0.1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc gred 1: root refcnt [0-9]+ vqs 10 default 1 limit.*vq 1 prio [0-9]+ limit 60Kb min 15Kb max 25Kb",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "8309",
+        "name": "Show GRED class",
+        "category": [
+            "qdisc",
+            "gred"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root gred setup vqs 10 default 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC class show dev $DUMMY",
+        "matchPattern": "class gred 1:",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    }
+]
-- 
cgit v1.2.3


From 225aeb62fe5812bd1d25ce37d771a21b8b029f6a Mon Sep 17 00:00:00 2001
From: Zhengchao Shao <shaozhengchao@huawei.com>
Date: Sat, 24 Sep 2022 10:51:49 +0800
Subject: selftests/tc-testing: add selftests for hhf qdisc

Test 4812: Create HHF with default setting
Test 8a92: Create HHF with limit setting
Test 3491: Create HHF with quantum setting
Test ba04: Create HHF with reset_timeout setting
Test 4238: Create HHF with admit_bytes setting
Test 839f: Create HHF with evict_timeout setting
Test a044: Create HHF with non_hh_weight setting
Test 32f9: Change HHF with limit setting
Test 385e: Show HHF class

Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/tc-testing/config          |   1 +
 .../selftests/tc-testing/tc-tests/qdiscs/hhf.json  | 210 +++++++++++++++++++++
 2 files changed, 211 insertions(+)
 create mode 100644 tools/testing/selftests/tc-testing/tc-tests/qdiscs/hhf.json

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config
index d8db68440395..86d57d599d67 100644
--- a/tools/testing/selftests/tc-testing/config
+++ b/tools/testing/selftests/tc-testing/config
@@ -18,6 +18,7 @@ CONFIG_NET_SCH_CODEL=m
 CONFIG_NET_SCH_ETF=m
 CONFIG_NET_SCH_FQ=m
 CONFIG_NET_SCH_GRED=m
+CONFIG_NET_SCH_HHF=m
 CONFIG_NET_SCH_PRIO=m
 CONFIG_NET_SCH_INGRESS=m
 
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/hhf.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/hhf.json
new file mode 100644
index 000000000000..949f6e5de902
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/hhf.json
@@ -0,0 +1,210 @@
+[
+    {
+        "id": "4812",
+        "name": "Create HHF with default setting",
+        "category": [
+            "qdisc",
+            "hhf"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root hhf",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc hhf 1: root refcnt [0-9]+.*hh_limit 2048 reset_timeout 40ms admit_bytes 128Kb evict_timeout 1s non_hh_weight 2",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "8a92",
+        "name": "Create HHF with limit setting",
+        "category": [
+            "qdisc",
+            "hhf"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root hhf limit 1500",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc hhf 1: root refcnt [0-9]+ limit 1500p.*hh_limit 2048 reset_timeout 40ms admit_bytes 128Kb evict_timeout 1s non_hh_weight 2",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "3491",
+        "name": "Create HHF with quantum setting",
+        "category": [
+            "qdisc",
+            "hhf"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root hhf quantum 9000",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc hhf 1: root refcnt [0-9]+.*quantum 9000b hh_limit 2048 reset_timeout 40ms admit_bytes 128Kb evict_timeout 1s non_hh_weight 2",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "ba04",
+        "name": "Create HHF with reset_timeout setting",
+        "category": [
+            "qdisc",
+            "hhf"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root hhf reset_timeout 100ms",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc hhf 1: root refcnt [0-9]+.*hh_limit 2048 reset_timeout 100ms admit_bytes 128Kb evict_timeout 1s non_hh_weight 2",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "4238",
+        "name": "Create HHF with admit_bytes setting",
+        "category": [
+            "qdisc",
+            "hhf"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root hhf admit_bytes 100000",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc hhf 1: root refcnt [0-9]+.*hh_limit 2048 reset_timeout 40ms admit_bytes 100000b evict_timeout 1s non_hh_weight 2",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "839f",
+        "name": "Create HHF with evict_timeout setting",
+        "category": [
+            "qdisc",
+            "hhf"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root hhf evict_timeout 0.5s",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc hhf 1: root refcnt [0-9]+.*hh_limit 2048 reset_timeout 40ms admit_bytes 128Kb evict_timeout 500ms non_hh_weight 2",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "a044",
+        "name": "Create HHF with non_hh_weight setting",
+        "category": [
+            "qdisc",
+            "hhf"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root hhf non_hh_weight 10",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc hhf 1: root refcnt [0-9]+.*hh_limit 2048 reset_timeout 40ms admit_bytes 128Kb evict_timeout 1s non_hh_weight 10",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "32f9",
+        "name": "Change HHF with limit setting",
+        "category": [
+            "qdisc",
+            "hhf"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true",
+            "$TC qdisc add dev $DUMMY handle 1: root hhf"
+        ],
+        "cmdUnderTest": "$TC qdisc change dev $DUMMY handle 1: root hhf limit 1500",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc hhf 1: root refcnt [0-9]+ limit 1500p.*hh_limit 2048 reset_timeout 40ms admit_bytes 128Kb evict_timeout 1s non_hh_weight 2",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "385e",
+        "name": "Show HHF class",
+        "category": [
+            "qdisc",
+            "hhf"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root hhf",
+        "expExitCode": "0",
+        "verifyCmd": "$TC class show dev $DUMMY",
+        "matchPattern": "class hhf 1:",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    }
+]
-- 
cgit v1.2.3


From 379a6509452e1b3c2eb916786f4c336c4ef054c0 Mon Sep 17 00:00:00 2001
From: Zhengchao Shao <shaozhengchao@huawei.com>
Date: Sat, 24 Sep 2022 10:51:50 +0800
Subject: selftests/tc-testing: add selftests for pfifo_fast qdisc

Test 900c: Create pfifo_fast with default setting
Test 7470: Dump pfifo_fast stats
Test b974: Replace pfifo_fast with different handle
Test 3240: Delete pfifo_fast with valid handle
Test 4385: Delete pfifo_fast with invalid handle

Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../tc-testing/tc-tests/qdiscs/pfifo_fast.json     | 119 +++++++++++++++++++++
 1 file changed, 119 insertions(+)
 create mode 100644 tools/testing/selftests/tc-testing/tc-tests/qdiscs/pfifo_fast.json

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/pfifo_fast.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/pfifo_fast.json
new file mode 100644
index 000000000000..ab53238f4c5a
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/pfifo_fast.json
@@ -0,0 +1,119 @@
+[
+    {
+        "id": "900c",
+        "name": "Create pfifo_fast with default setting",
+        "category": [
+            "qdisc",
+            "pfifo_fast"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root pfifo_fast",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc pfifo_fast 1: root refcnt [0-9]+ bands 3 priomap",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "7470",
+        "name": "Dump pfifo_fast stats",
+        "category": [
+            "qdisc",
+            "pfifo_fast"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root pfifo_fast",
+        "expExitCode": "0",
+        "verifyCmd": "$TC -s qdisc show dev $DUMMY",
+        "matchPattern": "Sent.*bytes.*pkt \\(dropped.*overlimits.*requeues .*\\)",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "b974",
+        "name": "Replace pfifo_fast with different handle",
+        "category": [
+            "qdisc",
+            "pfifo_fast"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true",
+            "$TC qdisc add dev $DUMMY handle 1: root pfifo_fast"
+        ],
+        "cmdUnderTest": "$TC qdisc replace dev $DUMMY handle 2: root pfifo_fast",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc pfifo_fast 2: root refcnt [0-9]+ bands 3 priomap",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 2: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "3240",
+        "name": "Delete pfifo_fast with valid handle",
+        "category": [
+            "qdisc",
+            "pfifo_fast"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true",
+            "$TC qdisc add dev $DUMMY handle 1: root pfifo_fast"
+        ],
+        "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 1: root",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc pfifo_fast 1: root refcnt [0-9]+ bands 3 priomap",
+        "matchCount": "0",
+        "teardown": [
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "4385",
+        "name": "Delete pfifo_fast with invalid handle",
+        "category": [
+            "qdisc",
+            "pfifo_fast"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true",
+            "$TC qdisc add dev $DUMMY handle 1: root pfifo_fast"
+        ],
+        "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 2: root",
+        "expExitCode": "2",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc pfifo_fast 1: root refcnt [0-9]+ bands 3 priomap",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    }
+]
-- 
cgit v1.2.3


From 7d0b4b0ccb1526aca3101cfe352aefa9915fb9af Mon Sep 17 00:00:00 2001
From: Zhengchao Shao <shaozhengchao@huawei.com>
Date: Sat, 24 Sep 2022 10:51:51 +0800
Subject: selftests/tc-testing: add selftests for plug qdisc

Test 3289: Create PLUG with default setting
Test 0917: Create PLUG with block setting
Test 483b: Create PLUG with release setting
Test 4995: Create PLUG with release_indefinite setting
Test 389c: Create PLUG with limit setting
Test 384a: Delete PLUG with valid handle
Test 439a: Replace PLUG with limit setting
Test 9831: Change PLUG with limit setting

Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/tc-testing/config          |   1 +
 .../selftests/tc-testing/tc-tests/qdiscs/plug.json | 188 +++++++++++++++++++++
 2 files changed, 189 insertions(+)
 create mode 100644 tools/testing/selftests/tc-testing/tc-tests/qdiscs/plug.json

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config
index 86d57d599d67..2a85ecc4a241 100644
--- a/tools/testing/selftests/tc-testing/config
+++ b/tools/testing/selftests/tc-testing/config
@@ -19,6 +19,7 @@ CONFIG_NET_SCH_ETF=m
 CONFIG_NET_SCH_FQ=m
 CONFIG_NET_SCH_GRED=m
 CONFIG_NET_SCH_HHF=m
+CONFIG_NET_SCH_PLUG=m
 CONFIG_NET_SCH_PRIO=m
 CONFIG_NET_SCH_INGRESS=m
 
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/plug.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/plug.json
new file mode 100644
index 000000000000..6454518af178
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/plug.json
@@ -0,0 +1,188 @@
+[
+    {
+        "id": "3289",
+        "name": "Create PLUG with default setting",
+        "category": [
+            "qdisc",
+            "plug"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root plug",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc plug 1: root refcnt",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "0917",
+        "name": "Create PLUG with block setting",
+        "category": [
+            "qdisc",
+            "plug"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root plug block",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc plug 1: root refcnt",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "483b",
+        "name": "Create PLUG with release setting",
+        "category": [
+            "qdisc",
+            "plug"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root plug release",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc plug 1: root refcnt",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "4995",
+        "name": "Create PLUG with release_indefinite setting",
+        "category": [
+            "qdisc",
+            "plug"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root plug release_indefinite",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc plug 1: root refcnt",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "389c",
+        "name": "Create PLUG with limit setting",
+        "category": [
+            "qdisc",
+            "plug"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root plug limit 100",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc plug 1: root refcnt",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "384a",
+        "name": "Delete PLUG with valid handle",
+        "category": [
+            "qdisc",
+            "plug"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true",
+            "$TC qdisc add dev $DUMMY handle 1: root plug"
+        ],
+        "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 1: root",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc plug 1: root refcnt",
+        "matchCount": "0",
+        "teardown": [
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "439a",
+        "name": "Replace PLUG with limit setting",
+        "category": [
+            "qdisc",
+            "plug"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true",
+            "$TC qdisc add dev $DUMMY handle 1: root plug"
+        ],
+        "cmdUnderTest": "$TC qdisc replace dev $DUMMY handle 1: root plug limit 1000",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc plug 1: root refcnt",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "9831",
+        "name": "Change PLUG with limit setting",
+        "category": [
+            "qdisc",
+            "plug"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true",
+            "$TC qdisc add dev $DUMMY handle 1: root plug"
+        ],
+        "cmdUnderTest": "$TC qdisc change dev $DUMMY handle 1: root plug limit 1000",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc plug 1: root refcnt",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    }
+]
-- 
cgit v1.2.3


From 6ad92dc56fca398ae0461fcfcb9f2a3370cf26ac Mon Sep 17 00:00:00 2001
From: Zhengchao Shao <shaozhengchao@huawei.com>
Date: Sat, 24 Sep 2022 10:51:52 +0800
Subject: selftests/tc-testing: add selftests for sfb qdisc

Test 3294: Create SFB with default setting
Test 430a: Create SFB with rehash setting
Test 3410: Create SFB with db setting
Test 49a0: Create SFB with limit setting
Test 1241: Create SFB with max setting
Test 3249: Create SFB with target setting
Test 30a9: Create SFB with increment setting
Test 239a: Create SFB with decrement setting
Test 9301: Create SFB with penalty_rate setting
Test 2a01: Create SFB with penalty_burst setting
Test 3209: Change SFB with rehash setting
Test 5447: Show SFB class

Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/tc-testing/config          |   1 +
 .../selftests/tc-testing/tc-tests/qdiscs/sfb.json  | 279 +++++++++++++++++++++
 2 files changed, 280 insertions(+)
 create mode 100644 tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfb.json

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config
index 2a85ecc4a241..5289c788d755 100644
--- a/tools/testing/selftests/tc-testing/config
+++ b/tools/testing/selftests/tc-testing/config
@@ -22,6 +22,7 @@ CONFIG_NET_SCH_HHF=m
 CONFIG_NET_SCH_PLUG=m
 CONFIG_NET_SCH_PRIO=m
 CONFIG_NET_SCH_INGRESS=m
+CONFIG_NET_SCH_SFB=m
 
 #
 # Classification
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfb.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfb.json
new file mode 100644
index 000000000000..ba2f5e79cdbf
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfb.json
@@ -0,0 +1,279 @@
+[
+    {
+        "id": "3294",
+        "name": "Create SFB with default setting",
+        "category": [
+            "qdisc",
+            "sfb"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfb",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc sfb 1: root refcnt [0-9]+ rehash 600s db 60s",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "430a",
+        "name": "Create SFB with rehash setting",
+        "category": [
+            "qdisc",
+            "sfb"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfb rehash 60",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc sfb 1: root refcnt [0-9]+ rehash 60ms db 60s",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "3410",
+        "name": "Create SFB with db setting",
+        "category": [
+            "qdisc",
+            "sfb"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfb db 10",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc sfb 1: root refcnt [0-9]+ rehash 600s db 10ms",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "49a0",
+        "name": "Create SFB with limit setting",
+        "category": [
+            "qdisc",
+            "sfb"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfb limit 100",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc sfb 1: root refcnt [0-9]+ rehash 600s db 60s limit 100p",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "1241",
+        "name": "Create SFB with max setting",
+        "category": [
+            "qdisc",
+            "sfb"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfb max 100",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc sfb 1: root refcnt 2 rehash 600s db 60s.*max 100p",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "3249",
+        "name": "Create SFB with target setting",
+        "category": [
+            "qdisc",
+            "sfb"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfb target 100",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc sfb 1: root refcnt 2 rehash 600s db 60s.*target 100p",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "30a9",
+        "name": "Create SFB with increment setting",
+        "category": [
+            "qdisc",
+            "sfb"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfb increment 0.1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc sfb 1: root refcnt 2 rehash 600s db 60s.*increment 0.1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "239a",
+        "name": "Create SFB with decrement setting",
+        "category": [
+            "qdisc",
+            "sfb"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfb decrement 0.1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc sfb 1: root refcnt 2 rehash 600s db 60s.*decrement 0.1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "9301",
+        "name": "Create SFB with penalty_rate setting",
+        "category": [
+            "qdisc",
+            "sfb"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfb penalty_rate 4000",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc sfb 1: root refcnt 2 rehash 600s db 60s.*penalty_rate 4000pps",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "2a01",
+        "name": "Create SFB with penalty_burst setting",
+        "category": [
+            "qdisc",
+            "sfb"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfb penalty_burst 64",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc sfb 1: root refcnt 2 rehash 600s db 60s.*penalty_burst 64p",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "3209",
+        "name": "Change SFB with rehash setting",
+        "category": [
+            "qdisc",
+            "sfb"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true",
+            "$TC qdisc add dev $DUMMY handle 1: root sfb penalty_burst 64"
+        ],
+        "cmdUnderTest": "$TC qdisc change dev $DUMMY handle 1: root sfb rehash 100",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc sfb 1: root refcnt 2 rehash 100ms db 60s",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "5447",
+        "name": "Show SFB class",
+        "category": [
+            "qdisc",
+            "sfb"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfb",
+        "expExitCode": "0",
+        "verifyCmd": "$TC class show dev $DUMMY",
+        "matchPattern": "class sfb 1:",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    }
+]
-- 
cgit v1.2.3


From 0158f65bfbdddae1ca72d7be548051fce6b42719 Mon Sep 17 00:00:00 2001
From: Zhengchao Shao <shaozhengchao@huawei.com>
Date: Sat, 24 Sep 2022 10:51:53 +0800
Subject: selftests/tc-testing: add selftests for sfq qdisc

Test 7482: Create SFQ with default setting
Test c186: Create SFQ with limit setting
Test ae23: Create SFQ with perturb setting
Test a430: Create SFQ with quantum setting
Test 4539: Create SFQ with divisor setting
Test b089: Create SFQ with flows setting
Test 99a0: Create SFQ with depth setting
Test 7389: Create SFQ with headdrop setting
Test 6472: Create SFQ with redflowlimit setting
Test 8929: Show SFQ class

Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/tc-testing/config          |   1 +
 .../selftests/tc-testing/tc-tests/qdiscs/sfq.json  | 232 +++++++++++++++++++++
 2 files changed, 233 insertions(+)
 create mode 100644 tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config
index 5289c788d755..bc0ef6eb515a 100644
--- a/tools/testing/selftests/tc-testing/config
+++ b/tools/testing/selftests/tc-testing/config
@@ -23,6 +23,7 @@ CONFIG_NET_SCH_PLUG=m
 CONFIG_NET_SCH_PRIO=m
 CONFIG_NET_SCH_INGRESS=m
 CONFIG_NET_SCH_SFB=m
+CONFIG_NET_SCH_SFQ=m
 
 #
 # Classification
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json
new file mode 100644
index 000000000000..b6be718a174a
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json
@@ -0,0 +1,232 @@
+[
+    {
+        "id": "7482",
+        "name": "Create SFQ with default setting",
+        "category": [
+            "qdisc",
+            "sfq"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfq",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc sfq 1: root refcnt [0-9]+ limit 127p quantum.*depth 127 divisor 1024",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "c186",
+        "name": "Create SFQ with limit setting",
+        "category": [
+            "qdisc",
+            "sfq"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfq limit 8",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc sfq 1: root refcnt [0-9]+ limit 8p",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "ae23",
+        "name": "Create SFQ with perturb setting",
+        "category": [
+            "qdisc",
+            "sfq"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfq perturb 10",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "depth 127 divisor 1024 perturb 10sec",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "a430",
+        "name": "Create SFQ with quantum setting",
+        "category": [
+            "qdisc",
+            "sfq"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfq quantum 9000",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc sfq 1: root refcnt [0-9]+ limit 127p quantum 9000b depth 127 divisor 1024",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "4539",
+        "name": "Create SFQ with divisor setting",
+        "category": [
+            "qdisc",
+            "sfq"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfq divisor 512",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc sfq 1: root refcnt [0-9]+ limit 127p quantum 1514b depth 127 divisor 512",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "b089",
+        "name": "Create SFQ with flows setting",
+        "category": [
+            "qdisc",
+            "sfq"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfq flows 20",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc sfq 1: root refcnt",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "99a0",
+        "name": "Create SFQ with depth setting",
+        "category": [
+            "qdisc",
+            "sfq"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfq depth 64",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc sfq 1: root refcnt [0-9]+ limit 127p quantum 1514b depth 64 divisor 1024",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "7389",
+        "name": "Create SFQ with headdrop setting",
+        "category": [
+            "qdisc",
+            "sfq"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfq headdrop",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc sfq 1: root refcnt [0-9]+ limit 127p quantum 1514b depth 127 headdrop divisor 1024",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "6472",
+        "name": "Create SFQ with redflowlimit setting",
+        "category": [
+            "qdisc",
+            "sfq"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfq redflowlimit 100000 min 8000 max 60000 probability 0.20 ecn headdrop",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc sfq 1: root refcnt [0-9]+ limit 127p quantum 1514b depth 127 headdrop divisor 1024 ewma 6 min 8000b max 60000b probability 0.2 ecn",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "8929",
+        "name": "Show SFQ class",
+        "category": [
+            "qdisc",
+            "sfq"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfq",
+        "expExitCode": "0",
+        "verifyCmd": "$TC class show dev $DUMMY",
+        "matchPattern": "class sfq 1:",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    }
+]
-- 
cgit v1.2.3


From c5a2d86b922868f7a3aecd4bb757d54acde85996 Mon Sep 17 00:00:00 2001
From: Zhengchao Shao <shaozhengchao@huawei.com>
Date: Sat, 24 Sep 2022 10:51:54 +0800
Subject: selftests/tc-testing: add selftests for skbprio qdisc

Test 283e: Create skbprio with default setting
Test c086: Create skbprio with limit setting
Test 6733: Change skbprio with limit setting
Test 2958: Show skbprio class

Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/tc-testing/config          |  1 +
 .../tc-testing/tc-tests/qdiscs/skbprio.json        | 95 ++++++++++++++++++++++
 2 files changed, 96 insertions(+)
 create mode 100644 tools/testing/selftests/tc-testing/tc-tests/qdiscs/skbprio.json

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config
index bc0ef6eb515a..22729f244c6e 100644
--- a/tools/testing/selftests/tc-testing/config
+++ b/tools/testing/selftests/tc-testing/config
@@ -24,6 +24,7 @@ CONFIG_NET_SCH_PRIO=m
 CONFIG_NET_SCH_INGRESS=m
 CONFIG_NET_SCH_SFB=m
 CONFIG_NET_SCH_SFQ=m
+CONFIG_NET_SCH_SKBPRIO=m
 
 #
 # Classification
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/skbprio.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/skbprio.json
new file mode 100644
index 000000000000..5766045c9d33
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/skbprio.json
@@ -0,0 +1,95 @@
+[
+    {
+        "id": "283e",
+        "name": "Create skbprio with default setting",
+        "category": [
+            "qdisc",
+            "skbprio"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root skbprio",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc skbprio 1: root refcnt [0-9]+ limit 64",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "c086",
+        "name": "Create skbprio with limit setting",
+        "category": [
+            "qdisc",
+            "skbprio"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root skbprio limit 1",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc skbprio 1: root refcnt [0-9]+ limit 1",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "6733",
+        "name": "Change skbprio with limit setting",
+        "category": [
+            "qdisc",
+            "skbprio"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true",
+            "$TC qdisc add dev $DUMMY handle 1: root skbprio"
+        ],
+        "cmdUnderTest": "$TC qdisc change dev $DUMMY handle 1: root skbprio limit 32",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc skbprio 1: root refcnt [0-9]+ limit 32",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "2958",
+        "name": "Show skbprio class",
+        "category": [
+            "qdisc",
+            "skbprio"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root skbprio",
+        "expExitCode": "0",
+        "verifyCmd": "$TC class show dev $DUMMY",
+        "matchPattern": "class skbprio 1:",
+        "matchCount": "64",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    }
+]
-- 
cgit v1.2.3


From 8a3b3667ddbd426d2d1a92069954a930e0f1c476 Mon Sep 17 00:00:00 2001
From: Zhengchao Shao <shaozhengchao@huawei.com>
Date: Sat, 24 Sep 2022 10:51:55 +0800
Subject: selftests/tc-testing: add selftests for taprio qdisc

Test ba39: Add taprio Qdisc to multi-queue device (8 queues)
Test 9462: Add taprio Qdisc with multiple sched-entry
Test 8d92: Add taprio Qdisc with txtime-delay
Test d092: Delete taprio Qdisc with valid handle
Test 8471: Show taprio class
Test 0a85: Add taprio Qdisc to single-queue device

Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/tc-testing/config          |   1 +
 .../tc-testing/tc-tests/qdiscs/taprio.json         | 135 +++++++++++++++++++++
 2 files changed, 136 insertions(+)
 create mode 100644 tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config
index 22729f244c6e..d6063918e2c3 100644
--- a/tools/testing/selftests/tc-testing/config
+++ b/tools/testing/selftests/tc-testing/config
@@ -25,6 +25,7 @@ CONFIG_NET_SCH_INGRESS=m
 CONFIG_NET_SCH_SFB=m
 CONFIG_NET_SCH_SFQ=m
 CONFIG_NET_SCH_SKBPRIO=m
+CONFIG_NET_SCH_TAPRIO=m
 
 #
 # Classification
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json
new file mode 100644
index 000000000000..a44455372646
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json
@@ -0,0 +1,135 @@
+[
+    {
+        "id": "ba39",
+        "name": "Add taprio Qdisc to multi-queue device (8 queues)",
+        "category": [
+            "qdisc",
+            "taprio"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "echo \"1 1 8\" > /sys/bus/netdevsim/new_device"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: taprio num_tc 3 map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 queues 1@0 1@0 1@0 base-time 1000000000 sched-entry S 01 300000 flags 0x1 clockid CLOCK_TAI",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $ETH",
+        "matchPattern": "qdisc taprio 1: root refcnt [0-9]+ tc 3 map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2",
+        "matchCount": "1",
+        "teardown": [
+            "echo \"1\" > /sys/bus/netdevsim/del_device"
+        ]
+    },
+    {
+        "id": "9462",
+        "name": "Add taprio Qdisc with multiple sched-entry",
+        "category": [
+            "qdisc",
+            "taprio"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "echo \"1 1 8\" > /sys/bus/netdevsim/new_device"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: taprio num_tc 3 map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 queues 1@0 1@0 1@0 base-time 1000000000 sched-entry S 01 300000 sched-entry S 03 300000 sched-entry S 04 400000 flags 0x1 clockid CLOCK_TAI",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $ETH",
+        "matchPattern": "index [0-9]+ cmd S gatemask 0x[0-9]+ interval [0-9]+00000",
+        "matchCount": "3",
+        "teardown": [
+            "echo \"1\" > /sys/bus/netdevsim/del_device"
+        ]
+    },
+    {
+        "id": "8d92",
+        "name": "Add taprio Qdisc with txtime-delay",
+        "category": [
+            "qdisc",
+            "taprio"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "echo \"1 1 8\" > /sys/bus/netdevsim/new_device"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: taprio num_tc 3 map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 queues 1@0 1@0 1@0 base-time 1000000000 sched-entry S 01 300000 flags 0x1 txtime-delay 500000 clockid CLOCK_TAI",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $ETH",
+        "matchPattern": "clockid TAI flags 0x1 txtime delay 500000",
+        "matchCount": "1",
+        "teardown": [
+            "echo \"1\" > /sys/bus/netdevsim/del_device"
+        ]
+    },
+    {
+        "id": "d092",
+        "name": "Delete taprio Qdisc with valid handle",
+        "category": [
+            "qdisc",
+            "taprio"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "echo \"1 1 8\" > /sys/bus/netdevsim/new_device",
+            "$TC qdisc add dev $ETH root handle 1: taprio num_tc 3 map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 queues 1@0 1@0 1@0 base-time 1000000000 sched-entry S 01 300000 flags 0x1 clockid CLOCK_TAI"
+        ],
+        "cmdUnderTest": "$TC qdisc del dev $ETH root handle 1:",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $ETH",
+        "matchPattern": "qdisc taprio 1: root refcnt",
+        "matchCount": "0",
+        "teardown": [
+            "echo \"1\" > /sys/bus/netdevsim/del_device"
+        ]
+    },
+    {
+        "id": "8471",
+        "name": "Show taprio class",
+        "category": [
+            "qdisc",
+            "taprio"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "echo \"1 1 8\" > /sys/bus/netdevsim/new_device"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: taprio num_tc 3 map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 queues 1@0 1@0 1@0 base-time 1000000000 sched-entry S 01 300000 flags 0x1 clockid CLOCK_TAI",
+        "expExitCode": "0",
+        "verifyCmd": "$TC class show dev $ETH",
+        "matchPattern": "class taprio 1:[0-9]+ root leaf 1:",
+        "matchCount": "8",
+        "teardown": [
+            "echo \"1\" > /sys/bus/netdevsim/del_device"
+        ]
+    },
+    {
+        "id": "0a85",
+        "name": "Add taprio Qdisc to single-queue device",
+        "category": [
+            "qdisc",
+            "taprio"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "echo \"1 1\" > /sys/bus/netdevsim/new_device"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: taprio num_tc 3 map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 queues 1@0 1@0 1@0 base-time 1000000000 sched-entry S 01 300000 flags 0x1 clockid CLOCK_TAI",
+        "expExitCode": "2",
+        "verifyCmd": "$TC qdisc show dev $ETH",
+        "matchPattern": "qdisc taprio 1: root refcnt",
+        "matchCount": "0",
+        "teardown": [
+            "echo \"1\" > /sys/bus/netdevsim/del_device"
+        ]
+    }
+]
-- 
cgit v1.2.3


From 10835be3f0f770254776512db21bba9b410da3c4 Mon Sep 17 00:00:00 2001
From: Zhengchao Shao <shaozhengchao@huawei.com>
Date: Sat, 24 Sep 2022 10:51:56 +0800
Subject: selftests/tc-testing: add selftests for tbf qdisc

Test 6430: Create TBF with default setting
Test 0518: Create TBF with mtu setting
Test 320a: Create TBF with peakrate setting
Test 239b: Create TBF with latency setting
Test c975: Create TBF with overhead setting
Test 948c: Create TBF with linklayer setting
Test 3549: Replace TBF with mtu
Test f948: Change TBF with latency time
Test 2348: Show TBF class

Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/tc-testing/config          |   1 +
 .../selftests/tc-testing/tc-tests/qdiscs/tbf.json  | 211 +++++++++++++++++++++
 2 files changed, 212 insertions(+)
 create mode 100644 tools/testing/selftests/tc-testing/tc-tests/qdiscs/tbf.json

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config
index d6063918e2c3..f8e5e1428bac 100644
--- a/tools/testing/selftests/tc-testing/config
+++ b/tools/testing/selftests/tc-testing/config
@@ -26,6 +26,7 @@ CONFIG_NET_SCH_SFB=m
 CONFIG_NET_SCH_SFQ=m
 CONFIG_NET_SCH_SKBPRIO=m
 CONFIG_NET_SCH_TAPRIO=m
+CONFIG_NET_SCH_TBF=m
 
 #
 # Classification
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/tbf.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/tbf.json
new file mode 100644
index 000000000000..a4b3dfe51ff5
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/tbf.json
@@ -0,0 +1,211 @@
+[
+    {
+        "id": "6430",
+        "name": "Create TBF with default setting",
+        "category": [
+            "qdisc",
+            "tbf"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root tbf limit 1000 burst 1500 rate 10000",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc tbf 1: root refcnt [0-9]+ rate 10Kbit burst 1500b limit 1000b",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "0518",
+        "name": "Create TBF with mtu setting",
+        "category": [
+            "qdisc",
+            "tbf"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root tbf limit 1000 burst 1500 rate 20000 mtu 2048",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc tbf 1: root refcnt [0-9]+ rate 20Kbit burst 1500b limit 1000b",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "320a",
+        "name": "Create TBF with peakrate setting",
+        "category": [
+            "qdisc",
+            "tbf"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root tbf limit 1000 burst 1500 rate 20000 mtu 1510 peakrate 30000",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc tbf 1: root refcnt [0-9]+ rate 20Kbit burst 1500b peakrate 30Kbit minburst.*limit 1000b",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "239b",
+        "name": "Create TBF with latency setting",
+        "category": [
+            "qdisc",
+            "tbf"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root tbf burst 1500 rate 20000 latency 100ms",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc tbf 1: root refcnt [0-9]+ rate 20Kbit burst 1500b lat 100ms",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "c975",
+        "name": "Create TBF with overhead setting",
+        "category": [
+            "qdisc",
+            "tbf"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root tbf limit 1000 burst 1500 rate 20000 overhead 300",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc tbf 1: root refcnt [0-9]+ rate 20Kbit burst 1800b limit 1000b overhead 300",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "948c",
+        "name": "Create TBF with linklayer setting",
+        "category": [
+            "qdisc",
+            "tbf"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root tbf limit 1000 burst 1500 rate 20000 linklayer atm",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc tbf 1: root refcnt [0-9]+ rate 20Kbit burst 1696b limit 1000b linklayer atm",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "3549",
+        "name": "Replace TBF with mtu",
+        "category": [
+            "qdisc",
+            "tbf"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true",
+            "$TC qdisc add dev $DUMMY handle 1: root tbf limit 1000 burst 1500 rate 20000 linklayer atm"
+        ],
+        "cmdUnderTest": "$TC qdisc replace dev $DUMMY handle 1: root tbf limit 1000 burst 1500 rate 20000 linklayer ethernet",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc tbf 1: root refcnt [0-9]+ rate 20Kbit burst 1500b limit 1000b",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "f948",
+        "name": "Change TBF with latency time",
+        "category": [
+            "qdisc",
+            "tbf"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true",
+            "$TC qdisc add dev $DUMMY handle 1: root tbf burst 1500 rate 20000 latency 10ms"
+        ],
+        "cmdUnderTest": "$TC qdisc change dev $DUMMY handle 1: root tbf burst 1500 rate 20000 latency 200ms",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc tbf 1: root refcnt [0-9]+ rate 20Kbit burst 1500b lat 200ms",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "2348",
+        "name": "Show TBF class",
+        "category": [
+            "qdisc",
+            "tbf"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root tbf limit 1000 burst 1500 rate 10000",
+        "expExitCode": "0",
+        "verifyCmd": "$TC class show dev $DUMMY",
+        "matchPattern": "class tbf.*parent 1:",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    }
+]
-- 
cgit v1.2.3


From cc62fbe114c9fada6594d7766acdd709c1c85cf1 Mon Sep 17 00:00:00 2001
From: Zhengchao Shao <shaozhengchao@huawei.com>
Date: Sat, 24 Sep 2022 10:51:57 +0800
Subject: selftests/tc-testing: add selftests for teql qdisc

Test 84a0: Create TEQL with default setting
Test 7734: Create TEQL with multiple device
Test 34a9: Delete TEQL with valid handle
Test 6289: Show TEQL stats

Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/tc-testing/config          |  1 +
 .../selftests/tc-testing/tc-tests/qdiscs/teql.json | 97 ++++++++++++++++++++++
 2 files changed, 98 insertions(+)
 create mode 100644 tools/testing/selftests/tc-testing/tc-tests/qdiscs/teql.json

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config
index f8e5e1428bac..2b2c2a835757 100644
--- a/tools/testing/selftests/tc-testing/config
+++ b/tools/testing/selftests/tc-testing/config
@@ -27,6 +27,7 @@ CONFIG_NET_SCH_SFQ=m
 CONFIG_NET_SCH_SKBPRIO=m
 CONFIG_NET_SCH_TAPRIO=m
 CONFIG_NET_SCH_TBF=m
+CONFIG_NET_SCH_TEQL=m
 
 #
 # Classification
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/teql.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/teql.json
new file mode 100644
index 000000000000..0082be0e93ac
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/teql.json
@@ -0,0 +1,97 @@
+[
+    {
+        "id": "84a0",
+        "name": "Create TEQL with default setting",
+        "category": [
+            "qdisc",
+            "teql"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root teql0",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc teql0 1: root refcnt",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "7734",
+        "name": "Create TEQL with multiple device",
+        "category": [
+            "qdisc",
+            "teql"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true",
+            "echo \"1 1 4\" > /sys/bus/netdevsim/new_device",
+            "$TC qdisc add dev $ETH root handle 1: teql0"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root teql0",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc teql0 1: root refcnt",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "echo \"1\" > /sys/bus/netdevsim/del_device",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "34a9",
+        "name": "Delete TEQL with valid handle",
+        "category": [
+            "qdisc",
+            "teql"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true",
+            "$TC qdisc add dev $DUMMY handle 1: root teql0"
+        ],
+        "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 1: root",
+        "expExitCode": "0",
+        "verifyCmd": "$TC qdisc show dev $DUMMY",
+        "matchPattern": "qdisc teql0 1: root refcnt",
+        "matchCount": "0",
+        "teardown": [
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    },
+    {
+        "id": "6289",
+        "name": "Show TEQL stats",
+        "category": [
+            "qdisc",
+            "teql"
+        ],
+        "plugins": {
+            "requires": "nsPlugin"
+        },
+        "setup": [
+            "$IP link add dev $DUMMY type dummy || /bin/true"
+        ],
+        "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root teql0",
+        "expExitCode": "0",
+        "verifyCmd": "$TC -s qdisc show dev $DUMMY",
+        "matchPattern": "qdisc teql0 1: root refcnt",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DUMMY handle 1: root",
+            "$IP link del dev $DUMMY type dummy"
+        ]
+    }
+]
-- 
cgit v1.2.3


From f0d74c4da1f060d2a66976193712a5e6abd361f5 Mon Sep 17 00:00:00 2001
From: Kui-Feng Lee <kuifeng@fb.com>
Date: Mon, 26 Sep 2022 11:49:53 -0700
Subject: bpf: Parameterize task iterators.

Allow creating an iterator that loops through resources of one
thread/process.

Previously, iterators could only loop through all resources (files,
vmas, and tasks) in the system, even when the user was interested in
only the resources of a specific task or process.  By passing the
additional parameters, users can now create an iterator that goes
through all resources or only the resources of a specific task or process.

Signed-off-by: Kui-Feng Lee <kuifeng@fb.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Acked-by: Martin KaFai Lau <martin.lau@kernel.org>
Link: https://lore.kernel.org/bpf/20220926184957.208194-2-kuifeng@fb.com
---
 include/linux/bpf.h            |  25 ++++++
 include/uapi/linux/bpf.h       |   6 ++
 kernel/bpf/task_iter.c         | 188 ++++++++++++++++++++++++++++++++++++-----
 tools/include/uapi/linux/bpf.h |   6 ++
 4 files changed, 203 insertions(+), 22 deletions(-)

(limited to 'tools')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 5161fac0513f..0f3eaf3ed98c 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1796,6 +1796,27 @@ int bpf_obj_get_user(const char __user *pathname, int flags);
 	extern int bpf_iter_ ## target(args);			\
 	int __init bpf_iter_ ## target(args) { return 0; }
 
+/*
+ * The task type of iterators.
+ *
+ * For BPF task iterators, they can be parameterized with various
+ * parameters to visit only some of tasks.
+ *
+ * BPF_TASK_ITER_ALL (default)
+ *	Iterate over resources of every task.
+ *
+ * BPF_TASK_ITER_TID
+ *	Iterate over resources of a task/tid.
+ *
+ * BPF_TASK_ITER_TGID
+ *	Iterate over resources of every task of a process / task group.
+ */
+enum bpf_iter_task_type {
+	BPF_TASK_ITER_ALL = 0,
+	BPF_TASK_ITER_TID,
+	BPF_TASK_ITER_TGID,
+};
+
 struct bpf_iter_aux_info {
 	/* for map_elem iter */
 	struct bpf_map *map;
@@ -1805,6 +1826,10 @@ struct bpf_iter_aux_info {
 		struct cgroup *start; /* starting cgroup */
 		enum bpf_cgroup_iter_order order;
 	} cgroup;
+	struct {
+		enum bpf_iter_task_type	type;
+		u32 pid;
+	} task;
 };
 
 typedef int (*bpf_iter_attach_target_t)(struct bpf_prog *prog,
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index d6bd10759eaf..455b21a53aac 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -110,6 +110,12 @@ union bpf_iter_link_info {
 		__u32	cgroup_fd;
 		__u64	cgroup_id;
 	} cgroup;
+	/* Parameters of task iterators. */
+	struct {
+		__u32	tid;
+		__u32	pid;
+		__u32	pid_fd;
+	} task;
 };
 
 /* BPF syscall commands, see bpf(2) man-page for more details. */
diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
index 8c921799def4..8b2f47e7139d 100644
--- a/kernel/bpf/task_iter.c
+++ b/kernel/bpf/task_iter.c
@@ -12,6 +12,9 @@
 
 struct bpf_iter_seq_task_common {
 	struct pid_namespace *ns;
+	enum bpf_iter_task_type	type;
+	u32 pid;
+	u32 pid_visiting;
 };
 
 struct bpf_iter_seq_task_info {
@@ -22,18 +25,115 @@ struct bpf_iter_seq_task_info {
 	u32 tid;
 };
 
-static struct task_struct *task_seq_get_next(struct pid_namespace *ns,
+static struct task_struct *task_group_seq_get_next(struct bpf_iter_seq_task_common *common,
+						   u32 *tid,
+						   bool skip_if_dup_files)
+{
+	struct task_struct *task, *next_task;
+	struct pid *pid;
+	u32 saved_tid;
+
+	if (!*tid) {
+		/* The first time, the iterator calls this function. */
+		pid = find_pid_ns(common->pid, common->ns);
+		if (!pid)
+			return NULL;
+
+		task = get_pid_task(pid, PIDTYPE_TGID);
+		if (!task)
+			return NULL;
+
+		*tid = common->pid;
+		common->pid_visiting = common->pid;
+
+		return task;
+	}
+
+	/* If the control returns to user space and comes back to the
+	 * kernel again, *tid and common->pid_visiting should be the
+	 * same for task_seq_start() to pick up the correct task.
+	 */
+	if (*tid == common->pid_visiting) {
+		pid = find_pid_ns(common->pid_visiting, common->ns);
+		task = get_pid_task(pid, PIDTYPE_PID);
+
+		return task;
+	}
+
+	pid = find_pid_ns(common->pid_visiting, common->ns);
+	if (!pid)
+		return NULL;
+
+	task = get_pid_task(pid, PIDTYPE_PID);
+	if (!task)
+		return NULL;
+
+retry:
+	if (!pid_alive(task)) {
+		put_task_struct(task);
+		return NULL;
+	}
+
+	next_task = next_thread(task);
+	put_task_struct(task);
+	if (!next_task)
+		return NULL;
+
+	saved_tid = *tid;
+	*tid = __task_pid_nr_ns(next_task, PIDTYPE_PID, common->ns);
+	if (!*tid || *tid == common->pid) {
+		/* Run out of tasks of a process.  The tasks of a
+		 * thread_group are linked as circular linked list.
+		 */
+		*tid = saved_tid;
+		return NULL;
+	}
+
+	get_task_struct(next_task);
+	common->pid_visiting = *tid;
+
+	if (skip_if_dup_files && task->files == task->group_leader->files) {
+		task = next_task;
+		goto retry;
+	}
+
+	return next_task;
+}
+
+static struct task_struct *task_seq_get_next(struct bpf_iter_seq_task_common *common,
 					     u32 *tid,
 					     bool skip_if_dup_files)
 {
 	struct task_struct *task = NULL;
 	struct pid *pid;
 
+	if (common->type == BPF_TASK_ITER_TID) {
+		if (*tid && *tid != common->pid)
+			return NULL;
+		rcu_read_lock();
+		pid = find_pid_ns(common->pid, common->ns);
+		if (pid) {
+			task = get_pid_task(pid, PIDTYPE_TGID);
+			*tid = common->pid;
+		}
+		rcu_read_unlock();
+
+		return task;
+	}
+
+	if (common->type == BPF_TASK_ITER_TGID) {
+		rcu_read_lock();
+		task = task_group_seq_get_next(common, tid, skip_if_dup_files);
+		rcu_read_unlock();
+
+		return task;
+	}
+
 	rcu_read_lock();
 retry:
-	pid = find_ge_pid(*tid, ns);
+	pid = find_ge_pid(*tid, common->ns);
 	if (pid) {
-		*tid = pid_nr_ns(pid, ns);
+		*tid = pid_nr_ns(pid, common->ns);
 		task = get_pid_task(pid, PIDTYPE_PID);
 		if (!task) {
 			++*tid;
@@ -56,7 +156,7 @@ static void *task_seq_start(struct seq_file *seq, loff_t *pos)
 	struct bpf_iter_seq_task_info *info = seq->private;
 	struct task_struct *task;
 
-	task = task_seq_get_next(info->common.ns, &info->tid, false);
+	task = task_seq_get_next(&info->common, &info->tid, false);
 	if (!task)
 		return NULL;
 
@@ -73,7 +173,7 @@ static void *task_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	++*pos;
 	++info->tid;
 	put_task_struct((struct task_struct *)v);
-	task = task_seq_get_next(info->common.ns, &info->tid, false);
+	task = task_seq_get_next(&info->common, &info->tid, false);
 	if (!task)
 		return NULL;
 
@@ -117,6 +217,41 @@ static void task_seq_stop(struct seq_file *seq, void *v)
 		put_task_struct((struct task_struct *)v);
 }
 
+static int bpf_iter_attach_task(struct bpf_prog *prog,
+				union bpf_iter_link_info *linfo,
+				struct bpf_iter_aux_info *aux)
+{
+	unsigned int flags;
+	struct pid *pid;
+	pid_t tgid;
+
+	if ((!!linfo->task.tid + !!linfo->task.pid + !!linfo->task.pid_fd) > 1)
+		return -EINVAL;
+
+	aux->task.type = BPF_TASK_ITER_ALL;
+	if (linfo->task.tid != 0) {
+		aux->task.type = BPF_TASK_ITER_TID;
+		aux->task.pid = linfo->task.tid;
+	}
+	if (linfo->task.pid != 0) {
+		aux->task.type = BPF_TASK_ITER_TGID;
+		aux->task.pid = linfo->task.pid;
+	}
+	if (linfo->task.pid_fd != 0) {
+		aux->task.type = BPF_TASK_ITER_TGID;
+
+		pid = pidfd_get_pid(linfo->task.pid_fd, &flags);
+		if (IS_ERR(pid))
+			return PTR_ERR(pid);
+
+		tgid = pid_nr_ns(pid, task_active_pid_ns(current));
+		aux->task.pid = tgid;
+		put_pid(pid);
+	}
+
+	return 0;
+}
+
 static const struct seq_operations task_seq_ops = {
 	.start	= task_seq_start,
 	.next	= task_seq_next,
@@ -137,8 +272,7 @@ struct bpf_iter_seq_task_file_info {
 static struct file *
 task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info)
 {
-	struct pid_namespace *ns = info->common.ns;
-	u32 curr_tid = info->tid;
+	u32 saved_tid = info->tid;
 	struct task_struct *curr_task;
 	unsigned int curr_fd = info->fd;
 
@@ -151,21 +285,18 @@ again:
 		curr_task = info->task;
 		curr_fd = info->fd;
 	} else {
-                curr_task = task_seq_get_next(ns, &curr_tid, true);
+		curr_task = task_seq_get_next(&info->common, &info->tid, true);
                 if (!curr_task) {
                         info->task = NULL;
-                        info->tid = curr_tid;
                         return NULL;
                 }
 
-                /* set info->task and info->tid */
+		/* set info->task */
 		info->task = curr_task;
-		if (curr_tid == info->tid) {
+		if (saved_tid == info->tid)
 			curr_fd = info->fd;
-		} else {
-			info->tid = curr_tid;
+		else
 			curr_fd = 0;
-		}
 	}
 
 	rcu_read_lock();
@@ -186,9 +317,15 @@ again:
 	/* the current task is done, go to the next task */
 	rcu_read_unlock();
 	put_task_struct(curr_task);
+
+	if (info->common.type == BPF_TASK_ITER_TID) {
+		info->task = NULL;
+		return NULL;
+	}
+
 	info->task = NULL;
 	info->fd = 0;
-	curr_tid = ++(info->tid);
+	saved_tid = ++(info->tid);
 	goto again;
 }
 
@@ -269,6 +406,9 @@ static int init_seq_pidns(void *priv_data, struct bpf_iter_aux_info *aux)
 	struct bpf_iter_seq_task_common *common = priv_data;
 
 	common->ns = get_pid_ns(task_active_pid_ns(current));
+	common->type = aux->task.type;
+	common->pid = aux->task.pid;
+
 	return 0;
 }
 
@@ -307,11 +447,10 @@ enum bpf_task_vma_iter_find_op {
 static struct vm_area_struct *
 task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info)
 {
-	struct pid_namespace *ns = info->common.ns;
 	enum bpf_task_vma_iter_find_op op;
 	struct vm_area_struct *curr_vma;
 	struct task_struct *curr_task;
-	u32 curr_tid = info->tid;
+	u32 saved_tid = info->tid;
 
 	/* If this function returns a non-NULL vma, it holds a reference to
 	 * the task_struct, and holds read lock on vma->mm->mmap_lock.
@@ -371,14 +510,13 @@ task_vma_seq_get_next(struct bpf_iter_seq_task_vma_info *info)
 		}
 	} else {
 again:
-		curr_task = task_seq_get_next(ns, &curr_tid, true);
+		curr_task = task_seq_get_next(&info->common, &info->tid, true);
 		if (!curr_task) {
-			info->tid = curr_tid + 1;
+			info->tid++;
 			goto finish;
 		}
 
-		if (curr_tid != info->tid) {
-			info->tid = curr_tid;
+		if (saved_tid != info->tid) {
 			/* new task, process the first vma */
 			op = task_vma_iter_first_vma;
 		} else {
@@ -430,9 +568,12 @@ again:
 	return curr_vma;
 
 next_task:
+	if (info->common.type == BPF_TASK_ITER_TID)
+		goto finish;
+
 	put_task_struct(curr_task);
 	info->task = NULL;
-	curr_tid++;
+	info->tid++;
 	goto again;
 
 finish:
@@ -533,6 +674,7 @@ static const struct bpf_iter_seq_info task_seq_info = {
 
 static struct bpf_iter_reg task_reg_info = {
 	.target			= "task",
+	.attach_target		= bpf_iter_attach_task,
 	.feature		= BPF_ITER_RESCHED,
 	.ctx_arg_info_size	= 1,
 	.ctx_arg_info		= {
@@ -551,6 +693,7 @@ static const struct bpf_iter_seq_info task_file_seq_info = {
 
 static struct bpf_iter_reg task_file_reg_info = {
 	.target			= "task_file",
+	.attach_target		= bpf_iter_attach_task,
 	.feature		= BPF_ITER_RESCHED,
 	.ctx_arg_info_size	= 2,
 	.ctx_arg_info		= {
@@ -571,6 +714,7 @@ static const struct bpf_iter_seq_info task_vma_seq_info = {
 
 static struct bpf_iter_reg task_vma_reg_info = {
 	.target			= "task_vma",
+	.attach_target		= bpf_iter_attach_task,
 	.feature		= BPF_ITER_RESCHED,
 	.ctx_arg_info_size	= 2,
 	.ctx_arg_info		= {
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index d6bd10759eaf..455b21a53aac 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -110,6 +110,12 @@ union bpf_iter_link_info {
 		__u32	cgroup_fd;
 		__u64	cgroup_id;
 	} cgroup;
+	/* Parameters of task iterators. */
+	struct {
+		__u32	tid;
+		__u32	pid;
+		__u32	pid_fd;
+	} task;
 };
 
 /* BPF syscall commands, see bpf(2) man-page for more details. */
-- 
cgit v1.2.3


From 21fb6f2aa3890b0d0abf88b7756d0098e9367a7c Mon Sep 17 00:00:00 2001
From: Kui-Feng Lee <kuifeng@fb.com>
Date: Mon, 26 Sep 2022 11:49:54 -0700
Subject: bpf: Handle bpf_link_info for the parameterized task BPF iterators.

Add new fields to bpf_link_info that users can query through
bpf_obj_get_info_by_fd().

Signed-off-by: Kui-Feng Lee <kuifeng@fb.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Acked-by: Martin KaFai Lau <martin.lau@kernel.org>
Link: https://lore.kernel.org/bpf/20220926184957.208194-3-kuifeng@fb.com
---
 include/uapi/linux/bpf.h       |  4 ++++
 kernel/bpf/task_iter.c         | 18 ++++++++++++++++++
 tools/include/uapi/linux/bpf.h |  4 ++++
 3 files changed, 26 insertions(+)

(limited to 'tools')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 455b21a53aac..3075018a4ef8 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -6265,6 +6265,10 @@ struct bpf_link_info {
 					__u64 cgroup_id;
 					__u32 order;
 				} cgroup;
+				struct {
+					__u32 tid;
+					__u32 pid;
+				} task;
 			};
 		} iter;
 		struct  {
diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
index 8b2f47e7139d..46f836be22e2 100644
--- a/kernel/bpf/task_iter.c
+++ b/kernel/bpf/task_iter.c
@@ -672,6 +672,21 @@ static const struct bpf_iter_seq_info task_seq_info = {
 	.seq_priv_size		= sizeof(struct bpf_iter_seq_task_info),
 };
 
+static int bpf_iter_fill_link_info(const struct bpf_iter_aux_info *aux, struct bpf_link_info *info)
+{
+	switch (aux->task.type) {
+	case BPF_TASK_ITER_TID:
+		info->iter.task.tid = aux->task.pid;
+		break;
+	case BPF_TASK_ITER_TGID:
+		info->iter.task.pid = aux->task.pid;
+		break;
+	default:
+		break;
+	}
+	return 0;
+}
+
 static struct bpf_iter_reg task_reg_info = {
 	.target			= "task",
 	.attach_target		= bpf_iter_attach_task,
@@ -682,6 +697,7 @@ static struct bpf_iter_reg task_reg_info = {
 		  PTR_TO_BTF_ID_OR_NULL },
 	},
 	.seq_info		= &task_seq_info,
+	.fill_link_info		= bpf_iter_fill_link_info,
 };
 
 static const struct bpf_iter_seq_info task_file_seq_info = {
@@ -703,6 +719,7 @@ static struct bpf_iter_reg task_file_reg_info = {
 		  PTR_TO_BTF_ID_OR_NULL },
 	},
 	.seq_info		= &task_file_seq_info,
+	.fill_link_info		= bpf_iter_fill_link_info,
 };
 
 static const struct bpf_iter_seq_info task_vma_seq_info = {
@@ -724,6 +741,7 @@ static struct bpf_iter_reg task_vma_reg_info = {
 		  PTR_TO_BTF_ID_OR_NULL },
 	},
 	.seq_info		= &task_vma_seq_info,
+	.fill_link_info		= bpf_iter_fill_link_info,
 };
 
 BPF_CALL_5(bpf_find_vma, struct task_struct *, task, u64, start,
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 455b21a53aac..3075018a4ef8 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -6265,6 +6265,10 @@ struct bpf_link_info {
 					__u64 cgroup_id;
 					__u32 order;
 				} cgroup;
+				struct {
+					__u32 tid;
+					__u32 pid;
+				} task;
 			};
 		} iter;
 		struct  {
-- 
cgit v1.2.3


From b3e1331eb925a45df1cc5d02a725e5ea70da0e2e Mon Sep 17 00:00:00 2001
From: Kui-Feng Lee <kuifeng@fb.com>
Date: Mon, 26 Sep 2022 11:49:56 -0700
Subject: selftests/bpf: Test parameterized task BPF iterators.

Test iterators of vma, files and tasks.

Ensure the API works appropriately to visit all tasks,
tasks in a process, or a particular task.

Signed-off-by: Kui-Feng Lee <kuifeng@fb.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Acked-by: Martin KaFai Lau <martin.lau@kernel.org>
Link: https://lore.kernel.org/bpf/20220926184957.208194-5-kuifeng@fb.com
---
 tools/testing/selftests/bpf/prog_tests/bpf_iter.c  | 282 +++++++++++++++++++--
 tools/testing/selftests/bpf/prog_tests/btf_dump.c  |   2 +-
 tools/testing/selftests/bpf/progs/bpf_iter_task.c  |   9 +
 .../selftests/bpf/progs/bpf_iter_task_file.c       |   9 +-
 .../selftests/bpf/progs/bpf_iter_task_vma.c        |   7 +-
 .../selftests/bpf/progs/bpf_iter_vma_offset.c      |  37 +++
 6 files changed, 322 insertions(+), 24 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_vma_offset.c

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index e89685bd587c..3369c5ec3a17 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -1,6 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2020 Facebook */
 #include <test_progs.h>
+#include <unistd.h>
+#include <sys/syscall.h>
 #include "bpf_iter_ipv6_route.skel.h"
 #include "bpf_iter_netlink.skel.h"
 #include "bpf_iter_bpf_map.skel.h"
@@ -14,6 +16,7 @@
 #include "bpf_iter_udp4.skel.h"
 #include "bpf_iter_udp6.skel.h"
 #include "bpf_iter_unix.skel.h"
+#include "bpf_iter_vma_offset.skel.h"
 #include "bpf_iter_test_kern1.skel.h"
 #include "bpf_iter_test_kern2.skel.h"
 #include "bpf_iter_test_kern3.skel.h"
@@ -43,13 +46,13 @@ static void test_btf_id_or_null(void)
 	}
 }
 
-static void do_dummy_read(struct bpf_program *prog)
+static void do_dummy_read_opts(struct bpf_program *prog, struct bpf_iter_attach_opts *opts)
 {
 	struct bpf_link *link;
 	char buf[16] = {};
 	int iter_fd, len;
 
-	link = bpf_program__attach_iter(prog, NULL);
+	link = bpf_program__attach_iter(prog, opts);
 	if (!ASSERT_OK_PTR(link, "attach_iter"))
 		return;
 
@@ -68,6 +71,11 @@ free_link:
 	bpf_link__destroy(link);
 }
 
+static void do_dummy_read(struct bpf_program *prog)
+{
+	do_dummy_read_opts(prog, NULL);
+}
+
 static void do_read_map_iter_fd(struct bpf_object_skeleton **skel, struct bpf_program *prog,
 				struct bpf_map *map)
 {
@@ -167,19 +175,140 @@ static void test_bpf_map(void)
 	bpf_iter_bpf_map__destroy(skel);
 }
 
-static void test_task(void)
+static int pidfd_open(pid_t pid, unsigned int flags)
+{
+	return syscall(SYS_pidfd_open, pid, flags);
+}
+
+static void check_bpf_link_info(const struct bpf_program *prog)
+{
+	LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+	union bpf_iter_link_info linfo;
+	struct bpf_link_info info = {};
+	struct bpf_link *link;
+	__u32 info_len;
+	int err;
+
+	memset(&linfo, 0, sizeof(linfo));
+	linfo.task.tid = getpid();
+	opts.link_info = &linfo;
+	opts.link_info_len = sizeof(linfo);
+
+	link = bpf_program__attach_iter(prog, &opts);
+	if (!ASSERT_OK_PTR(link, "attach_iter"))
+		return;
+
+	info_len = sizeof(info);
+	err = bpf_obj_get_info_by_fd(bpf_link__fd(link), &info, &info_len);
+	ASSERT_OK(err, "bpf_obj_get_info_by_fd");
+	ASSERT_EQ(info.iter.task.tid, getpid(), "check_task_tid");
+
+	bpf_link__destroy(link);
+}
+
+static pthread_mutex_t do_nothing_mutex;
+
+static void *do_nothing_wait(void *arg)
+{
+	pthread_mutex_lock(&do_nothing_mutex);
+	pthread_mutex_unlock(&do_nothing_mutex);
+
+	pthread_exit(arg);
+}
+
+static void test_task_common_nocheck(struct bpf_iter_attach_opts *opts,
+				     int *num_unknown, int *num_known)
 {
 	struct bpf_iter_task *skel;
+	pthread_t thread_id;
+	void *ret;
 
 	skel = bpf_iter_task__open_and_load();
 	if (!ASSERT_OK_PTR(skel, "bpf_iter_task__open_and_load"))
 		return;
 
-	do_dummy_read(skel->progs.dump_task);
+	ASSERT_OK(pthread_mutex_lock(&do_nothing_mutex), "pthread_mutex_lock");
+
+	ASSERT_OK(pthread_create(&thread_id, NULL, &do_nothing_wait, NULL),
+		  "pthread_create");
+
+	skel->bss->tid = getpid();
+
+	do_dummy_read_opts(skel->progs.dump_task, opts);
+
+	*num_unknown = skel->bss->num_unknown_tid;
+	*num_known = skel->bss->num_known_tid;
+
+	ASSERT_OK(pthread_mutex_unlock(&do_nothing_mutex), "pthread_mutex_unlock");
+	ASSERT_FALSE(pthread_join(thread_id, &ret) || ret != NULL,
+		     "pthread_join");
 
 	bpf_iter_task__destroy(skel);
 }
 
+static void test_task_common(struct bpf_iter_attach_opts *opts, int num_unknown, int num_known)
+{
+	int num_unknown_tid, num_known_tid;
+
+	test_task_common_nocheck(opts, &num_unknown_tid, &num_known_tid);
+	ASSERT_EQ(num_unknown_tid, num_unknown, "check_num_unknown_tid");
+	ASSERT_EQ(num_known_tid, num_known, "check_num_known_tid");
+}
+
+static void test_task_tid(void)
+{
+	LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+	union bpf_iter_link_info linfo;
+	int num_unknown_tid, num_known_tid;
+
+	memset(&linfo, 0, sizeof(linfo));
+	linfo.task.tid = getpid();
+	opts.link_info = &linfo;
+	opts.link_info_len = sizeof(linfo);
+	test_task_common(&opts, 0, 1);
+
+	linfo.task.tid = 0;
+	linfo.task.pid = getpid();
+	test_task_common(&opts, 1, 1);
+
+	test_task_common_nocheck(NULL, &num_unknown_tid, &num_known_tid);
+	ASSERT_GT(num_unknown_tid, 1, "check_num_unknown_tid");
+	ASSERT_EQ(num_known_tid, 1, "check_num_known_tid");
+}
+
+static void test_task_pid(void)
+{
+	LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+	union bpf_iter_link_info linfo;
+
+	memset(&linfo, 0, sizeof(linfo));
+	linfo.task.pid = getpid();
+	opts.link_info = &linfo;
+	opts.link_info_len = sizeof(linfo);
+
+	test_task_common(&opts, 1, 1);
+}
+
+static void test_task_pidfd(void)
+{
+	LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+	union bpf_iter_link_info linfo;
+	int pidfd;
+
+	pidfd = pidfd_open(getpid(), 0);
+	if (!ASSERT_GT(pidfd, 0, "pidfd_open"))
+		return;
+
+	memset(&linfo, 0, sizeof(linfo));
+	linfo.task.pid_fd = pidfd;
+	opts.link_info = &linfo;
+	opts.link_info_len = sizeof(linfo);
+
+	test_task_common(&opts, 1, 1);
+
+	close(pidfd);
+}
+
 static void test_task_sleepable(void)
 {
 	struct bpf_iter_task *skel;
@@ -212,14 +341,11 @@ static void test_task_stack(void)
 	bpf_iter_task_stack__destroy(skel);
 }
 
-static void *do_nothing(void *arg)
-{
-	pthread_exit(arg);
-}
-
 static void test_task_file(void)
 {
+	LIBBPF_OPTS(bpf_iter_attach_opts, opts);
 	struct bpf_iter_task_file *skel;
+	union bpf_iter_link_info linfo;
 	pthread_t thread_id;
 	void *ret;
 
@@ -229,19 +355,36 @@ static void test_task_file(void)
 
 	skel->bss->tgid = getpid();
 
-	if (!ASSERT_OK(pthread_create(&thread_id, NULL, &do_nothing, NULL),
-		  "pthread_create"))
-		goto done;
+	ASSERT_OK(pthread_mutex_lock(&do_nothing_mutex), "pthread_mutex_lock");
 
-	do_dummy_read(skel->progs.dump_task_file);
+	ASSERT_OK(pthread_create(&thread_id, NULL, &do_nothing_wait, NULL),
+		  "pthread_create");
+
+	memset(&linfo, 0, sizeof(linfo));
+	linfo.task.tid = getpid();
+	opts.link_info = &linfo;
+	opts.link_info_len = sizeof(linfo);
 
-	if (!ASSERT_FALSE(pthread_join(thread_id, &ret) || ret != NULL,
-		  "pthread_join"))
-		goto done;
+	do_dummy_read_opts(skel->progs.dump_task_file, &opts);
 
 	ASSERT_EQ(skel->bss->count, 0, "check_count");
+	ASSERT_EQ(skel->bss->unique_tgid_count, 1, "check_unique_tgid_count");
+
+	skel->bss->last_tgid = 0;
+	skel->bss->count = 0;
+	skel->bss->unique_tgid_count = 0;
+
+	do_dummy_read(skel->progs.dump_task_file);
+
+	ASSERT_EQ(skel->bss->count, 0, "check_count");
+	ASSERT_GT(skel->bss->unique_tgid_count, 1, "check_unique_tgid_count");
+
+	check_bpf_link_info(skel->progs.dump_task_file);
+
+	ASSERT_OK(pthread_mutex_unlock(&do_nothing_mutex), "pthread_mutex_unlock");
+	ASSERT_OK(pthread_join(thread_id, &ret), "pthread_join");
+	ASSERT_NULL(ret, "pthread_join");
 
-done:
 	bpf_iter_task_file__destroy(skel);
 }
 
@@ -1249,7 +1392,7 @@ static void str_strip_first_line(char *str)
 	*dst = '\0';
 }
 
-static void test_task_vma(void)
+static void test_task_vma_common(struct bpf_iter_attach_opts *opts)
 {
 	int err, iter_fd = -1, proc_maps_fd = -1;
 	struct bpf_iter_task_vma *skel;
@@ -1261,13 +1404,14 @@ static void test_task_vma(void)
 		return;
 
 	skel->bss->pid = getpid();
+	skel->bss->one_task = opts ? 1 : 0;
 
 	err = bpf_iter_task_vma__load(skel);
 	if (!ASSERT_OK(err, "bpf_iter_task_vma__load"))
 		goto out;
 
 	skel->links.proc_maps = bpf_program__attach_iter(
-		skel->progs.proc_maps, NULL);
+		skel->progs.proc_maps, opts);
 
 	if (!ASSERT_OK_PTR(skel->links.proc_maps, "bpf_program__attach_iter")) {
 		skel->links.proc_maps = NULL;
@@ -1291,6 +1435,8 @@ static void test_task_vma(void)
 			goto out;
 		len += err;
 	}
+	if (opts)
+		ASSERT_EQ(skel->bss->one_task_error, 0, "unexpected task");
 
 	/* read CMP_BUFFER_SIZE (1kB) from /proc/pid/maps */
 	snprintf(maps_path, 64, "/proc/%u/maps", skel->bss->pid);
@@ -1306,6 +1452,9 @@ static void test_task_vma(void)
 	str_strip_first_line(proc_maps_output);
 
 	ASSERT_STREQ(task_vma_output, proc_maps_output, "compare_output");
+
+	check_bpf_link_info(skel->progs.proc_maps);
+
 out:
 	close(proc_maps_fd);
 	close(iter_fd);
@@ -1325,8 +1474,93 @@ void test_bpf_sockmap_map_iter_fd(void)
 	bpf_iter_sockmap__destroy(skel);
 }
 
+static void test_task_vma(void)
+{
+	LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+	union bpf_iter_link_info linfo;
+
+	memset(&linfo, 0, sizeof(linfo));
+	linfo.task.tid = getpid();
+	opts.link_info = &linfo;
+	opts.link_info_len = sizeof(linfo);
+
+	test_task_vma_common(&opts);
+	test_task_vma_common(NULL);
+}
+
+/* uprobe attach point */
+static noinline int trigger_func(int arg)
+{
+	asm volatile ("");
+	return arg + 1;
+}
+
+static void test_task_vma_offset_common(struct bpf_iter_attach_opts *opts, bool one_proc)
+{
+	struct bpf_iter_vma_offset *skel;
+	struct bpf_link *link;
+	char buf[16] = {};
+	int iter_fd, len;
+	int pgsz, shift;
+
+	skel = bpf_iter_vma_offset__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_vma_offset__open_and_load"))
+		return;
+
+	skel->bss->pid = getpid();
+	skel->bss->address = (uintptr_t)trigger_func;
+	for (pgsz = getpagesize(), shift = 0; pgsz > 1; pgsz >>= 1, shift++)
+		;
+	skel->bss->page_shift = shift;
+
+	link = bpf_program__attach_iter(skel->progs.get_vma_offset, opts);
+	if (!ASSERT_OK_PTR(link, "attach_iter"))
+		return;
+
+	iter_fd = bpf_iter_create(bpf_link__fd(link));
+	if (!ASSERT_GT(iter_fd, 0, "create_iter"))
+		goto exit;
+
+	while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
+		;
+	buf[15] = 0;
+	ASSERT_EQ(strcmp(buf, "OK\n"), 0, "strcmp");
+
+	ASSERT_EQ(skel->bss->offset, get_uprobe_offset(trigger_func), "offset");
+	if (one_proc)
+		ASSERT_EQ(skel->bss->unique_tgid_cnt, 1, "unique_tgid_count");
+	else
+		ASSERT_GT(skel->bss->unique_tgid_cnt, 1, "unique_tgid_count");
+
+	close(iter_fd);
+
+exit:
+	bpf_link__destroy(link);
+}
+
+static void test_task_vma_offset(void)
+{
+	LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+	union bpf_iter_link_info linfo;
+
+	memset(&linfo, 0, sizeof(linfo));
+	linfo.task.pid = getpid();
+	opts.link_info = &linfo;
+	opts.link_info_len = sizeof(linfo);
+
+	test_task_vma_offset_common(&opts, true);
+
+	linfo.task.pid = 0;
+	linfo.task.tid = getpid();
+	test_task_vma_offset_common(&opts, true);
+
+	test_task_vma_offset_common(NULL, false);
+}
+
 void test_bpf_iter(void)
 {
+	ASSERT_OK(pthread_mutex_init(&do_nothing_mutex, NULL), "pthread_mutex_init");
+
 	if (test__start_subtest("btf_id_or_null"))
 		test_btf_id_or_null();
 	if (test__start_subtest("ipv6_route"))
@@ -1335,8 +1569,12 @@ void test_bpf_iter(void)
 		test_netlink();
 	if (test__start_subtest("bpf_map"))
 		test_bpf_map();
-	if (test__start_subtest("task"))
-		test_task();
+	if (test__start_subtest("task_tid"))
+		test_task_tid();
+	if (test__start_subtest("task_pid"))
+		test_task_pid();
+	if (test__start_subtest("task_pidfd"))
+		test_task_pidfd();
 	if (test__start_subtest("task_sleepable"))
 		test_task_sleepable();
 	if (test__start_subtest("task_stack"))
@@ -1397,4 +1635,6 @@ void test_bpf_iter(void)
 		test_ksym_iter();
 	if (test__start_subtest("bpf_sockmap_map_iter_fd"))
 		test_bpf_sockmap_map_iter_fd();
+	if (test__start_subtest("vma_offset"))
+		test_task_vma_offset();
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
index b1ca954ed1e5..24da335482d4 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
@@ -764,7 +764,7 @@ static void test_btf_dump_struct_data(struct btf *btf, struct btf_dump *d,
 
 	/* union with nested struct */
 	TEST_BTF_DUMP_DATA(btf, d, "union", str, union bpf_iter_link_info, BTF_F_COMPACT,
-			   "(union bpf_iter_link_info){.map = (struct){.map_fd = (__u32)1,},.cgroup = (struct){.order = (enum bpf_cgroup_iter_order)BPF_CGROUP_ITER_SELF_ONLY,.cgroup_fd = (__u32)1,},}",
+			   "(union bpf_iter_link_info){.map = (struct){.map_fd = (__u32)1,},.cgroup = (struct){.order = (enum bpf_cgroup_iter_order)BPF_CGROUP_ITER_SELF_ONLY,.cgroup_fd = (__u32)1,},.task = (struct){.tid = (__u32)1,.pid = (__u32)1,},}",
 			   { .cgroup = { .order = 1, .cgroup_fd = 1, }});
 
 	/* struct skb with nested structs/unions; because type output is so
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task.c b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
index d22741272692..96131b9a1caa 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
@@ -6,6 +6,10 @@
 
 char _license[] SEC("license") = "GPL";
 
+uint32_t tid = 0;
+int num_unknown_tid = 0;
+int num_known_tid = 0;
+
 SEC("iter/task")
 int dump_task(struct bpf_iter__task *ctx)
 {
@@ -18,6 +22,11 @@ int dump_task(struct bpf_iter__task *ctx)
 		return 0;
 	}
 
+	if (task->pid != tid)
+		num_unknown_tid++;
+	else
+		num_known_tid++;
+
 	if (ctx->meta->seq_num == 0)
 		BPF_SEQ_PRINTF(seq, "    tgid      gid\n");
 
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
index 6e7b400888fe..b0255080662d 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
@@ -7,14 +7,16 @@ char _license[] SEC("license") = "GPL";
 
 int count = 0;
 int tgid = 0;
+int last_tgid = 0;
+int unique_tgid_count = 0;
 
 SEC("iter/task_file")
 int dump_task_file(struct bpf_iter__task_file *ctx)
 {
 	struct seq_file *seq = ctx->meta->seq;
 	struct task_struct *task = ctx->task;
-	__u32 fd = ctx->fd;
 	struct file *file = ctx->file;
+	__u32 fd = ctx->fd;
 
 	if (task == (void *)0 || file == (void *)0)
 		return 0;
@@ -27,6 +29,11 @@ int dump_task_file(struct bpf_iter__task_file *ctx)
 	if (tgid == task->tgid && task->tgid != task->pid)
 		count++;
 
+	if (last_tgid != task->tgid) {
+		last_tgid = task->tgid;
+		unique_tgid_count++;
+	}
+
 	BPF_SEQ_PRINTF(seq, "%8d %8d %8d %lx\n", task->tgid, task->pid, fd,
 		       (long)file->f_op);
 	return 0;
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c
index 4ea6a37d1345..dd923dc637d5 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_vma.c
@@ -20,6 +20,8 @@ char _license[] SEC("license") = "GPL";
 #define D_PATH_BUF_SIZE 1024
 char d_path_buf[D_PATH_BUF_SIZE] = {};
 __u32 pid = 0;
+__u32 one_task = 0;
+__u32 one_task_error = 0;
 
 SEC("iter/task_vma") int proc_maps(struct bpf_iter__task_vma *ctx)
 {
@@ -33,8 +35,11 @@ SEC("iter/task_vma") int proc_maps(struct bpf_iter__task_vma *ctx)
 		return 0;
 
 	file = vma->vm_file;
-	if (task->tgid != pid)
+	if (task->tgid != pid) {
+		if (one_task)
+			one_task_error = 1;
 		return 0;
+	}
 	perm_str[0] = (vma->vm_flags & VM_READ) ? 'r' : '-';
 	perm_str[1] = (vma->vm_flags & VM_WRITE) ? 'w' : '-';
 	perm_str[2] = (vma->vm_flags & VM_EXEC) ? 'x' : '-';
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_vma_offset.c b/tools/testing/selftests/bpf/progs/bpf_iter_vma_offset.c
new file mode 100644
index 000000000000..ee7455d2623a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_vma_offset.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+__u32 unique_tgid_cnt = 0;
+uintptr_t address = 0;
+uintptr_t offset = 0;
+__u32 last_tgid = 0;
+__u32 pid = 0;
+__u32 page_shift = 0;
+
+SEC("iter/task_vma")
+int get_vma_offset(struct bpf_iter__task_vma *ctx)
+{
+	struct vm_area_struct *vma = ctx->vma;
+	struct seq_file *seq = ctx->meta->seq;
+	struct task_struct *task = ctx->task;
+
+	if (task == NULL || vma == NULL)
+		return 0;
+
+	if (last_tgid != task->tgid)
+		unique_tgid_cnt++;
+	last_tgid = task->tgid;
+
+	if (task->tgid != pid)
+		return 0;
+
+	if (vma->vm_start <= address && vma->vm_end > address) {
+		offset = address - vma->vm_start + (vma->vm_pgoff << page_shift);
+		BPF_SEQ_PRINTF(seq, "OK\n");
+	}
+	return 0;
+}
-- 
cgit v1.2.3


From 6bdb6d6be019f697296f52c37865dd7b0ce80750 Mon Sep 17 00:00:00 2001
From: Kui-Feng Lee <kuifeng@fb.com>
Date: Mon, 26 Sep 2022 11:49:57 -0700
Subject: bpftool: Show parameters of BPF task iterators.

Show the tid or pid of a task iterator if it was created with a tid or pid argument.

For example, the command `bpftool link list` may print the following
lines:

1: iter  prog 2  target_name bpf_map
2: iter  prog 3  target_name bpf_prog
33: iter  prog 225  target_name task_file  tid 1644
        pids test_progs(1644)

Link 33 is a task_file iterator with tid 1644.  For now, only the task,
task_file and task_vma targets accept a tid or pid, which filters out
tasks other than those belonging to that process (pid) or thread (tid).

Signed-off-by: Kui-Feng Lee <kuifeng@fb.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Quentin Monnet <quentin@isovalent.com>
Acked-by: Yonghong Song <yhs@fb.com>
Acked-by: Martin KaFai Lau <martin.lau@kernel.org>
Link: https://lore.kernel.org/bpf/20220926184957.208194-6-kuifeng@fb.com
---
 tools/bpf/bpftool/link.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c
index ef0dc2f8d5a2..2863639706dd 100644
--- a/tools/bpf/bpftool/link.c
+++ b/tools/bpf/bpftool/link.c
@@ -106,6 +106,13 @@ static const char *cgroup_order_string(__u32 order)
 	}
 }
 
+static bool is_iter_task_target(const char *target_name)
+{
+	return strcmp(target_name, "task") == 0 ||
+		strcmp(target_name, "task_file") == 0 ||
+		strcmp(target_name, "task_vma") == 0;
+}
+
 static void show_iter_json(struct bpf_link_info *info, json_writer_t *wtr)
 {
 	const char *target_name = u64_to_ptr(info->iter.target_name);
@@ -114,6 +121,12 @@ static void show_iter_json(struct bpf_link_info *info, json_writer_t *wtr)
 
 	if (is_iter_map_target(target_name))
 		jsonw_uint_field(wtr, "map_id", info->iter.map.map_id);
+	else if (is_iter_task_target(target_name)) {
+		if (info->iter.task.tid)
+			jsonw_uint_field(wtr, "tid", info->iter.task.tid);
+		else if (info->iter.task.pid)
+			jsonw_uint_field(wtr, "pid", info->iter.task.pid);
+	}
 
 	if (is_iter_cgroup_target(target_name)) {
 		jsonw_lluint_field(wtr, "cgroup_id", info->iter.cgroup.cgroup_id);
@@ -237,6 +250,12 @@ static void show_iter_plain(struct bpf_link_info *info)
 
 	if (is_iter_map_target(target_name))
 		printf("map_id %u  ", info->iter.map.map_id);
+	else if (is_iter_task_target(target_name)) {
+		if (info->iter.task.tid)
+			printf("tid %u ", info->iter.task.tid);
+		else if (info->iter.task.pid)
+			printf("pid %u ", info->iter.task.pid);
+	}
 
 	if (is_iter_cgroup_target(target_name)) {
 		printf("cgroup_id %llu  ", info->iter.cgroup.cgroup_id);
-- 
cgit v1.2.3


From 38e35e1d0cee3432baadfd6900e1d05a3419eda6 Mon Sep 17 00:00:00 2001
From: Wang Yufen <wangyufen@huawei.com>
Date: Mon, 26 Sep 2022 13:12:01 +0800
Subject: selftests/bpf: Convert sockmap_basic test to ASSERT_* macros

Convert the selftest to use the preferred ASSERT_* macros instead of the
deprecated CHECK().

Signed-off-by: Wang Yufen <wangyufen@huawei.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/1664169131-32405-2-git-send-email-wangyufen@huawei.com
---
 .../selftests/bpf/prog_tests/sockmap_basic.c       | 87 ++++++++--------------
 1 file changed, 33 insertions(+), 54 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
index cec5c0882372..0aa088900699 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
@@ -27,21 +27,21 @@ static int connected_socket_v4(void)
 	int s, repair, err;
 
 	s = socket(AF_INET, SOCK_STREAM, 0);
-	if (CHECK_FAIL(s == -1))
+	if (!ASSERT_GE(s, 0, "socket"))
 		goto error;
 
 	repair = TCP_REPAIR_ON;
 	err = setsockopt(s, SOL_TCP, TCP_REPAIR, &repair, sizeof(repair));
-	if (CHECK_FAIL(err))
+	if (!ASSERT_OK(err, "setsockopt(TCP_REPAIR)"))
 		goto error;
 
 	err = connect(s, (struct sockaddr *)&addr, len);
-	if (CHECK_FAIL(err))
+	if (!ASSERT_OK(err, "connect"))
 		goto error;
 
 	repair = TCP_REPAIR_OFF_NO_WP;
 	err = setsockopt(s, SOL_TCP, TCP_REPAIR, &repair, sizeof(repair));
-	if (CHECK_FAIL(err))
+	if (!ASSERT_OK(err, "setsockopt(TCP_REPAIR)"))
 		goto error;
 
 	return s;
@@ -54,7 +54,7 @@ error:
 static void compare_cookies(struct bpf_map *src, struct bpf_map *dst)
 {
 	__u32 i, max_entries = bpf_map__max_entries(src);
-	int err, duration = 0, src_fd, dst_fd;
+	int err, src_fd, dst_fd;
 
 	src_fd = bpf_map__fd(src);
 	dst_fd = bpf_map__fd(dst);
@@ -65,20 +65,18 @@ static void compare_cookies(struct bpf_map *src, struct bpf_map *dst)
 		err = bpf_map_lookup_elem(src_fd, &i, &src_cookie);
 		if (err && errno == ENOENT) {
 			err = bpf_map_lookup_elem(dst_fd, &i, &dst_cookie);
-			CHECK(!err, "map_lookup_elem(dst)", "element %u not deleted\n", i);
-			CHECK(err && errno != ENOENT, "map_lookup_elem(dst)", "%s\n",
-			      strerror(errno));
+			ASSERT_ERR(err, "map_lookup_elem(dst)");
+			ASSERT_EQ(errno, ENOENT, "map_lookup_elem(dst)");
 			continue;
 		}
-		if (CHECK(err, "lookup_elem(src)", "%s\n", strerror(errno)))
+		if (!ASSERT_OK(err, "lookup_elem(src)"))
 			continue;
 
 		err = bpf_map_lookup_elem(dst_fd, &i, &dst_cookie);
-		if (CHECK(err, "lookup_elem(dst)", "%s\n", strerror(errno)))
+		if (!ASSERT_OK(err, "lookup_elem(dst)"))
 			continue;
 
-		CHECK(dst_cookie != src_cookie, "cookie mismatch",
-		      "%llu != %llu (pos %u)\n", dst_cookie, src_cookie, i);
+		ASSERT_EQ(dst_cookie, src_cookie, "cookie mismatch");
 	}
 }
 
@@ -89,20 +87,16 @@ static void test_sockmap_create_update_free(enum bpf_map_type map_type)
 	int s, map, err;
 
 	s = connected_socket_v4();
-	if (CHECK_FAIL(s < 0))
+	if (!ASSERT_GE(s, 0, "connected_socket_v4"))
 		return;
 
 	map = bpf_map_create(map_type, NULL, sizeof(int), sizeof(int), 1, NULL);
-	if (CHECK_FAIL(map < 0)) {
-		perror("bpf_cmap_create");
+	if (!ASSERT_GE(map, 0, "bpf_map_create"))
 		goto out;
-	}
 
 	err = bpf_map_update_elem(map, &zero, &s, BPF_NOEXIST);
-	if (CHECK_FAIL(err)) {
-		perror("bpf_map_update");
+	if (!ASSERT_OK(err, "bpf_map_update"))
 		goto out;
-	}
 
 out:
 	close(map);
@@ -115,32 +109,26 @@ static void test_skmsg_helpers(enum bpf_map_type map_type)
 	int err, map, verdict;
 
 	skel = test_skmsg_load_helpers__open_and_load();
-	if (CHECK_FAIL(!skel)) {
-		perror("test_skmsg_load_helpers__open_and_load");
+	if (!ASSERT_OK_PTR(skel, "test_skmsg_load_helpers__open_and_load"))
 		return;
-	}
 
 	verdict = bpf_program__fd(skel->progs.prog_msg_verdict);
 	map = bpf_map__fd(skel->maps.sock_map);
 
 	err = bpf_prog_attach(verdict, map, BPF_SK_MSG_VERDICT, 0);
-	if (CHECK_FAIL(err)) {
-		perror("bpf_prog_attach");
+	if (!ASSERT_OK(err, "bpf_prog_attach"))
 		goto out;
-	}
 
 	err = bpf_prog_detach2(verdict, map, BPF_SK_MSG_VERDICT);
-	if (CHECK_FAIL(err)) {
-		perror("bpf_prog_detach2");
+	if (!ASSERT_OK(err, "bpf_prog_detach2"))
 		goto out;
-	}
 out:
 	test_skmsg_load_helpers__destroy(skel);
 }
 
 static void test_sockmap_update(enum bpf_map_type map_type)
 {
-	int err, prog, src, duration = 0;
+	int err, prog, src;
 	struct test_sockmap_update *skel;
 	struct bpf_map *dst_map;
 	const __u32 zero = 0;
@@ -153,11 +141,11 @@ static void test_sockmap_update(enum bpf_map_type map_type)
 	__s64 sk;
 
 	sk = connected_socket_v4();
-	if (CHECK(sk == -1, "connected_socket_v4", "cannot connect\n"))
+	if (!ASSERT_NEQ(sk, -1, "connected_socket_v4"))
 		return;
 
 	skel = test_sockmap_update__open_and_load();
-	if (CHECK(!skel, "open_and_load", "cannot load skeleton\n"))
+	if (!ASSERT_OK_PTR(skel, "open_and_load"))
 		goto close_sk;
 
 	prog = bpf_program__fd(skel->progs.copy_sock_map);
@@ -168,7 +156,7 @@ static void test_sockmap_update(enum bpf_map_type map_type)
 		dst_map = skel->maps.dst_sock_hash;
 
 	err = bpf_map_update_elem(src, &zero, &sk, BPF_NOEXIST);
-	if (CHECK(err, "update_elem(src)", "errno=%u\n", errno))
+	if (!ASSERT_OK(err, "update_elem(src)"))
 		goto out;
 
 	err = bpf_prog_test_run_opts(prog, &topts);
@@ -188,17 +176,16 @@ close_sk:
 static void test_sockmap_invalid_update(void)
 {
 	struct test_sockmap_invalid_update *skel;
-	int duration = 0;
 
 	skel = test_sockmap_invalid_update__open_and_load();
-	if (CHECK(skel, "open_and_load", "verifier accepted map_update\n"))
+	if (!ASSERT_NULL(skel, "open_and_load"))
 		test_sockmap_invalid_update__destroy(skel);
 }
 
 static void test_sockmap_copy(enum bpf_map_type map_type)
 {
 	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
-	int err, len, src_fd, iter_fd, duration = 0;
+	int err, len, src_fd, iter_fd;
 	union bpf_iter_link_info linfo = {};
 	__u32 i, num_sockets, num_elems;
 	struct bpf_iter_sockmap *skel;
@@ -208,7 +195,7 @@ static void test_sockmap_copy(enum bpf_map_type map_type)
 	char buf[64];
 
 	skel = bpf_iter_sockmap__open_and_load();
-	if (CHECK(!skel, "bpf_iter_sockmap__open_and_load", "skeleton open_and_load failed\n"))
+	if (!ASSERT_OK_PTR(skel, "bpf_iter_sockmap__open_and_load"))
 		return;
 
 	if (map_type == BPF_MAP_TYPE_SOCKMAP) {
@@ -222,7 +209,7 @@ static void test_sockmap_copy(enum bpf_map_type map_type)
 	}
 
 	sock_fd = calloc(num_sockets, sizeof(*sock_fd));
-	if (CHECK(!sock_fd, "calloc(sock_fd)", "failed to allocate\n"))
+	if (!ASSERT_OK_PTR(sock_fd, "calloc(sock_fd)"))
 		goto out;
 
 	for (i = 0; i < num_sockets; i++)
@@ -232,11 +219,11 @@ static void test_sockmap_copy(enum bpf_map_type map_type)
 
 	for (i = 0; i < num_sockets; i++) {
 		sock_fd[i] = connected_socket_v4();
-		if (CHECK(sock_fd[i] == -1, "connected_socket_v4", "cannot connect\n"))
+		if (!ASSERT_NEQ(sock_fd[i], -1, "connected_socket_v4"))
 			goto out;
 
 		err = bpf_map_update_elem(src_fd, &i, &sock_fd[i], BPF_NOEXIST);
-		if (CHECK(err, "map_update", "failed: %s\n", strerror(errno)))
+		if (!ASSERT_OK(err, "map_update"))
 			goto out;
 	}
 
@@ -248,22 +235,20 @@ static void test_sockmap_copy(enum bpf_map_type map_type)
 		goto out;
 
 	iter_fd = bpf_iter_create(bpf_link__fd(link));
-	if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+	if (!ASSERT_GE(iter_fd, 0, "create_iter"))
 		goto free_link;
 
 	/* do some tests */
 	while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
 		;
-	if (CHECK(len < 0, "read", "failed: %s\n", strerror(errno)))
+	if (!ASSERT_GE(len, 0, "read"))
 		goto close_iter;
 
 	/* test results */
-	if (CHECK(skel->bss->elems != num_elems, "elems", "got %u expected %u\n",
-		  skel->bss->elems, num_elems))
+	if (!ASSERT_EQ(skel->bss->elems, num_elems, "elems"))
 		goto close_iter;
 
-	if (CHECK(skel->bss->socks != num_sockets, "socks", "got %u expected %u\n",
-		  skel->bss->socks, num_sockets))
+	if (!ASSERT_EQ(skel->bss->socks, num_sockets, "socks"))
 		goto close_iter;
 
 	compare_cookies(src, skel->maps.dst);
@@ -288,28 +273,22 @@ static void test_sockmap_skb_verdict_attach(enum bpf_attach_type first,
 	int err, map, verdict;
 
 	skel = test_sockmap_skb_verdict_attach__open_and_load();
-	if (CHECK_FAIL(!skel)) {
-		perror("test_sockmap_skb_verdict_attach__open_and_load");
+	if (!ASSERT_OK_PTR(skel, "open_and_load"))
 		return;
-	}
 
 	verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
 	map = bpf_map__fd(skel->maps.sock_map);
 
 	err = bpf_prog_attach(verdict, map, first, 0);
-	if (CHECK_FAIL(err)) {
-		perror("bpf_prog_attach");
+	if (!ASSERT_OK(err, "bpf_prog_attach"))
 		goto out;
-	}
 
 	err = bpf_prog_attach(verdict, map, second, 0);
 	ASSERT_EQ(err, -EBUSY, "prog_attach_fail");
 
 	err = bpf_prog_detach2(verdict, map, first);
-	if (CHECK_FAIL(err)) {
-		perror("bpf_prog_detach2");
+	if (!ASSERT_OK(err, "bpf_prog_detach2"))
 		goto out;
-	}
 out:
 	test_sockmap_skb_verdict_attach__destroy(skel);
 }
-- 
cgit v1.2.3


From d155fcb3fff16410ccd7583f9a16c15ddffeca1e Mon Sep 17 00:00:00 2001
From: Wang Yufen <wangyufen@huawei.com>
Date: Mon, 26 Sep 2022 13:12:02 +0800
Subject: selftests/bpf: Convert sockmap_ktls test to ASSERT_* macros

Convert the selftest to use the preferred ASSERT_* macros instead of the
deprecated CHECK().

Signed-off-by: Wang Yufen <wangyufen@huawei.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/1664169131-32405-3-git-send-email-wangyufen@huawei.com
---
 .../selftests/bpf/prog_tests/sockmap_ktls.c        | 39 ++++++----------------
 1 file changed, 10 insertions(+), 29 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
index e172d89e92e1..2d0796314862 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
@@ -15,16 +15,12 @@ static int tcp_server(int family)
 	int err, s;
 
 	s = socket(family, SOCK_STREAM, 0);
-	if (CHECK_FAIL(s == -1)) {
-		perror("socket");
+	if (!ASSERT_GE(s, 0, "socket"))
 		return -1;
-	}
 
 	err = listen(s, SOMAXCONN);
-	if (CHECK_FAIL(err)) {
-		perror("listen");
+	if (!ASSERT_OK(err, "listen"))
 		return -1;
-	}
 
 	return s;
 }
@@ -48,44 +44,31 @@ static void test_sockmap_ktls_disconnect_after_delete(int family, int map)
 		return;
 
 	err = getsockname(srv, (struct sockaddr *)&addr, &len);
-	if (CHECK_FAIL(err)) {
-		perror("getsockopt");
+	if (!ASSERT_OK(err, "getsockopt"))
 		goto close_srv;
-	}
 
 	cli = socket(family, SOCK_STREAM, 0);
-	if (CHECK_FAIL(cli == -1)) {
-		perror("socket");
+	if (!ASSERT_GE(cli, 0, "socket"))
 		goto close_srv;
-	}
 
 	err = connect(cli, (struct sockaddr *)&addr, len);
-	if (CHECK_FAIL(err)) {
-		perror("connect");
+	if (!ASSERT_OK(err, "connect"))
 		goto close_cli;
-	}
 
 	err = bpf_map_update_elem(map, &zero, &cli, 0);
-	if (CHECK_FAIL(err)) {
-		perror("bpf_map_update_elem");
+	if (!ASSERT_OK(err, "bpf_map_update_elem"))
 		goto close_cli;
-	}
 
 	err = setsockopt(cli, IPPROTO_TCP, TCP_ULP, "tls", strlen("tls"));
-	if (CHECK_FAIL(err)) {
-		perror("setsockopt(TCP_ULP)");
+	if (!ASSERT_OK(err, "setsockopt(TCP_ULP)"))
 		goto close_cli;
-	}
 
 	err = bpf_map_delete_elem(map, &zero);
-	if (CHECK_FAIL(err)) {
-		perror("bpf_map_delete_elem");
+	if (!ASSERT_OK(err, "bpf_map_delete_elem"))
 		goto close_cli;
-	}
 
 	err = disconnect(cli);
-	if (CHECK_FAIL(err))
-		perror("disconnect");
+	ASSERT_OK(err, "disconnect");
 
 close_cli:
 	close(cli);
@@ -168,10 +151,8 @@ static void run_tests(int family, enum bpf_map_type map_type)
 	int map;
 
 	map = bpf_map_create(map_type, NULL, sizeof(int), sizeof(int), 1, NULL);
-	if (CHECK_FAIL(map < 0)) {
-		perror("bpf_map_create");
+	if (!ASSERT_GE(map, 0, "bpf_map_create"))
 		return;
-	}
 
 	if (test__start_subtest(fmt_test_name("disconnect_after_delete", family, map_type)))
 		test_sockmap_ktls_disconnect_after_delete(family, map);
-- 
cgit v1.2.3


From 099763e7da0beec120827547f227c123e9d4a155 Mon Sep 17 00:00:00 2001
From: Wang Yufen <wangyufen@huawei.com>
Date: Mon, 26 Sep 2022 13:12:03 +0800
Subject: selftests/bpf: Convert sockopt test to ASSERT_* macros

Convert the selftest to use the preferred ASSERT_* macros instead of the
deprecated CHECK().

Signed-off-by: Wang Yufen <wangyufen@huawei.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/1664169131-32405-4-git-send-email-wangyufen@huawei.com
---
 tools/testing/selftests/bpf/prog_tests/sockopt.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt.c b/tools/testing/selftests/bpf/prog_tests/sockopt.c
index cd09f4c7dd92..aa4debf62fc6 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockopt.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt.c
@@ -972,12 +972,12 @@ void test_sockopt(void)
 	int cgroup_fd, i;
 
 	cgroup_fd = test__join_cgroup("/sockopt");
-	if (CHECK_FAIL(cgroup_fd < 0))
+	if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup"))
 		return;
 
 	for (i = 0; i < ARRAY_SIZE(tests); i++) {
 		test__start_subtest(tests[i].descr);
-		CHECK_FAIL(run_test(cgroup_fd, &tests[i]));
+		ASSERT_OK(run_test(cgroup_fd, &tests[i]), tests[i].descr);
 	}
 
 	close(cgroup_fd);
-- 
cgit v1.2.3


From 675bc8abe16d9ce97970e8a781e9e72bb8d47ca2 Mon Sep 17 00:00:00 2001
From: Wang Yufen <wangyufen@huawei.com>
Date: Mon, 26 Sep 2022 13:12:04 +0800
Subject: selftests/bpf: Convert sockopt_inherit test to ASSERT_* macros

Convert the selftest to use the preferred ASSERT_* macros instead of the
deprecated CHECK().

Signed-off-by: Wang Yufen <wangyufen@huawei.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/1664169131-32405-5-git-send-email-wangyufen@huawei.com
---
 .../selftests/bpf/prog_tests/sockopt_inherit.c     | 30 ++++++++++------------
 1 file changed, 13 insertions(+), 17 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c
index c5cb6e8374b6..60c17a8e2789 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c
@@ -76,20 +76,16 @@ static void *server_thread(void *arg)
 	pthread_cond_signal(&server_started);
 	pthread_mutex_unlock(&server_started_mtx);
 
-	if (CHECK_FAIL(err < 0)) {
-		perror("Failed to listed on socket");
+	if (!ASSERT_GE(err, 0, "listed on socket"))
 		return NULL;
-	}
 
 	err += verify_sockopt(fd, CUSTOM_INHERIT1, "listen", 1);
 	err += verify_sockopt(fd, CUSTOM_INHERIT2, "listen", 1);
 	err += verify_sockopt(fd, CUSTOM_LISTENER, "listen", 1);
 
 	client_fd = accept(fd, (struct sockaddr *)&addr, &len);
-	if (CHECK_FAIL(client_fd < 0)) {
-		perror("Failed to accept client");
+	if (!ASSERT_GE(client_fd, 0, "accept client"))
 		return NULL;
-	}
 
 	err += verify_sockopt(client_fd, CUSTOM_INHERIT1, "accept", 1);
 	err += verify_sockopt(client_fd, CUSTOM_INHERIT2, "accept", 1);
@@ -183,20 +179,20 @@ static void run_test(int cgroup_fd)
 		goto close_bpf_object;
 
 	err = prog_attach(obj, cgroup_fd, "cgroup/getsockopt", "_getsockopt");
-	if (CHECK_FAIL(err))
+	if (!ASSERT_OK(err, "prog_attach _getsockopt"))
 		goto close_bpf_object;
 
 	err = prog_attach(obj, cgroup_fd, "cgroup/setsockopt", "_setsockopt");
-	if (CHECK_FAIL(err))
+	if (!ASSERT_OK(err, "prog_attach _setsockopt"))
 		goto close_bpf_object;
 
 	server_fd = start_server();
-	if (CHECK_FAIL(server_fd < 0))
+	if (!ASSERT_GE(server_fd, 0, "start_server"))
 		goto close_bpf_object;
 
 	pthread_mutex_lock(&server_started_mtx);
-	if (CHECK_FAIL(pthread_create(&tid, NULL, server_thread,
-				      (void *)&server_fd))) {
+	if (!ASSERT_OK(pthread_create(&tid, NULL, server_thread,
+				      (void *)&server_fd), "pthread_create")) {
 		pthread_mutex_unlock(&server_started_mtx);
 		goto close_server_fd;
 	}
@@ -204,17 +200,17 @@ static void run_test(int cgroup_fd)
 	pthread_mutex_unlock(&server_started_mtx);
 
 	client_fd = connect_to_server(server_fd);
-	if (CHECK_FAIL(client_fd < 0))
+	if (!ASSERT_GE(client_fd, 0, "connect_to_server"))
 		goto close_server_fd;
 
-	CHECK_FAIL(verify_sockopt(client_fd, CUSTOM_INHERIT1, "connect", 0));
-	CHECK_FAIL(verify_sockopt(client_fd, CUSTOM_INHERIT2, "connect", 0));
-	CHECK_FAIL(verify_sockopt(client_fd, CUSTOM_LISTENER, "connect", 0));
+	ASSERT_OK(verify_sockopt(client_fd, CUSTOM_INHERIT1, "connect", 0), "verify_sockopt1");
+	ASSERT_OK(verify_sockopt(client_fd, CUSTOM_INHERIT2, "connect", 0), "verify_sockopt2");
+	ASSERT_OK(verify_sockopt(client_fd, CUSTOM_LISTENER, "connect", 0), "verify_sockopt ener");
 
 	pthread_join(tid, &server_err);
 
 	err = (int)(long)server_err;
-	CHECK_FAIL(err);
+	ASSERT_OK(err, "pthread_join retval");
 
 	close(client_fd);
 
@@ -229,7 +225,7 @@ void test_sockopt_inherit(void)
 	int cgroup_fd;
 
 	cgroup_fd = test__join_cgroup("/sockopt_inherit");
-	if (CHECK_FAIL(cgroup_fd < 0))
+	if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup"))
 		return;
 
 	run_test(cgroup_fd);
-- 
cgit v1.2.3


From a605a6bbccceebbb68ab9f8ff2b27e2faa38525d Mon Sep 17 00:00:00 2001
From: Wang Yufen <wangyufen@huawei.com>
Date: Mon, 26 Sep 2022 13:12:05 +0800
Subject: selftests/bpf: Convert sockopt_multi test to ASSERT_* macros

Convert the selftest to use the preferred ASSERT_* macros instead of the
deprecated CHECK().

Signed-off-by: Wang Yufen <wangyufen@huawei.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/1664169131-32405-6-git-send-email-wangyufen@huawei.com
---
 tools/testing/selftests/bpf/prog_tests/sockopt_multi.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c b/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c
index 28d592dc54a7..7f5659349011 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c
@@ -303,11 +303,11 @@ void test_sockopt_multi(void)
 	int err = -1;
 
 	cg_parent = test__join_cgroup("/parent");
-	if (CHECK_FAIL(cg_parent < 0))
+	if (!ASSERT_GE(cg_parent, 0, "join_cgroup /parent"))
 		goto out;
 
 	cg_child = test__join_cgroup("/parent/child");
-	if (CHECK_FAIL(cg_child < 0))
+	if (!ASSERT_GE(cg_child, 0, "join_cgroup /parent/child"))
 		goto out;
 
 	obj = bpf_object__open_file("sockopt_multi.bpf.o", NULL);
@@ -319,11 +319,11 @@ void test_sockopt_multi(void)
 		goto out;
 
 	sock_fd = socket(AF_INET, SOCK_STREAM, 0);
-	if (CHECK_FAIL(sock_fd < 0))
+	if (!ASSERT_GE(sock_fd, 0, "socket"))
 		goto out;
 
-	CHECK_FAIL(run_getsockopt_test(obj, cg_parent, cg_child, sock_fd));
-	CHECK_FAIL(run_setsockopt_test(obj, cg_parent, cg_child, sock_fd));
+	ASSERT_OK(run_getsockopt_test(obj, cg_parent, cg_child, sock_fd), "getsockopt_test");
+	ASSERT_OK(run_setsockopt_test(obj, cg_parent, cg_child, sock_fd), "setsockopt_test");
 
 out:
 	close(sock_fd);
-- 
cgit v1.2.3


From f19708dfa0bf5a016f27e92ef4d3514788f6dc8b Mon Sep 17 00:00:00 2001
From: Wang Yufen <wangyufen@huawei.com>
Date: Mon, 26 Sep 2022 13:12:06 +0800
Subject: selftests/bpf: Convert sockopt_sk test to ASSERT_* macros

Convert the selftest to use the preferred ASSERT_* macros instead of the
deprecated CHECK().

Signed-off-by: Wang Yufen <wangyufen@huawei.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/1664169131-32405-7-git-send-email-wangyufen@huawei.com
---
 tools/testing/selftests/bpf/prog_tests/sockopt_sk.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
index 30a99d2ed5c6..60d952719d27 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
@@ -223,7 +223,7 @@ void test_sockopt_sk(void)
 	int cgroup_fd;
 
 	cgroup_fd = test__join_cgroup("/sockopt_sk");
-	if (CHECK_FAIL(cgroup_fd < 0))
+	if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup /sockopt_sk"))
 		return;
 
 	run_test(cgroup_fd);
-- 
cgit v1.2.3


From a0a17296713aea7b3cbc94662c6ffb53a79a3f2c Mon Sep 17 00:00:00 2001
From: Wang Yufen <wangyufen@huawei.com>
Date: Mon, 26 Sep 2022 13:12:07 +0800
Subject: selftests/bpf: Convert tcp_estats test to ASSERT_* macros

Convert the selftest to use the preferred ASSERT_* macros instead of the
deprecated CHECK().

Signed-off-by: Wang Yufen <wangyufen@huawei.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/1664169131-32405-8-git-send-email-wangyufen@huawei.com
---
 tools/testing/selftests/bpf/prog_tests/tcp_estats.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_estats.c b/tools/testing/selftests/bpf/prog_tests/tcp_estats.c
index 032dbfb26256..e070bca2b764 100644
--- a/tools/testing/selftests/bpf/prog_tests/tcp_estats.c
+++ b/tools/testing/selftests/bpf/prog_tests/tcp_estats.c
@@ -6,11 +6,9 @@ void test_tcp_estats(void)
 	const char *file = "./test_tcp_estats.bpf.o";
 	int err, prog_fd;
 	struct bpf_object *obj;
-	__u32 duration = 0;
 
 	err = bpf_prog_test_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
-	CHECK(err, "", "err %d errno %d\n", err, errno);
-	if (err)
+	if (!ASSERT_OK(err, ""))
 		return;
 
 	bpf_object__close(obj);
-- 
cgit v1.2.3


From 3082f8cd4ba32091be82c19c357ddfd300c5a433 Mon Sep 17 00:00:00 2001
From: Wang Yufen <wangyufen@huawei.com>
Date: Mon, 26 Sep 2022 13:12:08 +0800
Subject: selftests/bpf: Convert tcp_hdr_options test to ASSERT_* macros

Convert the selftest to use the preferred ASSERT_* macros instead of the
deprecated CHECK().

Signed-off-by: Wang Yufen <wangyufen@huawei.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/1664169131-32405-9-git-send-email-wangyufen@huawei.com
---
 .../selftests/bpf/prog_tests/tcp_hdr_options.c     | 80 ++++++++--------------
 1 file changed, 28 insertions(+), 52 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c
index f24436d33cd6..617bbce6ef8f 100644
--- a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c
+++ b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c
@@ -42,13 +42,10 @@ struct sk_fds {
 
 static int create_netns(void)
 {
-	if (CHECK(unshare(CLONE_NEWNET), "create netns",
-		  "unshare(CLONE_NEWNET): %s (%d)",
-		  strerror(errno), errno))
+	if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns"))
 		return -1;
 
-	if (CHECK(system("ip link set dev lo up"), "run ip cmd",
-		  "failed to bring lo link up\n"))
+	if (!ASSERT_OK(system("ip link set dev lo up"), "run ip cmd"))
 		return -1;
 
 	return 0;
@@ -80,16 +77,12 @@ static int sk_fds_shutdown(struct sk_fds *sk_fds)
 
 	shutdown(sk_fds->active_fd, SHUT_WR);
 	ret = read(sk_fds->passive_fd, &abyte, sizeof(abyte));
-	if (CHECK(ret != 0, "read-after-shutdown(passive_fd):",
-		  "ret:%d %s (%d)\n",
-		  ret, strerror(errno), errno))
+	if (!ASSERT_EQ(ret, 0, "read-after-shutdown(passive_fd):"))
 		return -1;
 
 	shutdown(sk_fds->passive_fd, SHUT_WR);
 	ret = read(sk_fds->active_fd, &abyte, sizeof(abyte));
-	if (CHECK(ret != 0, "read-after-shutdown(active_fd):",
-		  "ret:%d %s (%d)\n",
-		  ret, strerror(errno), errno))
+	if (!ASSERT_EQ(ret, 0, "read-after-shutdown(active_fd):"))
 		return -1;
 
 	return 0;
@@ -102,8 +95,7 @@ static int sk_fds_connect(struct sk_fds *sk_fds, bool fast_open)
 	socklen_t len;
 
 	sk_fds->srv_fd = start_server(AF_INET6, SOCK_STREAM, LO_ADDR6, 0, 0);
-	if (CHECK(sk_fds->srv_fd == -1, "start_server", "%s (%d)\n",
-		  strerror(errno), errno))
+	if (!ASSERT_NEQ(sk_fds->srv_fd, -1, "start_server"))
 		goto error;
 
 	if (fast_open)
@@ -112,28 +104,25 @@ static int sk_fds_connect(struct sk_fds *sk_fds, bool fast_open)
 	else
 		sk_fds->active_fd = connect_to_fd(sk_fds->srv_fd, 0);
 
-	if (CHECK_FAIL(sk_fds->active_fd == -1)) {
+	if (!ASSERT_NEQ(sk_fds->active_fd, -1, "")) {
 		close(sk_fds->srv_fd);
 		goto error;
 	}
 
 	len = sizeof(addr6);
-	if (CHECK(getsockname(sk_fds->srv_fd, (struct sockaddr *)&addr6,
-			      &len), "getsockname(srv_fd)", "%s (%d)\n",
-		  strerror(errno), errno))
+	if (!ASSERT_OK(getsockname(sk_fds->srv_fd, (struct sockaddr *)&addr6,
+				   &len), "getsockname(srv_fd)"))
 		goto error_close;
 	sk_fds->passive_lport = ntohs(addr6.sin6_port);
 
 	len = sizeof(addr6);
-	if (CHECK(getsockname(sk_fds->active_fd, (struct sockaddr *)&addr6,
-			      &len), "getsockname(active_fd)", "%s (%d)\n",
-		  strerror(errno), errno))
+	if (!ASSERT_OK(getsockname(sk_fds->active_fd, (struct sockaddr *)&addr6,
+				   &len), "getsockname(active_fd)"))
 		goto error_close;
 	sk_fds->active_lport = ntohs(addr6.sin6_port);
 
 	sk_fds->passive_fd = accept(sk_fds->srv_fd, NULL, 0);
-	if (CHECK(sk_fds->passive_fd == -1, "accept(srv_fd)", "%s (%d)\n",
-		  strerror(errno), errno))
+	if (!ASSERT_NEQ(sk_fds->passive_fd, -1, "accept(srv_fd)"))
 		goto error_close;
 
 	if (fast_open) {
@@ -141,8 +130,7 @@ static int sk_fds_connect(struct sk_fds *sk_fds, bool fast_open)
 		int ret;
 
 		ret = read(sk_fds->passive_fd, bytes_in, sizeof(bytes_in));
-		if (CHECK(ret != sizeof(fast), "read fastopen syn data",
-			  "expected=%lu actual=%d\n", sizeof(fast), ret)) {
+		if (!ASSERT_EQ(ret, sizeof(fast), "read fastopen syn data")) {
 			close(sk_fds->passive_fd);
 			goto error_close;
 		}
@@ -163,8 +151,7 @@ static int check_hdr_opt(const struct bpf_test_option *exp,
 			 const struct bpf_test_option *act,
 			 const char *hdr_desc)
 {
-	if (CHECK(memcmp(exp, act, sizeof(*exp)),
-		  "expected-vs-actual", "unexpected %s\n", hdr_desc)) {
+	if (!ASSERT_OK(memcmp(exp, act, sizeof(*exp)), hdr_desc)) {
 		print_option(exp, "expected: ");
 		print_option(act, "  actual: ");
 		return -1;
@@ -178,13 +165,11 @@ static int check_hdr_stg(const struct hdr_stg *exp, int fd,
 {
 	struct hdr_stg act;
 
-	if (CHECK(bpf_map_lookup_elem(hdr_stg_map_fd, &fd, &act),
-		  "map_lookup(hdr_stg_map_fd)", "%s %s (%d)\n",
-		  stg_desc, strerror(errno), errno))
+	if (!ASSERT_OK(bpf_map_lookup_elem(hdr_stg_map_fd, &fd, &act),
+		  "map_lookup(hdr_stg_map_fd)"))
 		return -1;
 
-	if (CHECK(memcmp(exp, &act, sizeof(*exp)),
-		  "expected-vs-actual", "unexpected %s\n", stg_desc)) {
+	if (!ASSERT_OK(memcmp(exp, &act, sizeof(*exp)), stg_desc)) {
 		print_hdr_stg(exp, "expected: ");
 		print_hdr_stg(&act, "  actual: ");
 		return -1;
@@ -228,9 +213,8 @@ static void check_hdr_and_close_fds(struct sk_fds *sk_fds)
 	if (sk_fds_shutdown(sk_fds))
 		goto check_linum;
 
-	if (CHECK(expected_inherit_cb_flags != skel->bss->inherit_cb_flags,
-		  "Unexpected inherit_cb_flags", "0x%x != 0x%x\n",
-		  skel->bss->inherit_cb_flags, expected_inherit_cb_flags))
+	if (!ASSERT_EQ(expected_inherit_cb_flags, skel->bss->inherit_cb_flags,
+		       "inherit_cb_flags"))
 		goto check_linum;
 
 	if (check_hdr_stg(&exp_passive_hdr_stg, sk_fds->passive_fd,
@@ -257,7 +241,7 @@ static void check_hdr_and_close_fds(struct sk_fds *sk_fds)
 		      "active_fin_in");
 
 check_linum:
-	CHECK_FAIL(check_error_linum(sk_fds));
+	ASSERT_FALSE(check_error_linum(sk_fds), "check_error_linum");
 	sk_fds_close(sk_fds);
 }
 
@@ -497,26 +481,20 @@ static void misc(void)
 		/* MSG_EOR to ensure skb will not be combined */
 		ret = send(sk_fds.active_fd, send_msg, sizeof(send_msg),
 			   MSG_EOR);
-		if (CHECK(ret != sizeof(send_msg), "send(msg)", "ret:%d\n",
-			  ret))
+		if (!ASSERT_EQ(ret, sizeof(send_msg), "send(msg)"))
 			goto check_linum;
 
 		ret = read(sk_fds.passive_fd, recv_msg, sizeof(recv_msg));
-		if (CHECK(ret != sizeof(send_msg), "read(msg)", "ret:%d\n",
-			  ret))
+		if (!ASSERT_EQ(ret, sizeof(send_msg), "read(msg)"))
 			goto check_linum;
 	}
 
 	if (sk_fds_shutdown(&sk_fds))
 		goto check_linum;
 
-	CHECK(misc_skel->bss->nr_syn != 1, "unexpected nr_syn",
-	      "expected (1) != actual (%u)\n",
-		misc_skel->bss->nr_syn);
+	ASSERT_EQ(misc_skel->bss->nr_syn, 1, "unexpected nr_syn");
 
-	CHECK(misc_skel->bss->nr_data != nr_data, "unexpected nr_data",
-	      "expected (%u) != actual (%u)\n",
-	      nr_data, misc_skel->bss->nr_data);
+	ASSERT_EQ(misc_skel->bss->nr_data, nr_data, "unexpected nr_data");
 
 	/* The last ACK may have been delayed, so it is either 1 or 2. */
 	CHECK(misc_skel->bss->nr_pure_ack != 1 &&
@@ -525,12 +503,10 @@ static void misc(void)
 	      "expected (1 or 2) != actual (%u)\n",
 		misc_skel->bss->nr_pure_ack);
 
-	CHECK(misc_skel->bss->nr_fin != 1, "unexpected nr_fin",
-	      "expected (1) != actual (%u)\n",
-	      misc_skel->bss->nr_fin);
+	ASSERT_EQ(misc_skel->bss->nr_fin, 1, "unexpected nr_fin");
 
 check_linum:
-	CHECK_FAIL(check_error_linum(&sk_fds));
+	ASSERT_FALSE(check_error_linum(&sk_fds), "check_error_linum");
 	sk_fds_close(&sk_fds);
 	bpf_link__destroy(link);
 }
@@ -555,15 +531,15 @@ void test_tcp_hdr_options(void)
 	int i;
 
 	skel = test_tcp_hdr_options__open_and_load();
-	if (CHECK(!skel, "open and load skel", "failed"))
+	if (!ASSERT_OK_PTR(skel, "open and load skel"))
 		return;
 
 	misc_skel = test_misc_tcp_hdr_options__open_and_load();
-	if (CHECK(!misc_skel, "open and load misc test skel", "failed"))
+	if (!ASSERT_OK_PTR(misc_skel, "open and load misc test skel"))
 		goto skel_destroy;
 
 	cg_fd = test__join_cgroup(CG_NAME);
-	if (CHECK_FAIL(cg_fd < 0))
+	if (!ASSERT_GE(cg_fd, 0, "join_cgroup"))
 		goto skel_destroy;
 
 	for (i = 0; i < ARRAY_SIZE(tests); i++) {
-- 
cgit v1.2.3


From 8dda32ac58b622f4c1ec0edd4f6b12f84170fe01 Mon Sep 17 00:00:00 2001
From: Wang Yufen <wangyufen@huawei.com>
Date: Mon, 26 Sep 2022 13:12:09 +0800
Subject: selftests/bpf: Convert tcp_rtt test to ASSERT_* macros

Convert the selftest to use the preferred ASSERT_* macros instead of the
deprecated CHECK().

Signed-off-by: Wang Yufen <wangyufen@huawei.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/1664169131-32405-10-git-send-email-wangyufen@huawei.com
---
 tools/testing/selftests/bpf/prog_tests/tcp_rtt.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
index 96ff2c20af81..8fe84da1b9b4 100644
--- a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
+++ b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
@@ -16,8 +16,7 @@ static void send_byte(int fd)
 {
 	char b = 0x55;
 
-	if (CHECK_FAIL(write(fd, &b, sizeof(b)) != 1))
-		perror("Failed to send single byte");
+	ASSERT_EQ(write(fd, &b, sizeof(b)), 1, "send single byte");
 }
 
 static int wait_for_ack(int fd, int retries)
@@ -51,10 +50,8 @@ static int verify_sk(int map_fd, int client_fd, const char *msg, __u32 invoked,
 	int err = 0;
 	struct tcp_rtt_storage val;
 
-	if (CHECK_FAIL(bpf_map_lookup_elem(map_fd, &client_fd, &val) < 0)) {
-		perror("Failed to read socket storage");
+	if (!ASSERT_GE(bpf_map_lookup_elem(map_fd, &client_fd, &val), 0, "read socket storage"))
 		return -1;
-	}
 
 	if (val.invoked != invoked) {
 		log_err("%s: unexpected bpf_tcp_sock.invoked %d != %d",
@@ -151,14 +148,14 @@ void test_tcp_rtt(void)
 	int server_fd, cgroup_fd;
 
 	cgroup_fd = test__join_cgroup("/tcp_rtt");
-	if (CHECK_FAIL(cgroup_fd < 0))
+	if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup /tcp_rtt"))
 		return;
 
 	server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0);
-	if (CHECK_FAIL(server_fd < 0))
+	if (!ASSERT_GE(server_fd, 0, "start_server"))
 		goto close_cgroup_fd;
 
-	CHECK_FAIL(run_test(cgroup_fd, server_fd));
+	ASSERT_OK(run_test(cgroup_fd, server_fd), "run_test");
 
 	close(server_fd);
 
-- 
cgit v1.2.3


From 9d0b05bdfbea25693cdd63c29aa12b982307d81e Mon Sep 17 00:00:00 2001
From: Wang Yufen <wangyufen@huawei.com>
Date: Mon, 26 Sep 2022 13:12:10 +0800
Subject: selftests/bpf: Convert tcpbpf_user test to ASSERT_* macros

Convert the selftest to use the preferred ASSERT_* macros instead of the
deprecated CHECK().

Signed-off-by: Wang Yufen <wangyufen@huawei.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/1664169131-32405-11-git-send-email-wangyufen@huawei.com
---
 .../testing/selftests/bpf/prog_tests/tcpbpf_user.c | 32 ++++++++--------------
 1 file changed, 12 insertions(+), 20 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c b/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
index 87923d2865b7..7e8fe1bad03f 100644
--- a/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
+++ b/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
@@ -8,8 +8,6 @@
 #define LO_ADDR6 "::1"
 #define CG_NAME "/tcpbpf-user-test"
 
-static __u32 duration;
-
 static void verify_result(struct tcpbpf_globals *result)
 {
 	__u32 expected_events = ((1 << BPF_SOCK_OPS_TIMEOUT_INIT) |
@@ -22,9 +20,7 @@ static void verify_result(struct tcpbpf_globals *result)
 				 (1 << BPF_SOCK_OPS_TCP_LISTEN_CB));
 
 	/* check global map */
-	CHECK(expected_events != result->event_map, "event_map",
-	      "unexpected event_map: actual 0x%08x != expected 0x%08x\n",
-	      result->event_map, expected_events);
+	ASSERT_EQ(expected_events, result->event_map, "event_map");
 
 	ASSERT_EQ(result->bytes_received, 501, "bytes_received");
 	ASSERT_EQ(result->bytes_acked, 1002, "bytes_acked");
@@ -56,18 +52,15 @@ static void run_test(struct tcpbpf_globals *result)
 	int i, rv;
 
 	listen_fd = start_server(AF_INET6, SOCK_STREAM, LO_ADDR6, 0, 0);
-	if (CHECK(listen_fd == -1, "start_server", "listen_fd:%d errno:%d\n",
-		  listen_fd, errno))
+	if (!ASSERT_NEQ(listen_fd, -1, "start_server"))
 		goto done;
 
 	cli_fd = connect_to_fd(listen_fd, 0);
-	if (CHECK(cli_fd == -1, "connect_to_fd(listen_fd)",
-		  "cli_fd:%d errno:%d\n", cli_fd, errno))
+	if (!ASSERT_NEQ(cli_fd, -1, "connect_to_fd(listen_fd)"))
 		goto done;
 
 	accept_fd = accept(listen_fd, NULL, NULL);
-	if (CHECK(accept_fd == -1, "accept(listen_fd)",
-		  "accept_fd:%d errno:%d\n", accept_fd, errno))
+	if (!ASSERT_NEQ(accept_fd, -1, "accept(listen_fd)"))
 		goto done;
 
 	/* Send 1000B of '+'s from cli_fd -> accept_fd */
@@ -75,11 +68,11 @@ static void run_test(struct tcpbpf_globals *result)
 		buf[i] = '+';
 
 	rv = send(cli_fd, buf, 1000, 0);
-	if (CHECK(rv != 1000, "send(cli_fd)", "rv:%d errno:%d\n", rv, errno))
+	if (!ASSERT_EQ(rv, 1000, "send(cli_fd)"))
 		goto done;
 
 	rv = recv(accept_fd, buf, 1000, 0);
-	if (CHECK(rv != 1000, "recv(accept_fd)", "rv:%d errno:%d\n", rv, errno))
+	if (!ASSERT_EQ(rv, 1000, "recv(accept_fd)"))
 		goto done;
 
 	/* Send 500B of '.'s from accept_fd ->cli_fd */
@@ -87,11 +80,11 @@ static void run_test(struct tcpbpf_globals *result)
 		buf[i] = '.';
 
 	rv = send(accept_fd, buf, 500, 0);
-	if (CHECK(rv != 500, "send(accept_fd)", "rv:%d errno:%d\n", rv, errno))
+	if (!ASSERT_EQ(rv, 500, "send(accept_fd)"))
 		goto done;
 
 	rv = recv(cli_fd, buf, 500, 0);
-	if (CHECK(rv != 500, "recv(cli_fd)", "rv:%d errno:%d\n", rv, errno))
+	if (!ASSERT_EQ(rv, 500, "recv(cli_fd)"))
 		goto done;
 
 	/*
@@ -100,12 +93,12 @@ static void run_test(struct tcpbpf_globals *result)
 	 */
 	shutdown(accept_fd, SHUT_WR);
 	err = recv(cli_fd, buf, 1, 0);
-	if (CHECK(err, "recv(cli_fd) for fin", "err:%d errno:%d\n", err, errno))
+	if (!ASSERT_OK(err, "recv(cli_fd) for fin"))
 		goto done;
 
 	shutdown(cli_fd, SHUT_WR);
 	err = recv(accept_fd, buf, 1, 0);
-	CHECK(err, "recv(accept_fd) for fin", "err:%d errno:%d\n", err, errno);
+	ASSERT_OK(err, "recv(accept_fd) for fin");
 done:
 	if (accept_fd != -1)
 		close(accept_fd);
@@ -124,12 +117,11 @@ void test_tcpbpf_user(void)
 	int cg_fd = -1;
 
 	skel = test_tcpbpf_kern__open_and_load();
-	if (CHECK(!skel, "open and load skel", "failed"))
+	if (!ASSERT_OK_PTR(skel, "open and load skel"))
 		return;
 
 	cg_fd = test__join_cgroup(CG_NAME);
-	if (CHECK(cg_fd < 0, "test__join_cgroup(" CG_NAME ")",
-		  "cg_fd:%d errno:%d", cg_fd, errno))
+	if (!ASSERT_GE(cg_fd, 0, "test__join_cgroup(" CG_NAME ")"))
 		goto err;
 
 	skel->links.bpf_testcb = bpf_program__attach_cgroup(skel->progs.bpf_testcb, cg_fd);
-- 
cgit v1.2.3


From 1fddca3d36d1dc4a19a8060d20de1b77edfe63e0 Mon Sep 17 00:00:00 2001
From: Wang Yufen <wangyufen@huawei.com>
Date: Mon, 26 Sep 2022 13:12:11 +0800
Subject: selftests/bpf: Convert udp_limit test to ASSERT_* macros

Convert the selftest to use the preferred ASSERT_* macros instead of the
deprecated CHECK().

Signed-off-by: Wang Yufen <wangyufen@huawei.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/1664169131-32405-12-git-send-email-wangyufen@huawei.com
---
 tools/testing/selftests/bpf/prog_tests/udp_limit.c | 18 +++++++-----------
 1 file changed, 7 insertions(+), 11 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/udp_limit.c b/tools/testing/selftests/bpf/prog_tests/udp_limit.c
index 56c9d6bd38a3..2643d896ddae 100644
--- a/tools/testing/selftests/bpf/prog_tests/udp_limit.c
+++ b/tools/testing/selftests/bpf/prog_tests/udp_limit.c
@@ -5,8 +5,6 @@
 #include <sys/types.h>
 #include <sys/socket.h>
 
-static int duration;
-
 void test_udp_limit(void)
 {
 	struct udp_limit *skel;
@@ -14,11 +12,11 @@ void test_udp_limit(void)
 	int cgroup_fd;
 
 	cgroup_fd = test__join_cgroup("/udp_limit");
-	if (CHECK(cgroup_fd < 0, "cg-join", "errno %d", errno))
+	if (!ASSERT_GE(cgroup_fd, 0, "cg-join"))
 		return;
 
 	skel = udp_limit__open_and_load();
-	if (CHECK(!skel, "skel-load", "errno %d", errno))
+	if (!ASSERT_OK_PTR(skel, "skel-load"))
 		goto close_cgroup_fd;
 
 	skel->links.sock = bpf_program__attach_cgroup(skel->progs.sock, cgroup_fd);
@@ -32,11 +30,11 @@ void test_udp_limit(void)
 	 * verify that.
 	 */
 	fd1 = socket(AF_INET, SOCK_DGRAM, 0);
-	if (CHECK(fd1 < 0, "fd1", "errno %d", errno))
+	if (!ASSERT_GE(fd1, 0, "socket(fd1)"))
 		goto close_skeleton;
 
 	fd2 = socket(AF_INET, SOCK_DGRAM, 0);
-	if (CHECK(fd2 >= 0, "fd2", "errno %d", errno))
+	if (!ASSERT_LT(fd2, 0, "socket(fd2)"))
 		goto close_skeleton;
 
 	/* We can reopen again after close. */
@@ -44,7 +42,7 @@ void test_udp_limit(void)
 	fd1 = -1;
 
 	fd1 = socket(AF_INET, SOCK_DGRAM, 0);
-	if (CHECK(fd1 < 0, "fd1-again", "errno %d", errno))
+	if (!ASSERT_GE(fd1, 0, "socket(fd1-again)"))
 		goto close_skeleton;
 
 	/* Make sure the program was invoked the expected
@@ -54,13 +52,11 @@ void test_udp_limit(void)
 	 * - close fd1          - BPF_CGROUP_INET_SOCK_RELEASE
 	 * - open fd1 again     - BPF_CGROUP_INET_SOCK_CREATE
 	 */
-	if (CHECK(skel->bss->invocations != 4, "bss-invocations",
-		  "invocations=%d", skel->bss->invocations))
+	if (!ASSERT_EQ(skel->bss->invocations, 4, "bss-invocations"))
 		goto close_skeleton;
 
 	/* We should still have a single socket in use */
-	if (CHECK(skel->bss->in_use != 1, "bss-in_use",
-		  "in_use=%d", skel->bss->in_use))
+	if (!ASSERT_EQ(skel->bss->in_use, 1, "bss-in_use"))
 		goto close_skeleton;
 
 close_skeleton:
-- 
cgit v1.2.3


From 3411c5b6f8d6e08d98e606dcf74fc42e2f9d731f Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <martin.lau@kernel.org>
Date: Thu, 29 Sep 2022 00:04:07 -0700
Subject: selftests/bpf: Check -EBUSY for the recurred
 bpf_setsockopt(TCP_CONGESTION)

This patch changes the bpf_dctcp test to ensure the recurred
bpf_setsockopt(TCP_CONGESTION) returns -EBUSY.

Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
Link: https://lore.kernel.org/r/20220929070407.965581-6-martin.lau@linux.dev
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 .../testing/selftests/bpf/prog_tests/bpf_tcp_ca.c  |  4 ++++
 tools/testing/selftests/bpf/progs/bpf_dctcp.c      | 25 +++++++++++++++-------
 2 files changed, 21 insertions(+), 8 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
index 2959a52ced06..e980188d4124 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
@@ -290,6 +290,10 @@ static void test_dctcp_fallback(void)
 		goto done;
 	ASSERT_STREQ(dctcp_skel->bss->cc_res, "cubic", "cc_res");
 	ASSERT_EQ(dctcp_skel->bss->tcp_cdg_res, -ENOTSUPP, "tcp_cdg_res");
+	/* All setsockopt(TCP_CONGESTION) in the recurred
+	 * bpf_dctcp->init() should fail with -EBUSY.
+	 */
+	ASSERT_EQ(dctcp_skel->bss->ebusy_cnt, 3, "ebusy_cnt");
 
 	err = getsockopt(srv_fd, SOL_TCP, TCP_CONGESTION, srv_cc, &cc_len);
 	if (!ASSERT_OK(err, "getsockopt(srv_fd, TCP_CONGESTION)"))
diff --git a/tools/testing/selftests/bpf/progs/bpf_dctcp.c b/tools/testing/selftests/bpf/progs/bpf_dctcp.c
index 9573be6122be..460682759aed 100644
--- a/tools/testing/selftests/bpf/progs/bpf_dctcp.c
+++ b/tools/testing/selftests/bpf/progs/bpf_dctcp.c
@@ -11,6 +11,7 @@
 #include <linux/types.h>
 #include <linux/stddef.h>
 #include <linux/tcp.h>
+#include <errno.h>
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
 #include "bpf_tcp_helpers.h"
@@ -23,6 +24,7 @@ const char tcp_cdg[] = "cdg";
 char cc_res[TCP_CA_NAME_MAX];
 int tcp_cdg_res = 0;
 int stg_result = 0;
+int ebusy_cnt = 0;
 
 struct {
 	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
@@ -64,16 +66,23 @@ void BPF_PROG(dctcp_init, struct sock *sk)
 
 	if (!(tp->ecn_flags & TCP_ECN_OK) && fallback[0]) {
 		/* Switch to fallback */
-		bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
-			       (void *)fallback, sizeof(fallback));
-		/* Switch back to myself which the bpf trampoline
-		 * stopped calling dctcp_init recursively.
+		if (bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
+				   (void *)fallback, sizeof(fallback)) == -EBUSY)
+			ebusy_cnt++;
+
+		/* Switch back to myself and the recurred dctcp_init()
+		 * will get -EBUSY for all bpf_setsockopt(TCP_CONGESTION),
+		 * except the last "cdg" one.
 		 */
-		bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
-			       (void *)bpf_dctcp, sizeof(bpf_dctcp));
+		if (bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
+				   (void *)bpf_dctcp, sizeof(bpf_dctcp)) == -EBUSY)
+			ebusy_cnt++;
+
 		/* Switch back to fallback */
-		bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
-			       (void *)fallback, sizeof(fallback));
+		if (bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
+				   (void *)fallback, sizeof(fallback)) == -EBUSY)
+			ebusy_cnt++;
+
 		/* Expecting -ENOTSUPP for tcp_cdg_res */
 		tcp_cdg_res = bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
 					     (void *)tcp_cdg, sizeof(tcp_cdg));
-- 
cgit v1.2.3


From d863f42930db35e82f47e4b4c78531a2b8d396ae Mon Sep 17 00:00:00 2001
From: Yuan Can <yuancan@huawei.com>
Date: Wed, 28 Sep 2022 09:04:39 +0000
Subject: bpftool: Remove unused struct btf_attach_point

After commit 2828d0d75b73 ("bpftool: Switch to libbpf's hashmap for
programs/maps in BTF listing"), struct btf_attach_point is not used
anymore and can be removed as well.

Signed-off-by: Yuan Can <yuancan@huawei.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220928090440.79637-2-yuancan@huawei.com
---
 tools/bpf/bpftool/btf.c | 5 -----
 1 file changed, 5 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index 0744bd1150be..64411fe49a66 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -43,11 +43,6 @@ static const char * const btf_kind_str[NR_BTF_KINDS] = {
 	[BTF_KIND_ENUM64]	= "ENUM64",
 };
 
-struct btf_attach_point {
-	__u32 obj_id;
-	__u32 btf_id;
-};
-
 static const char *btf_int_enc_str(__u8 encoding)
 {
 	switch (encoding) {
-- 
cgit v1.2.3


From f95a479797dc2c65fdf2809a7c388e7a9e2bc853 Mon Sep 17 00:00:00 2001
From: Yuan Can <yuancan@huawei.com>
Date: Wed, 28 Sep 2022 09:04:40 +0000
Subject: bpftool: Remove unused struct event_ring_info

After commit 9b190f185d2f ("tools/bpftool: switch map event_pipe to
libbpf's perf_buffer"), struct event_ring_info is not used any more and
can be removed as well.

Signed-off-by: Yuan Can <yuancan@huawei.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220928090440.79637-3-yuancan@huawei.com
---
 tools/bpf/bpftool/map_perf_ring.c | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/map_perf_ring.c b/tools/bpf/bpftool/map_perf_ring.c
index 6b0c410152de..1583281d1327 100644
--- a/tools/bpf/bpftool/map_perf_ring.c
+++ b/tools/bpf/bpftool/map_perf_ring.c
@@ -29,13 +29,6 @@
 
 static volatile bool stop;
 
-struct event_ring_info {
-	int fd;
-	int key;
-	unsigned int cpu;
-	void *mem;
-};
-
 struct perf_event_sample {
 	struct perf_event_header header;
 	__u64 time;
-- 
cgit v1.2.3


From 2efcf695bfc0f078dd7d5d23d96a97db34c930d5 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.i.king@gmail.com>
Date: Wed, 28 Sep 2022 23:15:55 +0100
Subject: selftests/bpf: Fix spelling mistake "unpriviledged" -> "unprivileged"

There are a couple of spelling mistakes, one in a literal string and one
in a comment. Fix them.

Signed-off-by: Colin Ian King <colin.i.king@gmail.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20220928221555.67873-1-colin.i.king@gmail.com
---
 tools/testing/selftests/bpf/verifier/calls.c   | 2 +-
 tools/testing/selftests/bpf/verifier/var_off.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
index 3fb4f69b1962..e1a937277b54 100644
--- a/tools/testing/selftests/bpf/verifier/calls.c
+++ b/tools/testing/selftests/bpf/verifier/calls.c
@@ -284,7 +284,7 @@
 	.result = ACCEPT,
 },
 {
-	"calls: not on unpriviledged",
+	"calls: not on unprivileged",
 	.insns = {
 	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
 	BPF_MOV64_IMM(BPF_REG_0, 1),
diff --git a/tools/testing/selftests/bpf/verifier/var_off.c b/tools/testing/selftests/bpf/verifier/var_off.c
index 187c6f6e32bc..d37f512fad16 100644
--- a/tools/testing/selftests/bpf/verifier/var_off.c
+++ b/tools/testing/selftests/bpf/verifier/var_off.c
@@ -121,7 +121,7 @@
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_8b = { 1 },
-	/* The unpriviledged case is not too interesting; variable
+	/* The unprivileged case is not too interesting; variable
 	 * stack access is rejected.
 	 */
 	.errstr_unpriv = "R2 variable stack access prohibited for !root",
-- 
cgit v1.2.3


From 51e05a8cf8eb34da7473823b7f236a77adfef0b4 Mon Sep 17 00:00:00 2001
From: Xin Liu <liuxin350@huawei.com>
Date: Fri, 30 Sep 2022 17:07:08 +0800
Subject: libbpf: Fix overrun in netlink attribute iteration

I accidentally found that a change in commit 1045b03e07d8 ("netlink: fix
overrun in attribute iteration") was not synchronized to the function
`nla_ok` in tools/lib/bpf/nlattr.c, I think it is necessary to modify,
this patch will do it.

Signed-off-by: Xin Liu <liuxin350@huawei.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Link: https://lore.kernel.org/bpf/20220930090708.62394-1-liuxin350@huawei.com
---
 tools/lib/bpf/nlattr.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/lib/bpf/nlattr.c b/tools/lib/bpf/nlattr.c
index f57e77a6e40f..3900d052ed19 100644
--- a/tools/lib/bpf/nlattr.c
+++ b/tools/lib/bpf/nlattr.c
@@ -32,7 +32,7 @@ static struct nlattr *nla_next(const struct nlattr *nla, int *remaining)
 
 static int nla_ok(const struct nlattr *nla, int remaining)
 {
-	return remaining >= sizeof(*nla) &&
+	return remaining >= (int)sizeof(*nla) &&
 	       nla->nla_len >= sizeof(*nla) &&
 	       nla->nla_len <= remaining;
 }
-- 
cgit v1.2.3


From 3ca2fb497440a3c8294f9df0ce7b2c3c9a1c5875 Mon Sep 17 00:00:00 2001
From: Tianyi Liu <i.pear@outlook.com>
Date: Wed, 28 Sep 2022 16:09:32 +0800
Subject: bpftool: Fix error message of strerror

strerror() expects a positive errno, however variable err will never be
positive when an error occurs. This causes bpftool to output too many
"unknown error", even a simple "file not exist" error can not get an
accurate message.

This patch fixed all "strerror(err)" patterns in bpftool.
Specially in btf.c#L823, hashmap__append() is an internal function of
libbpf and will not change errno, so there's a little difference.
Some libbpf_get_error() calls are kept for return values.

Changes since v1: https://lore.kernel.org/bpf/SY4P282MB1084B61CD8671DFA395AA8579D539@SY4P282MB1084.AUSP282.PROD.OUTLOOK.COM/
Check directly for NULL values instead of calling libbpf_get_error().

Signed-off-by: Tianyi Liu <i.pear@outlook.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Reviewed-by: Quentin Monnet <quentin@isovalent.com>
Link: https://lore.kernel.org/bpf/SY4P282MB1084AD9CD84A920F08DF83E29D549@SY4P282MB1084.AUSP282.PROD.OUTLOOK.COM
---
 tools/bpf/bpftool/btf.c           | 11 +++++------
 tools/bpf/bpftool/gen.c           |  4 ++--
 tools/bpf/bpftool/map_perf_ring.c |  7 +++----
 3 files changed, 10 insertions(+), 12 deletions(-)

(limited to 'tools')

diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index 64411fe49a66..68a70ac03c80 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -635,10 +635,9 @@ static int do_dump(int argc, char **argv)
 
 		btf = btf__parse_split(*argv, base ?: base_btf);
 		err = libbpf_get_error(btf);
-		if (err) {
-			btf = NULL;
+		if (!btf) {
 			p_err("failed to load BTF from %s: %s",
-			      *argv, strerror(err));
+			      *argv, strerror(errno));
 			goto done;
 		}
 		NEXT_ARG();
@@ -683,8 +682,8 @@ static int do_dump(int argc, char **argv)
 
 		btf = btf__load_from_kernel_by_id_split(btf_id, base_btf);
 		err = libbpf_get_error(btf);
-		if (err) {
-			p_err("get btf by id (%u): %s", btf_id, strerror(err));
+		if (!btf) {
+			p_err("get btf by id (%u): %s", btf_id, strerror(errno));
 			goto done;
 		}
 	}
@@ -820,7 +819,7 @@ build_btf_type_table(struct hashmap *tab, enum bpf_obj_type type,
 				      u32_as_hash_field(id));
 		if (err) {
 			p_err("failed to append entry to hashmap for BTF ID %u, object ID %u: %s",
-			      btf_id, id, strerror(errno));
+			      btf_id, id, strerror(-err));
 			goto err_free;
 		}
 	}
diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index 7070dcffa822..cf8b4e525c88 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -1594,14 +1594,14 @@ static int do_object(int argc, char **argv)
 
 		err = bpf_linker__add_file(linker, file, NULL);
 		if (err) {
-			p_err("failed to link '%s': %s (%d)", file, strerror(err), err);
+			p_err("failed to link '%s': %s (%d)", file, strerror(errno), errno);
 			goto out;
 		}
 	}
 
 	err = bpf_linker__finalize(linker);
 	if (err) {
-		p_err("failed to finalize ELF file: %s (%d)", strerror(err), err);
+		p_err("failed to finalize ELF file: %s (%d)", strerror(errno), errno);
 		goto out;
 	}
 
diff --git a/tools/bpf/bpftool/map_perf_ring.c b/tools/bpf/bpftool/map_perf_ring.c
index 1583281d1327..21d7d447e1f3 100644
--- a/tools/bpf/bpftool/map_perf_ring.c
+++ b/tools/bpf/bpftool/map_perf_ring.c
@@ -188,10 +188,9 @@ int do_event_pipe(int argc, char **argv)
 	opts.map_keys = &ctx.idx;
 	pb = perf_buffer__new_raw(map_fd, MMAP_PAGE_CNT, &perf_attr,
 				  print_bpf_output, &ctx, &opts);
-	err = libbpf_get_error(pb);
-	if (err) {
+	if (!pb) {
 		p_err("failed to create perf buffer: %s (%d)",
-		      strerror(err), err);
+		      strerror(errno), errno);
 		goto err_close_map;
 	}
 
@@ -206,7 +205,7 @@ int do_event_pipe(int argc, char **argv)
 		err = perf_buffer__poll(pb, 200);
 		if (err < 0 && err != -EINTR) {
 			p_err("perf buffer polling failed: %s (%d)",
-			      strerror(err), err);
+			      strerror(errno), errno);
 			goto err_close_pb;
 		}
 	}
-- 
cgit v1.2.3


From 5f388bba7acbdb097a9e7ed932a39b40f7eb2acf Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Thu, 29 Sep 2022 11:01:33 +0200
Subject: selftests/xsk: Fix double free

Fix a double free at exit of the test suite.

Fixes: a693ff3ed561 ("selftests/xsk: Add support for executing tests on physical device")
Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Andrii Nakryiko <andrii@kernel.org>
Acked-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Link: https://lore.kernel.org/bpf/20220929090133.7869-1-magnus.karlsson@gmail.com
---
 tools/testing/selftests/bpf/xskxceiver.c | 3 ---
 1 file changed, 3 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c
index ef33309bbe49..d1a5f3218c34 100644
--- a/tools/testing/selftests/bpf/xskxceiver.c
+++ b/tools/testing/selftests/bpf/xskxceiver.c
@@ -1953,9 +1953,6 @@ int main(int argc, char **argv)
 
 	pkt_stream_delete(tx_pkt_stream_default);
 	pkt_stream_delete(rx_pkt_stream_default);
-	free(ifobj_rx->umem);
-	if (!ifobj_tx->shared_umem)
-		free(ifobj_tx->umem);
 	ifobject_delete(ifobj_tx);
 	ifobject_delete(ifobj_rx);
 
-- 
cgit v1.2.3


From f77a9f3cd1e669b83d575e1f4df69c0079704842 Mon Sep 17 00:00:00 2001
From: Zhengchao Shao <shaozhengchao@huawei.com>
Date: Thu, 29 Sep 2022 12:19:09 +0800
Subject: selftests/tc-testing: update qdisc/cls/action features in config

Since the three patchsets "add tc-testing test cases", "refactor duplicate
codes in the tc cls walk function", and "refactor duplicate codes in the
qdisc class walk function" have been merged into the net-next tree, the
list of supported features needs to be updated in the config file.

Signed-off-by: Zhengchao Shao <shaozhengchao@huawei.com>
Reviewed-by: Victor Nogueira <victor@mojatatu.com>
Acked-by: Jamal Hadi Salim <jhs@mojatatu.com>
Link: https://lore.kernel.org/r/20220929041909.83913-1-shaozhengchao@huawei.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 tools/testing/selftests/tc-testing/config | 25 ++++++++++++++++++++++++-
 1 file changed, 24 insertions(+), 1 deletion(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config
index 2b2c2a835757..4638c63a339f 100644
--- a/tools/testing/selftests/tc-testing/config
+++ b/tools/testing/selftests/tc-testing/config
@@ -13,15 +13,28 @@ CONFIG_NET_SCHED=y
 # Queueing/Scheduling
 #
 CONFIG_NET_SCH_ATM=m
+CONFIG_NET_SCH_CAKE=m
+CONFIG_NET_SCH_CBQ=m
+CONFIG_NET_SCH_CBS=m
 CONFIG_NET_SCH_CHOKE=m
 CONFIG_NET_SCH_CODEL=m
+CONFIG_NET_SCH_DRR=m
+CONFIG_NET_SCH_DSMARK=m
 CONFIG_NET_SCH_ETF=m
 CONFIG_NET_SCH_FQ=m
+CONFIG_NET_SCH_FQ_CODEL=m
 CONFIG_NET_SCH_GRED=m
+CONFIG_NET_SCH_HFSC=m
 CONFIG_NET_SCH_HHF=m
+CONFIG_NET_SCH_HTB=m
+CONFIG_NET_SCH_INGRESS=m
+CONFIG_NET_SCH_MQPRIO=m
+CONFIG_NET_SCH_MULTIQ=m
+CONFIG_NET_SCH_NETEM=m
+CONFIG_NET_SCH_PIE=m
 CONFIG_NET_SCH_PLUG=m
 CONFIG_NET_SCH_PRIO=m
-CONFIG_NET_SCH_INGRESS=m
+CONFIG_NET_SCH_QFQ=m
 CONFIG_NET_SCH_SFB=m
 CONFIG_NET_SCH_SFQ=m
 CONFIG_NET_SCH_SKBPRIO=m
@@ -37,6 +50,15 @@ CONFIG_NET_CLS_FW=m
 CONFIG_NET_CLS_U32=m
 CONFIG_CLS_U32_PERF=y
 CONFIG_CLS_U32_MARK=y
+CONFIG_NET_CLS_BASIC=m
+CONFIG_NET_CLS_BPF=m
+CONFIG_NET_CLS_CGROUP=m
+CONFIG_NET_CLS_FLOW=m
+CONFIG_NET_CLS_FLOWER=m
+CONFIG_NET_CLS_MATCHALL=m
+CONFIG_NET_CLS_ROUTE4=m
+CONFIG_NET_CLS_RSVP=m
+CONFIG_NET_CLS_TCINDEX=m
 CONFIG_NET_EMATCH=y
 CONFIG_NET_EMATCH_STACK=32
 CONFIG_NET_EMATCH_CMP=m
@@ -68,6 +90,7 @@ CONFIG_NET_ACT_IFE=m
 CONFIG_NET_ACT_TUNNEL_KEY=m
 CONFIG_NET_ACT_CT=m
 CONFIG_NET_ACT_MPLS=m
+CONFIG_NET_ACT_GATE=m
 CONFIG_NET_IFE_SKBMARK=m
 CONFIG_NET_IFE_SKBPRIO=m
 CONFIG_NET_IFE_SKBTCINDEX=m
-- 
cgit v1.2.3


From 6bf41020b72b1c5ab96acc309a135153abbe20c8 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Fri, 30 Sep 2022 08:59:33 -0700
Subject: selftests: mptcp: update and extend fastclose test-cases

After the previous patches, the MPTCP protocol can generate
fast-closes on both ends of the connection. Rework the relevant
test-case to carefully trigger the fast-close code-path on a
single end at a time, while ensuring that a predictable amount
of data is spooled on both ends.

Additionally, add another test-case for the passive socket
fast-close.

Reviewed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/mptcp/mptcp_connect.c | 65 ++++++++++++++--
 tools/testing/selftests/net/mptcp/mptcp_join.sh   | 90 ++++++++++++++++++-----
 2 files changed, 130 insertions(+), 25 deletions(-)

(limited to 'tools')

diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index 24d4e9cb617e..e54653ea2ed4 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -72,6 +72,8 @@ static int cfg_wait;
 static uint32_t cfg_mark;
 static char *cfg_input;
 static int cfg_repeat = 1;
+static int cfg_truncate;
+static int cfg_rcv_trunc;
 
 struct cfg_cmsg_types {
 	unsigned int cmsg_enabled:1;
@@ -95,11 +97,15 @@ static struct cfg_sockopt_types cfg_sockopt_types;
 
 static void die_usage(void)
 {
-	fprintf(stderr, "Usage: mptcp_connect [-6] [-c cmsg] [-i file] [-I num] [-j] [-l] "
+	fprintf(stderr, "Usage: mptcp_connect [-6] [-c cmsg] [-f offset] [-i file] [-I num] [-j] [-l] "
 		"[-m mode] [-M mark] [-o option] [-p port] [-P mode] [-j] [-l] [-r num] "
 		"[-s MPTCP|TCP] [-S num] [-r num] [-t num] [-T num] [-u] [-w sec] connect_address\n");
 	fprintf(stderr, "\t-6 use ipv6\n");
 	fprintf(stderr, "\t-c cmsg -- test cmsg type <cmsg>\n");
+	fprintf(stderr, "\t-f offset -- stop the I/O after receiving and sending the specified amount "
+		"of bytes. If there are unread bytes in the receive queue, that will cause a MPTCP "
+		"fastclose at close/shutdown. If offset is negative, expect the peer to close before "
+		"all the local data as been sent, thus toleration errors on write and EPIPE signals\n");
 	fprintf(stderr, "\t-i file -- read the data to send from the given file instead of stdin");
 	fprintf(stderr, "\t-I num -- repeat the transfer 'num' times. In listen mode accepts num "
 		"incoming connections, in client mode, disconnect and reconnect to the server\n");
@@ -382,7 +388,7 @@ static size_t do_rnd_write(const int fd, char *buf, const size_t len)
 
 	bw = write(fd, buf, do_w);
 	if (bw < 0)
-		perror("write");
+		return bw;
 
 	/* let the join handshake complete, before going on */
 	if (cfg_join && first) {
@@ -571,7 +577,7 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd, bool *in_closed_after
 		.fd = peerfd,
 		.events = POLLIN | POLLOUT,
 	};
-	unsigned int woff = 0, wlen = 0;
+	unsigned int woff = 0, wlen = 0, total_wlen = 0, total_rlen = 0;
 	char wbuf[8192];
 
 	set_nonblock(peerfd, true);
@@ -597,7 +603,16 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd, bool *in_closed_after
 		}
 
 		if (fds.revents & POLLIN) {
-			len = do_rnd_read(peerfd, rbuf, sizeof(rbuf));
+			ssize_t rb = sizeof(rbuf);
+
+			/* limit the total amount of read data to the trunc value*/
+			if (cfg_truncate > 0) {
+				if (rb + total_rlen > cfg_truncate)
+					rb = cfg_truncate - total_rlen;
+				len = read(peerfd, rbuf, rb);
+			} else {
+				len = do_rnd_read(peerfd, rbuf, sizeof(rbuf));
+			}
 			if (len == 0) {
 				/* no more data to receive:
 				 * peer has closed its write side
@@ -612,10 +627,13 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd, bool *in_closed_after
 
 			/* Else, still have data to transmit */
 			} else if (len < 0) {
+				if (cfg_rcv_trunc)
+					return 0;
 				perror("read");
 				return 3;
 			}
 
+			total_rlen += len;
 			do_write(outfd, rbuf, len);
 		}
 
@@ -628,12 +646,21 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd, bool *in_closed_after
 			if (wlen > 0) {
 				ssize_t bw;
 
+				/* limit the total amount of written data to the trunc value */
+				if (cfg_truncate > 0 && wlen + total_wlen > cfg_truncate)
+					wlen = cfg_truncate - total_wlen;
+
 				bw = do_rnd_write(peerfd, wbuf + woff, wlen);
-				if (bw < 0)
+				if (bw < 0) {
+					if (cfg_rcv_trunc)
+						return 0;
+					perror("write");
 					return 111;
+				}
 
 				woff += bw;
 				wlen -= bw;
+				total_wlen += bw;
 			} else if (wlen == 0) {
 				/* We have no more data to send. */
 				fds.events &= ~POLLOUT;
@@ -652,10 +679,16 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd, bool *in_closed_after
 		}
 
 		if (fds.revents & (POLLERR | POLLNVAL)) {
+			if (cfg_rcv_trunc)
+				return 0;
 			fprintf(stderr, "Unexpected revents: "
 				"POLLERR/POLLNVAL(%x)\n", fds.revents);
 			return 5;
 		}
+
+		if (cfg_truncate > 0 && total_wlen >= cfg_truncate &&
+		    total_rlen >= cfg_truncate)
+			break;
 	}
 
 	/* leave some time for late join/announce */
@@ -1160,11 +1193,13 @@ again:
 	}
 
 	/* close the client socket open only if we are not going to reconnect */
-	ret = copyfd_io(fd_in, fd, 1, cfg_repeat == 1);
+	ret = copyfd_io(fd_in, fd, 1, 0);
 	if (ret)
 		return ret;
 
-	if (--cfg_repeat > 0) {
+	if (cfg_truncate > 0) {
+		xdisconnect(fd, peer->ai_addrlen);
+	} else if (--cfg_repeat > 0) {
 		xdisconnect(fd, peer->ai_addrlen);
 
 		/* the socket could be unblocking at this point, we need the
@@ -1176,7 +1211,10 @@ again:
 		if (cfg_input)
 			close(fd_in);
 		goto again;
+	} else {
+		close(fd);
 	}
+
 	return 0;
 }
 
@@ -1262,8 +1300,19 @@ static void parse_opts(int argc, char **argv)
 {
 	int c;
 
-	while ((c = getopt(argc, argv, "6c:hi:I:jlm:M:o:p:P:r:R:s:S:t:T:w:")) != -1) {
+	while ((c = getopt(argc, argv, "6c:f:hi:I:jlm:M:o:p:P:r:R:s:S:t:T:w:")) != -1) {
 		switch (c) {
+		case 'f':
+			cfg_truncate = atoi(optarg);
+
+			/* when receiving a fastclose, ignore PIPE signals and
+			 * all the I/O errors later in the code
+			 */
+			if (cfg_truncate < 0) {
+				cfg_rcv_trunc = true;
+				signal(SIGPIPE, handle_signal);
+			}
+			break;
 		case 'j':
 			cfg_join = true;
 			cfg_mode = CFG_MODE_POLL;
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 2957fe414639..f3dd5f2a0272 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -346,10 +346,21 @@ check_transfer()
 	local in=$1
 	local out=$2
 	local what=$3
+	local bytes=$4
 	local i a b
 
 	local line
-	cmp -l "$in" "$out" | while read -r i a b; do
+	if [ -n "$bytes" ]; then
+		# when truncating we must check the size explicitly
+		local out_size=$(wc -c $out | awk '{print $1}')
+		if [ $out_size -ne $bytes ]; then
+			echo "[ FAIL ] $what output file has wrong size ($out_size, $bytes)"
+			fail_test
+			return 1
+		fi
+		bytes="--bytes=${bytes}"
+	fi
+	cmp -l "$in" "$out" ${bytes} | while read -r i a b; do
 		local sum=$((0${a} + 0${b}))
 		if [ $check_invert -eq 0 ] || [ $sum -ne $((0xff)) ]; then
 			echo "[ FAIL ] $what does not match (in, out):"
@@ -707,9 +718,31 @@ do_transfer()
 	fi
 
 	local flags="subflow"
+	local extra_cl_args=""
+	local extra_srv_args=""
+	local trunc_size=""
 	if [[ "${addr_nr_ns2}" = "fastclose_"* ]]; then
+		if [ ${test_link_fail} -le 1 ]; then
+			echo "fastclose tests need test_link_fail argument"
+			fail_test
+			return 1
+		fi
+
 		# disconnect
-		extra_args="$extra_args -I ${addr_nr_ns2:10}"
+		trunc_size=${test_link_fail}
+		local side=${addr_nr_ns2:10}
+
+		if [ ${side} = "client" ]; then
+			extra_cl_args="-f ${test_link_fail}"
+			extra_srv_args="-f -1"
+		elif [ ${side} = "server" ]; then
+			extra_srv_args="-f ${test_link_fail}"
+			extra_cl_args="-f -1"
+		else
+			echo "wrong/unknown fastclose spec ${side}"
+			fail_test
+			return 1
+		fi
 		addr_nr_ns2=0
 	elif [[ "${addr_nr_ns2}" = "userspace_"* ]]; then
 		userspace_pm=1
@@ -737,39 +770,41 @@ do_transfer()
 		local_addr="0.0.0.0"
 	fi
 
+	extra_srv_args="$extra_args $extra_srv_args"
 	if [ "$test_link_fail" -gt 1 ];then
 		timeout ${timeout_test} \
 			ip netns exec ${listener_ns} \
 				./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
-					$extra_args ${local_addr} < "$sinfail" > "$sout" &
+					$extra_srv_args ${local_addr} < "$sinfail" > "$sout" &
 	else
 		timeout ${timeout_test} \
 			ip netns exec ${listener_ns} \
 				./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
-					$extra_args ${local_addr} < "$sin" > "$sout" &
+					$extra_srv_args ${local_addr} < "$sin" > "$sout" &
 	fi
 	local spid=$!
 
 	wait_local_port_listen "${listener_ns}" "${port}"
 
+	extra_cl_args="$extra_args $extra_cl_args"
 	if [ "$test_link_fail" -eq 0 ];then
 		timeout ${timeout_test} \
 			ip netns exec ${connector_ns} \
 				./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
-					$extra_args $connect_addr < "$cin" > "$cout" &
+					$extra_cl_args $connect_addr < "$cin" > "$cout" &
 	elif [ "$test_link_fail" -eq 1 ] || [ "$test_link_fail" -eq 2 ];then
 		( cat "$cinfail" ; sleep 2; link_failure $listener_ns ; cat "$cinfail" ) | \
 			tee "$cinsent" | \
 			timeout ${timeout_test} \
 				ip netns exec ${connector_ns} \
 					./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
-						$extra_args $connect_addr > "$cout" &
+						$extra_cl_args $connect_addr > "$cout" &
 	else
 		tee "$cinsent" < "$cinfail" | \
 			timeout ${timeout_test} \
 				ip netns exec ${connector_ns} \
 					./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
-						$extra_args $connect_addr > "$cout" &
+						$extra_cl_args $connect_addr > "$cout" &
 	fi
 	local cpid=$!
 
@@ -971,15 +1006,15 @@ do_transfer()
 	fi
 
 	if [ "$test_link_fail" -gt 1 ];then
-		check_transfer $sinfail $cout "file received by client"
+		check_transfer $sinfail $cout "file received by client" $trunc_size
 	else
-		check_transfer $sin $cout "file received by client"
+		check_transfer $sin $cout "file received by client" $trunc_size
 	fi
 	retc=$?
 	if [ "$test_link_fail" -eq 0 ];then
-		check_transfer $cin $sout "file received by server"
+		check_transfer $cin $sout "file received by server" $trunc_size
 	else
-		check_transfer $cinsent $sout "file received by server"
+		check_transfer $cinsent $sout "file received by server" $trunc_size
 	fi
 	rets=$?
 
@@ -1188,12 +1223,23 @@ chk_fclose_nr()
 {
 	local fclose_tx=$1
 	local fclose_rx=$2
+	local ns_invert=$3
 	local count
 	local dump_stats
+	local ns_tx=$ns2
+	local ns_rx=$ns1
+	local extra_msg="   "
+
+	if [[ $ns_invert = "invert" ]]; then
+		ns_tx=$ns1
+		ns_rx=$ns2
+		extra_msg=${extra_msg}"invert"
+	fi
 
 	printf "%-${nr_blank}s %s" " " "ctx"
-	count=$(ip netns exec $ns2 nstat -as | grep MPTcpExtMPFastcloseTx | awk '{print $2}')
+	count=$(ip netns exec $ns_tx nstat -as | grep MPTcpExtMPFastcloseTx | awk '{print $2}')
 	[ -z "$count" ] && count=0
+	[ "$count" != "$fclose_tx" ] && extra_msg="$extra_msg,tx=$count"
 	if [ "$count" != "$fclose_tx" ]; then
 		echo "[fail] got $count MP_FASTCLOSE[s] TX expected $fclose_tx"
 		fail_test
@@ -1203,17 +1249,20 @@ chk_fclose_nr()
 	fi
 
 	echo -n " - fclzrx"
-	count=$(ip netns exec $ns1 nstat -as | grep MPTcpExtMPFastcloseRx | awk '{print $2}')
+	count=$(ip netns exec $ns_rx nstat -as | grep MPTcpExtMPFastcloseRx | awk '{print $2}')
 	[ -z "$count" ] && count=0
+	[ "$count" != "$fclose_rx" ] && extra_msg="$extra_msg,rx=$count"
 	if [ "$count" != "$fclose_rx" ]; then
 		echo "[fail] got $count MP_FASTCLOSE[s] RX expected $fclose_rx"
 		fail_test
 		dump_stats=1
 	else
-		echo "[ ok ]"
+		echo -n "[ ok ]"
 	fi
 
 	[ "${dump_stats}" = 1 ] && dump_stats
+
+	echo "$extra_msg"
 }
 
 chk_rst_nr()
@@ -1236,7 +1285,7 @@ chk_rst_nr()
 	printf "%-${nr_blank}s %s" " " "rtx"
 	count=$(ip netns exec $ns_tx nstat -as | grep MPTcpExtMPRstTx | awk '{print $2}')
 	[ -z "$count" ] && count=0
-	if [ "$count" != "$rst_tx" ]; then
+	if [ $count -lt $rst_tx ]; then
 		echo "[fail] got $count MP_RST[s] TX expected $rst_tx"
 		fail_test
 		dump_stats=1
@@ -1247,7 +1296,7 @@ chk_rst_nr()
 	echo -n " - rstrx "
 	count=$(ip netns exec $ns_rx nstat -as | grep MPTcpExtMPRstRx | awk '{print $2}')
 	[ -z "$count" ] && count=0
-	if [ "$count" != "$rst_rx" ]; then
+	if [ "$count" -lt "$rst_rx" ]; then
 		echo "[fail] got $count MP_RST[s] RX expected $rst_rx"
 		fail_test
 		dump_stats=1
@@ -2801,11 +2850,18 @@ fullmesh_tests()
 fastclose_tests()
 {
 	if reset "fastclose test"; then
-		run_tests $ns1 $ns2 10.0.1.1 1024 0 fastclose_2
+		run_tests $ns1 $ns2 10.0.1.1 1024 0 fastclose_client
 		chk_join_nr 0 0 0
 		chk_fclose_nr 1 1
 		chk_rst_nr 1 1 invert
 	fi
+
+	if reset "fastclose server test"; then
+		run_tests $ns1 $ns2 10.0.1.1 1024 0 fastclose_server
+		chk_join_nr 0 0 0
+		chk_fclose_nr 1 1 invert
+		chk_rst_nr 1 1
+	fi
 }
 
 pedit_action_pkts()
-- 
cgit v1.2.3