378 files changed, 13992 insertions, 2317 deletions
diff --git a/tools/testing/selftests/arm64/fp/Makefile b/tools/testing/selftests/arm64/fp/Makefile
index f2abdd6ba12e..ba1488c7c315 100644
--- a/tools/testing/selftests/arm64/fp/Makefile
+++ b/tools/testing/selftests/arm64/fp/Makefile
@@ -9,12 +9,12 @@ TEST_PROGS_EXTENDED := fpsimd-test fpsimd-stress \
 
 all: $(TEST_GEN_PROGS) $(TEST_PROGS_EXTENDED)
 
-fpsimd-test: fpsimd-test.o
+fpsimd-test: fpsimd-test.o asm-utils.o
 	$(CC) -nostdlib $^ -o $@
 rdvl-sve: rdvl-sve.o rdvl.o
-sve-ptrace: sve-ptrace.o sve-ptrace-asm.o
+sve-ptrace: sve-ptrace.o
 sve-probe-vls: sve-probe-vls.o rdvl.o
-sve-test: sve-test.o
+sve-test: sve-test.o asm-utils.o
 	$(CC) -nostdlib $^ -o $@
 vec-syscfg: vec-syscfg.o rdvl.o
 vlset: vlset.o
diff --git a/tools/testing/selftests/arm64/fp/TODO b/tools/testing/selftests/arm64/fp/TODO
index b6b7ebfcf362..44004e53da33 100644
--- a/tools/testing/selftests/arm64/fp/TODO
+++ b/tools/testing/selftests/arm64/fp/TODO
@@ -1,4 +1,7 @@
 - Test unsupported values in the ABIs.
-- More coverage for ptrace (eg, vector length conversions).
-- Coverage for signals.
-- Test PR_SVE_VL_INHERITY after a double fork.
+- More coverage for ptrace:
+ - Get/set of FFR.
+ - Ensure ptraced processes actually see the register state visible through
+   the ptrace interface.
+ - Big endian.
+- Test PR_SVE_VL_INHERIT after a double fork.
diff --git a/tools/testing/selftests/arm64/fp/asm-utils.S b/tools/testing/selftests/arm64/fp/asm-utils.S
new file mode 100644
index 000000000000..4b9728efc18d
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/asm-utils.S
@@ -0,0 +1,172 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2015-2021 ARM Limited.
+// Original author: Dave Martin <Dave.Martin@arm.com>
+//
+// Utility functions for assembly code.
+
+#include <asm/unistd.h>
+#include "assembler.h"
+
+// Print a single character x0 to stdout
+// Clobbers x0-x2,x8
+function putc
+	str	x0, [sp, #-16]!
+
+	mov	x0, #1			// STDOUT_FILENO
+	mov	x1, sp
+	mov	x2, #1
+	mov	x8, #__NR_write
+	svc	#0
+
+	add	sp, sp, #16
+	ret
+endfunction
+.globl	putc
+	
+// Print a NUL-terminated string starting at address x0 to stdout
+// Clobbers x0-x3,x8
+function puts
+	mov	x1, x0
+
+	mov	x2, #0
+0:	ldrb	w3, [x0], #1
+	cbz	w3, 1f
+	add	x2, x2, #1
+	b	0b
+
+1:	mov	w0, #1			// STDOUT_FILENO
+	mov	x8, #__NR_write
+	svc	#0
+
+	ret
+endfunction
+.globl	puts
+
+// Print an unsigned decimal number x0 to stdout
+// Clobbers x0-x4,x8
+function putdec
+	mov	x1, sp
+	str	x30, [sp, #-32]!	// Result can't be > 20 digits
+
+	mov	x2, #0
+	strb	w2, [x1, #-1]!		// Write the NUL terminator
+
+	mov	x2, #10
+0:	udiv	x3, x0, x2		// div-mod loop to generate the digits
+	msub	x0, x3, x2, x0
+	add	w0, w0, #'0'
+	strb	w0, [x1, #-1]!
+	mov	x0, x3
+	cbnz	x3, 0b
+
+	ldrb	w0, [x1]
+	cbnz	w0, 1f
+	mov	w0, #'0'		// Print "0" for 0, not ""
+	strb	w0, [x1, #-1]!
+
+1:	mov	x0, x1
+	bl	puts
+
+	ldr	x30, [sp], #32
+	ret
+endfunction
+.globl	putdec
+
+// Print an unsigned decimal number x0 to stdout, followed by a newline
+// Clobbers x0-x5,x8
+function putdecn
+	mov	x5, x30
+
+	bl	putdec
+	mov	x0, #'\n'
+	bl	putc
+
+	ret	x5
+endfunction
+.globl	putdecn
+
+// Clobbers x0-x3,x8
+function puthexb
+	str	x30, [sp, #-0x10]!
+
+	mov	w3, w0
+	lsr	w0, w0, #4
+	bl	puthexnibble
+	mov	w0, w3
+
+	ldr	x30, [sp], #0x10
+	// fall through to puthexnibble
+endfunction
+.globl	puthexb
+
+// Clobbers x0-x2,x8
+function puthexnibble
+	and	w0, w0, #0xf
+	cmp	w0, #10
+	blo	1f
+	add	w0, w0, #'a' - ('9' + 1)
+1:	add	w0, w0, #'0'
+	b	putc
+endfunction
+.globl	puthexnibble
+
+// x0=data in, x1=size in, clobbers x0-x5,x8
+function dumphex
+	str	x30, [sp, #-0x10]!
+
+	mov	x4, x0
+	mov	x5, x1
+
+0:	subs	x5, x5, #1
+	b.lo	1f
+	ldrb	w0, [x4], #1
+	bl	puthexb
+	b	0b
+
+1:	ldr	x30, [sp], #0x10
+	ret
+endfunction
+.globl	dumphex
+
+	// Trivial memory copy: copy x2 bytes, starting at address x1, to address x0.
+// Clobbers x0-x3
+function memcpy
+	cmp	x2, #0
+	b.eq	1f
+0:	ldrb	w3, [x1], #1
+	strb	w3, [x0], #1
+	subs	x2, x2, #1
+	b.ne	0b
+1:	ret
+endfunction
+.globl	memcpy
+
+// Fill x1 bytes starting at x0 with 0xae (for canary purposes)
+// Clobbers x1, x2.
+function memfill_ae
+	mov	w2, #0xae
+	b	memfill
+endfunction
+.globl	memfill_ae
+	
+// Fill x1 bytes starting at x0 with 0.
+// Clobbers x1, x2.
+function memclr
+	mov	w2, #0
+endfunction
+.globl	memclr
+	// fall through to memfill
+
+// Trivial memory fill: fill x1 bytes starting at address x0 with byte w2
+// Clobbers x1
+function memfill
+	cmp	x1, #0
+	b.eq	1f
+
+0:	strb	w2, [x0], #1
+	subs	x1, x1, #1
+	b.ne	0b
+
+1:	ret
+endfunction
+.globl	memfill
diff --git a/tools/testing/selftests/arm64/fp/assembler.h b/tools/testing/selftests/arm64/fp/assembler.h
index 8944f2189206..90bd433d2665 100644
--- a/tools/testing/selftests/arm64/fp/assembler.h
+++ b/tools/testing/selftests/arm64/fp/assembler.h
@@ -54,4 +54,15 @@ endfunction
 	.purgem \name\()_entry
 .endm
 
+// Utility macro to print a literal string
+// Clobbers x0-x4,x8
+.macro puts string
+	.pushsection .rodata.str1.1, "aMS", 1
+.L__puts_literal\@: .string "\string"
+	.popsection
+
+	ldr	x0, =.L__puts_literal\@
+	bl	puts
+.endm
+
 #endif /* ! ASSEMBLER_H */
diff --git a/tools/testing/selftests/arm64/fp/fpsimd-test.S b/tools/testing/selftests/arm64/fp/fpsimd-test.S
index 0dbd594c2747..e21e8ea52c7e 100644
--- a/tools/testing/selftests/arm64/fp/fpsimd-test.S
+++ b/tools/testing/selftests/arm64/fp/fpsimd-test.S
@@ -33,131 +33,6 @@
 define_accessor setv, NVR, _vldr
 define_accessor getv, NVR, _vstr
 
-// Print a single character x0 to stdout
-// Clobbers x0-x2,x8
-function putc
-	str	x0, [sp, #-16]!
-
-	mov	x0, #1			// STDOUT_FILENO
-	mov	x1, sp
-	mov	x2, #1
-	mov	x8, #__NR_write
-	svc	#0
-
-	add	sp, sp, #16
-	ret
-endfunction
-
-// Print a NUL-terminated string starting at address x0 to stdout
-// Clobbers x0-x3,x8
-function puts
-	mov	x1, x0
-
-	mov	x2, #0
-0:	ldrb	w3, [x0], #1
-	cbz	w3, 1f
-	add	x2, x2, #1
-	b	0b
-
-1:	mov	w0, #1			// STDOUT_FILENO
-	mov	x8, #__NR_write
-	svc	#0
-
-	ret
-endfunction
-
-// Utility macro to print a literal string
-// Clobbers x0-x4,x8
-.macro puts string
-	.pushsection .rodata.str1.1, "aMS", 1
-.L__puts_literal\@: .string "\string"
-	.popsection
-
-	ldr	x0, =.L__puts_literal\@
-	bl	puts
-.endm
-
-// Print an unsigned decimal number x0 to stdout
-// Clobbers x0-x4,x8
-function putdec
-	mov	x1, sp
-	str	x30, [sp, #-32]!	// Result can't be > 20 digits
-
-	mov	x2, #0
-	strb	w2, [x1, #-1]!		// Write the NUL terminator
-
-	mov	x2, #10
-0:	udiv	x3, x0, x2		// div-mod loop to generate the digits
-	msub	x0, x3, x2, x0
-	add	w0, w0, #'0'
-	strb	w0, [x1, #-1]!
-	mov	x0, x3
-	cbnz	x3, 0b
-
-	ldrb	w0, [x1]
-	cbnz	w0, 1f
-	mov	w0, #'0'		// Print "0" for 0, not ""
-	strb	w0, [x1, #-1]!
-
-1:	mov	x0, x1
-	bl	puts
-
-	ldr	x30, [sp], #32
-	ret
-endfunction
-
-// Print an unsigned decimal number x0 to stdout, followed by a newline
-// Clobbers x0-x5,x8
-function putdecn
-	mov	x5, x30
-
-	bl	putdec
-	mov	x0, #'\n'
-	bl	putc
-
-	ret	x5
-endfunction
-
-
-// Clobbers x0-x3,x8
-function puthexb
-	str	x30, [sp, #-0x10]!
-
-	mov	w3, w0
-	lsr	w0, w0, #4
-	bl	puthexnibble
-	mov	w0, w3
-
-	ldr	x30, [sp], #0x10
-	// fall through to puthexnibble
-endfunction
-// Clobbers x0-x2,x8
-function puthexnibble
-	and	w0, w0, #0xf
-	cmp	w0, #10
-	blo	1f
-	add	w0, w0, #'a' - ('9' + 1)
-1:	add	w0, w0, #'0'
-	b	putc
-endfunction
-
-// x0=data in, x1=size in, clobbers x0-x5,x8
-function dumphex
-	str	x30, [sp, #-0x10]!
-
-	mov	x4, x0
-	mov	x5, x1
-
-0:	subs	x5, x5, #1
-	b.lo	1f
-	ldrb	w0, [x4], #1
-	bl	puthexb
-	b	0b
-
-1:	ldr	x30, [sp], #0x10
-	ret
-endfunction
-
 // Declare some storate space to shadow the SVE register contents:
 .pushsection .text
 .data
@@ -168,18 +43,6 @@ scratch:
 	.space	MAXVL_B
 .popsection
 
-// Trivial memory copy: copy x2 bytes, starting at address x1, to address x0.
-// Clobbers x0-x3
-function memcpy
-	cmp	x2, #0
-	b.eq	1f
-0:	ldrb	w3, [x1], #1
-	strb	w3, [x0], #1
-	subs	x2, x2, #1
-	b.ne	0b
-1:	ret
-endfunction
-
 // Generate a test pattern for storage in SVE registers
 // x0: pid	(16 bits)
 // x1: register number (6 bits)
@@ -227,33 +90,6 @@ function setup_vreg
 	ret	x4
 endfunction
 
-// Fill x1 bytes starting at x0 with 0xae (for canary purposes)
-// Clobbers x1, x2.
-function memfill_ae
-	mov	w2, #0xae
-	b	memfill
-endfunction
-
-// Fill x1 bytes starting at x0 with 0.
-// Clobbers x1, x2.
-function memclr
-	mov	w2, #0
-endfunction
-	// fall through to memfill
-
-// Trivial memory fill: fill x1 bytes starting at address x0 with byte w2
-// Clobbers x1
-function memfill
-	cmp	x1, #0
-	b.eq	1f
-
-0:	strb	w2, [x0], #1
-	subs	x1, x1, #1
-	b.ne	0b
-
-1:	ret
-endfunction
-
 // Trivial memory compare: compare x2 bytes starting at address x0 with
 // bytes starting at address x1.
 // Returns only if all bytes match; otherwise, the program is aborted.
diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace-asm.S b/tools/testing/selftests/arm64/fp/sve-ptrace-asm.S
deleted file mode 100644
index 3e81f9fab574..000000000000
--- a/tools/testing/selftests/arm64/fp/sve-ptrace-asm.S
+++ /dev/null
@@ -1,33 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-// Copyright (C) 2015-2019 ARM Limited.
-// Original author: Dave Martin <Dave.Martin@arm.com>
-#include <asm/unistd.h>
-
-.arch_extension sve
-
-.globl sve_store_patterns
-
-sve_store_patterns:
-	mov	x1, x0
-
-	index	z0.b, #0, #1
-	str	q0, [x1]
-
-	mov	w8, #__NR_getpid
-	svc	#0
-	str	q0, [x1, #0x10]
-
-	mov	z1.d, z0.d
-	str	q0, [x1, #0x20]
-
-	mov	w8, #__NR_getpid
-	svc	#0
-	str	q0, [x1, #0x30]
-
-	mov	z1.d, z0.d
-	str	q0, [x1, #0x40]
-
-	ret
-
-.size	sve_store_patterns, . - sve_store_patterns
-.type	sve_store_patterns, @function
diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c
index 612d3899614a..c4417bc48d4f 100644
--- a/tools/testing/selftests/arm64/fp/sve-ptrace.c
+++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c
@@ -1,15 +1,17 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * Copyright (C) 2015-2020 ARM Limited.
+ * Copyright (C) 2015-2021 ARM Limited.
  * Original author: Dave Martin <Dave.Martin@arm.com>
  */
 #include <errno.h>
+#include <stdbool.h>
 #include <stddef.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
 #include <sys/auxv.h>
+#include <sys/prctl.h>
 #include <sys/ptrace.h>
 #include <sys/types.h>
 #include <sys/uio.h>
@@ -19,40 +21,22 @@
 
 #include "../../kselftest.h"
 
+#define VL_TESTS (((SVE_VQ_MAX - SVE_VQ_MIN) + 1) * 3)
+#define FPSIMD_TESTS 5
+
+#define EXPECTED_TESTS (VL_TESTS + FPSIMD_TESTS)
+
 /* <linux/elf.h> and <sys/auxv.h> don't like each other, so: */
 #ifndef NT_ARM_SVE
 #define NT_ARM_SVE 0x405
 #endif
 
-/* Number of registers filled in by sve_store_patterns */
-#define NR_VREGS 5
-
-void sve_store_patterns(__uint128_t v[NR_VREGS]);
-
-static void dump(const void *buf, size_t size)
-{
-	size_t i;
-	const unsigned char *p = buf;
-
-	for (i = 0; i < size; ++i)
-		printf(" %.2x", *p++);
-}
-
-static int check_vregs(const __uint128_t vregs[NR_VREGS])
+static void fill_buf(char *buf, size_t size)
 {
 	int i;
-	int ok = 1;
-
-	for (i = 0; i < NR_VREGS; ++i) {
-		printf("# v[%d]:", i);
-		dump(&vregs[i], sizeof vregs[i]);
-		putchar('\n');
-
-		if (vregs[i] != vregs[0])
-			ok = 0;
-	}
 
-	return ok;
+	for (i = 0; i < size; i++)
+		buf[i] = random();
 }
 
 static int do_child(void)
@@ -66,6 +50,15 @@ static int do_child(void)
 	return EXIT_SUCCESS;
 }
 
+static int get_fpsimd(pid_t pid, struct user_fpsimd_state *fpsimd)
+{
+	struct iovec iov;
+
+	iov.iov_base = fpsimd;
+	iov.iov_len = sizeof(*fpsimd);
+	return ptrace(PTRACE_GETREGSET, pid, NT_PRFPREG, &iov);
+}
+
 static struct user_sve_header *get_sve(pid_t pid, void **buf, size_t *size)
 {
 	struct user_sve_header *sve;
@@ -112,25 +105,335 @@ static int set_sve(pid_t pid, const struct user_sve_header *sve)
 	return ptrace(PTRACE_SETREGSET, pid, NT_ARM_SVE, &iov);
 }
 
-static void dump_sve_regs(const struct user_sve_header *sve, unsigned int num,
-			  unsigned int vlmax)
+/* Validate setting and getting the inherit flag */
+static void ptrace_set_get_inherit(pid_t child)
+{
+	struct user_sve_header sve;
+	struct user_sve_header *new_sve = NULL;
+	size_t new_sve_size = 0;
+	int ret;
+
+	/* First set the flag */
+	memset(&sve, 0, sizeof(sve));
+	sve.size = sizeof(sve);
+	sve.vl = sve_vl_from_vq(SVE_VQ_MIN);
+	sve.flags = SVE_PT_VL_INHERIT;
+	ret = set_sve(child, &sve);
+	if (ret != 0) {
+		ksft_test_result_fail("Failed to set SVE_PT_VL_INHERIT\n");
+		return;
+	}
+
+	/*
+	 * Read back the new register state and verify that we have
+	 * set the flags we expected.
+	 */
+	if (!get_sve(child, (void **)&new_sve, &new_sve_size)) {
+		ksft_test_result_fail("Failed to read SVE flags\n");
+		return;
+	}
+
+	ksft_test_result(new_sve->flags & SVE_PT_VL_INHERIT,
+			 "SVE_PT_VL_INHERIT set\n");
+
+	/* Now clear */
+	sve.flags &= ~SVE_PT_VL_INHERIT;
+	ret = set_sve(child, &sve);
+	if (ret != 0) {
+		ksft_test_result_fail("Failed to clear SVE_PT_VL_INHERIT\n");
+		return;
+	}
+
+	if (!get_sve(child, (void **)&new_sve, &new_sve_size)) {
+		ksft_test_result_fail("Failed to read SVE flags\n");
+		return;
+	}
+
+	ksft_test_result(!(new_sve->flags & SVE_PT_VL_INHERIT),
+			 "SVE_PT_VL_INHERIT cleared\n");
+
+	free(new_sve);
+}
+
+/* Validate attempting to set the specfied VL via ptrace */
+static void ptrace_set_get_vl(pid_t child, unsigned int vl, bool *supported)
+{
+	struct user_sve_header sve;
+	struct user_sve_header *new_sve = NULL;
+	size_t new_sve_size = 0;
+	int ret, prctl_vl;
+
+	*supported = false;
+
+	/* Check if the VL is supported in this process */
+	prctl_vl = prctl(PR_SVE_SET_VL, vl);
+	if (prctl_vl == -1)
+		ksft_exit_fail_msg("prctl(PR_SVE_SET_VL) failed: %s (%d)\n",
+				   strerror(errno), errno);
+
+	/* If the VL is not supported then a supported VL will be returned */
+	*supported = (prctl_vl == vl);
+
+	/* Set the VL by doing a set with no register payload */
+	memset(&sve, 0, sizeof(sve));
+	sve.size = sizeof(sve);
+	sve.vl = vl;
+	ret = set_sve(child, &sve);
+	if (ret != 0) {
+		ksft_test_result_fail("Failed to set VL %u\n", vl);
+		return;
+	}
+
+	/*
+	 * Read back the new register state and verify that we have the
+	 * same VL that we got from prctl() on ourselves.
+	 */
+	if (!get_sve(child, (void **)&new_sve, &new_sve_size)) {
+		ksft_test_result_fail("Failed to read VL %u\n", vl);
+		return;
+	}
+
+	ksft_test_result(new_sve->vl = prctl_vl, "Set VL %u\n", vl);
+
+	free(new_sve);
+}
+
+static void check_u32(unsigned int vl, const char *reg,
+		      uint32_t *in, uint32_t *out, int *errors)
+{
+	if (*in != *out) {
+		printf("# VL %d %s wrote %x read %x\n",
+		       vl, reg, *in, *out);
+		(*errors)++;
+	}
+}
+
+/* Access the FPSIMD registers via the SVE regset */
+static void ptrace_sve_fpsimd(pid_t child)
 {
-	unsigned int vq;
-	unsigned int i;
+	void *svebuf = NULL;
+	size_t svebufsz = 0;
+	struct user_sve_header *sve;
+	struct user_fpsimd_state *fpsimd, new_fpsimd;
+	unsigned int i, j;
+	unsigned char *p;
+
+	/* New process should start with FPSIMD registers only */
+	sve = get_sve(child, &svebuf, &svebufsz);
+	if (!sve) {
+		ksft_test_result_fail("get_sve: %s\n", strerror(errno));
+
+		return;
+	} else {
+		ksft_test_result_pass("get_sve(FPSIMD)\n");
+	}
 
-	if ((sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_SVE)
-		ksft_exit_fail_msg("Dumping non-SVE register\n");
+	ksft_test_result((sve->flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD,
+			 "Set FPSIMD registers\n");
+	if ((sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_FPSIMD)
+		goto out;
 
-	if (vlmax > sve->vl)
-		vlmax = sve->vl;
+	/* Try to set a known FPSIMD state via PT_REGS_SVE */
+	fpsimd = (struct user_fpsimd_state *)((char *)sve +
+					      SVE_PT_FPSIMD_OFFSET);
+	for (i = 0; i < 32; ++i) {
+		p = (unsigned char *)&fpsimd->vregs[i];
 
-	vq = sve_vq_from_vl(sve->vl);
-	for (i = 0; i < num; ++i) {
-		printf("# z%u:", i);
-		dump((const char *)sve + SVE_PT_SVE_ZREG_OFFSET(vq, i),
-		     vlmax);
-		printf("%s\n", vlmax == sve->vl ? "" : " ...");
+		for (j = 0; j < sizeof(fpsimd->vregs[i]); ++j)
+			p[j] = j;
 	}
+
+	if (set_sve(child, sve)) {
+		ksft_test_result_fail("set_sve(FPSIMD): %s\n",
+				      strerror(errno));
+
+		goto out;
+	}
+
+	/* Verify via the FPSIMD regset */
+	if (get_fpsimd(child, &new_fpsimd)) {
+		ksft_test_result_fail("get_fpsimd(): %s\n",
+				      strerror(errno));
+		goto out;
+	}
+	if (memcmp(fpsimd, &new_fpsimd, sizeof(*fpsimd)) == 0)
+		ksft_test_result_pass("get_fpsimd() gave same state\n");
+	else
+		ksft_test_result_fail("get_fpsimd() gave different state\n");
+
+out:
+	free(svebuf);
+}
+
+/* Validate attempting to set SVE data and read SVE data */
+static void ptrace_set_sve_get_sve_data(pid_t child, unsigned int vl)
+{
+	void *write_buf;
+	void *read_buf = NULL;
+	struct user_sve_header *write_sve;
+	struct user_sve_header *read_sve;
+	size_t read_sve_size = 0;
+	unsigned int vq = sve_vq_from_vl(vl);
+	int ret, i;
+	size_t data_size;
+	int errors = 0;
+
+	data_size = SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, SVE_PT_REGS_SVE);
+	write_buf = malloc(data_size);
+	if (!write_buf) {
+		ksft_test_result_fail("Error allocating %d byte buffer for VL %u\n",
+				      data_size, vl);
+		return;
+	}
+	write_sve = write_buf;
+
+	/* Set up some data and write it out */
+	memset(write_sve, 0, data_size);
+	write_sve->size = data_size;
+	write_sve->vl = vl;
+	write_sve->flags = SVE_PT_REGS_SVE;
+
+	for (i = 0; i < __SVE_NUM_ZREGS; i++)
+		fill_buf(write_buf + SVE_PT_SVE_ZREG_OFFSET(vq, i),
+			 SVE_PT_SVE_ZREG_SIZE(vq));
+
+	for (i = 0; i < __SVE_NUM_PREGS; i++)
+		fill_buf(write_buf + SVE_PT_SVE_PREG_OFFSET(vq, i),
+			 SVE_PT_SVE_PREG_SIZE(vq));
+
+	fill_buf(write_buf + SVE_PT_SVE_FPSR_OFFSET(vq), SVE_PT_SVE_FPSR_SIZE);
+	fill_buf(write_buf + SVE_PT_SVE_FPCR_OFFSET(vq), SVE_PT_SVE_FPCR_SIZE);
+
+	/* TODO: Generate a valid FFR pattern */
+
+	ret = set_sve(child, write_sve);
+	if (ret != 0) {
+		ksft_test_result_fail("Failed to set VL %u data\n", vl);
+		goto out;
+	}
+
+	/* Read the data back */
+	if (!get_sve(child, (void **)&read_buf, &read_sve_size)) {
+		ksft_test_result_fail("Failed to read VL %u data\n", vl);
+		goto out;
+	}
+	read_sve = read_buf;
+
+	/* We might read more data if there's extensions we don't know */
+	if (read_sve->size < write_sve->size) {
+		ksft_test_result_fail("Wrote %d bytes, only read %d\n",
+				      write_sve->size, read_sve->size);
+		goto out_read;
+	}
+
+	for (i = 0; i < __SVE_NUM_ZREGS; i++) {
+		if (memcmp(write_buf + SVE_PT_SVE_ZREG_OFFSET(vq, i),
+			   read_buf + SVE_PT_SVE_ZREG_OFFSET(vq, i),
+			   SVE_PT_SVE_ZREG_SIZE(vq)) != 0) {
+			printf("# Mismatch in %u Z%d\n", vl, i);
+			errors++;
+		}
+	}
+
+	for (i = 0; i < __SVE_NUM_PREGS; i++) {
+		if (memcmp(write_buf + SVE_PT_SVE_PREG_OFFSET(vq, i),
+			   read_buf + SVE_PT_SVE_PREG_OFFSET(vq, i),
+			   SVE_PT_SVE_PREG_SIZE(vq)) != 0) {
+			printf("# Mismatch in %u P%d\n", vl, i);
+			errors++;
+		}
+	}
+
+	check_u32(vl, "FPSR", write_buf + SVE_PT_SVE_FPSR_OFFSET(vq),
+		  read_buf + SVE_PT_SVE_FPSR_OFFSET(vq), &errors);
+	check_u32(vl, "FPCR", write_buf + SVE_PT_SVE_FPCR_OFFSET(vq),
+		  read_buf + SVE_PT_SVE_FPCR_OFFSET(vq), &errors);
+
+	ksft_test_result(errors == 0, "Set and get SVE data for VL %u\n", vl);
+
+out_read:
+	free(read_buf);
+out:
+	free(write_buf);
+}
+
+/* Validate attempting to set SVE data and read SVE data */
+static void ptrace_set_sve_get_fpsimd_data(pid_t child, unsigned int vl)
+{
+	void *write_buf;
+	struct user_sve_header *write_sve;
+	unsigned int vq = sve_vq_from_vl(vl);
+	struct user_fpsimd_state fpsimd_state;
+	int ret, i;
+	size_t data_size;
+	int errors = 0;
+
+	if (__BYTE_ORDER == __BIG_ENDIAN) {
+		ksft_test_result_skip("Big endian not supported\n");
+		return;
+	}
+
+	data_size = SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, SVE_PT_REGS_SVE);
+	write_buf = malloc(data_size);
+	if (!write_buf) {
+		ksft_test_result_fail("Error allocating %d byte buffer for VL %u\n",
+				      data_size, vl);
+		return;
+	}
+	write_sve = write_buf;
+
+	/* Set up some data and write it out */
+	memset(write_sve, 0, data_size);
+	write_sve->size = data_size;
+	write_sve->vl = vl;
+	write_sve->flags = SVE_PT_REGS_SVE;
+
+	for (i = 0; i < __SVE_NUM_ZREGS; i++)
+		fill_buf(write_buf + SVE_PT_SVE_ZREG_OFFSET(vq, i),
+			 SVE_PT_SVE_ZREG_SIZE(vq));
+
+	fill_buf(write_buf + SVE_PT_SVE_FPSR_OFFSET(vq), SVE_PT_SVE_FPSR_SIZE);
+	fill_buf(write_buf + SVE_PT_SVE_FPCR_OFFSET(vq), SVE_PT_SVE_FPCR_SIZE);
+
+	ret = set_sve(child, write_sve);
+	if (ret != 0) {
+		ksft_test_result_fail("Failed to set VL %u data\n", vl);
+		goto out;
+	}
+
+	/* Read the data back */
+	if (get_fpsimd(child, &fpsimd_state)) {
+		ksft_test_result_fail("Failed to read VL %u FPSIMD data\n",
+				      vl);
+		goto out;
+	}
+
+	for (i = 0; i < __SVE_NUM_ZREGS; i++) {
+		__uint128_t tmp = 0;
+
+		/*
+		 * Z regs are stored endianness invariant, this won't
+		 * work for big endian
+		 */
+		memcpy(&tmp, write_buf + SVE_PT_SVE_ZREG_OFFSET(vq, i),
+		       sizeof(tmp));
+
+		if (tmp != fpsimd_state.vregs[i]) {
+			printf("# Mismatch in FPSIMD for VL %u Z%d\n", vl, i);
+			errors++;
+		}
+	}
+
+	check_u32(vl, "FPSR", write_buf + SVE_PT_SVE_FPSR_OFFSET(vq),
+		  &fpsimd_state.fpsr, &errors);
+	check_u32(vl, "FPCR", write_buf + SVE_PT_SVE_FPCR_OFFSET(vq),
+		  &fpsimd_state.fpcr, &errors);
+
+	ksft_test_result(errors == 0, "Set and get FPSIMD data for VL %u\n",
+			 vl);
+
+out:
+	free(write_buf);
 }
 
 static int do_parent(pid_t child)
@@ -139,13 +442,8 @@ static int do_parent(pid_t child)
 	pid_t pid;
 	int status;
 	siginfo_t si;
-	void *svebuf = NULL, *newsvebuf;
-	size_t svebufsz = 0, newsvebufsz;
-	struct user_sve_header *sve, *new_sve;
-	struct user_fpsimd_state *fpsimd;
-	unsigned int i, j;
-	unsigned char *p;
-	unsigned int vq;
+	unsigned int vq, vl;
+	bool vl_supported;
 
 	/* Attach to the child */
 	while (1) {
@@ -167,8 +465,6 @@ static int do_parent(pid_t child)
 		if (WIFEXITED(status) || WIFSIGNALED(status))
 			ksft_exit_fail_msg("Child died unexpectedly\n");
 
-		ksft_test_result(WIFSTOPPED(status), "WIFSTOPPED(%d)\n",
-				 status);
 		if (!WIFSTOPPED(status))
 			goto error;
 
@@ -203,98 +499,27 @@ static int do_parent(pid_t child)
 		}
 	}
 
-	sve = get_sve(pid, &svebuf, &svebufsz);
-	if (!sve) {
-		int e = errno;
+	/* FPSIMD via SVE regset */
+	ptrace_sve_fpsimd(child);
 
-		ksft_test_result_fail("get_sve: %s\n", strerror(errno));
-		if (e == ESRCH)
-			goto disappeared;
+	/* prctl() flags */
+	ptrace_set_get_inherit(child);
 
-		goto error;
-	} else {
-		ksft_test_result_pass("get_sve\n");
-	}
-
-	ksft_test_result((sve->flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD,
-			 "FPSIMD registers\n");
-	if ((sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_FPSIMD)
-		goto error;
-
-	fpsimd = (struct user_fpsimd_state *)((char *)sve +
-					      SVE_PT_FPSIMD_OFFSET);
-	for (i = 0; i < 32; ++i) {
-		p = (unsigned char *)&fpsimd->vregs[i];
-
-		for (j = 0; j < sizeof fpsimd->vregs[i]; ++j)
-			p[j] = j;
-	}
-
-	if (set_sve(pid, sve)) {
-		int e = errno;
-
-		ksft_test_result_fail("set_sve(FPSIMD): %s\n",
-				      strerror(errno));
-		if (e == ESRCH)
-			goto disappeared;
-
-		goto error;
-	}
+	/* Step through every possible VQ */
+	for (vq = SVE_VQ_MIN; vq <= SVE_VQ_MAX; vq++) {
+		vl = sve_vl_from_vq(vq);
 
-	vq = sve_vq_from_vl(sve->vl);
+		/* First, try to set this vector length */
+		ptrace_set_get_vl(child, vl, &vl_supported);
 
-	newsvebufsz = SVE_PT_SVE_ZREG_OFFSET(vq, 1);
-	new_sve = newsvebuf = malloc(newsvebufsz);
-	if (!new_sve) {
-		errno = ENOMEM;
-		perror(NULL);
-		goto error;
-	}
-
-	*new_sve = *sve;
-	new_sve->flags &= ~SVE_PT_REGS_MASK;
-	new_sve->flags |= SVE_PT_REGS_SVE;
-	memset((char *)new_sve + SVE_PT_SVE_ZREG_OFFSET(vq, 0),
-	       0, SVE_PT_SVE_ZREG_SIZE(vq));
-	new_sve->size = SVE_PT_SVE_ZREG_OFFSET(vq, 1);
-	if (set_sve(pid, new_sve)) {
-		int e = errno;
-
-		ksft_test_result_fail("set_sve(ZREG): %s\n", strerror(errno));
-		if (e == ESRCH)
-			goto disappeared;
-
-		goto error;
-	}
-
-	new_sve = get_sve(pid, &newsvebuf, &newsvebufsz);
-	if (!new_sve) {
-		int e = errno;
-
-		ksft_test_result_fail("get_sve(ZREG): %s\n", strerror(errno));
-		if (e == ESRCH)
-			goto disappeared;
-
-		goto error;
-	}
-
-	ksft_test_result((new_sve->flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_SVE,
-			 "SVE registers\n");
-	if ((new_sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_SVE)
-		goto error;
-
-	dump_sve_regs(new_sve, 3, sizeof fpsimd->vregs[0]);
-
-	p = (unsigned char *)new_sve + SVE_PT_SVE_ZREG_OFFSET(vq, 1);
-	for (i = 0; i < sizeof fpsimd->vregs[0]; ++i) {
-		unsigned char expected = i;
-
-		if (__BYTE_ORDER == __BIG_ENDIAN)
-			expected = sizeof fpsimd->vregs[0] - 1 - expected;
-
-		ksft_test_result(p[i] == expected, "p[%d] == expected\n", i);
-		if (p[i] != expected)
-			goto error;
+		/* If the VL is supported validate data set/get */
+		if (vl_supported) {
+			ptrace_set_sve_get_sve_data(child, vl);
+			ptrace_set_sve_get_fpsimd_data(child, vl);
+		} else {
+			ksft_test_result_skip("set SVE get SVE for VL %d\n", vl);
+			ksft_test_result_skip("set SVE get FPSIMD for VL %d\n", vl);
+		}
 	}
 
 	ret = EXIT_SUCCESS;
@@ -309,20 +534,16 @@ disappeared:
 int main(void)
 {
 	int ret = EXIT_SUCCESS;
-	__uint128_t v[NR_VREGS];
 	pid_t child;
 
+	srandom(getpid());
+
 	ksft_print_header();
-	ksft_set_plan(20);
+	ksft_set_plan(EXPECTED_TESTS);
 
 	if (!(getauxval(AT_HWCAP) & HWCAP_SVE))
 		ksft_exit_skip("SVE not available\n");
 
-	sve_store_patterns(v);
-
-	if (!check_vregs(v))
-		ksft_exit_fail_msg("Initial check_vregs() failed\n");
-
 	child = fork();
 	if (!child)
 		return do_child();
diff --git a/tools/testing/selftests/arm64/fp/sve-test.S b/tools/testing/selftests/arm64/fp/sve-test.S
index e3e08d9c7020..f5b1b48ffff2 100644
--- a/tools/testing/selftests/arm64/fp/sve-test.S
+++ b/tools/testing/selftests/arm64/fp/sve-test.S
@@ -46,130 +46,6 @@ define_accessor getz, NZR, _sve_str_v
 define_accessor setp, NPR, _sve_ldr_p
 define_accessor getp, NPR, _sve_str_p
 
-// Print a single character x0 to stdout
-// Clobbers x0-x2,x8
-function putc
-	str	x0, [sp, #-16]!
-
-	mov	x0, #1			// STDOUT_FILENO
-	mov	x1, sp
-	mov	x2, #1
-	mov	x8, #__NR_write
-	svc	#0
-
-	add	sp, sp, #16
-	ret
-endfunction
-
-// Print a NUL-terminated string starting at address x0 to stdout
-// Clobbers x0-x3,x8
-function puts
-	mov	x1, x0
-
-	mov	x2, #0
-0:	ldrb	w3, [x0], #1
-	cbz	w3, 1f
-	add	x2, x2, #1
-	b	0b
-
-1:	mov	w0, #1			// STDOUT_FILENO
-	mov	x8, #__NR_write
-	svc	#0
-
-	ret
-endfunction
-
-// Utility macro to print a literal string
-// Clobbers x0-x4,x8
-.macro puts string
-	.pushsection .rodata.str1.1, "aMS", 1
-.L__puts_literal\@: .string "\string"
-	.popsection
-
-	ldr	x0, =.L__puts_literal\@
-	bl	puts
-.endm
-
-// Print an unsigned decimal number x0 to stdout
-// Clobbers x0-x4,x8
-function putdec
-	mov	x1, sp
-	str	x30, [sp, #-32]!	// Result can't be > 20 digits
-
-	mov	x2, #0
-	strb	w2, [x1, #-1]!		// Write the NUL terminator
-
-	mov	x2, #10
-0:	udiv	x3, x0, x2		// div-mod loop to generate the digits
-	msub	x0, x3, x2, x0
-	add	w0, w0, #'0'
-	strb	w0, [x1, #-1]!
-	mov	x0, x3
-	cbnz	x3, 0b
-
-	ldrb	w0, [x1]
-	cbnz	w0, 1f
-	mov	w0, #'0'		// Print "0" for 0, not ""
-	strb	w0, [x1, #-1]!
-
-1:	mov	x0, x1
-	bl	puts
-
-	ldr	x30, [sp], #32
-	ret
-endfunction
-
-// Print an unsigned decimal number x0 to stdout, followed by a newline
-// Clobbers x0-x5,x8
-function putdecn
-	mov	x5, x30
-
-	bl	putdec
-	mov	x0, #'\n'
-	bl	putc
-
-	ret	x5
-endfunction
-
-// Clobbers x0-x3,x8
-function puthexb
-	str	x30, [sp, #-0x10]!
-
-	mov	w3, w0
-	lsr	w0, w0, #4
-	bl	puthexnibble
-	mov	w0, w3
-
-	ldr	x30, [sp], #0x10
-	// fall through to puthexnibble
-endfunction
-// Clobbers x0-x2,x8
-function puthexnibble
-	and	w0, w0, #0xf
-	cmp	w0, #10
-	blo	1f
-	add	w0, w0, #'a' - ('9' + 1)
-1:	add	w0, w0, #'0'
-	b	putc
-endfunction
-
-// x0=data in, x1=size in, clobbers x0-x5,x8
-function dumphex
-	str	x30, [sp, #-0x10]!
-
-	mov	x4, x0
-	mov	x5, x1
-
-0:	subs	x5, x5, #1
-	b.lo	1f
-	ldrb	w0, [x4], #1
-	bl	puthexb
-	b	0b
-
-1:	ldr	x30, [sp], #0x10
-	ret
-endfunction
-
 // Declare some storate space to shadow the SVE register contents:
 .pushsection .text
 .data
@@ -184,18 +60,6 @@ scratch:
 	.space	MAXVL_B
 .popsection
 
-// Trivial memory copy: copy x2 bytes, starting at address x1, to address x0.
-// Clobbers x0-x3
-function memcpy
-	cmp	x2, #0
-	b.eq	1f
-0:	ldrb	w3, [x1], #1
-	strb	w3, [x0], #1
-	subs	x2, x2, #1
-	b.ne	0b
-1:	ret
-endfunction
-
 // Generate a test pattern for storage in SVE registers
 // x0: pid	(16 bits)
 // x1: register number (6 bits)
@@ -316,33 +180,6 @@ function setup_ffr
 	ret	x4
 endfunction
 
-// Fill x1 bytes starting at x0 with 0xae (for canary purposes)
-// Clobbers x1, x2.
-function memfill_ae
-	mov	w2, #0xae
-	b	memfill
-endfunction
-
-// Fill x1 bytes starting at x0 with 0.
-// Clobbers x1, x2.
-function memclr
-	mov	w2, #0
-endfunction
-	// fall through to memfill
-
-// Trivial memory fill: fill x1 bytes starting at address x0 with byte w2
-// Clobbers x1
-function memfill
-	cmp	x1, #0
-	b.eq	1f
-
-0:	strb	w2, [x0], #1
-	subs	x1, x1, #1
-	b.ne	0b
-
-1:	ret
-endfunction
-
 // Trivial memory compare: compare x2 bytes starting at address x0 with
 // bytes starting at address x1.
 // Returns only if all bytes match; otherwise, the program is aborted.
diff --git a/tools/testing/selftests/arm64/fp/vec-syscfg.c b/tools/testing/selftests/arm64/fp/vec-syscfg.c
index c02071dcb563..272b888e018e 100644
--- a/tools/testing/selftests/arm64/fp/vec-syscfg.c
+++ b/tools/testing/selftests/arm64/fp/vec-syscfg.c
@@ -109,7 +109,7 @@ static int get_child_rdvl(struct vec_data *data)
 
 		/* exec() a new binary which puts the VL on stdout */
 		ret = execl(data->rdvl_binary, data->rdvl_binary, NULL);
-		fprintf(stderr, "execl(%s) failed: %d\n",
+		fprintf(stderr, "execl(%s) failed: %d (%s)\n",
 			data->rdvl_binary, errno, strerror(errno));
 
 		exit(EXIT_FAILURE);
@@ -180,7 +180,6 @@ static int file_read_integer(const char *name, int *val)
 static int file_write_integer(const char *name, int val)
 {
 	FILE *f;
-	int ret;
 
 	f = fopen(name, "w");
 	if (!f) {
@@ -192,11 +191,6 @@ static int file_write_integer(const char *name, int val)
 
 	fprintf(f, "%d", val);
 	fclose(f);
-	if (ret < 0) {
-		ksft_test_result_fail("Error writing %d to %s\n",
-				      val, name);
-		return -1;
-	}
 
 	return 0;
 }
@@ -335,12 +329,9 @@ static void prctl_set_same(struct vec_data *data)
 		return;
 	}
 
-	if (cur_vl != data->rdvl())
-		ksft_test_result_pass("%s current VL is %d\n",
-				      data->name, ret);
-	else
-		ksft_test_result_fail("%s prctl() VL %d but RDVL is %d\n",
-				      data->name, ret, data->rdvl());
+	ksft_test_result(cur_vl == data->rdvl(),
+			 "%s set VL %d and have VL %d\n",
+			 data->name, cur_vl, data->rdvl());
 }
 
 /* Can we set a new VL for this process? */
@@ -549,6 +540,82 @@ static void prctl_set_onexec(struct vec_data *data)
 	file_write_integer(data->default_vl_file, data->default_vl);
 }
 
+/* For each VQ verify that setting via prctl() does the right thing */
+static void prctl_set_all_vqs(struct vec_data *data)
+{
+	int ret, vq, vl, new_vl;
+	int errors = 0;
+
+	if (!data->min_vl || !data->max_vl) {
+		ksft_test_result_skip("%s Failed to enumerate VLs, not testing VL setting\n",
+				      data->name);
+		return;
+	}
+
+	for (vq = SVE_VQ_MIN; vq <= SVE_VQ_MAX; vq++) {
+		vl = sve_vl_from_vq(vq);
+
+		/* Attempt to set the VL */
+		ret = prctl(data->prctl_set, vl);
+		if (ret < 0) {
+			errors++;
+			ksft_print_msg("%s prctl set failed for %d: %d (%s)\n",
+				       data->name, vl,
+				       errno, strerror(errno));
+			continue;
+		}
+
+		new_vl = ret & PR_SVE_VL_LEN_MASK;
+
+		/* Check that we actually have the reported new VL */
+		if (data->rdvl() != new_vl) {
+			ksft_print_msg("Set %s VL %d but RDVL reports %d\n",
+				       data->name, new_vl, data->rdvl());
+			errors++;
+		}
+
+		/* Was that the VL we asked for? */
+		if (new_vl == vl)
+			continue;
+
+		/* Should round up to the minimum VL if below it */
+		if (vl < data->min_vl) {
+			if (new_vl != data->min_vl) {
+				ksft_print_msg("%s VL %d returned %d not minimum %d\n",
+					       data->name, vl, new_vl,
+					       data->min_vl);
+				errors++;
+			}
+
+			continue;
+		}
+
+		/* Should round down to maximum VL if above it */
+		if (vl > data->max_vl) {
+			if (new_vl != data->max_vl) {
+				ksft_print_msg("%s VL %d returned %d not maximum %d\n",
+					       data->name, vl, new_vl,
+					       data->max_vl);
+				errors++;
+			}
+
+			continue;
+		}
+
+		/* Otherwise we should've rounded down */
+		if (!(new_vl < vl)) {
+			ksft_print_msg("%s VL %d returned %d, did not round down\n",
+				       data->name, vl, new_vl);
+			errors++;
+
+			continue;
+		}
+	}
+
+	ksft_test_result(errors == 0, "%s prctl() set all VLs, %d errors\n",
+			 data->name, errors);
+}
+
 typedef void (*test_type)(struct vec_data *);
 
 static const test_type tests[] = {
@@ -561,10 +628,12 @@ static const test_type tests[] = {
 	proc_write_max,
 
 	prctl_get,
+	prctl_set_same,
 	prctl_set,
 	prctl_set_no_child,
 	prctl_set_for_child,
 	prctl_set_onexec,
+	prctl_set_all_vqs,
 };
 
 int main(void)
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 433f8bef261e..1dad8d617da8 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -9,8 +9,9 @@ test_tag
 FEATURE-DUMP.libbpf
 fixdep
 test_dev_cgroup
-/test_progs*
-!test_progs.h
+/test_progs
+/test_progs-no_alu32
+/test_progs-bpf_gcc
 test_verifier_log
 feature
 test_sock
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 799b88152e9e..54b0a41a3775 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -122,12 +122,15 @@ BPFOBJ := $(BUILD_DIR)/libbpf/libbpf.a
 ifneq ($(CROSS_COMPILE),)
 HOST_BUILD_DIR		:= $(BUILD_DIR)/host
 HOST_SCRATCH_DIR	:= $(OUTPUT)/host-tools
+HOST_INCLUDE_DIR	:= $(HOST_SCRATCH_DIR)/include
 else
 HOST_BUILD_DIR		:= $(BUILD_DIR)
 HOST_SCRATCH_DIR	:= $(SCRATCH_DIR)
+HOST_INCLUDE_DIR	:= $(INCLUDE_DIR)
 endif
 HOST_BPFOBJ := $(HOST_BUILD_DIR)/libbpf/libbpf.a
 RESOLVE_BTFIDS := $(HOST_BUILD_DIR)/resolve_btfids/resolve_btfids
+RUNQSLOWER_OUTPUT := $(BUILD_DIR)/runqslower/
 
 VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux)				\
 		     $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux)	\
@@ -152,7 +155,7 @@ $(notdir $(TEST_GEN_PROGS)						\
 # sort removes libbpf duplicates when not cross-building
 MAKE_DIRS := $(sort $(BUILD_DIR)/libbpf $(HOST_BUILD_DIR)/libbpf	       \
 	       $(HOST_BUILD_DIR)/bpftool $(HOST_BUILD_DIR)/resolve_btfids      \
-	       $(INCLUDE_DIR))
+	       $(RUNQSLOWER_OUTPUT) $(INCLUDE_DIR))
 $(MAKE_DIRS):
 	$(call msg,MKDIR,,$@)
 	$(Q)mkdir -p $@
@@ -181,11 +184,13 @@ $(OUTPUT)/test_stub.o: test_stub.c $(BPFOBJ)
 
 DEFAULT_BPFTOOL := $(HOST_SCRATCH_DIR)/sbin/bpftool
 
-$(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL)
-	$(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower	\
-		    OUTPUT=$(SCRATCH_DIR)/ VMLINUX_BTF=$(VMLINUX_BTF)   \
-		    BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR) &&	\
-		    cp $(SCRATCH_DIR)/runqslower $@
+$(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL) $(RUNQSLOWER_OUTPUT)
+	$(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower	       \
+		    OUTPUT=$(RUNQSLOWER_OUTPUT) VMLINUX_BTF=$(VMLINUX_BTF)     \
+		    BPFTOOL_OUTPUT=$(BUILD_DIR)/bpftool/		       \
+		    BPFOBJ_OUTPUT=$(BUILD_DIR)/libbpf			       \
+		    BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR) &&	       \
+		    cp $(RUNQSLOWER_OUTPUT)runqslower $@
 
 TEST_GEN_PROGS_EXTENDED += $(DEFAULT_BPFTOOL)
 
@@ -209,7 +214,9 @@ $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile)    \
 		    CC=$(HOSTCC) LD=$(HOSTLD)				       \
 		    EXTRA_CFLAGS='-g -O0'				       \
 		    OUTPUT=$(HOST_BUILD_DIR)/bpftool/			       \
-		    prefix= DESTDIR=$(HOST_SCRATCH_DIR)/ install
+		    LIBBPF_OUTPUT=$(HOST_BUILD_DIR)/libbpf/		       \
+		    LIBBPF_DESTDIR=$(HOST_SCRATCH_DIR)/			       \
+		    prefix= DESTDIR=$(HOST_SCRATCH_DIR)/ install-bin
 
 all: docs
 
@@ -225,7 +232,7 @@ docs-clean:
 
 $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile)		       \
 	   ../../../include/uapi/linux/bpf.h                                   \
-	   | $(INCLUDE_DIR) $(BUILD_DIR)/libbpf
+	   | $(BUILD_DIR)/libbpf
 	$(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \
 		    EXTRA_CFLAGS='-g -O0'				       \
 		    DESTDIR=$(SCRATCH_DIR) prefix= all install_headers
@@ -233,7 +240,7 @@ $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile)		       \
 ifneq ($(BPFOBJ),$(HOST_BPFOBJ))
 $(HOST_BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile)                \
 	   ../../../include/uapi/linux/bpf.h                                   \
-	   | $(INCLUDE_DIR) $(HOST_BUILD_DIR)/libbpf
+	   | $(HOST_BUILD_DIR)/libbpf
 	$(Q)$(MAKE) $(submake_extras) -C $(BPFDIR)                             \
 		    EXTRA_CFLAGS='-g -O0'				       \
 		    OUTPUT=$(HOST_BUILD_DIR)/libbpf/ CC=$(HOSTCC) LD=$(HOSTLD) \
@@ -258,6 +265,7 @@ $(RESOLVE_BTFIDS): $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/resolve_btfids	\
 		       $(TOOLSDIR)/lib/str_error_r.c
 	$(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/resolve_btfids	\
 		CC=$(HOSTCC) LD=$(HOSTLD) AR=$(HOSTAR) \
+		LIBBPF_INCLUDE=$(HOST_INCLUDE_DIR) \
 		OUTPUT=$(HOST_BUILD_DIR)/resolve_btfids/ BPFOBJ=$(HOST_BPFOBJ)
 
 # Get Clang's default includes on this system, as opposed to those seen by
@@ -269,7 +277,7 @@ $(RESOLVE_BTFIDS): $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/resolve_btfids	\
 define get_sys_includes
 $(shell $(1) -v -E - </dev/null 2>&1 \
 	| sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \
-$(shell $(1) -dM -E - </dev/null | grep '#define __riscv_xlen ' | sed 's/#define /-D/' | sed 's/ /=/')
+$(shell $(1) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}')
 endef
 
 # Determine target endianness.
@@ -315,7 +323,9 @@ LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h		\
 		linked_vars.skel.h linked_maps.skel.h
 
 LSKELS := kfunc_call_test.c fentry_test.c fexit_test.c fexit_sleep.c \
-	test_ksyms_module.c test_ringbuf.c atomics.c trace_printk.c
+	test_ringbuf.c atomics.c trace_printk.c trace_vprintk.c
+# Generate both light skeleton and libbpf skeleton for these
+LSKELS_EXTRA := test_ksyms_module.c test_ksyms_weak.c
 SKEL_BLACKLIST += $$(LSKELS)
 
 test_static_linked.skel.h-deps := test_static_linked1.o test_static_linked2.o
@@ -345,7 +355,7 @@ TRUNNER_BPF_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o, $$(TRUNNER_BPF_SRCS)
 TRUNNER_BPF_SKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.skel.h,	\
 				 $$(filter-out $(SKEL_BLACKLIST) $(LINKED_BPF_SRCS),\
 					       $$(TRUNNER_BPF_SRCS)))
-TRUNNER_BPF_LSKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.lskel.h, $$(LSKELS))
+TRUNNER_BPF_LSKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.lskel.h, $$(LSKELS) $$(LSKELS_EXTRA))
 TRUNNER_BPF_SKELS_LINKED := $$(addprefix $$(TRUNNER_OUTPUT)/,$(LINKED_SKELS))
 TEST_GEN_FILES += $$(TRUNNER_BPF_OBJS)
 
@@ -394,7 +404,7 @@ $(TRUNNER_BPF_LSKELS): %.lskel.h: %.o $(BPFTOOL) | $(TRUNNER_OUTPUT)
 	$(Q)$$(BPFTOOL) gen object $$(<:.o=.linked2.o) $$(<:.o=.linked1.o)
 	$(Q)$$(BPFTOOL) gen object $$(<:.o=.linked3.o) $$(<:.o=.linked2.o)
 	$(Q)diff $$(<:.o=.linked2.o) $$(<:.o=.linked3.o)
-	$(Q)$$(BPFTOOL) gen skeleton -L $$(<:.o=.linked3.o) name $$(notdir $$(<:.o=)) > $$@
+	$(Q)$$(BPFTOOL) gen skeleton -L $$(<:.o=.linked3.o) name $$(notdir $$(<:.o=_lskel)) > $$@
 
 $(TRUNNER_BPF_SKELS_LINKED): $(TRUNNER_BPF_OBJS) $(BPFTOOL) | $(TRUNNER_OUTPUT)
 	$$(call msg,LINK-BPF,$(TRUNNER_BINARY),$$(@:.skel.h=.o))
@@ -411,10 +421,9 @@ ifeq ($($(TRUNNER_TESTS_DIR)-tests-hdr),)
 $(TRUNNER_TESTS_DIR)-tests-hdr := y
 $(TRUNNER_TESTS_HDR): $(TRUNNER_TESTS_DIR)/*.c
 	$$(call msg,TEST-HDR,$(TRUNNER_BINARY),$$@)
-	$$(shell ( cd $(TRUNNER_TESTS_DIR);				\
-		  echo '/* Generated header, do not edit */';		\
-		  ls *.c 2> /dev/null |					\
-			sed -e 's@\([^\.]*\)\.c@DEFINE_TEST(\1)@';	\
+	$$(shell (echo '/* Generated header, do not edit */';					\
+		  sed -n -E 's/^void (serial_)?test_([a-zA-Z0-9_]+)\((void)?\).*/DEFINE_TEST(\2)/p'	\
+			$(TRUNNER_TESTS_DIR)/*.c | sort ;	\
 		 ) > $$@)
 endif
 
@@ -453,7 +462,7 @@ $(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS)			\
 			     | $(TRUNNER_BINARY)-extras
 	$$(call msg,BINARY,,$$@)
 	$(Q)$$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) -o $$@
-	$(Q)$(RESOLVE_BTFIDS) --no-fail --btf $(TRUNNER_OUTPUT)/btf_data.o $$@
+	$(Q)$(RESOLVE_BTFIDS) --btf $(TRUNNER_OUTPUT)/btf_data.o $$@
 
 endef
 
@@ -513,20 +522,22 @@ $(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ)
 	$(Q)$(CXX) $(CFLAGS) $(filter %.a %.o %.cpp,$^) $(LDLIBS) -o $@
 
 # Benchmark runner
-$(OUTPUT)/bench_%.o: benchs/bench_%.c bench.h
+$(OUTPUT)/bench_%.o: benchs/bench_%.c bench.h $(BPFOBJ)
 	$(call msg,CC,,$@)
-	$(Q)$(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@
+	$(Q)$(CC) $(CFLAGS) -O2 -c $(filter %.c,$^) $(LDLIBS) -o $@
 $(OUTPUT)/bench_rename.o: $(OUTPUT)/test_overhead.skel.h
 $(OUTPUT)/bench_trigger.o: $(OUTPUT)/trigger_bench.skel.h
 $(OUTPUT)/bench_ringbufs.o: $(OUTPUT)/ringbuf_bench.skel.h \
 			    $(OUTPUT)/perfbuf_bench.skel.h
-$(OUTPUT)/bench.o: bench.h testing_helpers.h
+$(OUTPUT)/bench_bloom_filter_map.o: $(OUTPUT)/bloom_filter_bench.skel.h
+$(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ)
 $(OUTPUT)/bench: LDLIBS += -lm
 $(OUTPUT)/bench: $(OUTPUT)/bench.o $(OUTPUT)/testing_helpers.o \
 		 $(OUTPUT)/bench_count.o \
 		 $(OUTPUT)/bench_rename.o \
 		 $(OUTPUT)/bench_trigger.o \
-		 $(OUTPUT)/bench_ringbufs.o
+		 $(OUTPUT)/bench_ringbufs.o \
+		 $(OUTPUT)/bench_bloom_filter_map.o
 	$(call msg,BINARY,,$@)
 	$(Q)$(CC) $(LDFLAGS) -o $@ $(filter %.a %.o,$^) $(LDLIBS)
 
diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst
index 9b17f2867488..5e287e445f75 100644
--- a/tools/testing/selftests/bpf/README.rst
+++ b/tools/testing/selftests/bpf/README.rst
@@ -201,6 +201,20 @@ Without it, the error from compiling bpf selftests looks like:
 
 __ https://reviews.llvm.org/D93563
 
+btf_tag test and Clang version
+==============================
+
+The btf_tag selftest require LLVM support to recognize the btf_decl_tag attribute.
+It was introduced in `Clang 14`__.
+
+Without it, the btf_tag selftest will be skipped and you will observe:
+
+.. code-block:: console
+
+  #<test_num> btf_tag:SKIP
+
+__ https://reviews.llvm.org/D111588
+
 Clang dependencies for static linking tests
 ===========================================
 
@@ -228,3 +242,16 @@ To fix this issue, user newer libbpf.
 .. Links
 .. _clang reloc patch: https://reviews.llvm.org/D102712
 .. _kernel llvm reloc: /Documentation/bpf/llvm_reloc.rst
+
+Clang dependencies for the u32 spill test (xdpwall)
+===================================================
+The xdpwall selftest requires a change in `Clang 14`__.
+
+Without it, the xdpwall selftest will fail and the error message
+from running test_progs will look like:
+
+.. code-block:: console
+
+  test_xdpwall:FAIL:Does LLVM have https://reviews.llvm.org/D109073? unexpected error: -4007
+
+__ https://reviews.llvm.org/D109073
diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index 6ea15b93a2f8..cc4722f693e9 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -51,6 +51,35 @@ void setup_libbpf()
 		fprintf(stderr, "failed to increase RLIMIT_MEMLOCK: %d", err);
 }
 
+void false_hits_report_progress(int iter, struct bench_res *res, long delta_ns)
+{
+	long total = res->false_hits  + res->hits + res->drops;
+
+	printf("Iter %3d (%7.3lfus): ",
+	       iter, (delta_ns - 1000000000) / 1000.0);
+
+	printf("%ld false hits of %ld total operations. Percentage = %2.2f %%\n",
+	       res->false_hits, total, ((float)res->false_hits / total) * 100);
+}
+
+void false_hits_report_final(struct bench_res res[], int res_cnt)
+{
+	long total_hits = 0, total_drops = 0, total_false_hits = 0, total_ops = 0;
+	int i;
+
+	for (i = 0; i < res_cnt; i++) {
+		total_hits += res[i].hits;
+		total_false_hits += res[i].false_hits;
+		total_drops += res[i].drops;
+	}
+	total_ops = total_hits + total_false_hits + total_drops;
+
+	printf("Summary: %ld false hits of %ld total operations. ",
+	       total_false_hits, total_ops);
+	printf("Percentage =  %2.2f %%\n",
+	       ((float)total_false_hits / total_ops) * 100);
+}
+
 void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns)
 {
 	double hits_per_sec, drops_per_sec;
@@ -63,20 +92,22 @@ void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns)
 	printf("Iter %3d (%7.3lfus): ",
 	       iter, (delta_ns - 1000000000) / 1000.0);
 
-	printf("hits %8.3lfM/s (%7.3lfM/prod), drops %8.3lfM/s\n",
-	       hits_per_sec, hits_per_prod, drops_per_sec);
+	printf("hits %8.3lfM/s (%7.3lfM/prod), drops %8.3lfM/s, total operations %8.3lfM/s\n",
+	       hits_per_sec, hits_per_prod, drops_per_sec, hits_per_sec + drops_per_sec);
 }
 
 void hits_drops_report_final(struct bench_res res[], int res_cnt)
 {
 	int i;
-	double hits_mean = 0.0, drops_mean = 0.0;
-	double hits_stddev = 0.0, drops_stddev = 0.0;
+	double hits_mean = 0.0, drops_mean = 0.0, total_ops_mean = 0.0;
+	double hits_stddev = 0.0, drops_stddev = 0.0, total_ops_stddev = 0.0;
+	double total_ops;
 
 	for (i = 0; i < res_cnt; i++) {
 		hits_mean += res[i].hits / 1000000.0 / (0.0 + res_cnt);
 		drops_mean += res[i].drops / 1000000.0 / (0.0 + res_cnt);
 	}
+	total_ops_mean = hits_mean + drops_mean;
 
 	if (res_cnt > 1)  {
 		for (i = 0; i < res_cnt; i++) {
@@ -86,14 +117,21 @@ void hits_drops_report_final(struct bench_res res[], int res_cnt)
 			drops_stddev += (drops_mean - res[i].drops / 1000000.0) *
 					(drops_mean - res[i].drops / 1000000.0) /
 					(res_cnt - 1.0);
+			total_ops = res[i].hits + res[i].drops;
+			total_ops_stddev += (total_ops_mean - total_ops / 1000000.0) *
+					(total_ops_mean - total_ops / 1000000.0) /
+					(res_cnt - 1.0);
 		}
 		hits_stddev = sqrt(hits_stddev);
 		drops_stddev = sqrt(drops_stddev);
+		total_ops_stddev = sqrt(total_ops_stddev);
 	}
 	printf("Summary: hits %8.3lf \u00B1 %5.3lfM/s (%7.3lfM/prod), ",
 	       hits_mean, hits_stddev, hits_mean / env.producer_cnt);
-	printf("drops %8.3lf \u00B1 %5.3lfM/s\n",
+	printf("drops %8.3lf \u00B1 %5.3lfM/s, ",
 	       drops_mean, drops_stddev);
+	printf("total operations %8.3lf \u00B1 %5.3lfM/s\n",
+	       total_ops_mean, total_ops_stddev);
 }
 
 const char *argp_program_version = "benchmark";
@@ -132,9 +170,11 @@ static const struct argp_option opts[] = {
 };
 
 extern struct argp bench_ringbufs_argp;
+extern struct argp bench_bloom_map_argp;
 
 static const struct argp_child bench_parsers[] = {
 	{ &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 },
+	{ &bench_bloom_map_argp, 0, "Bloom filter map benchmark", 0 },
 	{},
 };
 
@@ -323,6 +363,11 @@ extern const struct bench bench_rb_libbpf;
 extern const struct bench bench_rb_custom;
 extern const struct bench bench_pb_libbpf;
 extern const struct bench bench_pb_custom;
+extern const struct bench bench_bloom_lookup;
+extern const struct bench bench_bloom_update;
+extern const struct bench bench_bloom_false_positive;
+extern const struct bench bench_hashmap_without_bloom;
+extern const struct bench bench_hashmap_with_bloom;
 
 static const struct bench *benchs[] = {
 	&bench_count_global,
@@ -344,6 +389,11 @@ static const struct bench *benchs[] = {
 	&bench_rb_custom,
 	&bench_pb_libbpf,
 	&bench_pb_custom,
+	&bench_bloom_lookup,
+	&bench_bloom_update,
+	&bench_bloom_false_positive,
+	&bench_hashmap_without_bloom,
+	&bench_hashmap_with_bloom,
 };
 
 static void setup_benchmark()
diff --git a/tools/testing/selftests/bpf/bench.h b/tools/testing/selftests/bpf/bench.h
index c1f48a473b02..624c6b11501f 100644
--- a/tools/testing/selftests/bpf/bench.h
+++ b/tools/testing/selftests/bpf/bench.h
@@ -33,6 +33,7 @@ struct env {
 struct bench_res {
 	long hits;
 	long drops;
+	long false_hits;
 };
 
 struct bench {
@@ -56,6 +57,8 @@ extern const struct bench *bench;
 void setup_libbpf();
 void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns);
 void hits_drops_report_final(struct bench_res res[], int res_cnt);
+void false_hits_report_progress(int iter, struct bench_res *res, long delta_ns);
+void false_hits_report_final(struct bench_res res[], int res_cnt);
 
 static inline __u64 get_time_ns() {
 	struct timespec t;
diff --git a/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c b/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c
new file mode 100644
index 000000000000..6eeeed2913e6
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c
@@ -0,0 +1,477 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <argp.h>
+#include <linux/log2.h>
+#include <pthread.h>
+#include "bench.h"
+#include "bloom_filter_bench.skel.h"
+#include "bpf_util.h"
+
+static struct ctx {
+	bool use_array_map;
+	bool use_hashmap;
+	bool hashmap_use_bloom;
+	bool count_false_hits;
+
+	struct bloom_filter_bench *skel;
+
+	int bloom_fd;
+	int hashmap_fd;
+	int array_map_fd;
+
+	pthread_mutex_t map_done_mtx;
+	pthread_cond_t map_done_cv;
+	bool map_done;
+	bool map_prepare_err;
+
+	__u32 next_map_idx;
+} ctx = {
+	.map_done_mtx = PTHREAD_MUTEX_INITIALIZER,
+	.map_done_cv = PTHREAD_COND_INITIALIZER,
+};
+
+struct stat {
+	__u32 stats[3];
+};
+
+static struct {
+	__u32 nr_entries;
+	__u8 nr_hash_funcs;
+	__u8 value_size;
+} args = {
+	.nr_entries = 1000,
+	.nr_hash_funcs = 3,
+	.value_size = 8,
+};
+
+enum {
+	ARG_NR_ENTRIES = 3000,
+	ARG_NR_HASH_FUNCS = 3001,
+	ARG_VALUE_SIZE = 3002,
+};
+
+static const struct argp_option opts[] = {
+	{ "nr_entries", ARG_NR_ENTRIES, "NR_ENTRIES", 0,
+		"Set number of expected unique entries in the bloom filter"},
+	{ "nr_hash_funcs", ARG_NR_HASH_FUNCS, "NR_HASH_FUNCS", 0,
+		"Set number of hash functions in the bloom filter"},
+	{ "value_size", ARG_VALUE_SIZE, "VALUE_SIZE", 0,
+		"Set value size (in bytes) of bloom filter entries"},
+	{},
+};
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+	switch (key) {
+	case ARG_NR_ENTRIES:
+		args.nr_entries = strtol(arg, NULL, 10);
+		if (args.nr_entries == 0) {
+			fprintf(stderr, "Invalid nr_entries count.");
+			argp_usage(state);
+		}
+		break;
+	case ARG_NR_HASH_FUNCS:
+		args.nr_hash_funcs = strtol(arg, NULL, 10);
+		if (args.nr_hash_funcs == 0 || args.nr_hash_funcs > 15) {
+			fprintf(stderr,
+				"The bloom filter must use 1 to 15 hash functions.");
+			argp_usage(state);
+		}
+		break;
+	case ARG_VALUE_SIZE:
+		args.value_size = strtol(arg, NULL, 10);
+		if (args.value_size < 2 || args.value_size > 256) {
+			fprintf(stderr,
+				"Invalid value size. Must be between 2 and 256 bytes");
+			argp_usage(state);
+		}
+		break;
+	default:
+		return ARGP_ERR_UNKNOWN;
+	}
+
+	return 0;
+}
+
+/* exported into benchmark runner */
+const struct argp bench_bloom_map_argp = {
+	.options = opts,
+	.parser = parse_arg,
+};
+
+static void validate(void)
+{
+	if (env.consumer_cnt != 1) {
+		fprintf(stderr,
+			"The bloom filter benchmarks do not support multi-consumer use\n");
+		exit(1);
+	}
+}
+
+static inline void trigger_bpf_program(void)
+{
+	syscall(__NR_getpgid);
+}
+
+static void *producer(void *input)
+{
+	while (true)
+		trigger_bpf_program();
+
+	return NULL;
+}
+
+static void *map_prepare_thread(void *arg)
+{
+	__u32 val_size, i;
+	void *val = NULL;
+	int err;
+
+	val_size = args.value_size;
+	val = malloc(val_size);
+	if (!val) {
+		ctx.map_prepare_err = true;
+		goto done;
+	}
+
+	while (true) {
+		i = __atomic_add_fetch(&ctx.next_map_idx, 1, __ATOMIC_RELAXED);
+		if (i > args.nr_entries)
+			break;
+
+again:
+		/* Populate hashmap, bloom filter map, and array map with the same
+		 * random values
+		 */
+		err = syscall(__NR_getrandom, val, val_size, 0);
+		if (err != val_size) {
+			ctx.map_prepare_err = true;
+			fprintf(stderr, "failed to get random value: %d\n", -errno);
+			break;
+		}
+
+		if (ctx.use_hashmap) {
+			err = bpf_map_update_elem(ctx.hashmap_fd, val, val, BPF_NOEXIST);
+			if (err) {
+				if (err != -EEXIST) {
+					ctx.map_prepare_err = true;
+					fprintf(stderr, "failed to add elem to hashmap: %d\n",
+						-errno);
+					break;
+				}
+				goto again;
+			}
+		}
+
+		i--;
+
+		if (ctx.use_array_map) {
+			err = bpf_map_update_elem(ctx.array_map_fd, &i, val, 0);
+			if (err) {
+				ctx.map_prepare_err = true;
+				fprintf(stderr, "failed to add elem to array map: %d\n", -errno);
+				break;
+			}
+		}
+
+		if (ctx.use_hashmap && !ctx.hashmap_use_bloom)
+			continue;
+
+		err = bpf_map_update_elem(ctx.bloom_fd, NULL, val, 0);
+		if (err) {
+			ctx.map_prepare_err = true;
+			fprintf(stderr,
+				"failed to add elem to bloom filter map: %d\n", -errno);
+			break;
+		}
+	}
+done:
+	pthread_mutex_lock(&ctx.map_done_mtx);
+	ctx.map_done = true;
+	pthread_cond_signal(&ctx.map_done_cv);
+	pthread_mutex_unlock(&ctx.map_done_mtx);
+
+	if (val)
+		free(val);
+
+	return NULL;
+}
+
+static void populate_maps(void)
+{
+	unsigned int nr_cpus = bpf_num_possible_cpus();
+	pthread_t map_thread;
+	int i, err, nr_rand_bytes;
+
+	ctx.bloom_fd = bpf_map__fd(ctx.skel->maps.bloom_map);
+	ctx.hashmap_fd = bpf_map__fd(ctx.skel->maps.hashmap);
+	ctx.array_map_fd = bpf_map__fd(ctx.skel->maps.array_map);
+
+	for (i = 0; i < nr_cpus; i++) {
+		err = pthread_create(&map_thread, NULL, map_prepare_thread,
+				     NULL);
+		if (err) {
+			fprintf(stderr, "failed to create pthread: %d\n", -errno);
+			exit(1);
+		}
+	}
+
+	pthread_mutex_lock(&ctx.map_done_mtx);
+	while (!ctx.map_done)
+		pthread_cond_wait(&ctx.map_done_cv, &ctx.map_done_mtx);
+	pthread_mutex_unlock(&ctx.map_done_mtx);
+
+	if (ctx.map_prepare_err)
+		exit(1);
+
+	nr_rand_bytes = syscall(__NR_getrandom, ctx.skel->bss->rand_vals,
+				ctx.skel->rodata->nr_rand_bytes, 0);
+	if (nr_rand_bytes != ctx.skel->rodata->nr_rand_bytes) {
+		fprintf(stderr, "failed to get random bytes\n");
+		exit(1);
+	}
+}
+
+static void check_args(void)
+{
+	if (args.value_size < 8)  {
+		__u64 nr_unique_entries = 1ULL << (args.value_size * 8);
+
+		if (args.nr_entries > nr_unique_entries) {
+			fprintf(stderr,
+				"Not enough unique values for the nr_entries requested\n");
+			exit(1);
+		}
+	}
+}
+
+static struct bloom_filter_bench *setup_skeleton(void)
+{
+	struct bloom_filter_bench *skel;
+
+	check_args();
+
+	setup_libbpf();
+
+	skel = bloom_filter_bench__open();
+	if (!skel) {
+		fprintf(stderr, "failed to open skeleton\n");
+		exit(1);
+	}
+
+	skel->rodata->hashmap_use_bloom = ctx.hashmap_use_bloom;
+	skel->rodata->count_false_hits = ctx.count_false_hits;
+
+	/* Resize number of entries */
+	bpf_map__set_max_entries(skel->maps.hashmap, args.nr_entries);
+
+	bpf_map__set_max_entries(skel->maps.array_map, args.nr_entries);
+
+	bpf_map__set_max_entries(skel->maps.bloom_map, args.nr_entries);
+
+	/* Set value size */
+	bpf_map__set_value_size(skel->maps.array_map, args.value_size);
+
+	bpf_map__set_value_size(skel->maps.bloom_map, args.value_size);
+
+	bpf_map__set_value_size(skel->maps.hashmap, args.value_size);
+
+	/* For the hashmap, we use the value as the key as well */
+	bpf_map__set_key_size(skel->maps.hashmap, args.value_size);
+
+	skel->bss->value_size = args.value_size;
+
+	/* Set number of hash functions */
+	bpf_map__set_map_extra(skel->maps.bloom_map, args.nr_hash_funcs);
+
+	if (bloom_filter_bench__load(skel)) {
+		fprintf(stderr, "failed to load skeleton\n");
+		exit(1);
+	}
+
+	return skel;
+}
+
+static void bloom_lookup_setup(void)
+{
+	struct bpf_link *link;
+
+	ctx.use_array_map = true;
+
+	ctx.skel = setup_skeleton();
+
+	populate_maps();
+
+	link = bpf_program__attach(ctx.skel->progs.bloom_lookup);
+	if (!link) {
+		fprintf(stderr, "failed to attach program!\n");
+		exit(1);
+	}
+}
+
+static void bloom_update_setup(void)
+{
+	struct bpf_link *link;
+
+	ctx.use_array_map = true;
+
+	ctx.skel = setup_skeleton();
+
+	populate_maps();
+
+	link = bpf_program__attach(ctx.skel->progs.bloom_update);
+	if (!link) {
+		fprintf(stderr, "failed to attach program!\n");
+		exit(1);
+	}
+}
+
+static void false_positive_setup(void)
+{
+	struct bpf_link *link;
+
+	ctx.use_hashmap = true;
+	ctx.hashmap_use_bloom = true;
+	ctx.count_false_hits = true;
+
+	ctx.skel = setup_skeleton();
+
+	populate_maps();
+
+	link = bpf_program__attach(ctx.skel->progs.bloom_hashmap_lookup);
+	if (!link) {
+		fprintf(stderr, "failed to attach program!\n");
+		exit(1);
+	}
+}
+
+static void hashmap_with_bloom_setup(void)
+{
+	struct bpf_link *link;
+
+	ctx.use_hashmap = true;
+	ctx.hashmap_use_bloom = true;
+
+	ctx.skel = setup_skeleton();
+
+	populate_maps();
+
+	link = bpf_program__attach(ctx.skel->progs.bloom_hashmap_lookup);
+	if (!link) {
+		fprintf(stderr, "failed to attach program!\n");
+		exit(1);
+	}
+}
+
+static void hashmap_no_bloom_setup(void)
+{
+	struct bpf_link *link;
+
+	ctx.use_hashmap = true;
+
+	ctx.skel = setup_skeleton();
+
+	populate_maps();
+
+	link = bpf_program__attach(ctx.skel->progs.bloom_hashmap_lookup);
+	if (!link) {
+		fprintf(stderr, "failed to attach program!\n");
+		exit(1);
+	}
+}
+
+static void measure(struct bench_res *res)
+{
+	unsigned long total_hits = 0, total_drops = 0, total_false_hits = 0;
+	static unsigned long last_hits, last_drops, last_false_hits;
+	unsigned int nr_cpus = bpf_num_possible_cpus();
+	int hit_key, drop_key, false_hit_key;
+	int i;
+
+	hit_key = ctx.skel->rodata->hit_key;
+	drop_key = ctx.skel->rodata->drop_key;
+	false_hit_key = ctx.skel->rodata->false_hit_key;
+
+	if (ctx.skel->bss->error != 0) {
+		fprintf(stderr, "error (%d) when searching the bloom filter\n",
+			ctx.skel->bss->error);
+		exit(1);
+	}
+
+	for (i = 0; i < nr_cpus; i++) {
+		struct stat *s = (void *)&ctx.skel->bss->percpu_stats[i];
+
+		total_hits += s->stats[hit_key];
+		total_drops += s->stats[drop_key];
+		total_false_hits += s->stats[false_hit_key];
+	}
+
+	res->hits = total_hits - last_hits;
+	res->drops = total_drops - last_drops;
+	res->false_hits = total_false_hits - last_false_hits;
+
+	last_hits = total_hits;
+	last_drops = total_drops;
+	last_false_hits = total_false_hits;
+}
+
+static void *consumer(void *input)
+{
+	return NULL;
+}
+
+const struct bench bench_bloom_lookup = {
+	.name = "bloom-lookup",
+	.validate = validate,
+	.setup = bloom_lookup_setup,
+	.producer_thread = producer,
+	.consumer_thread = consumer,
+	.measure = measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
+
+const struct bench bench_bloom_update = {
+	.name = "bloom-update",
+	.validate = validate,
+	.setup = bloom_update_setup,
+	.producer_thread = producer,
+	.consumer_thread = consumer,
+	.measure = measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
+
+const struct bench bench_bloom_false_positive = {
+	.name = "bloom-false-positive",
+	.validate = validate,
+	.setup = false_positive_setup,
+	.producer_thread = producer,
+	.consumer_thread = consumer,
+	.measure = measure,
+	.report_progress = false_hits_report_progress,
+	.report_final = false_hits_report_final,
+};
+
+const struct bench bench_hashmap_without_bloom = {
+	.name = "hashmap-without-bloom",
+	.validate = validate,
+	.setup = hashmap_no_bloom_setup,
+	.producer_thread = producer,
+	.consumer_thread = consumer,
+	.measure = measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
+
+const struct bench bench_hashmap_with_bloom = {
+	.name = "hashmap-with-bloom",
+	.validate = validate,
+	.setup = hashmap_with_bloom_setup,
+	.producer_thread = producer,
+	.consumer_thread = consumer,
+	.measure = measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_bloom_filter_map.sh b/tools/testing/selftests/bpf/benchs/run_bench_bloom_filter_map.sh
new file mode 100755
index 000000000000..8ffd385ab2f4
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/run_bench_bloom_filter_map.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source ./benchs/run_common.sh
+
+set -eufo pipefail
+
+header "Bloom filter map"
+for v in 2 4 8 16 40; do
+for t in 1 4 8 12 16; do
+for h in {1..10}; do
+subtitle "value_size: $v bytes, # threads: $t, # hashes: $h"
+	for e in 10000 50000 75000 100000 250000 500000 750000 1000000 2500000 5000000; do
+		printf "%'d entries -\n" $e
+		printf "\t"
+		summarize "Lookups, total operations: " \
+			"$($RUN_BENCH -p $t --nr_hash_funcs $h --nr_entries $e --value_size $v bloom-lookup)"
+		printf "\t"
+		summarize "Updates, total operations: " \
+			"$($RUN_BENCH -p $t --nr_hash_funcs $h --nr_entries $e --value_size $v bloom-update)"
+		printf "\t"
+		summarize_percentage "False positive rate: " \
+			"$($RUN_BENCH -p $t --nr_hash_funcs $h --nr_entries $e --value_size $v bloom-false-positive)"
+	done
+	printf "\n"
+done
+done
+done
+
+header "Hashmap without bloom filter vs. hashmap with bloom filter (throughput, 8 threads)"
+for v in 2 4 8 16 40; do
+for h in {1..10}; do
+subtitle "value_size: $v, # hashes: $h"
+	for e in 10000 50000 75000 100000 250000 500000 750000 1000000 2500000 5000000; do
+		printf "%'d entries -\n" $e
+		printf "\t"
+		summarize_total "Hashmap without bloom filter: " \
+			"$($RUN_BENCH --nr_hash_funcs $h --nr_entries $e --value_size $v -p 8 hashmap-without-bloom)"
+		printf "\t"
+		summarize_total "Hashmap with bloom filter: " \
+			"$($RUN_BENCH --nr_hash_funcs $h --nr_entries $e --value_size $v -p 8 hashmap-with-bloom)"
+	done
+	printf "\n"
+done
+done
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh
index af4aa04caba6..ada028aa9007 100755
--- a/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh
+++ b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh
@@ -1,34 +1,8 @@
 #!/bin/bash
 
-set -eufo pipefail
-
-RUN_BENCH="sudo ./bench -w3 -d10 -a"
-
-function hits()
-{
-	echo "$*" | sed -E "s/.*hits\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/"
-}
-
-function drops()
-{
-	echo "$*" | sed -E "s/.*drops\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/"
-}
+source ./benchs/run_common.sh
 
-function header()
-{
-	local len=${#1}
-
-	printf "\n%s\n" "$1"
-	for i in $(seq 1 $len); do printf '='; done
-	printf '\n'
-}
-
-function summarize()
-{
-	bench="$1"
-	summary=$(echo $2 | tail -n1)
-	printf "%-20s %s (drops %s)\n" "$bench" "$(hits $summary)" "$(drops $summary)"
-}
+set -eufo pipefail
 
 header "Single-producer, parallel producer"
 for b in rb-libbpf rb-custom pb-libbpf pb-custom; do
diff --git a/tools/testing/selftests/bpf/benchs/run_common.sh b/tools/testing/selftests/bpf/benchs/run_common.sh
new file mode 100644
index 000000000000..9a16be78b180
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/run_common.sh
@@ -0,0 +1,60 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+RUN_BENCH="sudo ./bench -w3 -d10 -a"
+
+function header()
+{
+	local len=${#1}
+
+	printf "\n%s\n" "$1"
+	for i in $(seq 1 $len); do printf '='; done
+	printf '\n'
+}
+
+function subtitle()
+{
+	local len=${#1}
+	printf "\t%s\n" "$1"
+}
+
+function hits()
+{
+	echo "$*" | sed -E "s/.*hits\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/"
+}
+
+function drops()
+{
+	echo "$*" | sed -E "s/.*drops\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/"
+}
+
+function percentage()
+{
+	echo "$*" | sed -E "s/.*Percentage\s=\s+([0-9]+\.[0-9]+).*/\1/"
+}
+
+function total()
+{
+	echo "$*" | sed -E "s/.*total operations\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/"
+}
+
+function summarize()
+{
+	bench="$1"
+	summary=$(echo $2 | tail -n1)
+	printf "%-20s %s (drops %s)\n" "$bench" "$(hits $summary)" "$(drops $summary)"
+}
+
+function summarize_percentage()
+{
+	bench="$1"
+	summary=$(echo $2 | tail -n1)
+	printf "%-20s %s%%\n" "$bench" "$(percentage $summary)"
+}
+
+function summarize_total()
+{
+	bench="$1"
+	summary=$(echo $2 | tail -n1)
+	printf "%-20s %s\n" "$bench" "$(total $summary)"
+}
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h
index 89c6d58e5dd6..11ee801e75e7 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h
@@ -34,6 +34,21 @@ DECLARE_TRACE(bpf_testmod_test_write_bare,
 	TP_ARGS(task, ctx)
 );
 
+#undef BPF_TESTMOD_DECLARE_TRACE
+#ifdef DECLARE_TRACE_WRITABLE
+#define BPF_TESTMOD_DECLARE_TRACE(call, proto, args, size) \
+	DECLARE_TRACE_WRITABLE(call, PARAMS(proto), PARAMS(args), size)
+#else
+#define BPF_TESTMOD_DECLARE_TRACE(call, proto, args, size) \
+	DECLARE_TRACE(call, PARAMS(proto), PARAMS(args))
+#endif
+
+BPF_TESTMOD_DECLARE_TRACE(bpf_testmod_test_writable_bare,
+	TP_PROTO(struct bpf_testmod_test_writable_ctx *ctx),
+	TP_ARGS(ctx),
+	sizeof(struct bpf_testmod_test_writable_ctx)
+);
+
 #endif /* _BPF_TESTMOD_EVENTS_H */
 
 #undef TRACE_INCLUDE_PATH
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
index 141d8da687d2..5d52ea2768df 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
@@ -1,5 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2020 Facebook */
+#include <linux/btf.h>
+#include <linux/btf_ids.h>
 #include <linux/error-injection.h>
 #include <linux/init.h>
 #include <linux/module.h>
@@ -13,6 +15,24 @@
 
 DEFINE_PER_CPU(int, bpf_testmod_ksym_percpu) = 123;
 
+noinline void
+bpf_testmod_test_mod_kfunc(int i)
+{
+	*(int *)this_cpu_ptr(&bpf_testmod_ksym_percpu) = i;
+}
+
+noinline int bpf_testmod_loop_test(int n)
+{
+	int i, sum = 0;
+
+	/* the primary goal of this test is to test LBR. Create a lot of
+	 * branches in the function, so we can catch it easily.
+	 */
+	for (i = 0; i < n; i++)
+		sum += i;
+	return sum;
+}
+
 noinline ssize_t
 bpf_testmod_test_read(struct file *file, struct kobject *kobj,
 		      struct bin_attribute *bin_attr,
@@ -24,7 +44,21 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj,
 		.len = len,
 	};
 
-	trace_bpf_testmod_test_read(current, &ctx);
+	/* This is always true. Use the check to make sure the compiler
+	 * doesn't remove bpf_testmod_loop_test.
+	 */
+	if (bpf_testmod_loop_test(101) > 100)
+		trace_bpf_testmod_test_read(current, &ctx);
+
+	/* Magic number to enable writable tp */
+	if (len == 64) {
+		struct bpf_testmod_test_writable_ctx writable = {
+			.val = 1024,
+		};
+		trace_bpf_testmod_test_writable_bare(&writable);
+		if (writable.early_ret)
+			return snprintf(buf, len, "%d\n", writable.val);
+	}
 
 	return -EIO; /* always fail */
 }
@@ -55,13 +89,26 @@ static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = {
 	.write = bpf_testmod_test_write,
 };
 
+BTF_SET_START(bpf_testmod_kfunc_ids)
+BTF_ID(func, bpf_testmod_test_mod_kfunc)
+BTF_SET_END(bpf_testmod_kfunc_ids)
+
+static DEFINE_KFUNC_BTF_ID_SET(&bpf_testmod_kfunc_ids, bpf_testmod_kfunc_btf_set);
+
 static int bpf_testmod_init(void)
 {
-	return sysfs_create_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file);
+	int ret;
+
+	ret = sysfs_create_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file);
+	if (ret)
+		return ret;
+	register_kfunc_btf_id_set(&prog_test_kfunc_list, &bpf_testmod_kfunc_btf_set);
+	return 0;
 }
 
 static void bpf_testmod_exit(void)
 {
+	unregister_kfunc_btf_id_set(&prog_test_kfunc_list, &bpf_testmod_kfunc_btf_set);
 	return sysfs_remove_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file);
 }
 
@@ -71,4 +118,3 @@ module_exit(bpf_testmod_exit);
 MODULE_AUTHOR("Andrii Nakryiko");
 MODULE_DESCRIPTION("BPF selftests module");
 MODULE_LICENSE("Dual BSD/GPL");
-
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h
index b3892dc40111..0d71e2607832 100644
--- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h
@@ -17,4 +17,9 @@ struct bpf_testmod_test_write_ctx {
 	size_t len;
 };
 
+struct bpf_testmod_test_writable_ctx {
+	bool early_ret;
+	int val;
+};
+
 #endif /* _BPF_TESTMOD_H */
diff --git a/tools/testing/selftests/bpf/btf_helpers.c b/tools/testing/selftests/bpf/btf_helpers.c
index b692e6ead9b5..b5b6b013a245 100644
--- a/tools/testing/selftests/bpf/btf_helpers.c
+++ b/tools/testing/selftests/bpf/btf_helpers.c
@@ -24,11 +24,12 @@ static const char * const btf_kind_str_mapping[] = {
 	[BTF_KIND_VAR]		= "VAR",
 	[BTF_KIND_DATASEC]	= "DATASEC",
 	[BTF_KIND_FLOAT]	= "FLOAT",
+	[BTF_KIND_DECL_TAG]	= "DECL_TAG",
 };
 
 static const char *btf_kind_str(__u16 kind)
 {
-	if (kind > BTF_KIND_DATASEC)
+	if (kind > BTF_KIND_DECL_TAG)
 		return "UNKNOWN";
 	return btf_kind_str_mapping[kind];
 }
@@ -177,6 +178,10 @@ int fprintf_btf_type_raw(FILE *out, const struct btf *btf, __u32 id)
 	case BTF_KIND_FLOAT:
 		fprintf(out, " size=%u", t->size);
 		break;
+	case BTF_KIND_DECL_TAG:
+		fprintf(out, " type_id=%u component_idx=%d",
+			t->type, btf_decl_tag(t)->component_idx);
+		break;
 	default:
 		break;
 	}
@@ -210,7 +215,7 @@ int btf_validate_raw(struct btf *btf, int nr_types, const char *exp_types[])
 	int i;
 	bool ok = true;
 
-	ASSERT_EQ(btf__get_nr_types(btf), nr_types, "btf_nr_types");
+	ASSERT_EQ(btf__type_cnt(btf) - 1, nr_types, "btf_nr_types");
 
 	for (i = 1; i <= nr_types; i++) {
 		if (!ASSERT_STREQ(btf_type_raw_dump(btf, i), exp_types[i - 1], "raw_dump"))
@@ -249,7 +254,7 @@ const char *btf_type_c_dump(const struct btf *btf)
 		return NULL;
 	}
 
-	for (i = 1; i <= btf__get_nr_types(btf); i++) {
+	for (i = 1; i < btf__type_cnt(btf); i++) {
 		err = btf_dump__dump_type(d, i);
 		if (err) {
 			fprintf(stderr, "Failed to dump type [%d]: %d\n", i, err);
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c
index f3daa44a8266..9d59c3990ca8 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.c
+++ b/tools/testing/selftests/bpf/cgroup_helpers.c
@@ -33,10 +33,9 @@
 #define CGROUP_MOUNT_DFLT		"/sys/fs/cgroup"
 #define NETCLS_MOUNT_PATH		CGROUP_MOUNT_DFLT "/net_cls"
 #define CGROUP_WORK_DIR			"/cgroup-test-work-dir"
-
 #define format_cgroup_path(buf, path) \
-	snprintf(buf, sizeof(buf), "%s%s%s", CGROUP_MOUNT_PATH, \
-		 CGROUP_WORK_DIR, path)
+	snprintf(buf, sizeof(buf), "%s%s%d%s", CGROUP_MOUNT_PATH, \
+	CGROUP_WORK_DIR, getpid(), path)
 
 #define format_classid_path(buf)				\
 	snprintf(buf, sizeof(buf), "%s%s", NETCLS_MOUNT_PATH,	\
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h
index 629da3854b3e..fcc9cb91b211 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.h
+++ b/tools/testing/selftests/bpf/cgroup_helpers.h
@@ -26,4 +26,4 @@ int join_classid(void);
 int setup_classid_environment(void);
 void cleanup_classid_environment(void);
 
-#endif /* __CGROUP_HELPERS_H */
+#endif /* __CGROUP_HELPERS_H */
+\ No newline at end of file
diff --git a/tools/testing/selftests/bpf/flow_dissector_load.c b/tools/testing/selftests/bpf/flow_dissector_load.c
index 3fd83b9dc1bf..87fd1aa323a9 100644
--- a/tools/testing/selftests/bpf/flow_dissector_load.c
+++ b/tools/testing/selftests/bpf/flow_dissector_load.c
@@ -17,7 +17,7 @@
 const char *cfg_pin_path = "/sys/fs/bpf/flow_dissector";
 const char *cfg_map_name = "jmp_table";
 bool cfg_attach = true;
-char *cfg_section_name;
+char *cfg_prog_name;
 char *cfg_path_name;
 
 static void load_and_attach_program(void)
@@ -25,7 +25,11 @@ static void load_and_attach_program(void)
 	int prog_fd, ret;
 	struct bpf_object *obj;
 
-	ret = bpf_flow_load(&obj, cfg_path_name, cfg_section_name,
+	ret = libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+	if (ret)
+		error(1, 0, "failed to enable libbpf strict mode: %d", ret);
+
+	ret = bpf_flow_load(&obj, cfg_path_name, cfg_prog_name,
 			    cfg_map_name, NULL, &prog_fd, NULL);
 	if (ret)
 		error(1, 0, "bpf_flow_load %s", cfg_path_name);
@@ -75,15 +79,15 @@ static void parse_opts(int argc, char **argv)
 			break;
 		case 'p':
 			if (cfg_path_name)
-				error(1, 0, "only one prog name can be given");
+				error(1, 0, "only one path can be given");
 
 			cfg_path_name = optarg;
 			break;
 		case 's':
-			if (cfg_section_name)
-				error(1, 0, "only one section can be given");
+			if (cfg_prog_name)
+				error(1, 0, "only one prog can be given");
 
-			cfg_section_name = optarg;
+			cfg_prog_name = optarg;
 			break;
 		}
 	}
@@ -94,7 +98,7 @@ static void parse_opts(int argc, char **argv)
 	if (cfg_attach && !cfg_path_name)
 		error(1, 0, "must provide a path to the BPF program");
 
-	if (cfg_attach && !cfg_section_name)
+	if (cfg_attach && !cfg_prog_name)
 		error(1, 0, "must provide a section name");
 }
 
diff --git a/tools/testing/selftests/bpf/flow_dissector_load.h b/tools/testing/selftests/bpf/flow_dissector_load.h
index 7290401ec172..9d0acc2fc6cc 100644
--- a/tools/testing/selftests/bpf/flow_dissector_load.h
+++ b/tools/testing/selftests/bpf/flow_dissector_load.h
@@ -7,7 +7,7 @@
 
 static inline int bpf_flow_load(struct bpf_object **obj,
 				const char *path,
-				const char *section_name,
+				const char *prog_name,
 				const char *map_name,
 				const char *keys_map_name,
 				int *prog_fd,
@@ -23,13 +23,7 @@ static inline int bpf_flow_load(struct bpf_object **obj,
 	if (ret)
 		return ret;
 
-	main_prog = NULL;
-	bpf_object__for_each_program(prog, *obj) {
-		if (strcmp(section_name, bpf_program__section_name(prog)) == 0) {
-			main_prog = prog;
-			break;
-		}
-	}
+	main_prog = bpf_object__find_program_by_name(*obj, prog_name);
 	if (!main_prog)
 		return -1;
 
diff --git a/tools/testing/selftests/bpf/prog_tests/atomics.c b/tools/testing/selftests/bpf/prog_tests/atomics.c
index ba0e1efe5a45..0f9525293881 100644
--- a/tools/testing/selftests/bpf/prog_tests/atomics.c
+++ b/tools/testing/selftests/bpf/prog_tests/atomics.c
@@ -4,13 +4,13 @@
 
 #include "atomics.lskel.h"
 
-static void test_add(struct atomics *skel)
+static void test_add(struct atomics_lskel *skel)
 {
 	int err, prog_fd;
 	__u32 duration = 0, retval;
 	int link_fd;
 
-	link_fd = atomics__add__attach(skel);
+	link_fd = atomics_lskel__add__attach(skel);
 	if (!ASSERT_GT(link_fd, 0, "attach(add)"))
 		return;
 
@@ -36,13 +36,13 @@ cleanup:
 	close(link_fd);
 }
 
-static void test_sub(struct atomics *skel)
+static void test_sub(struct atomics_lskel *skel)
 {
 	int err, prog_fd;
 	__u32 duration = 0, retval;
 	int link_fd;
 
-	link_fd = atomics__sub__attach(skel);
+	link_fd = atomics_lskel__sub__attach(skel);
 	if (!ASSERT_GT(link_fd, 0, "attach(sub)"))
 		return;
 
@@ -69,13 +69,13 @@ cleanup:
 	close(link_fd);
 }
 
-static void test_and(struct atomics *skel)
+static void test_and(struct atomics_lskel *skel)
 {
 	int err, prog_fd;
 	__u32 duration = 0, retval;
 	int link_fd;
 
-	link_fd = atomics__and__attach(skel);
+	link_fd = atomics_lskel__and__attach(skel);
 	if (!ASSERT_GT(link_fd, 0, "attach(and)"))
 		return;
 
@@ -97,13 +97,13 @@ cleanup:
 	close(link_fd);
 }
 
-static void test_or(struct atomics *skel)
+static void test_or(struct atomics_lskel *skel)
 {
 	int err, prog_fd;
 	__u32 duration = 0, retval;
 	int link_fd;
 
-	link_fd = atomics__or__attach(skel);
+	link_fd = atomics_lskel__or__attach(skel);
 	if (!ASSERT_GT(link_fd, 0, "attach(or)"))
 		return;
 
@@ -126,13 +126,13 @@ cleanup:
 	close(link_fd);
 }
 
-static void test_xor(struct atomics *skel)
+static void test_xor(struct atomics_lskel *skel)
 {
 	int err, prog_fd;
 	__u32 duration = 0, retval;
 	int link_fd;
 
-	link_fd = atomics__xor__attach(skel);
+	link_fd = atomics_lskel__xor__attach(skel);
 	if (!ASSERT_GT(link_fd, 0, "attach(xor)"))
 		return;
 
@@ -154,13 +154,13 @@ cleanup:
 	close(link_fd);
 }
 
-static void test_cmpxchg(struct atomics *skel)
+static void test_cmpxchg(struct atomics_lskel *skel)
 {
 	int err, prog_fd;
 	__u32 duration = 0, retval;
 	int link_fd;
 
-	link_fd = atomics__cmpxchg__attach(skel);
+	link_fd = atomics_lskel__cmpxchg__attach(skel);
 	if (!ASSERT_GT(link_fd, 0, "attach(cmpxchg)"))
 		return;
 
@@ -183,13 +183,13 @@ cleanup:
 	close(link_fd);
 }
 
-static void test_xchg(struct atomics *skel)
+static void test_xchg(struct atomics_lskel *skel)
 {
 	int err, prog_fd;
 	__u32 duration = 0, retval;
 	int link_fd;
 
-	link_fd = atomics__xchg__attach(skel);
+	link_fd = atomics_lskel__xchg__attach(skel);
 	if (!ASSERT_GT(link_fd, 0, "attach(xchg)"))
 		return;
 
@@ -212,10 +212,10 @@ cleanup:
 
 void test_atomics(void)
 {
-	struct atomics *skel;
+	struct atomics_lskel *skel;
 	__u32 duration = 0;
 
-	skel = atomics__open_and_load();
+	skel = atomics_lskel__open_and_load();
 	if (CHECK(!skel, "skel_load", "atomics skeleton failed\n"))
 		return;
 
@@ -225,6 +225,7 @@ void test_atomics(void)
 		test__skip();
 		goto cleanup;
 	}
+	skel->bss->pid = getpid();
 
 	if (test__start_subtest("add"))
 		test_add(skel);
@@ -242,5 +243,5 @@ void test_atomics(void)
 		test_xchg(skel);
 
 cleanup:
-	atomics__destroy(skel);
+	atomics_lskel__destroy(skel);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
index bf307bb9e446..d0bd51eb23c8 100644
--- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c
+++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
@@ -5,6 +5,11 @@
 /* this is how USDT semaphore is actually defined, except volatile modifier */
 volatile unsigned short uprobe_ref_ctr __attribute__((unused)) __attribute((section(".probes")));
 
+/* attach point */
+static void method(void) {
+	return ;
+}
+
 void test_attach_probe(void)
 {
 	DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts);
@@ -14,12 +19,26 @@ void test_attach_probe(void)
 	struct test_attach_probe* skel;
 	size_t uprobe_offset;
 	ssize_t base_addr, ref_ctr_offset;
+	bool legacy;
+
+	/* Check if new-style kprobe/uprobe API is supported.
+	 * Kernels that support new FD-based kprobe and uprobe BPF attachment
+	 * through perf_event_open() syscall expose
+	 * /sys/bus/event_source/devices/kprobe/type and
+	 * /sys/bus/event_source/devices/uprobe/type files, respectively. They
+	 * contain magic numbers that are passed as "type" field of
+	 * perf_event_attr. Lack of such file in the system indicates legacy
+	 * kernel with old-style kprobe/uprobe attach interface through
+	 * creating per-probe event through tracefs. For such cases
+	 * ref_ctr_offset feature is not supported, so we don't test it.
+	 */
+	legacy = access("/sys/bus/event_source/devices/kprobe/type", F_OK) != 0;
 
 	base_addr = get_base_addr();
 	if (CHECK(base_addr < 0, "get_base_addr",
 		  "failed to find base addr: %zd", base_addr))
 		return;
-	uprobe_offset = get_uprobe_offset(&get_base_addr, base_addr);
+	uprobe_offset = get_uprobe_offset(&method, base_addr);
 
 	ref_ctr_offset = get_rel_offset((uintptr_t)&uprobe_ref_ctr);
 	if (!ASSERT_GE(ref_ctr_offset, 0, "ref_ctr_offset"))
@@ -45,10 +64,11 @@ void test_attach_probe(void)
 		goto cleanup;
 	skel->links.handle_kretprobe = kretprobe_link;
 
-	ASSERT_EQ(uprobe_ref_ctr, 0, "uprobe_ref_ctr_before");
+	if (!legacy)
+		ASSERT_EQ(uprobe_ref_ctr, 0, "uprobe_ref_ctr_before");
 
 	uprobe_opts.retprobe = false;
-	uprobe_opts.ref_ctr_offset = ref_ctr_offset;
+	uprobe_opts.ref_ctr_offset = legacy ? 0 : ref_ctr_offset;
 	uprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe,
 						      0 /* self pid */,
 						      "/proc/self/exe",
@@ -58,11 +78,12 @@ void test_attach_probe(void)
 		goto cleanup;
 	skel->links.handle_uprobe = uprobe_link;
 
-	ASSERT_GT(uprobe_ref_ctr, 0, "uprobe_ref_ctr_after");
+	if (!legacy)
+		ASSERT_GT(uprobe_ref_ctr, 0, "uprobe_ref_ctr_after");
 
 	/* if uprobe uses ref_ctr, uretprobe has to use ref_ctr as well */
 	uprobe_opts.retprobe = true;
-	uprobe_opts.ref_ctr_offset = ref_ctr_offset;
+	uprobe_opts.ref_ctr_offset = legacy ? 0 : ref_ctr_offset;
 	uretprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uretprobe,
 							 -1 /* any pid */,
 							 "/proc/self/exe",
@@ -82,7 +103,7 @@ void test_attach_probe(void)
 		goto cleanup;
 
 	/* trigger & validate uprobe & uretprobe */
-	get_base_addr();
+	method();
 
 	if (CHECK(skel->bss->uprobe_res != 3, "check_uprobe_res",
 		  "wrong uprobe res: %d\n", skel->bss->uprobe_res))
diff --git a/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c
new file mode 100644
index 000000000000..be73e3de6668
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c
@@ -0,0 +1,211 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <sys/syscall.h>
+#include <test_progs.h>
+#include "bloom_filter_map.skel.h"
+
+static void test_fail_cases(void)
+{
+	__u32 value;
+	int fd, err;
+
+	/* Invalid key size */
+	fd = bpf_create_map(BPF_MAP_TYPE_BLOOM_FILTER, 4, sizeof(value), 100, 0);
+	if (!ASSERT_LT(fd, 0, "bpf_create_map bloom filter invalid key size"))
+		close(fd);
+
+	/* Invalid value size */
+	fd = bpf_create_map(BPF_MAP_TYPE_BLOOM_FILTER, 0, 0, 100, 0);
+	if (!ASSERT_LT(fd, 0, "bpf_create_map bloom filter invalid value size 0"))
+		close(fd);
+
+	/* Invalid max entries size */
+	fd = bpf_create_map(BPF_MAP_TYPE_BLOOM_FILTER, 0, sizeof(value), 0, 0);
+	if (!ASSERT_LT(fd, 0, "bpf_create_map bloom filter invalid max entries size"))
+		close(fd);
+
+	/* Bloom filter maps do not support BPF_F_NO_PREALLOC */
+	fd = bpf_create_map(BPF_MAP_TYPE_BLOOM_FILTER, 0, sizeof(value), 100,
+			    BPF_F_NO_PREALLOC);
+	if (!ASSERT_LT(fd, 0, "bpf_create_map bloom filter invalid flags"))
+		close(fd);
+
+	fd = bpf_create_map(BPF_MAP_TYPE_BLOOM_FILTER, 0, sizeof(value), 100, 0);
+	if (!ASSERT_GE(fd, 0, "bpf_create_map bloom filter"))
+		return;
+
+	/* Test invalid flags */
+	err = bpf_map_update_elem(fd, NULL, &value, -1);
+	ASSERT_EQ(err, -EINVAL, "bpf_map_update_elem bloom filter invalid flags");
+
+	err = bpf_map_update_elem(fd, NULL, &value, BPF_EXIST);
+	ASSERT_EQ(err, -EINVAL, "bpf_map_update_elem bloom filter invalid flags");
+
+	err = bpf_map_update_elem(fd, NULL, &value, BPF_F_LOCK);
+	ASSERT_EQ(err, -EINVAL, "bpf_map_update_elem bloom filter invalid flags");
+
+	err = bpf_map_update_elem(fd, NULL, &value, BPF_NOEXIST);
+	ASSERT_EQ(err, -EINVAL, "bpf_map_update_elem bloom filter invalid flags");
+
+	err = bpf_map_update_elem(fd, NULL, &value, 10000);
+	ASSERT_EQ(err, -EINVAL, "bpf_map_update_elem bloom filter invalid flags");
+
+	close(fd);
+}
+
+static void test_success_cases(void)
+{
+	char value[11];
+	int fd, err;
+
+	/* Create a map */
+	fd = bpf_create_map(BPF_MAP_TYPE_BLOOM_FILTER, 0, sizeof(value), 100,
+			    BPF_F_ZERO_SEED | BPF_F_NUMA_NODE);
+	if (!ASSERT_GE(fd, 0, "bpf_create_map bloom filter success case"))
+		return;
+
+	/* Add a value to the bloom filter */
+	err = bpf_map_update_elem(fd, NULL, &value, 0);
+	if (!ASSERT_OK(err, "bpf_map_update_elem bloom filter success case"))
+		goto done;
+
+	 /* Lookup a value in the bloom filter */
+	err = bpf_map_lookup_elem(fd, NULL, &value);
+	ASSERT_OK(err, "bpf_map_update_elem bloom filter success case");
+
+done:
+	close(fd);
+}
+
+static void check_bloom(struct bloom_filter_map *skel)
+{
+	struct bpf_link *link;
+
+	link = bpf_program__attach(skel->progs.check_bloom);
+	if (!ASSERT_OK_PTR(link, "link"))
+		return;
+
+	syscall(SYS_getpgid);
+
+	ASSERT_EQ(skel->bss->error, 0, "error");
+
+	bpf_link__destroy(link);
+}
+
+static void test_inner_map(struct bloom_filter_map *skel, const __u32 *rand_vals,
+			   __u32 nr_rand_vals)
+{
+	int outer_map_fd, inner_map_fd, err, i, key = 0;
+	struct bpf_link *link;
+
+	/* Create a bloom filter map that will be used as the inner map */
+	inner_map_fd = bpf_create_map(BPF_MAP_TYPE_BLOOM_FILTER, 0, sizeof(*rand_vals),
+				      nr_rand_vals, 0);
+	if (!ASSERT_GE(inner_map_fd, 0, "bpf_create_map bloom filter inner map"))
+		return;
+
+	for (i = 0; i < nr_rand_vals; i++) {
+		err = bpf_map_update_elem(inner_map_fd, NULL, rand_vals + i, BPF_ANY);
+		if (!ASSERT_OK(err, "Add random value to inner_map_fd"))
+			goto done;
+	}
+
+	/* Add the bloom filter map to the outer map */
+	outer_map_fd = bpf_map__fd(skel->maps.outer_map);
+	err = bpf_map_update_elem(outer_map_fd, &key, &inner_map_fd, BPF_ANY);
+	if (!ASSERT_OK(err, "Add bloom filter map to outer map"))
+		goto done;
+
+	/* Attach the bloom_filter_inner_map prog */
+	link = bpf_program__attach(skel->progs.inner_map);
+	if (!ASSERT_OK_PTR(link, "link"))
+		goto delete_inner_map;
+
+	syscall(SYS_getpgid);
+
+	ASSERT_EQ(skel->bss->error, 0, "error");
+
+	bpf_link__destroy(link);
+
+delete_inner_map:
+	/* Ensure the inner bloom filter map can be deleted */
+	err = bpf_map_delete_elem(outer_map_fd, &key);
+	ASSERT_OK(err, "Delete inner bloom filter map");
+
+done:
+	close(inner_map_fd);
+}
+
+static int setup_progs(struct bloom_filter_map **out_skel, __u32 **out_rand_vals,
+		       __u32 *out_nr_rand_vals)
+{
+	struct bloom_filter_map *skel;
+	int random_data_fd, bloom_fd;
+	__u32 *rand_vals = NULL;
+	__u32 map_size, val;
+	int err, i;
+
+	/* Set up a bloom filter map skeleton */
+	skel = bloom_filter_map__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "bloom_filter_map__open_and_load"))
+		return -EINVAL;
+
+	/* Set up rand_vals */
+	map_size = bpf_map__max_entries(skel->maps.map_random_data);
+	rand_vals = malloc(sizeof(*rand_vals) * map_size);
+	if (!rand_vals) {
+		err = -ENOMEM;
+		goto error;
+	}
+
+	/* Generate random values and populate both skeletons */
+	random_data_fd = bpf_map__fd(skel->maps.map_random_data);
+	bloom_fd = bpf_map__fd(skel->maps.map_bloom);
+	for (i = 0; i < map_size; i++) {
+		val = rand();
+
+		err = bpf_map_update_elem(random_data_fd, &i, &val, BPF_ANY);
+		if (!ASSERT_OK(err, "Add random value to map_random_data"))
+			goto error;
+
+		err = bpf_map_update_elem(bloom_fd, NULL, &val, BPF_ANY);
+		if (!ASSERT_OK(err, "Add random value to map_bloom"))
+			goto error;
+
+		rand_vals[i] = val;
+	}
+
+	*out_skel = skel;
+	*out_rand_vals = rand_vals;
+	*out_nr_rand_vals = map_size;
+
+	return 0;
+
+error:
+	bloom_filter_map__destroy(skel);
+	if (rand_vals)
+		free(rand_vals);
+	return err;
+}
+
+void test_bloom_filter_map(void)
+{
+	__u32 *rand_vals, nr_rand_vals;
+	struct bloom_filter_map *skel;
+	int err;
+
+	test_fail_cases();
+	test_success_cases();
+
+	err = setup_progs(&skel, &rand_vals, &nr_rand_vals);
+	if (err)
+		return;
+
+	test_inner_map(skel, rand_vals, nr_rand_vals);
+	free(rand_vals);
+
+	check_bloom(skel);
+
+	bloom_filter_map__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 77ac24b191d4..9454331aaf85 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -589,7 +589,7 @@ out:
 
 static void test_bpf_hash_map(void)
 {
-	__u32 expected_key_a = 0, expected_key_b = 0, expected_key_c = 0;
+	__u32 expected_key_a = 0, expected_key_b = 0;
 	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
 	struct bpf_iter_bpf_hash_map *skel;
 	int err, i, len, map_fd, iter_fd;
@@ -638,7 +638,6 @@ static void test_bpf_hash_map(void)
 		val = i + 4;
 		expected_key_a += key.a;
 		expected_key_b += key.b;
-		expected_key_c += key.c;
 		expected_val += val;
 
 		err = bpf_map_update_elem(map_fd, &key, &val, BPF_ANY);
@@ -685,7 +684,7 @@ out:
 
 static void test_bpf_percpu_hash_map(void)
 {
-	__u32 expected_key_a = 0, expected_key_b = 0, expected_key_c = 0;
+	__u32 expected_key_a = 0, expected_key_b = 0;
 	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
 	struct bpf_iter_bpf_percpu_hash_map *skel;
 	int err, i, j, len, map_fd, iter_fd;
@@ -722,7 +721,6 @@ static void test_bpf_percpu_hash_map(void)
 		key.c = i + 3;
 		expected_key_a += key.a;
 		expected_key_b += key.b;
-		expected_key_c += key.c;
 
 		for (j = 0; j < bpf_num_possible_cpus(); j++) {
 			*(__u32 *)(val + j * 8) = i + j;
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c
index 85babb0487b3..b52ff8ce34db 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c
@@ -179,7 +179,7 @@ done:
 	free_fds(est_fds, nr_est);
 }
 
-void test_bpf_iter_setsockopt(void)
+void serial_test_bpf_iter_setsockopt(void)
 {
 	struct bpf_iter_setsockopt *iter_skel = NULL;
 	struct bpf_cubic *cubic_skel = NULL;
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
index 284d5921c345..eb8eeebe6935 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
@@ -3,7 +3,7 @@
 
 #define nr_iters 2
 
-void test_bpf_obj_id(void)
+void serial_test_bpf_obj_id(void)
 {
 	const __u64 array_magic_value = 0xfaceb00c;
 	const __u32 array_key = 0;
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
index 3d002c245d2b..27f5d8ea7964 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
@@ -39,82 +39,171 @@ struct scale_test_def {
 	bool fails;
 };
 
-void test_bpf_verif_scale(void)
-{
-	struct scale_test_def tests[] = {
-		{ "loop3.o", BPF_PROG_TYPE_RAW_TRACEPOINT, true /* fails */ },
-
-		{ "test_verif_scale1.o", BPF_PROG_TYPE_SCHED_CLS },
-		{ "test_verif_scale2.o", BPF_PROG_TYPE_SCHED_CLS },
-		{ "test_verif_scale3.o", BPF_PROG_TYPE_SCHED_CLS },
-
-		{ "pyperf_global.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
-		{ "pyperf_subprogs.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
-
-		/* full unroll by llvm */
-		{ "pyperf50.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
-		{ "pyperf100.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
-		{ "pyperf180.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
-
-		/* partial unroll. llvm will unroll loop ~150 times.
-		 * C loop count -> 600.
-		 * Asm loop count -> 4.
-		 * 16k insns in loop body.
-		 * Total of 5 such loops. Total program size ~82k insns.
-		 */
-		{ "pyperf600.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
-
-		/* no unroll at all.
-		 * C loop count -> 600.
-		 * ASM loop count -> 600.
-		 * ~110 insns in loop body.
-		 * Total of 5 such loops. Total program size ~1500 insns.
-		 */
-		{ "pyperf600_nounroll.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
-
-		{ "loop1.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
-		{ "loop2.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
-		{ "loop4.o", BPF_PROG_TYPE_SCHED_CLS },
-		{ "loop5.o", BPF_PROG_TYPE_SCHED_CLS },
-		{ "loop6.o", BPF_PROG_TYPE_KPROBE },
-
-		/* partial unroll. 19k insn in a loop.
-		 * Total program size 20.8k insn.
-		 * ~350k processed_insns
-		 */
-		{ "strobemeta.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
-
-		/* no unroll, tiny loops */
-		{ "strobemeta_nounroll1.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
-		{ "strobemeta_nounroll2.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
-
-		/* non-inlined subprogs */
-		{ "strobemeta_subprogs.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
-
-		{ "test_sysctl_loop1.o", BPF_PROG_TYPE_CGROUP_SYSCTL },
-		{ "test_sysctl_loop2.o", BPF_PROG_TYPE_CGROUP_SYSCTL },
-
-		{ "test_xdp_loop.o", BPF_PROG_TYPE_XDP },
-		{ "test_seg6_loop.o", BPF_PROG_TYPE_LWT_SEG6LOCAL },
-	};
+static void scale_test(const char *file,
+		       enum bpf_prog_type attach_type,
+		       bool should_fail)
+{
 	libbpf_print_fn_t old_print_fn = NULL;
-	int err, i;
+	int err;
 
 	if (env.verifier_stats) {
 		test__force_log();
 		old_print_fn = libbpf_set_print(libbpf_debug_print);
 	}
 
-	for (i = 0; i < ARRAY_SIZE(tests); i++) {
-		const struct scale_test_def *test = &tests[i];
-
-		if (!test__start_subtest(test->file))
-			continue;
-
-		err = check_load(test->file, test->attach_type);
-		CHECK_FAIL(err && !test->fails);
-	}
+	err = check_load(file, attach_type);
+	if (should_fail)
+		ASSERT_ERR(err, "expect_error");
+	else
+		ASSERT_OK(err, "expect_success");
 
 	if (env.verifier_stats)
 		libbpf_set_print(old_print_fn);
 }
+
+void test_verif_scale1()
+{
+	scale_test("test_verif_scale1.o", BPF_PROG_TYPE_SCHED_CLS, false);
+}
+
+void test_verif_scale2()
+{
+	scale_test("test_verif_scale2.o", BPF_PROG_TYPE_SCHED_CLS, false);
+}
+
+void test_verif_scale3()
+{
+	scale_test("test_verif_scale3.o", BPF_PROG_TYPE_SCHED_CLS, false);
+}
+
+void test_verif_scale_pyperf_global()
+{
+	scale_test("pyperf_global.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+}
+
+void test_verif_scale_pyperf_subprogs()
+{
+	scale_test("pyperf_subprogs.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+}
+
+void test_verif_scale_pyperf50()
+{
+	/* full unroll by llvm */
+	scale_test("pyperf50.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+}
+
+void test_verif_scale_pyperf100()
+{
+	/* full unroll by llvm */
+	scale_test("pyperf100.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+}
+
+void test_verif_scale_pyperf180()
+{
+	/* full unroll by llvm */
+	scale_test("pyperf180.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+}
+
+void test_verif_scale_pyperf600()
+{
+	/* partial unroll. llvm will unroll loop ~150 times.
+	 * C loop count -> 600.
+	 * Asm loop count -> 4.
+	 * 16k insns in loop body.
+	 * Total of 5 such loops. Total program size ~82k insns.
+	 */
+	scale_test("pyperf600.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+}
+
+void test_verif_scale_pyperf600_nounroll()
+{
+	/* no unroll at all.
+	 * C loop count -> 600.
+	 * ASM loop count -> 600.
+	 * ~110 insns in loop body.
+	 * Total of 5 such loops. Total program size ~1500 insns.
+	 */
+	scale_test("pyperf600_nounroll.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+}
+
+void test_verif_scale_loop1()
+{
+	scale_test("loop1.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+}
+
+void test_verif_scale_loop2()
+{
+	scale_test("loop2.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+}
+
+void test_verif_scale_loop3_fail()
+{
+	scale_test("loop3.o", BPF_PROG_TYPE_RAW_TRACEPOINT, true /* fails */);
+}
+
+void test_verif_scale_loop4()
+{
+	scale_test("loop4.o", BPF_PROG_TYPE_SCHED_CLS, false);
+}
+
+void test_verif_scale_loop5()
+{
+	scale_test("loop5.o", BPF_PROG_TYPE_SCHED_CLS, false);
+}
+
+void test_verif_scale_loop6()
+{
+	scale_test("loop6.o", BPF_PROG_TYPE_KPROBE, false);
+}
+
+void test_verif_scale_strobemeta()
+{
+	/* partial unroll. 19k insn in a loop.
+	 * Total program size 20.8k insn.
+	 * ~350k processed_insns
+	 */
+	scale_test("strobemeta.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+}
+
+void test_verif_scale_strobemeta_nounroll1()
+{
+	/* no unroll, tiny loops */
+	scale_test("strobemeta_nounroll1.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+}
+
+void test_verif_scale_strobemeta_nounroll2()
+{
+	/* no unroll, tiny loops */
+	scale_test("strobemeta_nounroll2.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+}
+
+void test_verif_scale_strobemeta_subprogs()
+{
+	/* non-inlined subprogs */
+	scale_test("strobemeta_subprogs.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+}
+
+void test_verif_scale_sysctl_loop1()
+{
+	scale_test("test_sysctl_loop1.o", BPF_PROG_TYPE_CGROUP_SYSCTL, false);
+}
+
+void test_verif_scale_sysctl_loop2()
+{
+	scale_test("test_sysctl_loop2.o", BPF_PROG_TYPE_CGROUP_SYSCTL, false);
+}
+
+void test_verif_scale_xdp_loop()
+{
+	scale_test("test_xdp_loop.o", BPF_PROG_TYPE_XDP, false);
+}
+
+void test_verif_scale_seg6_loop()
+{
+	scale_test("test_seg6_loop.o", BPF_PROG_TYPE_LWT_SEG6LOCAL, false);
+}
+
+void test_verif_twfw()
+{
+	scale_test("twfw.o", BPF_PROG_TYPE_CGROUP_SKB, false);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index 649f87382c8d..ac596cb06e40 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -39,8 +39,8 @@ static bool always_log;
 #define BTF_END_RAW 0xdeadbeef
 #define NAME_TBD 0xdeadb33f
 
-#define NAME_NTH(N) (0xffff0000 | N)
-#define IS_NAME_NTH(X) ((X & 0xffff0000) == 0xffff0000)
+#define NAME_NTH(N) (0xfffe0000 | N)
+#define IS_NAME_NTH(X) ((X & 0xffff0000) == 0xfffe0000)
 #define GET_NAME_NTH_IDX(X) (X & 0x0000ffff)
 
 #define MAX_NR_RAW_U32 1024
@@ -3661,6 +3661,285 @@ static struct btf_raw_test raw_tests[] = {
 	.err_str = "Invalid type_size",
 },
 
+{
+	.descr = "decl_tag test #1, struct/member, well-formed",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_STRUCT_ENC(0, 2, 8),			/* [2] */
+		BTF_MEMBER_ENC(NAME_TBD, 1, 0),
+		BTF_MEMBER_ENC(NAME_TBD, 1, 32),
+		BTF_DECL_TAG_ENC(NAME_TBD, 2, -1),
+		BTF_DECL_TAG_ENC(NAME_TBD, 2, 0),
+		BTF_DECL_TAG_ENC(NAME_TBD, 2, 1),
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0m1\0m2\0tag1\0tag2\0tag3"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "tag_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = 8,
+	.key_type_id = 1,
+	.value_type_id = 2,
+	.max_entries = 1,
+},
+{
+	.descr = "decl_tag test #2, union/member, well-formed",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_UNION_ENC(NAME_TBD, 2, 4),			/* [2] */
+		BTF_MEMBER_ENC(NAME_TBD, 1, 0),
+		BTF_MEMBER_ENC(NAME_TBD, 1, 0),
+		BTF_DECL_TAG_ENC(NAME_TBD, 2, -1),
+		BTF_DECL_TAG_ENC(NAME_TBD, 2, 0),
+		BTF_DECL_TAG_ENC(NAME_TBD, 2, 1),
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0t\0m1\0m2\0tag1\0tag2\0tag3"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "tag_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = 4,
+	.key_type_id = 1,
+	.value_type_id = 2,
+	.max_entries = 1,
+},
+{
+	.descr = "decl_tag test #3, variable, well-formed",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_VAR_ENC(NAME_TBD, 1, 0),			/* [2] */
+		BTF_VAR_ENC(NAME_TBD, 1, 1),			/* [3] */
+		BTF_DECL_TAG_ENC(NAME_TBD, 2, -1),
+		BTF_DECL_TAG_ENC(NAME_TBD, 3, -1),
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0local\0global\0tag1\0tag2"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "tag_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = 4,
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 1,
+},
+{
+	.descr = "decl_tag test #4, func/parameter, well-formed",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_FUNC_PROTO_ENC(0, 2),			/* [2] */
+			BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
+			BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
+		BTF_FUNC_ENC(NAME_TBD, 2),			/* [3] */
+		BTF_DECL_TAG_ENC(NAME_TBD, 3, -1),
+		BTF_DECL_TAG_ENC(NAME_TBD, 3, 0),
+		BTF_DECL_TAG_ENC(NAME_TBD, 3, 1),
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0arg1\0arg2\0f\0tag1\0tag2\0tag3"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "tag_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = 4,
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 1,
+},
+{
+	.descr = "decl_tag test #5, invalid value",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_VAR_ENC(NAME_TBD, 1, 0),			/* [2] */
+		BTF_DECL_TAG_ENC(0, 2, -1),
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0local\0tag"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "tag_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = 4,
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 1,
+	.btf_load_err = true,
+	.err_str = "Invalid value",
+},
+{
+	.descr = "decl_tag test #6, invalid target type",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_DECL_TAG_ENC(NAME_TBD, 1, -1),
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0tag1"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "tag_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = 4,
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 1,
+	.btf_load_err = true,
+	.err_str = "Invalid type",
+},
+{
+	.descr = "decl_tag test #7, invalid vlen",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_VAR_ENC(NAME_TBD, 1, 0),			/* [2] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 0, 1), 2), (0),
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0local\0tag1"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "tag_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = 4,
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 1,
+	.btf_load_err = true,
+	.err_str = "vlen != 0",
+},
+{
+	.descr = "decl_tag test #8, invalid kflag",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_VAR_ENC(NAME_TBD, 1, 0),			/* [2] */
+		BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 1, 0), 2), (-1),
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0local\0tag1"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "tag_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = 4,
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 1,
+	.btf_load_err = true,
+	.err_str = "Invalid btf_info kind_flag",
+},
+{
+	.descr = "decl_tag test #9, var, invalid component_idx",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_VAR_ENC(NAME_TBD, 1, 0),			/* [2] */
+		BTF_DECL_TAG_ENC(NAME_TBD, 2, 0),
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0local\0tag"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "tag_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = 4,
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 1,
+	.btf_load_err = true,
+	.err_str = "Invalid component_idx",
+},
+{
+	.descr = "decl_tag test #10, struct member, invalid component_idx",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_STRUCT_ENC(0, 2, 8),			/* [2] */
+		BTF_MEMBER_ENC(NAME_TBD, 1, 0),
+		BTF_MEMBER_ENC(NAME_TBD, 1, 32),
+		BTF_DECL_TAG_ENC(NAME_TBD, 2, 2),
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0m1\0m2\0tag"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "tag_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = 8,
+	.key_type_id = 1,
+	.value_type_id = 2,
+	.max_entries = 1,
+	.btf_load_err = true,
+	.err_str = "Invalid component_idx",
+},
+{
+	.descr = "decl_tag test #11, func parameter, invalid component_idx",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_FUNC_PROTO_ENC(0, 2),			/* [2] */
+			BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
+			BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
+		BTF_FUNC_ENC(NAME_TBD, 2),			/* [3] */
+		BTF_DECL_TAG_ENC(NAME_TBD, 3, 2),
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0arg1\0arg2\0f\0tag"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "tag_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = 4,
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 1,
+	.btf_load_err = true,
+	.err_str = "Invalid component_idx",
+},
+{
+	.descr = "decl_tag test #12, < -1 component_idx",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_FUNC_PROTO_ENC(0, 2),			/* [2] */
+			BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
+			BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
+		BTF_FUNC_ENC(NAME_TBD, 2),			/* [3] */
+		BTF_DECL_TAG_ENC(NAME_TBD, 3, -2),
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0arg1\0arg2\0f\0tag"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "tag_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = 4,
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 1,
+	.btf_load_err = true,
+	.err_str = "Invalid component_idx",
+},
+{
+	.descr = "decl_tag test #13, typedef, well-formed",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_TYPEDEF_ENC(NAME_TBD, 1),			/* [2] */
+		BTF_DECL_TAG_ENC(NAME_TBD, 2, -1),
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0t\0tag"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "tag_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = 4,
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 1,
+},
+{
+	.descr = "decl_tag test #14, typedef, invalid component_idx",
+	.raw_types = {
+		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+		BTF_TYPEDEF_ENC(NAME_TBD, 1),			/* [2] */
+		BTF_DECL_TAG_ENC(NAME_TBD, 2, 0),
+		BTF_END_RAW,
+	},
+	BTF_STR_SEC("\0local\0tag"),
+	.map_type = BPF_MAP_TYPE_ARRAY,
+	.map_name = "tag_type_check_btf",
+	.key_size = sizeof(int),
+	.value_size = 4,
+	.key_type_id = 1,
+	.value_type_id = 1,
+	.max_entries = 1,
+	.btf_load_err = true,
+	.err_str = "Invalid component_idx",
+},
+
 }; /* struct btf_raw_test raw_tests[] */
 
 static const char *get_next_str(const char *start, const char *end)
@@ -4268,7 +4547,7 @@ static void do_test_file(unsigned int test_num)
 	if (CHECK(err, "obj: %d", err))
 		return;
 
-	prog = bpf_program__next(NULL, obj);
+	prog = bpf_object__next_program(obj, NULL);
 	if (CHECK(!prog, "Cannot find bpf_prog")) {
 		err = -1;
 		goto done;
@@ -6421,27 +6700,33 @@ const struct btf_dedup_test dedup_tests[] = {
 				BTF_MEMBER_ENC(NAME_NTH(4), 5, 64),	/* const int *a;	*/
 				BTF_MEMBER_ENC(NAME_NTH(5), 2, 128),	/* int b[16];		*/
 				BTF_MEMBER_ENC(NAME_NTH(6), 1, 640),	/* int c;		*/
-				BTF_MEMBER_ENC(NAME_NTH(8), 13, 672),	/* float d;		*/
+				BTF_MEMBER_ENC(NAME_NTH(8), 15, 672),	/* float d;		*/
 			/* ptr -> [3] struct s */
 			BTF_PTR_ENC(3),							/* [4] */
 			/* ptr -> [6] const int */
 			BTF_PTR_ENC(6),							/* [5] */
 			/* const -> [1] int */
 			BTF_CONST_ENC(1),						/* [6] */
+			/* tag -> [3] struct s */
+			BTF_DECL_TAG_ENC(NAME_NTH(2), 3, -1),				/* [7] */
+			/* tag -> [3] struct s, member 1 */
+			BTF_DECL_TAG_ENC(NAME_NTH(2), 3, 1),				/* [8] */
 
 			/* full copy of the above */
-			BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4),	/* [7] */
-			BTF_TYPE_ARRAY_ENC(7, 7, 16),					/* [8] */
-			BTF_STRUCT_ENC(NAME_NTH(2), 5, 88),				/* [9] */
-				BTF_MEMBER_ENC(NAME_NTH(3), 10, 0),
-				BTF_MEMBER_ENC(NAME_NTH(4), 11, 64),
-				BTF_MEMBER_ENC(NAME_NTH(5), 8, 128),
-				BTF_MEMBER_ENC(NAME_NTH(6), 7, 640),
-				BTF_MEMBER_ENC(NAME_NTH(8), 13, 672),
-			BTF_PTR_ENC(9),							/* [10] */
-			BTF_PTR_ENC(12),						/* [11] */
-			BTF_CONST_ENC(7),						/* [12] */
-			BTF_TYPE_FLOAT_ENC(NAME_NTH(7), 4),				/* [13] */
+			BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4),	/* [9] */
+			BTF_TYPE_ARRAY_ENC(9, 9, 16),					/* [10] */
+			BTF_STRUCT_ENC(NAME_NTH(2), 5, 88),				/* [11] */
+				BTF_MEMBER_ENC(NAME_NTH(3), 12, 0),
+				BTF_MEMBER_ENC(NAME_NTH(4), 13, 64),
+				BTF_MEMBER_ENC(NAME_NTH(5), 10, 128),
+				BTF_MEMBER_ENC(NAME_NTH(6), 9, 640),
+				BTF_MEMBER_ENC(NAME_NTH(8), 15, 672),
+			BTF_PTR_ENC(11),						/* [12] */
+			BTF_PTR_ENC(14),						/* [13] */
+			BTF_CONST_ENC(9),						/* [14] */
+			BTF_TYPE_FLOAT_ENC(NAME_NTH(7), 4),				/* [15] */
+			BTF_DECL_TAG_ENC(NAME_NTH(2), 11, -1),				/* [16] */
+			BTF_DECL_TAG_ENC(NAME_NTH(2), 11, 1),				/* [17] */
 			BTF_END_RAW,
 		},
 		BTF_STR_SEC("\0int\0s\0next\0a\0b\0c\0float\0d"),
@@ -6458,14 +6743,16 @@ const struct btf_dedup_test dedup_tests[] = {
 				BTF_MEMBER_ENC(NAME_NTH(1), 5, 64),	/* const int *a;	*/
 				BTF_MEMBER_ENC(NAME_NTH(2), 2, 128),	/* int b[16];		*/
 				BTF_MEMBER_ENC(NAME_NTH(3), 1, 640),	/* int c;		*/
-				BTF_MEMBER_ENC(NAME_NTH(4), 7, 672),	/* float d;		*/
+				BTF_MEMBER_ENC(NAME_NTH(4), 9, 672),	/* float d;		*/
 			/* ptr -> [3] struct s */
 			BTF_PTR_ENC(3),							/* [4] */
 			/* ptr -> [6] const int */
 			BTF_PTR_ENC(6),							/* [5] */
 			/* const -> [1] int */
 			BTF_CONST_ENC(1),						/* [6] */
-			BTF_TYPE_FLOAT_ENC(NAME_NTH(7), 4),				/* [7] */
+			BTF_DECL_TAG_ENC(NAME_NTH(2), 3, -1),				/* [7] */
+			BTF_DECL_TAG_ENC(NAME_NTH(2), 3, 1),				/* [8] */
+			BTF_TYPE_FLOAT_ENC(NAME_NTH(7), 4),				/* [9] */
 			BTF_END_RAW,
 		},
 		BTF_STR_SEC("\0a\0b\0c\0d\0int\0float\0next\0s"),
@@ -6590,9 +6877,12 @@ const struct btf_dedup_test dedup_tests[] = {
 				BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 8),
 			BTF_FUNC_ENC(NAME_TBD, 12),					/* [13] func */
 			BTF_TYPE_FLOAT_ENC(NAME_TBD, 2),				/* [14] float */
+			BTF_DECL_TAG_ENC(NAME_TBD, 13, -1),				/* [15] decl_tag */
+			BTF_DECL_TAG_ENC(NAME_TBD, 13, 1),				/* [16] decl_tag */
+			BTF_DECL_TAG_ENC(NAME_TBD, 7, -1),				/* [17] decl_tag */
 			BTF_END_RAW,
 		},
-		BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N"),
+		BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P\0Q"),
 	},
 	.expect = {
 		.raw_types = {
@@ -6616,9 +6906,12 @@ const struct btf_dedup_test dedup_tests[] = {
 				BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 8),
 			BTF_FUNC_ENC(NAME_TBD, 12),					/* [13] func */
 			BTF_TYPE_FLOAT_ENC(NAME_TBD, 2),				/* [14] float */
+			BTF_DECL_TAG_ENC(NAME_TBD, 13, -1),				/* [15] decl_tag */
+			BTF_DECL_TAG_ENC(NAME_TBD, 13, 1),				/* [16] decl_tag */
+			BTF_DECL_TAG_ENC(NAME_TBD, 7, -1),				/* [17] decl_tag */
 			BTF_END_RAW,
 		},
-		BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N"),
+		BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P\0Q"),
 	},
 	.opts = {
 		.dont_resolve_fwds = false,
@@ -6767,6 +7060,185 @@ const struct btf_dedup_test dedup_tests[] = {
 		.dedup_table_size = 1
 	},
 },
+{
+	.descr = "dedup: func/func_arg/var tags",
+	.input = {
+		.raw_types = {
+			/* int */
+			BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+			/* static int t */
+			BTF_VAR_ENC(NAME_NTH(1), 1, 0),			/* [2] */
+			/* void f(int a1, int a2) */
+			BTF_FUNC_PROTO_ENC(0, 2),			/* [3] */
+				BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(2), 1),
+				BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(3), 1),
+			BTF_FUNC_ENC(NAME_NTH(4), 2),			/* [4] */
+			/* tag -> t */
+			BTF_DECL_TAG_ENC(NAME_NTH(5), 2, -1),		/* [5] */
+			BTF_DECL_TAG_ENC(NAME_NTH(5), 2, -1),		/* [6] */
+			/* tag -> func */
+			BTF_DECL_TAG_ENC(NAME_NTH(5), 4, -1),		/* [7] */
+			BTF_DECL_TAG_ENC(NAME_NTH(5), 4, -1),		/* [8] */
+			/* tag -> func arg a1 */
+			BTF_DECL_TAG_ENC(NAME_NTH(5), 4, 1),		/* [9] */
+			BTF_DECL_TAG_ENC(NAME_NTH(5), 4, 1),		/* [10] */
+			BTF_END_RAW,
+		},
+		BTF_STR_SEC("\0t\0a1\0a2\0f\0tag"),
+	},
+	.expect = {
+		.raw_types = {
+			BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+			BTF_VAR_ENC(NAME_NTH(1), 1, 0),			/* [2] */
+			BTF_FUNC_PROTO_ENC(0, 2),			/* [3] */
+				BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(2), 1),
+				BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(3), 1),
+			BTF_FUNC_ENC(NAME_NTH(4), 2),			/* [4] */
+			BTF_DECL_TAG_ENC(NAME_NTH(5), 2, -1),		/* [5] */
+			BTF_DECL_TAG_ENC(NAME_NTH(5), 4, -1),		/* [6] */
+			BTF_DECL_TAG_ENC(NAME_NTH(5), 4, 1),		/* [7] */
+			BTF_END_RAW,
+		},
+		BTF_STR_SEC("\0t\0a1\0a2\0f\0tag"),
+	},
+	.opts = {
+		.dont_resolve_fwds = false,
+	},
+},
+{
+	.descr = "dedup: func/func_param tags",
+	.input = {
+		.raw_types = {
+			/* int */
+			BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+			/* void f(int a1, int a2) */
+			BTF_FUNC_PROTO_ENC(0, 2),			/* [2] */
+				BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(1), 1),
+				BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(2), 1),
+			BTF_FUNC_ENC(NAME_NTH(3), 2),			/* [3] */
+			/* void f(int a1, int a2) */
+			BTF_FUNC_PROTO_ENC(0, 2),			/* [4] */
+				BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(1), 1),
+				BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(2), 1),
+			BTF_FUNC_ENC(NAME_NTH(3), 4),			/* [5] */
+			/* tag -> f: tag1, tag2 */
+			BTF_DECL_TAG_ENC(NAME_NTH(4), 3, -1),		/* [6] */
+			BTF_DECL_TAG_ENC(NAME_NTH(5), 3, -1),		/* [7] */
+			/* tag -> f/a2: tag1, tag2 */
+			BTF_DECL_TAG_ENC(NAME_NTH(4), 3, 1),		/* [8] */
+			BTF_DECL_TAG_ENC(NAME_NTH(5), 3, 1),		/* [9] */
+			/* tag -> f: tag1, tag3 */
+			BTF_DECL_TAG_ENC(NAME_NTH(4), 5, -1),		/* [10] */
+			BTF_DECL_TAG_ENC(NAME_NTH(6), 5, -1),		/* [11] */
+			/* tag -> f/a2: tag1, tag3 */
+			BTF_DECL_TAG_ENC(NAME_NTH(4), 5, 1),		/* [12] */
+			BTF_DECL_TAG_ENC(NAME_NTH(6), 5, 1),		/* [13] */
+			BTF_END_RAW,
+		},
+		BTF_STR_SEC("\0a1\0a2\0f\0tag1\0tag2\0tag3"),
+	},
+	.expect = {
+		.raw_types = {
+			BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+			BTF_FUNC_PROTO_ENC(0, 2),			/* [2] */
+				BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(1), 1),
+				BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(2), 1),
+			BTF_FUNC_ENC(NAME_NTH(3), 2),			/* [3] */
+			BTF_DECL_TAG_ENC(NAME_NTH(4), 3, -1),		/* [4] */
+			BTF_DECL_TAG_ENC(NAME_NTH(5), 3, -1),		/* [5] */
+			BTF_DECL_TAG_ENC(NAME_NTH(6), 3, -1),		/* [6] */
+			BTF_DECL_TAG_ENC(NAME_NTH(4), 3, 1),		/* [7] */
+			BTF_DECL_TAG_ENC(NAME_NTH(5), 3, 1),		/* [8] */
+			BTF_DECL_TAG_ENC(NAME_NTH(6), 3, 1),		/* [9] */
+			BTF_END_RAW,
+		},
+		BTF_STR_SEC("\0a1\0a2\0f\0tag1\0tag2\0tag3"),
+	},
+	.opts = {
+		.dont_resolve_fwds = false,
+	},
+},
+{
+	.descr = "dedup: struct/struct_member tags",
+	.input = {
+		.raw_types = {
+			/* int */
+			BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+			BTF_STRUCT_ENC(NAME_NTH(1), 2, 8),		/* [2] */
+				BTF_MEMBER_ENC(NAME_NTH(2), 1, 0),
+				BTF_MEMBER_ENC(NAME_NTH(3), 1, 32),
+			BTF_STRUCT_ENC(NAME_NTH(1), 2, 8),		/* [3] */
+				BTF_MEMBER_ENC(NAME_NTH(2), 1, 0),
+				BTF_MEMBER_ENC(NAME_NTH(3), 1, 32),
+			/* tag -> t: tag1, tag2 */
+			BTF_DECL_TAG_ENC(NAME_NTH(4), 2, -1),		/* [4] */
+			BTF_DECL_TAG_ENC(NAME_NTH(5), 2, -1),		/* [5] */
+			/* tag -> t/m2: tag1, tag2 */
+			BTF_DECL_TAG_ENC(NAME_NTH(4), 2, 1),		/* [6] */
+			BTF_DECL_TAG_ENC(NAME_NTH(5), 2, 1),		/* [7] */
+			/* tag -> t: tag1, tag3 */
+			BTF_DECL_TAG_ENC(NAME_NTH(4), 3, -1),		/* [8] */
+			BTF_DECL_TAG_ENC(NAME_NTH(6), 3, -1),		/* [9] */
+			/* tag -> t/m2: tag1, tag3 */
+			BTF_DECL_TAG_ENC(NAME_NTH(4), 3, 1),		/* [10] */
+			BTF_DECL_TAG_ENC(NAME_NTH(6), 3, 1),		/* [11] */
+			BTF_END_RAW,
+		},
+		BTF_STR_SEC("\0t\0m1\0m2\0tag1\0tag2\0tag3"),
+	},
+	.expect = {
+		.raw_types = {
+			BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+			BTF_STRUCT_ENC(NAME_NTH(1), 2, 8),		/* [2] */
+				BTF_MEMBER_ENC(NAME_NTH(2), 1, 0),
+				BTF_MEMBER_ENC(NAME_NTH(3), 1, 32),
+			BTF_DECL_TAG_ENC(NAME_NTH(4), 2, -1),		/* [3] */
+			BTF_DECL_TAG_ENC(NAME_NTH(5), 2, -1),		/* [4] */
+			BTF_DECL_TAG_ENC(NAME_NTH(6), 2, -1),		/* [5] */
+			BTF_DECL_TAG_ENC(NAME_NTH(4), 2, 1),		/* [6] */
+			BTF_DECL_TAG_ENC(NAME_NTH(5), 2, 1),		/* [7] */
+			BTF_DECL_TAG_ENC(NAME_NTH(6), 2, 1),		/* [8] */
+			BTF_END_RAW,
+		},
+		BTF_STR_SEC("\0t\0m1\0m2\0tag1\0tag2\0tag3"),
+	},
+	.opts = {
+		.dont_resolve_fwds = false,
+	},
+},
+{
+	.descr = "dedup: typedef tags",
+	.input = {
+		.raw_types = {
+			/* int */
+			BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+			BTF_TYPEDEF_ENC(NAME_NTH(1), 1),		/* [2] */
+			BTF_TYPEDEF_ENC(NAME_NTH(1), 1),		/* [3] */
+			/* tag -> t: tag1, tag2 */
+			BTF_DECL_TAG_ENC(NAME_NTH(2), 2, -1),		/* [4] */
+			BTF_DECL_TAG_ENC(NAME_NTH(3), 2, -1),		/* [5] */
+			/* tag -> t: tag1, tag3 */
+			BTF_DECL_TAG_ENC(NAME_NTH(2), 3, -1),		/* [6] */
+			BTF_DECL_TAG_ENC(NAME_NTH(4), 3, -1),		/* [7] */
+			BTF_END_RAW,
+		},
+		BTF_STR_SEC("\0t\0tag1\0tag2\0tag3"),
+	},
+	.expect = {
+		.raw_types = {
+			BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),	/* [1] */
+			BTF_TYPEDEF_ENC(NAME_NTH(1), 1),		/* [2] */
+			BTF_DECL_TAG_ENC(NAME_NTH(2), 2, -1),		/* [3] */
+			BTF_DECL_TAG_ENC(NAME_NTH(3), 2, -1),		/* [4] */
+			BTF_DECL_TAG_ENC(NAME_NTH(4), 2, -1),		/* [5] */
+			BTF_END_RAW,
+		},
+		BTF_STR_SEC("\0t\0tag1\0tag2\0tag3"),
+	},
+	.opts = {
+		.dont_resolve_fwds = false,
+	},
+},
 
 };
 
@@ -6801,6 +7273,8 @@ static int btf_type_size(const struct btf_type *t)
 		return base_size + sizeof(struct btf_var);
 	case BTF_KIND_DATASEC:
 		return base_size + vlen * sizeof(struct btf_var_secinfo);
+	case BTF_KIND_DECL_TAG:
+		return base_size + sizeof(struct btf_decl_tag);
 	default:
 		fprintf(stderr, "Unsupported BTF_KIND:%u\n", kind);
 		return -EINVAL;
@@ -6871,8 +7345,8 @@ static void do_test_dedup(unsigned int test_num)
 		goto done;
 	}
 
-	test_btf_data = btf__get_raw_data(test_btf, &test_btf_size);
-	expect_btf_data = btf__get_raw_data(expect_btf, &expect_btf_size);
+	test_btf_data = btf__raw_data(test_btf, &test_btf_size);
+	expect_btf_data = btf__raw_data(expect_btf, &expect_btf_size);
 	if (CHECK(test_btf_size != expect_btf_size,
 		  "test_btf_size:%u != expect_btf_size:%u",
 		  test_btf_size, expect_btf_size)) {
@@ -6926,8 +7400,8 @@ static void do_test_dedup(unsigned int test_num)
 		expect_str_cur += expect_len + 1;
 	}
 
-	test_nr_types = btf__get_nr_types(test_btf);
-	expect_nr_types = btf__get_nr_types(expect_btf);
+	test_nr_types = btf__type_cnt(test_btf);
+	expect_nr_types = btf__type_cnt(expect_btf);
 	if (CHECK(test_nr_types != expect_nr_types,
 		  "test_nr_types:%u != expect_nr_types:%u",
 		  test_nr_types, expect_nr_types)) {
@@ -6935,7 +7409,7 @@ static void do_test_dedup(unsigned int test_num)
 		goto done;
 	}
 
-	for (i = 1; i <= test_nr_types; i++) {
+	for (i = 1; i < test_nr_types; i++) {
 		const struct btf_type *test_type, *expect_type;
 		int test_size, expect_size;
 
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
index 52ccf0cf35e1..aa76360d8f49 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
@@ -27,7 +27,7 @@ static struct btf_dump_test_case {
 static int btf_dump_all_types(const struct btf *btf,
 			      const struct btf_dump_opts *opts)
 {
-	size_t type_cnt = btf__get_nr_types(btf);
+	size_t type_cnt = btf__type_cnt(btf);
 	struct btf_dump *d;
 	int err = 0, id;
 
@@ -36,7 +36,7 @@ static int btf_dump_all_types(const struct btf *btf,
 	if (err)
 		return err;
 
-	for (id = 1; id <= type_cnt; id++) {
+	for (id = 1; id < type_cnt; id++) {
 		err = btf_dump__dump_type(d, id);
 		if (err)
 			goto done;
@@ -133,7 +133,7 @@ static char *dump_buf;
 static size_t dump_buf_sz;
 static FILE *dump_buf_file;
 
-void test_btf_dump_incremental(void)
+static void test_btf_dump_incremental(void)
 {
 	struct btf *btf = NULL;
 	struct btf_dump *d = NULL;
@@ -171,7 +171,7 @@ void test_btf_dump_incremental(void)
 	err = btf__add_field(btf, "x", 2, 0, 0);
 	ASSERT_OK(err, "field_ok");
 
-	for (i = 1; i <= btf__get_nr_types(btf); i++) {
+	for (i = 1; i < btf__type_cnt(btf); i++) {
 		err = btf_dump__dump_type(d, i);
 		ASSERT_OK(err, "dump_type_ok");
 	}
@@ -210,7 +210,7 @@ void test_btf_dump_incremental(void)
 	err = btf__add_field(btf, "s", 3, 32, 0);
 	ASSERT_OK(err, "field_ok");
 
-	for (i = 1; i <= btf__get_nr_types(btf); i++) {
+	for (i = 1; i < btf__type_cnt(btf); i++) {
 		err = btf_dump__dump_type(d, i);
 		ASSERT_OK(err, "dump_type_ok");
 	}
@@ -358,12 +358,27 @@ static void test_btf_dump_int_data(struct btf *btf, struct btf_dump *d,
 	TEST_BTF_DUMP_DATA_OVER(btf, d, NULL, str, int, sizeof(int)-1, "", 1);
 
 #ifdef __SIZEOF_INT128__
-	TEST_BTF_DUMP_DATA(btf, d, NULL, str, __int128, BTF_F_COMPACT,
-			   "(__int128)0xffffffffffffffff",
-			   0xffffffffffffffff);
-	ASSERT_OK(btf_dump_data(btf, d, "__int128", NULL, 0, &i, 16, str,
-				"(__int128)0xfffffffffffffffffffffffffffffffe"),
-		  "dump __int128");
+	/* gcc encode unsigned __int128 type with name "__int128 unsigned" in dwarf,
+	 * and clang encode it with name "unsigned __int128" in dwarf.
+	 * Do an availability test for either variant before doing actual test.
+	 */
+	if (btf__find_by_name(btf, "unsigned __int128") > 0) {
+		TEST_BTF_DUMP_DATA(btf, d, NULL, str, unsigned __int128, BTF_F_COMPACT,
+				   "(unsigned __int128)0xffffffffffffffff",
+				   0xffffffffffffffff);
+		ASSERT_OK(btf_dump_data(btf, d, "unsigned __int128", NULL, 0, &i, 16, str,
+					"(unsigned __int128)0xfffffffffffffffffffffffffffffffe"),
+			  "dump unsigned __int128");
+	} else if (btf__find_by_name(btf, "__int128 unsigned") > 0) {
+		TEST_BTF_DUMP_DATA(btf, d, NULL, str, __int128 unsigned, BTF_F_COMPACT,
+				   "(__int128 unsigned)0xffffffffffffffff",
+				   0xffffffffffffffff);
+		ASSERT_OK(btf_dump_data(btf, d, "__int128 unsigned", NULL, 0, &i, 16, str,
+					"(__int128 unsigned)0xfffffffffffffffffffffffffffffffe"),
+			  "dump unsigned __int128");
+	} else {
+		ASSERT_TRUE(false, "unsigned_int128_not_found");
+	}
 #endif
 }
 
@@ -763,8 +778,10 @@ static void test_btf_dump_struct_data(struct btf *btf, struct btf_dump *d,
 static void test_btf_dump_var_data(struct btf *btf, struct btf_dump *d,
 				   char *str)
 {
+#if defined(__i386__) || defined(__x86_64__) || defined(__aarch64__)
 	TEST_BTF_DUMP_VAR(btf, d, NULL, str, "cpu_number", int, BTF_F_COMPACT,
 			  "int cpu_number = (int)100", 100);
+#endif
 	TEST_BTF_DUMP_VAR(btf, d, NULL, str, "cpu_profile_flip", int, BTF_F_COMPACT,
 			  "static int cpu_profile_flip = (int)2", 2);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_endian.c b/tools/testing/selftests/bpf/prog_tests/btf_endian.c
index 8ab5d3e358dd..8afbf3d0b89a 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_endian.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_endian.c
@@ -7,12 +7,12 @@
 #include <bpf/btf.h>
 
 void test_btf_endian() {
-#if __BYTE_ORDER == __LITTLE_ENDIAN
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
 	enum btf_endianness endian = BTF_LITTLE_ENDIAN;
-#elif __BYTE_ORDER == __BIG_ENDIAN
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
 	enum btf_endianness endian = BTF_BIG_ENDIAN;
 #else
-#error "Unrecognized __BYTE_ORDER"
+#error "Unrecognized __BYTE_ORDER__"
 #endif
 	enum btf_endianness swap_endian = 1 - endian;
 	struct btf *btf = NULL, *swap_btf = NULL;
@@ -32,7 +32,7 @@ void test_btf_endian() {
 	ASSERT_EQ(btf__endianness(btf), swap_endian, "endian");
 
 	/* Get raw BTF data in non-native endianness... */
-	raw_data = btf__get_raw_data(btf, &raw_sz);
+	raw_data = btf__raw_data(btf, &raw_sz);
 	if (!ASSERT_OK_PTR(raw_data, "raw_data_inverted"))
 		goto err_out;
 
@@ -42,9 +42,9 @@ void test_btf_endian() {
 		goto err_out;
 
 	ASSERT_EQ(btf__endianness(swap_btf), swap_endian, "endian");
-	ASSERT_EQ(btf__get_nr_types(swap_btf), btf__get_nr_types(btf), "nr_types");
+	ASSERT_EQ(btf__type_cnt(swap_btf), btf__type_cnt(btf), "nr_types");
 
-	swap_raw_data = btf__get_raw_data(swap_btf, &swap_raw_sz);
+	swap_raw_data = btf__raw_data(swap_btf, &swap_raw_sz);
 	if (!ASSERT_OK_PTR(swap_raw_data, "swap_raw_data"))
 		goto err_out;
 
@@ -58,7 +58,7 @@ void test_btf_endian() {
 
 	/* swap it back to native endianness */
 	btf__set_endianness(swap_btf, endian);
-	swap_raw_data = btf__get_raw_data(swap_btf, &swap_raw_sz);
+	swap_raw_data = btf__raw_data(swap_btf, &swap_raw_sz);
 	if (!ASSERT_OK_PTR(swap_raw_data, "swap_raw_data"))
 		goto err_out;
 
@@ -75,7 +75,7 @@ void test_btf_endian() {
 	swap_btf = NULL;
 
 	btf__set_endianness(btf, swap_endian);
-	raw_data = btf__get_raw_data(btf, &raw_sz);
+	raw_data = btf__raw_data(btf, &raw_sz);
 	if (!ASSERT_OK_PTR(raw_data, "raw_data_inverted"))
 		goto err_out;
 
@@ -85,7 +85,7 @@ void test_btf_endian() {
 		goto err_out;
 
 	ASSERT_EQ(btf__endianness(swap_btf), swap_endian, "endian");
-	ASSERT_EQ(btf__get_nr_types(swap_btf), btf__get_nr_types(btf), "nr_types");
+	ASSERT_EQ(btf__type_cnt(swap_btf), btf__type_cnt(btf), "nr_types");
 
 	/* the type should appear as if it was stored in native endianness */
 	t = btf__type_by_id(swap_btf, var_id);
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_split.c b/tools/testing/selftests/bpf/prog_tests/btf_split.c
index ca7c2a91610a..b1ffe61f2aa9 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_split.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_split.c
@@ -72,7 +72,7 @@ void test_btf_split() {
 	d = btf_dump__new(btf2, NULL, &opts, btf_dump_printf);
 	if (!ASSERT_OK_PTR(d, "btf_dump__new"))
 		goto cleanup;
-	for (i = 1; i <= btf__get_nr_types(btf2); i++) {
+	for (i = 1; i < btf__type_cnt(btf2); i++) {
 		err = btf_dump__dump_type(d, i);
 		ASSERT_OK(err, "dump_type_ok");
 	}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_tag.c b/tools/testing/selftests/bpf/prog_tests/btf_tag.c
new file mode 100644
index 000000000000..91821f42714d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/btf_tag.c
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <test_progs.h>
+#include "tag.skel.h"
+
+void test_btf_tag(void)
+{
+	struct tag *skel;
+
+	skel = tag__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "btf_tag"))
+		return;
+
+	if (skel->rodata->skip_tests) {
+		printf("%s:SKIP: btf_tag attribute not supported", __func__);
+		test__skip();
+	}
+
+	tag__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_write.c b/tools/testing/selftests/bpf/prog_tests/btf_write.c
index 022c7d89d6f4..b912eeb0b6b4 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_write.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_write.c
@@ -4,19 +4,15 @@
 #include <bpf/btf.h>
 #include "btf_helpers.h"
 
-void test_btf_write() {
+static void gen_btf(struct btf *btf)
+{
 	const struct btf_var_secinfo *vi;
 	const struct btf_type *t;
 	const struct btf_member *m;
 	const struct btf_enum *v;
 	const struct btf_param *p;
-	struct btf *btf;
 	int id, err, str_off;
 
-	btf = btf__new_empty();
-	if (!ASSERT_OK_PTR(btf, "new_empty"))
-		return;
-
 	str_off = btf__find_str(btf, "int");
 	ASSERT_EQ(str_off, -ENOENT, "int_str_missing_off");
 
@@ -281,5 +277,159 @@ void test_btf_write() {
 		     "[17] DATASEC 'datasec1' size=12 vlen=1\n"
 		     "\ttype_id=1 offset=4 size=8", "raw_dump");
 
+	/* DECL_TAG */
+	id = btf__add_decl_tag(btf, "tag1", 16, -1);
+	ASSERT_EQ(id, 18, "tag_id");
+	t = btf__type_by_id(btf, 18);
+	ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "tag1", "tag_value");
+	ASSERT_EQ(btf_kind(t), BTF_KIND_DECL_TAG, "tag_kind");
+	ASSERT_EQ(t->type, 16, "tag_type");
+	ASSERT_EQ(btf_decl_tag(t)->component_idx, -1, "tag_component_idx");
+	ASSERT_STREQ(btf_type_raw_dump(btf, 18),
+		     "[18] DECL_TAG 'tag1' type_id=16 component_idx=-1", "raw_dump");
+
+	id = btf__add_decl_tag(btf, "tag2", 14, 1);
+	ASSERT_EQ(id, 19, "tag_id");
+	t = btf__type_by_id(btf, 19);
+	ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "tag2", "tag_value");
+	ASSERT_EQ(btf_kind(t), BTF_KIND_DECL_TAG, "tag_kind");
+	ASSERT_EQ(t->type, 14, "tag_type");
+	ASSERT_EQ(btf_decl_tag(t)->component_idx, 1, "tag_component_idx");
+	ASSERT_STREQ(btf_type_raw_dump(btf, 19),
+		     "[19] DECL_TAG 'tag2' type_id=14 component_idx=1", "raw_dump");
+}
+
+static void test_btf_add()
+{
+	struct btf *btf;
+
+	btf = btf__new_empty();
+	if (!ASSERT_OK_PTR(btf, "new_empty"))
+		return;
+
+	gen_btf(btf);
+
+	VALIDATE_RAW_BTF(
+		btf,
+		"[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+		"[2] PTR '(anon)' type_id=1",
+		"[3] CONST '(anon)' type_id=5",
+		"[4] VOLATILE '(anon)' type_id=3",
+		"[5] RESTRICT '(anon)' type_id=4",
+		"[6] ARRAY '(anon)' type_id=2 index_type_id=1 nr_elems=10",
+		"[7] STRUCT 's1' size=8 vlen=2\n"
+		"\t'f1' type_id=1 bits_offset=0\n"
+		"\t'f2' type_id=1 bits_offset=32 bitfield_size=16",
+		"[8] UNION 'u1' size=8 vlen=1\n"
+		"\t'f1' type_id=1 bits_offset=0 bitfield_size=16",
+		"[9] ENUM 'e1' size=4 vlen=2\n"
+		"\t'v1' val=1\n"
+		"\t'v2' val=2",
+		"[10] FWD 'struct_fwd' fwd_kind=struct",
+		"[11] FWD 'union_fwd' fwd_kind=union",
+		"[12] ENUM 'enum_fwd' size=4 vlen=0",
+		"[13] TYPEDEF 'typedef1' type_id=1",
+		"[14] FUNC 'func1' type_id=15 linkage=global",
+		"[15] FUNC_PROTO '(anon)' ret_type_id=1 vlen=2\n"
+		"\t'p1' type_id=1\n"
+		"\t'p2' type_id=2",
+		"[16] VAR 'var1' type_id=1, linkage=global-alloc",
+		"[17] DATASEC 'datasec1' size=12 vlen=1\n"
+		"\ttype_id=1 offset=4 size=8",
+		"[18] DECL_TAG 'tag1' type_id=16 component_idx=-1",
+		"[19] DECL_TAG 'tag2' type_id=14 component_idx=1");
+
 	btf__free(btf);
 }
+
+static void test_btf_add_btf()
+{
+	struct btf *btf1 = NULL, *btf2 = NULL;
+	int id;
+
+	btf1 = btf__new_empty();
+	if (!ASSERT_OK_PTR(btf1, "btf1"))
+		return;
+
+	btf2 = btf__new_empty();
+	if (!ASSERT_OK_PTR(btf2, "btf2"))
+		goto cleanup;
+
+	gen_btf(btf1);
+	gen_btf(btf2);
+
+	id = btf__add_btf(btf1, btf2);
+	if (!ASSERT_EQ(id, 20, "id"))
+		goto cleanup;
+
+	VALIDATE_RAW_BTF(
+		btf1,
+		"[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+		"[2] PTR '(anon)' type_id=1",
+		"[3] CONST '(anon)' type_id=5",
+		"[4] VOLATILE '(anon)' type_id=3",
+		"[5] RESTRICT '(anon)' type_id=4",
+		"[6] ARRAY '(anon)' type_id=2 index_type_id=1 nr_elems=10",
+		"[7] STRUCT 's1' size=8 vlen=2\n"
+		"\t'f1' type_id=1 bits_offset=0\n"
+		"\t'f2' type_id=1 bits_offset=32 bitfield_size=16",
+		"[8] UNION 'u1' size=8 vlen=1\n"
+		"\t'f1' type_id=1 bits_offset=0 bitfield_size=16",
+		"[9] ENUM 'e1' size=4 vlen=2\n"
+		"\t'v1' val=1\n"
+		"\t'v2' val=2",
+		"[10] FWD 'struct_fwd' fwd_kind=struct",
+		"[11] FWD 'union_fwd' fwd_kind=union",
+		"[12] ENUM 'enum_fwd' size=4 vlen=0",
+		"[13] TYPEDEF 'typedef1' type_id=1",
+		"[14] FUNC 'func1' type_id=15 linkage=global",
+		"[15] FUNC_PROTO '(anon)' ret_type_id=1 vlen=2\n"
+		"\t'p1' type_id=1\n"
+		"\t'p2' type_id=2",
+		"[16] VAR 'var1' type_id=1, linkage=global-alloc",
+		"[17] DATASEC 'datasec1' size=12 vlen=1\n"
+		"\ttype_id=1 offset=4 size=8",
+		"[18] DECL_TAG 'tag1' type_id=16 component_idx=-1",
+		"[19] DECL_TAG 'tag2' type_id=14 component_idx=1",
+
+		/* types appended from the second BTF */
+		"[20] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+		"[21] PTR '(anon)' type_id=20",
+		"[22] CONST '(anon)' type_id=24",
+		"[23] VOLATILE '(anon)' type_id=22",
+		"[24] RESTRICT '(anon)' type_id=23",
+		"[25] ARRAY '(anon)' type_id=21 index_type_id=20 nr_elems=10",
+		"[26] STRUCT 's1' size=8 vlen=2\n"
+		"\t'f1' type_id=20 bits_offset=0\n"
+		"\t'f2' type_id=20 bits_offset=32 bitfield_size=16",
+		"[27] UNION 'u1' size=8 vlen=1\n"
+		"\t'f1' type_id=20 bits_offset=0 bitfield_size=16",
+		"[28] ENUM 'e1' size=4 vlen=2\n"
+		"\t'v1' val=1\n"
+		"\t'v2' val=2",
+		"[29] FWD 'struct_fwd' fwd_kind=struct",
+		"[30] FWD 'union_fwd' fwd_kind=union",
+		"[31] ENUM 'enum_fwd' size=4 vlen=0",
+		"[32] TYPEDEF 'typedef1' type_id=20",
+		"[33] FUNC 'func1' type_id=34 linkage=global",
+		"[34] FUNC_PROTO '(anon)' ret_type_id=20 vlen=2\n"
+		"\t'p1' type_id=20\n"
+		"\t'p2' type_id=21",
+		"[35] VAR 'var1' type_id=20, linkage=global-alloc",
+		"[36] DATASEC 'datasec1' size=12 vlen=1\n"
+		"\ttype_id=20 offset=4 size=8",
+		"[37] DECL_TAG 'tag1' type_id=35 component_idx=-1",
+		"[38] DECL_TAG 'tag2' type_id=33 component_idx=1");
+
+cleanup:
+	btf__free(btf1);
+	btf__free(btf2);
+}
+
+void test_btf_write()
+{
+	if (test__start_subtest("btf_add"))
+		test_btf_add();
+	if (test__start_subtest("btf_add_btf"))
+		test_btf_add_btf();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c b/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c
index 876be0ecb654..621c57222191 100644
--- a/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c
+++ b/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c
@@ -363,7 +363,7 @@ close_bpf_object:
 	cg_storage_multi_shared__destroy(obj);
 }
 
-void test_cg_storage_multi(void)
+void serial_test_cg_storage_multi(void)
 {
 	int parent_cgroup_fd = -1, child_cgroup_fd = -1;
 
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c
index 70e94e783070..5de485c7370f 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c
@@ -21,7 +21,7 @@ static int prog_load(void)
 			       bpf_log_buf, BPF_LOG_BUF_SIZE);
 }
 
-void test_cgroup_attach_autodetach(void)
+void serial_test_cgroup_attach_autodetach(void)
 {
 	__u32 duration = 0, prog_cnt = 4, attach_flags;
 	int allow_prog[2] = {-1};
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c
index 20bb8831dda6..731bea84d8ed 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c
@@ -74,7 +74,7 @@ static int prog_load_cnt(int verdict, int val)
 	return ret;
 }
 
-void test_cgroup_attach_multi(void)
+void serial_test_cgroup_attach_multi(void)
 {
 	__u32 prog_ids[4], prog_cnt = 0, attach_flags, saved_prog_id;
 	int cg1 = 0, cg2 = 0, cg3 = 0, cg4 = 0, cg5 = 0, key = 0;
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c
index 9e96f8d87fea..10d3c33821a7 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c
@@ -23,7 +23,7 @@ static int prog_load(int verdict)
 			       bpf_log_buf, BPF_LOG_BUF_SIZE);
 }
 
-void test_cgroup_attach_override(void)
+void serial_test_cgroup_attach_override(void)
 {
 	int drop_prog = -1, allow_prog = -1, foo = -1, bar = -1;
 	__u32 duration = 0;
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_link.c b/tools/testing/selftests/bpf/prog_tests/cgroup_link.c
index 9091524131d6..9e6e6aad347c 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_link.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_link.c
@@ -24,7 +24,7 @@ int ping_and_check(int exp_calls, int exp_alt_calls)
 	return 0;
 }
 
-void test_cgroup_link(void)
+void serial_test_cgroup_link(void)
 {
 	struct {
 		const char *path;
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c
index ab3b9bc5e6d1..9026b42914d3 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c
@@ -46,7 +46,7 @@ void test_cgroup_v1v2(void)
 {
 	struct network_helper_opts opts = {};
 	int server_fd, client_fd, cgroup_fd;
-	static const int port = 60123;
+	static const int port = 60120;
 
 	/* Step 1: Check base connectivity works without any BPF. */
 	server_fd = start_server(AF_INET, SOCK_STREAM, NULL, port, 0);
diff --git a/tools/testing/selftests/bpf/prog_tests/check_mtu.c b/tools/testing/selftests/bpf/prog_tests/check_mtu.c
index 012068f33a0a..f73e6e36b74d 100644
--- a/tools/testing/selftests/bpf/prog_tests/check_mtu.c
+++ b/tools/testing/selftests/bpf/prog_tests/check_mtu.c
@@ -195,7 +195,7 @@ cleanup:
 	test_check_mtu__destroy(skel);
 }
 
-void test_check_mtu(void)
+void serial_test_check_mtu(void)
 {
 	__u32 mtu_lo;
 
diff --git a/tools/testing/selftests/bpf/prog_tests/core_autosize.c b/tools/testing/selftests/bpf/prog_tests/core_autosize.c
index 3d4b2a358d47..1dfe14ff6aa4 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_autosize.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_autosize.c
@@ -112,7 +112,7 @@ void test_core_autosize(void)
 	if (!ASSERT_OK_PTR(f, "btf_fdopen"))
 		goto cleanup;
 
-	raw_data = btf__get_raw_data(btf, &raw_sz);
+	raw_data = btf__raw_data(btf, &raw_sz);
 	if (!ASSERT_OK_PTR(raw_data, "raw_data"))
 		goto cleanup;
 	written = fwrite(raw_data, 1, raw_sz, f);
@@ -163,7 +163,7 @@ void test_core_autosize(void)
 
 	usleep(1);
 
-	bss_map = bpf_object__find_map_by_name(skel->obj, "test_cor.bss");
+	bss_map = bpf_object__find_map_by_name(skel->obj, ".bss");
 	if (!ASSERT_OK_PTR(bss_map, "bss_map_find"))
 		goto cleanup;
 
diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
index 4739b15b2a97..55ec85ba7375 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
@@ -30,7 +30,7 @@ static int duration = 0;
 	.output_len = sizeof(struct core_reloc_module_output),		\
 	.prog_sec_name = sec_name,					\
 	.raw_tp_name = tp_name,						\
-	.trigger = trigger_module_test_read,				\
+	.trigger = __trigger_module_test_read,				\
 	.needs_testmod = true,						\
 }
 
@@ -249,8 +249,7 @@ static int duration = 0;
 #define SIZE_CASE_COMMON(name)						\
 	.case_name = #name,						\
 	.bpf_obj_file = "test_core_reloc_size.o",			\
-	.btf_src_file = "btf__core_reloc_" #name ".o",			\
-	.relaxed_core_relocs = true
+	.btf_src_file = "btf__core_reloc_" #name ".o"
 
 #define SIZE_OUTPUT_DATA(type)						\
 	STRUCT_TO_CHAR_PTR(core_reloc_size_output) {			\
@@ -382,7 +381,7 @@ static int setup_type_id_case_local(struct core_reloc_test_case *test)
 	exp->local_anon_void_ptr = -1;
 	exp->local_anon_arr = -1;
 
-	for (i = 1; i <= btf__get_nr_types(local_btf); i++)
+	for (i = 1; i < btf__type_cnt(local_btf); i++)
 	{
 		t = btf__type_by_id(local_btf, i);
 		/* we are interested only in anonymous types */
@@ -475,19 +474,11 @@ static int setup_type_id_case_failure(struct core_reloc_test_case *test)
 	return 0;
 }
 
-static int trigger_module_test_read(const struct core_reloc_test_case *test)
+static int __trigger_module_test_read(const struct core_reloc_test_case *test)
 {
 	struct core_reloc_module_output *exp = (void *)test->output;
-	int fd, err;
-
-	fd = open("/sys/kernel/bpf_testmod", O_RDONLY);
-	err = -errno;
-	if (CHECK(fd < 0, "testmod_file_open", "failed: %d\n", err))
-		return err;
-
-	read(fd, NULL, exp->len); /* request expected number of bytes */
-	close(fd);
 
+	trigger_module_test_read(exp->len);
 	return 0;
 }
 
@@ -876,7 +867,7 @@ void test_core_reloc(void)
 			goto cleanup;
 		}
 
-		data_map = bpf_object__find_map_by_name(obj, "test_cor.bss");
+		data_map = bpf_object__find_map_by_name(obj, ".bss");
 		if (CHECK(!data_map, "find_data_map", "data map not found\n"))
 			goto cleanup;
 
diff --git a/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c b/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c
new file mode 100644
index 000000000000..cbaa44ffb8c6
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c
@@ -0,0 +1,115 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2021. Huawei Technologies Co., Ltd */
+#include <test_progs.h>
+#include "dummy_st_ops.skel.h"
+
+/* Need to keep consistent with definition in include/linux/bpf.h */
+struct bpf_dummy_ops_state {
+	int val;
+};
+
+static void test_dummy_st_ops_attach(void)
+{
+	struct dummy_st_ops *skel;
+	struct bpf_link *link;
+
+	skel = dummy_st_ops__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "dummy_st_ops_load"))
+		return;
+
+	link = bpf_map__attach_struct_ops(skel->maps.dummy_1);
+	ASSERT_EQ(libbpf_get_error(link), -EOPNOTSUPP, "dummy_st_ops_attach");
+
+	dummy_st_ops__destroy(skel);
+}
+
+static void test_dummy_init_ret_value(void)
+{
+	__u64 args[1] = {0};
+	struct bpf_prog_test_run_attr attr = {
+		.ctx_size_in = sizeof(args),
+		.ctx_in = args,
+	};
+	struct dummy_st_ops *skel;
+	int fd, err;
+
+	skel = dummy_st_ops__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "dummy_st_ops_load"))
+		return;
+
+	fd = bpf_program__fd(skel->progs.test_1);
+	attr.prog_fd = fd;
+	err = bpf_prog_test_run_xattr(&attr);
+	ASSERT_OK(err, "test_run");
+	ASSERT_EQ(attr.retval, 0xf2f3f4f5, "test_ret");
+
+	dummy_st_ops__destroy(skel);
+}
+
+static void test_dummy_init_ptr_arg(void)
+{
+	int exp_retval = 0xbeef;
+	struct bpf_dummy_ops_state in_state = {
+		.val = exp_retval,
+	};
+	__u64 args[1] = {(unsigned long)&in_state};
+	struct bpf_prog_test_run_attr attr = {
+		.ctx_size_in = sizeof(args),
+		.ctx_in = args,
+	};
+	struct dummy_st_ops *skel;
+	int fd, err;
+
+	skel = dummy_st_ops__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "dummy_st_ops_load"))
+		return;
+
+	fd = bpf_program__fd(skel->progs.test_1);
+	attr.prog_fd = fd;
+	err = bpf_prog_test_run_xattr(&attr);
+	ASSERT_OK(err, "test_run");
+	ASSERT_EQ(in_state.val, 0x5a, "test_ptr_ret");
+	ASSERT_EQ(attr.retval, exp_retval, "test_ret");
+
+	dummy_st_ops__destroy(skel);
+}
+
+static void test_dummy_multiple_args(void)
+{
+	__u64 args[5] = {0, -100, 0x8a5f, 'c', 0x1234567887654321ULL};
+	struct bpf_prog_test_run_attr attr = {
+		.ctx_size_in = sizeof(args),
+		.ctx_in = args,
+	};
+	struct dummy_st_ops *skel;
+	int fd, err;
+	size_t i;
+	char name[8];
+
+	skel = dummy_st_ops__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "dummy_st_ops_load"))
+		return;
+
+	fd = bpf_program__fd(skel->progs.test_2);
+	attr.prog_fd = fd;
+	err = bpf_prog_test_run_xattr(&attr);
+	ASSERT_OK(err, "test_run");
+	for (i = 0; i < ARRAY_SIZE(args); i++) {
+		snprintf(name, sizeof(name), "arg %zu", i);
+		ASSERT_EQ(skel->bss->test_2_args[i], args[i], name);
+	}
+
+	dummy_st_ops__destroy(skel);
+}
+
+void test_dummy_st_ops(void)
+{
+	if (test__start_subtest("dummy_st_ops_attach"))
+		test_dummy_st_ops_attach();
+	if (test__start_subtest("dummy_init_ret_value"))
+		test_dummy_init_ret_value();
+	if (test__start_subtest("dummy_init_ptr_arg"))
+		test_dummy_init_ptr_arg();
+	if (test__start_subtest("dummy_multiple_args"))
+		test_dummy_multiple_args();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
index 91154c2ba256..4374ac8a8a91 100644
--- a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
+++ b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
@@ -6,23 +6,23 @@
 
 void test_fentry_fexit(void)
 {
-	struct fentry_test *fentry_skel = NULL;
-	struct fexit_test *fexit_skel = NULL;
+	struct fentry_test_lskel *fentry_skel = NULL;
+	struct fexit_test_lskel *fexit_skel = NULL;
 	__u64 *fentry_res, *fexit_res;
 	__u32 duration = 0, retval;
 	int err, prog_fd, i;
 
-	fentry_skel = fentry_test__open_and_load();
+	fentry_skel = fentry_test_lskel__open_and_load();
 	if (CHECK(!fentry_skel, "fentry_skel_load", "fentry skeleton failed\n"))
 		goto close_prog;
-	fexit_skel = fexit_test__open_and_load();
+	fexit_skel = fexit_test_lskel__open_and_load();
 	if (CHECK(!fexit_skel, "fexit_skel_load", "fexit skeleton failed\n"))
 		goto close_prog;
 
-	err = fentry_test__attach(fentry_skel);
+	err = fentry_test_lskel__attach(fentry_skel);
 	if (CHECK(err, "fentry_attach", "fentry attach failed: %d\n", err))
 		goto close_prog;
-	err = fexit_test__attach(fexit_skel);
+	err = fexit_test_lskel__attach(fexit_skel);
 	if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err))
 		goto close_prog;
 
@@ -44,6 +44,6 @@ void test_fentry_fexit(void)
 	}
 
 close_prog:
-	fentry_test__destroy(fentry_skel);
-	fexit_test__destroy(fexit_skel);
+	fentry_test_lskel__destroy(fentry_skel);
+	fexit_test_lskel__destroy(fexit_skel);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_test.c b/tools/testing/selftests/bpf/prog_tests/fentry_test.c
index 174c89e7456e..12921b3850d2 100644
--- a/tools/testing/selftests/bpf/prog_tests/fentry_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/fentry_test.c
@@ -3,19 +3,19 @@
 #include <test_progs.h>
 #include "fentry_test.lskel.h"
 
-static int fentry_test(struct fentry_test *fentry_skel)
+static int fentry_test(struct fentry_test_lskel *fentry_skel)
 {
 	int err, prog_fd, i;
 	__u32 duration = 0, retval;
 	int link_fd;
 	__u64 *result;
 
-	err = fentry_test__attach(fentry_skel);
+	err = fentry_test_lskel__attach(fentry_skel);
 	if (!ASSERT_OK(err, "fentry_attach"))
 		return err;
 
 	/* Check that already linked program can't be attached again. */
-	link_fd = fentry_test__test1__attach(fentry_skel);
+	link_fd = fentry_test_lskel__test1__attach(fentry_skel);
 	if (!ASSERT_LT(link_fd, 0, "fentry_attach_link"))
 		return -1;
 
@@ -31,7 +31,7 @@ static int fentry_test(struct fentry_test *fentry_skel)
 			return -1;
 	}
 
-	fentry_test__detach(fentry_skel);
+	fentry_test_lskel__detach(fentry_skel);
 
 	/* zero results for re-attach test */
 	memset(fentry_skel->bss, 0, sizeof(*fentry_skel->bss));
@@ -40,10 +40,10 @@ static int fentry_test(struct fentry_test *fentry_skel)
 
 void test_fentry_test(void)
 {
-	struct fentry_test *fentry_skel = NULL;
+	struct fentry_test_lskel *fentry_skel = NULL;
 	int err;
 
-	fentry_skel = fentry_test__open_and_load();
+	fentry_skel = fentry_test_lskel__open_and_load();
 	if (!ASSERT_OK_PTR(fentry_skel, "fentry_skel_load"))
 		goto cleanup;
 
@@ -55,5 +55,5 @@ void test_fentry_test(void)
 	ASSERT_OK(err, "fentry_second_attach");
 
 cleanup:
-	fentry_test__destroy(fentry_skel);
+	fentry_test_lskel__destroy(fentry_skel);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
index 73b4c76e6b86..9cff14a23bb7 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
@@ -60,7 +60,7 @@ static void test_fexit_bpf2bpf_common(const char *obj_file,
 	struct bpf_object *obj = NULL, *tgt_obj;
 	__u32 retval, tgt_prog_id, info_len;
 	struct bpf_prog_info prog_info = {};
-	struct bpf_program **prog = NULL;
+	struct bpf_program **prog = NULL, *p;
 	struct bpf_link **link = NULL;
 	int err, tgt_fd, i;
 	struct btf *btf;
@@ -69,9 +69,6 @@ static void test_fexit_bpf2bpf_common(const char *obj_file,
 			    &tgt_obj, &tgt_fd);
 	if (!ASSERT_OK(err, "tgt_prog_load"))
 		return;
-	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
-			    .attach_prog_fd = tgt_fd,
-			   );
 
 	info_len = sizeof(prog_info);
 	err = bpf_obj_get_info_by_fd(tgt_fd, &prog_info, &info_len);
@@ -89,10 +86,15 @@ static void test_fexit_bpf2bpf_common(const char *obj_file,
 	if (!ASSERT_OK_PTR(prog, "prog_ptr"))
 		goto close_prog;
 
-	obj = bpf_object__open_file(obj_file, &opts);
+	obj = bpf_object__open_file(obj_file, NULL);
 	if (!ASSERT_OK_PTR(obj, "obj_open"))
 		goto close_prog;
 
+	bpf_object__for_each_program(p, obj) {
+		err = bpf_program__set_attach_target(p, tgt_fd, NULL);
+		ASSERT_OK(err, "set_attach_target");
+	}
+
 	err = bpf_object__load(obj);
 	if (!ASSERT_OK(err, "obj_load"))
 		goto close_prog;
@@ -270,7 +272,7 @@ static void test_fmod_ret_freplace(void)
 	struct bpf_link *freplace_link = NULL;
 	struct bpf_program *prog;
 	__u32 duration = 0;
-	int err, pkt_fd;
+	int err, pkt_fd, attach_prog_fd;
 
 	err = bpf_prog_load(tgt_name, BPF_PROG_TYPE_UNSPEC,
 			    &pkt_obj, &pkt_fd);
@@ -278,26 +280,32 @@ static void test_fmod_ret_freplace(void)
 	if (CHECK(err, "tgt_prog_load", "file %s err %d errno %d\n",
 		  tgt_name, err, errno))
 		return;
-	opts.attach_prog_fd = pkt_fd;
 
-	freplace_obj = bpf_object__open_file(freplace_name, &opts);
+	freplace_obj = bpf_object__open_file(freplace_name, NULL);
 	if (!ASSERT_OK_PTR(freplace_obj, "freplace_obj_open"))
 		goto out;
 
+	prog = bpf_object__next_program(freplace_obj, NULL);
+	err = bpf_program__set_attach_target(prog, pkt_fd, NULL);
+	ASSERT_OK(err, "freplace__set_attach_target");
+
 	err = bpf_object__load(freplace_obj);
 	if (CHECK(err, "freplace_obj_load", "err %d\n", err))
 		goto out;
 
-	prog = bpf_program__next(NULL, freplace_obj);
 	freplace_link = bpf_program__attach_trace(prog);
 	if (!ASSERT_OK_PTR(freplace_link, "freplace_attach_trace"))
 		goto out;
 
-	opts.attach_prog_fd = bpf_program__fd(prog);
-	fmod_obj = bpf_object__open_file(fmod_ret_name, &opts);
+	fmod_obj = bpf_object__open_file(fmod_ret_name, NULL);
 	if (!ASSERT_OK_PTR(fmod_obj, "fmod_obj_open"))
 		goto out;
 
+	attach_prog_fd = bpf_program__fd(prog);
+	prog = bpf_object__next_program(fmod_obj, NULL);
+	err = bpf_program__set_attach_target(prog, attach_prog_fd, NULL);
+	ASSERT_OK(err, "fmod_ret_set_attach_target");
+
 	err = bpf_object__load(fmod_obj);
 	if (CHECK(!err, "fmod_obj_load", "loading fmod_ret should fail\n"))
 		goto out;
@@ -322,14 +330,14 @@ static void test_func_sockmap_update(void)
 }
 
 static void test_obj_load_failure_common(const char *obj_file,
-					  const char *target_obj_file)
-
+					 const char *target_obj_file)
 {
 	/*
 	 * standalone test that asserts failure to load freplace prog
 	 * because of invalid return code.
 	 */
 	struct bpf_object *obj = NULL, *pkt_obj;
+	struct bpf_program *prog;
 	int err, pkt_fd;
 	__u32 duration = 0;
 
@@ -339,14 +347,15 @@ static void test_obj_load_failure_common(const char *obj_file,
 	if (CHECK(err, "tgt_prog_load", "file %s err %d errno %d\n",
 		  target_obj_file, err, errno))
 		return;
-	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
-			    .attach_prog_fd = pkt_fd,
-			   );
 
-	obj = bpf_object__open_file(obj_file, &opts);
+	obj = bpf_object__open_file(obj_file, NULL);
 	if (!ASSERT_OK_PTR(obj, "obj_open"))
 		goto close_prog;
 
+	prog = bpf_object__next_program(obj, NULL);
+	err = bpf_program__set_attach_target(prog, pkt_fd, NULL);
+	ASSERT_OK(err, "set_attach_target");
+
 	/* It should fail to load the program */
 	err = bpf_object__load(obj);
 	if (CHECK(!err, "bpf_obj_load should fail", "err %d\n", err))
@@ -371,7 +380,8 @@ static void test_func_map_prog_compatibility(void)
 				     "./test_attach_probe.o");
 }
 
-void test_fexit_bpf2bpf(void)
+/* NOTE: affect other tests, must run in serial mode */
+void serial_test_fexit_bpf2bpf(void)
 {
 	if (test__start_subtest("target_no_callees"))
 		test_target_no_callees();
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c b/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c
index 4e7f4b42ea29..f949647dbbc2 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c
@@ -10,7 +10,7 @@
 
 static int do_sleep(void *skel)
 {
-	struct fexit_sleep *fexit_skel = skel;
+	struct fexit_sleep_lskel *fexit_skel = skel;
 	struct timespec ts1 = { .tv_nsec = 1 };
 	struct timespec ts2 = { .tv_sec = 10 };
 
@@ -25,16 +25,16 @@ static char child_stack[STACK_SIZE];
 
 void test_fexit_sleep(void)
 {
-	struct fexit_sleep *fexit_skel = NULL;
+	struct fexit_sleep_lskel *fexit_skel = NULL;
 	int wstatus, duration = 0;
 	pid_t cpid;
 	int err, fexit_cnt;
 
-	fexit_skel = fexit_sleep__open_and_load();
+	fexit_skel = fexit_sleep_lskel__open_and_load();
 	if (CHECK(!fexit_skel, "fexit_skel_load", "fexit skeleton failed\n"))
 		goto cleanup;
 
-	err = fexit_sleep__attach(fexit_skel);
+	err = fexit_sleep_lskel__attach(fexit_skel);
 	if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err))
 		goto cleanup;
 
@@ -60,7 +60,7 @@ void test_fexit_sleep(void)
 	 */
 	close(fexit_skel->progs.nanosleep_fentry.prog_fd);
 	close(fexit_skel->progs.nanosleep_fexit.prog_fd);
-	fexit_sleep__detach(fexit_skel);
+	fexit_sleep_lskel__detach(fexit_skel);
 
 	/* kill the thread to unwind sys_nanosleep stack through the trampoline */
 	kill(cpid, 9);
@@ -78,5 +78,5 @@ void test_fexit_sleep(void)
 		goto cleanup;
 
 cleanup:
-	fexit_sleep__destroy(fexit_skel);
+	fexit_sleep_lskel__destroy(fexit_skel);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_test.c b/tools/testing/selftests/bpf/prog_tests/fexit_test.c
index af3dba726701..d4887d8bb396 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_test.c
@@ -3,19 +3,19 @@
 #include <test_progs.h>
 #include "fexit_test.lskel.h"
 
-static int fexit_test(struct fexit_test *fexit_skel)
+static int fexit_test(struct fexit_test_lskel *fexit_skel)
 {
 	int err, prog_fd, i;
 	__u32 duration = 0, retval;
 	int link_fd;
 	__u64 *result;
 
-	err = fexit_test__attach(fexit_skel);
+	err = fexit_test_lskel__attach(fexit_skel);
 	if (!ASSERT_OK(err, "fexit_attach"))
 		return err;
 
 	/* Check that already linked program can't be attached again. */
-	link_fd = fexit_test__test1__attach(fexit_skel);
+	link_fd = fexit_test_lskel__test1__attach(fexit_skel);
 	if (!ASSERT_LT(link_fd, 0, "fexit_attach_link"))
 		return -1;
 
@@ -31,7 +31,7 @@ static int fexit_test(struct fexit_test *fexit_skel)
 			return -1;
 	}
 
-	fexit_test__detach(fexit_skel);
+	fexit_test_lskel__detach(fexit_skel);
 
 	/* zero results for re-attach test */
 	memset(fexit_skel->bss, 0, sizeof(*fexit_skel->bss));
@@ -40,10 +40,10 @@ static int fexit_test(struct fexit_test *fexit_skel)
 
 void test_fexit_test(void)
 {
-	struct fexit_test *fexit_skel = NULL;
+	struct fexit_test_lskel *fexit_skel = NULL;
 	int err;
 
-	fexit_skel = fexit_test__open_and_load();
+	fexit_skel = fexit_test_lskel__open_and_load();
 	if (!ASSERT_OK_PTR(fexit_skel, "fexit_skel_load"))
 		goto cleanup;
 
@@ -55,5 +55,5 @@ void test_fexit_test(void)
 	ASSERT_OK(err, "fexit_second_attach");
 
 cleanup:
-	fexit_test__destroy(fexit_skel);
+	fexit_test_lskel__destroy(fexit_skel);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
index 225714f71ac6..ac54e3f91d42 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
@@ -458,9 +458,9 @@ static int init_prog_array(struct bpf_object *obj, struct bpf_map *prog_array)
 		return -1;
 
 	for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
-		snprintf(prog_name, sizeof(prog_name), "flow_dissector/%i", i);
+		snprintf(prog_name, sizeof(prog_name), "flow_dissector_%d", i);
 
-		prog = bpf_object__find_program_by_title(obj, prog_name);
+		prog = bpf_object__find_program_by_name(obj, prog_name);
 		if (!prog)
 			return -1;
 
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c
index 0e8a4d2f023d..6093728497c7 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c
@@ -2,7 +2,7 @@
 #include <test_progs.h>
 #include <network_helpers.h>
 
-void test_flow_dissector_load_bytes(void)
+void serial_test_flow_dissector_load_bytes(void)
 {
 	struct bpf_flow_keys flow_keys;
 	__u32 duration = 0, retval, size;
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c
index 3931ede5c534..f0c6c226aba8 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c
@@ -628,7 +628,7 @@ out_close:
 	}
 }
 
-void test_flow_dissector_reattach(void)
+void serial_test_flow_dissector_reattach(void)
 {
 	int err, new_net, saved_net;
 
diff --git a/tools/testing/selftests/bpf/prog_tests/get_branch_snapshot.c b/tools/testing/selftests/bpf/prog_tests/get_branch_snapshot.c
new file mode 100644
index 000000000000..3948da12a528
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/get_branch_snapshot.c
@@ -0,0 +1,130 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <test_progs.h>
+#include "get_branch_snapshot.skel.h"
+
+static int *pfd_array;
+static int cpu_cnt;
+
+static bool is_hypervisor(void)
+{
+	char *line = NULL;
+	bool ret = false;
+	size_t len;
+	FILE *fp;
+
+	fp = fopen("/proc/cpuinfo", "r");
+	if (!fp)
+		return false;
+
+	while (getline(&line, &len, fp) != -1) {
+		if (!strncmp(line, "flags", 5)) {
+			if (strstr(line, "hypervisor") != NULL)
+				ret = true;
+			break;
+		}
+	}
+
+	free(line);
+	fclose(fp);
+	return ret;
+}
+
+static int create_perf_events(void)
+{
+	struct perf_event_attr attr = {0};
+	int cpu;
+
+	/* create perf event */
+	attr.size = sizeof(attr);
+	attr.type = PERF_TYPE_RAW;
+	attr.config = 0x1b00;
+	attr.sample_type = PERF_SAMPLE_BRANCH_STACK;
+	attr.branch_sample_type = PERF_SAMPLE_BRANCH_KERNEL |
+		PERF_SAMPLE_BRANCH_USER | PERF_SAMPLE_BRANCH_ANY;
+
+	cpu_cnt = libbpf_num_possible_cpus();
+	pfd_array = malloc(sizeof(int) * cpu_cnt);
+	if (!pfd_array) {
+		cpu_cnt = 0;
+		return 1;
+	}
+
+	for (cpu = 0; cpu < cpu_cnt; cpu++) {
+		pfd_array[cpu] = syscall(__NR_perf_event_open, &attr,
+					 -1, cpu, -1, PERF_FLAG_FD_CLOEXEC);
+		if (pfd_array[cpu] < 0)
+			break;
+	}
+
+	return cpu == 0;
+}
+
+static void close_perf_events(void)
+{
+	int cpu, fd;
+
+	for (cpu = 0; cpu < cpu_cnt; cpu++) {
+		fd = pfd_array[cpu];
+		if (fd < 0)
+			break;
+		close(fd);
+	}
+	free(pfd_array);
+}
+
+void serial_test_get_branch_snapshot(void)
+{
+	struct get_branch_snapshot *skel = NULL;
+	int err;
+
+	/* Skip the test before we fix LBR snapshot for hypervisor. */
+	if (is_hypervisor()) {
+		test__skip();
+		return;
+	}
+
+	if (create_perf_events()) {
+		test__skip();  /* system doesn't support LBR */
+		goto cleanup;
+	}
+
+	skel = get_branch_snapshot__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "get_branch_snapshot__open_and_load"))
+		goto cleanup;
+
+	err = kallsyms_find("bpf_testmod_loop_test", &skel->bss->address_low);
+	if (!ASSERT_OK(err, "kallsyms_find"))
+		goto cleanup;
+
+	/* Just a guess for the end of this function, as module functions
+	 * in /proc/kallsyms could come in any order.
+	 */
+	skel->bss->address_high = skel->bss->address_low + 128;
+
+	err = get_branch_snapshot__attach(skel);
+	if (!ASSERT_OK(err, "get_branch_snapshot__attach"))
+		goto cleanup;
+
+	trigger_module_test_read(100);
+
+	if (skel->bss->total_entries < 16) {
+		/* too few entries for the hit/waste test */
+		test__skip();
+		goto cleanup;
+	}
+
+	ASSERT_GT(skel->bss->test1_hits, 6, "find_looptest_in_lbr");
+
+	/* Given we stop LBR in software, we will waste a few entries.
+	 * But we should try to waste as few as possible entries. We are at
+	 * about 7 on x86_64 systems.
+	 * Add a check for < 10 so that we get heads-up when something
+	 * changes and wastes too many entries.
+	 */
+	ASSERT_LT(skel->bss->wasted_entries, 10, "check_wasted_entries");
+
+cleanup:
+	get_branch_snapshot__destroy(skel);
+	close_perf_events();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/global_data.c b/tools/testing/selftests/bpf/prog_tests/global_data.c
index 9efa7e50eab2..afd8639f9a94 100644
--- a/tools/testing/selftests/bpf/prog_tests/global_data.c
+++ b/tools/testing/selftests/bpf/prog_tests/global_data.c
@@ -103,11 +103,18 @@ static void test_global_data_struct(struct bpf_object *obj, __u32 duration)
 static void test_global_data_rdonly(struct bpf_object *obj, __u32 duration)
 {
 	int err = -ENOMEM, map_fd, zero = 0;
-	struct bpf_map *map;
+	struct bpf_map *map, *map2;
 	__u8 *buff;
 
 	map = bpf_object__find_map_by_name(obj, "test_glo.rodata");
-	if (CHECK_FAIL(!map || !bpf_map__is_internal(map)))
+	if (!ASSERT_OK_PTR(map, "map"))
+		return;
+	if (!ASSERT_TRUE(bpf_map__is_internal(map), "is_internal"))
+		return;
+
+	/* ensure we can lookup internal maps by their ELF names */
+	map2 = bpf_object__find_map_by_name(obj, ".rodata");
+	if (!ASSERT_EQ(map, map2, "same_maps"))
 		return;
 
 	map_fd = bpf_map__fd(map);
diff --git a/tools/testing/selftests/bpf/prog_tests/global_data_init.c b/tools/testing/selftests/bpf/prog_tests/global_data_init.c
index ee46b11f1f9a..1db86eab101b 100644
--- a/tools/testing/selftests/bpf/prog_tests/global_data_init.c
+++ b/tools/testing/selftests/bpf/prog_tests/global_data_init.c
@@ -16,7 +16,7 @@ void test_global_data_init(void)
 	if (CHECK_FAIL(err))
 		return;
 
-	map = bpf_object__find_map_by_name(obj, "test_glo.rodata");
+	map = bpf_object__find_map_by_name(obj, ".rodata");
 	if (CHECK_FAIL(!map || !bpf_map__is_internal(map)))
 		goto out;
 
diff --git a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
index ddfb6bf97152..01e51d16c8b8 100644
--- a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
+++ b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
@@ -48,7 +48,8 @@ static void on_sample(void *ctx, int cpu, void *data, __u32 size)
 	*(bool *)ctx = true;
 }
 
-void test_kfree_skb(void)
+/* TODO: fix kernel panic caused by this test in parallel mode */
+void serial_test_kfree_skb(void)
 {
 	struct __sk_buff skb = {};
 	struct bpf_prog_test_run_attr tattr = {
@@ -92,7 +93,7 @@ void test_kfree_skb(void)
 	if (CHECK(!fexit, "find_prog", "prog eth_type_trans not found\n"))
 		goto close_prog;
 
-	global_data = bpf_object__find_map_by_name(obj2, "kfree_sk.bss");
+	global_data = bpf_object__find_map_by_name(obj2, ".bss");
 	if (CHECK(!global_data, "find global data", "not found\n"))
 		goto close_prog;
 
diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
index 9611f2bc50df..5c9c0176991b 100644
--- a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
+++ b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
@@ -7,10 +7,10 @@
 
 static void test_main(void)
 {
-	struct kfunc_call_test *skel;
+	struct kfunc_call_test_lskel *skel;
 	int prog_fd, retval, err;
 
-	skel = kfunc_call_test__open_and_load();
+	skel = kfunc_call_test_lskel__open_and_load();
 	if (!ASSERT_OK_PTR(skel, "skel"))
 		return;
 
@@ -26,7 +26,7 @@ static void test_main(void)
 	ASSERT_OK(err, "bpf_prog_test_run(test2)");
 	ASSERT_EQ(retval, 3, "test2-retval");
 
-	kfunc_call_test__destroy(skel);
+	kfunc_call_test_lskel__destroy(skel);
 }
 
 static void test_subprog(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
index cf3acfa5a91d..79f6bd1e50d6 100644
--- a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
@@ -7,6 +7,7 @@
 #include "test_ksyms_btf.skel.h"
 #include "test_ksyms_btf_null_check.skel.h"
 #include "test_ksyms_weak.skel.h"
+#include "test_ksyms_weak.lskel.h"
 
 static int duration;
 
@@ -89,11 +90,11 @@ static void test_weak_syms(void)
 	int err;
 
 	skel = test_ksyms_weak__open_and_load();
-	if (CHECK(!skel, "test_ksyms_weak__open_and_load", "failed\n"))
+	if (!ASSERT_OK_PTR(skel, "test_ksyms_weak__open_and_load"))
 		return;
 
 	err = test_ksyms_weak__attach(skel);
-	if (CHECK(err, "test_ksyms_weak__attach", "skeleton attach failed: %d\n", err))
+	if (!ASSERT_OK(err, "test_ksyms_weak__attach"))
 		goto cleanup;
 
 	/* trigger tracepoint */
@@ -109,6 +110,33 @@ cleanup:
 	test_ksyms_weak__destroy(skel);
 }
 
+static void test_weak_syms_lskel(void)
+{
+	struct test_ksyms_weak_lskel *skel;
+	struct test_ksyms_weak_lskel__data *data;
+	int err;
+
+	skel = test_ksyms_weak_lskel__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "test_ksyms_weak_lskel__open_and_load"))
+		return;
+
+	err = test_ksyms_weak_lskel__attach(skel);
+	if (!ASSERT_OK(err, "test_ksyms_weak_lskel__attach"))
+		goto cleanup;
+
+	/* trigger tracepoint */
+	usleep(1);
+
+	data = skel->data;
+	ASSERT_EQ(data->out__existing_typed, 0, "existing typed ksym");
+	ASSERT_NEQ(data->out__existing_typeless, -1, "existing typeless ksym");
+	ASSERT_EQ(data->out__non_existent_typeless, 0, "nonexistent typeless ksym");
+	ASSERT_EQ(data->out__non_existent_typed, 0, "nonexistent typed ksym");
+
+cleanup:
+	test_ksyms_weak_lskel__destroy(skel);
+}
+
 void test_ksyms_btf(void)
 {
 	int percpu_datasec;
@@ -136,4 +164,7 @@ void test_ksyms_btf(void)
 
 	if (test__start_subtest("weak_ksyms"))
 		test_weak_syms();
+
+	if (test__start_subtest("weak_ksyms_lskel"))
+		test_weak_syms_lskel();
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_module.c b/tools/testing/selftests/bpf/prog_tests/ksyms_module.c
index 2cd5cded543f..d490ad80eccb 100644
--- a/tools/testing/selftests/bpf/prog_tests/ksyms_module.c
+++ b/tools/testing/selftests/bpf/prog_tests/ksyms_module.c
@@ -2,30 +2,61 @@
 /* Copyright (c) 2021 Facebook */
 
 #include <test_progs.h>
-#include <bpf/libbpf.h>
-#include <bpf/btf.h>
+#include <network_helpers.h>
 #include "test_ksyms_module.lskel.h"
+#include "test_ksyms_module.skel.h"
 
-static int duration;
-
-void test_ksyms_module(void)
+void test_ksyms_module_lskel(void)
 {
-	struct test_ksyms_module* skel;
+	struct test_ksyms_module_lskel *skel;
+	int retval;
 	int err;
 
-	skel = test_ksyms_module__open_and_load();
-	if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+	if (!env.has_testmod) {
+		test__skip();
 		return;
+	}
 
-	err = test_ksyms_module__attach(skel);
-	if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+	skel = test_ksyms_module_lskel__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "test_ksyms_module_lskel__open_and_load"))
+		return;
+	err = bpf_prog_test_run(skel->progs.load.prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
+				NULL, NULL, (__u32 *)&retval, NULL);
+	if (!ASSERT_OK(err, "bpf_prog_test_run"))
 		goto cleanup;
+	ASSERT_EQ(retval, 0, "retval");
+	ASSERT_EQ(skel->bss->out_bpf_testmod_ksym, 42, "bpf_testmod_ksym");
+cleanup:
+	test_ksyms_module_lskel__destroy(skel);
+}
 
-	usleep(1);
+void test_ksyms_module_libbpf(void)
+{
+	struct test_ksyms_module *skel;
+	int retval, err;
 
-	ASSERT_EQ(skel->bss->triggered, true, "triggered");
-	ASSERT_EQ(skel->bss->out_mod_ksym_global, 123, "global_ksym_val");
+	if (!env.has_testmod) {
+		test__skip();
+		return;
+	}
 
+	skel = test_ksyms_module__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "test_ksyms_module__open"))
+		return;
+	err = bpf_prog_test_run(bpf_program__fd(skel->progs.load), 1, &pkt_v4,
+				sizeof(pkt_v4), NULL, NULL, (__u32 *)&retval, NULL);
+	if (!ASSERT_OK(err, "bpf_prog_test_run"))
+		goto cleanup;
+	ASSERT_EQ(retval, 0, "retval");
+	ASSERT_EQ(skel->bss->out_bpf_testmod_ksym, 42, "bpf_testmod_ksym");
 cleanup:
 	test_ksyms_module__destroy(skel);
 }
+
+void test_ksyms_module(void)
+{
+	if (test__start_subtest("lskel"))
+		test_ksyms_module_lskel();
+	if (test__start_subtest("libbpf"))
+		test_ksyms_module_libbpf();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c b/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c
index 59adb4715394..7589c03fd26b 100644
--- a/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c
+++ b/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c
@@ -541,7 +541,7 @@ close_servers:
 	}
 }
 
-void test_migrate_reuseport(void)
+void serial_test_migrate_reuseport(void)
 {
 	struct test_migrate_reuseport *skel;
 	int i;
diff --git a/tools/testing/selftests/bpf/prog_tests/modify_return.c b/tools/testing/selftests/bpf/prog_tests/modify_return.c
index 97fec70c600b..b772fe30ce9b 100644
--- a/tools/testing/selftests/bpf/prog_tests/modify_return.c
+++ b/tools/testing/selftests/bpf/prog_tests/modify_return.c
@@ -53,7 +53,8 @@ cleanup:
 	modify_return__destroy(skel);
 }
 
-void test_modify_return(void)
+/* TODO: conflict with get_func_ip_test */
+void serial_test_modify_return(void)
 {
 	run_test(0 /* input_retval */,
 		 1 /* want_side_effect */,
diff --git a/tools/testing/selftests/bpf/prog_tests/module_attach.c b/tools/testing/selftests/bpf/prog_tests/module_attach.c
index d85a69b7ce44..6d0e50dcf47c 100644
--- a/tools/testing/selftests/bpf/prog_tests/module_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/module_attach.c
@@ -2,46 +2,33 @@
 /* Copyright (c) 2020 Facebook */
 
 #include <test_progs.h>
+#include <stdbool.h>
 #include "test_module_attach.skel.h"
 
 static int duration;
 
-static int trigger_module_test_read(int read_sz)
+static int trigger_module_test_writable(int *val)
 {
 	int fd, err;
+	char buf[65];
+	ssize_t rd;
 
-	fd = open("/sys/kernel/bpf_testmod", O_RDONLY);
+	fd = open(BPF_TESTMOD_TEST_FILE, O_RDONLY);
 	err = -errno;
-	if (CHECK(fd < 0, "testmod_file_open", "failed: %d\n", err))
+	if (!ASSERT_GE(fd, 0, "testmode_file_open"))
 		return err;
 
-	read(fd, NULL, read_sz);
-	close(fd);
-
-	return 0;
-}
-
-static int trigger_module_test_write(int write_sz)
-{
-	int fd, err;
-	char *buf = malloc(write_sz);
-
-	if (!buf)
-		return -ENOMEM;
-
-	memset(buf, 'a', write_sz);
-	buf[write_sz-1] = '\0';
-
-	fd = open("/sys/kernel/bpf_testmod", O_WRONLY);
+	rd = read(fd, buf, sizeof(buf) - 1);
 	err = -errno;
-	if (CHECK(fd < 0, "testmod_file_open", "failed: %d\n", err)) {
-		free(buf);
+	if (!ASSERT_GT(rd, 0, "testmod_file_rd_val")) {
+		close(fd);
 		return err;
 	}
 
-	write(fd, buf, write_sz);
+	buf[rd] = '\0';
+	*val = strtol(buf, NULL, 0);
 	close(fd);
-	free(buf);
+
 	return 0;
 }
 
@@ -58,6 +45,7 @@ void test_module_attach(void)
 	struct test_module_attach__bss *bss;
 	struct bpf_link *link;
 	int err;
+	int writable_val = 0;
 
 	skel = test_module_attach__open();
 	if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
@@ -90,6 +78,14 @@ void test_module_attach(void)
 	ASSERT_EQ(bss->fexit_ret, -EIO, "fexit_tet");
 	ASSERT_EQ(bss->fmod_ret_read_sz, READ_SZ, "fmod_ret");
 
+	bss->raw_tp_writable_bare_early_ret = true;
+	bss->raw_tp_writable_bare_out_val = 0xf1f2f3f4;
+	ASSERT_OK(trigger_module_test_writable(&writable_val),
+		  "trigger_writable");
+	ASSERT_EQ(bss->raw_tp_writable_bare_in_val, 1024, "writable_test_in");
+	ASSERT_EQ(bss->raw_tp_writable_bare_out_val, writable_val,
+		  "writable_test_out");
+
 	test_module_attach__detach(skel);
 
 	/* attach fentry/fexit and make sure it get's module reference */
diff --git a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c
index 2535788e135f..24d493482ffc 100644
--- a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c
+++ b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c
@@ -78,7 +78,8 @@ static void test_ns_current_pid_tgid_new_ns(void)
 		return;
 }
 
-void test_ns_current_pid_tgid(void)
+/* TODO: use a different tracepoint */
+void serial_test_ns_current_pid_tgid(void)
 {
 	if (test__start_subtest("ns_current_pid_tgid_root_ns"))
 		test_current_pid_tgid(NULL);
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
index 6490e9673002..4e32f3586a75 100644
--- a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
+++ b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
@@ -43,9 +43,10 @@ int trigger_on_cpu(int cpu)
 	return 0;
 }
 
-void test_perf_buffer(void)
+void serial_test_perf_buffer(void)
 {
-	int err, on_len, nr_on_cpus = 0, nr_cpus, i;
+	int err, on_len, nr_on_cpus = 0, nr_cpus, i, j;
+	int zero = 0, my_pid = getpid();
 	struct perf_buffer_opts pb_opts = {};
 	struct test_perf_buffer *skel;
 	cpu_set_t cpu_seen;
@@ -71,6 +72,10 @@ void test_perf_buffer(void)
 	if (CHECK(!skel, "skel_load", "skeleton open/load failed\n"))
 		goto out_close;
 
+	err = bpf_map_update_elem(bpf_map__fd(skel->maps.my_pid_map), &zero, &my_pid, 0);
+	if (!ASSERT_OK(err, "my_pid_update"))
+		goto out_close;
+
 	/* attach probe */
 	err = test_perf_buffer__attach(skel);
 	if (CHECK(err, "attach_kprobe", "err %d\n", err))
@@ -107,19 +112,19 @@ void test_perf_buffer(void)
 		  "expect %d, seen %d\n", nr_on_cpus, CPU_COUNT(&cpu_seen)))
 		goto out_free_pb;
 
-	if (CHECK(perf_buffer__buffer_cnt(pb) != nr_cpus, "buf_cnt",
-		  "got %zu, expected %d\n", perf_buffer__buffer_cnt(pb), nr_cpus))
+	if (CHECK(perf_buffer__buffer_cnt(pb) != nr_on_cpus, "buf_cnt",
+		  "got %zu, expected %d\n", perf_buffer__buffer_cnt(pb), nr_on_cpus))
 		goto out_close;
 
-	for (i = 0; i < nr_cpus; i++) {
+	for (i = 0, j = 0; i < nr_cpus; i++) {
 		if (i >= on_len || !online[i])
 			continue;
 
-		fd = perf_buffer__buffer_fd(pb, i);
+		fd = perf_buffer__buffer_fd(pb, j);
 		CHECK(fd < 0 || last_fd == fd, "fd_check", "last fd %d == fd %d\n", last_fd, fd);
 		last_fd = fd;
 
-		err = perf_buffer__consume_buffer(pb, i);
+		err = perf_buffer__consume_buffer(pb, j);
 		if (CHECK(err, "drain_buf", "cpu %d, err %d\n", i, err))
 			goto out_close;
 
@@ -127,12 +132,13 @@ void test_perf_buffer(void)
 		if (trigger_on_cpu(i))
 			goto out_close;
 
-		err = perf_buffer__consume_buffer(pb, i);
-		if (CHECK(err, "consume_buf", "cpu %d, err %d\n", i, err))
+		err = perf_buffer__consume_buffer(pb, j);
+		if (CHECK(err, "consume_buf", "cpu %d, err %d\n", j, err))
 			goto out_close;
 
 		if (CHECK(!CPU_ISSET(i, &cpu_seen), "cpu_seen", "cpu %d not seen\n", i))
 			goto out_close;
+		j++;
 	}
 
 out_free_pb:
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_link.c b/tools/testing/selftests/bpf/prog_tests/perf_link.c
index b1abd0c46607..ede07344f264 100644
--- a/tools/testing/selftests/bpf/prog_tests/perf_link.c
+++ b/tools/testing/selftests/bpf/prog_tests/perf_link.c
@@ -23,7 +23,8 @@ static void burn_cpu(void)
 		++j;
 }
 
-void test_perf_link(void)
+/* TODO: often fails in concurrent mode */
+void serial_test_perf_link(void)
 {
 	struct test_perf_link *skel = NULL;
 	struct perf_event_attr attr;
diff --git a/tools/testing/selftests/bpf/prog_tests/probe_user.c b/tools/testing/selftests/bpf/prog_tests/probe_user.c
index 95bd12097358..abf890d066eb 100644
--- a/tools/testing/selftests/bpf/prog_tests/probe_user.c
+++ b/tools/testing/selftests/bpf/prog_tests/probe_user.c
@@ -1,9 +1,10 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
 
-void test_probe_user(void)
+/* TODO: corrupts other tests uses connect() */
+void serial_test_probe_user(void)
 {
-	const char *prog_name = "kprobe/__sys_connect";
+	const char *prog_name = "handle_sys_connect";
 	const char *obj_file = "./test_probe_user.o";
 	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, );
 	int err, results_map_fd, sock_fd, duration = 0;
@@ -18,7 +19,7 @@ void test_probe_user(void)
 	if (!ASSERT_OK_PTR(obj, "obj_open_file"))
 		return;
 
-	kprobe_prog = bpf_object__find_program_by_title(obj, prog_name);
+	kprobe_prog = bpf_object__find_program_by_name(obj, prog_name);
 	if (CHECK(!kprobe_prog, "find_probe",
 		  "prog '%s' not found\n", prog_name))
 		goto cleanup;
diff --git a/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c b/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c
index 5c45424cac5f..ddefa1192e5d 100644
--- a/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c
+++ b/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c
@@ -3,7 +3,8 @@
 #include <test_progs.h>
 #include <linux/nbd.h>
 
-void test_raw_tp_writable_test_run(void)
+/* NOTE: conflict with other tests. */
+void serial_test_raw_tp_writable_test_run(void)
 {
 	__u32 duration = 0;
 	char error[4096];
diff --git a/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c b/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c
index 5f9eaa3ab584..fd5d2ddfb062 100644
--- a/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c
+++ b/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c
@@ -37,7 +37,7 @@ void test_rdonly_maps(void)
 	if (CHECK(err, "obj_load", "err %d errno %d\n", err, errno))
 		goto cleanup;
 
-	bss_map = bpf_object__find_map_by_name(obj, "test_rdo.bss");
+	bss_map = bpf_object__find_map_by_name(obj, ".bss");
 	if (CHECK(!bss_map, "find_bss_map", "failed\n"))
 		goto cleanup;
 
diff --git a/tools/testing/selftests/bpf/prog_tests/recursion.c b/tools/testing/selftests/bpf/prog_tests/recursion.c
index 0e378d63fe18..f3af2627b599 100644
--- a/tools/testing/selftests/bpf/prog_tests/recursion.c
+++ b/tools/testing/selftests/bpf/prog_tests/recursion.c
@@ -20,18 +20,18 @@ void test_recursion(void)
 		goto out;
 
 	ASSERT_EQ(skel->bss->pass1, 0, "pass1 == 0");
-	bpf_map_lookup_elem(bpf_map__fd(skel->maps.hash1), &key, 0);
+	bpf_map_delete_elem(bpf_map__fd(skel->maps.hash1), &key);
 	ASSERT_EQ(skel->bss->pass1, 1, "pass1 == 1");
-	bpf_map_lookup_elem(bpf_map__fd(skel->maps.hash1), &key, 0);
+	bpf_map_delete_elem(bpf_map__fd(skel->maps.hash1), &key);
 	ASSERT_EQ(skel->bss->pass1, 2, "pass1 == 2");
 
 	ASSERT_EQ(skel->bss->pass2, 0, "pass2 == 0");
-	bpf_map_lookup_elem(bpf_map__fd(skel->maps.hash2), &key, 0);
+	bpf_map_delete_elem(bpf_map__fd(skel->maps.hash2), &key);
 	ASSERT_EQ(skel->bss->pass2, 1, "pass2 == 1");
-	bpf_map_lookup_elem(bpf_map__fd(skel->maps.hash2), &key, 0);
+	bpf_map_delete_elem(bpf_map__fd(skel->maps.hash2), &key);
 	ASSERT_EQ(skel->bss->pass2, 2, "pass2 == 2");
 
-	err = bpf_obj_get_info_by_fd(bpf_program__fd(skel->progs.on_lookup),
+	err = bpf_obj_get_info_by_fd(bpf_program__fd(skel->progs.on_delete),
 				     &prog_info, &prog_info_len);
 	if (!ASSERT_OK(err, "get_prog_info"))
 		goto out;
diff --git a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
index 4e91f4d6466c..873323fb18ba 100644
--- a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
+++ b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
@@ -1,6 +1,21 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
 
+static void toggle_object_autoload_progs(const struct bpf_object *obj,
+					 const char *name_load)
+{
+	struct bpf_program *prog;
+
+	bpf_object__for_each_program(prog, obj) {
+		const char *name = bpf_program__name(prog);
+
+		if (!strcmp(name_load, name))
+			bpf_program__set_autoload(prog, true);
+		else
+			bpf_program__set_autoload(prog, false);
+	}
+}
+
 void test_reference_tracking(void)
 {
 	const char *file = "test_sk_lookup_kern.o";
@@ -9,44 +24,49 @@ void test_reference_tracking(void)
 		.object_name = obj_name,
 		.relaxed_maps = true,
 	);
-	struct bpf_object *obj;
+	struct bpf_object *obj_iter, *obj = NULL;
 	struct bpf_program *prog;
 	__u32 duration = 0;
 	int err = 0;
 
-	obj = bpf_object__open_file(file, &open_opts);
-	if (!ASSERT_OK_PTR(obj, "obj_open_file"))
+	obj_iter = bpf_object__open_file(file, &open_opts);
+	if (!ASSERT_OK_PTR(obj_iter, "obj_iter_open_file"))
 		return;
 
-	if (CHECK(strcmp(bpf_object__name(obj), obj_name), "obj_name",
+	if (CHECK(strcmp(bpf_object__name(obj_iter), obj_name), "obj_name",
 		  "wrong obj name '%s', expected '%s'\n",
-		  bpf_object__name(obj), obj_name))
+		  bpf_object__name(obj_iter), obj_name))
 		goto cleanup;
 
-	bpf_object__for_each_program(prog, obj) {
-		const char *title;
+	bpf_object__for_each_program(prog, obj_iter) {
+		const char *name;
 
-		/* Ignore .text sections */
-		title = bpf_program__section_name(prog);
-		if (strstr(title, ".text") != NULL)
+		name = bpf_program__name(prog);
+		if (!test__start_subtest(name))
 			continue;
 
-		if (!test__start_subtest(title))
-			continue;
+		obj = bpf_object__open_file(file, &open_opts);
+		if (!ASSERT_OK_PTR(obj, "obj_open_file"))
+			goto cleanup;
 
+		toggle_object_autoload_progs(obj, name);
 		/* Expect verifier failure if test name has 'err' */
-		if (strstr(title, "err_") != NULL) {
+		if (strncmp(name, "err_", sizeof("err_") - 1) == 0) {
 			libbpf_print_fn_t old_print_fn;
 
 			old_print_fn = libbpf_set_print(NULL);
-			err = !bpf_program__load(prog, "GPL", 0);
+			err = !bpf_object__load(obj);
 			libbpf_set_print(old_print_fn);
 		} else {
-			err = bpf_program__load(prog, "GPL", 0);
+			err = bpf_object__load(obj);
 		}
-		CHECK(err, title, "\n");
+		ASSERT_OK(err, name);
+
+		bpf_object__close(obj);
+		obj = NULL;
 	}
 
 cleanup:
 	bpf_object__close(obj);
+	bpf_object__close(obj_iter);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
index f62361306f6d..f4a13d9dd5c8 100644
--- a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
+++ b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
@@ -106,9 +106,9 @@ static int resolve_symbols(void)
 		  "Failed to load BTF from btf_data.o\n"))
 		return -1;
 
-	nr = btf__get_nr_types(btf);
+	nr = btf__type_cnt(btf);
 
-	for (type_id = 1; type_id <= nr; type_id++) {
+	for (type_id = 1; type_id < nr; type_id++) {
 		if (__resolve_symbol(btf, type_id))
 			break;
 	}
@@ -117,14 +117,14 @@ static int resolve_symbols(void)
 	return 0;
 }
 
-int test_resolve_btfids(void)
+void test_resolve_btfids(void)
 {
 	__u32 *test_list, *test_lists[] = { test_list_local, test_list_global };
 	unsigned int i, j;
 	int ret = 0;
 
 	if (resolve_symbols())
-		return -1;
+		return;
 
 	/* Check BTF_ID_LIST(test_list_local) and
 	 * BTF_ID_LIST_GLOBAL(test_list_global) IDs
@@ -138,7 +138,7 @@ int test_resolve_btfids(void)
 				    test_symbols[i].name,
 				    test_list[i], test_symbols[i].id);
 			if (ret)
-				return ret;
+				return;
 		}
 	}
 
@@ -161,9 +161,7 @@ int test_resolve_btfids(void)
 
 		if (i > 0) {
 			if (!ASSERT_LE(test_set.ids[i - 1], test_set.ids[i], "sort_check"))
-				return -1;
+				return;
 		}
 	}
-
-	return ret;
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
index 4706cee84360..9a80fe8a6427 100644
--- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
@@ -58,7 +58,7 @@ static int process_sample(void *ctx, void *data, size_t len)
 	}
 }
 
-static struct test_ringbuf *skel;
+static struct test_ringbuf_lskel *skel;
 static struct ring_buffer *ringbuf;
 
 static void trigger_samples()
@@ -90,13 +90,13 @@ void test_ringbuf(void)
 	int page_size = getpagesize();
 	void *mmap_ptr, *tmp_ptr;
 
-	skel = test_ringbuf__open();
+	skel = test_ringbuf_lskel__open();
 	if (CHECK(!skel, "skel_open", "skeleton open failed\n"))
 		return;
 
 	skel->maps.ringbuf.max_entries = page_size;
 
-	err = test_ringbuf__load(skel);
+	err = test_ringbuf_lskel__load(skel);
 	if (CHECK(err != 0, "skel_load", "skeleton load failed\n"))
 		goto cleanup;
 
@@ -154,7 +154,7 @@ void test_ringbuf(void)
 	if (CHECK(!ringbuf, "ringbuf_create", "failed to create ringbuf\n"))
 		goto cleanup;
 
-	err = test_ringbuf__attach(skel);
+	err = test_ringbuf_lskel__attach(skel);
 	if (CHECK(err, "skel_attach", "skeleton attachment failed: %d\n", err))
 		goto cleanup;
 
@@ -292,8 +292,8 @@ void test_ringbuf(void)
 	CHECK(skel->bss->discarded != 1, "err_discarded", "exp %ld, got %ld\n",
 	      1L, skel->bss->discarded);
 
-	test_ringbuf__detach(skel);
+	test_ringbuf_lskel__detach(skel);
 cleanup:
 	ring_buffer__free(ringbuf);
-	test_ringbuf__destroy(skel);
+	test_ringbuf_lskel__destroy(skel);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
index 4efd337d6a3c..3cfc910ab3c1 100644
--- a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
+++ b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
@@ -114,7 +114,7 @@ static int prepare_bpf_obj(void)
 	err = bpf_object__load(obj);
 	RET_ERR(err, "load bpf_object", "err:%d\n", err);
 
-	prog = bpf_program__next(NULL, obj);
+	prog = bpf_object__next_program(obj, NULL);
 	RET_ERR(!prog, "get first bpf_program", "!prog\n");
 	select_by_skb_data_prog = bpf_program__fd(prog);
 	RET_ERR(select_by_skb_data_prog < 0, "get prog fd",
@@ -858,7 +858,7 @@ out:
 	cleanup();
 }
 
-void test_select_reuseport(void)
+void serial_test_select_reuseport(void)
 {
 	saved_tcp_fo = read_int_sysctl(TCP_FO_SYSCTL);
 	if (saved_tcp_fo < 0)
diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal_sched_switch.c b/tools/testing/selftests/bpf/prog_tests/send_signal_sched_switch.c
index 189a34a7addb..15dacfcfaa6d 100644
--- a/tools/testing/selftests/bpf/prog_tests/send_signal_sched_switch.c
+++ b/tools/testing/selftests/bpf/prog_tests/send_signal_sched_switch.c
@@ -25,7 +25,8 @@ static void *worker(void *p)
 	return NULL;
 }
 
-void test_send_signal_sched_switch(void)
+/* NOTE: cause events loss */
+void serial_test_send_signal_sched_switch(void)
 {
 	struct test_send_signal_kern *skel;
 	pthread_t threads[THREAD_COUNT];
diff --git a/tools/testing/selftests/bpf/prog_tests/signal_pending.c b/tools/testing/selftests/bpf/prog_tests/signal_pending.c
index dfcbddcbe4d3..fdfdcff6cbef 100644
--- a/tools/testing/selftests/bpf/prog_tests/signal_pending.c
+++ b/tools/testing/selftests/bpf/prog_tests/signal_pending.c
@@ -42,7 +42,7 @@ static void test_signal_pending_by_type(enum bpf_prog_type prog_type)
 	signal(SIGALRM, SIG_DFL);
 }
 
-void test_signal_pending(enum bpf_prog_type prog_type)
+void test_signal_pending(void)
 {
 	test_signal_pending_by_type(BPF_PROG_TYPE_SOCKET_FILTER);
 	test_signal_pending_by_type(BPF_PROG_TYPE_FLOW_DISSECTOR);
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_assign.c b/tools/testing/selftests/bpf/prog_tests/sk_assign.c
index 3a469099f30d..1d272e05188e 100644
--- a/tools/testing/selftests/bpf/prog_tests/sk_assign.c
+++ b/tools/testing/selftests/bpf/prog_tests/sk_assign.c
@@ -48,7 +48,7 @@ configure_stack(void)
 		return false;
 	sprintf(tc_cmd, "%s %s %s %s", "tc filter add dev lo ingress bpf",
 		       "direct-action object-file ./test_sk_assign.o",
-		       "section classifier/sk_assign_test",
+		       "section tc",
 		       (env.verbosity < VERBOSE_VERY) ? " 2>/dev/null" : "verbose");
 	if (CHECK(system(tc_cmd), "BPF load failed;",
 		  "run with -vv for more info\n"))
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
index aee41547e7f4..6db07401bc49 100644
--- a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
+++ b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
@@ -598,7 +598,7 @@ close:
 
 static void run_lookup_prog(const struct test *t)
 {
-	int server_fds[MAX_SERVERS] = { -1 };
+	int server_fds[] = { [0 ... MAX_SERVERS - 1] = -1 };
 	int client_fd, reuse_conn_fd = -1;
 	struct bpf_link *lookup_link;
 	int i, err;
@@ -1053,7 +1053,7 @@ static void run_sk_assign(struct test_sk_lookup *skel,
 			  struct bpf_program *lookup_prog,
 			  const char *remote_ip, const char *local_ip)
 {
-	int server_fds[MAX_SERVERS] = { -1 };
+	int server_fds[] = { [0 ... MAX_SERVERS - 1] = -1 };
 	struct bpf_sk_lookup ctx;
 	__u64 server_cookie;
 	int i, err;
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_storage_tracing.c b/tools/testing/selftests/bpf/prog_tests/sk_storage_tracing.c
index 2b392590e8ca..547ae53cde74 100644
--- a/tools/testing/selftests/bpf/prog_tests/sk_storage_tracing.c
+++ b/tools/testing/selftests/bpf/prog_tests/sk_storage_tracing.c
@@ -105,7 +105,7 @@ out:
 		close(listen_fd);
 }
 
-void test_sk_storage_tracing(void)
+void serial_test_sk_storage_tracing(void)
 {
 	struct test_sk_storage_trace_itself *skel_itself;
 	int err;
diff --git a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
index fafeddaad6a9..c437e6ba8fe2 100644
--- a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
+++ b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
@@ -11,12 +11,14 @@ void test_skb_ctx(void)
 		.cb[3] = 4,
 		.cb[4] = 5,
 		.priority = 6,
+		.ingress_ifindex = 11,
 		.ifindex = 1,
 		.tstamp = 7,
 		.wire_len = 100,
 		.gso_segs = 8,
 		.mark = 9,
 		.gso_size = 10,
+		.hwtstamp = 11,
 	};
 	struct bpf_prog_test_run_attr tattr = {
 		.data_in = &pkt_v4,
@@ -97,6 +99,10 @@ void test_skb_ctx(void)
 		   "ctx_out_ifindex",
 		   "skb->ifindex == %d, expected %d\n",
 		   skb.ifindex, 1);
+	CHECK_ATTR(skb.ingress_ifindex != 11,
+		   "ctx_out_ingress_ifindex",
+		   "skb->ingress_ifindex == %d, expected %d\n",
+		   skb.ingress_ifindex, 11);
 	CHECK_ATTR(skb.tstamp != 8,
 		   "ctx_out_tstamp",
 		   "skb->tstamp == %lld, expected %d\n",
diff --git a/tools/testing/selftests/bpf/prog_tests/skc_to_unix_sock.c b/tools/testing/selftests/bpf/prog_tests/skc_to_unix_sock.c
new file mode 100644
index 000000000000..3eefdfed1db9
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/skc_to_unix_sock.c
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2021 Hengqi Chen */
+
+#include <test_progs.h>
+#include <sys/un.h>
+#include "test_skc_to_unix_sock.skel.h"
+
+static const char *sock_path = "@skc_to_unix_sock";
+
+void test_skc_to_unix_sock(void)
+{
+	struct test_skc_to_unix_sock *skel;
+	struct sockaddr_un sockaddr;
+	int err, sockfd = 0;
+
+	skel = test_skc_to_unix_sock__open();
+	if (!ASSERT_OK_PTR(skel, "could not open BPF object"))
+		return;
+
+	skel->rodata->my_pid = getpid();
+
+	err = test_skc_to_unix_sock__load(skel);
+	if (!ASSERT_OK(err, "could not load BPF object"))
+		goto cleanup;
+
+	err = test_skc_to_unix_sock__attach(skel);
+	if (!ASSERT_OK(err, "could not attach BPF object"))
+		goto cleanup;
+
+	/* trigger unix_listen */
+	sockfd = socket(AF_UNIX, SOCK_STREAM, 0);
+	if (!ASSERT_GT(sockfd, 0, "socket failed"))
+		goto cleanup;
+
+	memset(&sockaddr, 0, sizeof(sockaddr));
+	sockaddr.sun_family = AF_UNIX;
+	strncpy(sockaddr.sun_path, sock_path, strlen(sock_path));
+	sockaddr.sun_path[0] = '\0';
+
+	err = bind(sockfd, (struct sockaddr *)&sockaddr, sizeof(sockaddr));
+	if (!ASSERT_OK(err, "bind failed"))
+		goto cleanup;
+
+	err = listen(sockfd, 1);
+	if (!ASSERT_OK(err, "listen failed"))
+		goto cleanup;
+
+	ASSERT_EQ(strcmp(skel->bss->path, sock_path), 0, "bpf_skc_to_unix_sock failed");
+
+cleanup:
+	if (sockfd)
+		close(sockfd);
+	test_skc_to_unix_sock__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/skeleton.c b/tools/testing/selftests/bpf/prog_tests/skeleton.c
index f6f130c99b8c..180afd632f4c 100644
--- a/tools/testing/selftests/bpf/prog_tests/skeleton.c
+++ b/tools/testing/selftests/bpf/prog_tests/skeleton.c
@@ -16,8 +16,13 @@ void test_skeleton(void)
 	struct test_skeleton* skel;
 	struct test_skeleton__bss *bss;
 	struct test_skeleton__data *data;
+	struct test_skeleton__data_dyn *data_dyn;
 	struct test_skeleton__rodata *rodata;
+	struct test_skeleton__rodata_dyn *rodata_dyn;
 	struct test_skeleton__kconfig *kcfg;
+	const void *elf_bytes;
+	size_t elf_bytes_sz = 0;
+	int i;
 
 	skel = test_skeleton__open();
 	if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
@@ -28,7 +33,12 @@ void test_skeleton(void)
 
 	bss = skel->bss;
 	data = skel->data;
+	data_dyn = skel->data_dyn;
 	rodata = skel->rodata;
+	rodata_dyn = skel->rodata_dyn;
+
+	ASSERT_STREQ(bpf_map__name(skel->maps.rodata_dyn), ".rodata.dyn", "rodata_dyn_name");
+	ASSERT_STREQ(bpf_map__name(skel->maps.data_dyn), ".data.dyn", "data_dyn_name");
 
 	/* validate values are pre-initialized correctly */
 	CHECK(data->in1 != -1, "in1", "got %d != exp %d\n", data->in1, -1);
@@ -44,6 +54,12 @@ void test_skeleton(void)
 	CHECK(rodata->in.in6 != 0, "in6", "got %d != exp %d\n", rodata->in.in6, 0);
 	CHECK(bss->out6 != 0, "out6", "got %d != exp %d\n", bss->out6, 0);
 
+	ASSERT_EQ(rodata_dyn->in_dynarr_sz, 0, "in_dynarr_sz");
+	for (i = 0; i < 4; i++)
+		ASSERT_EQ(rodata_dyn->in_dynarr[i], -(i + 1), "in_dynarr");
+	for (i = 0; i < 4; i++)
+		ASSERT_EQ(data_dyn->out_dynarr[i], i + 1, "out_dynarr");
+
 	/* validate we can pre-setup global variables, even in .bss */
 	data->in1 = 10;
 	data->in2 = 11;
@@ -51,6 +67,10 @@ void test_skeleton(void)
 	bss->in4 = 13;
 	rodata->in.in6 = 14;
 
+	rodata_dyn->in_dynarr_sz = 4;
+	for (i = 0; i < 4; i++)
+		rodata_dyn->in_dynarr[i] = i + 10;
+
 	err = test_skeleton__load(skel);
 	if (CHECK(err, "skel_load", "failed to load skeleton: %d\n", err))
 		goto cleanup;
@@ -62,6 +82,10 @@ void test_skeleton(void)
 	CHECK(bss->in4 != 13, "in4", "got %lld != exp %lld\n", bss->in4, 13LL);
 	CHECK(rodata->in.in6 != 14, "in6", "got %d != exp %d\n", rodata->in.in6, 14);
 
+	ASSERT_EQ(rodata_dyn->in_dynarr_sz, 4, "in_dynarr_sz");
+	for (i = 0; i < 4; i++)
+		ASSERT_EQ(rodata_dyn->in_dynarr[i], i + 10, "in_dynarr");
+
 	/* now set new values and attach to get them into outX variables */
 	data->in1 = 1;
 	data->in2 = 2;
@@ -71,6 +95,8 @@ void test_skeleton(void)
 	bss->in5.b = 6;
 	kcfg = skel->kconfig;
 
+	skel->data_read_mostly->read_mostly_var = 123;
+
 	err = test_skeleton__attach(skel);
 	if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
 		goto cleanup;
@@ -91,6 +117,15 @@ void test_skeleton(void)
 	CHECK(bss->kern_ver != kcfg->LINUX_KERNEL_VERSION, "ext2",
 	      "got %d != exp %d\n", bss->kern_ver, kcfg->LINUX_KERNEL_VERSION);
 
+	for (i = 0; i < 4; i++)
+		ASSERT_EQ(data_dyn->out_dynarr[i], i + 10, "out_dynarr");
+
+	ASSERT_EQ(skel->bss->out_mostly_var, 123, "out_mostly_var");
+
+	elf_bytes = test_skeleton__elf_bytes(&elf_bytes_sz);
+	ASSERT_OK_PTR(elf_bytes, "elf_bytes");
+	ASSERT_GE(elf_bytes_sz, 0, "elf_bytes_sz");
+
 cleanup:
 	test_skeleton__destroy(skel);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/snprintf.c b/tools/testing/selftests/bpf/prog_tests/snprintf.c
index 8fd1b4b29a0e..394ebfc3bbf3 100644
--- a/tools/testing/selftests/bpf/prog_tests/snprintf.c
+++ b/tools/testing/selftests/bpf/prog_tests/snprintf.c
@@ -33,7 +33,7 @@
 
 #define EXP_NO_BUF_RET 29
 
-void test_snprintf_positive(void)
+static void test_snprintf_positive(void)
 {
 	char exp_addr_out[] = EXP_ADDR_OUT;
 	char exp_sym_out[]  = EXP_SYM_OUT;
@@ -103,7 +103,7 @@ static int load_single_snprintf(char *fmt)
 	return ret;
 }
 
-void test_snprintf_negative(void)
+static void test_snprintf_negative(void)
 {
 	ASSERT_OK(load_single_snprintf("valid %d"), "valid usage");
 
diff --git a/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c b/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c
index 76e1f5fe18fa..dd41b826be30 100644
--- a/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c
@@ -6,7 +6,7 @@
 /* Demonstrate that bpf_snprintf_btf succeeds and that various data types
  * are formatted correctly.
  */
-void test_snprintf_btf(void)
+void serial_test_snprintf_btf(void)
 {
 	struct netif_receive_skb *skel;
 	struct netif_receive_skb__bss *bss;
diff --git a/tools/testing/selftests/bpf/prog_tests/sock_fields.c b/tools/testing/selftests/bpf/prog_tests/sock_fields.c
index 577d619fb07e..fae40db4d81f 100644
--- a/tools/testing/selftests/bpf/prog_tests/sock_fields.c
+++ b/tools/testing/selftests/bpf/prog_tests/sock_fields.c
@@ -329,7 +329,7 @@ done:
 		close(listen_fd);
 }
 
-void test_sock_fields(void)
+void serial_test_sock_fields(void)
 {
 	struct bpf_link *egress_link = NULL, *ingress_link = NULL;
 	int parent_cg_fd = -1, child_cg_fd = -1;
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
index d88bb65b74cc..2a9cb951bfd6 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
@@ -2002,7 +2002,7 @@ static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map,
 	test_udp_unix_redir(skel, map, family);
 }
 
-void test_sockmap_listen(void)
+void serial_test_sockmap_listen(void)
 {
 	struct test_sockmap_listen *skel;
 
diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c b/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c
index 51fac975b316..bc34f7773444 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c
@@ -2,7 +2,7 @@
 #include <test_progs.h>
 #include "cgroup_helpers.h"
 
-static int prog_attach(struct bpf_object *obj, int cgroup_fd, const char *title)
+static int prog_attach(struct bpf_object *obj, int cgroup_fd, const char *title, const char *name)
 {
 	enum bpf_attach_type attach_type;
 	enum bpf_prog_type prog_type;
@@ -15,23 +15,23 @@ static int prog_attach(struct bpf_object *obj, int cgroup_fd, const char *title)
 		return -1;
 	}
 
-	prog = bpf_object__find_program_by_title(obj, title);
+	prog = bpf_object__find_program_by_name(obj, name);
 	if (!prog) {
-		log_err("Failed to find %s BPF program", title);
+		log_err("Failed to find %s BPF program", name);
 		return -1;
 	}
 
 	err = bpf_prog_attach(bpf_program__fd(prog), cgroup_fd,
 			      attach_type, BPF_F_ALLOW_MULTI);
 	if (err) {
-		log_err("Failed to attach %s BPF program", title);
+		log_err("Failed to attach %s BPF program", name);
 		return -1;
 	}
 
 	return 0;
 }
 
-static int prog_detach(struct bpf_object *obj, int cgroup_fd, const char *title)
+static int prog_detach(struct bpf_object *obj, int cgroup_fd, const char *title, const char *name)
 {
 	enum bpf_attach_type attach_type;
 	enum bpf_prog_type prog_type;
@@ -42,7 +42,7 @@ static int prog_detach(struct bpf_object *obj, int cgroup_fd, const char *title)
 	if (err)
 		return -1;
 
-	prog = bpf_object__find_program_by_title(obj, title);
+	prog = bpf_object__find_program_by_name(obj, name);
 	if (!prog)
 		return -1;
 
@@ -89,7 +89,7 @@ static int run_getsockopt_test(struct bpf_object *obj, int cg_parent,
 	 * - child:  0x80 -> 0x90
 	 */
 
-	err = prog_attach(obj, cg_child, "cgroup/getsockopt/child");
+	err = prog_attach(obj, cg_child, "cgroup/getsockopt", "_getsockopt_child");
 	if (err)
 		goto detach;
 
@@ -113,7 +113,7 @@ static int run_getsockopt_test(struct bpf_object *obj, int cg_parent,
 	 * - parent: 0x90 -> 0xA0
 	 */
 
-	err = prog_attach(obj, cg_parent, "cgroup/getsockopt/parent");
+	err = prog_attach(obj, cg_parent, "cgroup/getsockopt", "_getsockopt_parent");
 	if (err)
 		goto detach;
 
@@ -157,7 +157,7 @@ static int run_getsockopt_test(struct bpf_object *obj, int cg_parent,
 	 * - parent: unexpected 0x40, EPERM
 	 */
 
-	err = prog_detach(obj, cg_child, "cgroup/getsockopt/child");
+	err = prog_detach(obj, cg_child, "cgroup/getsockopt", "_getsockopt_child");
 	if (err) {
 		log_err("Failed to detach child program");
 		goto detach;
@@ -198,8 +198,8 @@ static int run_getsockopt_test(struct bpf_object *obj, int cg_parent,
 	}
 
 detach:
-	prog_detach(obj, cg_child, "cgroup/getsockopt/child");
-	prog_detach(obj, cg_parent, "cgroup/getsockopt/parent");
+	prog_detach(obj, cg_child, "cgroup/getsockopt", "_getsockopt_child");
+	prog_detach(obj, cg_parent, "cgroup/getsockopt", "_getsockopt_parent");
 
 	return err;
 }
@@ -236,7 +236,7 @@ static int run_setsockopt_test(struct bpf_object *obj, int cg_parent,
 
 	/* Attach child program and make sure it adds 0x10. */
 
-	err = prog_attach(obj, cg_child, "cgroup/setsockopt");
+	err = prog_attach(obj, cg_child, "cgroup/setsockopt", "_setsockopt");
 	if (err)
 		goto detach;
 
@@ -263,7 +263,7 @@ static int run_setsockopt_test(struct bpf_object *obj, int cg_parent,
 
 	/* Attach parent program and make sure it adds another 0x10. */
 
-	err = prog_attach(obj, cg_parent, "cgroup/setsockopt");
+	err = prog_attach(obj, cg_parent, "cgroup/setsockopt", "_setsockopt");
 	if (err)
 		goto detach;
 
@@ -289,8 +289,8 @@ static int run_setsockopt_test(struct bpf_object *obj, int cg_parent,
 	}
 
 detach:
-	prog_detach(obj, cg_child, "cgroup/setsockopt");
-	prog_detach(obj, cg_parent, "cgroup/setsockopt");
+	prog_detach(obj, cg_child, "cgroup/setsockopt", "_setsockopt");
+	prog_detach(obj, cg_parent, "cgroup/setsockopt", "_setsockopt");
 
 	return err;
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
index b5940e6ca67c..9825f1f7bfcc 100644
--- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c
+++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
@@ -21,7 +21,7 @@ static void test_tailcall_1(void)
 	if (CHECK_FAIL(err))
 		return;
 
-	prog = bpf_object__find_program_by_title(obj, "classifier");
+	prog = bpf_object__find_program_by_name(obj, "entry");
 	if (CHECK_FAIL(!prog))
 		goto out;
 
@@ -38,9 +38,9 @@ static void test_tailcall_1(void)
 		goto out;
 
 	for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
-		snprintf(prog_name, sizeof(prog_name), "classifier/%i", i);
+		snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
 
-		prog = bpf_object__find_program_by_title(obj, prog_name);
+		prog = bpf_object__find_program_by_name(obj, prog_name);
 		if (CHECK_FAIL(!prog))
 			goto out;
 
@@ -70,9 +70,9 @@ static void test_tailcall_1(void)
 	      err, errno, retval);
 
 	for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
-		snprintf(prog_name, sizeof(prog_name), "classifier/%i", i);
+		snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
 
-		prog = bpf_object__find_program_by_title(obj, prog_name);
+		prog = bpf_object__find_program_by_name(obj, prog_name);
 		if (CHECK_FAIL(!prog))
 			goto out;
 
@@ -92,9 +92,9 @@ static void test_tailcall_1(void)
 
 	for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
 		j = bpf_map__def(prog_array)->max_entries - 1 - i;
-		snprintf(prog_name, sizeof(prog_name), "classifier/%i", j);
+		snprintf(prog_name, sizeof(prog_name), "classifier_%d", j);
 
-		prog = bpf_object__find_program_by_title(obj, prog_name);
+		prog = bpf_object__find_program_by_name(obj, prog_name);
 		if (CHECK_FAIL(!prog))
 			goto out;
 
@@ -159,7 +159,7 @@ static void test_tailcall_2(void)
 	if (CHECK_FAIL(err))
 		return;
 
-	prog = bpf_object__find_program_by_title(obj, "classifier");
+	prog = bpf_object__find_program_by_name(obj, "entry");
 	if (CHECK_FAIL(!prog))
 		goto out;
 
@@ -176,9 +176,9 @@ static void test_tailcall_2(void)
 		goto out;
 
 	for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
-		snprintf(prog_name, sizeof(prog_name), "classifier/%i", i);
+		snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
 
-		prog = bpf_object__find_program_by_title(obj, prog_name);
+		prog = bpf_object__find_program_by_name(obj, prog_name);
 		if (CHECK_FAIL(!prog))
 			goto out;
 
@@ -219,10 +219,7 @@ out:
 	bpf_object__close(obj);
 }
 
-/* test_tailcall_3 checks that the count value of the tail call limit
- * enforcement matches with expectations.
- */
-static void test_tailcall_3(void)
+static void test_tailcall_count(const char *which)
 {
 	int err, map_fd, prog_fd, main_fd, data_fd, i, val;
 	struct bpf_map *prog_array, *data_map;
@@ -231,12 +228,12 @@ static void test_tailcall_3(void)
 	__u32 retval, duration;
 	char buff[128] = {};
 
-	err = bpf_prog_load("tailcall3.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
+	err = bpf_prog_load(which, BPF_PROG_TYPE_SCHED_CLS, &obj,
 			    &prog_fd);
 	if (CHECK_FAIL(err))
 		return;
 
-	prog = bpf_object__find_program_by_title(obj, "classifier");
+	prog = bpf_object__find_program_by_name(obj, "entry");
 	if (CHECK_FAIL(!prog))
 		goto out;
 
@@ -252,7 +249,7 @@ static void test_tailcall_3(void)
 	if (CHECK_FAIL(map_fd < 0))
 		goto out;
 
-	prog = bpf_object__find_program_by_title(obj, "classifier/0");
+	prog = bpf_object__find_program_by_name(obj, "classifier_0");
 	if (CHECK_FAIL(!prog))
 		goto out;
 
@@ -296,6 +293,22 @@ out:
 	bpf_object__close(obj);
 }
 
+/* test_tailcall_3 checks that the count value of the tail call limit
+ * enforcement matches with expectations. JIT uses direct jump.
+ */
+static void test_tailcall_3(void)
+{
+	test_tailcall_count("tailcall3.o");
+}
+
+/* test_tailcall_6 checks that the count value of the tail call limit
+ * enforcement matches with expectations. JIT uses indirect jump.
+ */
+static void test_tailcall_6(void)
+{
+	test_tailcall_count("tailcall6.o");
+}
+
 /* test_tailcall_4 checks that the kernel properly selects indirect jump
  * for the case where the key is not known. Latter is passed via global
  * data to select different targets we can compare return value of.
@@ -316,7 +329,7 @@ static void test_tailcall_4(void)
 	if (CHECK_FAIL(err))
 		return;
 
-	prog = bpf_object__find_program_by_title(obj, "classifier");
+	prog = bpf_object__find_program_by_name(obj, "entry");
 	if (CHECK_FAIL(!prog))
 		goto out;
 
@@ -341,9 +354,9 @@ static void test_tailcall_4(void)
 		return;
 
 	for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
-		snprintf(prog_name, sizeof(prog_name), "classifier/%i", i);
+		snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
 
-		prog = bpf_object__find_program_by_title(obj, prog_name);
+		prog = bpf_object__find_program_by_name(obj, prog_name);
 		if (CHECK_FAIL(!prog))
 			goto out;
 
@@ -404,7 +417,7 @@ static void test_tailcall_5(void)
 	if (CHECK_FAIL(err))
 		return;
 
-	prog = bpf_object__find_program_by_title(obj, "classifier");
+	prog = bpf_object__find_program_by_name(obj, "entry");
 	if (CHECK_FAIL(!prog))
 		goto out;
 
@@ -429,9 +442,9 @@ static void test_tailcall_5(void)
 		return;
 
 	for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
-		snprintf(prog_name, sizeof(prog_name), "classifier/%i", i);
+		snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
 
-		prog = bpf_object__find_program_by_title(obj, prog_name);
+		prog = bpf_object__find_program_by_name(obj, prog_name);
 		if (CHECK_FAIL(!prog))
 			goto out;
 
@@ -490,7 +503,7 @@ static void test_tailcall_bpf2bpf_1(void)
 	if (CHECK_FAIL(err))
 		return;
 
-	prog = bpf_object__find_program_by_title(obj, "classifier");
+	prog = bpf_object__find_program_by_name(obj, "entry");
 	if (CHECK_FAIL(!prog))
 		goto out;
 
@@ -508,9 +521,9 @@ static void test_tailcall_bpf2bpf_1(void)
 
 	/* nop -> jmp */
 	for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
-		snprintf(prog_name, sizeof(prog_name), "classifier/%i", i);
+		snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
 
-		prog = bpf_object__find_program_by_title(obj, prog_name);
+		prog = bpf_object__find_program_by_name(obj, prog_name);
 		if (CHECK_FAIL(!prog))
 			goto out;
 
@@ -574,7 +587,7 @@ static void test_tailcall_bpf2bpf_2(void)
 	if (CHECK_FAIL(err))
 		return;
 
-	prog = bpf_object__find_program_by_title(obj, "classifier");
+	prog = bpf_object__find_program_by_name(obj, "entry");
 	if (CHECK_FAIL(!prog))
 		goto out;
 
@@ -590,7 +603,7 @@ static void test_tailcall_bpf2bpf_2(void)
 	if (CHECK_FAIL(map_fd < 0))
 		goto out;
 
-	prog = bpf_object__find_program_by_title(obj, "classifier/0");
+	prog = bpf_object__find_program_by_name(obj, "classifier_0");
 	if (CHECK_FAIL(!prog))
 		goto out;
 
@@ -652,7 +665,7 @@ static void test_tailcall_bpf2bpf_3(void)
 	if (CHECK_FAIL(err))
 		return;
 
-	prog = bpf_object__find_program_by_title(obj, "classifier");
+	prog = bpf_object__find_program_by_name(obj, "entry");
 	if (CHECK_FAIL(!prog))
 		goto out;
 
@@ -669,9 +682,9 @@ static void test_tailcall_bpf2bpf_3(void)
 		goto out;
 
 	for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
-		snprintf(prog_name, sizeof(prog_name), "classifier/%i", i);
+		snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
 
-		prog = bpf_object__find_program_by_title(obj, prog_name);
+		prog = bpf_object__find_program_by_name(obj, prog_name);
 		if (CHECK_FAIL(!prog))
 			goto out;
 
@@ -749,7 +762,7 @@ static void test_tailcall_bpf2bpf_4(bool noise)
 	if (CHECK_FAIL(err))
 		return;
 
-	prog = bpf_object__find_program_by_title(obj, "classifier");
+	prog = bpf_object__find_program_by_name(obj, "entry");
 	if (CHECK_FAIL(!prog))
 		goto out;
 
@@ -766,9 +779,9 @@ static void test_tailcall_bpf2bpf_4(bool noise)
 		goto out;
 
 	for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
-		snprintf(prog_name, sizeof(prog_name), "classifier/%i", i);
+		snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
 
-		prog = bpf_object__find_program_by_title(obj, prog_name);
+		prog = bpf_object__find_program_by_name(obj, prog_name);
 		if (CHECK_FAIL(!prog))
 			goto out;
 
@@ -822,6 +835,8 @@ void test_tailcalls(void)
 		test_tailcall_4();
 	if (test__start_subtest("tailcall_5"))
 		test_tailcall_5();
+	if (test__start_subtest("tailcall_6"))
+		test_tailcall_6();
 	if (test__start_subtest("tailcall_bpf2bpf_1"))
 		test_tailcall_bpf2bpf_1();
 	if (test__start_subtest("tailcall_bpf2bpf_2"))
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
index e7201ba29ccd..4b18b73df10b 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
@@ -176,6 +176,18 @@ static int netns_setup_namespaces(const char *verb)
 	return 0;
 }
 
+static void netns_setup_namespaces_nofail(const char *verb)
+{
+	const char * const *ns = namespaces;
+	char cmd[128];
+
+	while (*ns) {
+		snprintf(cmd, sizeof(cmd), "ip netns %s %s > /dev/null 2>&1", verb, *ns);
+		system(cmd);
+		ns++;
+	}
+}
+
 struct netns_setup_result {
 	int ifindex_veth_src_fwd;
 	int ifindex_veth_dst_fwd;
@@ -633,7 +645,7 @@ static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
 	struct nstoken *nstoken = NULL;
 	int err;
 	int tunnel_pid = -1;
-	int src_fd, target_fd;
+	int src_fd, target_fd = -1;
 	int ifindex;
 
 	/* Start a L3 TUN/TAP tunnel between the src and dst namespaces.
@@ -762,6 +774,8 @@ fail:
 
 static void *test_tc_redirect_run_tests(void *arg)
 {
+	netns_setup_namespaces_nofail("delete");
+
 	RUN_TEST(tc_redirect_peer);
 	RUN_TEST(tc_redirect_peer_l3);
 	RUN_TEST(tc_redirect_neigh);
@@ -769,7 +783,7 @@ static void *test_tc_redirect_run_tests(void *arg)
 	return NULL;
 }
 
-void test_tc_redirect(void)
+void serial_test_tc_redirect(void)
 {
 	pthread_t test_thread;
 	int err;
diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
index d207e968e6b1..265b4fe33ec3 100644
--- a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
+++ b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
@@ -109,7 +109,7 @@ static int run_test(int cgroup_fd, int server_fd)
 		return -1;
 	}
 
-	map = bpf_map__next(NULL, obj);
+	map = bpf_object__next_map(obj, NULL);
 	map_fd = bpf_map__fd(map);
 
 	err = bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_SOCK_OPS, 0);
diff --git a/tools/testing/selftests/bpf/prog_tests/test_ima.c b/tools/testing/selftests/bpf/prog_tests/test_ima.c
index 0252f61d611a..97d8a6f84f4a 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_ima.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_ima.c
@@ -43,7 +43,7 @@ static int process_sample(void *ctx, void *data, size_t len)
 void test_test_ima(void)
 {
 	char measured_dir_template[] = "/tmp/ima_measuredXXXXXX";
-	struct ring_buffer *ringbuf;
+	struct ring_buffer *ringbuf = NULL;
 	const char *measured_dir;
 	char cmd[256];
 
@@ -85,5 +85,6 @@ close_clean:
 	err = system(cmd);
 	CHECK(err, "failed to run command", "%s, errno = %d\n", cmd, errno);
 close_prog:
+	ring_buffer__free(ringbuf);
 	ima__destroy(skel);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/timer.c b/tools/testing/selftests/bpf/prog_tests/timer.c
index 25f40e1b9967..0f4e49e622cd 100644
--- a/tools/testing/selftests/bpf/prog_tests/timer.c
+++ b/tools/testing/selftests/bpf/prog_tests/timer.c
@@ -39,7 +39,8 @@ static int timer(struct timer *timer_skel)
 	return 0;
 }
 
-void test_timer(void)
+/* TODO: use pid filtering */
+void serial_test_timer(void)
 {
 	struct timer *timer_skel = NULL;
 	int err;
diff --git a/tools/testing/selftests/bpf/prog_tests/timer_mim.c b/tools/testing/selftests/bpf/prog_tests/timer_mim.c
index ced8f6cf347c..949a0617869d 100644
--- a/tools/testing/selftests/bpf/prog_tests/timer_mim.c
+++ b/tools/testing/selftests/bpf/prog_tests/timer_mim.c
@@ -52,7 +52,7 @@ static int timer_mim(struct timer_mim *timer_skel)
 	return 0;
 }
 
-void test_timer_mim(void)
+void serial_test_timer_mim(void)
 {
 	struct timer_mim_reject *timer_reject_skel = NULL;
 	libbpf_print_fn_t old_print_fn = NULL;
diff --git a/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c b/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c
index fb095e5cd9af..8652d0a46c87 100644
--- a/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c
+++ b/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
 
-void test_tp_attach_query(void)
+void serial_test_tp_attach_query(void)
 {
 	const int num_progs = 3;
 	int i, j, bytes, efd, err, prog_fd[num_progs], pmu_fd[num_progs];
diff --git a/tools/testing/selftests/bpf/prog_tests/trace_printk.c b/tools/testing/selftests/bpf/prog_tests/trace_printk.c
index d39bc00feb45..cade7f12315f 100644
--- a/tools/testing/selftests/bpf/prog_tests/trace_printk.c
+++ b/tools/testing/selftests/bpf/prog_tests/trace_printk.c
@@ -8,35 +8,34 @@
 #define TRACEBUF	"/sys/kernel/debug/tracing/trace_pipe"
 #define SEARCHMSG	"testing,testing"
 
-void test_trace_printk(void)
+void serial_test_trace_printk(void)
 {
-	int err, iter = 0, duration = 0, found = 0;
-	struct trace_printk__bss *bss;
-	struct trace_printk *skel;
+	struct trace_printk_lskel__bss *bss;
+	int err = 0, iter = 0, found = 0;
+	struct trace_printk_lskel *skel;
 	char *buf = NULL;
 	FILE *fp = NULL;
 	size_t buflen;
 
-	skel = trace_printk__open();
-	if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+	skel = trace_printk_lskel__open();
+	if (!ASSERT_OK_PTR(skel, "trace_printk__open"))
 		return;
 
-	ASSERT_EQ(skel->rodata->fmt[0], 'T', "invalid printk fmt string");
+	ASSERT_EQ(skel->rodata->fmt[0], 'T', "skel->rodata->fmt[0]");
 	skel->rodata->fmt[0] = 't';
 
-	err = trace_printk__load(skel);
-	if (CHECK(err, "skel_load", "failed to load skeleton: %d\n", err))
+	err = trace_printk_lskel__load(skel);
+	if (!ASSERT_OK(err, "trace_printk__load"))
 		goto cleanup;
 
 	bss = skel->bss;
 
-	err = trace_printk__attach(skel);
-	if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+	err = trace_printk_lskel__attach(skel);
+	if (!ASSERT_OK(err, "trace_printk__attach"))
 		goto cleanup;
 
 	fp = fopen(TRACEBUF, "r");
-	if (CHECK(fp == NULL, "could not open trace buffer",
-		  "error %d opening %s", errno, TRACEBUF))
+	if (!ASSERT_OK_PTR(fp, "fopen(TRACEBUF)"))
 		goto cleanup;
 
 	/* We do not want to wait forever if this test fails... */
@@ -44,16 +43,12 @@ void test_trace_printk(void)
 
 	/* wait for tracepoint to trigger */
 	usleep(1);
-	trace_printk__detach(skel);
+	trace_printk_lskel__detach(skel);
 
-	if (CHECK(bss->trace_printk_ran == 0,
-		  "bpf_trace_printk never ran",
-		  "ran == %d", bss->trace_printk_ran))
+	if (!ASSERT_GT(bss->trace_printk_ran, 0, "bss->trace_printk_ran"))
 		goto cleanup;
 
-	if (CHECK(bss->trace_printk_ret <= 0,
-		  "bpf_trace_printk returned <= 0 value",
-		  "got %d", bss->trace_printk_ret))
+	if (!ASSERT_GT(bss->trace_printk_ret, 0, "bss->trace_printk_ret"))
 		goto cleanup;
 
 	/* verify our search string is in the trace buffer */
@@ -66,12 +61,11 @@ void test_trace_printk(void)
 			break;
 	}
 
-	if (CHECK(!found, "message from bpf_trace_printk not found",
-		  "no instance of %s in %s", SEARCHMSG, TRACEBUF))
+	if (!ASSERT_EQ(found, bss->trace_printk_ran, "found"))
 		goto cleanup;
 
 cleanup:
-	trace_printk__destroy(skel);
+	trace_printk_lskel__destroy(skel);
 	free(buf);
 	if (fp)
 		fclose(fp);
diff --git a/tools/testing/selftests/bpf/prog_tests/trace_vprintk.c b/tools/testing/selftests/bpf/prog_tests/trace_vprintk.c
new file mode 100644
index 000000000000..7a4e313e8558
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/trace_vprintk.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <test_progs.h>
+
+#include "trace_vprintk.lskel.h"
+
+#define TRACEBUF	"/sys/kernel/debug/tracing/trace_pipe"
+#define SEARCHMSG	"1,2,3,4,5,6,7,8,9,10"
+
+void serial_test_trace_vprintk(void)
+{
+	struct trace_vprintk_lskel__bss *bss;
+	int err = 0, iter = 0, found = 0;
+	struct trace_vprintk_lskel *skel;
+	char *buf = NULL;
+	FILE *fp = NULL;
+	size_t buflen;
+
+	skel = trace_vprintk_lskel__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "trace_vprintk__open_and_load"))
+		goto cleanup;
+
+	bss = skel->bss;
+
+	err = trace_vprintk_lskel__attach(skel);
+	if (!ASSERT_OK(err, "trace_vprintk__attach"))
+		goto cleanup;
+
+	fp = fopen(TRACEBUF, "r");
+	if (!ASSERT_OK_PTR(fp, "fopen(TRACEBUF)"))
+		goto cleanup;
+
+	/* We do not want to wait forever if this test fails... */
+	fcntl(fileno(fp), F_SETFL, O_NONBLOCK);
+
+	/* wait for tracepoint to trigger */
+	usleep(1);
+	trace_vprintk_lskel__detach(skel);
+
+	if (!ASSERT_GT(bss->trace_vprintk_ran, 0, "bss->trace_vprintk_ran"))
+		goto cleanup;
+
+	if (!ASSERT_GT(bss->trace_vprintk_ret, 0, "bss->trace_vprintk_ret"))
+		goto cleanup;
+
+	/* verify our search string is in the trace buffer */
+	while (getline(&buf, &buflen, fp) >= 0 || errno == EAGAIN) {
+		if (strstr(buf, SEARCHMSG) != NULL)
+			found++;
+		if (found == bss->trace_vprintk_ran)
+			break;
+		if (++iter > 1000)
+			break;
+	}
+
+	if (!ASSERT_EQ(found, bss->trace_vprintk_ran, "found"))
+		goto cleanup;
+
+	if (!ASSERT_LT(bss->null_data_vprintk_ret, 0, "bss->null_data_vprintk_ret"))
+		goto cleanup;
+
+cleanup:
+	trace_vprintk_lskel__destroy(skel);
+	free(buf);
+	if (fp)
+		fclose(fp);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
index d7f5a931d7f3..fc146671b20a 100644
--- a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
+++ b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
@@ -41,7 +41,8 @@ static struct bpf_link *load(struct bpf_object *obj, const char *name)
 	return bpf_program__attach_trace(prog);
 }
 
-void test_trampoline_count(void)
+/* TODO: use different target function to run in concurrent mode */
+void serial_test_trampoline_count(void)
 {
 	const char *fentry_name = "fentry/__set_task_comm";
 	const char *fexit_name = "fexit/__set_task_comm";
diff --git a/tools/testing/selftests/bpf/prog_tests/verif_stats.c b/tools/testing/selftests/bpf/prog_tests/verif_stats.c
new file mode 100644
index 000000000000..a47e7c0e1ffd
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/verif_stats.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <test_progs.h>
+
+#include "trace_vprintk.lskel.h"
+
+void test_verif_stats(void)
+{
+	__u32 len = sizeof(struct bpf_prog_info);
+	struct trace_vprintk_lskel *skel;
+	struct bpf_prog_info info = {};
+	int err;
+
+	skel = trace_vprintk_lskel__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "trace_vprintk__open_and_load"))
+		goto cleanup;
+
+	err = bpf_obj_get_info_by_fd(skel->progs.sys_enter.prog_fd, &info, &len);
+	if (!ASSERT_OK(err, "bpf_obj_get_info_by_fd"))
+		goto cleanup;
+
+	if (!ASSERT_GT(info.verified_insns, 0, "verified_insns"))
+		goto cleanup;
+
+cleanup:
+	trace_vprintk_lskel__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
index d5c98f2cb12f..f529e3c923ae 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
@@ -2,7 +2,7 @@
 #include <test_progs.h>
 #include <network_helpers.h>
 
-void test_xdp_adjust_tail_shrink(void)
+static void test_xdp_adjust_tail_shrink(void)
 {
 	const char *file = "./test_xdp_adjust_tail_shrink.o";
 	__u32 duration, retval, size, expect_sz;
@@ -30,7 +30,7 @@ void test_xdp_adjust_tail_shrink(void)
 	bpf_object__close(obj);
 }
 
-void test_xdp_adjust_tail_grow(void)
+static void test_xdp_adjust_tail_grow(void)
 {
 	const char *file = "./test_xdp_adjust_tail_grow.o";
 	struct bpf_object *obj;
@@ -58,7 +58,7 @@ void test_xdp_adjust_tail_grow(void)
 	bpf_object__close(obj);
 }
 
-void test_xdp_adjust_tail_grow2(void)
+static void test_xdp_adjust_tail_grow2(void)
 {
 	const char *file = "./test_xdp_adjust_tail_grow.o";
 	char buf[4096]; /* avoid segfault: large buf to hold grow results */
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c
index 15ef3531483e..4c4057262cd8 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c
@@ -4,7 +4,7 @@
 #define IFINDEX_LO 1
 #define XDP_FLAGS_REPLACE		(1U << 4)
 
-void test_xdp_attach(void)
+void serial_test_xdp_attach(void)
 {
 	__u32 duration = 0, id1, id2, id0 = 0, len;
 	struct bpf_object *obj1, *obj2, *obj3;
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c
index ad3ba81b4048..faa22b84f2ee 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c
@@ -519,7 +519,7 @@ static struct bond_test_case bond_test_cases[] = {
 	{ "xdp_bonding_xor_layer34", BOND_MODE_XOR, BOND_XMIT_POLICY_LAYER34, },
 };
 
-void test_xdp_bonding(void)
+void serial_test_xdp_bonding(void)
 {
 	libbpf_print_fn_t old_print_fn;
 	struct skeletons skeletons = {};
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
index 8755effd80b0..fd812bd43600 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
@@ -7,7 +7,7 @@
 
 #define IFINDEX_LO	1
 
-void test_xdp_cpumap_attach(void)
+void serial_test_xdp_cpumap_attach(void)
 {
 	struct test_xdp_with_cpumap_helpers *skel;
 	struct bpf_prog_info info = {};
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
index c72af030ff10..3079d5568f8f 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
@@ -8,7 +8,7 @@
 
 #define IFINDEX_LO 1
 
-void test_xdp_with_devmap_helpers(void)
+static void test_xdp_with_devmap_helpers(void)
 {
 	struct test_xdp_with_devmap_helpers *skel;
 	struct bpf_prog_info info = {};
@@ -60,7 +60,7 @@ out_close:
 	test_xdp_with_devmap_helpers__destroy(skel);
 }
 
-void test_neg_xdp_devmap_helpers(void)
+static void test_neg_xdp_devmap_helpers(void)
 {
 	struct test_xdp_devmap_helpers *skel;
 
@@ -72,7 +72,7 @@ void test_neg_xdp_devmap_helpers(void)
 }
 
 
-void test_xdp_devmap_attach(void)
+void serial_test_xdp_devmap_attach(void)
 {
 	if (test__start_subtest("DEVMAP with programs in entries"))
 		test_xdp_with_devmap_helpers();
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_info.c b/tools/testing/selftests/bpf/prog_tests/xdp_info.c
index d2d7a283d72f..4e2a4fd56f67 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_info.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_info.c
@@ -4,7 +4,7 @@
 
 #define IFINDEX_LO 1
 
-void test_xdp_info(void)
+void serial_test_xdp_info(void)
 {
 	__u32 len = sizeof(struct bpf_prog_info), duration = 0, prog_id;
 	const char *file = "./xdp_dummy.o";
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_link.c b/tools/testing/selftests/bpf/prog_tests/xdp_link.c
index 46eed0a33c23..983ab0b47d30 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_link.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_link.c
@@ -6,7 +6,7 @@
 
 #define IFINDEX_LO 1
 
-void test_xdp_link(void)
+void serial_test_xdp_link(void)
 {
 	__u32 duration = 0, id1, id2, id0 = 0, prog_fd1, prog_fd2, err;
 	DECLARE_LIBBPF_OPTS(bpf_xdp_set_link_opts, opts, .old_fd = -1);
diff --git a/tools/testing/selftests/bpf/prog_tests/xdpwall.c b/tools/testing/selftests/bpf/prog_tests/xdpwall.c
new file mode 100644
index 000000000000..f3927829a55a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdpwall.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "test_progs.h"
+#include "xdpwall.skel.h"
+
+void test_xdpwall(void)
+{
+	struct xdpwall *skel;
+
+	skel = xdpwall__open_and_load();
+	ASSERT_OK_PTR(skel, "Does LLMV have https://reviews.llvm.org/D109073?");
+
+	xdpwall__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/atomics.c b/tools/testing/selftests/bpf/progs/atomics.c
index c245345e41ca..16e57313204a 100644
--- a/tools/testing/selftests/bpf/progs/atomics.c
+++ b/tools/testing/selftests/bpf/progs/atomics.c
@@ -10,6 +10,8 @@ bool skip_tests __attribute((__section__(".data"))) = false;
 bool skip_tests = true;
 #endif
 
+__u32 pid = 0;
+
 __u64 add64_value = 1;
 __u64 add64_result = 0;
 __u32 add32_value = 1;
@@ -21,6 +23,8 @@ __u64 add_noreturn_value = 1;
 SEC("fentry/bpf_fentry_test1")
 int BPF_PROG(add, int a)
 {
+	if (pid != (bpf_get_current_pid_tgid() >> 32))
+		return 0;
 #ifdef ENABLE_ATOMICS_TESTS
 	__u64 add_stack_value = 1;
 
@@ -45,6 +49,8 @@ __s64 sub_noreturn_value = 1;
 SEC("fentry/bpf_fentry_test1")
 int BPF_PROG(sub, int a)
 {
+	if (pid != (bpf_get_current_pid_tgid() >> 32))
+		return 0;
 #ifdef ENABLE_ATOMICS_TESTS
 	__u64 sub_stack_value = 1;
 
@@ -67,6 +73,8 @@ __u64 and_noreturn_value = (0x110ull << 32);
 SEC("fentry/bpf_fentry_test1")
 int BPF_PROG(and, int a)
 {
+	if (pid != (bpf_get_current_pid_tgid() >> 32))
+		return 0;
 #ifdef ENABLE_ATOMICS_TESTS
 
 	and64_result = __sync_fetch_and_and(&and64_value, 0x011ull << 32);
@@ -86,6 +94,8 @@ __u64 or_noreturn_value = (0x110ull << 32);
 SEC("fentry/bpf_fentry_test1")
 int BPF_PROG(or, int a)
 {
+	if (pid != (bpf_get_current_pid_tgid() >> 32))
+		return 0;
 #ifdef ENABLE_ATOMICS_TESTS
 	or64_result = __sync_fetch_and_or(&or64_value, 0x011ull << 32);
 	or32_result = __sync_fetch_and_or(&or32_value, 0x011);
@@ -104,6 +114,8 @@ __u64 xor_noreturn_value = (0x110ull << 32);
 SEC("fentry/bpf_fentry_test1")
 int BPF_PROG(xor, int a)
 {
+	if (pid != (bpf_get_current_pid_tgid() >> 32))
+		return 0;
 #ifdef ENABLE_ATOMICS_TESTS
 	xor64_result = __sync_fetch_and_xor(&xor64_value, 0x011ull << 32);
 	xor32_result = __sync_fetch_and_xor(&xor32_value, 0x011);
@@ -123,6 +135,8 @@ __u32 cmpxchg32_result_succeed = 0;
 SEC("fentry/bpf_fentry_test1")
 int BPF_PROG(cmpxchg, int a)
 {
+	if (pid != (bpf_get_current_pid_tgid() >> 32))
+		return 0;
 #ifdef ENABLE_ATOMICS_TESTS
 	cmpxchg64_result_fail = __sync_val_compare_and_swap(&cmpxchg64_value, 0, 3);
 	cmpxchg64_result_succeed = __sync_val_compare_and_swap(&cmpxchg64_value, 1, 2);
@@ -142,6 +156,8 @@ __u32 xchg32_result = 0;
 SEC("fentry/bpf_fentry_test1")
 int BPF_PROG(xchg, int a)
 {
+	if (pid != (bpf_get_current_pid_tgid() >> 32))
+		return 0;
 #ifdef ENABLE_ATOMICS_TESTS
 	__u64 val64 = 2;
 	__u32 val32 = 2;
diff --git a/tools/testing/selftests/bpf/progs/bloom_filter_bench.c b/tools/testing/selftests/bpf/progs/bloom_filter_bench.c
new file mode 100644
index 000000000000..d9a88dd1ea65
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bloom_filter_bench.c
@@ -0,0 +1,153 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <errno.h>
+#include <linux/bpf.h>
+#include <stdbool.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct bpf_map;
+
+__u8 rand_vals[2500000];
+const __u32 nr_rand_bytes = 2500000;
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(key_size, sizeof(__u32));
+	/* max entries and value_size will be set programmatically.
+	 * They are configurable from the userspace bench program.
+	 */
+} array_map SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_BLOOM_FILTER);
+	/* max entries,  value_size, and # of hash functions will be set
+	 * programmatically. They are configurable from the userspace
+	 * bench program.
+	 */
+	__uint(map_extra, 3);
+} bloom_map SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	/* max entries, key_size, and value_size, will be set
+	 * programmatically. They are configurable from the userspace
+	 * bench program.
+	 */
+} hashmap SEC(".maps");
+
+struct callback_ctx {
+	struct bpf_map *map;
+	bool update;
+};
+
+/* Tracks the number of hits, drops, and false hits */
+struct {
+	__u32 stats[3];
+} __attribute__((__aligned__(256))) percpu_stats[256];
+
+const __u32 hit_key  = 0;
+const __u32 drop_key  = 1;
+const __u32 false_hit_key = 2;
+
+__u8 value_size;
+
+const volatile bool hashmap_use_bloom;
+const volatile bool count_false_hits;
+
+int error = 0;
+
+static __always_inline void log_result(__u32 key)
+{
+	__u32 cpu = bpf_get_smp_processor_id();
+
+	percpu_stats[cpu & 255].stats[key]++;
+}
+
+static __u64
+bloom_callback(struct bpf_map *map, __u32 *key, void *val,
+	       struct callback_ctx *data)
+{
+	int err;
+
+	if (data->update)
+		err = bpf_map_push_elem(data->map, val, 0);
+	else
+		err = bpf_map_peek_elem(data->map, val);
+
+	if (err) {
+		error |= 1;
+		return 1; /* stop the iteration */
+	}
+
+	log_result(hit_key);
+
+	return 0;
+}
+
+SEC("fentry/__x64_sys_getpgid")
+int bloom_lookup(void *ctx)
+{
+	struct callback_ctx data;
+
+	data.map = (struct bpf_map *)&bloom_map;
+	data.update = false;
+
+	bpf_for_each_map_elem(&array_map, bloom_callback, &data, 0);
+
+	return 0;
+}
+
+SEC("fentry/__x64_sys_getpgid")
+int bloom_update(void *ctx)
+{
+	struct callback_ctx data;
+
+	data.map = (struct bpf_map *)&bloom_map;
+	data.update = true;
+
+	bpf_for_each_map_elem(&array_map, bloom_callback, &data, 0);
+
+	return 0;
+}
+
+SEC("fentry/__x64_sys_getpgid")
+int bloom_hashmap_lookup(void *ctx)
+{
+	__u64 *result;
+	int i, err;
+
+	__u32 index = bpf_get_prandom_u32();
+	__u32 bitmask = (1ULL << 21) - 1;
+
+	for (i = 0; i < 1024; i++, index += value_size) {
+		index = index & bitmask;
+
+		if (hashmap_use_bloom) {
+			err = bpf_map_peek_elem(&bloom_map,
+						rand_vals + index);
+			if (err) {
+				if (err != -ENOENT) {
+					error |= 2;
+					return 0;
+				}
+				log_result(hit_key);
+				continue;
+			}
+		}
+
+		result = bpf_map_lookup_elem(&hashmap,
+					     rand_vals + index);
+		if (result) {
+			log_result(hit_key);
+		} else {
+			if (hashmap_use_bloom && count_false_hits)
+				log_result(false_hit_key);
+			log_result(drop_key);
+		}
+	}
+
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bloom_filter_map.c b/tools/testing/selftests/bpf/progs/bloom_filter_map.c
new file mode 100644
index 000000000000..1316f3db79d9
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bloom_filter_map.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct bpf_map;
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__type(key, __u32);
+	__type(value, __u32);
+	__uint(max_entries, 1000);
+} map_random_data SEC(".maps");
+
+struct map_bloom_type {
+	__uint(type, BPF_MAP_TYPE_BLOOM_FILTER);
+	__type(value, __u32);
+	__uint(max_entries, 10000);
+	__uint(map_extra, 5);
+} map_bloom SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+	__type(key, int);
+	__type(value, int);
+	__uint(max_entries, 1);
+	__array(values, struct map_bloom_type);
+} outer_map SEC(".maps");
+
+struct callback_ctx {
+	struct bpf_map *map;
+};
+
+int error = 0;
+
+static __u64
+check_elem(struct bpf_map *map, __u32 *key, __u32 *val,
+	   struct callback_ctx *data)
+{
+	int err;
+
+	err = bpf_map_peek_elem(data->map, val);
+	if (err) {
+		error |= 1;
+		return 1; /* stop the iteration */
+	}
+
+	return 0;
+}
+
+SEC("fentry/__x64_sys_getpgid")
+int inner_map(void *ctx)
+{
+	struct bpf_map *inner_map;
+	struct callback_ctx data;
+	int key = 0;
+
+	inner_map = bpf_map_lookup_elem(&outer_map, &key);
+	if (!inner_map) {
+		error |= 2;
+		return 0;
+	}
+
+	data.map = inner_map;
+	bpf_for_each_map_elem(&map_random_data, check_elem, &data, 0);
+
+	return 0;
+}
+
+SEC("fentry/__x64_sys_getpgid")
+int check_bloom(void *ctx)
+{
+	struct callback_ctx data;
+
+	data.map = (struct bpf_map *)&map_bloom;
+	bpf_for_each_map_elem(&map_random_data, check_elem, &data, 0);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_cubic.c b/tools/testing/selftests/bpf/progs/bpf_cubic.c
index f62df4d023f9..d9660e7200e2 100644
--- a/tools/testing/selftests/bpf/progs/bpf_cubic.c
+++ b/tools/testing/selftests/bpf/progs/bpf_cubic.c
@@ -169,11 +169,7 @@ static __always_inline void bictcp_hystart_reset(struct sock *sk)
 	ca->sample_cnt = 0;
 }
 
-/* "struct_ops/" prefix is not a requirement
- * It will be recognized as BPF_PROG_TYPE_STRUCT_OPS
- * as long as it is used in one of the func ptr
- * under SEC(".struct_ops").
- */
+/* "struct_ops/" prefix is a requirement */
 SEC("struct_ops/bpf_cubic_init")
 void BPF_PROG(bpf_cubic_init, struct sock *sk)
 {
@@ -188,10 +184,8 @@ void BPF_PROG(bpf_cubic_init, struct sock *sk)
 		tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
 }
 
-/* No prefix in SEC will also work.
- * The remaining tcp-cubic functions have an easier way.
- */
-SEC("no-sec-prefix-bictcp_cwnd_event")
+/* "struct_ops" prefix is a requirement */
+SEC("struct_ops/bpf_cubic_cwnd_event")
 void BPF_PROG(bpf_cubic_cwnd_event, struct sock *sk, enum tcp_ca_event event)
 {
 	if (event == CA_EVENT_TX_START) {
diff --git a/tools/testing/selftests/bpf/progs/bpf_flow.c b/tools/testing/selftests/bpf/progs/bpf_flow.c
index 95a5a0778ed7..f266c757b3df 100644
--- a/tools/testing/selftests/bpf/progs/bpf_flow.c
+++ b/tools/testing/selftests/bpf/progs/bpf_flow.c
@@ -19,9 +19,8 @@
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_endian.h>
 
-int _version SEC("version") = 1;
 #define PROG(F) PROG_(F, _##F)
-#define PROG_(NUM, NAME) SEC("flow_dissector/"#NUM) int bpf_func##NAME
+#define PROG_(NUM, NAME) SEC("flow_dissector") int flow_dissector_##NUM
 
 /* These are the identifiers of the BPF programs that will be used in tail
  * calls. Name is limited to 16 characters, with the terminating character and
diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c
index 8f44767a75fa..e5560a656030 100644
--- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c
+++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c
@@ -11,7 +11,7 @@
 /*
  *struct bitfields_only_mixed_types {
  *	int a: 3;
- *	long int b: 2;
+ *	long b: 2;
  *	_Bool c: 1;
  *	enum {
  *		A = 0,
@@ -27,7 +27,7 @@
 
 struct bitfields_only_mixed_types {
 	int a: 3;
-	long int b: 2;
+	long b: 2;
 	bool c: 1; /* it's really a _Bool type */
 	enum {
 		A, /* A = 0, dumper is very explicit */
@@ -44,8 +44,8 @@ struct bitfields_only_mixed_types {
  *	char: 4;
  *	int a: 4;
  *	short b;
- *	long int c;
- *	long int d: 8;
+ *	long c;
+ *	long d: 8;
  *	int e;
  *	int f;
  *};
@@ -71,7 +71,7 @@ struct bitfield_mixed_with_others {
  *struct bitfield_flushed {
  *	int a: 4;
  *	long: 60;
- *	long int b: 16;
+ *	long b: 16;
  *};
  *
  */
diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c
index 1cef3bec1dc7..e304b6204bd9 100644
--- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c
+++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c
@@ -29,7 +29,7 @@ struct non_packed_fields {
 struct nested_packed {
 	char: 4;
 	int a: 4;
-	long int b;
+	long b;
 	struct {
 		char c;
 		int d;
@@ -44,7 +44,7 @@ union union_is_never_packed {
 
 union union_does_not_need_packing {
 	struct {
-		long int a;
+		long a;
 		int b;
 	} __attribute__((packed));
 	int c;
diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c
index 35c512818a56..f2661c8d2d90 100644
--- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c
+++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c
@@ -9,7 +9,7 @@
 /* ----- START-EXPECTED-OUTPUT ----- */
 struct padded_implicitly {
 	int a;
-	long int b;
+	long b;
 	char c;
 };
 
diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c
index 8aaa24a00322..1c7105fcae3c 100644
--- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c
+++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c
@@ -189,7 +189,7 @@ struct struct_with_embedded_stuff {
 			const char *d;
 		} e;
 		union {
-			volatile long int f;
+			volatile long f;
 			void * restrict g;
 		};
 	};
diff --git a/tools/testing/selftests/bpf/progs/cg_storage_multi_isolated.c b/tools/testing/selftests/bpf/progs/cg_storage_multi_isolated.c
index a25373002055..3f81ff92184c 100644
--- a/tools/testing/selftests/bpf/progs/cg_storage_multi_isolated.c
+++ b/tools/testing/selftests/bpf/progs/cg_storage_multi_isolated.c
@@ -20,7 +20,7 @@ struct {
 
 __u32 invocations = 0;
 
-SEC("cgroup_skb/egress/1")
+SEC("cgroup_skb/egress")
 int egress1(struct __sk_buff *skb)
 {
 	struct cgroup_value *ptr_cg_storage =
@@ -32,7 +32,7 @@ int egress1(struct __sk_buff *skb)
 	return 1;
 }
 
-SEC("cgroup_skb/egress/2")
+SEC("cgroup_skb/egress")
 int egress2(struct __sk_buff *skb)
 {
 	struct cgroup_value *ptr_cg_storage =
diff --git a/tools/testing/selftests/bpf/progs/cg_storage_multi_shared.c b/tools/testing/selftests/bpf/progs/cg_storage_multi_shared.c
index a149f33bc533..d662db27fe4a 100644
--- a/tools/testing/selftests/bpf/progs/cg_storage_multi_shared.c
+++ b/tools/testing/selftests/bpf/progs/cg_storage_multi_shared.c
@@ -20,7 +20,7 @@ struct {
 
 __u32 invocations = 0;
 
-SEC("cgroup_skb/egress/1")
+SEC("cgroup_skb/egress")
 int egress1(struct __sk_buff *skb)
 {
 	struct cgroup_value *ptr_cg_storage =
@@ -32,7 +32,7 @@ int egress1(struct __sk_buff *skb)
 	return 1;
 }
 
-SEC("cgroup_skb/egress/2")
+SEC("cgroup_skb/egress")
 int egress2(struct __sk_buff *skb)
 {
 	struct cgroup_value *ptr_cg_storage =
diff --git a/tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c b/tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c
index 3f757e30d7a0..88638315c582 100644
--- a/tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c
+++ b/tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c
@@ -14,7 +14,6 @@
 #include <sys/types.h>
 #include <sys/socket.h>
 
-int _version SEC("version") = 1;
 char _license[] SEC("license") = "GPL";
 
 __u16 g_serv_port = 0;
diff --git a/tools/testing/selftests/bpf/progs/connect4_dropper.c b/tools/testing/selftests/bpf/progs/connect4_dropper.c
index b565d997810a..d3f4c5e4fb69 100644
--- a/tools/testing/selftests/bpf/progs/connect4_dropper.c
+++ b/tools/testing/selftests/bpf/progs/connect4_dropper.c
@@ -18,7 +18,7 @@ int connect_v4_dropper(struct bpf_sock_addr *ctx)
 {
 	if (ctx->type != SOCK_STREAM)
 		return VERDICT_PROCEED;
-	if (ctx->user_port == bpf_htons(60123))
+	if (ctx->user_port == bpf_htons(60120))
 		return VERDICT_REJECT;
 	return VERDICT_PROCEED;
 }
diff --git a/tools/testing/selftests/bpf/progs/connect4_prog.c b/tools/testing/selftests/bpf/progs/connect4_prog.c
index a943d394fd3a..b241932911db 100644
--- a/tools/testing/selftests/bpf/progs/connect4_prog.c
+++ b/tools/testing/selftests/bpf/progs/connect4_prog.c
@@ -31,8 +31,6 @@
 #define IFNAMSIZ 16
 #endif
 
-int _version SEC("version") = 1;
-
 __attribute__ ((noinline))
 int do_bind(struct bpf_sock_addr *ctx)
 {
diff --git a/tools/testing/selftests/bpf/progs/connect6_prog.c b/tools/testing/selftests/bpf/progs/connect6_prog.c
index 506d0f81a375..40266d2c737c 100644
--- a/tools/testing/selftests/bpf/progs/connect6_prog.c
+++ b/tools/testing/selftests/bpf/progs/connect6_prog.c
@@ -24,8 +24,6 @@
 
 #define DST_REWRITE_PORT6	6666
 
-int _version SEC("version") = 1;
-
 SEC("cgroup/connect6")
 int connect_v6_prog(struct bpf_sock_addr *ctx)
 {
diff --git a/tools/testing/selftests/bpf/progs/connect_force_port4.c b/tools/testing/selftests/bpf/progs/connect_force_port4.c
index a979aaef2a76..27a632dd382e 100644
--- a/tools/testing/selftests/bpf/progs/connect_force_port4.c
+++ b/tools/testing/selftests/bpf/progs/connect_force_port4.c
@@ -13,7 +13,6 @@
 #include <bpf_sockopt_helpers.h>
 
 char _license[] SEC("license") = "GPL";
-int _version SEC("version") = 1;
 
 struct svc_addr {
 	__be32 addr;
diff --git a/tools/testing/selftests/bpf/progs/connect_force_port6.c b/tools/testing/selftests/bpf/progs/connect_force_port6.c
index afc8f1c5a9d6..19cad93e612f 100644
--- a/tools/testing/selftests/bpf/progs/connect_force_port6.c
+++ b/tools/testing/selftests/bpf/progs/connect_force_port6.c
@@ -12,7 +12,6 @@
 #include <bpf_sockopt_helpers.h>
 
 char _license[] SEC("license") = "GPL";
-int _version SEC("version") = 1;
 
 struct svc_addr {
 	__be32 addr[4];
diff --git a/tools/testing/selftests/bpf/progs/dev_cgroup.c b/tools/testing/selftests/bpf/progs/dev_cgroup.c
index 8924e06bdef0..79b54a4fa244 100644
--- a/tools/testing/selftests/bpf/progs/dev_cgroup.c
+++ b/tools/testing/selftests/bpf/progs/dev_cgroup.c
@@ -57,4 +57,3 @@ int bpf_prog1(struct bpf_cgroup_dev_ctx *ctx)
 }
 
 char _license[] SEC("license") = "GPL";
-__u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/tools/testing/selftests/bpf/progs/dummy_st_ops.c b/tools/testing/selftests/bpf/progs/dummy_st_ops.c
new file mode 100644
index 000000000000..ead87edb75e2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/dummy_st_ops.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2021. Huawei Technologies Co., Ltd */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+struct bpf_dummy_ops_state {
+	int val;
+} __attribute__((preserve_access_index));
+
+struct bpf_dummy_ops {
+	int (*test_1)(struct bpf_dummy_ops_state *state);
+	int (*test_2)(struct bpf_dummy_ops_state *state, int a1, unsigned short a2,
+		      char a3, unsigned long a4);
+};
+
+char _license[] SEC("license") = "GPL";
+
+SEC("struct_ops/test_1")
+int BPF_PROG(test_1, struct bpf_dummy_ops_state *state)
+{
+	int ret;
+
+	if (!state)
+		return 0xf2f3f4f5;
+
+	ret = state->val;
+	state->val = 0x5a;
+	return ret;
+}
+
+__u64 test_2_args[5];
+
+SEC("struct_ops/test_2")
+int BPF_PROG(test_2, struct bpf_dummy_ops_state *state, int a1, unsigned short a2,
+	     char a3, unsigned long a4)
+{
+	test_2_args[0] = (unsigned long)state;
+	test_2_args[1] = a1;
+	test_2_args[2] = a2;
+	test_2_args[3] = a3;
+	test_2_args[4] = a4;
+	return 0;
+}
+
+SEC(".struct_ops")
+struct bpf_dummy_ops dummy_1 = {
+	.test_1 = (void *)test_1,
+	.test_2 = (void *)test_2,
+};
diff --git a/tools/testing/selftests/bpf/progs/fexit_sleep.c b/tools/testing/selftests/bpf/progs/fexit_sleep.c
index 03a672d76353..bca92c9bd29a 100644
--- a/tools/testing/selftests/bpf/progs/fexit_sleep.c
+++ b/tools/testing/selftests/bpf/progs/fexit_sleep.c
@@ -13,7 +13,7 @@ int fexit_cnt = 0;
 SEC("fentry/__x64_sys_nanosleep")
 int BPF_PROG(nanosleep_fentry, const struct pt_regs *regs)
 {
-	if ((int)bpf_get_current_pid_tgid() != pid)
+	if (bpf_get_current_pid_tgid() >> 32 != pid)
 		return 0;
 
 	fentry_cnt++;
@@ -23,7 +23,7 @@ int BPF_PROG(nanosleep_fentry, const struct pt_regs *regs)
 SEC("fexit/__x64_sys_nanosleep")
 int BPF_PROG(nanosleep_fexit, const struct pt_regs *regs, int ret)
 {
-	if ((int)bpf_get_current_pid_tgid() != pid)
+	if (bpf_get_current_pid_tgid() >> 32 != pid)
 		return 0;
 
 	fexit_cnt++;
diff --git a/tools/testing/selftests/bpf/progs/for_each_array_map_elem.c b/tools/testing/selftests/bpf/progs/for_each_array_map_elem.c
index 75e8e1069fe7..df918b2469da 100644
--- a/tools/testing/selftests/bpf/progs/for_each_array_map_elem.c
+++ b/tools/testing/selftests/bpf/progs/for_each_array_map_elem.c
@@ -47,7 +47,7 @@ check_percpu_elem(struct bpf_map *map, __u32 *key, __u64 *val,
 
 u32 arraymap_output = 0;
 
-SEC("classifier")
+SEC("tc")
 int test_pkt_access(struct __sk_buff *skb)
 {
 	struct callback_ctx data;
diff --git a/tools/testing/selftests/bpf/progs/for_each_hash_map_elem.c b/tools/testing/selftests/bpf/progs/for_each_hash_map_elem.c
index 913dd91aafff..276994d5c0c7 100644
--- a/tools/testing/selftests/bpf/progs/for_each_hash_map_elem.c
+++ b/tools/testing/selftests/bpf/progs/for_each_hash_map_elem.c
@@ -78,7 +78,7 @@ int hashmap_output = 0;
 int hashmap_elems = 0;
 int percpu_map_elems = 0;
 
-SEC("classifier")
+SEC("tc")
 int test_pkt_access(struct __sk_buff *skb)
 {
 	struct callback_ctx data;
diff --git a/tools/testing/selftests/bpf/progs/get_branch_snapshot.c b/tools/testing/selftests/bpf/progs/get_branch_snapshot.c
new file mode 100644
index 000000000000..a1b139888048
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/get_branch_snapshot.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+__u64 test1_hits = 0;
+__u64 address_low = 0;
+__u64 address_high = 0;
+int wasted_entries = 0;
+long total_entries = 0;
+
+#define ENTRY_CNT 32
+struct perf_branch_entry entries[ENTRY_CNT] = {};
+
+static inline bool in_range(__u64 val)
+{
+	return (val >= address_low) && (val < address_high);
+}
+
+SEC("fexit/bpf_testmod_loop_test")
+int BPF_PROG(test1, int n, int ret)
+{
+	long i;
+
+	total_entries = bpf_get_branch_snapshot(entries, sizeof(entries), 0);
+	total_entries /= sizeof(struct perf_branch_entry);
+
+	for (i = 0; i < ENTRY_CNT; i++) {
+		if (i >= total_entries)
+			break;
+		if (in_range(entries[i].from) && in_range(entries[i].to))
+			test1_hits++;
+		else if (!test1_hits)
+			wasted_entries++;
+	}
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c b/tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c
index 6b42db2fe391..68587b1de34e 100644
--- a/tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c
+++ b/tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c
@@ -37,4 +37,3 @@ int trace(void *ctx)
 }
 
 char _license[] SEC("license") = "GPL";
-__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
diff --git a/tools/testing/selftests/bpf/progs/kfree_skb.c b/tools/testing/selftests/bpf/progs/kfree_skb.c
index 55e283050cab..7236da72ce80 100644
--- a/tools/testing/selftests/bpf/progs/kfree_skb.c
+++ b/tools/testing/selftests/bpf/progs/kfree_skb.c
@@ -9,8 +9,8 @@
 char _license[] SEC("license") = "GPL";
 struct {
 	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
-	__uint(key_size, sizeof(int));
-	__uint(value_size, sizeof(int));
+	__type(key, int);
+	__type(value, int);
 } perf_buf_map SEC(".maps");
 
 #define _(P) (__builtin_preserve_access_index(P))
diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_test.c b/tools/testing/selftests/bpf/progs/kfunc_call_test.c
index 470f8723e463..8a8cf59017aa 100644
--- a/tools/testing/selftests/bpf/progs/kfunc_call_test.c
+++ b/tools/testing/selftests/bpf/progs/kfunc_call_test.c
@@ -8,7 +8,7 @@ extern int bpf_kfunc_call_test2(struct sock *sk, __u32 a, __u32 b) __ksym;
 extern __u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b,
 				  __u32 c, __u64 d) __ksym;
 
-SEC("classifier")
+SEC("tc")
 int kfunc_call_test2(struct __sk_buff *skb)
 {
 	struct bpf_sock *sk = skb->sk;
@@ -23,7 +23,7 @@ int kfunc_call_test2(struct __sk_buff *skb)
 	return bpf_kfunc_call_test2((struct sock *)sk, 1, 2);
 }
 
-SEC("classifier")
+SEC("tc")
 int kfunc_call_test1(struct __sk_buff *skb)
 {
 	struct bpf_sock *sk = skb->sk;
diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c b/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c
index 5fbd9e232d44..c1fdecabeabf 100644
--- a/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c
+++ b/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c
@@ -33,7 +33,7 @@ int __noinline f1(struct __sk_buff *skb)
 	return (__u32)bpf_kfunc_call_test1((struct sock *)sk, 1, 2, 3, 4);
 }
 
-SEC("classifier")
+SEC("tc")
 int kfunc_call_test1(struct __sk_buff *skb)
 {
 	return f1(skb);
diff --git a/tools/testing/selftests/bpf/progs/map_ptr_kern.c b/tools/testing/selftests/bpf/progs/map_ptr_kern.c
index d1d304c980f0..b1b711d9b214 100644
--- a/tools/testing/selftests/bpf/progs/map_ptr_kern.c
+++ b/tools/testing/selftests/bpf/progs/map_ptr_kern.c
@@ -683,5 +683,4 @@ int cg_skb(void *ctx)
 	return 1;
 }
 
-__u32 _version SEC("version") = 1;
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/netcnt_prog.c b/tools/testing/selftests/bpf/progs/netcnt_prog.c
index 43649bce4c54..f718b2c212dc 100644
--- a/tools/testing/selftests/bpf/progs/netcnt_prog.c
+++ b/tools/testing/selftests/bpf/progs/netcnt_prog.c
@@ -68,4 +68,3 @@ int bpf_nextcnt(struct __sk_buff *skb)
 }
 
 char _license[] SEC("license") = "GPL";
-__u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/tools/testing/selftests/bpf/progs/perf_event_stackmap.c b/tools/testing/selftests/bpf/progs/perf_event_stackmap.c
index 25467d13c356..b3fcb5274ee0 100644
--- a/tools/testing/selftests/bpf/progs/perf_event_stackmap.c
+++ b/tools/testing/selftests/bpf/progs/perf_event_stackmap.c
@@ -11,8 +11,8 @@ typedef __u64 stack_trace_t[PERF_MAX_STACK_DEPTH];
 struct {
 	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
 	__uint(max_entries, 16384);
-	__uint(key_size, sizeof(__u32));
-	__uint(value_size, sizeof(stack_trace_t));
+	__type(key, __u32);
+	__type(value, stack_trace_t);
 } stackmap SEC(".maps");
 
 struct {
diff --git a/tools/testing/selftests/bpf/progs/recursion.c b/tools/testing/selftests/bpf/progs/recursion.c
index 49f679375b9d..3c2423bb19e2 100644
--- a/tools/testing/selftests/bpf/progs/recursion.c
+++ b/tools/testing/selftests/bpf/progs/recursion.c
@@ -24,8 +24,8 @@ struct {
 int pass1 = 0;
 int pass2 = 0;
 
-SEC("fentry/__htab_map_lookup_elem")
-int BPF_PROG(on_lookup, struct bpf_map *map)
+SEC("fentry/htab_map_delete_elem")
+int BPF_PROG(on_delete, struct bpf_map *map)
 {
 	int key = 0;
 
@@ -35,10 +35,7 @@ int BPF_PROG(on_lookup, struct bpf_map *map)
 	}
 	if (map == (void *)&hash2) {
 		pass2++;
-		/* htab_map_gen_lookup() will inline below call
-		 * into direct call to __htab_map_lookup_elem()
-		 */
-		bpf_map_lookup_elem(&hash2, &key);
+		bpf_map_delete_elem(&hash2, &key);
 		return 0;
 	}
 
diff --git a/tools/testing/selftests/bpf/progs/sendmsg4_prog.c b/tools/testing/selftests/bpf/progs/sendmsg4_prog.c
index ac5abc34cde8..ea75a44cb7fc 100644
--- a/tools/testing/selftests/bpf/progs/sendmsg4_prog.c
+++ b/tools/testing/selftests/bpf/progs/sendmsg4_prog.c
@@ -18,8 +18,6 @@
 #define DST_PORT		4040
 #define DST_REWRITE_PORT4	4444
 
-int _version SEC("version") = 1;
-
 SEC("cgroup/sendmsg4")
 int sendmsg_v4_prog(struct bpf_sock_addr *ctx)
 {
diff --git a/tools/testing/selftests/bpf/progs/sendmsg6_prog.c b/tools/testing/selftests/bpf/progs/sendmsg6_prog.c
index 24694b1a8d82..bf9b46b806f6 100644
--- a/tools/testing/selftests/bpf/progs/sendmsg6_prog.c
+++ b/tools/testing/selftests/bpf/progs/sendmsg6_prog.c
@@ -22,8 +22,6 @@
 
 #define DST_REWRITE_PORT6	6666
 
-int _version SEC("version") = 1;
-
 SEC("cgroup/sendmsg6")
 int sendmsg_v6_prog(struct bpf_sock_addr *ctx)
 {
diff --git a/tools/testing/selftests/bpf/progs/skb_pkt_end.c b/tools/testing/selftests/bpf/progs/skb_pkt_end.c
index 7f2eaa2f89f8..992b7861003a 100644
--- a/tools/testing/selftests/bpf/progs/skb_pkt_end.c
+++ b/tools/testing/selftests/bpf/progs/skb_pkt_end.c
@@ -25,7 +25,7 @@ out:
 	return ip;
 }
 
-SEC("classifier/cls")
+SEC("tc")
 int main_prog(struct __sk_buff *skb)
 {
 	struct iphdr *ip = NULL;
diff --git a/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c
index ca283af80d4e..95d5b941bc1f 100644
--- a/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c
+++ b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c
@@ -2,8 +2,6 @@
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_endian.h>
 
-int _version SEC("version") = 1;
-
 SEC("sk_skb1")
 int bpf_prog1(struct __sk_buff *skb)
 {
diff --git a/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c b/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c
index eeaf6e75c9a2..80632954c5a1 100644
--- a/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c
+++ b/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c
@@ -3,8 +3,6 @@
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_endian.h>
 
-int _version SEC("version") = 1;
-
 SEC("sk_msg1")
 int bpf_prog1(struct sk_msg_md *msg)
 {
diff --git a/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c b/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c
index 4797dc985064..e2468a6d01a5 100644
--- a/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c
+++ b/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c
@@ -2,27 +2,25 @@
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_endian.h>
 
-int _version SEC("version") = 1;
-
 struct {
 	__uint(type, BPF_MAP_TYPE_SOCKMAP);
 	__uint(max_entries, 20);
-	__uint(key_size, sizeof(int));
-	__uint(value_size, sizeof(int));
+	__type(key, int);
+	__type(value, int);
 } sock_map_rx SEC(".maps");
 
 struct {
 	__uint(type, BPF_MAP_TYPE_SOCKMAP);
 	__uint(max_entries, 20);
-	__uint(key_size, sizeof(int));
-	__uint(value_size, sizeof(int));
+	__type(key, int);
+	__type(value, int);
 } sock_map_tx SEC(".maps");
 
 struct {
 	__uint(type, BPF_MAP_TYPE_SOCKMAP);
 	__uint(max_entries, 20);
-	__uint(key_size, sizeof(int));
-	__uint(value_size, sizeof(int));
+	__type(key, int);
+	__type(value, int);
 } sock_map_msg SEC(".maps");
 
 struct {
diff --git a/tools/testing/selftests/bpf/progs/sockopt_inherit.c b/tools/testing/selftests/bpf/progs/sockopt_inherit.c
index c6d428a8d785..9fb241b97291 100644
--- a/tools/testing/selftests/bpf/progs/sockopt_inherit.c
+++ b/tools/testing/selftests/bpf/progs/sockopt_inherit.c
@@ -3,7 +3,6 @@
 #include <bpf/bpf_helpers.h>
 
 char _license[] SEC("license") = "GPL";
-__u32 _version SEC("version") = 1;
 
 #define SOL_CUSTOM			0xdeadbeef
 #define CUSTOM_INHERIT1			0
diff --git a/tools/testing/selftests/bpf/progs/sockopt_multi.c b/tools/testing/selftests/bpf/progs/sockopt_multi.c
index 9d8c212dde9f..177a59069dae 100644
--- a/tools/testing/selftests/bpf/progs/sockopt_multi.c
+++ b/tools/testing/selftests/bpf/progs/sockopt_multi.c
@@ -4,9 +4,8 @@
 #include <bpf/bpf_helpers.h>
 
 char _license[] SEC("license") = "GPL";
-__u32 _version SEC("version") = 1;
 
-SEC("cgroup/getsockopt/child")
+SEC("cgroup/getsockopt")
 int _getsockopt_child(struct bpf_sockopt *ctx)
 {
 	__u8 *optval_end = ctx->optval_end;
@@ -29,7 +28,7 @@ int _getsockopt_child(struct bpf_sockopt *ctx)
 	return 1;
 }
 
-SEC("cgroup/getsockopt/parent")
+SEC("cgroup/getsockopt")
 int _getsockopt_parent(struct bpf_sockopt *ctx)
 {
 	__u8 *optval_end = ctx->optval_end;
diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h
index 7de534f38c3f..60c93aee2f4a 100644
--- a/tools/testing/selftests/bpf/progs/strobemeta.h
+++ b/tools/testing/selftests/bpf/progs/strobemeta.h
@@ -358,7 +358,7 @@ static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg,
 					     void *payload)
 {
 	void *location;
-	uint32_t len;
+	uint64_t len;
 
 	data->str_lens[idx] = 0;
 	location = calc_location(&cfg->str_locs[idx], tls_base);
@@ -390,7 +390,7 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
 	struct strobe_map_descr* descr = &data->map_descrs[idx];
 	struct strobe_map_raw map;
 	void *location;
-	uint32_t len;
+	uint64_t len;
 	int i;
 
 	descr->tag_len = 0; /* presume no tag is set */
diff --git a/tools/testing/selftests/bpf/progs/tag.c b/tools/testing/selftests/bpf/progs/tag.c
new file mode 100644
index 000000000000..1792f4eda095
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tag.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#ifndef __has_attribute
+#define __has_attribute(x) 0
+#endif
+
+#if __has_attribute(btf_decl_tag)
+#define __tag1 __attribute__((btf_decl_tag("tag1")))
+#define __tag2 __attribute__((btf_decl_tag("tag2")))
+volatile const bool skip_tests __tag1 __tag2 = false;
+#else
+#define __tag1
+#define __tag2
+volatile const bool skip_tests = true;
+#endif
+
+struct key_t {
+	int a;
+	int b __tag1 __tag2;
+	int c;
+} __tag1 __tag2;
+
+typedef struct {
+	int a;
+	int b;
+} value_t __tag1 __tag2;
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 3);
+	__type(key, struct key_t);
+	__type(value, value_t);
+} hashmap1 SEC(".maps");
+
+
+static __noinline int foo(int x __tag1 __tag2) __tag1 __tag2
+{
+	struct key_t key;
+	value_t val = {};
+
+	key.a = key.b = key.c = x;
+	bpf_map_update_elem(&hashmap1, &key, &val, 0);
+	return 0;
+}
+
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(sub, int x)
+{
+	return foo(x);
+}
diff --git a/tools/testing/selftests/bpf/progs/tailcall1.c b/tools/testing/selftests/bpf/progs/tailcall1.c
index 7115bcefbe8a..8159a0b4a69a 100644
--- a/tools/testing/selftests/bpf/progs/tailcall1.c
+++ b/tools/testing/selftests/bpf/progs/tailcall1.c
@@ -11,8 +11,8 @@ struct {
 } jmp_table SEC(".maps");
 
 #define TAIL_FUNC(x) 				\
-	SEC("classifier/" #x)			\
-	int bpf_func_##x(struct __sk_buff *skb)	\
+	SEC("tc")				\
+	int classifier_##x(struct __sk_buff *skb)	\
 	{					\
 		return x;			\
 	}
@@ -20,7 +20,7 @@ TAIL_FUNC(0)
 TAIL_FUNC(1)
 TAIL_FUNC(2)
 
-SEC("classifier")
+SEC("tc")
 int entry(struct __sk_buff *skb)
 {
 	/* Multiple locations to make sure we patch
@@ -45,4 +45,3 @@ int entry(struct __sk_buff *skb)
 }
 
 char __license[] SEC("license") = "GPL";
-int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/tailcall2.c b/tools/testing/selftests/bpf/progs/tailcall2.c
index 0431e4fe7efd..a5ff53e61702 100644
--- a/tools/testing/selftests/bpf/progs/tailcall2.c
+++ b/tools/testing/selftests/bpf/progs/tailcall2.c
@@ -10,41 +10,41 @@ struct {
 	__uint(value_size, sizeof(__u32));
 } jmp_table SEC(".maps");
 
-SEC("classifier/0")
-int bpf_func_0(struct __sk_buff *skb)
+SEC("tc")
+int classifier_0(struct __sk_buff *skb)
 {
 	bpf_tail_call_static(skb, &jmp_table, 1);
 	return 0;
 }
 
-SEC("classifier/1")
-int bpf_func_1(struct __sk_buff *skb)
+SEC("tc")
+int classifier_1(struct __sk_buff *skb)
 {
 	bpf_tail_call_static(skb, &jmp_table, 2);
 	return 1;
 }
 
-SEC("classifier/2")
-int bpf_func_2(struct __sk_buff *skb)
+SEC("tc")
+int classifier_2(struct __sk_buff *skb)
 {
 	return 2;
 }
 
-SEC("classifier/3")
-int bpf_func_3(struct __sk_buff *skb)
+SEC("tc")
+int classifier_3(struct __sk_buff *skb)
 {
 	bpf_tail_call_static(skb, &jmp_table, 4);
 	return 3;
 }
 
-SEC("classifier/4")
-int bpf_func_4(struct __sk_buff *skb)
+SEC("tc")
+int classifier_4(struct __sk_buff *skb)
 {
 	bpf_tail_call_static(skb, &jmp_table, 3);
 	return 4;
 }
 
-SEC("classifier")
+SEC("tc")
 int entry(struct __sk_buff *skb)
 {
 	bpf_tail_call_static(skb, &jmp_table, 0);
@@ -56,4 +56,3 @@ int entry(struct __sk_buff *skb)
 }
 
 char __license[] SEC("license") = "GPL";
-int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/tailcall3.c b/tools/testing/selftests/bpf/progs/tailcall3.c
index 910858fe078a..f60bcd7b8d4b 100644
--- a/tools/testing/selftests/bpf/progs/tailcall3.c
+++ b/tools/testing/selftests/bpf/progs/tailcall3.c
@@ -12,15 +12,15 @@ struct {
 
 int count = 0;
 
-SEC("classifier/0")
-int bpf_func_0(struct __sk_buff *skb)
+SEC("tc")
+int classifier_0(struct __sk_buff *skb)
 {
 	count++;
 	bpf_tail_call_static(skb, &jmp_table, 0);
 	return 1;
 }
 
-SEC("classifier")
+SEC("tc")
 int entry(struct __sk_buff *skb)
 {
 	bpf_tail_call_static(skb, &jmp_table, 0);
@@ -28,4 +28,3 @@ int entry(struct __sk_buff *skb)
 }
 
 char __license[] SEC("license") = "GPL";
-int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/tailcall4.c b/tools/testing/selftests/bpf/progs/tailcall4.c
index bd4be135c39d..a56bbc2313ca 100644
--- a/tools/testing/selftests/bpf/progs/tailcall4.c
+++ b/tools/testing/selftests/bpf/progs/tailcall4.c
@@ -13,8 +13,8 @@ struct {
 int selector = 0;
 
 #define TAIL_FUNC(x)				\
-	SEC("classifier/" #x)			\
-	int bpf_func_##x(struct __sk_buff *skb)	\
+	SEC("tc")				\
+	int classifier_##x(struct __sk_buff *skb)	\
 	{					\
 		return x;			\
 	}
@@ -22,7 +22,7 @@ TAIL_FUNC(0)
 TAIL_FUNC(1)
 TAIL_FUNC(2)
 
-SEC("classifier")
+SEC("tc")
 int entry(struct __sk_buff *skb)
 {
 	bpf_tail_call(skb, &jmp_table, selector);
@@ -30,4 +30,3 @@ int entry(struct __sk_buff *skb)
 }
 
 char __license[] SEC("license") = "GPL";
-int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/tailcall5.c b/tools/testing/selftests/bpf/progs/tailcall5.c
index adf30a33064e..8d03496eb6ca 100644
--- a/tools/testing/selftests/bpf/progs/tailcall5.c
+++ b/tools/testing/selftests/bpf/progs/tailcall5.c
@@ -13,8 +13,8 @@ struct {
 int selector = 0;
 
 #define TAIL_FUNC(x)				\
-	SEC("classifier/" #x)			\
-	int bpf_func_##x(struct __sk_buff *skb)	\
+	SEC("tc")				\
+	int classifier_##x(struct __sk_buff *skb)	\
 	{					\
 		return x;			\
 	}
@@ -22,7 +22,7 @@ TAIL_FUNC(0)
 TAIL_FUNC(1)
 TAIL_FUNC(2)
 
-SEC("classifier")
+SEC("tc")
 int entry(struct __sk_buff *skb)
 {
 	int idx = 0;
@@ -37,4 +37,3 @@ int entry(struct __sk_buff *skb)
 }
 
 char __license[] SEC("license") = "GPL";
-int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/tailcall6.c b/tools/testing/selftests/bpf/progs/tailcall6.c
new file mode 100644
index 000000000000..d77b8abd62f3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall6.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+
+#include <bpf/bpf_helpers.h>
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+	__uint(max_entries, 1);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+int count, which;
+
+SEC("tc")
+int classifier_0(struct __sk_buff *skb)
+{
+	count++;
+	if (__builtin_constant_p(which))
+		__bpf_unreachable();
+	bpf_tail_call(skb, &jmp_table, which);
+	return 1;
+}
+
+SEC("tc")
+int entry(struct __sk_buff *skb)
+{
+	if (__builtin_constant_p(which))
+		__bpf_unreachable();
+	bpf_tail_call(skb, &jmp_table, which);
+	return 0;
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf1.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf1.c
index 0103f3dd9f02..8c91428deb90 100644
--- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf1.c
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf1.c
@@ -10,8 +10,8 @@ struct {
 } jmp_table SEC(".maps");
 
 #define TAIL_FUNC(x) 				\
-	SEC("classifier/" #x)			\
-	int bpf_func_##x(struct __sk_buff *skb)	\
+	SEC("tc")				\
+	int classifier_##x(struct __sk_buff *skb)	\
 	{					\
 		return x;			\
 	}
@@ -26,7 +26,7 @@ int subprog_tail(struct __sk_buff *skb)
 	return skb->len * 2;
 }
 
-SEC("classifier")
+SEC("tc")
 int entry(struct __sk_buff *skb)
 {
 	bpf_tail_call_static(skb, &jmp_table, 1);
@@ -35,4 +35,3 @@ int entry(struct __sk_buff *skb)
 }
 
 char __license[] SEC("license") = "GPL";
-int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c
index 3cc4c12817b5..ce97d141daee 100644
--- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c
@@ -22,14 +22,14 @@ int subprog_tail(struct __sk_buff *skb)
 
 int count = 0;
 
-SEC("classifier/0")
-int bpf_func_0(struct __sk_buff *skb)
+SEC("tc")
+int classifier_0(struct __sk_buff *skb)
 {
 	count++;
 	return subprog_tail(skb);
 }
 
-SEC("classifier")
+SEC("tc")
 int entry(struct __sk_buff *skb)
 {
 	bpf_tail_call_static(skb, &jmp_table, 0);
@@ -38,4 +38,3 @@ int entry(struct __sk_buff *skb)
 }
 
 char __license[] SEC("license") = "GPL";
-int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c
index 0d5482bea6c9..7fab39a3bb12 100644
--- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c
@@ -33,23 +33,23 @@ int subprog_tail(struct __sk_buff *skb)
 	return skb->len * 2;
 }
 
-SEC("classifier/0")
-int bpf_func_0(struct __sk_buff *skb)
+SEC("tc")
+int classifier_0(struct __sk_buff *skb)
 {
 	volatile char arr[128] = {};
 
 	return subprog_tail2(skb);
 }
 
-SEC("classifier/1")
-int bpf_func_1(struct __sk_buff *skb)
+SEC("tc")
+int classifier_1(struct __sk_buff *skb)
 {
 	volatile char arr[128] = {};
 
 	return skb->len * 3;
 }
 
-SEC("classifier")
+SEC("tc")
 int entry(struct __sk_buff *skb)
 {
 	volatile char arr[128] = {};
@@ -58,4 +58,3 @@ int entry(struct __sk_buff *skb)
 }
 
 char __license[] SEC("license") = "GPL";
-int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c
index e89368a50b97..b67e8022d500 100644
--- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c
@@ -50,30 +50,29 @@ int subprog_tail(struct __sk_buff *skb)
 	return skb->len;
 }
 
-SEC("classifier/1")
-int bpf_func_1(struct __sk_buff *skb)
+SEC("tc")
+int classifier_1(struct __sk_buff *skb)
 {
 	return subprog_tail_2(skb);
 }
 
-SEC("classifier/2")
-int bpf_func_2(struct __sk_buff *skb)
+SEC("tc")
+int classifier_2(struct __sk_buff *skb)
 {
 	count++;
 	return subprog_tail_2(skb);
 }
 
-SEC("classifier/0")
-int bpf_func_0(struct __sk_buff *skb)
+SEC("tc")
+int classifier_0(struct __sk_buff *skb)
 {
 	return subprog_tail_1(skb);
 }
 
-SEC("classifier")
+SEC("tc")
 int entry(struct __sk_buff *skb)
 {
 	return subprog_tail(skb);
 }
 
 char __license[] SEC("license") = "GPL";
-int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/tcp_rtt.c b/tools/testing/selftests/bpf/progs/tcp_rtt.c
index 0cb3204ddb18..0988d79f1587 100644
--- a/tools/testing/selftests/bpf/progs/tcp_rtt.c
+++ b/tools/testing/selftests/bpf/progs/tcp_rtt.c
@@ -3,7 +3,6 @@
 #include <bpf/bpf_helpers.h>
 
 char _license[] SEC("license") = "GPL";
-__u32 _version SEC("version") = 1;
 
 struct tcp_rtt_storage {
 	__u32 invoked;
diff --git a/tools/testing/selftests/bpf/progs/test_btf_haskv.c b/tools/testing/selftests/bpf/progs/test_btf_haskv.c
index 31538c9ed193..160ead6c67b2 100644
--- a/tools/testing/selftests/bpf/progs/test_btf_haskv.c
+++ b/tools/testing/selftests/bpf/progs/test_btf_haskv.c
@@ -4,8 +4,6 @@
 #include <bpf/bpf_helpers.h>
 #include "bpf_legacy.h"
 
-int _version SEC("version") = 1;
-
 struct ipv_counts {
 	unsigned int v4;
 	unsigned int v6;
diff --git a/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c b/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c
index c1e0c8c7c55f..c218cf8989a9 100644
--- a/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c
+++ b/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c
@@ -21,8 +21,8 @@ struct inner_map_sz2 {
 struct outer_arr {
 	__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
 	__uint(max_entries, 3);
-	__uint(key_size, sizeof(int));
-	__uint(value_size, sizeof(int));
+	__type(key, int);
+	__type(value, int);
 	/* it's possible to use anonymous struct as inner map definition here */
 	__array(values, struct {
 		__uint(type, BPF_MAP_TYPE_ARRAY);
@@ -61,8 +61,8 @@ struct inner_map_sz4 {
 struct outer_arr_dyn {
 	__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
 	__uint(max_entries, 3);
-	__uint(key_size, sizeof(int));
-	__uint(value_size, sizeof(int));
+	__type(key, int);
+	__type(value, int);
 	__array(values, struct {
 		__uint(type, BPF_MAP_TYPE_ARRAY);
 		__uint(map_flags, BPF_F_INNER_MAP);
@@ -81,7 +81,7 @@ struct outer_arr_dyn {
 struct outer_hash {
 	__uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
 	__uint(max_entries, 5);
-	__uint(key_size, sizeof(int));
+	__type(key, int);
 	/* Here everything works flawlessly due to reuse of struct inner_map
 	 * and compiler will complain at the attempt to use non-inner_map
 	 * references below. This is great experience.
@@ -111,8 +111,8 @@ struct sockarr_sz2 {
 struct outer_sockarr_sz1 {
 	__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
 	__uint(max_entries, 1);
-	__uint(key_size, sizeof(int));
-	__uint(value_size, sizeof(int));
+	__type(key, int);
+	__type(value, int);
 	__array(values, struct sockarr_sz1);
 } outer_sockarr SEC(".maps") = {
 	.values = { (void *)&sockarr_sz1 },
diff --git a/tools/testing/selftests/bpf/progs/test_btf_newkv.c b/tools/testing/selftests/bpf/progs/test_btf_newkv.c
index 6c5560162746..1884a5bd10f5 100644
--- a/tools/testing/selftests/bpf/progs/test_btf_newkv.c
+++ b/tools/testing/selftests/bpf/progs/test_btf_newkv.c
@@ -4,8 +4,6 @@
 #include <bpf/bpf_helpers.h>
 #include "bpf_legacy.h"
 
-int _version SEC("version") = 1;
-
 struct ipv_counts {
 	unsigned int v4;
 	unsigned int v6;
diff --git a/tools/testing/selftests/bpf/progs/test_btf_nokv.c b/tools/testing/selftests/bpf/progs/test_btf_nokv.c
index 506da7fd2da2..15e0f9945fe4 100644
--- a/tools/testing/selftests/bpf/progs/test_btf_nokv.c
+++ b/tools/testing/selftests/bpf/progs/test_btf_nokv.c
@@ -3,8 +3,6 @@
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
 
-int _version SEC("version") = 1;
-
 struct ipv_counts {
 	unsigned int v4;
 	unsigned int v6;
diff --git a/tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c b/tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c
index 9a6b85dd52d2..e2bea4da194b 100644
--- a/tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c
+++ b/tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c
@@ -145,7 +145,7 @@ release:
 	return TC_ACT_OK;
 }
 
-SEC("classifier/ingress")
+SEC("tc")
 int cls_ingress(struct __sk_buff *skb)
 {
 	struct ipv6hdr *ip6h;
diff --git a/tools/testing/selftests/bpf/progs/test_cgroup_link.c b/tools/testing/selftests/bpf/progs/test_cgroup_link.c
index 77e47b9e4446..4faba88e45a5 100644
--- a/tools/testing/selftests/bpf/progs/test_cgroup_link.c
+++ b/tools/testing/selftests/bpf/progs/test_cgroup_link.c
@@ -6,14 +6,14 @@
 int calls = 0;
 int alt_calls = 0;
 
-SEC("cgroup_skb/egress1")
+SEC("cgroup_skb/egress")
 int egress(struct __sk_buff *skb)
 {
 	__sync_fetch_and_add(&calls, 1);
 	return 1;
 }
 
-SEC("cgroup_skb/egress2")
+SEC("cgroup_skb/egress")
 int egress_alt(struct __sk_buff *skb)
 {
 	__sync_fetch_and_add(&alt_calls, 1);
diff --git a/tools/testing/selftests/bpf/progs/test_check_mtu.c b/tools/testing/selftests/bpf/progs/test_check_mtu.c
index 71184af57749..2ec1de11a3ae 100644
--- a/tools/testing/selftests/bpf/progs/test_check_mtu.c
+++ b/tools/testing/selftests/bpf/progs/test_check_mtu.c
@@ -153,7 +153,7 @@ int xdp_input_len_exceed(struct xdp_md *ctx)
 	return retval;
 }
 
-SEC("classifier")
+SEC("tc")
 int tc_use_helper(struct __sk_buff *ctx)
 {
 	int retval = BPF_OK; /* Expected retval on successful test */
@@ -172,7 +172,7 @@ out:
 	return retval;
 }
 
-SEC("classifier")
+SEC("tc")
 int tc_exceed_mtu(struct __sk_buff *ctx)
 {
 	__u32 ifindex = GLOBAL_USER_IFINDEX;
@@ -196,7 +196,7 @@ int tc_exceed_mtu(struct __sk_buff *ctx)
 	return retval;
 }
 
-SEC("classifier")
+SEC("tc")
 int tc_exceed_mtu_da(struct __sk_buff *ctx)
 {
 	/* SKB Direct-Access variant */
@@ -223,7 +223,7 @@ int tc_exceed_mtu_da(struct __sk_buff *ctx)
 	return retval;
 }
 
-SEC("classifier")
+SEC("tc")
 int tc_minus_delta(struct __sk_buff *ctx)
 {
 	int retval = BPF_OK; /* Expected retval on successful test */
@@ -245,7 +245,7 @@ int tc_minus_delta(struct __sk_buff *ctx)
 	return retval;
 }
 
-SEC("classifier")
+SEC("tc")
 int tc_input_len(struct __sk_buff *ctx)
 {
 	int retval = BPF_OK; /* Expected retval on successful test */
@@ -265,7 +265,7 @@ int tc_input_len(struct __sk_buff *ctx)
 	return retval;
 }
 
-SEC("classifier")
+SEC("tc")
 int tc_input_len_exceed(struct __sk_buff *ctx)
 {
 	int retval = BPF_DROP; /* Fail */
diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect.c b/tools/testing/selftests/bpf/progs/test_cls_redirect.c
index e2a5acc4785c..2833ad722cb7 100644
--- a/tools/testing/selftests/bpf/progs/test_cls_redirect.c
+++ b/tools/testing/selftests/bpf/progs/test_cls_redirect.c
@@ -928,7 +928,7 @@ static INLINING verdict_t process_ipv6(buf_t *pkt, metrics_t *metrics)
 	}
 }
 
-SEC("classifier/cls_redirect")
+SEC("tc")
 int cls_redirect(struct __sk_buff *skb)
 {
 	metrics_t *metrics = get_global_metrics();
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_mods.c b/tools/testing/selftests/bpf/progs/test_core_reloc_mods.c
index 8b533db4a7a5..b2ded497572a 100644
--- a/tools/testing/selftests/bpf/progs/test_core_reloc_mods.c
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_mods.c
@@ -42,7 +42,16 @@ struct core_reloc_mods {
 	core_reloc_mods_substruct_t h;
 };
 
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
 #define CORE_READ(dst, src) bpf_core_read(dst, sizeof(*(dst)), src)
+#else
+#define CORE_READ(dst, src) ({ \
+	int __sz = sizeof(*(dst)) < sizeof(*(src)) ? sizeof(*(dst)) : \
+						     sizeof(*(src)); \
+	bpf_core_read((char *)(dst) + sizeof(*(dst)) - __sz, __sz, \
+		      (const char *)(src) + sizeof(*(src)) - __sz); \
+})
+#endif
 
 SEC("raw_tracepoint/sys_enter")
 int test_core_mods(void *ctx)
diff --git a/tools/testing/selftests/bpf/progs/test_enable_stats.c b/tools/testing/selftests/bpf/progs/test_enable_stats.c
index 01a002ade529..1705097d01d7 100644
--- a/tools/testing/selftests/bpf/progs/test_enable_stats.c
+++ b/tools/testing/selftests/bpf/progs/test_enable_stats.c
@@ -13,6 +13,6 @@ __u64 count = 0;
 SEC("raw_tracepoint/sys_enter")
 int test_enable_stats(void *ctx)
 {
-	count += 1;
+	__sync_fetch_and_add(&count, 1);
 	return 0;
 }
diff --git a/tools/testing/selftests/bpf/progs/test_global_data.c b/tools/testing/selftests/bpf/progs/test_global_data.c
index 1319be1c54ba..719e314ef3e4 100644
--- a/tools/testing/selftests/bpf/progs/test_global_data.c
+++ b/tools/testing/selftests/bpf/progs/test_global_data.c
@@ -68,7 +68,7 @@ static struct foo struct3 = {
 		bpf_map_update_elem(&result_##map, &key, var, 0);	\
 	} while (0)
 
-SEC("classifier/static_data_load")
+SEC("tc")
 int load_static_data(struct __sk_buff *skb)
 {
 	static const __u64 bar = ~0;
diff --git a/tools/testing/selftests/bpf/progs/test_global_func1.c b/tools/testing/selftests/bpf/progs/test_global_func1.c
index 880260f6d536..7b42dad187b8 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func1.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func1.c
@@ -38,7 +38,7 @@ int f3(int val, struct __sk_buff *skb, int var)
 	return skb->ifindex * val * var;
 }
 
-SEC("classifier/test")
+SEC("tc")
 int test_cls(struct __sk_buff *skb)
 {
 	return f0(1, skb) + f1(skb) + f2(2, skb) + f3(3, skb, 4);
diff --git a/tools/testing/selftests/bpf/progs/test_global_func3.c b/tools/testing/selftests/bpf/progs/test_global_func3.c
index 86f0ecb304fc..01bf8275dfd6 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func3.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func3.c
@@ -54,7 +54,7 @@ int f8(struct __sk_buff *skb)
 }
 #endif
 
-SEC("classifier/test")
+SEC("tc")
 int test_cls(struct __sk_buff *skb)
 {
 #ifndef NO_FN8
diff --git a/tools/testing/selftests/bpf/progs/test_global_func5.c b/tools/testing/selftests/bpf/progs/test_global_func5.c
index 260c25b827ef..9248d03e0d06 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func5.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func5.c
@@ -24,7 +24,7 @@ int f3(int val, struct __sk_buff *skb)
 	return skb->ifindex * val;
 }
 
-SEC("classifier/test")
+SEC("tc")
 int test_cls(struct __sk_buff *skb)
 {
 	return f1(skb) + f2(2, skb) + f3(3, skb);
diff --git a/tools/testing/selftests/bpf/progs/test_global_func6.c b/tools/testing/selftests/bpf/progs/test_global_func6.c
index 69e19c64e10b..af8c78bdfb25 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func6.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func6.c
@@ -24,7 +24,7 @@ int f3(int val, struct __sk_buff *skb)
 	return skb->ifindex * val;
 }
 
-SEC("classifier/test")
+SEC("tc")
 int test_cls(struct __sk_buff *skb)
 {
 	return f1(skb) + f2(2, skb) + f3(3, skb);
diff --git a/tools/testing/selftests/bpf/progs/test_global_func7.c b/tools/testing/selftests/bpf/progs/test_global_func7.c
index 309b3f6136bd..6cb8e2f5254c 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func7.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func7.c
@@ -10,7 +10,7 @@ void foo(struct __sk_buff *skb)
 	skb->tc_index = 0;
 }
 
-SEC("classifier/test")
+SEC("tc")
 int test_cls(struct __sk_buff *skb)
 {
 	foo(skb);
diff --git a/tools/testing/selftests/bpf/progs/test_ksyms_module.c b/tools/testing/selftests/bpf/progs/test_ksyms_module.c
index d6a0b3086b90..0650d918c096 100644
--- a/tools/testing/selftests/bpf/progs/test_ksyms_module.c
+++ b/tools/testing/selftests/bpf/progs/test_ksyms_module.c
@@ -2,24 +2,48 @@
 /* Copyright (c) 2021 Facebook */
 
 #include "vmlinux.h"
-
 #include <bpf/bpf_helpers.h>
 
+#define X_0(x)
+#define X_1(x) x X_0(x)
+#define X_2(x) x X_1(x)
+#define X_3(x) x X_2(x)
+#define X_4(x) x X_3(x)
+#define X_5(x) x X_4(x)
+#define X_6(x) x X_5(x)
+#define X_7(x) x X_6(x)
+#define X_8(x) x X_7(x)
+#define X_9(x) x X_8(x)
+#define X_10(x) x X_9(x)
+#define REPEAT_256(Y) X_2(X_10(X_10(Y))) X_5(X_10(Y)) X_6(Y)
+
 extern const int bpf_testmod_ksym_percpu __ksym;
+extern void bpf_testmod_test_mod_kfunc(int i) __ksym;
+extern void bpf_testmod_invalid_mod_kfunc(void) __ksym __weak;
 
-int out_mod_ksym_global = 0;
-bool triggered = false;
+int out_bpf_testmod_ksym = 0;
+const volatile int x = 0;
 
-SEC("raw_tp/sys_enter")
-int handler(const void *ctx)
+SEC("tc")
+int load(struct __sk_buff *skb)
 {
-	int *val;
-	__u32 cpu;
-
-	val = (int *)bpf_this_cpu_ptr(&bpf_testmod_ksym_percpu);
-	out_mod_ksym_global = *val;
-	triggered = true;
+	/* This will be kept by clang, but removed by verifier. Since it is
+	 * marked as __weak, libbpf and gen_loader don't error out if BTF ID
+	 * is not found for it, instead imm and off is set to 0 for it.
+	 */
+	if (x)
+		bpf_testmod_invalid_mod_kfunc();
+	bpf_testmod_test_mod_kfunc(42);
+	out_bpf_testmod_ksym = *(int *)bpf_this_cpu_ptr(&bpf_testmod_ksym_percpu);
+	return 0;
+}
 
+SEC("tc")
+int load_256(struct __sk_buff *skb)
+{
+	/* this will fail if kfunc doesn't reuse its own btf fd index */
+	REPEAT_256(bpf_testmod_test_mod_kfunc(42););
+	bpf_testmod_test_mod_kfunc(42);
 	return 0;
 }
 
diff --git a/tools/testing/selftests/bpf/progs/test_ksyms_weak.c b/tools/testing/selftests/bpf/progs/test_ksyms_weak.c
index 5f8379aadb29..8eadbd4caf7a 100644
--- a/tools/testing/selftests/bpf/progs/test_ksyms_weak.c
+++ b/tools/testing/selftests/bpf/progs/test_ksyms_weak.c
@@ -38,7 +38,7 @@ int pass_handler(const void *ctx)
 	/* tests existing symbols. */
 	rq = (struct rq *)bpf_per_cpu_ptr(&runqueues, 0);
 	if (rq)
-		out__existing_typed = rq->cpu;
+		out__existing_typed = 0;
 	out__existing_typeless = (__u64)&bpf_prog_active;
 
 	/* tests non-existent symbols. */
diff --git a/tools/testing/selftests/bpf/progs/test_l4lb.c b/tools/testing/selftests/bpf/progs/test_l4lb.c
index 33493911d87a..04fee08863cb 100644
--- a/tools/testing/selftests/bpf/progs/test_l4lb.c
+++ b/tools/testing/selftests/bpf/progs/test_l4lb.c
@@ -21,8 +21,6 @@
 #include "test_iptunnel_common.h"
 #include <bpf/bpf_endian.h>
 
-int _version SEC("version") = 1;
-
 static inline __u32 rol32(__u32 word, unsigned int shift)
 {
 	return (word << shift) | (word >> ((-shift) & 31));
diff --git a/tools/testing/selftests/bpf/progs/test_map_in_map.c b/tools/testing/selftests/bpf/progs/test_map_in_map.c
index 1cfeb940cf9f..f416032ba858 100644
--- a/tools/testing/selftests/bpf/progs/test_map_in_map.c
+++ b/tools/testing/selftests/bpf/progs/test_map_in_map.c
@@ -9,21 +9,19 @@ struct {
 	__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
 	__uint(max_entries, 1);
 	__uint(map_flags, 0);
-	__uint(key_size, sizeof(__u32));
-	/* must be sizeof(__u32) for map in map */
-	__uint(value_size, sizeof(__u32));
+	__type(key, __u32);
+	__type(value, __u32);
 } mim_array SEC(".maps");
 
 struct {
 	__uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
 	__uint(max_entries, 1);
 	__uint(map_flags, 0);
-	__uint(key_size, sizeof(int));
-	/* must be sizeof(__u32) for map in map */
-	__uint(value_size, sizeof(__u32));
+	__type(key, int);
+	__type(value, __u32);
 } mim_hash SEC(".maps");
 
-SEC("xdp_mimtest")
+SEC("xdp")
 int xdp_mimtest0(struct xdp_md *ctx)
 {
 	int value = 123;
@@ -49,5 +47,4 @@ int xdp_mimtest0(struct xdp_md *ctx)
 	return XDP_PASS;
 }
 
-int _version SEC("version") = 1;
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_map_in_map_invalid.c b/tools/testing/selftests/bpf/progs/test_map_in_map_invalid.c
index 703c08e06442..9c7d75cf0bd6 100644
--- a/tools/testing/selftests/bpf/progs/test_map_in_map_invalid.c
+++ b/tools/testing/selftests/bpf/progs/test_map_in_map_invalid.c
@@ -13,7 +13,7 @@ struct inner {
 struct {
 	__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
 	__uint(max_entries, 0); /* This will make map creation to fail */
-	__uint(key_size, sizeof(__u32));
+	__type(key, __u32);
 	__array(values, struct inner);
 } mim SEC(".maps");
 
diff --git a/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c b/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c
index 6077a025092c..2c121c5d66a7 100644
--- a/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c
+++ b/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c
@@ -293,7 +293,7 @@ static int handle_passive_estab(struct bpf_sock_ops *skops)
 	return check_active_hdr_in(skops);
 }
 
-SEC("sockops/misc_estab")
+SEC("sockops")
 int misc_estab(struct bpf_sock_ops *skops)
 {
 	int true_val = 1;
diff --git a/tools/testing/selftests/bpf/progs/test_module_attach.c b/tools/testing/selftests/bpf/progs/test_module_attach.c
index bd37ceec5587..b36857093f71 100644
--- a/tools/testing/selftests/bpf/progs/test_module_attach.c
+++ b/tools/testing/selftests/bpf/progs/test_module_attach.c
@@ -27,6 +27,20 @@ int BPF_PROG(handle_raw_tp_bare,
 	return 0;
 }
 
+int raw_tp_writable_bare_in_val = 0;
+int raw_tp_writable_bare_early_ret = 0;
+int raw_tp_writable_bare_out_val = 0;
+
+SEC("raw_tp.w/bpf_testmod_test_writable_bare")
+int BPF_PROG(handle_raw_tp_writable_bare,
+	     struct bpf_testmod_test_writable_ctx *writable)
+{
+	raw_tp_writable_bare_in_val = writable->val;
+	writable->early_ret = raw_tp_writable_bare_early_ret;
+	writable->val = raw_tp_writable_bare_out_val;
+	return 0;
+}
+
 __u32 tp_btf_read_sz = 0;
 
 SEC("tp_btf/bpf_testmod_test_read")
diff --git a/tools/testing/selftests/bpf/progs/test_pe_preserve_elems.c b/tools/testing/selftests/bpf/progs/test_pe_preserve_elems.c
index fb22de7c365d..1249a945699f 100644
--- a/tools/testing/selftests/bpf/progs/test_pe_preserve_elems.c
+++ b/tools/testing/selftests/bpf/progs/test_pe_preserve_elems.c
@@ -7,15 +7,15 @@
 struct {
 	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
 	__uint(max_entries, 1);
-	__uint(key_size, sizeof(int));
-	__uint(value_size, sizeof(int));
+	__type(key, int);
+	__type(value, int);
 } array_1 SEC(".maps");
 
 struct {
 	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
 	__uint(max_entries, 1);
-	__uint(key_size, sizeof(int));
-	__uint(value_size, sizeof(int));
+	__type(key, int);
+	__type(value, int);
 	__uint(map_flags, BPF_F_PRESERVE_ELEMS);
 } array_2 SEC(".maps");
 
diff --git a/tools/testing/selftests/bpf/progs/test_perf_buffer.c b/tools/testing/selftests/bpf/progs/test_perf_buffer.c
index 8207a2dc2f9d..17d5b67744d5 100644
--- a/tools/testing/selftests/bpf/progs/test_perf_buffer.c
+++ b/tools/testing/selftests/bpf/progs/test_perf_buffer.c
@@ -7,19 +7,35 @@
 #include <bpf/bpf_tracing.h>
 
 struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__type(key, int);
+	__type(value, int);
+	__uint(max_entries, 1);
+} my_pid_map SEC(".maps");
+
+struct {
 	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
-	__uint(key_size, sizeof(int));
-	__uint(value_size, sizeof(int));
+	__type(key, int);
+	__type(value, int);
 } perf_buf_map SEC(".maps");
 
 SEC("tp/raw_syscalls/sys_enter")
 int handle_sys_enter(void *ctx)
 {
+	int zero = 0, *my_pid, cur_pid;
 	int cpu = bpf_get_smp_processor_id();
 
+	my_pid = bpf_map_lookup_elem(&my_pid_map, &zero);
+	if (!my_pid)
+		return 1;
+
+	cur_pid = bpf_get_current_pid_tgid() >> 32;
+	if (cur_pid != *my_pid)
+		return 1;
+
 	bpf_perf_event_output(ctx, &perf_buf_map, BPF_F_CURRENT_CPU,
 			      &cpu, sizeof(cpu));
-	return 0;
+	return 1;
 }
 
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_pinning.c b/tools/testing/selftests/bpf/progs/test_pinning.c
index 4ef2630292b2..0facea6cbbae 100644
--- a/tools/testing/selftests/bpf/progs/test_pinning.c
+++ b/tools/testing/selftests/bpf/progs/test_pinning.c
@@ -3,8 +3,6 @@
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
 
-int _version SEC("version") = 1;
-
 struct {
 	__uint(type, BPF_MAP_TYPE_ARRAY);
 	__uint(max_entries, 1);
diff --git a/tools/testing/selftests/bpf/progs/test_pinning_invalid.c b/tools/testing/selftests/bpf/progs/test_pinning_invalid.c
index 5412e0c732c7..2a56db1094b8 100644
--- a/tools/testing/selftests/bpf/progs/test_pinning_invalid.c
+++ b/tools/testing/selftests/bpf/progs/test_pinning_invalid.c
@@ -3,8 +3,6 @@
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
 
-int _version SEC("version") = 1;
-
 struct {
 	__uint(type, BPF_MAP_TYPE_ARRAY);
 	__uint(max_entries, 1);
diff --git a/tools/testing/selftests/bpf/progs/test_pkt_access.c b/tools/testing/selftests/bpf/progs/test_pkt_access.c
index 852051064507..0558544e1ff0 100644
--- a/tools/testing/selftests/bpf/progs/test_pkt_access.c
+++ b/tools/testing/selftests/bpf/progs/test_pkt_access.c
@@ -15,7 +15,6 @@
 #include <bpf/bpf_endian.h>
 
 #define barrier() __asm__ __volatile__("": : :"memory")
-int _version SEC("version") = 1;
 
 /* llvm will optimize both subprograms into exactly the same BPF assembly
  *
@@ -97,7 +96,7 @@ int test_pkt_write_access_subprog(struct __sk_buff *skb, __u32 off)
 	return 0;
 }
 
-SEC("classifier/test_pkt_access")
+SEC("tc")
 int test_pkt_access(struct __sk_buff *skb)
 {
 	void *data_end = (void *)(long)skb->data_end;
diff --git a/tools/testing/selftests/bpf/progs/test_pkt_md_access.c b/tools/testing/selftests/bpf/progs/test_pkt_md_access.c
index 610c74ea9f64..d1839366f3e1 100644
--- a/tools/testing/selftests/bpf/progs/test_pkt_md_access.c
+++ b/tools/testing/selftests/bpf/progs/test_pkt_md_access.c
@@ -7,8 +7,6 @@
 #include <linux/pkt_cls.h>
 #include <bpf/bpf_helpers.h>
 
-int _version SEC("version") = 1;
-
 #if  __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
 #define TEST_FIELD(TYPE, FIELD, MASK)					\
 	{								\
@@ -27,7 +25,7 @@ int _version SEC("version") = 1;
 	}
 #endif
 
-SEC("classifier/test_pkt_md_access")
+SEC("tc")
 int test_pkt_md_access(struct __sk_buff *skb)
 {
 	TEST_FIELD(__u8,  len, 0xFF);
diff --git a/tools/testing/selftests/bpf/progs/test_probe_user.c b/tools/testing/selftests/bpf/progs/test_probe_user.c
index 89b3532ccc75..8812a90da4eb 100644
--- a/tools/testing/selftests/bpf/progs/test_probe_user.c
+++ b/tools/testing/selftests/bpf/progs/test_probe_user.c
@@ -8,13 +8,37 @@
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
 
+#if defined(__TARGET_ARCH_x86)
+#define SYSCALL_WRAPPER 1
+#define SYS_PREFIX "__x64_"
+#elif defined(__TARGET_ARCH_s390)
+#define SYSCALL_WRAPPER 1
+#define SYS_PREFIX "__s390x_"
+#elif defined(__TARGET_ARCH_arm64)
+#define SYSCALL_WRAPPER 1
+#define SYS_PREFIX "__arm64_"
+#else
+#define SYSCALL_WRAPPER 0
+#define SYS_PREFIX ""
+#endif
+
 static struct sockaddr_in old;
 
-SEC("kprobe/__sys_connect")
+SEC("kprobe/" SYS_PREFIX "sys_connect")
 int BPF_KPROBE(handle_sys_connect)
 {
-	void *ptr = (void *)PT_REGS_PARM2(ctx);
+#if SYSCALL_WRAPPER == 1
+	struct pt_regs *real_regs;
+#endif
 	struct sockaddr_in new;
+	void *ptr;
+
+#if SYSCALL_WRAPPER == 0
+	ptr = (void *)PT_REGS_PARM2(ctx);
+#else
+	real_regs = (struct pt_regs *)PT_REGS_PARM1(ctx);
+	bpf_probe_read_kernel(&ptr, sizeof(ptr), &PT_REGS_PARM2(real_regs));
+#endif
 
 	bpf_probe_read_user(&old, sizeof(old), ptr);
 	__builtin_memset(&new, 0xab, sizeof(new));
diff --git a/tools/testing/selftests/bpf/progs/test_queue_stack_map.h b/tools/testing/selftests/bpf/progs/test_queue_stack_map.h
index 4dd9806ad73b..0fcd3ff0e38a 100644
--- a/tools/testing/selftests/bpf/progs/test_queue_stack_map.h
+++ b/tools/testing/selftests/bpf/progs/test_queue_stack_map.h
@@ -8,8 +8,6 @@
 #include <linux/pkt_cls.h>
 #include <bpf/bpf_helpers.h>
 
-int _version SEC("version") = 1;
-
 struct {
 	__uint(type, MAP_TYPE);
 	__uint(max_entries, 32);
diff --git a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c
index 26e77dcc7e91..7d56ed47cd4d 100644
--- a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c
@@ -15,8 +15,6 @@
 #include <bpf/bpf_helpers.h>
 #include "test_select_reuseport_common.h"
 
-int _version SEC("version") = 1;
-
 #ifndef offsetof
 #define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
 #endif
@@ -24,8 +22,8 @@ int _version SEC("version") = 1;
 struct {
 	__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
 	__uint(max_entries, 1);
-	__uint(key_size, sizeof(__u32));
-	__uint(value_size, sizeof(__u32));
+	__type(key, __u32);
+	__type(value, __u32);
 } outer_map SEC(".maps");
 
 struct {
diff --git a/tools/testing/selftests/bpf/progs/test_sk_assign.c b/tools/testing/selftests/bpf/progs/test_sk_assign.c
index 1ecd987005d2..02f79356d5eb 100644
--- a/tools/testing/selftests/bpf/progs/test_sk_assign.c
+++ b/tools/testing/selftests/bpf/progs/test_sk_assign.c
@@ -36,7 +36,6 @@ struct {
 	.pinning = PIN_GLOBAL_NS,
 };
 
-int _version SEC("version") = 1;
 char _license[] SEC("license") = "GPL";
 
 /* Fill 'tuple' with L3 info, and attempt to find L4. On fail, return NULL. */
@@ -159,7 +158,7 @@ assign:
 	return ret;
 }
 
-SEC("classifier/sk_assign_test")
+SEC("tc")
 int bpf_sk_assign_test(struct __sk_buff *skb)
 {
 	struct bpf_sock_tuple *tuple, ln = {0};
diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup.c b/tools/testing/selftests/bpf/progs/test_sk_lookup.c
index ac6f7f205e25..19d2465d9442 100644
--- a/tools/testing/selftests/bpf/progs/test_sk_lookup.c
+++ b/tools/testing/selftests/bpf/progs/test_sk_lookup.c
@@ -72,32 +72,32 @@ static const __u16 DST_PORT = 7007; /* Host byte order */
 static const __u32 DST_IP4 = IP4(127, 0, 0, 1);
 static const __u32 DST_IP6[] = IP6(0xfd000000, 0x0, 0x0, 0x00000001);
 
-SEC("sk_lookup/lookup_pass")
+SEC("sk_lookup")
 int lookup_pass(struct bpf_sk_lookup *ctx)
 {
 	return SK_PASS;
 }
 
-SEC("sk_lookup/lookup_drop")
+SEC("sk_lookup")
 int lookup_drop(struct bpf_sk_lookup *ctx)
 {
 	return SK_DROP;
 }
 
-SEC("sk_reuseport/reuse_pass")
+SEC("sk_reuseport")
 int reuseport_pass(struct sk_reuseport_md *ctx)
 {
 	return SK_PASS;
 }
 
-SEC("sk_reuseport/reuse_drop")
+SEC("sk_reuseport")
 int reuseport_drop(struct sk_reuseport_md *ctx)
 {
 	return SK_DROP;
 }
 
 /* Redirect packets destined for port DST_PORT to socket at redir_map[0]. */
-SEC("sk_lookup/redir_port")
+SEC("sk_lookup")
 int redir_port(struct bpf_sk_lookup *ctx)
 {
 	struct bpf_sock *sk;
@@ -116,7 +116,7 @@ int redir_port(struct bpf_sk_lookup *ctx)
 }
 
 /* Redirect packets destined for DST_IP4 address to socket at redir_map[0]. */
-SEC("sk_lookup/redir_ip4")
+SEC("sk_lookup")
 int redir_ip4(struct bpf_sk_lookup *ctx)
 {
 	struct bpf_sock *sk;
@@ -139,7 +139,7 @@ int redir_ip4(struct bpf_sk_lookup *ctx)
 }
 
 /* Redirect packets destined for DST_IP6 address to socket at redir_map[0]. */
-SEC("sk_lookup/redir_ip6")
+SEC("sk_lookup")
 int redir_ip6(struct bpf_sk_lookup *ctx)
 {
 	struct bpf_sock *sk;
@@ -164,7 +164,7 @@ int redir_ip6(struct bpf_sk_lookup *ctx)
 	return err ? SK_DROP : SK_PASS;
 }
 
-SEC("sk_lookup/select_sock_a")
+SEC("sk_lookup")
 int select_sock_a(struct bpf_sk_lookup *ctx)
 {
 	struct bpf_sock *sk;
@@ -179,7 +179,7 @@ int select_sock_a(struct bpf_sk_lookup *ctx)
 	return err ? SK_DROP : SK_PASS;
 }
 
-SEC("sk_lookup/select_sock_a_no_reuseport")
+SEC("sk_lookup")
 int select_sock_a_no_reuseport(struct bpf_sk_lookup *ctx)
 {
 	struct bpf_sock *sk;
@@ -194,7 +194,7 @@ int select_sock_a_no_reuseport(struct bpf_sk_lookup *ctx)
 	return err ? SK_DROP : SK_PASS;
 }
 
-SEC("sk_reuseport/select_sock_b")
+SEC("sk_reuseport")
 int select_sock_b(struct sk_reuseport_md *ctx)
 {
 	__u32 key = KEY_SERVER_B;
@@ -205,7 +205,7 @@ int select_sock_b(struct sk_reuseport_md *ctx)
 }
 
 /* Check that bpf_sk_assign() returns -EEXIST if socket already selected. */
-SEC("sk_lookup/sk_assign_eexist")
+SEC("sk_lookup")
 int sk_assign_eexist(struct bpf_sk_lookup *ctx)
 {
 	struct bpf_sock *sk;
@@ -238,7 +238,7 @@ out:
 }
 
 /* Check that bpf_sk_assign(BPF_SK_LOOKUP_F_REPLACE) can override selection. */
-SEC("sk_lookup/sk_assign_replace_flag")
+SEC("sk_lookup")
 int sk_assign_replace_flag(struct bpf_sk_lookup *ctx)
 {
 	struct bpf_sock *sk;
@@ -270,7 +270,7 @@ out:
 }
 
 /* Check that bpf_sk_assign(sk=NULL) is accepted. */
-SEC("sk_lookup/sk_assign_null")
+SEC("sk_lookup")
 int sk_assign_null(struct bpf_sk_lookup *ctx)
 {
 	struct bpf_sock *sk = NULL;
@@ -313,7 +313,7 @@ out:
 }
 
 /* Check that selected sk is accessible through context. */
-SEC("sk_lookup/access_ctx_sk")
+SEC("sk_lookup")
 int access_ctx_sk(struct bpf_sk_lookup *ctx)
 {
 	struct bpf_sock *sk1 = NULL, *sk2 = NULL;
@@ -379,7 +379,7 @@ out:
  * are not covered because they give bogus results, that is the
  * verifier ignores the offset.
  */
-SEC("sk_lookup/ctx_narrow_access")
+SEC("sk_lookup")
 int ctx_narrow_access(struct bpf_sk_lookup *ctx)
 {
 	struct bpf_sock *sk;
@@ -553,7 +553,7 @@ int ctx_narrow_access(struct bpf_sk_lookup *ctx)
 }
 
 /* Check that sk_assign rejects SERVER_A socket with -ESOCKNOSUPPORT */
-SEC("sk_lookup/sk_assign_esocknosupport")
+SEC("sk_lookup")
 int sk_assign_esocknosupport(struct bpf_sk_lookup *ctx)
 {
 	struct bpf_sock *sk;
@@ -578,28 +578,28 @@ out:
 	return ret;
 }
 
-SEC("sk_lookup/multi_prog_pass1")
+SEC("sk_lookup")
 int multi_prog_pass1(struct bpf_sk_lookup *ctx)
 {
 	bpf_map_update_elem(&run_map, &KEY_PROG1, &PROG_DONE, BPF_ANY);
 	return SK_PASS;
 }
 
-SEC("sk_lookup/multi_prog_pass2")
+SEC("sk_lookup")
 int multi_prog_pass2(struct bpf_sk_lookup *ctx)
 {
 	bpf_map_update_elem(&run_map, &KEY_PROG2, &PROG_DONE, BPF_ANY);
 	return SK_PASS;
 }
 
-SEC("sk_lookup/multi_prog_drop1")
+SEC("sk_lookup")
 int multi_prog_drop1(struct bpf_sk_lookup *ctx)
 {
 	bpf_map_update_elem(&run_map, &KEY_PROG1, &PROG_DONE, BPF_ANY);
 	return SK_DROP;
 }
 
-SEC("sk_lookup/multi_prog_drop2")
+SEC("sk_lookup")
 int multi_prog_drop2(struct bpf_sk_lookup *ctx)
 {
 	bpf_map_update_elem(&run_map, &KEY_PROG2, &PROG_DONE, BPF_ANY);
@@ -623,7 +623,7 @@ static __always_inline int select_server_a(struct bpf_sk_lookup *ctx)
 	return SK_PASS;
 }
 
-SEC("sk_lookup/multi_prog_redir1")
+SEC("sk_lookup")
 int multi_prog_redir1(struct bpf_sk_lookup *ctx)
 {
 	int ret;
@@ -633,7 +633,7 @@ int multi_prog_redir1(struct bpf_sk_lookup *ctx)
 	return SK_PASS;
 }
 
-SEC("sk_lookup/multi_prog_redir2")
+SEC("sk_lookup")
 int multi_prog_redir2(struct bpf_sk_lookup *ctx)
 {
 	int ret;
@@ -644,4 +644,3 @@ int multi_prog_redir2(struct bpf_sk_lookup *ctx)
 }
 
 char _license[] SEC("license") = "Dual BSD/GPL";
-__u32 _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
index 8249075f088f..40f161480a2f 100644
--- a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
@@ -15,7 +15,6 @@
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_endian.h>
 
-int _version SEC("version") = 1;
 char _license[] SEC("license") = "GPL";
 
 /* Fill 'tuple' with L3 info, and attempt to find L4. On fail, return NULL. */
@@ -53,8 +52,8 @@ static struct bpf_sock_tuple *get_tuple(void *data, __u64 nh_off,
 	return result;
 }
 
-SEC("classifier/sk_lookup_success")
-int bpf_sk_lookup_test0(struct __sk_buff *skb)
+SEC("tc")
+int sk_lookup_success(struct __sk_buff *skb)
 {
 	void *data_end = (void *)(long)skb->data_end;
 	void *data = (void *)(long)skb->data;
@@ -79,8 +78,8 @@ int bpf_sk_lookup_test0(struct __sk_buff *skb)
 	return sk ? TC_ACT_OK : TC_ACT_UNSPEC;
 }
 
-SEC("classifier/sk_lookup_success_simple")
-int bpf_sk_lookup_test1(struct __sk_buff *skb)
+SEC("tc")
+int sk_lookup_success_simple(struct __sk_buff *skb)
 {
 	struct bpf_sock_tuple tuple = {};
 	struct bpf_sock *sk;
@@ -91,8 +90,8 @@ int bpf_sk_lookup_test1(struct __sk_buff *skb)
 	return 0;
 }
 
-SEC("classifier/err_use_after_free")
-int bpf_sk_lookup_uaf(struct __sk_buff *skb)
+SEC("tc")
+int err_use_after_free(struct __sk_buff *skb)
 {
 	struct bpf_sock_tuple tuple = {};
 	struct bpf_sock *sk;
@@ -106,8 +105,8 @@ int bpf_sk_lookup_uaf(struct __sk_buff *skb)
 	return family;
 }
 
-SEC("classifier/err_modify_sk_pointer")
-int bpf_sk_lookup_modptr(struct __sk_buff *skb)
+SEC("tc")
+int err_modify_sk_pointer(struct __sk_buff *skb)
 {
 	struct bpf_sock_tuple tuple = {};
 	struct bpf_sock *sk;
@@ -121,8 +120,8 @@ int bpf_sk_lookup_modptr(struct __sk_buff *skb)
 	return 0;
 }
 
-SEC("classifier/err_modify_sk_or_null_pointer")
-int bpf_sk_lookup_modptr_or_null(struct __sk_buff *skb)
+SEC("tc")
+int err_modify_sk_or_null_pointer(struct __sk_buff *skb)
 {
 	struct bpf_sock_tuple tuple = {};
 	struct bpf_sock *sk;
@@ -135,8 +134,8 @@ int bpf_sk_lookup_modptr_or_null(struct __sk_buff *skb)
 	return 0;
 }
 
-SEC("classifier/err_no_release")
-int bpf_sk_lookup_test2(struct __sk_buff *skb)
+SEC("tc")
+int err_no_release(struct __sk_buff *skb)
 {
 	struct bpf_sock_tuple tuple = {};
 
@@ -144,8 +143,8 @@ int bpf_sk_lookup_test2(struct __sk_buff *skb)
 	return 0;
 }
 
-SEC("classifier/err_release_twice")
-int bpf_sk_lookup_test3(struct __sk_buff *skb)
+SEC("tc")
+int err_release_twice(struct __sk_buff *skb)
 {
 	struct bpf_sock_tuple tuple = {};
 	struct bpf_sock *sk;
@@ -156,8 +155,8 @@ int bpf_sk_lookup_test3(struct __sk_buff *skb)
 	return 0;
 }
 
-SEC("classifier/err_release_unchecked")
-int bpf_sk_lookup_test4(struct __sk_buff *skb)
+SEC("tc")
+int err_release_unchecked(struct __sk_buff *skb)
 {
 	struct bpf_sock_tuple tuple = {};
 	struct bpf_sock *sk;
@@ -173,8 +172,8 @@ void lookup_no_release(struct __sk_buff *skb)
 	bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
 }
 
-SEC("classifier/err_no_release_subcall")
-int bpf_sk_lookup_test5(struct __sk_buff *skb)
+SEC("tc")
+int err_no_release_subcall(struct __sk_buff *skb)
 {
 	lookup_no_release(skb);
 	return 0;
diff --git a/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c b/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c
index 552f2090665c..c304cd5b8cad 100644
--- a/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c
@@ -42,6 +42,4 @@ int log_cgroup_id(struct __sk_buff *skb)
 	return TC_ACT_OK;
 }
 
-int _version SEC("version") = 1;
-
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_skb_ctx.c b/tools/testing/selftests/bpf/progs/test_skb_ctx.c
index b02ea589ce7e..1d61b36e6067 100644
--- a/tools/testing/selftests/bpf/progs/test_skb_ctx.c
+++ b/tools/testing/selftests/bpf/progs/test_skb_ctx.c
@@ -3,7 +3,6 @@
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
 
-int _version SEC("version") = 1;
 char _license[] SEC("license") = "GPL";
 
 SEC("skb_ctx")
@@ -25,6 +24,12 @@ int process(struct __sk_buff *skb)
 		return 1;
 	if (skb->gso_size != 10)
 		return 1;
+	if (skb->ingress_ifindex != 11)
+		return 1;
+	if (skb->ifindex != 1)
+		return 1;
+	if (skb->hwtstamp != 11)
+		return 1;
 
 	return 0;
 }
diff --git a/tools/testing/selftests/bpf/progs/test_skb_helpers.c b/tools/testing/selftests/bpf/progs/test_skb_helpers.c
index bb3fbf1a29e3..507215791c5b 100644
--- a/tools/testing/selftests/bpf/progs/test_skb_helpers.c
+++ b/tools/testing/selftests/bpf/progs/test_skb_helpers.c
@@ -14,7 +14,7 @@ struct {
 
 char _license[] SEC("license") = "GPL";
 
-SEC("classifier/test_skb_helpers")
+SEC("tc")
 int test_skb_helpers(struct __sk_buff *skb)
 {
 	struct task_struct *task;
diff --git a/tools/testing/selftests/bpf/progs/test_skc_to_unix_sock.c b/tools/testing/selftests/bpf/progs/test_skc_to_unix_sock.c
new file mode 100644
index 000000000000..a408ec95cba4
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_skc_to_unix_sock.c
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2021 Hengqi Chen */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_tracing_net.h"
+
+const volatile pid_t my_pid = 0;
+char path[256] = {};
+
+SEC("fentry/unix_listen")
+int BPF_PROG(unix_listen, struct socket *sock, int backlog)
+{
+	pid_t pid = bpf_get_current_pid_tgid() >> 32;
+	struct unix_sock *unix_sk;
+	int i, len;
+
+	if (pid != my_pid)
+		return 0;
+
+	unix_sk = (struct unix_sock *)bpf_skc_to_unix_sock(sock->sk);
+	if (!unix_sk)
+		return 0;
+
+	if (!UNIX_ABSTRACT(unix_sk))
+		return 0;
+
+	len = unix_sk->addr->len - sizeof(short);
+	path[0] = '@';
+	for (i = 1; i < len; i++) {
+		if (i >= sizeof(struct sockaddr_un))
+			break;
+
+		path[i] = unix_sk->addr->name->sun_path[i];
+	}
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_skeleton.c b/tools/testing/selftests/bpf/progs/test_skeleton.c
index 441fa1c552c8..1b1187d2967b 100644
--- a/tools/testing/selftests/bpf/progs/test_skeleton.c
+++ b/tools/testing/selftests/bpf/progs/test_skeleton.c
@@ -5,6 +5,8 @@
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
 
+#define __read_mostly SEC(".data.read_mostly")
+
 struct s {
 	int a;
 	long long b;
@@ -40,9 +42,20 @@ int kern_ver = 0;
 
 struct s out5 = {};
 
+
+const volatile int in_dynarr_sz SEC(".rodata.dyn");
+const volatile int in_dynarr[4] SEC(".rodata.dyn") = { -1, -2, -3, -4 };
+
+int out_dynarr[4] SEC(".data.dyn") = { 1, 2, 3, 4 };
+
+int read_mostly_var __read_mostly;
+int out_mostly_var;
+
 SEC("raw_tp/sys_enter")
 int handler(const void *ctx)
 {
+	int i;
+
 	out1 = in1;
 	out2 = in2;
 	out3 = in3;
@@ -53,6 +66,11 @@ int handler(const void *ctx)
 	bpf_syscall = CONFIG_BPF_SYSCALL;
 	kern_ver = LINUX_KERNEL_VERSION;
 
+	for (i = 0; i < in_dynarr_sz; i++)
+		out_dynarr[i] = in_dynarr[i];
+
+	out_mostly_var = read_mostly_var;
+
 	return 0;
 }
 
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
index 1858435de7aa..2966564b8497 100644
--- a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
@@ -361,5 +361,4 @@ int bpf_prog10(struct sk_msg_md *msg)
 	return SK_DROP;
 }
 
-int _version SEC("version") = 1;
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
index a1cc58b10c7c..325c9f193432 100644
--- a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
@@ -56,7 +56,7 @@ int prog_stream_verdict(struct __sk_buff *skb)
 	return verdict;
 }
 
-SEC("sk_skb/skb_verdict")
+SEC("sk_skb")
 int prog_skb_verdict(struct __sk_buff *skb)
 {
 	unsigned int *count;
@@ -116,5 +116,4 @@ int prog_reuseport(struct sk_reuseport_md *reuse)
 	return verdict;
 }
 
-int _version SEC("version") = 1;
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_skb_verdict_attach.c b/tools/testing/selftests/bpf/progs/test_sockmap_skb_verdict_attach.c
index 2d31f66e4f23..3c69aa971738 100644
--- a/tools/testing/selftests/bpf/progs/test_sockmap_skb_verdict_attach.c
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_skb_verdict_attach.c
@@ -9,7 +9,7 @@ struct {
 	__type(value, __u64);
 } sock_map SEC(".maps");
 
-SEC("sk_skb/skb_verdict")
+SEC("sk_skb")
 int prog_skb_verdict(struct __sk_buff *skb)
 {
 	return SK_DROP;
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_update.c b/tools/testing/selftests/bpf/progs/test_sockmap_update.c
index 9d0c9f28cab2..6d64ea536e3d 100644
--- a/tools/testing/selftests/bpf/progs/test_sockmap_update.c
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_update.c
@@ -24,7 +24,7 @@ struct {
 	__type(value, __u64);
 } dst_sock_hash SEC(".maps");
 
-SEC("classifier/copy_sock_map")
+SEC("tc")
 int copy_sock_map(void *ctx)
 {
 	struct bpf_sock *sk;
diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c b/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c
index 0cf0134631b4..36a707e7c7a7 100644
--- a/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c
+++ b/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c
@@ -28,8 +28,8 @@ struct {
 	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
 	__uint(max_entries, 128);
 	__uint(map_flags, BPF_F_STACK_BUILD_ID);
-	__uint(key_size, sizeof(__u32));
-	__uint(value_size, sizeof(stack_trace_t));
+	__type(key, __u32);
+	__type(value, stack_trace_t);
 } stackmap SEC(".maps");
 
 struct {
@@ -73,4 +73,3 @@ int oncpu(struct random_urandom_args *args)
 }
 
 char _license[] SEC("license") = "GPL";
-__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c b/tools/testing/selftests/bpf/progs/test_stacktrace_map.c
index 00ed48672620..a8233e7f173b 100644
--- a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c
+++ b/tools/testing/selftests/bpf/progs/test_stacktrace_map.c
@@ -27,8 +27,8 @@ typedef __u64 stack_trace_t[PERF_MAX_STACK_DEPTH];
 struct {
 	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
 	__uint(max_entries, 16384);
-	__uint(key_size, sizeof(__u32));
-	__uint(value_size, sizeof(stack_trace_t));
+	__type(key, __u32);
+	__type(value, stack_trace_t);
 } stackmap SEC(".maps");
 
 struct {
diff --git a/tools/testing/selftests/bpf/progs/test_tc_bpf.c b/tools/testing/selftests/bpf/progs/test_tc_bpf.c
index 18a3a7ed924a..d28ca8d1f3d0 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_bpf.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_bpf.c
@@ -5,7 +5,7 @@
 
 /* Dummy prog to test TC-BPF API */
 
-SEC("classifier")
+SEC("tc")
 int cls(struct __sk_buff *skb)
 {
 	return 0;
diff --git a/tools/testing/selftests/bpf/progs/test_tc_neigh.c b/tools/testing/selftests/bpf/progs/test_tc_neigh.c
index 0c93d326a663..3e32ea375ab4 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_neigh.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_neigh.c
@@ -70,7 +70,7 @@ static __always_inline bool is_remote_ep_v6(struct __sk_buff *skb,
 	return v6_equal(ip6h->daddr, addr);
 }
 
-SEC("classifier/chk_egress")
+SEC("tc")
 int tc_chk(struct __sk_buff *skb)
 {
 	void *data_end = ctx_ptr(skb->data_end);
@@ -83,7 +83,7 @@ int tc_chk(struct __sk_buff *skb)
 	return !raw[0] && !raw[1] && !raw[2] ? TC_ACT_SHOT : TC_ACT_OK;
 }
 
-SEC("classifier/dst_ingress")
+SEC("tc")
 int tc_dst(struct __sk_buff *skb)
 {
 	__u8 zero[ETH_ALEN * 2];
@@ -108,7 +108,7 @@ int tc_dst(struct __sk_buff *skb)
 	return bpf_redirect_neigh(IFINDEX_SRC, NULL, 0, 0);
 }
 
-SEC("classifier/src_ingress")
+SEC("tc")
 int tc_src(struct __sk_buff *skb)
 {
 	__u8 zero[ETH_ALEN * 2];
diff --git a/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c b/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c
index f7ab69cf018e..ec4cce19362d 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c
@@ -75,7 +75,7 @@ static __always_inline int fill_fib_params_v6(struct __sk_buff *skb,
 	return 0;
 }
 
-SEC("classifier/chk_egress")
+SEC("tc")
 int tc_chk(struct __sk_buff *skb)
 {
 	void *data_end = ctx_ptr(skb->data_end);
@@ -143,13 +143,13 @@ static __always_inline int tc_redir(struct __sk_buff *skb)
 /* these are identical, but keep them separate for compatibility with the
  * section names expected by test_tc_redirect.sh
  */
-SEC("classifier/dst_ingress")
+SEC("tc")
 int tc_dst(struct __sk_buff *skb)
 {
 	return tc_redir(skb);
 }
 
-SEC("classifier/src_ingress")
+SEC("tc")
 int tc_src(struct __sk_buff *skb)
 {
 	return tc_redir(skb);
diff --git a/tools/testing/selftests/bpf/progs/test_tc_peer.c b/tools/testing/selftests/bpf/progs/test_tc_peer.c
index fe818cd5f010..365eacb5dc34 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_peer.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_peer.c
@@ -16,31 +16,31 @@ volatile const __u32 IFINDEX_DST;
 static const __u8 src_mac[] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55};
 static const __u8 dst_mac[] = {0x00, 0x22, 0x33, 0x44, 0x55, 0x66};
 
-SEC("classifier/chk_egress")
+SEC("tc")
 int tc_chk(struct __sk_buff *skb)
 {
 	return TC_ACT_SHOT;
 }
 
-SEC("classifier/dst_ingress")
+SEC("tc")
 int tc_dst(struct __sk_buff *skb)
 {
 	return bpf_redirect_peer(IFINDEX_SRC, 0);
 }
 
-SEC("classifier/src_ingress")
+SEC("tc")
 int tc_src(struct __sk_buff *skb)
 {
 	return bpf_redirect_peer(IFINDEX_DST, 0);
 }
 
-SEC("classifier/dst_ingress_l3")
+SEC("tc")
 int tc_dst_l3(struct __sk_buff *skb)
 {
 	return bpf_redirect(IFINDEX_SRC, 0);
 }
 
-SEC("classifier/src_ingress_l3")
+SEC("tc")
 int tc_src_l3(struct __sk_buff *skb)
 {
 	__u16 proto = skb->protocol;
diff --git a/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c b/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c
index 47cbe2eeae43..cd747cd93dbe 100644
--- a/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c
@@ -148,7 +148,7 @@ release:
 	bpf_sk_release(sk);
 }
 
-SEC("clsact/check_syncookie")
+SEC("tc")
 int check_syncookie_clsact(struct __sk_buff *skb)
 {
 	check_syncookie(skb, (void *)(long)skb->data,
@@ -156,7 +156,7 @@ int check_syncookie_clsact(struct __sk_buff *skb)
 	return TC_ACT_OK;
 }
 
-SEC("xdp/check_syncookie")
+SEC("xdp")
 int check_syncookie_xdp(struct xdp_md *ctx)
 {
 	check_syncookie(ctx, (void *)(long)ctx->data,
diff --git a/tools/testing/selftests/bpf/progs/test_tcp_estats.c b/tools/testing/selftests/bpf/progs/test_tcp_estats.c
index adc83a54c352..2c5c602c6011 100644
--- a/tools/testing/selftests/bpf/progs/test_tcp_estats.c
+++ b/tools/testing/selftests/bpf/progs/test_tcp_estats.c
@@ -255,4 +255,3 @@ int _dummy_tracepoint(struct dummy_tracepoint_args *arg)
 }
 
 char _license[] SEC("license") = "GPL";
-__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
diff --git a/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c b/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c
index 678bd0fad29e..5f4e87ee949a 100644
--- a/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c
+++ b/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c
@@ -594,7 +594,7 @@ static int handle_parse_hdr(struct bpf_sock_ops *skops)
 	return CG_OK;
 }
 
-SEC("sockops/estab")
+SEC("sockops")
 int estab(struct bpf_sock_ops *skops)
 {
 	int true_val = 1;
diff --git a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
index 94f50f7e94d6..3ded05280757 100644
--- a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
@@ -16,7 +16,6 @@
 #include "test_tcpbpf.h"
 
 struct tcpbpf_globals global = {};
-int _version SEC("version") = 1;
 
 /**
  * SOL_TCP is defined in <netinet/tcp.h> while
diff --git a/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c b/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c
index ac63410bb541..540181c115a8 100644
--- a/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c
@@ -24,12 +24,10 @@ struct {
 struct {
 	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
 	__uint(max_entries, 2);
-	__uint(key_size, sizeof(int));
-	__uint(value_size, sizeof(__u32));
+	__type(key, int);
+	__type(value, __u32);
 } perf_event_map SEC(".maps");
 
-int _version SEC("version") = 1;
-
 SEC("sockops")
 int bpf_testcb(struct bpf_sock_ops *skops)
 {
diff --git a/tools/testing/selftests/bpf/progs/test_tracepoint.c b/tools/testing/selftests/bpf/progs/test_tracepoint.c
index 4b825ee122cf..ce6974016f53 100644
--- a/tools/testing/selftests/bpf/progs/test_tracepoint.c
+++ b/tools/testing/selftests/bpf/progs/test_tracepoint.c
@@ -23,4 +23,3 @@ int oncpu(struct sched_switch_args *ctx)
 }
 
 char _license[] SEC("license") = "GPL";
-__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
index e7b673117436..ef0dde83b85a 100644
--- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
@@ -26,8 +26,6 @@
 		bpf_trace_printk(fmt, sizeof(fmt), __LINE__, ret); \
 	} while (0)
 
-int _version SEC("version") = 1;
-
 struct geneve_opt {
 	__be16	opt_class;
 	__u8	type;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp.c b/tools/testing/selftests/bpf/progs/test_xdp.c
index 31f9bce37491..d7a9a74b7245 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp.c
@@ -20,8 +20,6 @@
 #include <bpf/bpf_endian.h>
 #include "test_iptunnel_common.h"
 
-int _version SEC("version") = 1;
-
 struct {
 	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
 	__uint(max_entries, 256);
@@ -210,7 +208,7 @@ static __always_inline int handle_ipv6(struct xdp_md *xdp)
 	return XDP_TX;
 }
 
-SEC("xdp_tx_iptunnel")
+SEC("xdp")
 int _xdp_tx_iptunnel(struct xdp_md *xdp)
 {
 	void *data_end = (void *)(long)xdp->data_end;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c
index 3d66599eee2e..199c61b7d062 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c
@@ -2,7 +2,7 @@
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
 
-SEC("xdp_adjust_tail_grow")
+SEC("xdp")
 int _xdp_adjust_tail_grow(struct xdp_md *xdp)
 {
 	void *data_end = (void *)(long)xdp->data_end;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c
index 22065a9cfb25..b7448253d135 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c
@@ -9,9 +9,7 @@
 #include <linux/if_ether.h>
 #include <bpf/bpf_helpers.h>
 
-int _version SEC("version") = 1;
-
-SEC("xdp_adjust_tail_shrink")
+SEC("xdp")
 int _xdp_adjust_tail_shrink(struct xdp_md *xdp)
 {
 	void *data_end = (void *)(long)xdp->data_end;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c b/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c
index a038e827f850..58cf4345f5cc 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c
@@ -36,8 +36,8 @@ struct meta {
 
 struct {
 	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
-	__uint(key_size, sizeof(int));
-	__uint(value_size, sizeof(int));
+	__type(key, int);
+	__type(value, int);
 } perf_buf_map SEC(".maps");
 
 __u64 test_result_fentry = 0;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c
index b360ba2bd441..807bf895f42c 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c
@@ -5,7 +5,7 @@
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
 
-SEC("xdp_dm_log")
+SEC("xdp")
 int xdpdm_devlog(struct xdp_md *ctx)
 {
 	char fmt[] = "devmap redirect: dev %u -> dev %u len %u\n";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_link.c b/tools/testing/selftests/bpf/progs/test_xdp_link.c
index eb93ea95d1d8..ee7d6ac0f615 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_link.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_link.c
@@ -5,7 +5,7 @@
 
 char LICENSE[] SEC("license") = "GPL";
 
-SEC("xdp/handler")
+SEC("xdp")
 int xdp_handler(struct xdp_md *xdp)
 {
 	return 0;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_loop.c b/tools/testing/selftests/bpf/progs/test_xdp_loop.c
index fcabcda30ba3..c98fb44156f0 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_loop.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_loop.c
@@ -16,8 +16,6 @@
 #include <bpf/bpf_endian.h>
 #include "test_iptunnel_common.h"
 
-int _version SEC("version") = 1;
-
 struct {
 	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
 	__uint(max_entries, 256);
@@ -206,7 +204,7 @@ static __always_inline int handle_ipv6(struct xdp_md *xdp)
 	return XDP_TX;
 }
 
-SEC("xdp_tx_iptunnel")
+SEC("xdp")
 int _xdp_tx_iptunnel(struct xdp_md *xdp)
 {
 	void *data_end = (void *)(long)xdp->data_end;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
index 3a67921f62b5..596c4e71bf3a 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
@@ -797,7 +797,7 @@ out:
 	return XDP_DROP;
 }
 
-SEC("xdp-test-v4")
+SEC("xdp")
 int balancer_ingress_v4(struct xdp_md *ctx)
 {
 	void *data = (void *)(long)ctx->data;
@@ -816,7 +816,7 @@ int balancer_ingress_v4(struct xdp_md *ctx)
 		return XDP_DROP;
 }
 
-SEC("xdp-test-v6")
+SEC("xdp")
 int balancer_ingress_v6(struct xdp_md *ctx)
 {
 	void *data = (void *)(long)ctx->data;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_redirect.c b/tools/testing/selftests/bpf/progs/test_xdp_redirect.c
index a5337cd9400b..b778cad45485 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_redirect.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_redirect.c
@@ -12,8 +12,6 @@
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
 
-int _version SEC("version") = 1;
-
 SEC("redirect_to_111")
 int xdp_redirect_to_111(struct xdp_md *xdp)
 {
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c
index 59ee4f182ff8..532025057711 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c
@@ -12,13 +12,13 @@ struct {
 	__uint(max_entries, 4);
 } cpu_map SEC(".maps");
 
-SEC("xdp_redir")
+SEC("xdp")
 int xdp_redir_prog(struct xdp_md *ctx)
 {
 	return bpf_redirect_map(&cpu_map, 1, 0);
 }
 
-SEC("xdp_dummy")
+SEC("xdp")
 int xdp_dummy_prog(struct xdp_md *ctx)
 {
 	return XDP_PASS;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c
index 0ac086497722..1e6b9c38ea6d 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c
@@ -9,7 +9,7 @@ struct {
 	__uint(max_entries, 4);
 } dm_ports SEC(".maps");
 
-SEC("xdp_redir")
+SEC("xdp")
 int xdp_redir_prog(struct xdp_md *ctx)
 {
 	return bpf_redirect_map(&dm_ports, 1, 0);
@@ -18,7 +18,7 @@ int xdp_redir_prog(struct xdp_md *ctx)
 /* invalid program on DEVMAP entry;
  * SEC name means expected attach type not set
  */
-SEC("xdp_dummy")
+SEC("xdp")
 int xdp_dummy_prog(struct xdp_md *ctx)
 {
 	return XDP_PASS;
diff --git a/tools/testing/selftests/bpf/progs/trace_vprintk.c b/tools/testing/selftests/bpf/progs/trace_vprintk.c
new file mode 100644
index 000000000000..d327241ba047
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/trace_vprintk.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+int null_data_vprintk_ret = 0;
+int trace_vprintk_ret = 0;
+int trace_vprintk_ran = 0;
+
+SEC("fentry/__x64_sys_nanosleep")
+int sys_enter(void *ctx)
+{
+	static const char one[] = "1";
+	static const char three[] = "3";
+	static const char five[] = "5";
+	static const char seven[] = "7";
+	static const char nine[] = "9";
+	static const char f[] = "%pS\n";
+
+	/* runner doesn't search for \t, just ensure it compiles */
+	bpf_printk("\t");
+
+	trace_vprintk_ret = __bpf_vprintk("%s,%d,%s,%d,%s,%d,%s,%d,%s,%d %d\n",
+		one, 2, three, 4, five, 6, seven, 8, nine, 10, ++trace_vprintk_ran);
+
+	/* non-NULL fmt w/ NULL data should result in error */
+	null_data_vprintk_ret = bpf_trace_vprintk(f, sizeof(f), NULL, 0);
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/twfw.c b/tools/testing/selftests/bpf/progs/twfw.c
new file mode 100644
index 000000000000..de1b18a62b46
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/twfw.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <linux/types.h>
+#include <bpf/bpf_helpers.h>
+#include <linux/bpf.h>
+#include <stdint.h>
+
+#define TWFW_MAX_TIERS (64)
+/*
+ * load is successful
+ * #define TWFW_MAX_TIERS (64u)$
+ */
+
+struct twfw_tier_value {
+	unsigned long mask[1];
+};
+
+struct rule {
+	uint8_t seqnum;
+};
+
+struct rules_map {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__type(key, __u32);
+	__type(value, struct rule);
+	__uint(max_entries, 1);
+};
+
+struct tiers_map {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__type(key, __u32);
+	__type(value, struct twfw_tier_value);
+	__uint(max_entries, 1);
+};
+
+struct rules_map rules SEC(".maps");
+struct tiers_map tiers SEC(".maps");
+
+SEC("cgroup_skb/ingress")
+int twfw_verifier(struct __sk_buff* skb)
+{
+	const uint32_t key = 0;
+	const struct twfw_tier_value* tier = bpf_map_lookup_elem(&tiers, &key);
+	if (!tier)
+		return 1;
+
+	struct rule* rule = bpf_map_lookup_elem(&rules, &key);
+	if (!rule)
+		return 1;
+
+	if (rule && rule->seqnum < TWFW_MAX_TIERS) {
+		/* rule->seqnum / 64 should always be 0 */
+		unsigned long mask = tier->mask[rule->seqnum / 64];
+		if (mask)
+			return 0;
+	}
+	return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/xdp_dummy.c b/tools/testing/selftests/bpf/progs/xdp_dummy.c
index ea25e8881992..d988b2e0cee8 100644
--- a/tools/testing/selftests/bpf/progs/xdp_dummy.c
+++ b/tools/testing/selftests/bpf/progs/xdp_dummy.c
@@ -4,7 +4,7 @@
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
 
-SEC("xdp_dummy")
+SEC("xdp")
 int xdp_dummy_prog(struct xdp_md *ctx)
 {
 	return XDP_PASS;
diff --git a/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c b/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c
index 880debcbcd65..8395782b6e0a 100644
--- a/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c
+++ b/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c
@@ -34,7 +34,7 @@ struct {
 	__uint(max_entries, 128);
 } mac_map SEC(".maps");
 
-SEC("xdp_redirect_map_multi")
+SEC("xdp")
 int xdp_redirect_map_multi_prog(struct xdp_md *ctx)
 {
 	void *data_end = (void *)(long)ctx->data_end;
@@ -63,7 +63,7 @@ int xdp_redirect_map_multi_prog(struct xdp_md *ctx)
 }
 
 /* The following 2 progs are for 2nd devmap prog testing */
-SEC("xdp_redirect_map_ingress")
+SEC("xdp")
 int xdp_redirect_map_all_prog(struct xdp_md *ctx)
 {
 	return bpf_redirect_map(&map_egress, 0,
diff --git a/tools/testing/selftests/bpf/progs/xdping_kern.c b/tools/testing/selftests/bpf/progs/xdping_kern.c
index 6b9ca40bd1f4..4ad73847b8a5 100644
--- a/tools/testing/selftests/bpf/progs/xdping_kern.c
+++ b/tools/testing/selftests/bpf/progs/xdping_kern.c
@@ -86,7 +86,7 @@ static __always_inline int icmp_check(struct xdp_md *ctx, int type)
 	return XDP_TX;
 }
 
-SEC("xdpclient")
+SEC("xdp")
 int xdping_client(struct xdp_md *ctx)
 {
 	void *data_end = (void *)(long)ctx->data_end;
@@ -150,7 +150,7 @@ int xdping_client(struct xdp_md *ctx)
 	return XDP_TX;
 }
 
-SEC("xdpserver")
+SEC("xdp")
 int xdping_server(struct xdp_md *ctx)
 {
 	void *data_end = (void *)(long)ctx->data_end;
diff --git a/tools/testing/selftests/bpf/progs/xdpwall.c b/tools/testing/selftests/bpf/progs/xdpwall.c
new file mode 100644
index 000000000000..7a891a0c3a39
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/xdpwall.c
@@ -0,0 +1,365 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <stdbool.h>
+#include <stdint.h>
+#include <linux/stddef.h>
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/bpf.h>
+#include <linux/types.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_helpers.h>
+
+enum pkt_parse_err {
+	NO_ERR,
+	BAD_IP6_HDR,
+	BAD_IP4GUE_HDR,
+	BAD_IP6GUE_HDR,
+};
+
+enum pkt_flag {
+	TUNNEL = 0x1,
+	TCP_SYN = 0x2,
+	QUIC_INITIAL_FLAG = 0x4,
+	TCP_ACK = 0x8,
+	TCP_RST = 0x10
+};
+
+struct v4_lpm_key {
+	__u32 prefixlen;
+	__u32 src;
+};
+
+struct v4_lpm_val {
+	struct v4_lpm_key key;
+	__u8 val;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 16);
+	__type(key, struct in6_addr);
+	__type(value, bool);
+} v6_addr_map SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 16);
+	__type(key, __u32);
+	__type(value, bool);
+} v4_addr_map SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_LPM_TRIE);
+	__uint(max_entries, 16);
+	__uint(key_size, sizeof(struct v4_lpm_key));
+	__uint(value_size, sizeof(struct v4_lpm_val));
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+} v4_lpm_val_map SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 16);
+	__type(key, int);
+	__type(value, __u8);
+} tcp_port_map SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 16);
+	__type(key, int);
+	__type(value, __u16);
+} udp_port_map SEC(".maps");
+
+enum ip_type { V4 = 1, V6 = 2 };
+
+struct fw_match_info {
+	__u8 v4_src_ip_match;
+	__u8 v6_src_ip_match;
+	__u8 v4_src_prefix_match;
+	__u8 v4_dst_prefix_match;
+	__u8 tcp_dp_match;
+	__u16 udp_sp_match;
+	__u16 udp_dp_match;
+	bool is_tcp;
+	bool is_tcp_syn;
+};
+
+struct pkt_info {
+	enum ip_type type;
+	union {
+		struct iphdr *ipv4;
+		struct ipv6hdr *ipv6;
+	} ip;
+	int sport;
+	int dport;
+	__u16 trans_hdr_offset;
+	__u8 proto;
+	__u8 flags;
+};
+
+static __always_inline struct ethhdr *parse_ethhdr(void *data, void *data_end)
+{
+	struct ethhdr *eth = data;
+
+	if (eth + 1 > data_end)
+		return NULL;
+
+	return eth;
+}
+
+static __always_inline __u8 filter_ipv6_addr(const struct in6_addr *ipv6addr)
+{
+	__u8 *leaf;
+
+	leaf = bpf_map_lookup_elem(&v6_addr_map, ipv6addr);
+
+	return leaf ? *leaf : 0;
+}
+
+static __always_inline __u8 filter_ipv4_addr(const __u32 ipaddr)
+{
+	__u8 *leaf;
+
+	leaf = bpf_map_lookup_elem(&v4_addr_map, &ipaddr);
+
+	return leaf ? *leaf : 0;
+}
+
+static __always_inline __u8 filter_ipv4_lpm(const __u32 ipaddr)
+{
+	struct v4_lpm_key v4_key = {};
+	struct v4_lpm_val *lpm_val;
+
+	v4_key.src = ipaddr;
+	v4_key.prefixlen = 32;
+
+	lpm_val = bpf_map_lookup_elem(&v4_lpm_val_map, &v4_key);
+
+	return lpm_val ? lpm_val->val : 0;
+}
+
+
+static __always_inline void
+filter_src_dst_ip(struct pkt_info* info, struct fw_match_info* match_info)
+{
+	if (info->type == V6) {
+		match_info->v6_src_ip_match =
+			filter_ipv6_addr(&info->ip.ipv6->saddr);
+	} else if (info->type == V4) {
+		match_info->v4_src_ip_match =
+			filter_ipv4_addr(info->ip.ipv4->saddr);
+		match_info->v4_src_prefix_match =
+			filter_ipv4_lpm(info->ip.ipv4->saddr);
+		match_info->v4_dst_prefix_match =
+			filter_ipv4_lpm(info->ip.ipv4->daddr);
+	}
+}
+
+static __always_inline void *
+get_transport_hdr(__u16 offset, void *data, void *data_end)
+{
+	if (offset > 255 || data + offset > data_end)
+		return NULL;
+
+	return data + offset;
+}
+
+static __always_inline bool tcphdr_only_contains_flag(struct tcphdr *tcp,
+						      __u32 FLAG)
+{
+	return (tcp_flag_word(tcp) &
+		(TCP_FLAG_ACK | TCP_FLAG_RST | TCP_FLAG_SYN | TCP_FLAG_FIN)) == FLAG;
+}
+
+static __always_inline void set_tcp_flags(struct pkt_info *info,
+					  struct tcphdr *tcp) {
+	if (tcphdr_only_contains_flag(tcp, TCP_FLAG_SYN))
+		info->flags |= TCP_SYN;
+	else if (tcphdr_only_contains_flag(tcp, TCP_FLAG_ACK))
+		info->flags |= TCP_ACK;
+	else if (tcphdr_only_contains_flag(tcp, TCP_FLAG_RST))
+		info->flags |= TCP_RST;
+}
+
+static __always_inline bool
+parse_tcp(struct pkt_info *info, void *transport_hdr, void *data_end)
+{
+	struct tcphdr *tcp = transport_hdr;
+
+	if (tcp + 1 > data_end)
+		return false;
+
+	info->sport = bpf_ntohs(tcp->source);
+	info->dport = bpf_ntohs(tcp->dest);
+	set_tcp_flags(info, tcp);
+
+	return true;
+}
+
+static __always_inline bool
+parse_udp(struct pkt_info *info, void *transport_hdr, void *data_end)
+{
+	struct udphdr *udp = transport_hdr;
+
+	if (udp + 1 > data_end)
+		return false;
+
+	info->sport = bpf_ntohs(udp->source);
+	info->dport = bpf_ntohs(udp->dest);
+
+	return true;
+}
+
+static __always_inline __u8 filter_tcp_port(int port)
+{
+	__u8 *leaf = bpf_map_lookup_elem(&tcp_port_map, &port);
+
+	return leaf ? *leaf : 0;
+}
+
+static __always_inline __u16 filter_udp_port(int port)
+{
+	__u16 *leaf = bpf_map_lookup_elem(&udp_port_map, &port);
+
+	return leaf ? *leaf : 0;
+}
+
+static __always_inline bool
+filter_transport_hdr(void *transport_hdr, void *data_end,
+		     struct pkt_info *info, struct fw_match_info *match_info)
+{
+	if (info->proto == IPPROTO_TCP) {
+		if (!parse_tcp(info, transport_hdr, data_end))
+			return false;
+
+		match_info->is_tcp = true;
+		match_info->is_tcp_syn = (info->flags & TCP_SYN) > 0;
+
+		match_info->tcp_dp_match = filter_tcp_port(info->dport);
+	} else if (info->proto == IPPROTO_UDP) {
+		if (!parse_udp(info, transport_hdr, data_end))
+			return false;
+
+		match_info->udp_dp_match = filter_udp_port(info->dport);
+		match_info->udp_sp_match = filter_udp_port(info->sport);
+	}
+
+	return true;
+}
+
+static __always_inline __u8
+parse_gue_v6(struct pkt_info *info, struct ipv6hdr *ip6h, void *data_end)
+{
+	struct udphdr *udp = (struct udphdr *)(ip6h + 1);
+	void *encap_data = udp + 1;
+
+	if (udp + 1 > data_end)
+		return BAD_IP6_HDR;
+
+	if (udp->dest != bpf_htons(6666))
+		return NO_ERR;
+
+	info->flags |= TUNNEL;
+
+	if (encap_data + 1 > data_end)
+		return BAD_IP6GUE_HDR;
+
+	if (*(__u8 *)encap_data & 0x30) {
+		struct ipv6hdr *inner_ip6h = encap_data;
+
+		if (inner_ip6h + 1 > data_end)
+			return BAD_IP6GUE_HDR;
+
+		info->type = V6;
+		info->proto = inner_ip6h->nexthdr;
+		info->ip.ipv6 = inner_ip6h;
+		info->trans_hdr_offset += sizeof(struct ipv6hdr) + sizeof(struct udphdr);
+	} else {
+		struct iphdr *inner_ip4h = encap_data;
+
+		if (inner_ip4h + 1 > data_end)
+			return BAD_IP6GUE_HDR;
+
+		info->type = V4;
+		info->proto = inner_ip4h->protocol;
+		info->ip.ipv4 = inner_ip4h;
+		info->trans_hdr_offset += sizeof(struct iphdr) + sizeof(struct udphdr);
+	}
+
+	return NO_ERR;
+}
+
+static __always_inline __u8 parse_ipv6_gue(struct pkt_info *info,
+					   void *data, void *data_end)
+{
+	struct ipv6hdr *ip6h = data + sizeof(struct ethhdr);
+
+	if (ip6h + 1 > data_end)
+		return BAD_IP6_HDR;
+
+	info->proto = ip6h->nexthdr;
+	info->ip.ipv6 = ip6h;
+	info->type = V6;
+	info->trans_hdr_offset = sizeof(struct ethhdr) + sizeof(struct ipv6hdr);
+
+	if (info->proto == IPPROTO_UDP)
+		return parse_gue_v6(info, ip6h, data_end);
+
+	return NO_ERR;
+}
+
+SEC("xdp")
+int edgewall(struct xdp_md *ctx)
+{
+	void *data_end = (void *)(long)(ctx->data_end);
+	void *data = (void *)(long)(ctx->data);
+	struct fw_match_info match_info = {};
+	struct pkt_info info = {};
+	__u8 parse_err = NO_ERR;
+	void *transport_hdr;
+	struct ethhdr *eth;
+	bool filter_res;
+	__u32 proto;
+
+	eth = parse_ethhdr(data, data_end);
+	if (!eth)
+		return XDP_DROP;
+
+	proto = eth->h_proto;
+	if (proto != bpf_htons(ETH_P_IPV6))
+		return XDP_DROP;
+
+	if (parse_ipv6_gue(&info, data, data_end))
+		return XDP_DROP;
+
+	if (info.proto == IPPROTO_ICMPV6)
+		return XDP_PASS;
+
+	if (info.proto != IPPROTO_TCP && info.proto != IPPROTO_UDP)
+		return XDP_DROP;
+
+	filter_src_dst_ip(&info, &match_info);
+
+	transport_hdr = get_transport_hdr(info.trans_hdr_offset, data,
+					  data_end);
+	if (!transport_hdr)
+		return XDP_DROP;
+
+	filter_res = filter_transport_hdr(transport_hdr, data_end,
+					  &info, &match_info);
+	if (!filter_res)
+		return XDP_DROP;
+
+	if (match_info.is_tcp && !match_info.is_tcp_syn)
+		return XDP_PASS;
+
+	return XDP_DROP;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_bpftool.py b/tools/testing/selftests/bpf/test_bpftool.py
index 4fed2dc25c0a..1c2408ee1f5d 100644
--- a/tools/testing/selftests/bpf/test_bpftool.py
+++ b/tools/testing/selftests/bpf/test_bpftool.py
@@ -57,6 +57,11 @@ def default_iface(f):
         return f(*args, iface, **kwargs)
     return wrapper
 
+DMESG_EMITTING_HELPERS = [
+        "bpf_probe_write_user",
+        "bpf_trace_printk",
+        "bpf_trace_vprintk",
+    ]
 
 class TestBpftool(unittest.TestCase):
     @classmethod
@@ -67,10 +72,7 @@ class TestBpftool(unittest.TestCase):
 
     @default_iface
     def test_feature_dev_json(self, iface):
-        unexpected_helpers = [
-            "bpf_probe_write_user",
-            "bpf_trace_printk",
-        ]
+        unexpected_helpers = DMESG_EMITTING_HELPERS
         expected_keys = [
             "syscall_config",
             "program_types",
@@ -94,10 +96,7 @@ class TestBpftool(unittest.TestCase):
             bpftool_json(["feature", "probe"]),
             bpftool_json(["feature"]),
         ]
-        unexpected_helpers = [
-            "bpf_probe_write_user",
-            "bpf_trace_printk",
-        ]
+        unexpected_helpers = DMESG_EMITTING_HELPERS
         expected_keys = [
             "syscall_config",
             "system_config",
@@ -121,10 +120,7 @@ class TestBpftool(unittest.TestCase):
             bpftool_json(["feature", "probe", "kernel", "full"]),
             bpftool_json(["feature", "probe", "full"]),
         ]
-        expected_helpers = [
-            "bpf_probe_write_user",
-            "bpf_trace_printk",
-        ]
+        expected_helpers = DMESG_EMITTING_HELPERS
 
         for tc in test_cases:
             # Check if expected helpers are included at least once in any
@@ -157,7 +153,7 @@ class TestBpftool(unittest.TestCase):
                 not_full_set.add(helper)
 
         self.assertCountEqual(full_set - not_full_set,
-                                {"bpf_probe_write_user", "bpf_trace_printk"})
+                              set(DMESG_EMITTING_HELPERS))
         self.assertCountEqual(not_full_set - full_set, set())
 
     def test_feature_macros(self):
diff --git a/tools/testing/selftests/bpf/test_bpftool_build.sh b/tools/testing/selftests/bpf/test_bpftool_build.sh
index b03a87571592..1453a53ed547 100755
--- a/tools/testing/selftests/bpf/test_bpftool_build.sh
+++ b/tools/testing/selftests/bpf/test_bpftool_build.sh
@@ -90,6 +90,10 @@ echo -e "... through kbuild\n"
 
 if [ -f ".config" ] ; then
 	make_and_clean tools/bpf
+	## "make tools/bpf" sets $(OUTPUT) to ...tools/bpf/runqslower for
+	## runqslower, but the default (used for the "clean" target) is .output.
+	## Let's make sure we clean runqslower's directory properly.
+	make -C tools/bpf/runqslower OUTPUT=${KDIR_ROOT_DIR}/tools/bpf/runqslower/ clean
 
 	## $OUTPUT is overwritten in kbuild Makefile, and thus cannot be passed
 	## down from toplevel Makefile to bpftool's Makefile.
diff --git a/tools/testing/selftests/bpf/test_btf.h b/tools/testing/selftests/bpf/test_btf.h
index e2394eea4b7f..32c7a57867da 100644
--- a/tools/testing/selftests/bpf/test_btf.h
+++ b/tools/testing/selftests/bpf/test_btf.h
@@ -69,4 +69,7 @@
 #define BTF_TYPE_FLOAT_ENC(name, sz) \
 	BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FLOAT, 0, 0), sz)
 
+#define BTF_DECL_TAG_ENC(value, type, component_idx)	\
+	BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 0, 0), type), (component_idx)
+
 #endif /* _TEST_BTF_H */
diff --git a/tools/testing/selftests/bpf/test_flow_dissector.sh b/tools/testing/selftests/bpf/test_flow_dissector.sh
index 174b72a64a4c..dbd91221727d 100755
--- a/tools/testing/selftests/bpf/test_flow_dissector.sh
+++ b/tools/testing/selftests/bpf/test_flow_dissector.sh
@@ -26,22 +26,22 @@ if [[ -z $(ip netns identify $$) ]]; then
 			type flow_dissector
 
 		if ! unshare --net $bpftool prog attach pinned \
-			/sys/fs/bpf/flow/flow_dissector flow_dissector; then
+			/sys/fs/bpf/flow/_dissect flow_dissector; then
 			echo "Unexpected unsuccessful attach in namespace" >&2
 			err=1
 		fi
 
-		$bpftool prog attach pinned /sys/fs/bpf/flow/flow_dissector \
+		$bpftool prog attach pinned /sys/fs/bpf/flow/_dissect \
 			flow_dissector
 
 		if unshare --net $bpftool prog attach pinned \
-			/sys/fs/bpf/flow/flow_dissector flow_dissector; then
+			/sys/fs/bpf/flow/_dissect flow_dissector; then
 			echo "Unexpected successful attach in namespace" >&2
 			err=1
 		fi
 
 		if ! $bpftool prog detach pinned \
-			/sys/fs/bpf/flow/flow_dissector flow_dissector; then
+			/sys/fs/bpf/flow/_dissect flow_dissector; then
 			echo "Failed to detach flow dissector" >&2
 			err=1
 		fi
@@ -95,7 +95,7 @@ else
 fi
 
 # Attach BPF program
-./flow_dissector_load -p bpf_flow.o -s flow_dissector
+./flow_dissector_load -p bpf_flow.o -s _dissect
 
 # Setup
 tc qdisc add dev lo ingress
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index cc1cd240445d..c65986bd9d07 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -12,6 +12,11 @@
 #include <string.h>
 #include <execinfo.h> /* backtrace */
 #include <linux/membarrier.h>
+#include <sys/sysinfo.h> /* get_nprocs */
+#include <netinet/in.h>
+#include <sys/select.h>
+#include <sys/socket.h>
+#include <sys/un.h>
 
 /* Adapted from perf/util/string.c */
 static bool glob_match(const char *str, const char *pat)
@@ -45,9 +50,12 @@ struct prog_test_def {
 	const char *test_name;
 	int test_num;
 	void (*run_test)(void);
+	void (*run_serial_test)(void);
 	bool force_log;
 	int error_cnt;
 	int skip_cnt;
+	int sub_succ_cnt;
+	bool should_run;
 	bool tested;
 	bool need_cgroup_cleanup;
 
@@ -97,6 +105,10 @@ static void dump_test_log(const struct prog_test_def *test, bool failed)
 	if (stdout == env.stdout)
 		return;
 
+	/* worker always holds log */
+	if (env.worker_id != -1)
+		return;
+
 	fflush(stdout); /* exports env.log_buf & env.log_cnt */
 
 	if (env.verbosity > VERBOSE_NONE || test->force_log || failed) {
@@ -107,8 +119,6 @@ static void dump_test_log(const struct prog_test_def *test, bool failed)
 				fprintf(env.stdout, "\n");
 		}
 	}
-
-	fseeko(stdout, 0, SEEK_SET); /* rewind */
 }
 
 static void skip_account(void)
@@ -124,7 +134,8 @@ static void stdio_restore(void);
 /* A bunch of tests set custom affinity per-thread and/or per-process. Reset
  * it after each test/sub-test.
  */
-static void reset_affinity() {
+static void reset_affinity(void)
+{
 
 	cpu_set_t cpuset;
 	int i, err;
@@ -165,21 +176,21 @@ static void restore_netns(void)
 	}
 }
 
-void test__end_subtest()
+void test__end_subtest(void)
 {
 	struct prog_test_def *test = env.test;
 	int sub_error_cnt = test->error_cnt - test->old_error_cnt;
 
 	dump_test_log(test, sub_error_cnt);
 
-	fprintf(env.stdout, "#%d/%d %s/%s:%s\n",
+	fprintf(stdout, "#%d/%d %s/%s:%s\n",
 	       test->test_num, test->subtest_num, test->test_name, test->subtest_name,
 	       sub_error_cnt ? "FAIL" : (test->skip_cnt ? "SKIP" : "OK"));
 
 	if (sub_error_cnt)
-		env.fail_cnt++;
+		test->error_cnt++;
 	else if (test->skip_cnt == 0)
-		env.sub_succ_cnt++;
+		test->sub_succ_cnt++;
 	skip_account();
 
 	free(test->subtest_name);
@@ -217,7 +228,8 @@ bool test__start_subtest(const char *name)
 	return true;
 }
 
-void test__force_log() {
+void test__force_log(void)
+{
 	env.test->force_log = true;
 }
 
@@ -370,7 +382,7 @@ int extract_build_id(char *build_id, size_t size)
 
 	if (getline(&line, &len, fp) == -1)
 		goto err;
-	fclose(fp);
+	pclose(fp);
 
 	if (len > size)
 		len = size;
@@ -379,7 +391,7 @@ int extract_build_id(char *build_id, size_t size)
 	free(line);
 	return 0;
 err:
-	fclose(fp);
+	pclose(fp);
 	return -1;
 }
 
@@ -446,14 +458,17 @@ static int load_bpf_testmod(void)
 }
 
 /* extern declarations for test funcs */
-#define DEFINE_TEST(name) extern void test_##name(void);
+#define DEFINE_TEST(name)				\
+	extern void test_##name(void) __weak;		\
+	extern void serial_test_##name(void) __weak;
 #include <prog_tests/tests.h>
 #undef DEFINE_TEST
 
 static struct prog_test_def prog_test_defs[] = {
-#define DEFINE_TEST(name) {		\
-	.test_name = #name,		\
-	.run_test = &test_##name,	\
+#define DEFINE_TEST(name) {			\
+	.test_name = #name,			\
+	.run_test = &test_##name,		\
+	.run_serial_test = &serial_test_##name,	\
 },
 #include <prog_tests/tests.h>
 #undef DEFINE_TEST
@@ -474,6 +489,8 @@ enum ARG_KEYS {
 	ARG_LIST_TEST_NAMES = 'l',
 	ARG_TEST_NAME_GLOB_ALLOWLIST = 'a',
 	ARG_TEST_NAME_GLOB_DENYLIST = 'd',
+	ARG_NUM_WORKERS = 'j',
+	ARG_DEBUG = -1,
 };
 
 static const struct argp_option opts[] = {
@@ -495,6 +512,10 @@ static const struct argp_option opts[] = {
 	  "Run tests with name matching the pattern (supports '*' wildcard)." },
 	{ "deny", ARG_TEST_NAME_GLOB_DENYLIST, "NAMES", 0,
 	  "Don't run tests with name matching the pattern (supports '*' wildcard)." },
+	{ "workers", ARG_NUM_WORKERS, "WORKERS", OPTION_ARG_OPTIONAL,
+	  "Number of workers to run in parallel, default to number of cpus." },
+	{ "debug", ARG_DEBUG, NULL, 0,
+	  "print extra debug information for test_progs." },
 	{},
 };
 
@@ -650,7 +671,7 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
 				fprintf(stderr,
 					"Unable to setenv SELFTESTS_VERBOSE=1 (errno=%d)",
 					errno);
-				return -1;
+				return -EINVAL;
 			}
 		}
 
@@ -661,6 +682,20 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
 	case ARG_LIST_TEST_NAMES:
 		env->list_test_names = true;
 		break;
+	case ARG_NUM_WORKERS:
+		if (arg) {
+			env->workers = atoi(arg);
+			if (!env->workers) {
+				fprintf(stderr, "Invalid number of worker: %s.", arg);
+				return -EINVAL;
+			}
+		} else {
+			env->workers = get_nprocs();
+		}
+		break;
+	case ARG_DEBUG:
+		env->debug = true;
+		break;
 	case ARGP_KEY_ARG:
 		argp_usage(state);
 		break;
@@ -678,7 +713,7 @@ static void stdio_hijack(void)
 	env.stdout = stdout;
 	env.stderr = stderr;
 
-	if (env.verbosity > VERBOSE_NONE) {
+	if (env.verbosity > VERBOSE_NONE && env.worker_id == -1) {
 		/* nothing to do, output to stdout by default */
 		return;
 	}
@@ -704,10 +739,6 @@ static void stdio_restore(void)
 		return;
 
 	fclose(stdout);
-	free(env.log_buf);
-
-	env.log_buf = NULL;
-	env.log_cnt = 0;
 
 	stdout = env.stdout;
 	stderr = env.stderr;
@@ -743,6 +774,45 @@ int cd_flavor_subdir(const char *exec_name)
 	return chdir(flavor);
 }
 
+int trigger_module_test_read(int read_sz)
+{
+	int fd, err;
+
+	fd = open(BPF_TESTMOD_TEST_FILE, O_RDONLY);
+	err = -errno;
+	if (!ASSERT_GE(fd, 0, "testmod_file_open"))
+		return err;
+
+	read(fd, NULL, read_sz);
+	close(fd);
+
+	return 0;
+}
+
+int trigger_module_test_write(int write_sz)
+{
+	int fd, err;
+	char *buf = malloc(write_sz);
+
+	if (!buf)
+		return -ENOMEM;
+
+	memset(buf, 'a', write_sz);
+	buf[write_sz-1] = '\0';
+
+	fd = open(BPF_TESTMOD_TEST_FILE, O_WRONLY);
+	err = -errno;
+	if (!ASSERT_GE(fd, 0, "testmod_file_open")) {
+		free(buf);
+		return err;
+	}
+
+	write(fd, buf, write_sz);
+	close(fd);
+	free(buf);
+	return 0;
+}
+
 #define MAX_BACKTRACE_SZ 128
 void crash_handler(int signum)
 {
@@ -755,11 +825,498 @@ void crash_handler(int signum)
 		dump_test_log(env.test, true);
 	if (env.stdout)
 		stdio_restore();
-
+	if (env.worker_id != -1)
+		fprintf(stderr, "[%d]: ", env.worker_id);
 	fprintf(stderr, "Caught signal #%d!\nStack trace:\n", signum);
 	backtrace_symbols_fd(bt, sz, STDERR_FILENO);
 }
 
+static void sigint_handler(int signum)
+{
+	int i;
+
+	for (i = 0; i < env.workers; i++)
+		if (env.worker_socks[i] > 0)
+			close(env.worker_socks[i]);
+}
+
+static int current_test_idx;
+static pthread_mutex_t current_test_lock;
+static pthread_mutex_t stdout_output_lock;
+
+struct test_result {
+	int error_cnt;
+	int skip_cnt;
+	int sub_succ_cnt;
+
+	size_t log_cnt;
+	char *log_buf;
+};
+
+static struct test_result test_results[ARRAY_SIZE(prog_test_defs)];
+
+static inline const char *str_msg(const struct msg *msg, char *buf)
+{
+	switch (msg->type) {
+	case MSG_DO_TEST:
+		sprintf(buf, "MSG_DO_TEST %d", msg->do_test.test_num);
+		break;
+	case MSG_TEST_DONE:
+		sprintf(buf, "MSG_TEST_DONE %d (log: %d)",
+			msg->test_done.test_num,
+			msg->test_done.have_log);
+		break;
+	case MSG_TEST_LOG:
+		sprintf(buf, "MSG_TEST_LOG (cnt: %ld, last: %d)",
+			strlen(msg->test_log.log_buf),
+			msg->test_log.is_last);
+		break;
+	case MSG_EXIT:
+		sprintf(buf, "MSG_EXIT");
+		break;
+	default:
+		sprintf(buf, "UNKNOWN");
+		break;
+	}
+
+	return buf;
+}
+
+static int send_message(int sock, const struct msg *msg)
+{
+	char buf[256];
+
+	if (env.debug)
+		fprintf(stderr, "Sending msg: %s\n", str_msg(msg, buf));
+	return send(sock, msg, sizeof(*msg), 0);
+}
+
+static int recv_message(int sock, struct msg *msg)
+{
+	int ret;
+	char buf[256];
+
+	memset(msg, 0, sizeof(*msg));
+	ret = recv(sock, msg, sizeof(*msg), 0);
+	if (ret >= 0) {
+		if (env.debug)
+			fprintf(stderr, "Received msg: %s\n", str_msg(msg, buf));
+	}
+	return ret;
+}
+
+static void run_one_test(int test_num)
+{
+	struct prog_test_def *test = &prog_test_defs[test_num];
+
+	env.test = test;
+
+	if (test->run_test)
+		test->run_test();
+	else if (test->run_serial_test)
+		test->run_serial_test();
+
+	/* ensure last sub-test is finalized properly */
+	if (test->subtest_name)
+		test__end_subtest();
+
+	test->tested = true;
+
+	dump_test_log(test, test->error_cnt);
+
+	reset_affinity();
+	restore_netns();
+	if (test->need_cgroup_cleanup)
+		cleanup_cgroup_environment();
+}
+
+struct dispatch_data {
+	int worker_id;
+	int sock_fd;
+};
+
+static void *dispatch_thread(void *ctx)
+{
+	struct dispatch_data *data = ctx;
+	int sock_fd;
+	FILE *log_fd = NULL;
+
+	sock_fd = data->sock_fd;
+
+	while (true) {
+		int test_to_run = -1;
+		struct prog_test_def *test;
+		struct test_result *result;
+
+		/* grab a test */
+		{
+			pthread_mutex_lock(&current_test_lock);
+
+			if (current_test_idx >= prog_test_cnt) {
+				pthread_mutex_unlock(&current_test_lock);
+				goto done;
+			}
+
+			test = &prog_test_defs[current_test_idx];
+			test_to_run = current_test_idx;
+			current_test_idx++;
+
+			pthread_mutex_unlock(&current_test_lock);
+		}
+
+		if (!test->should_run || test->run_serial_test)
+			continue;
+
+		/* run test through worker */
+		{
+			struct msg msg_do_test;
+
+			msg_do_test.type = MSG_DO_TEST;
+			msg_do_test.do_test.test_num = test_to_run;
+			if (send_message(sock_fd, &msg_do_test) < 0) {
+				perror("Fail to send command");
+				goto done;
+			}
+			env.worker_current_test[data->worker_id] = test_to_run;
+		}
+
+		/* wait for test done */
+		{
+			int err;
+			struct msg msg_test_done;
+
+			err = recv_message(sock_fd, &msg_test_done);
+			if (err < 0)
+				goto error;
+			if (msg_test_done.type != MSG_TEST_DONE)
+				goto error;
+			if (test_to_run != msg_test_done.test_done.test_num)
+				goto error;
+
+			test->tested = true;
+			result = &test_results[test_to_run];
+
+			result->error_cnt = msg_test_done.test_done.error_cnt;
+			result->skip_cnt = msg_test_done.test_done.skip_cnt;
+			result->sub_succ_cnt = msg_test_done.test_done.sub_succ_cnt;
+
+			/* collect all logs */
+			if (msg_test_done.test_done.have_log) {
+				log_fd = open_memstream(&result->log_buf, &result->log_cnt);
+				if (!log_fd)
+					goto error;
+
+				while (true) {
+					struct msg msg_log;
+
+					if (recv_message(sock_fd, &msg_log) < 0)
+						goto error;
+					if (msg_log.type != MSG_TEST_LOG)
+						goto error;
+
+					fprintf(log_fd, "%s", msg_log.test_log.log_buf);
+					if (msg_log.test_log.is_last)
+						break;
+				}
+				fclose(log_fd);
+				log_fd = NULL;
+			}
+			/* output log */
+			{
+				pthread_mutex_lock(&stdout_output_lock);
+
+				if (result->log_cnt) {
+					result->log_buf[result->log_cnt] = '\0';
+					fprintf(stdout, "%s", result->log_buf);
+					if (result->log_buf[result->log_cnt - 1] != '\n')
+						fprintf(stdout, "\n");
+				}
+
+				fprintf(stdout, "#%d %s:%s\n",
+					test->test_num, test->test_name,
+					result->error_cnt ? "FAIL" : (result->skip_cnt ? "SKIP" : "OK"));
+
+				pthread_mutex_unlock(&stdout_output_lock);
+			}
+
+		} /* wait for test done */
+	} /* while (true) */
+error:
+	if (env.debug)
+		fprintf(stderr, "[%d]: Protocol/IO error: %s.\n", data->worker_id, strerror(errno));
+
+	if (log_fd)
+		fclose(log_fd);
+done:
+	{
+		struct msg msg_exit;
+
+		msg_exit.type = MSG_EXIT;
+		if (send_message(sock_fd, &msg_exit) < 0) {
+			if (env.debug)
+				fprintf(stderr, "[%d]: send_message msg_exit: %s.\n",
+					data->worker_id, strerror(errno));
+		}
+	}
+	return NULL;
+}
+
+static void print_all_error_logs(void)
+{
+	int i;
+
+	if (env.fail_cnt)
+		fprintf(stdout, "\nAll error logs:\n");
+
+	/* print error logs again */
+	for (i = 0; i < prog_test_cnt; i++) {
+		struct prog_test_def *test;
+		struct test_result *result;
+
+		test = &prog_test_defs[i];
+		result = &test_results[i];
+
+		if (!test->tested || !result->error_cnt)
+			continue;
+
+		fprintf(stdout, "\n#%d %s:%s\n",
+			test->test_num, test->test_name,
+			result->error_cnt ? "FAIL" : (result->skip_cnt ? "SKIP" : "OK"));
+
+		if (result->log_cnt) {
+			result->log_buf[result->log_cnt] = '\0';
+			fprintf(stdout, "%s", result->log_buf);
+			if (result->log_buf[result->log_cnt - 1] != '\n')
+				fprintf(stdout, "\n");
+		}
+	}
+}
+
+static int server_main(void)
+{
+	pthread_t *dispatcher_threads;
+	struct dispatch_data *data;
+	struct sigaction sigact_int = {
+		.sa_handler = sigint_handler,
+		.sa_flags = SA_RESETHAND,
+	};
+	int i;
+
+	sigaction(SIGINT, &sigact_int, NULL);
+
+	dispatcher_threads = calloc(sizeof(pthread_t), env.workers);
+	data = calloc(sizeof(struct dispatch_data), env.workers);
+
+	env.worker_current_test = calloc(sizeof(int), env.workers);
+	for (i = 0; i < env.workers; i++) {
+		int rc;
+
+		data[i].worker_id = i;
+		data[i].sock_fd = env.worker_socks[i];
+		rc = pthread_create(&dispatcher_threads[i], NULL, dispatch_thread, &data[i]);
+		if (rc < 0) {
+			perror("Failed to launch dispatcher thread");
+			exit(EXIT_ERR_SETUP_INFRA);
+		}
+	}
+
+	/* wait for all dispatcher to finish */
+	for (i = 0; i < env.workers; i++) {
+		while (true) {
+			int ret = pthread_tryjoin_np(dispatcher_threads[i], NULL);
+
+			if (!ret) {
+				break;
+			} else if (ret == EBUSY) {
+				if (env.debug)
+					fprintf(stderr, "Still waiting for thread %d (test %d).\n",
+						i,  env.worker_current_test[i] + 1);
+				usleep(1000 * 1000);
+				continue;
+			} else {
+				fprintf(stderr, "Unexpected error joining dispatcher thread: %d", ret);
+				break;
+			}
+		}
+	}
+	free(dispatcher_threads);
+	free(env.worker_current_test);
+	free(data);
+
+	/* run serial tests */
+	save_netns();
+
+	for (int i = 0; i < prog_test_cnt; i++) {
+		struct prog_test_def *test = &prog_test_defs[i];
+		struct test_result *result = &test_results[i];
+
+		if (!test->should_run || !test->run_serial_test)
+			continue;
+
+		stdio_hijack();
+
+		run_one_test(i);
+
+		stdio_restore();
+		if (env.log_buf) {
+			result->log_cnt = env.log_cnt;
+			result->log_buf = strdup(env.log_buf);
+
+			free(env.log_buf);
+			env.log_buf = NULL;
+			env.log_cnt = 0;
+		}
+		restore_netns();
+
+		fprintf(stdout, "#%d %s:%s\n",
+			test->test_num, test->test_name,
+			test->error_cnt ? "FAIL" : (test->skip_cnt ? "SKIP" : "OK"));
+
+		result->error_cnt = test->error_cnt;
+		result->skip_cnt = test->skip_cnt;
+		result->sub_succ_cnt = test->sub_succ_cnt;
+	}
+
+	/* generate summary */
+	fflush(stderr);
+	fflush(stdout);
+
+	for (i = 0; i < prog_test_cnt; i++) {
+		struct prog_test_def *current_test;
+		struct test_result *result;
+
+		current_test = &prog_test_defs[i];
+		result = &test_results[i];
+
+		if (!current_test->tested)
+			continue;
+
+		env.succ_cnt += result->error_cnt ? 0 : 1;
+		env.skip_cnt += result->skip_cnt;
+		if (result->error_cnt)
+			env.fail_cnt++;
+		env.sub_succ_cnt += result->sub_succ_cnt;
+	}
+
+	fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n",
+		env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt);
+
+	print_all_error_logs();
+
+	/* reap all workers */
+	for (i = 0; i < env.workers; i++) {
+		int wstatus, pid;
+
+		pid = waitpid(env.worker_pids[i], &wstatus, 0);
+		if (pid != env.worker_pids[i])
+			perror("Unable to reap worker");
+	}
+
+	return 0;
+}
+
+static int worker_main(int sock)
+{
+	save_netns();
+
+	while (true) {
+		/* receive command */
+		struct msg msg;
+
+		if (recv_message(sock, &msg) < 0)
+			goto out;
+
+		switch (msg.type) {
+		case MSG_EXIT:
+			if (env.debug)
+				fprintf(stderr, "[%d]: worker exit.\n",
+					env.worker_id);
+			goto out;
+		case MSG_DO_TEST: {
+			int test_to_run;
+			struct prog_test_def *test;
+			struct msg msg_done;
+
+			test_to_run = msg.do_test.test_num;
+			test = &prog_test_defs[test_to_run];
+
+			if (env.debug)
+				fprintf(stderr, "[%d]: #%d:%s running.\n",
+					env.worker_id,
+					test_to_run + 1,
+					test->test_name);
+
+			stdio_hijack();
+
+			run_one_test(test_to_run);
+
+			stdio_restore();
+
+			memset(&msg_done, 0, sizeof(msg_done));
+			msg_done.type = MSG_TEST_DONE;
+			msg_done.test_done.test_num = test_to_run;
+			msg_done.test_done.error_cnt = test->error_cnt;
+			msg_done.test_done.skip_cnt = test->skip_cnt;
+			msg_done.test_done.sub_succ_cnt = test->sub_succ_cnt;
+			msg_done.test_done.have_log = false;
+
+			if (env.verbosity > VERBOSE_NONE || test->force_log || test->error_cnt) {
+				if (env.log_cnt)
+					msg_done.test_done.have_log = true;
+			}
+			if (send_message(sock, &msg_done) < 0) {
+				perror("Fail to send message done");
+				goto out;
+			}
+
+			/* send logs */
+			if (msg_done.test_done.have_log) {
+				char *src;
+				size_t slen;
+
+				src = env.log_buf;
+				slen = env.log_cnt;
+				while (slen) {
+					struct msg msg_log;
+					char *dest;
+					size_t len;
+
+					memset(&msg_log, 0, sizeof(msg_log));
+					msg_log.type = MSG_TEST_LOG;
+					dest = msg_log.test_log.log_buf;
+					len = slen >= MAX_LOG_TRUNK_SIZE ? MAX_LOG_TRUNK_SIZE : slen;
+					memcpy(dest, src, len);
+
+					src += len;
+					slen -= len;
+					if (!slen)
+						msg_log.test_log.is_last = true;
+
+					assert(send_message(sock, &msg_log) >= 0);
+				}
+			}
+			if (env.log_buf) {
+				free(env.log_buf);
+				env.log_buf = NULL;
+				env.log_cnt = 0;
+			}
+			if (env.debug)
+				fprintf(stderr, "[%d]: #%d:%s done.\n",
+					env.worker_id,
+					test_to_run + 1,
+					test->test_name);
+			break;
+		} /* case MSG_DO_TEST */
+		default:
+			if (env.debug)
+				fprintf(stderr, "[%d]: unknown message.\n",  env.worker_id);
+			return -1;
+		}
+	}
+out:
+	return 0;
+}
+
 int main(int argc, char **argv)
 {
 	static const struct argp argp = {
@@ -770,7 +1327,7 @@ int main(int argc, char **argv)
 	struct sigaction sigact = {
 		.sa_handler = crash_handler,
 		.sa_flags = SA_RESETHAND,
-	};
+		};
 	int err, i;
 
 	sigaction(SIGSEGV, &sigact, NULL);
@@ -798,21 +1355,84 @@ int main(int argc, char **argv)
 		return -1;
 	}
 
-	save_netns();
-	stdio_hijack();
+	env.stdout = stdout;
+	env.stderr = stderr;
+
 	env.has_testmod = true;
 	if (!env.list_test_names && load_bpf_testmod()) {
 		fprintf(env.stderr, "WARNING! Selftests relying on bpf_testmod.ko will be skipped.\n");
 		env.has_testmod = false;
 	}
+
+	/* initializing tests */
 	for (i = 0; i < prog_test_cnt; i++) {
 		struct prog_test_def *test = &prog_test_defs[i];
 
-		env.test = test;
 		test->test_num = i + 1;
-
-		if (!should_run(&env.test_selector,
+		if (should_run(&env.test_selector,
 				test->test_num, test->test_name))
+			test->should_run = true;
+		else
+			test->should_run = false;
+
+		if ((test->run_test == NULL && test->run_serial_test == NULL) ||
+		    (test->run_test != NULL && test->run_serial_test != NULL)) {
+			fprintf(stderr, "Test %d:%s must have either test_%s() or serial_test_%sl() defined.\n",
+				test->test_num, test->test_name, test->test_name, test->test_name);
+			exit(EXIT_ERR_SETUP_INFRA);
+		}
+	}
+
+	/* ignore workers if we are just listing */
+	if (env.get_test_cnt || env.list_test_names)
+		env.workers = 0;
+
+	/* launch workers if requested */
+	env.worker_id = -1; /* main process */
+	if (env.workers) {
+		env.worker_pids = calloc(sizeof(__pid_t), env.workers);
+		env.worker_socks = calloc(sizeof(int), env.workers);
+		if (env.debug)
+			fprintf(stdout, "Launching %d workers.\n", env.workers);
+		for (i = 0; i < env.workers; i++) {
+			int sv[2];
+			pid_t pid;
+
+			if (socketpair(AF_UNIX, SOCK_SEQPACKET | SOCK_CLOEXEC, 0, sv) < 0) {
+				perror("Fail to create worker socket");
+				return -1;
+			}
+			pid = fork();
+			if (pid < 0) {
+				perror("Failed to fork worker");
+				return -1;
+			} else if (pid != 0) { /* main process */
+				close(sv[1]);
+				env.worker_pids[i] = pid;
+				env.worker_socks[i] = sv[0];
+			} else { /* inside each worker process */
+				close(sv[0]);
+				env.worker_id = i;
+				return worker_main(sv[1]);
+			}
+		}
+
+		if (env.worker_id == -1) {
+			server_main();
+			goto out;
+		}
+	}
+
+	/* The rest of the main process */
+
+	/* on single mode */
+	save_netns();
+
+	for (i = 0; i < prog_test_cnt; i++) {
+		struct prog_test_def *test = &prog_test_defs[i];
+		struct test_result *result;
+
+		if (!test->should_run)
 			continue;
 
 		if (env.get_test_cnt) {
@@ -826,33 +1446,35 @@ int main(int argc, char **argv)
 			continue;
 		}
 
-		test->run_test();
-		/* ensure last sub-test is finalized properly */
-		if (test->subtest_name)
-			test__end_subtest();
+		stdio_hijack();
 
-		test->tested = true;
+		run_one_test(i);
 
-		dump_test_log(test, test->error_cnt);
+		stdio_restore();
 
 		fprintf(env.stdout, "#%d %s:%s\n",
 			test->test_num, test->test_name,
 			test->error_cnt ? "FAIL" : (test->skip_cnt ? "SKIP" : "OK"));
 
+		result = &test_results[i];
+		result->error_cnt = test->error_cnt;
+		if (env.log_buf) {
+			result->log_buf = strdup(env.log_buf);
+			result->log_cnt = env.log_cnt;
+
+			free(env.log_buf);
+			env.log_buf = NULL;
+			env.log_cnt = 0;
+		}
+
 		if (test->error_cnt)
 			env.fail_cnt++;
 		else
 			env.succ_cnt++;
-		skip_account();
 
-		reset_affinity();
-		restore_netns();
-		if (test->need_cgroup_cleanup)
-			cleanup_cgroup_environment();
+		skip_account();
+		env.sub_succ_cnt += test->sub_succ_cnt;
 	}
-	if (!env.list_test_names && env.has_testmod)
-		unload_bpf_testmod();
-	stdio_restore();
 
 	if (env.get_test_cnt) {
 		printf("%d\n", env.succ_cnt);
@@ -865,14 +1487,18 @@ int main(int argc, char **argv)
 	fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n",
 		env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt);
 
+	print_all_error_logs();
+
+	close(env.saved_netns_fd);
 out:
+	if (!env.list_test_names && env.has_testmod)
+		unload_bpf_testmod();
 	free_str_set(&env.test_selector.blacklist);
 	free_str_set(&env.test_selector.whitelist);
 	free(env.test_selector.num_set);
 	free_str_set(&env.subtest_selector.blacklist);
 	free_str_set(&env.subtest_selector.whitelist);
 	free(env.subtest_selector.num_set);
-	close(env.saved_netns_fd);
 
 	if (env.succ_cnt + env.fail_cnt + env.skip_cnt == 0)
 		return EXIT_NO_TEST;
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index c8c2bf878f67..93c1ff705533 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -62,6 +62,7 @@ struct test_env {
 	struct test_selector test_selector;
 	struct test_selector subtest_selector;
 	bool verifier_stats;
+	bool debug;
 	enum verbosity verbosity;
 
 	bool jit_enabled;
@@ -69,7 +70,8 @@ struct test_env {
 	bool get_test_cnt;
 	bool list_test_names;
 
-	struct prog_test_def *test;
+	struct prog_test_def *test; /* current running tests */
+
 	FILE *stdout;
 	FILE *stderr;
 	char *log_buf;
@@ -82,6 +84,38 @@ struct test_env {
 	int skip_cnt; /* skipped tests */
 
 	int saved_netns_fd;
+	int workers; /* number of worker process */
+	int worker_id; /* id number of current worker, main process is -1 */
+	pid_t *worker_pids; /* array of worker pids */
+	int *worker_socks; /* array of worker socks */
+	int *worker_current_test; /* array of current running test for each worker */
+};
+
+#define MAX_LOG_TRUNK_SIZE 8192
+enum msg_type {
+	MSG_DO_TEST = 0,
+	MSG_TEST_DONE = 1,
+	MSG_TEST_LOG = 2,
+	MSG_EXIT = 255,
+};
+struct msg {
+	enum msg_type type;
+	union {
+		struct {
+			int test_num;
+		} do_test;
+		struct {
+			int test_num;
+			int sub_succ_cnt;
+			int error_cnt;
+			int skip_cnt;
+			bool have_log;
+		} test_done;
+		struct {
+			char log_buf[MAX_LOG_TRUNK_SIZE + 1];
+			bool is_last;
+		} test_log;
+	};
 };
 
 extern struct test_env env;
@@ -291,6 +325,8 @@ int compare_map_keys(int map1_fd, int map2_fd);
 int compare_stack_ips(int smap_fd, int amap_fd, int stack_trace_len);
 int extract_build_id(char *build_id, size_t size);
 int kern_sync_rcu(void);
+int trigger_module_test_read(int read_sz);
+int trigger_module_test_write(int write_sz);
 
 #ifdef __x86_64__
 #define SYS_NANOSLEEP_KPROBE_NAME "__x64_sys_nanosleep"
@@ -299,3 +335,5 @@ int kern_sync_rcu(void);
 #else
 #define SYS_NANOSLEEP_KPROBE_NAME "sys_nanosleep"
 #endif
+
+#define BPF_TESTMOD_TEST_FILE "/sys/kernel/bpf_testmod"
diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index eefd445b96fc..1ba7e7346afb 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -139,6 +139,7 @@ struct sockmap_options {
 	bool sendpage;
 	bool data_test;
 	bool drop_expected;
+	bool check_recved_len;
 	int iov_count;
 	int iov_length;
 	int rate;
@@ -556,8 +557,12 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
 	int err, i, flags = MSG_NOSIGNAL;
 	bool drop = opt->drop_expected;
 	bool data = opt->data_test;
+	int iov_alloc_length = iov_length;
 
-	err = msg_alloc_iov(&msg, iov_count, iov_length, data, tx);
+	if (!tx && opt->check_recved_len)
+		iov_alloc_length *= 2;
+
+	err = msg_alloc_iov(&msg, iov_count, iov_alloc_length, data, tx);
 	if (err)
 		goto out_errno;
 	if (peek_flag) {
@@ -665,6 +670,13 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
 
 			s->bytes_recvd += recv;
 
+			if (opt->check_recved_len && s->bytes_recvd > total_bytes) {
+				errno = EMSGSIZE;
+				fprintf(stderr, "recv failed(), bytes_recvd:%zd, total_bytes:%f\n",
+						s->bytes_recvd, total_bytes);
+				goto out_errno;
+			}
+
 			if (data) {
 				int chunk_sz = opt->sendpage ?
 						iov_length * cnt :
@@ -744,7 +756,8 @@ static int sendmsg_test(struct sockmap_options *opt)
 
 	rxpid = fork();
 	if (rxpid == 0) {
-		iov_buf -= (txmsg_pop - txmsg_start_pop + 1);
+		if (txmsg_pop || txmsg_start_pop)
+			iov_buf -= (txmsg_pop - txmsg_start_pop + 1);
 		if (opt->drop_expected || txmsg_ktls_skb_drop)
 			_exit(0);
 
@@ -1680,12 +1693,27 @@ static void test_txmsg_ingress_parser(int cgrp, struct sockmap_options *opt)
 {
 	txmsg_pass = 1;
 	skb_use_parser = 512;
+	if (ktls == 1)
+		skb_use_parser = 570;
 	opt->iov_length = 256;
 	opt->iov_count = 1;
 	opt->rate = 2;
 	test_exec(cgrp, opt);
 }
 
+static void test_txmsg_ingress_parser2(int cgrp, struct sockmap_options *opt)
+{
+	if (ktls == 1)
+		return;
+	skb_use_parser = 10;
+	opt->iov_length = 20;
+	opt->iov_count = 1;
+	opt->rate = 1;
+	opt->check_recved_len = true;
+	test_exec(cgrp, opt);
+	opt->check_recved_len = false;
+}
+
 char *map_names[] = {
 	"sock_map",
 	"sock_map_txmsg",
@@ -1784,7 +1812,8 @@ struct _test test[] = {
 	{"txmsg test pull-data", test_txmsg_pull},
 	{"txmsg test pop-data", test_txmsg_pop},
 	{"txmsg test push/pop data", test_txmsg_push_pop},
-	{"txmsg text ingress parser", test_txmsg_ingress_parser},
+	{"txmsg test ingress parser", test_txmsg_ingress_parser},
+	{"txmsg test ingress parser2", test_txmsg_ingress_parser2},
 };
 
 static int check_whitelist(struct _test *t, struct sockmap_options *opt)
diff --git a/tools/testing/selftests/bpf/test_sysctl.c b/tools/testing/selftests/bpf/test_sysctl.c
index a20a919244c0..a3bb6d399daa 100644
--- a/tools/testing/selftests/bpf/test_sysctl.c
+++ b/tools/testing/selftests/bpf/test_sysctl.c
@@ -124,7 +124,7 @@ static struct sysctl_test tests[] = {
 		.descr = "ctx:write sysctl:write read ok narrow",
 		.insns = {
 			/* u64 w = (u16)write & 1; */
-#if __BYTE_ORDER == __LITTLE_ENDIAN
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
 			BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_1,
 				    offsetof(struct bpf_sysctl, write)),
 #else
@@ -184,7 +184,7 @@ static struct sysctl_test tests[] = {
 		.descr = "ctx:file_pos sysctl:read read ok narrow",
 		.insns = {
 			/* If (file_pos == X) */
-#if __BYTE_ORDER == __LITTLE_ENDIAN
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
 			BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_1,
 				    offsetof(struct bpf_sysctl, file_pos)),
 #else
diff --git a/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh b/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh
index 9b3617d770a5..6413c1472554 100755
--- a/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh
+++ b/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh
@@ -76,8 +76,8 @@ DIR=$(dirname $0)
 TEST_IF=lo
 MAX_PING_TRIES=5
 BPF_PROG_OBJ="${DIR}/test_tcp_check_syncookie_kern.o"
-CLSACT_SECTION="clsact/check_syncookie"
-XDP_SECTION="xdp/check_syncookie"
+CLSACT_SECTION="tc"
+XDP_SECTION="xdp"
 BPF_PROG_ID=0
 PROG="${DIR}/test_tcp_check_syncookie_user"
 
diff --git a/tools/testing/selftests/bpf/test_tunnel.sh b/tools/testing/selftests/bpf/test_tunnel.sh
index 1ccbe804e8e1..ca1372924023 100755
--- a/tools/testing/selftests/bpf/test_tunnel.sh
+++ b/tools/testing/selftests/bpf/test_tunnel.sh
@@ -168,14 +168,15 @@ add_vxlan_tunnel()
 	ip netns exec at_ns0 \
 		ip link set dev $DEV_NS address 52:54:00:d9:01:00 up
 	ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
-	ip netns exec at_ns0 arp -s 10.1.1.200 52:54:00:d9:02:00
+	ip netns exec at_ns0 \
+		ip neigh add 10.1.1.200 lladdr 52:54:00:d9:02:00 dev $DEV_NS
 	ip netns exec at_ns0 iptables -A OUTPUT -j MARK --set-mark 0x800FF
 
 	# root namespace
 	ip link add dev $DEV type $TYPE external gbp dstport 4789
 	ip link set dev $DEV address 52:54:00:d9:02:00 up
 	ip addr add dev $DEV 10.1.1.200/24
-	arp -s 10.1.1.100 52:54:00:d9:01:00
+	ip neigh add 10.1.1.100 lladdr 52:54:00:d9:01:00 dev $DEV
 }
 
 add_ip6vxlan_tunnel()
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 3a9e332c5e36..25afe423b3f0 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -47,6 +47,10 @@
 #include "test_btf.h"
 #include "../../../include/linux/filter.h"
 
+#ifndef ENOTSUPP
+#define ENOTSUPP 524
+#endif
+
 #define MAX_INSNS	BPF_MAXINSNS
 #define MAX_TEST_INSNS	1000000
 #define MAX_FIXUPS	8
@@ -974,7 +978,7 @@ static int do_prog_test_run(int fd_prog, bool unpriv, uint32_t expected_val,
 
 	if (err) {
 		switch (saved_errno) {
-		case 524/*ENOTSUPP*/:
+		case ENOTSUPP:
 			printf("Did not run the program (not supported) ");
 			return 0;
 		case EPERM:
@@ -1119,6 +1123,12 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 		goto close_fds;
 	}
 
+	if (fd_prog < 0 && saved_errno == ENOTSUPP) {
+		printf("SKIP (program uses an unsupported feature)\n");
+		skips++;
+		goto close_fds;
+	}
+
 	alignment_prevented_execution = 0;
 
 	if (expected_ret == ACCEPT || expected_ret == VERBOSE_ACCEPT) {
diff --git a/tools/testing/selftests/bpf/test_xdp_meta.sh b/tools/testing/selftests/bpf/test_xdp_meta.sh
index 637fcf4fe4e3..d10cefd6eb09 100755
--- a/tools/testing/selftests/bpf/test_xdp_meta.sh
+++ b/tools/testing/selftests/bpf/test_xdp_meta.sh
@@ -1,5 +1,8 @@
 #!/bin/sh
 
+# Kselftest framework requirement - SKIP code is 4.
+readonly KSFT_SKIP=4
+
 cleanup()
 {
 	if [ "$?" = "0" ]; then
@@ -17,7 +20,7 @@ cleanup()
 ip link set dev lo xdp off 2>/dev/null > /dev/null
 if [ $? -ne 0 ];then
 	echo "selftests: [SKIP] Could not run test without the ip xdp support"
-	exit 0
+	exit $KSFT_SKIP
 fi
 set -e
 
diff --git a/tools/testing/selftests/bpf/test_xdp_redirect.sh b/tools/testing/selftests/bpf/test_xdp_redirect.sh
index c033850886f4..57c8db9972a6 100755
--- a/tools/testing/selftests/bpf/test_xdp_redirect.sh
+++ b/tools/testing/selftests/bpf/test_xdp_redirect.sh
@@ -52,8 +52,8 @@ test_xdp_redirect()
 		return 0
 	fi
 
-	ip -n ns1 link set veth11 $xdpmode obj xdp_dummy.o sec xdp_dummy &> /dev/null
-	ip -n ns2 link set veth22 $xdpmode obj xdp_dummy.o sec xdp_dummy &> /dev/null
+	ip -n ns1 link set veth11 $xdpmode obj xdp_dummy.o sec xdp &> /dev/null
+	ip -n ns2 link set veth22 $xdpmode obj xdp_dummy.o sec xdp &> /dev/null
 	ip link set dev veth1 $xdpmode obj test_xdp_redirect.o sec redirect_to_222 &> /dev/null
 	ip link set dev veth2 $xdpmode obj test_xdp_redirect.o sec redirect_to_111 &> /dev/null
 
diff --git a/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh b/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh
index 1538373157e3..351955c2bdfd 100755
--- a/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh
+++ b/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh
@@ -88,7 +88,7 @@ setup_ns()
 		# Add a neigh entry for IPv4 ping test
 		ip -n ns$i neigh add 192.0.2.253 lladdr 00:00:00:00:00:01 dev veth0
 		ip -n ns$i link set veth0 $mode obj \
-			xdp_dummy.o sec xdp_dummy &> /dev/null || \
+			xdp_dummy.o sec xdp &> /dev/null || \
 			{ test_fail "Unable to load dummy xdp" && exit 1; }
 		IFACES="$IFACES veth$i"
 		veth_mac[$i]=$(ip link show veth$i | awk '/link\/ether/ {print $2}')
diff --git a/tools/testing/selftests/bpf/test_xdp_veth.sh b/tools/testing/selftests/bpf/test_xdp_veth.sh
index 995278e684b6..a3a1eaee26ea 100755
--- a/tools/testing/selftests/bpf/test_xdp_veth.sh
+++ b/tools/testing/selftests/bpf/test_xdp_veth.sh
@@ -107,9 +107,9 @@ ip link set dev veth1 xdp pinned $BPF_DIR/progs/redirect_map_0
 ip link set dev veth2 xdp pinned $BPF_DIR/progs/redirect_map_1
 ip link set dev veth3 xdp pinned $BPF_DIR/progs/redirect_map_2
 
-ip -n ns1 link set dev veth11 xdp obj xdp_dummy.o sec xdp_dummy
+ip -n ns1 link set dev veth11 xdp obj xdp_dummy.o sec xdp
 ip -n ns2 link set dev veth22 xdp obj xdp_tx.o sec xdp
-ip -n ns3 link set dev veth33 xdp obj xdp_dummy.o sec xdp_dummy
+ip -n ns3 link set dev veth33 xdp obj xdp_dummy.o sec xdp
 
 trap cleanup EXIT
 
diff --git a/tools/testing/selftests/bpf/test_xdp_vlan.sh b/tools/testing/selftests/bpf/test_xdp_vlan.sh
index bb8b0da91686..0cbc7604a2f8 100755
--- a/tools/testing/selftests/bpf/test_xdp_vlan.sh
+++ b/tools/testing/selftests/bpf/test_xdp_vlan.sh
@@ -2,6 +2,9 @@
 # SPDX-License-Identifier: GPL-2.0
 # Author: Jesper Dangaard Brouer <hawk@kernel.org>
 
+# Kselftest framework requirement - SKIP code is 4.
+readonly KSFT_SKIP=4
+
 # Allow wrapper scripts to name test
 if [ -z "$TESTNAME" ]; then
     TESTNAME=xdp_vlan
@@ -94,7 +97,7 @@ while true; do
 	    -h | --help )
 		usage;
 		echo "selftests: $TESTNAME [SKIP] usage help info requested"
-		exit 0
+		exit $KSFT_SKIP
 		;;
 	    * )
 		shift
@@ -117,7 +120,7 @@ fi
 ip link set dev lo xdpgeneric off 2>/dev/null > /dev/null
 if [ $? -ne 0 ]; then
 	echo "selftests: $TESTNAME [SKIP] need ip xdp support"
-	exit 0
+	exit $KSFT_SKIP
 fi
 
 # Interactive mode likely require us to cleanup netns
diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
index e7a19b04d4ea..7b7f918eda77 100644
--- a/tools/testing/selftests/bpf/trace_helpers.c
+++ b/tools/testing/selftests/bpf/trace_helpers.c
@@ -1,4 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
+#include <ctype.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
diff --git a/tools/testing/selftests/bpf/verifier/array_access.c b/tools/testing/selftests/bpf/verifier/array_access.c
index 1b1c798e9248..1b138cd2b187 100644
--- a/tools/testing/selftests/bpf/verifier/array_access.c
+++ b/tools/testing/selftests/bpf/verifier/array_access.c
@@ -186,7 +186,7 @@
 	},
 	.fixup_map_hash_48b = { 3 },
 	.errstr_unpriv = "R0 leaks addr",
-	.errstr = "R0 unbounded memory access",
+	.errstr = "invalid access to map value, value_size=48 off=44 size=8",
 	.result_unpriv = REJECT,
 	.result = REJECT,
 	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
diff --git a/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c b/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c
index 6e52dfc64415..c22dc83a41fd 100644
--- a/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c
+++ b/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c
@@ -119,3 +119,41 @@
 	},
 	.result = ACCEPT,
 },
+{
+	"Dest pointer in r0 - fail",
+	.insns = {
+		/* val = 0; */
+		BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+		/* r0 = &val */
+		BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
+		/* r0 = atomic_cmpxchg(&val, r0, 1); */
+		BPF_MOV64_IMM(BPF_REG_1, 1),
+		BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, BPF_REG_10, BPF_REG_1, -8),
+		/* if (r0 != 0) exit(1); */
+		BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+		BPF_MOV64_IMM(BPF_REG_0, 1),
+		BPF_EXIT_INSN(),
+		/* exit(0); */
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	},
+	.result = ACCEPT,
+},
+{
+	"Dest pointer in r0 - succeed",
+	.insns = {
+		/* r0 = &val */
+		BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
+		/* val = r0; */
+		BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+		/* r0 = atomic_cmpxchg(&val, r0, 0); */
+		BPF_MOV64_IMM(BPF_REG_1, 0),
+		BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, BPF_REG_10, BPF_REG_1, -8),
+		/* r1 = *r0 */
+		BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, -8),
+		/* exit(0); */
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	},
+	.result = ACCEPT,
+},
diff --git a/tools/testing/selftests/bpf/verifier/atomic_fetch.c b/tools/testing/selftests/bpf/verifier/atomic_fetch.c
new file mode 100644
index 000000000000..3bc9ff7a860b
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/atomic_fetch.c
@@ -0,0 +1,57 @@
+#define __ATOMIC_FETCH_OP_TEST(src_reg, dst_reg, operand1, op, operand2, expect) \
+	{								\
+		"atomic fetch " #op ", src=" #dst_reg " dst=" #dst_reg,	\
+		.insns = {						\
+			/* u64 val = operan1; */			\
+			BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, operand1),	\
+			/* u64 old = atomic_fetch_add(&val, operand2); */ \
+			BPF_MOV64_REG(dst_reg, BPF_REG_10),		\
+			BPF_MOV64_IMM(src_reg, operand2),		\
+			BPF_ATOMIC_OP(BPF_DW, op,			\
+				      dst_reg, src_reg, -8),		\
+			/* if (old != operand1) exit(1); */		\
+			BPF_JMP_IMM(BPF_JEQ, src_reg, operand1, 2),	\
+			BPF_MOV64_IMM(BPF_REG_0, 1),			\
+			BPF_EXIT_INSN(),				\
+			/* if (val != result) exit (2); */		\
+			BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -8),	\
+			BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, expect, 2),	\
+			BPF_MOV64_IMM(BPF_REG_0, 2),			\
+			BPF_EXIT_INSN(),				\
+			/* exit(0); */					\
+			BPF_MOV64_IMM(BPF_REG_0, 0),			\
+			BPF_EXIT_INSN(),				\
+		},							\
+		.result = ACCEPT,					\
+	}
+__ATOMIC_FETCH_OP_TEST(BPF_REG_1, BPF_REG_2, 1, BPF_ADD | BPF_FETCH, 2, 3),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_0, BPF_REG_1, 1, BPF_ADD | BPF_FETCH, 2, 3),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_1, BPF_REG_0, 1, BPF_ADD | BPF_FETCH, 2, 3),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_2, BPF_REG_3, 1, BPF_ADD | BPF_FETCH, 2, 3),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_4, BPF_REG_5, 1, BPF_ADD | BPF_FETCH, 2, 3),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_9, BPF_REG_8, 1, BPF_ADD | BPF_FETCH, 2, 3),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_1, BPF_REG_2, 0x010, BPF_AND | BPF_FETCH, 0x011, 0x010),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_0, BPF_REG_1, 0x010, BPF_AND | BPF_FETCH, 0x011, 0x010),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_1, BPF_REG_0, 0x010, BPF_AND | BPF_FETCH, 0x011, 0x010),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_2, BPF_REG_3, 0x010, BPF_AND | BPF_FETCH, 0x011, 0x010),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_4, BPF_REG_5, 0x010, BPF_AND | BPF_FETCH, 0x011, 0x010),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_9, BPF_REG_8, 0x010, BPF_AND | BPF_FETCH, 0x011, 0x010),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_1, BPF_REG_2, 0x010, BPF_OR | BPF_FETCH, 0x011, 0x011),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_0, BPF_REG_1, 0x010, BPF_OR | BPF_FETCH, 0x011, 0x011),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_1, BPF_REG_0, 0x010, BPF_OR | BPF_FETCH, 0x011, 0x011),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_2, BPF_REG_3, 0x010, BPF_OR | BPF_FETCH, 0x011, 0x011),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_4, BPF_REG_5, 0x010, BPF_OR | BPF_FETCH, 0x011, 0x011),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_9, BPF_REG_8, 0x010, BPF_OR | BPF_FETCH, 0x011, 0x011),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_1, BPF_REG_2, 0x010, BPF_XOR | BPF_FETCH, 0x011, 0x001),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_0, BPF_REG_1, 0x010, BPF_XOR | BPF_FETCH, 0x011, 0x001),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_1, BPF_REG_0, 0x010, BPF_XOR | BPF_FETCH, 0x011, 0x001),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_2, BPF_REG_3, 0x010, BPF_XOR | BPF_FETCH, 0x011, 0x001),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_4, BPF_REG_5, 0x010, BPF_XOR | BPF_FETCH, 0x011, 0x001),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_9, BPF_REG_8, 0x010, BPF_XOR | BPF_FETCH, 0x011, 0x001),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_1, BPF_REG_2, 0x010, BPF_XCHG, 0x011, 0x011),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_0, BPF_REG_1, 0x010, BPF_XCHG, 0x011, 0x011),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_1, BPF_REG_0, 0x010, BPF_XCHG, 0x011, 0x011),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_2, BPF_REG_3, 0x010, BPF_XCHG, 0x011, 0x011),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_4, BPF_REG_5, 0x010, BPF_XCHG, 0x011, 0x011),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_9, BPF_REG_8, 0x010, BPF_XCHG, 0x011, 0x011),
+#undef __ATOMIC_FETCH_OP_TEST
diff --git a/tools/testing/selftests/bpf/verifier/atomic_invalid.c b/tools/testing/selftests/bpf/verifier/atomic_invalid.c
new file mode 100644
index 000000000000..39272720b2f6
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/atomic_invalid.c
@@ -0,0 +1,25 @@
+#define __INVALID_ATOMIC_ACCESS_TEST(op)					\
+	{								\
+		"atomic " #op " access through non-pointer ",			\
+		.insns = {						\
+			BPF_MOV64_IMM(BPF_REG_0, 1),			\
+			BPF_MOV64_IMM(BPF_REG_1, 0),			\
+			BPF_ATOMIC_OP(BPF_DW, op, BPF_REG_1, BPF_REG_0, -8), \
+			BPF_MOV64_IMM(BPF_REG_0, 0),			\
+			BPF_EXIT_INSN(),				\
+		},							\
+		.result = REJECT,					\
+		.errstr = "R1 invalid mem access 'inv'"			\
+	}
+__INVALID_ATOMIC_ACCESS_TEST(BPF_ADD),
+__INVALID_ATOMIC_ACCESS_TEST(BPF_ADD | BPF_FETCH),
+__INVALID_ATOMIC_ACCESS_TEST(BPF_ADD),
+__INVALID_ATOMIC_ACCESS_TEST(BPF_ADD | BPF_FETCH),
+__INVALID_ATOMIC_ACCESS_TEST(BPF_AND),
+__INVALID_ATOMIC_ACCESS_TEST(BPF_AND | BPF_FETCH),
+__INVALID_ATOMIC_ACCESS_TEST(BPF_OR),
+__INVALID_ATOMIC_ACCESS_TEST(BPF_OR | BPF_FETCH),
+__INVALID_ATOMIC_ACCESS_TEST(BPF_XOR),
+__INVALID_ATOMIC_ACCESS_TEST(BPF_XOR | BPF_FETCH),
+__INVALID_ATOMIC_ACCESS_TEST(BPF_XCHG),
+__INVALID_ATOMIC_ACCESS_TEST(BPF_CMPXCHG),
diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
index 336a749673d1..d7b74eb28333 100644
--- a/tools/testing/selftests/bpf/verifier/calls.c
+++ b/tools/testing/selftests/bpf/verifier/calls.c
@@ -1,4 +1,27 @@
 {
+	"calls: invalid kfunc call not eliminated",
+	.insns = {
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	.result  = REJECT,
+	.errstr = "invalid kernel function call not eliminated in verifier pass",
+},
+{
+	"calls: invalid kfunc call unreachable",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_JMP_IMM(BPF_JGT, BPF_REG_0, 0, 2),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+	.result  = ACCEPT,
+},
+{
 	"calls: basic sanity",
 	.insns = {
 	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
diff --git a/tools/testing/selftests/bpf/verifier/ctx_skb.c b/tools/testing/selftests/bpf/verifier/ctx_skb.c
index 2022c0f2cd75..83cecfbd6739 100644
--- a/tools/testing/selftests/bpf/verifier/ctx_skb.c
+++ b/tools/testing/selftests/bpf/verifier/ctx_skb.c
@@ -502,7 +502,7 @@
 	"check skb->hash byte load permitted",
 	.insns = {
 	BPF_MOV64_IMM(BPF_REG_0, 0),
-#if __BYTE_ORDER == __LITTLE_ENDIAN
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
 	BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
 		    offsetof(struct __sk_buff, hash)),
 #else
@@ -537,7 +537,7 @@
 	"check skb->hash byte load permitted 3",
 	.insns = {
 	BPF_MOV64_IMM(BPF_REG_0, 0),
-#if __BYTE_ORDER == __LITTLE_ENDIAN
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
 	BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
 		    offsetof(struct __sk_buff, hash) + 3),
 #else
@@ -646,7 +646,7 @@
 	"check skb->hash half load permitted",
 	.insns = {
 	BPF_MOV64_IMM(BPF_REG_0, 0),
-#if __BYTE_ORDER == __LITTLE_ENDIAN
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
 	BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
 		    offsetof(struct __sk_buff, hash)),
 #else
@@ -661,7 +661,7 @@
 	"check skb->hash half load permitted 2",
 	.insns = {
 	BPF_MOV64_IMM(BPF_REG_0, 0),
-#if __BYTE_ORDER == __LITTLE_ENDIAN
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
 	BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
 		    offsetof(struct __sk_buff, hash) + 2),
 #else
@@ -676,7 +676,7 @@
 	"check skb->hash half load not permitted, unaligned 1",
 	.insns = {
 	BPF_MOV64_IMM(BPF_REG_0, 0),
-#if __BYTE_ORDER == __LITTLE_ENDIAN
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
 	BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
 		    offsetof(struct __sk_buff, hash) + 1),
 #else
@@ -693,7 +693,7 @@
 	"check skb->hash half load not permitted, unaligned 3",
 	.insns = {
 	BPF_MOV64_IMM(BPF_REG_0, 0),
-#if __BYTE_ORDER == __LITTLE_ENDIAN
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
 	BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
 		    offsetof(struct __sk_buff, hash) + 3),
 #else
@@ -951,7 +951,7 @@
 	"check skb->data half load not permitted",
 	.insns = {
 	BPF_MOV64_IMM(BPF_REG_0, 0),
-#if __BYTE_ORDER == __LITTLE_ENDIAN
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
 	BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
 		    offsetof(struct __sk_buff, data)),
 #else
@@ -1058,6 +1058,66 @@
 	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 },
 {
+	"padding after gso_size is not accessible",
+	.insns = {
+	BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+		    offsetofend(struct __sk_buff, gso_size)),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.result = REJECT,
+	.result_unpriv = REJECT,
+	.errstr = "invalid bpf_context access off=180 size=4",
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+},
+{
+	"read hwtstamp from CGROUP_SKB",
+	.insns = {
+	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+		    offsetof(struct __sk_buff, hwtstamp)),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.result = ACCEPT,
+	.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+},
+{
+	"read hwtstamp from CGROUP_SKB",
+	.insns = {
+	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1,
+		    offsetof(struct __sk_buff, hwtstamp)),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.result = ACCEPT,
+	.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+},
+{
+	"write hwtstamp from CGROUP_SKB",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+		    offsetof(struct __sk_buff, hwtstamp)),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.result = REJECT,
+	.result_unpriv = REJECT,
+	.errstr = "invalid bpf_context access off=184 size=8",
+	.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+},
+{
+	"read hwtstamp from CLS",
+	.insns = {
+	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+		    offsetof(struct __sk_buff, hwtstamp)),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.result = ACCEPT,
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+},
+{
 	"check wire_len is not readable by sockets",
 	.insns = {
 		BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
diff --git a/tools/testing/selftests/bpf/verifier/jit.c b/tools/testing/selftests/bpf/verifier/jit.c
index df215e004566..79021c30e51e 100644
--- a/tools/testing/selftests/bpf/verifier/jit.c
+++ b/tools/testing/selftests/bpf/verifier/jit.c
@@ -62,6 +62,11 @@
 	BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2),
 	BPF_MOV64_IMM(BPF_REG_0, 1),
 	BPF_EXIT_INSN(),
+	BPF_LD_IMM64(BPF_REG_3, 0xfefefeULL),
+	BPF_ALU64_IMM(BPF_MUL, BPF_REG_3, 0xefefef),
+	BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_EXIT_INSN(),
 	BPF_MOV32_REG(BPF_REG_2, BPF_REG_2),
 	BPF_LD_IMM64(BPF_REG_0, 0xfefefeULL),
 	BPF_ALU32_REG(BPF_MUL, BPF_REG_0, BPF_REG_1),
@@ -73,11 +78,69 @@
 	BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2),
 	BPF_MOV64_IMM(BPF_REG_0, 1),
 	BPF_EXIT_INSN(),
+	BPF_LD_IMM64(BPF_REG_3, 0xfefefeULL),
+	BPF_ALU32_IMM(BPF_MUL, BPF_REG_3, 0xefefef),
+	BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_EXIT_INSN(),
+	BPF_LD_IMM64(BPF_REG_0, 0xfefefeULL),
+	BPF_LD_IMM64(BPF_REG_2, 0x2ad4d4aaULL),
+	BPF_ALU32_IMM(BPF_MUL, BPF_REG_0, 0x2b),
+	BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_2, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_EXIT_INSN(),
 	BPF_LD_IMM64(BPF_REG_0, 0x952a7bbcULL),
 	BPF_LD_IMM64(BPF_REG_1, 0xfefefeULL),
-	BPF_LD_IMM64(BPF_REG_2, 0xeeff0d413122ULL),
-	BPF_ALU32_REG(BPF_MUL, BPF_REG_2, BPF_REG_1),
-	BPF_JMP_REG(BPF_JEQ, BPF_REG_2, BPF_REG_0, 2),
+	BPF_LD_IMM64(BPF_REG_5, 0xeeff0d413122ULL),
+	BPF_ALU32_REG(BPF_MUL, BPF_REG_5, BPF_REG_1),
+	BPF_JMP_REG(BPF_JEQ, BPF_REG_5, BPF_REG_0, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_IMM(BPF_REG_0, 2),
+	BPF_EXIT_INSN(),
+	},
+	.result = ACCEPT,
+	.retval = 2,
+},
+{
+	"jit: various div tests",
+	.insns = {
+	BPF_LD_IMM64(BPF_REG_2, 0xefeffeULL),
+	BPF_LD_IMM64(BPF_REG_0, 0xeeff0d413122ULL),
+	BPF_LD_IMM64(BPF_REG_1, 0xfefeeeULL),
+	BPF_ALU64_REG(BPF_DIV, BPF_REG_0, BPF_REG_1),
+	BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_2, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_EXIT_INSN(),
+	BPF_LD_IMM64(BPF_REG_3, 0xeeff0d413122ULL),
+	BPF_ALU64_IMM(BPF_DIV, BPF_REG_3, 0xfefeeeULL),
+	BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_EXIT_INSN(),
+	BPF_LD_IMM64(BPF_REG_2, 0xaa93ULL),
+	BPF_ALU64_IMM(BPF_MOD, BPF_REG_1, 0xbeefULL),
+	BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_EXIT_INSN(),
+	BPF_LD_IMM64(BPF_REG_1, 0xfefeeeULL),
+	BPF_LD_IMM64(BPF_REG_3, 0xbeefULL),
+	BPF_ALU64_REG(BPF_MOD, BPF_REG_1, BPF_REG_3),
+	BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_EXIT_INSN(),
+	BPF_LD_IMM64(BPF_REG_2, 0x5ee1dULL),
+	BPF_LD_IMM64(BPF_REG_1, 0xfefeeeULL),
+	BPF_LD_IMM64(BPF_REG_3, 0x2bULL),
+	BPF_ALU32_REG(BPF_DIV, BPF_REG_1, BPF_REG_3),
+	BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_EXIT_INSN(),
+	BPF_ALU32_REG(BPF_DIV, BPF_REG_1, BPF_REG_1),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 1, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 1),
+	BPF_EXIT_INSN(),
+	BPF_ALU64_REG(BPF_MOD, BPF_REG_2, BPF_REG_2),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 0, 2),
 	BPF_MOV64_IMM(BPF_REG_0, 1),
 	BPF_EXIT_INSN(),
 	BPF_MOV64_IMM(BPF_REG_0, 2),
diff --git a/tools/testing/selftests/bpf/verifier/lwt.c b/tools/testing/selftests/bpf/verifier/lwt.c
index 2cab6a3966bb..5c8944d0b091 100644
--- a/tools/testing/selftests/bpf/verifier/lwt.c
+++ b/tools/testing/selftests/bpf/verifier/lwt.c
@@ -174,7 +174,7 @@
 	"check skb->tc_classid half load not permitted for lwt prog",
 	.insns = {
 	BPF_MOV64_IMM(BPF_REG_0, 0),
-#if __BYTE_ORDER == __LITTLE_ENDIAN
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
 	BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
 		    offsetof(struct __sk_buff, tc_classid)),
 #else
diff --git a/tools/testing/selftests/bpf/verifier/perf_event_sample_period.c b/tools/testing/selftests/bpf/verifier/perf_event_sample_period.c
index 471c1a5950d8..d8a9b1a1f9a2 100644
--- a/tools/testing/selftests/bpf/verifier/perf_event_sample_period.c
+++ b/tools/testing/selftests/bpf/verifier/perf_event_sample_period.c
@@ -2,7 +2,7 @@
 	"check bpf_perf_event_data->sample_period byte load permitted",
 	.insns = {
 	BPF_MOV64_IMM(BPF_REG_0, 0),
-#if __BYTE_ORDER == __LITTLE_ENDIAN
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
 	BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
 		    offsetof(struct bpf_perf_event_data, sample_period)),
 #else
@@ -18,7 +18,7 @@
 	"check bpf_perf_event_data->sample_period half load permitted",
 	.insns = {
 	BPF_MOV64_IMM(BPF_REG_0, 0),
-#if __BYTE_ORDER == __LITTLE_ENDIAN
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
 	BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
 		    offsetof(struct bpf_perf_event_data, sample_period)),
 #else
@@ -34,7 +34,7 @@
 	"check bpf_perf_event_data->sample_period word load permitted",
 	.insns = {
 	BPF_MOV64_IMM(BPF_REG_0, 0),
-#if __BYTE_ORDER == __LITTLE_ENDIAN
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
 	BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
 		    offsetof(struct bpf_perf_event_data, sample_period)),
 #else
diff --git a/tools/testing/selftests/bpf/verifier/spill_fill.c b/tools/testing/selftests/bpf/verifier/spill_fill.c
index 0b943897aaf6..c9991c3f3bd2 100644
--- a/tools/testing/selftests/bpf/verifier/spill_fill.c
+++ b/tools/testing/selftests/bpf/verifier/spill_fill.c
@@ -104,3 +104,164 @@
 	.result = ACCEPT,
 	.retval = POINTER_VALUE,
 },
+{
+	"Spill and refill a u32 const scalar.  Offset to skb->data",
+	.insns = {
+	BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+		    offsetof(struct __sk_buff, data)),
+	BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+		    offsetof(struct __sk_buff, data_end)),
+	/* r4 = 20 */
+	BPF_MOV32_IMM(BPF_REG_4, 20),
+	/* *(u32 *)(r10 -8) = r4 */
+	BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_4, -8),
+	/* r4 = *(u32 *)(r10 -8) */
+	BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_10, -8),
+	/* r0 = r2 */
+	BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+	/* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=inv20 */
+	BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_4),
+	/* if (r0 > r3) R0=pkt,off=20 R2=pkt R3=pkt_end R4=inv20 */
+	BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+	/* r0 = *(u32 *)r2 R0=pkt,off=20,r=20 R2=pkt,r=20 R3=pkt_end R4=inv20 */
+	BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.result = ACCEPT,
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+},
+{
+	"Spill a u32 const, refill from another half of the uninit u32 from the stack",
+	.insns = {
+	/* r4 = 20 */
+	BPF_MOV32_IMM(BPF_REG_4, 20),
+	/* *(u32 *)(r10 -8) = r4 */
+	BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_4, -8),
+	/* r4 = *(u32 *)(r10 -4) fp-8=????rrrr*/
+	BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_10, -4),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.result = REJECT,
+	.errstr = "invalid read from stack off -4+0 size 4",
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+},
+{
+	"Spill a u32 const scalar.  Refill as u16.  Offset to skb->data",
+	.insns = {
+	BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+		    offsetof(struct __sk_buff, data)),
+	BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+		    offsetof(struct __sk_buff, data_end)),
+	/* r4 = 20 */
+	BPF_MOV32_IMM(BPF_REG_4, 20),
+	/* *(u32 *)(r10 -8) = r4 */
+	BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_4, -8),
+	/* r4 = *(u16 *)(r10 -8) */
+	BPF_LDX_MEM(BPF_H, BPF_REG_4, BPF_REG_10, -8),
+	/* r0 = r2 */
+	BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+	/* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=inv,umax=65535 */
+	BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_4),
+	/* if (r0 > r3) R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=inv,umax=65535 */
+	BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+	/* r0 = *(u32 *)r2 R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=inv20 */
+	BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.result = REJECT,
+	.errstr = "invalid access to packet",
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+},
+{
+	"Spill a u32 const scalar.  Refill as u16 from fp-6.  Offset to skb->data",
+	.insns = {
+	BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+		    offsetof(struct __sk_buff, data)),
+	BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+		    offsetof(struct __sk_buff, data_end)),
+	/* r4 = 20 */
+	BPF_MOV32_IMM(BPF_REG_4, 20),
+	/* *(u32 *)(r10 -8) = r4 */
+	BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_4, -8),
+	/* r4 = *(u16 *)(r10 -6) */
+	BPF_LDX_MEM(BPF_H, BPF_REG_4, BPF_REG_10, -6),
+	/* r0 = r2 */
+	BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+	/* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=inv,umax=65535 */
+	BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_4),
+	/* if (r0 > r3) R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=inv,umax=65535 */
+	BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+	/* r0 = *(u32 *)r2 R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=inv20 */
+	BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.result = REJECT,
+	.errstr = "invalid access to packet",
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+},
+{
+	"Spill and refill a u32 const scalar at non 8byte aligned stack addr.  Offset to skb->data",
+	.insns = {
+	BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+		    offsetof(struct __sk_buff, data)),
+	BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+		    offsetof(struct __sk_buff, data_end)),
+	/* r4 = 20 */
+	BPF_MOV32_IMM(BPF_REG_4, 20),
+	/* *(u32 *)(r10 -8) = r4 */
+	BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_4, -8),
+	/* *(u32 *)(r10 -4) = r4 */
+	BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_4, -4),
+	/* r4 = *(u32 *)(r10 -4),  */
+	BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_10, -4),
+	/* r0 = r2 */
+	BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+	/* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=inv,umax=U32_MAX */
+	BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_4),
+	/* if (r0 > r3) R0=pkt,umax=U32_MAX R2=pkt R3=pkt_end R4=inv */
+	BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
+	/* r0 = *(u32 *)r2 R0=pkt,umax=U32_MAX R2=pkt R3=pkt_end R4=inv */
+	BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.result = REJECT,
+	.errstr = "invalid access to packet",
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+},
+{
+	"Spill and refill a umax=40 bounded scalar.  Offset to skb->data",
+	.insns = {
+	BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
+		    offsetof(struct __sk_buff, data)),
+	BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
+		    offsetof(struct __sk_buff, data_end)),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_1,
+		    offsetof(struct __sk_buff, tstamp)),
+	BPF_JMP_IMM(BPF_JLE, BPF_REG_4, 40, 2),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	/* *(u32 *)(r10 -8) = r4 R4=inv,umax=40 */
+	BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_4, -8),
+	/* r4 = (*u32 *)(r10 - 8) */
+	BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_10, -8),
+	/* r2 += r4 R2=pkt R4=inv,umax=40 */
+	BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_4),
+	/* r0 = r2 R2=pkt,umax=40 R4=inv,umax=40 */
+	BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+	/* r2 += 20 R0=pkt,umax=40 R2=pkt,umax=40 */
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 20),
+	/* if (r2 > r3) R0=pkt,umax=40 R2=pkt,off=20,umax=40 */
+	BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_3, 1),
+	/* r0 = *(u32 *)r0 R0=pkt,r=20,umax=40 R2=pkt,off=20,r=20,umax=40 */
+	BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.result = ACCEPT,
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+},
diff --git a/tools/testing/selftests/bpf/vmtest.sh b/tools/testing/selftests/bpf/vmtest.sh
index 8889b3f55236..027198768fad 100755
--- a/tools/testing/selftests/bpf/vmtest.sh
+++ b/tools/testing/selftests/bpf/vmtest.sh
@@ -224,10 +224,10 @@ EOF
 		-nodefaults \
 		-display none \
 		-serial mon:stdio \
-		-cpu kvm64 \
+		-cpu host \
 		-enable-kvm \
-		-smp 4 \
-		-m 2G \
+		-smp 8 \
+		-m 4G \
 		-drive file="${rootfs_img}",format=raw,index=1,media=disk,if=virtio,cache=none \
 		-kernel "${kernel_bzimage}" \
 		-append "root=/dev/vda rw console=ttyS0,115200"
diff --git a/tools/testing/selftests/bpf/xdping.c b/tools/testing/selftests/bpf/xdping.c
index 842d9155d36c..30f12637f4e4 100644
--- a/tools/testing/selftests/bpf/xdping.c
+++ b/tools/testing/selftests/bpf/xdping.c
@@ -178,9 +178,8 @@ int main(int argc, char **argv)
 		return 1;
 	}
 
-	main_prog = bpf_object__find_program_by_title(obj,
-						      server ? "xdpserver" :
-							       "xdpclient");
+	main_prog = bpf_object__find_program_by_name(obj,
+						     server ? "xdping_server" : "xdping_client");
 	if (main_prog)
 		prog_fd = bpf_program__fd(main_prog);
 	if (!main_prog || prog_fd < 0) {
@@ -188,7 +187,7 @@ int main(int argc, char **argv)
 		return 1;
 	}
 
-	map = bpf_map__next(NULL, obj);
+	map = bpf_object__next_map(obj, NULL);
 	if (map)
 		map_fd = bpf_map__fd(map);
 	if (!map || map_fd < 0) {
diff --git a/tools/testing/selftests/bpf/xdpxceiver.c b/tools/testing/selftests/bpf/xdpxceiver.c
index f53ce2683f8d..6c7cf8aadc79 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.c
+++ b/tools/testing/selftests/bpf/xdpxceiver.c
@@ -19,7 +19,7 @@
  * Virtual Ethernet interfaces.
  *
  * For each mode, the following tests are run:
- *    a. nopoll - soft-irq processing
+ *    a. nopoll - soft-irq processing in run-to-completion mode
  *    b. poll - using poll() syscall
  *    c. Socket Teardown
  *       Create a Tx and a Rx socket, Tx from one socket, Rx on another. Destroy
@@ -45,6 +45,10 @@
  *       Configure sockets at indexes 0 and 1, run a traffic on queue ids 0,
  *       then remove xsk sockets from queue 0 on both veth interfaces and
  *       finally run a traffic on queues ids 1
+ *    g. unaligned mode
+ *    h. tests for invalid and corner case Tx descriptors so that the correct ones
+ *       are discarded and let through, respectively.
+ *    i. 2K frame size tests
  *
  * Total tests: 12
  *
@@ -112,13 +116,10 @@ static void __exit_with_error(int error, const char *file, const char *func, int
 
 #define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__)
 
-#define print_ksft_result(void)\
-	(ksft_test_result_pass("PASS: %s %s %s%s%s%s\n", configured_mode ? "DRV" : "SKB",\
-			       test_type == TEST_TYPE_POLL ? "POLL" : "NOPOLL",\
-			       test_type == TEST_TYPE_TEARDOWN ? "Socket Teardown" : "",\
-			       test_type == TEST_TYPE_BIDI ? "Bi-directional Sockets" : "",\
-			       test_type == TEST_TYPE_STATS ? "Stats" : "",\
-			       test_type == TEST_TYPE_BPF_RES ? "BPF RES" : ""))
+#define mode_string(test) (test)->ifobj_tx->xdp_flags & XDP_FLAGS_SKB_MODE ? "SKB" : "DRV"
+
+#define print_ksft_result(test)						\
+	(ksft_test_result_pass("PASS: %s %s\n", mode_string(test), (test)->name))
 
 static void memset32_htonl(void *dest, u32 val, u32 size)
 {
@@ -235,80 +236,46 @@ static void gen_udp_csum(struct udphdr *udp_hdr, struct iphdr *ip_hdr)
 	    udp_csum(ip_hdr->saddr, ip_hdr->daddr, UDP_PKT_SIZE, IPPROTO_UDP, (u16 *)udp_hdr);
 }
 
-static void xsk_configure_umem(struct ifobject *data, void *buffer, u64 size, int idx)
+static int xsk_configure_umem(struct xsk_umem_info *umem, void *buffer, u64 size)
 {
 	struct xsk_umem_config cfg = {
 		.fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
 		.comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
-		.frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
-		.frame_headroom = frame_headroom,
+		.frame_size = umem->frame_size,
+		.frame_headroom = umem->frame_headroom,
 		.flags = XSK_UMEM__DEFAULT_FLAGS
 	};
-	struct xsk_umem_info *umem;
 	int ret;
 
-	umem = calloc(1, sizeof(struct xsk_umem_info));
-	if (!umem)
-		exit_with_error(errno);
+	if (umem->unaligned_mode)
+		cfg.flags |= XDP_UMEM_UNALIGNED_CHUNK_FLAG;
 
 	ret = xsk_umem__create(&umem->umem, buffer, size,
 			       &umem->fq, &umem->cq, &cfg);
 	if (ret)
-		exit_with_error(-ret);
+		return ret;
 
 	umem->buffer = buffer;
-
-	data->umem_arr[idx] = umem;
-}
-
-static void xsk_populate_fill_ring(struct xsk_umem_info *umem)
-{
-	int ret, i;
-	u32 idx = 0;
-
-	ret = xsk_ring_prod__reserve(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS, &idx);
-	if (ret != XSK_RING_PROD__DEFAULT_NUM_DESCS)
-		exit_with_error(-ret);
-	for (i = 0; i < XSK_RING_PROD__DEFAULT_NUM_DESCS; i++)
-		*xsk_ring_prod__fill_addr(&umem->fq, idx++) = i * XSK_UMEM__DEFAULT_FRAME_SIZE;
-	xsk_ring_prod__submit(&umem->fq, XSK_RING_PROD__DEFAULT_NUM_DESCS);
+	return 0;
 }
 
-static int xsk_configure_socket(struct ifobject *ifobject, int idx)
+static int xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_info *umem,
+				struct ifobject *ifobject, u32 qid)
 {
 	struct xsk_socket_config cfg;
-	struct xsk_socket_info *xsk;
 	struct xsk_ring_cons *rxr;
 	struct xsk_ring_prod *txr;
-	int ret;
-
-	xsk = calloc(1, sizeof(struct xsk_socket_info));
-	if (!xsk)
-		exit_with_error(errno);
 
-	xsk->umem = ifobject->umem;
-	cfg.rx_size = rxqsize;
+	xsk->umem = umem;
+	cfg.rx_size = xsk->rxqsize;
 	cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
 	cfg.libbpf_flags = 0;
-	cfg.xdp_flags = xdp_flags;
-	cfg.bind_flags = xdp_bind_flags;
-
-	if (test_type != TEST_TYPE_BIDI) {
-		rxr = (ifobject->fv.vector == rx) ? &xsk->rx : NULL;
-		txr = (ifobject->fv.vector == tx) ? &xsk->tx : NULL;
-	} else {
-		rxr = &xsk->rx;
-		txr = &xsk->tx;
-	}
-
-	ret = xsk_socket__create(&xsk->xsk, ifobject->ifname, idx,
-				 ifobject->umem->umem, rxr, txr, &cfg);
-	if (ret)
-		return 1;
+	cfg.xdp_flags = ifobject->xdp_flags;
+	cfg.bind_flags = ifobject->bind_flags;
 
-	ifobject->xsk_arr[idx] = xsk;
-
-	return 0;
+	txr = ifobject->tx_on ? &xsk->tx : NULL;
+	rxr = ifobject->rx_on ? &xsk->rx : NULL;
+	return xsk_socket__create(&xsk->xsk, ifobject->ifname, qid, umem->umem, rxr, txr, &cfg);
 }
 
 static struct option long_options[] = {
@@ -354,45 +321,44 @@ static int switch_namespace(const char *nsname)
 	return nsfd;
 }
 
-static int validate_interfaces(void)
+static bool validate_interface(struct ifobject *ifobj)
 {
-	bool ret = true;
-
-	for (int i = 0; i < MAX_INTERFACES; i++) {
-		if (!strcmp(ifdict[i]->ifname, "")) {
-			ret = false;
-			ksft_test_result_fail("ERROR: interfaces: -i <int>,<ns> -i <int>,<ns>.");
-		}
-	}
-	return ret;
+	if (!strcmp(ifobj->ifname, ""))
+		return false;
+	return true;
 }
 
-static void parse_command_line(int argc, char **argv)
+static void parse_command_line(struct ifobject *ifobj_tx, struct ifobject *ifobj_rx, int argc,
+			       char **argv)
 {
-	int option_index, interface_index = 0, c;
+	struct ifobject *ifobj;
+	u32 interface_nb = 0;
+	int option_index, c;
 
 	opterr = 0;
 
 	for (;;) {
-		c = getopt_long(argc, argv, "i:Dv", long_options, &option_index);
+		char *sptr, *token;
 
+		c = getopt_long(argc, argv, "i:Dv", long_options, &option_index);
 		if (c == -1)
 			break;
 
 		switch (c) {
 		case 'i':
-			if (interface_index == MAX_INTERFACES)
+			if (interface_nb == 0)
+				ifobj = ifobj_tx;
+			else if (interface_nb == 1)
+				ifobj = ifobj_rx;
+			else
 				break;
-			char *sptr, *token;
 
 			sptr = strndupa(optarg, strlen(optarg));
-			memcpy(ifdict[interface_index]->ifname,
-			       strsep(&sptr, ","), MAX_INTERFACE_NAME_CHARS);
+			memcpy(ifobj->ifname, strsep(&sptr, ","), MAX_INTERFACE_NAME_CHARS);
 			token = strsep(&sptr, ",");
 			if (token)
-				memcpy(ifdict[interface_index]->nsname, token,
-				       MAX_INTERFACES_NAMESPACE_CHARS);
-			interface_index++;
+				memcpy(ifobj->nsname, token, MAX_INTERFACES_NAMESPACE_CHARS);
+			interface_nb++;
 			break;
 		case 'D':
 			opt_pkt_dump = true;
@@ -405,11 +371,85 @@ static void parse_command_line(int argc, char **argv)
 			ksft_exit_xfail();
 		}
 	}
+}
 
-	if (!validate_interfaces()) {
-		usage(basename(argv[0]));
-		ksft_exit_xfail();
+static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
+			     struct ifobject *ifobj_rx)
+{
+	u32 i, j;
+
+	for (i = 0; i < MAX_INTERFACES; i++) {
+		struct ifobject *ifobj = i ? ifobj_rx : ifobj_tx;
+
+		ifobj->umem = &ifobj->umem_arr[0];
+		ifobj->xsk = &ifobj->xsk_arr[0];
+		ifobj->use_poll = false;
+		ifobj->pacing_on = true;
+		ifobj->pkt_stream = test->pkt_stream_default;
+
+		if (i == 0) {
+			ifobj->rx_on = false;
+			ifobj->tx_on = true;
+		} else {
+			ifobj->rx_on = true;
+			ifobj->tx_on = false;
+		}
+
+		for (j = 0; j < MAX_SOCKETS; j++) {
+			memset(&ifobj->umem_arr[j], 0, sizeof(ifobj->umem_arr[j]));
+			memset(&ifobj->xsk_arr[j], 0, sizeof(ifobj->xsk_arr[j]));
+			ifobj->umem_arr[j].num_frames = DEFAULT_UMEM_BUFFERS;
+			ifobj->umem_arr[j].frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
+			ifobj->xsk_arr[j].rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS;
+		}
+	}
+
+	test->ifobj_tx = ifobj_tx;
+	test->ifobj_rx = ifobj_rx;
+	test->current_step = 0;
+	test->total_steps = 1;
+	test->nb_sockets = 1;
+}
+
+static void test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
+			   struct ifobject *ifobj_rx, enum test_mode mode)
+{
+	struct pkt_stream *pkt_stream;
+	u32 i;
+
+	pkt_stream = test->pkt_stream_default;
+	memset(test, 0, sizeof(*test));
+	test->pkt_stream_default = pkt_stream;
+
+	for (i = 0; i < MAX_INTERFACES; i++) {
+		struct ifobject *ifobj = i ? ifobj_rx : ifobj_tx;
+
+		ifobj->xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
+		if (mode == TEST_MODE_SKB)
+			ifobj->xdp_flags |= XDP_FLAGS_SKB_MODE;
+		else
+			ifobj->xdp_flags |= XDP_FLAGS_DRV_MODE;
+
+		ifobj->bind_flags = XDP_USE_NEED_WAKEUP | XDP_COPY;
 	}
+
+	__test_spec_init(test, ifobj_tx, ifobj_rx);
+}
+
+static void test_spec_reset(struct test_spec *test)
+{
+	__test_spec_init(test, test->ifobj_tx, test->ifobj_rx);
+}
+
+static void test_spec_set_name(struct test_spec *test, const char *name)
+{
+	strncpy(test->name, name, MAX_TEST_NAME_SIZE);
+}
+
+static void pkt_stream_reset(struct pkt_stream *pkt_stream)
+{
+	if (pkt_stream)
+		pkt_stream->rx_pkt_nb = 0;
 }
 
 static struct pkt *pkt_stream_get_pkt(struct pkt_stream *pkt_stream, u32 pkt_nb)
@@ -420,29 +460,104 @@ static struct pkt *pkt_stream_get_pkt(struct pkt_stream *pkt_stream, u32 pkt_nb)
 	return &pkt_stream->pkts[pkt_nb];
 }
 
-static struct pkt_stream *pkt_stream_generate(u32 nb_pkts, u32 pkt_len)
+static struct pkt *pkt_stream_get_next_rx_pkt(struct pkt_stream *pkt_stream)
+{
+	while (pkt_stream->rx_pkt_nb < pkt_stream->nb_pkts) {
+		if (pkt_stream->pkts[pkt_stream->rx_pkt_nb].valid)
+			return &pkt_stream->pkts[pkt_stream->rx_pkt_nb++];
+		pkt_stream->rx_pkt_nb++;
+	}
+	return NULL;
+}
+
+static void pkt_stream_delete(struct pkt_stream *pkt_stream)
+{
+	free(pkt_stream->pkts);
+	free(pkt_stream);
+}
+
+static void pkt_stream_restore_default(struct test_spec *test)
+{
+	if (test->ifobj_tx->pkt_stream != test->pkt_stream_default) {
+		pkt_stream_delete(test->ifobj_tx->pkt_stream);
+		test->ifobj_tx->pkt_stream = test->pkt_stream_default;
+	}
+	test->ifobj_rx->pkt_stream = test->pkt_stream_default;
+}
+
+static struct pkt_stream *__pkt_stream_alloc(u32 nb_pkts)
 {
 	struct pkt_stream *pkt_stream;
-	u32 i;
 
-	pkt_stream = malloc(sizeof(*pkt_stream));
+	pkt_stream = calloc(1, sizeof(*pkt_stream));
 	if (!pkt_stream)
-		exit_with_error(ENOMEM);
+		return NULL;
 
 	pkt_stream->pkts = calloc(nb_pkts, sizeof(*pkt_stream->pkts));
-	if (!pkt_stream->pkts)
+	if (!pkt_stream->pkts) {
+		free(pkt_stream);
+		return NULL;
+	}
+
+	pkt_stream->nb_pkts = nb_pkts;
+	return pkt_stream;
+}
+
+static struct pkt_stream *pkt_stream_generate(struct xsk_umem_info *umem, u32 nb_pkts, u32 pkt_len)
+{
+	struct pkt_stream *pkt_stream;
+	u32 i;
+
+	pkt_stream = __pkt_stream_alloc(nb_pkts);
+	if (!pkt_stream)
 		exit_with_error(ENOMEM);
 
 	pkt_stream->nb_pkts = nb_pkts;
 	for (i = 0; i < nb_pkts; i++) {
-		pkt_stream->pkts[i].addr = (i % num_frames) * XSK_UMEM__DEFAULT_FRAME_SIZE;
+		pkt_stream->pkts[i].addr = (i % umem->num_frames) * umem->frame_size;
 		pkt_stream->pkts[i].len = pkt_len;
 		pkt_stream->pkts[i].payload = i;
+
+		if (pkt_len > umem->frame_size)
+			pkt_stream->pkts[i].valid = false;
+		else
+			pkt_stream->pkts[i].valid = true;
 	}
 
 	return pkt_stream;
 }
 
+static struct pkt_stream *pkt_stream_clone(struct xsk_umem_info *umem,
+					   struct pkt_stream *pkt_stream)
+{
+	return pkt_stream_generate(umem, pkt_stream->nb_pkts, pkt_stream->pkts[0].len);
+}
+
+static void pkt_stream_replace(struct test_spec *test, u32 nb_pkts, u32 pkt_len)
+{
+	struct pkt_stream *pkt_stream;
+
+	pkt_stream = pkt_stream_generate(test->ifobj_tx->umem, nb_pkts, pkt_len);
+	test->ifobj_tx->pkt_stream = pkt_stream;
+	test->ifobj_rx->pkt_stream = pkt_stream;
+}
+
+static void pkt_stream_replace_half(struct test_spec *test, u32 pkt_len, int offset)
+{
+	struct xsk_umem_info *umem = test->ifobj_tx->umem;
+	struct pkt_stream *pkt_stream;
+	u32 i;
+
+	pkt_stream = pkt_stream_clone(umem, test->pkt_stream_default);
+	for (i = 1; i < test->pkt_stream_default->nb_pkts; i += 2) {
+		pkt_stream->pkts[i].addr = (i % umem->num_frames) * umem->frame_size + offset;
+		pkt_stream->pkts[i].len = pkt_len;
+	}
+
+	test->ifobj_tx->pkt_stream = pkt_stream;
+	test->ifobj_rx->pkt_stream = pkt_stream;
+}
+
 static struct pkt *pkt_generate(struct ifobject *ifobject, u32 pkt_nb)
 {
 	struct pkt *pkt = pkt_stream_get_pkt(ifobject->pkt_stream, pkt_nb);
@@ -453,6 +568,8 @@ static struct pkt *pkt_generate(struct ifobject *ifobject, u32 pkt_nb)
 
 	if (!pkt)
 		return NULL;
+	if (!pkt->valid || pkt->len < PKT_SIZE)
+		return pkt;
 
 	data = xsk_umem__get_data(ifobject->umem->buffer, pkt->addr);
 	udp_hdr = (struct udphdr *)(data + sizeof(struct ethhdr) + sizeof(struct iphdr));
@@ -467,6 +584,26 @@ static struct pkt *pkt_generate(struct ifobject *ifobject, u32 pkt_nb)
 	return pkt;
 }
 
+static void pkt_stream_generate_custom(struct test_spec *test, struct pkt *pkts, u32 nb_pkts)
+{
+	struct pkt_stream *pkt_stream;
+	u32 i;
+
+	pkt_stream = __pkt_stream_alloc(nb_pkts);
+	if (!pkt_stream)
+		exit_with_error(ENOMEM);
+
+	test->ifobj_tx->pkt_stream = pkt_stream;
+	test->ifobj_rx->pkt_stream = pkt_stream;
+
+	for (i = 0; i < nb_pkts; i++) {
+		pkt_stream->pkts[i].addr = pkts[i].addr;
+		pkt_stream->pkts[i].len = pkts[i].len;
+		pkt_stream->pkts[i].payload = i;
+		pkt_stream->pkts[i].valid = pkts[i].valid;
+	}
+}
+
 static void pkt_dump(void *pkt, u32 len)
 {
 	char s[INET_ADDRSTRLEN];
@@ -504,9 +641,28 @@ static void pkt_dump(void *pkt, u32 len)
 	fprintf(stdout, "---------------------------------------\n");
 }
 
-static bool is_pkt_valid(struct pkt *pkt, void *buffer, const struct xdp_desc *desc)
+static bool is_offset_correct(struct xsk_umem_info *umem, struct pkt_stream *pkt_stream, u64 addr,
+			      u64 pkt_stream_addr)
 {
-	void *data = xsk_umem__get_data(buffer, desc->addr);
+	u32 headroom = umem->unaligned_mode ? 0 : umem->frame_headroom;
+	u32 offset = addr % umem->frame_size, expected_offset = 0;
+
+	if (!pkt_stream->use_addr_for_fill)
+		pkt_stream_addr = 0;
+
+	expected_offset += (pkt_stream_addr + headroom + XDP_PACKET_HEADROOM) % umem->frame_size;
+
+	if (offset == expected_offset)
+		return true;
+
+	ksft_test_result_fail("ERROR: [%s] expected [%u], got [%u]\n", __func__, expected_offset,
+			      offset);
+	return false;
+}
+
+static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len)
+{
+	void *data = xsk_umem__get_data(buffer, addr);
 	struct iphdr *iphdr = (struct iphdr *)(data + sizeof(struct ethhdr));
 
 	if (!pkt) {
@@ -514,19 +670,24 @@ static bool is_pkt_valid(struct pkt *pkt, void *buffer, const struct xdp_desc *d
 		return false;
 	}
 
+	if (len < PKT_SIZE) {
+		/*Do not try to verify packets that are smaller than minimum size. */
+		return true;
+	}
+
+	if (pkt->len != len) {
+		ksft_test_result_fail
+			("ERROR: [%s] expected length [%d], got length [%d]\n",
+			 __func__, pkt->len, len);
+		return false;
+	}
+
 	if (iphdr->version == IP_PKT_VER && iphdr->tos == IP_PKT_TOS) {
 		u32 seqnum = ntohl(*((u32 *)(data + PKT_HDR_SIZE)));
 
-		if (opt_pkt_dump && test_type != TEST_TYPE_STATS)
+		if (opt_pkt_dump)
 			pkt_dump(data, PKT_SIZE);
 
-		if (pkt->len != desc->len) {
-			ksft_test_result_fail
-				("ERROR: [%s] expected length [%d], got length [%d]\n",
-					__func__, pkt->len, desc->len);
-			return false;
-		}
-
 		if (pkt->payload != seqnum) {
 			ksft_test_result_fail
 				("ERROR: [%s] expected seqnum [%d], got seqnum [%d]\n",
@@ -558,14 +719,20 @@ static void complete_pkts(struct xsk_socket_info *xsk, int batch_size)
 	unsigned int rcvd;
 	u32 idx;
 
-	if (!xsk->outstanding_tx)
-		return;
-
 	if (xsk_ring_prod__needs_wakeup(&xsk->tx))
 		kick_tx(xsk);
 
 	rcvd = xsk_ring_cons__peek(&xsk->umem->cq, batch_size, &idx);
 	if (rcvd) {
+		if (rcvd > xsk->outstanding_tx) {
+			u64 addr = *xsk_ring_cons__comp_addr(&xsk->umem->cq, idx + rcvd - 1);
+
+			ksft_test_result_fail("ERROR: [%s] Too many packets completed\n",
+					      __func__);
+			ksft_print_msg("Last completion address: %llx\n", addr);
+			return;
+		}
+
 		xsk_ring_cons__release(&xsk->umem->cq, rcvd);
 		xsk->outstanding_tx -= rcvd;
 	}
@@ -574,15 +741,16 @@ static void complete_pkts(struct xsk_socket_info *xsk, int batch_size)
 static void receive_pkts(struct pkt_stream *pkt_stream, struct xsk_socket_info *xsk,
 			 struct pollfd *fds)
 {
-	u32 idx_rx = 0, idx_fq = 0, rcvd, i, pkt_count = 0;
-	struct pkt *pkt;
+	struct pkt *pkt = pkt_stream_get_next_rx_pkt(pkt_stream);
+	struct xsk_umem_info *umem = xsk->umem;
+	u32 idx_rx = 0, idx_fq = 0, rcvd, i;
+	u32 total = 0;
 	int ret;
 
-	pkt = pkt_stream_get_pkt(pkt_stream, pkt_count++);
 	while (pkt) {
 		rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx);
 		if (!rcvd) {
-			if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) {
+			if (xsk_ring_prod__needs_wakeup(&umem->fq)) {
 				ret = poll(fds, 1, POLL_TMOUT);
 				if (ret < 0)
 					exit_with_error(-ret);
@@ -590,40 +758,58 @@ static void receive_pkts(struct pkt_stream *pkt_stream, struct xsk_socket_info *
 			continue;
 		}
 
-		ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
+		ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
 		while (ret != rcvd) {
 			if (ret < 0)
 				exit_with_error(-ret);
-			if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) {
+			if (xsk_ring_prod__needs_wakeup(&umem->fq)) {
 				ret = poll(fds, 1, POLL_TMOUT);
 				if (ret < 0)
 					exit_with_error(-ret);
 			}
-			ret = xsk_ring_prod__reserve(&xsk->umem->fq, rcvd, &idx_fq);
+			ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
 		}
 
 		for (i = 0; i < rcvd; i++) {
 			const struct xdp_desc *desc = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++);
 			u64 addr = desc->addr, orig;
 
+			if (!pkt) {
+				ksft_test_result_fail("ERROR: [%s] Received too many packets.\n",
+						      __func__);
+				ksft_print_msg("Last packet has addr: %llx len: %u\n",
+					       addr, desc->len);
+				return;
+			}
+
 			orig = xsk_umem__extract_addr(addr);
 			addr = xsk_umem__add_offset_to_addr(addr);
-			if (!is_pkt_valid(pkt, xsk->umem->buffer, desc))
+
+			if (!is_pkt_valid(pkt, umem->buffer, addr, desc->len))
+				return;
+			if (!is_offset_correct(umem, pkt_stream, addr, pkt->addr))
 				return;
 
-			*xsk_ring_prod__fill_addr(&xsk->umem->fq, idx_fq++) = orig;
-			pkt = pkt_stream_get_pkt(pkt_stream, pkt_count++);
+			*xsk_ring_prod__fill_addr(&umem->fq, idx_fq++) = orig;
+			pkt = pkt_stream_get_next_rx_pkt(pkt_stream);
 		}
 
-		xsk_ring_prod__submit(&xsk->umem->fq, rcvd);
+		xsk_ring_prod__submit(&umem->fq, rcvd);
 		xsk_ring_cons__release(&xsk->rx, rcvd);
+
+		pthread_mutex_lock(&pacing_mutex);
+		pkts_in_flight -= rcvd;
+		total += rcvd;
+		if (pkts_in_flight < umem->num_frames)
+			pthread_cond_signal(&pacing_cond);
+		pthread_mutex_unlock(&pacing_mutex);
 	}
 }
 
 static u32 __send_pkts(struct ifobject *ifobject, u32 pkt_nb)
 {
 	struct xsk_socket_info *xsk = ifobject->xsk;
-	u32 i, idx;
+	u32 i, idx, valid_pkts = 0;
 
 	while (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) < BATCH_SIZE)
 		complete_pkts(xsk, BATCH_SIZE);
@@ -638,15 +824,23 @@ static u32 __send_pkts(struct ifobject *ifobject, u32 pkt_nb)
 		tx_desc->addr = pkt->addr;
 		tx_desc->len = pkt->len;
 		pkt_nb++;
+		if (pkt->valid)
+			valid_pkts++;
 	}
 
-	xsk_ring_prod__submit(&xsk->tx, i);
-	if (stat_test_type != STAT_TEST_TX_INVALID)
-		xsk->outstanding_tx += i;
-	else if (xsk_ring_prod__needs_wakeup(&xsk->tx))
+	pthread_mutex_lock(&pacing_mutex);
+	pkts_in_flight += valid_pkts;
+	if (ifobject->pacing_on && pkts_in_flight >= ifobject->umem->num_frames - BATCH_SIZE) {
 		kick_tx(xsk);
+		pthread_cond_wait(&pacing_cond, &pacing_mutex);
+	}
+	pthread_mutex_unlock(&pacing_mutex);
+
+	xsk_ring_prod__submit(&xsk->tx, i);
+	xsk->outstanding_tx += valid_pkts;
 	complete_pkts(xsk, i);
 
+	usleep(10);
 	return i;
 }
 
@@ -658,29 +852,25 @@ static void wait_for_tx_completion(struct xsk_socket_info *xsk)
 
 static void send_pkts(struct ifobject *ifobject)
 {
-	struct pollfd fds[MAX_SOCKS] = { };
+	struct pollfd fds = { };
 	u32 pkt_cnt = 0;
 
-	fds[0].fd = xsk_socket__fd(ifobject->xsk->xsk);
-	fds[0].events = POLLOUT;
+	fds.fd = xsk_socket__fd(ifobject->xsk->xsk);
+	fds.events = POLLOUT;
 
 	while (pkt_cnt < ifobject->pkt_stream->nb_pkts) {
-		u32 sent;
-
-		if (test_type == TEST_TYPE_POLL) {
+		if (ifobject->use_poll) {
 			int ret;
 
-			ret = poll(fds, 1, POLL_TMOUT);
+			ret = poll(&fds, 1, POLL_TMOUT);
 			if (ret <= 0)
 				continue;
 
-			if (!(fds[0].revents & POLLOUT))
+			if (!(fds.revents & POLLOUT))
 				continue;
 		}
 
-		sent = __send_pkts(ifobject, pkt_cnt);
-		pkt_cnt += sent;
-		usleep(10);
+		pkt_cnt += __send_pkts(ifobject, pkt_cnt);
 	}
 
 	wait_for_tx_completion(ifobject->xsk);
@@ -698,7 +888,7 @@ static bool rx_stats_are_valid(struct ifobject *ifobject)
 	optlen = sizeof(stats);
 	err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen);
 	if (err) {
-		ksft_test_result_fail("ERROR: [%s] getsockopt(XDP_STATISTICS) error %u %s\n",
+		ksft_test_result_fail("ERROR Rx: [%s] getsockopt(XDP_STATISTICS) error %u %s\n",
 				      __func__, -err, strerror(-err));
 		return true;
 	}
@@ -739,7 +929,7 @@ static void tx_stats_validate(struct ifobject *ifobject)
 	optlen = sizeof(stats);
 	err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen);
 	if (err) {
-		ksft_test_result_fail("ERROR: [%s] getsockopt(XDP_STATISTICS) error %u %s\n",
+		ksft_test_result_fail("ERROR Tx: [%s] getsockopt(XDP_STATISTICS) error %u %s\n",
 				      __func__, -err, strerror(-err));
 		return;
 	}
@@ -751,71 +941,62 @@ static void tx_stats_validate(struct ifobject *ifobject)
 			      __func__, stats.tx_invalid_descs, ifobject->pkt_stream->nb_pkts);
 }
 
-static void thread_common_ops(struct ifobject *ifobject, void *bufs)
+static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject)
 {
-	u64 umem_sz = num_frames * XSK_UMEM__DEFAULT_FRAME_SIZE;
 	int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
-	size_t mmap_sz = umem_sz;
-	int ctr = 0;
-	int ret;
+	u32 i;
 
 	ifobject->ns_fd = switch_namespace(ifobject->nsname);
 
-	if (test_type == TEST_TYPE_BPF_RES)
-		mmap_sz *= 2;
+	if (ifobject->umem->unaligned_mode)
+		mmap_flags |= MAP_HUGETLB;
 
-	bufs = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
-	if (bufs == MAP_FAILED)
-		exit_with_error(errno);
+	for (i = 0; i < test->nb_sockets; i++) {
+		u64 umem_sz = ifobject->umem->num_frames * ifobject->umem->frame_size;
+		u32 ctr = 0;
+		void *bufs;
+		int ret;
 
-	while (ctr++ < SOCK_RECONF_CTR) {
-		xsk_configure_umem(ifobject, bufs, umem_sz, 0);
-		ifobject->umem = ifobject->umem_arr[0];
-		ret = xsk_configure_socket(ifobject, 0);
-		if (!ret)
-			break;
+		bufs = mmap(NULL, umem_sz, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
+		if (bufs == MAP_FAILED)
+			exit_with_error(errno);
 
-		/* Retry Create Socket if it fails as xsk_socket__create() is asynchronous */
-		usleep(USLEEP_MAX);
-		if (ctr >= SOCK_RECONF_CTR)
+		ret = xsk_configure_umem(&ifobject->umem_arr[i], bufs, umem_sz);
+		if (ret)
 			exit_with_error(-ret);
-	}
 
-	ifobject->umem = ifobject->umem_arr[0];
-	ifobject->xsk = ifobject->xsk_arr[0];
+		while (ctr++ < SOCK_RECONF_CTR) {
+			ret = xsk_configure_socket(&ifobject->xsk_arr[i], &ifobject->umem_arr[i],
+						   ifobject, i);
+			if (!ret)
+				break;
 
-	if (test_type == TEST_TYPE_BPF_RES) {
-		xsk_configure_umem(ifobject, (u8 *)bufs + umem_sz, umem_sz, 1);
-		ifobject->umem = ifobject->umem_arr[1];
-		ret = xsk_configure_socket(ifobject, 1);
+			/* Retry if it fails as xsk_socket__create() is asynchronous */
+			if (ctr >= SOCK_RECONF_CTR)
+				exit_with_error(-ret);
+			usleep(USLEEP_MAX);
+		}
 	}
 
-	ifobject->umem = ifobject->umem_arr[0];
-	ifobject->xsk = ifobject->xsk_arr[0];
-	print_verbose("Interface [%s] vector [%s]\n",
-		      ifobject->ifname, ifobject->fv.vector == tx ? "Tx" : "Rx");
-}
-
-static bool testapp_is_test_two_stepped(void)
-{
-	return (test_type != TEST_TYPE_BIDI && test_type != TEST_TYPE_BPF_RES) || second_step;
+	ifobject->umem = &ifobject->umem_arr[0];
+	ifobject->xsk = &ifobject->xsk_arr[0];
 }
 
 static void testapp_cleanup_xsk_res(struct ifobject *ifobj)
 {
-	if (testapp_is_test_two_stepped()) {
-		xsk_socket__delete(ifobj->xsk->xsk);
-		(void)xsk_umem__delete(ifobj->umem->umem);
-	}
+	print_verbose("Destroying socket\n");
+	xsk_socket__delete(ifobj->xsk->xsk);
+	munmap(ifobj->umem->buffer, ifobj->umem->num_frames * ifobj->umem->frame_size);
+	xsk_umem__delete(ifobj->umem->umem);
 }
 
 static void *worker_testapp_validate_tx(void *arg)
 {
-	struct ifobject *ifobject = (struct ifobject *)arg;
-	void *bufs = NULL;
+	struct test_spec *test = (struct test_spec *)arg;
+	struct ifobject *ifobject = test->ifobj_tx;
 
-	if (!second_step)
-		thread_common_ops(ifobject, bufs);
+	if (test->current_step == 1)
+		thread_common_ops(test, ifobject);
 
 	print_verbose("Sending %d packets on interface %s\n", ifobject->pkt_stream->nb_pkts,
 		      ifobject->ifname);
@@ -824,24 +1005,55 @@ static void *worker_testapp_validate_tx(void *arg)
 	if (stat_test_type == STAT_TEST_TX_INVALID)
 		tx_stats_validate(ifobject);
 
-	testapp_cleanup_xsk_res(ifobject);
+	if (test->total_steps == test->current_step)
+		testapp_cleanup_xsk_res(ifobject);
 	pthread_exit(NULL);
 }
 
+static void xsk_populate_fill_ring(struct xsk_umem_info *umem, struct pkt_stream *pkt_stream)
+{
+	u32 idx = 0, i, buffers_to_fill;
+	int ret;
+
+	if (umem->num_frames < XSK_RING_PROD__DEFAULT_NUM_DESCS)
+		buffers_to_fill = umem->num_frames;
+	else
+		buffers_to_fill = XSK_RING_PROD__DEFAULT_NUM_DESCS;
+
+	ret = xsk_ring_prod__reserve(&umem->fq, buffers_to_fill, &idx);
+	if (ret != buffers_to_fill)
+		exit_with_error(ENOSPC);
+	for (i = 0; i < buffers_to_fill; i++) {
+		u64 addr;
+
+		if (pkt_stream->use_addr_for_fill) {
+			struct pkt *pkt = pkt_stream_get_pkt(pkt_stream, i);
+
+			if (!pkt)
+				break;
+			addr = pkt->addr;
+		} else {
+			addr = i * umem->frame_size;
+		}
+
+		*xsk_ring_prod__fill_addr(&umem->fq, idx++) = addr;
+	}
+	xsk_ring_prod__submit(&umem->fq, buffers_to_fill);
+}
+
 static void *worker_testapp_validate_rx(void *arg)
 {
-	struct ifobject *ifobject = (struct ifobject *)arg;
-	struct pollfd fds[MAX_SOCKS] = { };
-	void *bufs = NULL;
+	struct test_spec *test = (struct test_spec *)arg;
+	struct ifobject *ifobject = test->ifobj_rx;
+	struct pollfd fds = { };
 
-	if (!second_step)
-		thread_common_ops(ifobject, bufs);
+	if (test->current_step == 1)
+		thread_common_ops(test, ifobject);
 
-	if (stat_test_type != STAT_TEST_RX_FILL_EMPTY)
-		xsk_populate_fill_ring(ifobject->umem);
+	xsk_populate_fill_ring(ifobject->umem, ifobject->pkt_stream);
 
-	fds[0].fd = xsk_socket__fd(ifobject->xsk->xsk);
-	fds[0].events = POLLIN;
+	fds.fd = xsk_socket__fd(ifobject->xsk->xsk);
+	fds.events = POLLIN;
 
 	pthread_barrier_wait(&barr);
 
@@ -849,151 +1061,239 @@ static void *worker_testapp_validate_rx(void *arg)
 		while (!rx_stats_are_valid(ifobject))
 			continue;
 	else
-		receive_pkts(ifobject->pkt_stream, ifobject->xsk, fds);
+		receive_pkts(ifobject->pkt_stream, ifobject->xsk, &fds);
 
-	if (test_type == TEST_TYPE_TEARDOWN)
-		print_verbose("Destroying socket\n");
-
-	testapp_cleanup_xsk_res(ifobject);
+	if (test->total_steps == test->current_step)
+		testapp_cleanup_xsk_res(ifobject);
 	pthread_exit(NULL);
 }
 
-static void testapp_validate(void)
+static void testapp_validate_traffic(struct test_spec *test)
 {
-	bool bidi = test_type == TEST_TYPE_BIDI;
-	bool bpf = test_type == TEST_TYPE_BPF_RES;
-	struct pkt_stream *pkt_stream;
+	struct ifobject *ifobj_tx = test->ifobj_tx;
+	struct ifobject *ifobj_rx = test->ifobj_rx;
+	pthread_t t0, t1;
 
 	if (pthread_barrier_init(&barr, NULL, 2))
 		exit_with_error(errno);
 
-	if (stat_test_type == STAT_TEST_TX_INVALID)
-		pkt_stream = pkt_stream_generate(DEFAULT_PKT_CNT, XSK_UMEM__INVALID_FRAME_SIZE);
-	else
-		pkt_stream = pkt_stream_generate(DEFAULT_PKT_CNT, PKT_SIZE);
-	ifdict_tx->pkt_stream = pkt_stream;
-	ifdict_rx->pkt_stream = pkt_stream;
+	test->current_step++;
+	pkt_stream_reset(ifobj_rx->pkt_stream);
+	pkts_in_flight = 0;
 
 	/*Spawn RX thread */
-	pthread_create(&t0, NULL, ifdict_rx->func_ptr, ifdict_rx);
+	pthread_create(&t0, NULL, ifobj_rx->func_ptr, test);
 
 	pthread_barrier_wait(&barr);
 	if (pthread_barrier_destroy(&barr))
 		exit_with_error(errno);
 
 	/*Spawn TX thread */
-	pthread_create(&t1, NULL, ifdict_tx->func_ptr, ifdict_tx);
+	pthread_create(&t1, NULL, ifobj_tx->func_ptr, test);
 
 	pthread_join(t1, NULL);
 	pthread_join(t0, NULL);
-
-	if (!(test_type == TEST_TYPE_TEARDOWN) && !bidi && !bpf && !(test_type == TEST_TYPE_STATS))
-		print_ksft_result();
 }
 
-static void testapp_teardown(void)
+static void testapp_teardown(struct test_spec *test)
 {
 	int i;
 
+	test_spec_set_name(test, "TEARDOWN");
 	for (i = 0; i < MAX_TEARDOWN_ITER; i++) {
-		print_verbose("Creating socket\n");
-		testapp_validate();
+		testapp_validate_traffic(test);
+		test_spec_reset(test);
 	}
-
-	print_ksft_result();
 }
 
-static void swap_vectors(struct ifobject *ifobj1, struct ifobject *ifobj2)
+static void swap_directions(struct ifobject **ifobj1, struct ifobject **ifobj2)
 {
-	void *(*tmp_func_ptr)(void *) = ifobj1->func_ptr;
-	enum fvector tmp_vector = ifobj1->fv.vector;
+	thread_func_t tmp_func_ptr = (*ifobj1)->func_ptr;
+	struct ifobject *tmp_ifobj = (*ifobj1);
 
-	ifobj1->func_ptr = ifobj2->func_ptr;
-	ifobj1->fv.vector = ifobj2->fv.vector;
+	(*ifobj1)->func_ptr = (*ifobj2)->func_ptr;
+	(*ifobj2)->func_ptr = tmp_func_ptr;
 
-	ifobj2->func_ptr = tmp_func_ptr;
-	ifobj2->fv.vector = tmp_vector;
-
-	ifdict_tx = ifobj1;
-	ifdict_rx = ifobj2;
+	*ifobj1 = *ifobj2;
+	*ifobj2 = tmp_ifobj;
 }
 
-static void testapp_bidi(void)
+static void testapp_bidi(struct test_spec *test)
 {
-	for (int i = 0; i < MAX_BIDI_ITER; i++) {
-		print_verbose("Creating socket\n");
-		testapp_validate();
-		if (!second_step) {
-			print_verbose("Switching Tx/Rx vectors\n");
-			swap_vectors(ifdict[1], ifdict[0]);
-		}
-		second_step = true;
-	}
+	test_spec_set_name(test, "BIDIRECTIONAL");
+	test->ifobj_tx->rx_on = true;
+	test->ifobj_rx->tx_on = true;
+	test->total_steps = 2;
+	testapp_validate_traffic(test);
 
-	swap_vectors(ifdict[0], ifdict[1]);
+	print_verbose("Switching Tx/Rx vectors\n");
+	swap_directions(&test->ifobj_rx, &test->ifobj_tx);
+	testapp_validate_traffic(test);
 
-	print_ksft_result();
+	swap_directions(&test->ifobj_rx, &test->ifobj_tx);
 }
 
-static void swap_xsk_res(void)
+static void swap_xsk_resources(struct ifobject *ifobj_tx, struct ifobject *ifobj_rx)
 {
-	xsk_socket__delete(ifdict_tx->xsk->xsk);
-	xsk_umem__delete(ifdict_tx->umem->umem);
-	xsk_socket__delete(ifdict_rx->xsk->xsk);
-	xsk_umem__delete(ifdict_rx->umem->umem);
-	ifdict_tx->umem = ifdict_tx->umem_arr[1];
-	ifdict_tx->xsk = ifdict_tx->xsk_arr[1];
-	ifdict_rx->umem = ifdict_rx->umem_arr[1];
-	ifdict_rx->xsk = ifdict_rx->xsk_arr[1];
+	xsk_socket__delete(ifobj_tx->xsk->xsk);
+	xsk_umem__delete(ifobj_tx->umem->umem);
+	xsk_socket__delete(ifobj_rx->xsk->xsk);
+	xsk_umem__delete(ifobj_rx->umem->umem);
+	ifobj_tx->umem = &ifobj_tx->umem_arr[1];
+	ifobj_tx->xsk = &ifobj_tx->xsk_arr[1];
+	ifobj_rx->umem = &ifobj_rx->umem_arr[1];
+	ifobj_rx->xsk = &ifobj_rx->xsk_arr[1];
 }
 
-static void testapp_bpf_res(void)
+static void testapp_bpf_res(struct test_spec *test)
 {
-	int i;
+	test_spec_set_name(test, "BPF_RES");
+	test->total_steps = 2;
+	test->nb_sockets = 2;
+	testapp_validate_traffic(test);
 
-	for (i = 0; i < MAX_BPF_ITER; i++) {
-		print_verbose("Creating socket\n");
-		testapp_validate();
-		if (!second_step)
-			swap_xsk_res();
-		second_step = true;
-	}
+	swap_xsk_resources(test->ifobj_tx, test->ifobj_rx);
+	testapp_validate_traffic(test);
+}
 
-	print_ksft_result();
+static void testapp_headroom(struct test_spec *test)
+{
+	test_spec_set_name(test, "UMEM_HEADROOM");
+	test->ifobj_rx->umem->frame_headroom = UMEM_HEADROOM_TEST_SIZE;
+	testapp_validate_traffic(test);
 }
 
-static void testapp_stats(void)
+static void testapp_stats(struct test_spec *test)
 {
-	for (int i = 0; i < STAT_TEST_TYPE_MAX; i++) {
-		stat_test_type = i;
+	int i;
 
-		/* reset defaults */
-		rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS;
-		frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM;
+	for (i = 0; i < STAT_TEST_TYPE_MAX; i++) {
+		test_spec_reset(test);
+		stat_test_type = i;
+		/* No or few packets will be received so cannot pace packets */
+		test->ifobj_tx->pacing_on = false;
 
 		switch (stat_test_type) {
 		case STAT_TEST_RX_DROPPED:
-			frame_headroom = XSK_UMEM__DEFAULT_FRAME_SIZE -
-						XDP_PACKET_HEADROOM - 1;
+			test_spec_set_name(test, "STAT_RX_DROPPED");
+			test->ifobj_rx->umem->frame_headroom = test->ifobj_rx->umem->frame_size -
+				XDP_PACKET_HEADROOM - 1;
+			testapp_validate_traffic(test);
 			break;
 		case STAT_TEST_RX_FULL:
-			rxqsize = RX_FULL_RXQSIZE;
+			test_spec_set_name(test, "STAT_RX_FULL");
+			test->ifobj_rx->xsk->rxqsize = RX_FULL_RXQSIZE;
+			testapp_validate_traffic(test);
 			break;
 		case STAT_TEST_TX_INVALID:
-			continue;
+			test_spec_set_name(test, "STAT_TX_INVALID");
+			pkt_stream_replace(test, DEFAULT_PKT_CNT, XSK_UMEM__INVALID_FRAME_SIZE);
+			testapp_validate_traffic(test);
+
+			pkt_stream_restore_default(test);
+			break;
+		case STAT_TEST_RX_FILL_EMPTY:
+			test_spec_set_name(test, "STAT_RX_FILL_EMPTY");
+			test->ifobj_rx->pkt_stream = pkt_stream_generate(test->ifobj_rx->umem, 0,
+									 MIN_PKT_SIZE);
+			if (!test->ifobj_rx->pkt_stream)
+				exit_with_error(ENOMEM);
+			test->ifobj_rx->pkt_stream->use_addr_for_fill = true;
+			testapp_validate_traffic(test);
+
+			pkt_stream_restore_default(test);
+			break;
 		default:
 			break;
 		}
-		testapp_validate();
 	}
 
-	print_ksft_result();
+	/* To only see the whole stat set being completed unless an individual test fails. */
+	test_spec_set_name(test, "STATS");
+}
+
+/* Simple test */
+static bool hugepages_present(struct ifobject *ifobject)
+{
+	const size_t mmap_sz = 2 * ifobject->umem->num_frames * ifobject->umem->frame_size;
+	void *bufs;
+
+	bufs = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,
+		    MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE | MAP_HUGETLB, -1, 0);
+	if (bufs == MAP_FAILED)
+		return false;
+
+	munmap(bufs, mmap_sz);
+	return true;
+}
+
+static bool testapp_unaligned(struct test_spec *test)
+{
+	if (!hugepages_present(test->ifobj_tx)) {
+		ksft_test_result_skip("No 2M huge pages present.\n");
+		return false;
+	}
+
+	test_spec_set_name(test, "UNALIGNED_MODE");
+	test->ifobj_tx->umem->unaligned_mode = true;
+	test->ifobj_rx->umem->unaligned_mode = true;
+	/* Let half of the packets straddle a buffer boundrary */
+	pkt_stream_replace_half(test, PKT_SIZE, -PKT_SIZE / 2);
+	test->ifobj_rx->pkt_stream->use_addr_for_fill = true;
+	testapp_validate_traffic(test);
+
+	pkt_stream_restore_default(test);
+	return true;
+}
+
+static void testapp_single_pkt(struct test_spec *test)
+{
+	struct pkt pkts[] = {{0x1000, PKT_SIZE, 0, true}};
+
+	pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts));
+	testapp_validate_traffic(test);
+	pkt_stream_restore_default(test);
 }
 
-static void init_iface(struct ifobject *ifobj, const char *dst_mac,
-		       const char *src_mac, const char *dst_ip,
-		       const char *src_ip, const u16 dst_port,
-		       const u16 src_port, enum fvector vector)
+static void testapp_invalid_desc(struct test_spec *test)
+{
+	struct pkt pkts[] = {
+		/* Zero packet length at address zero allowed */
+		{0, 0, 0, true},
+		/* Zero packet length allowed */
+		{0x1000, 0, 0, true},
+		/* Straddling the start of umem */
+		{-2, PKT_SIZE, 0, false},
+		/* Packet too large */
+		{0x2000, XSK_UMEM__INVALID_FRAME_SIZE, 0, false},
+		/* After umem ends */
+		{UMEM_SIZE, PKT_SIZE, 0, false},
+		/* Straddle the end of umem */
+		{UMEM_SIZE - PKT_SIZE / 2, PKT_SIZE, 0, false},
+		/* Straddle a page boundrary */
+		{0x3000 - PKT_SIZE / 2, PKT_SIZE, 0, false},
+		/* Straddle a 2K boundrary */
+		{0x3800 - PKT_SIZE / 2, PKT_SIZE, 0, true},
+		/* Valid packet for synch so that something is received */
+		{0x4000, PKT_SIZE, 0, true}};
+
+	if (test->ifobj_tx->umem->unaligned_mode) {
+		/* Crossing a page boundrary allowed */
+		pkts[6].valid = true;
+	}
+	if (test->ifobj_tx->umem->frame_size == XSK_UMEM__DEFAULT_FRAME_SIZE / 2) {
+		/* Crossing a 2K frame size boundrary not allowed */
+		pkts[7].valid = false;
+	}
+
+	pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts));
+	testapp_validate_traffic(test);
+	pkt_stream_restore_default(test);
+}
+
+static void init_iface(struct ifobject *ifobj, const char *dst_mac, const char *src_mac,
+		       const char *dst_ip, const char *src_ip, const u16 dst_port,
+		       const u16 src_port, thread_func_t func_ptr)
 {
 	struct in_addr ip;
 
@@ -1009,58 +1309,80 @@ static void init_iface(struct ifobject *ifobj, const char *dst_mac,
 	ifobj->dst_port = dst_port;
 	ifobj->src_port = src_port;
 
-	if (vector == tx) {
-		ifobj->fv.vector = tx;
-		ifobj->func_ptr = worker_testapp_validate_tx;
-		ifdict_tx = ifobj;
-	} else {
-		ifobj->fv.vector = rx;
-		ifobj->func_ptr = worker_testapp_validate_rx;
-		ifdict_rx = ifobj;
-	}
+	ifobj->func_ptr = func_ptr;
 }
 
-static void run_pkt_test(int mode, int type)
+static void run_pkt_test(struct test_spec *test, enum test_mode mode, enum test_type type)
 {
 	test_type = type;
 
 	/* reset defaults after potential previous test */
-	xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
-	second_step = 0;
 	stat_test_type = -1;
-	rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS;
-	frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM;
-
-	configured_mode = mode;
-
-	switch (mode) {
-	case (TEST_MODE_SKB):
-		xdp_flags |= XDP_FLAGS_SKB_MODE;
-		break;
-	case (TEST_MODE_DRV):
-		xdp_flags |= XDP_FLAGS_DRV_MODE;
-		break;
-	default:
-		break;
-	}
 
 	switch (test_type) {
 	case TEST_TYPE_STATS:
-		testapp_stats();
+		testapp_stats(test);
 		break;
 	case TEST_TYPE_TEARDOWN:
-		testapp_teardown();
+		testapp_teardown(test);
 		break;
 	case TEST_TYPE_BIDI:
-		testapp_bidi();
+		testapp_bidi(test);
 		break;
 	case TEST_TYPE_BPF_RES:
-		testapp_bpf_res();
+		testapp_bpf_res(test);
+		break;
+	case TEST_TYPE_RUN_TO_COMPLETION:
+		test_spec_set_name(test, "RUN_TO_COMPLETION");
+		testapp_validate_traffic(test);
+		break;
+	case TEST_TYPE_RUN_TO_COMPLETION_SINGLE_PKT:
+		test_spec_set_name(test, "RUN_TO_COMPLETION_SINGLE_PKT");
+		testapp_single_pkt(test);
+		break;
+	case TEST_TYPE_RUN_TO_COMPLETION_2K_FRAME:
+		test_spec_set_name(test, "RUN_TO_COMPLETION_2K_FRAME_SIZE");
+		test->ifobj_tx->umem->frame_size = 2048;
+		test->ifobj_rx->umem->frame_size = 2048;
+		pkt_stream_replace(test, DEFAULT_PKT_CNT, MIN_PKT_SIZE);
+		testapp_validate_traffic(test);
+
+		pkt_stream_restore_default(test);
+		break;
+	case TEST_TYPE_POLL:
+		test->ifobj_tx->use_poll = true;
+		test->ifobj_rx->use_poll = true;
+		test_spec_set_name(test, "POLL");
+		testapp_validate_traffic(test);
+		break;
+	case TEST_TYPE_ALIGNED_INV_DESC:
+		test_spec_set_name(test, "ALIGNED_INV_DESC");
+		testapp_invalid_desc(test);
+		break;
+	case TEST_TYPE_ALIGNED_INV_DESC_2K_FRAME:
+		test_spec_set_name(test, "ALIGNED_INV_DESC_2K_FRAME_SIZE");
+		test->ifobj_tx->umem->frame_size = 2048;
+		test->ifobj_rx->umem->frame_size = 2048;
+		testapp_invalid_desc(test);
+		break;
+	case TEST_TYPE_UNALIGNED_INV_DESC:
+		test_spec_set_name(test, "UNALIGNED_INV_DESC");
+		test->ifobj_tx->umem->unaligned_mode = true;
+		test->ifobj_rx->umem->unaligned_mode = true;
+		testapp_invalid_desc(test);
+		break;
+	case TEST_TYPE_UNALIGNED:
+		if (!testapp_unaligned(test))
+			return;
+		break;
+	case TEST_TYPE_HEADROOM:
+		testapp_headroom(test);
 		break;
 	default:
-		testapp_validate();
 		break;
 	}
+
+	print_ksft_result(test);
 }
 
 static struct ifobject *ifobject_create(void)
@@ -1071,11 +1393,11 @@ static struct ifobject *ifobject_create(void)
 	if (!ifobj)
 		return NULL;
 
-	ifobj->xsk_arr = calloc(2, sizeof(struct xsk_socket_info *));
+	ifobj->xsk_arr = calloc(MAX_SOCKETS, sizeof(*ifobj->xsk_arr));
 	if (!ifobj->xsk_arr)
 		goto out_xsk_arr;
 
-	ifobj->umem_arr = calloc(2, sizeof(struct xsk_umem_info *));
+	ifobj->umem_arr = calloc(MAX_SOCKETS, sizeof(*ifobj->umem_arr));
 	if (!ifobj->umem_arr)
 		goto out_umem_arr;
 
@@ -1098,34 +1420,53 @@ static void ifobject_delete(struct ifobject *ifobj)
 int main(int argc, char **argv)
 {
 	struct rlimit _rlim = { RLIM_INFINITY, RLIM_INFINITY };
-	int i, j;
+	struct pkt_stream *pkt_stream_default;
+	struct ifobject *ifobj_tx, *ifobj_rx;
+	struct test_spec test;
+	u32 i, j;
 
 	if (setrlimit(RLIMIT_MEMLOCK, &_rlim))
 		exit_with_error(errno);
 
-	for (i = 0; i < MAX_INTERFACES; i++) {
-		ifdict[i] = ifobject_create();
-		if (!ifdict[i])
-			exit_with_error(ENOMEM);
-	}
+	ifobj_tx = ifobject_create();
+	if (!ifobj_tx)
+		exit_with_error(ENOMEM);
+	ifobj_rx = ifobject_create();
+	if (!ifobj_rx)
+		exit_with_error(ENOMEM);
 
 	setlocale(LC_ALL, "");
 
-	parse_command_line(argc, argv);
+	parse_command_line(ifobj_tx, ifobj_rx, argc, argv);
 
-	init_iface(ifdict[tx], MAC1, MAC2, IP1, IP2, UDP_PORT1, UDP_PORT2, tx);
-	init_iface(ifdict[rx], MAC2, MAC1, IP2, IP1, UDP_PORT2, UDP_PORT1, rx);
+	if (!validate_interface(ifobj_tx) || !validate_interface(ifobj_rx)) {
+		usage(basename(argv[0]));
+		ksft_exit_xfail();
+	}
+
+	init_iface(ifobj_tx, MAC1, MAC2, IP1, IP2, UDP_PORT1, UDP_PORT2,
+		   worker_testapp_validate_tx);
+	init_iface(ifobj_rx, MAC2, MAC1, IP2, IP1, UDP_PORT2, UDP_PORT1,
+		   worker_testapp_validate_rx);
+
+	test_spec_init(&test, ifobj_tx, ifobj_rx, 0);
+	pkt_stream_default = pkt_stream_generate(ifobj_tx->umem, DEFAULT_PKT_CNT, PKT_SIZE);
+	if (!pkt_stream_default)
+		exit_with_error(ENOMEM);
+	test.pkt_stream_default = pkt_stream_default;
 
 	ksft_set_plan(TEST_MODE_MAX * TEST_TYPE_MAX);
 
 	for (i = 0; i < TEST_MODE_MAX; i++)
 		for (j = 0; j < TEST_TYPE_MAX; j++) {
-			run_pkt_test(i, j);
+			test_spec_init(&test, ifobj_tx, ifobj_rx, i);
+			run_pkt_test(&test, i, j);
 			usleep(USLEEP_MAX);
 		}
 
-	for (i = 0; i < MAX_INTERFACES; i++)
-		ifobject_delete(ifdict[i]);
+	pkt_stream_delete(pkt_stream_default);
+	ifobject_delete(ifobj_tx);
+	ifobject_delete(ifobj_rx);
 
 	ksft_exit_pass();
 	return 0;
diff --git a/tools/testing/selftests/bpf/xdpxceiver.h b/tools/testing/selftests/bpf/xdpxceiver.h
index 7e49b9fbe25e..2f705f44b748 100644
--- a/tools/testing/selftests/bpf/xdpxceiver.h
+++ b/tools/testing/selftests/bpf/xdpxceiver.h
@@ -20,10 +20,9 @@
 #define MAX_INTERFACES 2
 #define MAX_INTERFACE_NAME_CHARS 7
 #define MAX_INTERFACES_NAMESPACE_CHARS 10
-#define MAX_SOCKS 1
+#define MAX_SOCKETS 2
+#define MAX_TEST_NAME_SIZE 32
 #define MAX_TEARDOWN_ITER 10
-#define MAX_BIDI_ITER 2
-#define MAX_BPF_ITER 2
 #define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \
 			sizeof(struct udphdr))
 #define MIN_PKT_SIZE 64
@@ -36,10 +35,13 @@
 #define UDP_PKT_DATA_SIZE (UDP_PKT_SIZE - sizeof(struct udphdr))
 #define USLEEP_MAX 10000
 #define SOCK_RECONF_CTR 10
-#define BATCH_SIZE 8
+#define BATCH_SIZE 64
 #define POLL_TMOUT 1000
 #define DEFAULT_PKT_CNT (4 * 1024)
+#define DEFAULT_UMEM_BUFFERS (DEFAULT_PKT_CNT / 4)
+#define UMEM_SIZE (DEFAULT_UMEM_BUFFERS * XSK_UMEM__DEFAULT_FRAME_SIZE)
 #define RX_FULL_RXQSIZE 32
+#define UMEM_HEADROOM_TEST_SIZE 128
 #define XSK_UMEM__INVALID_FRAME_SIZE (XSK_UMEM__DEFAULT_FRAME_SIZE + 1)
 
 #define print_verbose(x...) do { if (opt_verbose) ksft_print_msg(x); } while (0)
@@ -51,8 +53,15 @@ enum test_mode {
 };
 
 enum test_type {
-	TEST_TYPE_NOPOLL,
+	TEST_TYPE_RUN_TO_COMPLETION,
+	TEST_TYPE_RUN_TO_COMPLETION_2K_FRAME,
+	TEST_TYPE_RUN_TO_COMPLETION_SINGLE_PKT,
 	TEST_TYPE_POLL,
+	TEST_TYPE_UNALIGNED,
+	TEST_TYPE_ALIGNED_INV_DESC,
+	TEST_TYPE_ALIGNED_INV_DESC_2K_FRAME,
+	TEST_TYPE_UNALIGNED_INV_DESC,
+	TEST_TYPE_HEADROOM,
 	TEST_TYPE_TEARDOWN,
 	TEST_TYPE_BIDI,
 	TEST_TYPE_STATS,
@@ -68,25 +77,21 @@ enum stat_test_type {
 	STAT_TEST_TYPE_MAX
 };
 
-static int configured_mode;
 static bool opt_pkt_dump;
-static u32 num_frames = DEFAULT_PKT_CNT / 4;
-static bool second_step;
 static int test_type;
 
 static bool opt_verbose;
-
-static u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
-static u32 xdp_bind_flags = XDP_USE_NEED_WAKEUP | XDP_COPY;
 static int stat_test_type;
-static u32 rxqsize;
-static u32 frame_headroom;
 
 struct xsk_umem_info {
 	struct xsk_ring_prod fq;
 	struct xsk_ring_cons cq;
 	struct xsk_umem *umem;
+	u32 num_frames;
+	u32 frame_headroom;
 	void *buffer;
+	u32 frame_size;
+	bool unaligned_mode;
 };
 
 struct xsk_socket_info {
@@ -95,51 +100,63 @@ struct xsk_socket_info {
 	struct xsk_umem_info *umem;
 	struct xsk_socket *xsk;
 	u32 outstanding_tx;
-};
-
-struct flow_vector {
-	enum fvector {
-		tx,
-		rx,
-	} vector;
+	u32 rxqsize;
 };
 
 struct pkt {
 	u64 addr;
 	u32 len;
 	u32 payload;
+	bool valid;
 };
 
 struct pkt_stream {
 	u32 nb_pkts;
+	u32 rx_pkt_nb;
 	struct pkt *pkts;
+	bool use_addr_for_fill;
 };
 
+typedef void *(*thread_func_t)(void *arg);
+
 struct ifobject {
 	char ifname[MAX_INTERFACE_NAME_CHARS];
 	char nsname[MAX_INTERFACES_NAMESPACE_CHARS];
 	struct xsk_socket_info *xsk;
-	struct xsk_socket_info **xsk_arr;
-	struct xsk_umem_info **umem_arr;
+	struct xsk_socket_info *xsk_arr;
 	struct xsk_umem_info *umem;
-	void *(*func_ptr)(void *arg);
-	struct flow_vector fv;
+	struct xsk_umem_info *umem_arr;
+	thread_func_t func_ptr;
 	struct pkt_stream *pkt_stream;
 	int ns_fd;
 	u32 dst_ip;
 	u32 src_ip;
+	u32 xdp_flags;
+	u32 bind_flags;
 	u16 src_port;
 	u16 dst_port;
+	bool tx_on;
+	bool rx_on;
+	bool use_poll;
+	bool pacing_on;
 	u8 dst_mac[ETH_ALEN];
 	u8 src_mac[ETH_ALEN];
 };
 
-static struct ifobject *ifdict[MAX_INTERFACES];
-static struct ifobject *ifdict_rx;
-static struct ifobject *ifdict_tx;
+struct test_spec {
+	struct ifobject *ifobj_tx;
+	struct ifobject *ifobj_rx;
+	struct pkt_stream *pkt_stream_default;
+	u16 total_steps;
+	u16 current_step;
+	u16 nb_sockets;
+	char name[MAX_TEST_NAME_SIZE];
+};
 
-/*threads*/
 pthread_barrier_t barr;
-pthread_t t0, t1;
+pthread_mutex_t pacing_mutex = PTHREAD_MUTEX_INITIALIZER;
+pthread_cond_t pacing_cond = PTHREAD_COND_INITIALIZER;
+
+u32 pkts_in_flight;
 
 #endif				/* XDPXCEIVER_H */
diff --git a/tools/testing/selftests/core/close_range_test.c b/tools/testing/selftests/core/close_range_test.c
index 73eb29c916d1..aa7d13d91963 100644
--- a/tools/testing/selftests/core/close_range_test.c
+++ b/tools/testing/selftests/core/close_range_test.c
@@ -54,7 +54,7 @@ static inline int sys_close_range(unsigned int fd, unsigned int max_fd,
 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
 #endif
 
-TEST(close_range)
+TEST(core_close_range)
 {
 	int i, ret;
 	int open_fds[101];
diff --git a/tools/testing/selftests/drivers/net/dsa/test_bridge_fdb_stress.sh b/tools/testing/selftests/drivers/net/dsa/test_bridge_fdb_stress.sh
new file mode 100755
index 000000000000..dca8be6092b9
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/test_bridge_fdb_stress.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Bridge FDB entries can be offloaded to DSA switches without holding the
+# rtnl_mutex. Traditionally this mutex has conferred drivers implicit
+# serialization, which means their code paths are not well tested in the
+# presence of concurrency.
+# This test creates a background task that stresses the FDB by adding and
+# deleting an entry many times in a row without the rtnl_mutex held.
+# It then tests the driver resistance to concurrency by calling .ndo_fdb_dump
+# (with rtnl_mutex held) from a foreground task.
+# Since either the FDB dump or the additions/removals can fail, but the
+# additions and removals are performed in deferred as opposed to process
+# context, we cannot simply check for user space error codes.
+
+WAIT_TIME=1
+NUM_NETIFS=1
+REQUIRE_JQ="no"
+REQUIRE_MZ="no"
+NETIF_CREATE="no"
+lib_dir=$(dirname $0)/../../../net/forwarding
+source $lib_dir/lib.sh
+
+cleanup() {
+	echo "Cleaning up"
+	kill $pid && wait $pid &> /dev/null
+	ip link del br0
+	echo "Please check kernel log for errors"
+}
+trap 'cleanup' EXIT
+
+eth=${NETIFS[p1]}
+
+ip link del br0 2&>1 >/dev/null || :
+ip link add br0 type bridge && ip link set $eth master br0
+
+(while :; do
+	bridge fdb add 00:01:02:03:04:05 dev $eth master static
+	bridge fdb del 00:01:02:03:04:05 dev $eth master static
+done) &
+pid=$!
+
+for i in $(seq 1 50); do
+	bridge fdb show > /dev/null
+	sleep 3
+	echo "$((${i} * 2))% complete..."
+done
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh
index a37273473c1b..d3a891d421ab 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh
@@ -87,6 +87,7 @@ ALL_TESTS="
 NUM_NETIFS=4
 source $lib_dir/lib.sh
 source $lib_dir/devlink_lib.sh
+source mlxsw_lib.sh
 
 h1_create()
 {
@@ -626,8 +627,7 @@ ipv6_redirect_test()
 
 ptp_event_test()
 {
-	# PTP is only supported on Spectrum-1, for now.
-	[[ "$DEVLINK_VIDDID" != "15b3:cb84" ]] && return
+	mlxsw_only_on_spectrum 1 || return
 
 	# PTP Sync (0)
 	devlink_trap_stats_test "PTP Time-Critical Event Message" "ptp_event" \
@@ -638,8 +638,7 @@ ptp_event_test()
 
 ptp_general_test()
 {
-	# PTP is only supported on Spectrum-1, for now.
-	[[ "$DEVLINK_VIDDID" != "15b3:cb84" ]] && return
+	mlxsw_only_on_spectrum 1 || return
 
 	# PTP Announce (b)
 	devlink_trap_stats_test "PTP General Message" "ptp_general" \
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh
index 508a702f0021..0bd5ffc218ac 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh
@@ -272,13 +272,17 @@ __rate_test()
 
 rate_test()
 {
-	local id
+	local last_policer=$(devlink -j -p trap policer show |
+			     jq '[.[]["'$DEVLINK_DEV'"][].policer] | max')
 
-	for id in $(devlink_trap_policer_ids_get); do
-		echo
-		log_info "Running rate test for policer $id"
-		__rate_test $id
-	done
+	log_info "Running rate test for policer 1"
+	__rate_test 1
+
+	log_info "Running rate test for policer $((last_policer / 2))"
+	__rate_test $((last_policer / 2))
+
+	log_info "Running rate test for policer $last_policer"
+	__rate_test $last_policer
 }
 
 __burst_test()
@@ -342,13 +346,17 @@ __burst_test()
 
 burst_test()
 {
-	local id
+	local last_policer=$(devlink -j -p trap policer show |
+			     jq '[.[]["'$DEVLINK_DEV'"][].policer] | max')
+
+	log_info "Running burst test for policer 1"
+	__burst_test 1
+
+	log_info "Running burst test for policer $((last_policer / 2))"
+	__burst_test $((last_policer / 2))
 
-	for id in $(devlink_trap_policer_ids_get); do
-		echo
-		log_info "Running burst size test for policer $id"
-		__burst_test $id
-	done
+	log_info "Running burst test for policer $last_policer"
+	__burst_test $last_policer
 }
 
 trap cleanup EXIT
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh
index 8817851da7a9..e9a82cae8c9a 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh
@@ -13,7 +13,7 @@
 #                     |
 # +-------------------|-----+
 # | SW1               |     |
-# |              $swp1 +    |
+# |             $swp1 +     |
 # |      192.0.2.2/28       |
 # |                         |
 # |  + g1a (gre)            |
@@ -27,8 +27,8 @@
 #    |
 # +--|----------------------+
 # |  |                 VRF2 |
-# | + $rp2                  |
-# |   198.51.100.2/28       |
+# |  + $rp2                 |
+# |    198.51.100.2/28      |
 # +-------------------------+
 
 lib_dir=$(dirname $0)/../../../net/forwarding
@@ -116,12 +116,16 @@ cleanup()
 	forwarding_restore
 }
 
-ecn_payload_get()
+ipip_payload_get()
 {
+	local flags=$1; shift
+	local key=$1; shift
+
 	p=$(:
-		)"0"$(		              : GRE flags
+		)"$flags"$(		      : GRE flags
 	        )"0:00:"$(                    : Reserved + version
 		)"08:00:"$(		      : ETH protocol type
+		)"$key"$( 		      : Key
 		)"4"$(	                      : IP version
 		)"5:"$(                       : IHL
 		)"00:"$(                      : IP TOS
@@ -137,6 +141,11 @@ ecn_payload_get()
 	echo $p
 }
 
+ecn_payload_get()
+{
+	echo $(ipip_payload_get "0")
+}
+
 ecn_decap_test()
 {
 	local trap_name="decap_error"
@@ -171,31 +180,6 @@ ecn_decap_test()
 	tc filter del dev $swp1 egress protocol ip pref 1 handle 101 flower
 }
 
-ipip_payload_get()
-{
-	local flags=$1; shift
-	local key=$1; shift
-
-	p=$(:
-		)"$flags"$(		      : GRE flags
-	        )"0:00:"$(                    : Reserved + version
-		)"08:00:"$(		      : ETH protocol type
-		)"$key"$( 		      : Key
-		)"4"$(	                      : IP version
-		)"5:"$(                       : IHL
-		)"00:"$(                      : IP TOS
-		)"00:14:"$(                   : IP total length
-		)"00:00:"$(                   : IP identification
-		)"20:00:"$(                   : IP flags + frag off
-		)"30:"$(                      : IP TTL
-		)"01:"$(                      : IP proto
-		)"E7:E6:"$(    	              : IP header csum
-		)"C0:00:01:01:"$(             : IP saddr : 192.0.1.1
-		)"C0:00:02:01:"$(             : IP daddr : 192.0.2.1
-		)
-	echo $p
-}
-
 no_matching_tunnel_test()
 {
 	local trap_name="decap_error"
@@ -239,7 +223,8 @@ decap_error_test()
 	no_matching_tunnel_test "Decap error: Source IP check failed" \
 		192.0.2.68 "0"
 	no_matching_tunnel_test \
-		"Decap error: Key exists but was not expected" $sip "2" ":E9:"
+		"Decap error: Key exists but was not expected" $sip "2" \
+		"00:00:00:E9:"
 
 	# Destroy the tunnel and create new one with key
 	__addr_add_del g1 del 192.0.2.65/32
@@ -251,7 +236,8 @@ decap_error_test()
 	no_matching_tunnel_test \
 		"Decap error: Key does not exist but was expected" $sip "0"
 	no_matching_tunnel_test \
-		"Decap error: Packet has a wrong key field" $sip "2" "E8:"
+		"Decap error: Packet has a wrong key field" $sip "2" \
+		"00:00:00:E8:"
 }
 
 trap cleanup EXIT
diff --git a/tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh b/tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh
index cbe50f260a40..a95856aafd2a 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh
@@ -11,3 +11,53 @@ if [[ ! -v MLXSW_CHIP ]]; then
 		exit 1
 	fi
 fi
+
+MLXSW_SPECTRUM_REV=$(case $MLXSW_CHIP in
+			     mlxsw_spectrum)
+				     echo 1 ;;
+			     mlxsw_spectrum*)
+				     echo ${MLXSW_CHIP#mlxsw_spectrum} ;;
+			     *)
+				     echo "Couldn't determine Spectrum chip revision." \
+					  > /dev/stderr ;;
+		     esac)
+
+mlxsw_on_spectrum()
+{
+	local rev=$1; shift
+	local op="=="
+	local rev2=${rev%+}
+
+	if [[ $rev2 != $rev ]]; then
+		op=">="
+	fi
+
+	((MLXSW_SPECTRUM_REV $op rev2))
+}
+
+__mlxsw_only_on_spectrum()
+{
+	local rev=$1; shift
+	local caller=$1; shift
+	local src=$1; shift
+
+	if ! mlxsw_on_spectrum "$rev"; then
+		log_test_skip $src:$caller "(Spectrum-$rev only)"
+		return 1
+	fi
+}
+
+mlxsw_only_on_spectrum()
+{
+	local caller=${FUNCNAME[1]}
+	local src=${BASH_SOURCE[1]}
+	local rev
+
+	for rev in "$@"; do
+		if __mlxsw_only_on_spectrum "$rev" "$caller" "$src"; then
+			return 0
+		fi
+	done
+
+	return 1
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profile_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profile_scale.sh
new file mode 100644
index 000000000000..71e7681f15f6
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profile_scale.sh
@@ -0,0 +1,72 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test for RIF MAC profiles resource. The test adds VLAN netdevices according to
+# the maximum number of RIF MAC profiles, sets each of them with a random
+# MAC address, and checks that eventually the number of occupied RIF MAC
+# profiles equals the maximum number of RIF MAC profiles.
+
+
+RIF_MAC_PROFILE_NUM_NETIFS=2
+
+rif_mac_profiles_create()
+{
+	local count=$1; shift
+	local should_fail=$1; shift
+	local batch_file="$(mktemp)"
+
+	for ((i = 1; i <= count; i++)); do
+		vlan=$(( i*10 ))
+		m=$(( i*11 ))
+
+		cat >> $batch_file <<-EOF
+			link add link $h1 name $h1.$vlan \
+				address 00:$m:$m:$m:$m:$m type vlan id $vlan
+			address add 192.0.$m.1/24 dev $h1.$vlan
+		EOF
+	done
+
+	ip -b $batch_file &> /dev/null
+	check_err_fail $should_fail $? "RIF creation"
+
+	rm -f $batch_file
+}
+
+rif_mac_profile_test()
+{
+	local count=$1; shift
+	local should_fail=$1; shift
+
+	rif_mac_profiles_create $count $should_fail
+
+	occ=$(devlink -j resource show $DEVLINK_DEV \
+	      | jq '.[][][] | select(.name=="rif_mac_profiles") |.["occ"]')
+
+	[[ $occ -eq $count ]]
+	check_err_fail $should_fail $? "Attempt to use $count profiles (actual result $occ)"
+}
+
+rif_mac_profile_setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	h2=${NETIFS[p2]}
+
+	# Disable IPv6 on the two interfaces to avoid IPv6 link-local addresses
+	# being generated and RIFs being created.
+	sysctl_set net.ipv6.conf.$h1.disable_ipv6 1
+	sysctl_set net.ipv6.conf.$h2.disable_ipv6 1
+
+	ip link set $h1 up
+	ip link set $h2 up
+}
+
+rif_mac_profile_cleanup()
+{
+	pre_cleanup
+
+	ip link set $h2 down
+	ip link set $h1 down
+
+	sysctl_restore net.ipv6.conf.$h2.disable_ipv6
+	sysctl_restore net.ipv6.conf.$h1.disable_ipv6
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profiles.sh b/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profiles.sh
new file mode 100755
index 000000000000..c18340cee55d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profiles.sh
@@ -0,0 +1,213 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	mac_profile_test
+"
+NUM_NETIFS=4
+source $lib_dir/lib.sh
+source $lib_dir/tc_common.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/24
+	ip route add 198.51.100.0/24 vrf v$h1 nexthop via 192.0.2.2
+
+	tc qdisc add dev $h1 ingress
+}
+
+h1_destroy()
+{
+	tc qdisc del dev $h1 ingress
+
+	ip route del 198.51.100.0/24 vrf v$h1
+	simple_if_fini $h1 192.0.2.1/24
+}
+
+h2_create()
+{
+	simple_if_init $h2 198.51.100.1/24
+	ip route add 192.0.2.0/24 vrf v$h2 nexthop via 198.51.100.2
+
+	tc qdisc add dev $h2 ingress
+}
+
+h2_destroy()
+{
+	tc qdisc del dev $h2 ingress
+
+	ip route del 192.0.2.0/24 vrf v$h2
+	simple_if_fini $h2 198.51.100.1/24
+}
+
+router_create()
+{
+	ip link set dev $rp1 up
+	ip link set dev $rp2 up
+
+	tc qdisc add dev $rp1 clsact
+	tc qdisc add dev $rp2 clsact
+	ip address add 192.0.2.2/24 dev $rp1
+	ip address add 198.51.100.2/24 dev $rp2
+}
+
+router_destroy()
+{
+	ip address del 198.51.100.2/24 dev $rp2
+	ip address del 192.0.2.2/24 dev $rp1
+	tc qdisc del dev $rp2 clsact
+	tc qdisc del dev $rp1 clsact
+
+	ip link set dev $rp2 down
+	ip link set dev $rp1 down
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	rp1=${NETIFS[p2]}
+
+	rp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+
+	router_create
+
+	forwarding_enable
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	forwarding_restore
+
+	router_destroy
+
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+h1_to_h2()
+{
+	local test_name=$@; shift
+	local smac=$(mac_get $rp2)
+
+	RET=0
+
+	# Replace neighbour to avoid first packet being forwarded in software
+	ip neigh replace dev $rp2 198.51.100.1 lladdr $(mac_get $h2)
+
+	# Add a filter to ensure that packets are forwarded in hardware. Cannot
+	# match on source MAC because it is not set in eACL after routing
+	tc filter add dev $rp2 egress proto ip pref 1 handle 101 \
+		flower skip_sw ip_proto udp src_port 12345 dst_port 54321 \
+		action pass
+
+	# Add a filter to ensure that packets are received with the correct
+	# source MAC
+	tc filter add dev $h2 ingress proto ip pref 1 handle 101 \
+		flower skip_sw src_mac $smac ip_proto udp src_port 12345 \
+		dst_port 54321 action pass
+
+	$MZ $h1 -a own -b $(mac_get $rp1) -t udp "sp=12345,dp=54321" \
+		-A 192.0.2.1 -B 198.51.100.1 -c 10 -p 100 -d 1msec -q
+
+	tc_check_packets "dev $rp2 egress" 101 10
+	check_err $? "packets not forwarded in hardware"
+
+	tc_check_packets "dev $h2 ingress" 101 10
+	check_err $? "packets not forwarded with correct source mac"
+
+	log_test "h1->h2: $test_name"
+
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+	tc filter del dev $rp2 egress protocol ip pref 1 handle 101 flower
+	ip neigh del dev $rp2 198.51.100.1 lladdr $(mac_get $h2)
+}
+
+h2_to_h1()
+{
+	local test_name=$@; shift
+	local rp1_mac=$(mac_get $rp1)
+
+	RET=0
+
+	ip neigh replace dev $rp1 192.0.2.1 lladdr $(mac_get $h1)
+
+	tc filter add dev $rp1 egress proto ip pref 1 handle 101 \
+		flower skip_sw ip_proto udp src_port 54321 dst_port 12345 \
+		action pass
+
+	tc filter add dev $h1 ingress proto ip pref 1 handle 101 \
+		flower skip_sw src_mac $rp1_mac ip_proto udp src_port 54321 \
+		dst_port 12345 action pass
+
+	$MZ $h2 -a own -b $(mac_get $rp2) -t udp "sp=54321,dp=12345" \
+		-A 198.51.100.1 -B 192.0.2.1 -c 10 -p 100 -d 1msec -q
+
+	tc_check_packets "dev $rp1 egress" 101 10
+	check_err $? "packets not forwarded in hardware"
+
+	tc_check_packets "dev $h1 ingress" 101 10
+	check_err $? "packets not forwarded with correct source mac"
+
+	log_test "h2->h1: $test_name"
+
+	tc filter del dev $h1 ingress protocol ip pref 1 handle 101 flower
+	tc filter del dev $rp1 egress protocol ip pref 1 handle 101 flower
+	ip neigh del dev $rp1 192.0.2.1 lladdr $(mac_get $h1)
+}
+
+smac_test()
+{
+	local test_name=$@; shift
+
+	# Test that packets forwarded to $h2 via $rp2 are forwarded with the
+	# current source MAC of $rp2
+	h1_to_h2 $test_name
+
+	# Test that packets forwarded to $h1 via $rp1 are forwarded with the
+	# current source MAC of $rp1. This MAC is never changed during the test,
+	# but given the shared nature of MAC profile, the point is to see that
+	# changes to the MAC of $rp2 do not affect that of $rp1
+	h2_to_h1 $test_name
+}
+
+mac_profile_test()
+{
+	local rp2_mac=$(mac_get $rp2)
+
+	# Test behavior when the RIF backing $rp2 is transitioned to use
+	# a new MAC profile
+	ip link set dev $rp2 addr 00:11:22:33:44:55
+	smac_test "new mac profile"
+
+	# Test behavior when the MAC profile used by the RIF is edited
+	ip link set dev $rp2 address 00:22:22:22:22:22
+	smac_test "edit mac profile"
+
+	# Restore original MAC
+	ip link set dev $rp2 addr $rp2_mac
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+mac_profiles=$(devlink_resource_size_get rif_mac_profiles)
+if [[ $mac_profiles -ne 1 ]]; then
+	tests_run
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profiles_occ.sh b/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profiles_occ.sh
new file mode 100755
index 000000000000..b513f64d9092
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profiles_occ.sh
@@ -0,0 +1,117 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	rif_mac_profile_edit_test
+"
+NUM_NETIFS=2
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	h2=${NETIFS[p2]}
+
+	# Disable IPv6 on the two interfaces to avoid IPv6 link-local addresses
+	# being generated and RIFs being created
+	sysctl_set net.ipv6.conf.$h1.disable_ipv6 1
+	sysctl_set net.ipv6.conf.$h2.disable_ipv6 1
+
+	ip link set $h1 up
+	ip link set $h2 up
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ip link set $h2 down
+	ip link set $h1 down
+
+	sysctl_restore net.ipv6.conf.$h2.disable_ipv6
+	sysctl_restore net.ipv6.conf.$h1.disable_ipv6
+
+	# Reload in order to clean all the RIFs and RIF MAC profiles created
+	devlink_reload
+}
+
+create_max_rif_mac_profiles()
+{
+	local count=$1; shift
+	local batch_file="$(mktemp)"
+
+	for ((i = 1; i <= count; i++)); do
+		vlan=$(( i*10 ))
+		m=$(( i*11 ))
+
+		cat >> $batch_file <<-EOF
+			link add link $h1 name $h1.$vlan \
+				address 00:$m:$m:$m:$m:$m type vlan id $vlan
+			address add 192.0.$m.1/24 dev $h1.$vlan
+		EOF
+	done
+
+	ip -b $batch_file &> /dev/null
+	rm -f $batch_file
+}
+
+rif_mac_profile_replacement_test()
+{
+	local h1_10_mac=$(mac_get $h1.10)
+
+	RET=0
+
+	ip link set $h1.10 address 00:12:34:56:78:99
+	check_err $?
+
+	log_test "RIF MAC profile replacement"
+
+	ip link set $h1.10 address $h1_10_mac
+}
+
+rif_mac_profile_shared_replacement_test()
+{
+	local count=$1; shift
+	local i=$((count + 1))
+	local vlan=$(( i*10 ))
+	local m=11
+
+	RET=0
+
+	# Create a VLAN netdevice that has the same MAC as the first one.
+	ip link add link $h1 name $h1.$vlan address 00:$m:$m:$m:$m:$m \
+		type vlan id $vlan
+	ip address add 192.0.$m.1/24 dev $h1.$vlan
+
+	# MAC replacement should fail because all the MAC profiles are in use
+	# and the profile is shared between multiple RIFs
+	m=$(( i*11 ))
+	ip link set $h1.$vlan address 00:$m:$m:$m:$m:$m &> /dev/null
+	check_fail $?
+
+	log_test "RIF MAC profile shared replacement"
+
+	ip link del dev $h1.$vlan
+}
+
+rif_mac_profile_edit_test()
+{
+	local count=$(devlink_resource_size_get rif_mac_profiles)
+
+	create_max_rif_mac_profiles $count
+
+	rif_mac_profile_replacement_test
+	rif_mac_profile_shared_replacement_test $count
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
index a217f9f6775b..04f03ae9d8fb 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
@@ -10,9 +10,7 @@
 lib_dir=$(dirname $0)/../../../net/forwarding
 
 ALL_TESTS="
-	rif_set_addr_test
 	rif_vrf_set_addr_test
-	rif_inherit_bridge_addr_test
 	rif_non_inherit_bridge_addr_test
 	vlan_interface_deletion_test
 	bridge_deletion_test
@@ -60,55 +58,6 @@ cleanup()
 	ip link set dev $swp1 down
 }
 
-rif_set_addr_test()
-{
-	local swp1_mac=$(mac_get $swp1)
-	local swp2_mac=$(mac_get $swp2)
-
-	RET=0
-
-	# $swp1 and $swp2 likely got their IPv6 local addresses already, but
-	# here we need to test the transition to RIF.
-	ip addr flush dev $swp1
-	ip addr flush dev $swp2
-	sleep .1
-
-	ip addr add dev $swp1 192.0.2.1/28
-	check_err $?
-
-	ip link set dev $swp1 addr 00:11:22:33:44:55
-	check_err $?
-
-	# IP address enablement should be rejected if the MAC address prefix
-	# doesn't match other RIFs.
-	ip addr add dev $swp2 192.0.2.2/28 &>/dev/null
-	check_fail $? "IP address addition passed for a device with a wrong MAC"
-	ip addr add dev $swp2 192.0.2.2/28 2>&1 >/dev/null \
-	    | grep -q mlxsw_spectrum
-	check_err $? "no extack for IP address addition"
-
-	ip link set dev $swp2 addr 00:11:22:33:44:66
-	check_err $?
-	ip addr add dev $swp2 192.0.2.2/28 &>/dev/null
-	check_err $?
-
-	# Change of MAC address of a RIF should be forbidden if the new MAC
-	# doesn't share the prefix with other MAC addresses.
-	ip link set dev $swp2 addr 00:11:22:33:00:66 &>/dev/null
-	check_fail $? "change of MAC address passed for a wrong MAC"
-	ip link set dev $swp2 addr 00:11:22:33:00:66 2>&1 >/dev/null \
-	    | grep -q mlxsw_spectrum
-	check_err $? "no extack for MAC address change"
-
-	log_test "RIF - bad MAC change"
-
-	ip addr del dev $swp2 192.0.2.2/28
-	ip addr del dev $swp1 192.0.2.1/28
-
-	ip link set dev $swp2 addr $swp2_mac
-	ip link set dev $swp1 addr $swp1_mac
-}
-
 rif_vrf_set_addr_test()
 {
 	# Test that it is possible to set an IP address on a VRF upper despite
@@ -128,45 +77,6 @@ rif_vrf_set_addr_test()
 	ip link del dev vrf-test
 }
 
-rif_inherit_bridge_addr_test()
-{
-	RET=0
-
-	# Create first RIF
-	ip addr add dev $swp1 192.0.2.1/28
-	check_err $?
-
-	# Create a FID RIF
-	ip link add name br1 up type bridge vlan_filtering 0
-	ip link set dev $swp2 master br1
-	ip addr add dev br1 192.0.2.17/28
-	check_err $?
-
-	# Prepare a device with a low MAC address
-	ip link add name d up type dummy
-	ip link set dev d addr 00:11:22:33:44:55
-
-	# Attach the device to br1. That prompts bridge address change, which
-	# should be vetoed, thus preventing the attachment.
-	ip link set dev d master br1 &>/dev/null
-	check_fail $? "Device with low MAC was permitted to attach a bridge with RIF"
-	ip link set dev d master br1 2>&1 >/dev/null \
-	    | grep -q mlxsw_spectrum
-	check_err $? "no extack for bridge attach rejection"
-
-	ip link set dev $swp2 addr 00:11:22:33:44:55 &>/dev/null
-	check_fail $? "Changing swp2's MAC address permitted"
-	ip link set dev $swp2 addr 00:11:22:33:44:55 2>&1 >/dev/null \
-	    | grep -q mlxsw_spectrum
-	check_err $? "no extack for bridge port MAC address change rejection"
-
-	log_test "RIF - attach port with bad MAC to bridge"
-
-	ip link del dev d
-	ip link del dev br1
-	ip addr del dev $swp1 192.0.2.1/28
-}
-
 rif_non_inherit_bridge_addr_test()
 {
 	local swp2_mac=$(mac_get $swp2)
@@ -779,7 +689,7 @@ nexthop_obj_offload_test()
 	setup_wait
 
 	ip nexthop add id 1 via 192.0.2.2 dev $swp1
-	ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud reachable \
+	ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud perm \
 		dev $swp1
 
 	busywait "$TIMEOUT" wait_for_offload \
@@ -791,7 +701,7 @@ nexthop_obj_offload_test()
 		ip nexthop show id 1
 	check_err $? "nexthop marked as offloaded after setting neigh to failed state"
 
-	ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud reachable \
+	ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud perm \
 		dev $swp1
 	busywait "$TIMEOUT" wait_for_offload \
 		ip nexthop show id 1
@@ -828,11 +738,11 @@ nexthop_obj_group_offload_test()
 	ip nexthop add id 1 via 192.0.2.2 dev $swp1
 	ip nexthop add id 2 via 2001:db8:1::2 dev $swp1
 	ip nexthop add id 10 group 1/2
-	ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud reachable \
+	ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud perm \
 		dev $swp1
-	ip neigh replace 192.0.2.3 lladdr 00:11:22:33:44:55 nud reachable \
+	ip neigh replace 192.0.2.3 lladdr 00:11:22:33:44:55 nud perm \
 		dev $swp1
-	ip neigh replace 2001:db8:1::2 lladdr 00:11:22:33:44:55 nud reachable \
+	ip neigh replace 2001:db8:1::2 lladdr 00:11:22:33:44:55 nud perm \
 		dev $swp1
 
 	busywait "$TIMEOUT" wait_for_offload \
@@ -888,11 +798,11 @@ nexthop_obj_bucket_offload_test()
 	ip nexthop add id 1 via 192.0.2.2 dev $swp1
 	ip nexthop add id 2 via 2001:db8:1::2 dev $swp1
 	ip nexthop add id 10 group 1/2 type resilient buckets 32 idle_timer 0
-	ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud reachable \
+	ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud perm \
 		dev $swp1
-	ip neigh replace 192.0.2.3 lladdr 00:11:22:33:44:55 nud reachable \
+	ip neigh replace 192.0.2.3 lladdr 00:11:22:33:44:55 nud perm \
 		dev $swp1
-	ip neigh replace 2001:db8:1::2 lladdr 00:11:22:33:44:55 nud reachable \
+	ip neigh replace 2001:db8:1::2 lladdr 00:11:22:33:44:55 nud perm \
 		dev $swp1
 
 	busywait "$TIMEOUT" wait_for_offload \
@@ -921,7 +831,7 @@ nexthop_obj_bucket_offload_test()
 	check_err $? "nexthop bucket not marked as offloaded after revalidating nexthop"
 
 	# Revalidate nexthop id 2 by changing its neighbour
-	ip neigh replace 2001:db8:1::2 lladdr 00:11:22:33:44:55 nud reachable \
+	ip neigh replace 2001:db8:1::2 lladdr 00:11:22:33:44:55 nud perm \
 		dev $swp1
 	busywait "$TIMEOUT" wait_for_offload \
 		ip nexthop bucket show nhid 2
@@ -971,9 +881,9 @@ nexthop_obj_route_offload_test()
 	setup_wait
 
 	ip nexthop add id 1 via 192.0.2.2 dev $swp1
-	ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud reachable \
+	ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud perm \
 		dev $swp1
-	ip neigh replace 192.0.2.3 lladdr 00:11:22:33:44:55 nud reachable \
+	ip neigh replace 192.0.2.3 lladdr 00:11:22:33:44:55 nud perm \
 		dev $swp1
 
 	ip route replace 198.51.100.0/24 nhid 1
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_offload.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_offload.sh
new file mode 100755
index 000000000000..071a33d10c20
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_offload.sh
@@ -0,0 +1,290 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test qdisc offload indication
+
+
+ALL_TESTS="
+	test_root
+	test_port_tbf
+	test_etsprio
+	test_etsprio_port_tbf
+"
+NUM_NETIFS=1
+lib_dir=$(dirname $0)/../../../net/forwarding
+source $lib_dir/lib.sh
+
+check_not_offloaded()
+{
+	local handle=$1; shift
+	local h
+	local offloaded
+
+	h=$(qdisc_stats_get $h1 "$handle" .handle)
+	[[ $h == '"'$handle'"' ]]
+	check_err $? "Qdisc with handle $handle does not exist"
+
+	offloaded=$(qdisc_stats_get $h1 "$handle" .offloaded)
+	[[ $offloaded == true ]]
+	check_fail $? "Qdisc with handle $handle offloaded, but should not be"
+}
+
+check_all_offloaded()
+{
+	local handle=$1; shift
+
+	if [[ ! -z $handle ]]; then
+		local offloaded=$(qdisc_stats_get $h1 "$handle" .offloaded)
+		[[ $offloaded == true ]]
+		check_err $? "Qdisc with handle $handle not offloaded"
+	fi
+
+	local unoffloaded=$(tc q sh dev $h1 invisible |
+				grep -v offloaded |
+				sed s/root/parent\ root/ |
+				cut -d' ' -f 5)
+	[[ -z $unoffloaded ]]
+	check_err $? "Qdiscs with following parents not offloaded: $unoffloaded"
+
+	pre_cleanup
+}
+
+with_ets()
+{
+	local handle=$1; shift
+	local locus=$1; shift
+
+	tc qdisc add dev $h1 $locus handle $handle \
+	   ets bands 8 priomap 7 6 5 4 3 2 1 0
+	"$@"
+	tc qdisc del dev $h1 $locus
+}
+
+with_prio()
+{
+	local handle=$1; shift
+	local locus=$1; shift
+
+	tc qdisc add dev $h1 $locus handle $handle \
+	   prio bands 8 priomap 7 6 5 4 3 2 1 0
+	"$@"
+	tc qdisc del dev $h1 $locus
+}
+
+with_red()
+{
+	local handle=$1; shift
+	local locus=$1; shift
+
+	tc qdisc add dev $h1 $locus handle $handle \
+	   red limit 1000000 min 200000 max 300000 probability 0.5 avpkt 1500
+	"$@"
+	tc qdisc del dev $h1 $locus
+}
+
+with_tbf()
+{
+	local handle=$1; shift
+	local locus=$1; shift
+
+	tc qdisc add dev $h1 $locus handle $handle \
+	   tbf rate 400Mbit burst 128K limit 1M
+	"$@"
+	tc qdisc del dev $h1 $locus
+}
+
+with_pfifo()
+{
+	local handle=$1; shift
+	local locus=$1; shift
+
+	tc qdisc add dev $h1 $locus handle $handle pfifo limit 100K
+	"$@"
+	tc qdisc del dev $h1 $locus
+}
+
+with_bfifo()
+{
+	local handle=$1; shift
+	local locus=$1; shift
+
+	tc qdisc add dev $h1 $locus handle $handle bfifo limit 100K
+	"$@"
+	tc qdisc del dev $h1 $locus
+}
+
+with_drr()
+{
+	local handle=$1; shift
+	local locus=$1; shift
+
+	tc qdisc add dev $h1 $locus handle $handle drr
+	"$@"
+	tc qdisc del dev $h1 $locus
+}
+
+with_qdiscs()
+{
+	local handle=$1; shift
+	local parent=$1; shift
+	local kind=$1; shift
+	local next_handle=$((handle * 2))
+	local locus;
+
+	if [[ $kind == "--" ]]; then
+		local cmd=$1; shift
+		$cmd $(printf %x: $parent) "$@"
+	else
+		if ((parent == 0)); then
+			locus=root
+		else
+			locus=$(printf "parent %x:1" $parent)
+		fi
+
+		with_$kind $(printf %x: $handle) "$locus" \
+			with_qdiscs $next_handle $handle "$@"
+	fi
+}
+
+get_name()
+{
+	local parent=$1; shift
+	local name=$(echo "" "${@^^}" | tr ' ' -)
+
+	if ((parent != 0)); then
+		kind=$(qdisc_stats_get $h1 $parent: .kind)
+		kind=${kind%\"}
+		kind=${kind#\"}
+		name="-${kind^^}$name"
+	fi
+
+	echo root$name
+}
+
+do_test_offloaded()
+{
+	local handle=$1; shift
+	local parent=$1; shift
+
+	RET=0
+	with_qdiscs $handle $parent "$@" -- check_all_offloaded
+	log_test $(get_name $parent "$@")" offloaded"
+}
+
+do_test_nooffload()
+{
+	local handle=$1; shift
+	local parent=$1; shift
+
+	local name=$(echo "${@^^}" | tr ' ' -)
+	local kind
+
+	RET=0
+	with_qdiscs $handle $parent "$@" -- check_not_offloaded
+	log_test $(get_name $parent "$@")" not offloaded"
+}
+
+do_test_combinations()
+{
+	local handle=$1; shift
+	local parent=$1; shift
+
+	local cont
+	local leaf
+	local fifo
+
+	for cont in "" ets prio; do
+		for leaf in "" red tbf "red tbf" "tbf red"; do
+			for fifo in "" pfifo bfifo; do
+				if [[ -z "$cont$leaf$fifo" ]]; then
+					continue
+				fi
+				do_test_offloaded $handle $parent \
+						  $cont $leaf $fifo
+			done
+		done
+	done
+
+	for cont in ets prio; do
+		for leaf in red tbf; do
+			do_test_nooffload $handle $parent $cont red tbf $leaf
+			do_test_nooffload $handle $parent $cont tbf red $leaf
+		done
+		for leaf in "red red" "tbf tbf"; do
+			do_test_nooffload $handle $parent $cont $leaf
+		done
+	done
+
+	do_test_nooffload $handle $parent drr
+}
+
+test_root()
+{
+	do_test_combinations 1 0
+}
+
+test_port_tbf()
+{
+	with_tbf 1: root \
+		do_test_combinations 8 1
+}
+
+do_test_etsprio()
+{
+	local parent=$1; shift
+	local tbfpfx=$1; shift
+	local cont
+
+	for cont in ets prio; do
+		RET=0
+		with_$cont 8: "$parent" \
+			with_red 11: "parent 8:1" \
+			with_red 12: "parent 8:2" \
+			with_tbf 13: "parent 8:3" \
+			with_tbf 14: "parent 8:4" \
+			check_all_offloaded
+		log_test "root$tbfpfx-ETS-{RED,TBF} offloaded"
+
+		RET=0
+		with_$cont 8: "$parent" \
+			with_red 81: "parent 8:1" \
+				with_tbf 811: "parent 81:1" \
+			with_tbf 84: "parent 8:4" \
+				with_red 841: "parent 84:1" \
+			check_all_offloaded
+		log_test "root$tbfpfx-ETS-{RED-TBF,TBF-RED} offloaded"
+
+		RET=0
+		with_$cont 8: "$parent" \
+			with_red 81: "parent 8:1" \
+				with_tbf 811: "parent 81:1" \
+					with_bfifo 8111: "parent 811:1" \
+			with_tbf 82: "parent 8:2" \
+				with_red 821: "parent 82:1" \
+					with_bfifo 8211: "parent 821:1" \
+			check_all_offloaded
+		log_test "root$tbfpfx-ETS-{RED-TBF-bFIFO,TBF-RED-bFIFO} offloaded"
+	done
+}
+
+test_etsprio()
+{
+	do_test_etsprio root ""
+}
+
+test_etsprio_port_tbf()
+{
+	with_tbf 1: root \
+		do_test_etsprio "parent 1:1" "-TBF"
+}
+
+cleanup()
+{
+	tc qdisc del dev $h1 root &>/dev/null
+}
+
+trap cleanup EXIT
+h1=${NETIFS[p1]}
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh
index 33ddd01689be..f260f01db0e8 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh
@@ -73,6 +73,7 @@ CHECK_TC="yes"
 lib_dir=$(dirname $0)/../../../net/forwarding
 source $lib_dir/lib.sh
 source $lib_dir/devlink_lib.sh
+source mlxsw_lib.sh
 source qos_lib.sh
 
 ipaddr()
@@ -331,6 +332,14 @@ get_nmarked()
 	ethtool_stats_get $swp3 ecn_marked
 }
 
+get_qdisc_nmarked()
+{
+	local vlan=$1; shift
+
+	busywait_for_counter 1100 +1 \
+		qdisc_stats_get $swp3 $(get_qdisc_handle $vlan) .marked
+}
+
 get_qdisc_npackets()
 {
 	local vlan=$1; shift
@@ -384,14 +393,15 @@ build_backlog()
 
 check_marking()
 {
+	local get_nmarked=$1; shift
 	local vlan=$1; shift
 	local cond=$1; shift
 
 	local npackets_0=$(get_qdisc_npackets $vlan)
-	local nmarked_0=$(get_nmarked $vlan)
+	local nmarked_0=$($get_nmarked $vlan)
 	sleep 5
 	local npackets_1=$(get_qdisc_npackets $vlan)
-	local nmarked_1=$(get_nmarked $vlan)
+	local nmarked_1=$($get_nmarked $vlan)
 
 	local nmarked_d=$((nmarked_1 - nmarked_0))
 	local npackets_d=$((npackets_1 - npackets_0))
@@ -404,6 +414,7 @@ check_marking()
 ecn_test_common()
 {
 	local name=$1; shift
+	local get_nmarked=$1; shift
 	local vlan=$1; shift
 	local limit=$1; shift
 	local backlog
@@ -416,7 +427,7 @@ ecn_test_common()
 	RET=0
 	backlog=$(build_backlog $vlan $((2 * limit / 3)) udp)
 	check_err $? "Could not build the requested backlog"
-	pct=$(check_marking $vlan "== 0")
+	pct=$(check_marking "$get_nmarked" $vlan "== 0")
 	check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0."
 	log_test "TC $((vlan - 10)): $name backlog < limit"
 
@@ -426,22 +437,23 @@ ecn_test_common()
 	RET=0
 	backlog=$(build_backlog $vlan $((3 * limit / 2)) tcp tos=0x01)
 	check_err $? "Could not build the requested backlog"
-	pct=$(check_marking $vlan ">= 95")
+	pct=$(check_marking "$get_nmarked" $vlan ">= 95")
 	check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected >= 95."
 	log_test "TC $((vlan - 10)): $name backlog > limit"
 }
 
-do_ecn_test()
+__do_ecn_test()
 {
+	local get_nmarked=$1; shift
 	local vlan=$1; shift
 	local limit=$1; shift
-	local name=ECN
+	local name=${1-ECN}; shift
 
 	start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \
 			  $h3_mac tos=0x01
 	sleep 1
 
-	ecn_test_common "$name" $vlan $limit
+	ecn_test_common "$name" "$get_nmarked" $vlan $limit
 
 	# Up there we saw that UDP gets accepted when backlog is below the
 	# limit. Now that it is above, it should all get dropped, and backlog
@@ -455,6 +467,23 @@ do_ecn_test()
 	sleep 1
 }
 
+do_ecn_test()
+{
+	local vlan=$1; shift
+	local limit=$1; shift
+
+	__do_ecn_test get_nmarked "$vlan" "$limit"
+}
+
+do_ecn_test_perband()
+{
+	local vlan=$1; shift
+	local limit=$1; shift
+
+	mlxsw_only_on_spectrum 3+ || return
+	__do_ecn_test get_qdisc_nmarked "$vlan" "$limit" "per-band ECN"
+}
+
 do_ecn_nodrop_test()
 {
 	local vlan=$1; shift
@@ -465,7 +494,7 @@ do_ecn_nodrop_test()
 			  $h3_mac tos=0x01
 	sleep 1
 
-	ecn_test_common "$name" $vlan $limit
+	ecn_test_common "$name" get_nmarked $vlan $limit
 
 	# Up there we saw that UDP gets accepted when backlog is below the
 	# limit. Now that it is above, in nodrop mode, make sure it goes to
@@ -495,7 +524,7 @@ do_red_test()
 	RET=0
 	backlog=$(build_backlog $vlan $((2 * limit / 3)) tcp tos=0x01)
 	check_err $? "Could not build the requested backlog"
-	pct=$(check_marking $vlan "== 0")
+	pct=$(check_marking get_nmarked $vlan "== 0")
 	check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0."
 	log_test "TC $((vlan - 10)): RED backlog < limit"
 
@@ -503,7 +532,7 @@ do_red_test()
 	RET=0
 	backlog=$(build_backlog $vlan $((3 * limit / 2)) tcp tos=0x01)
 	check_fail $? "Traffic went into backlog instead of being early-dropped"
-	pct=$(check_marking $vlan "== 0")
+	pct=$(check_marking get_nmarked $vlan "== 0")
 	check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0."
 	local diff=$((limit - backlog))
 	pct=$((100 * diff / limit))
@@ -544,6 +573,55 @@ do_mc_backlog_test()
 	log_test "TC $((vlan - 10)): Qdisc reports MC backlog"
 }
 
+do_mark_test()
+{
+	local vlan=$1; shift
+	local limit=$1; shift
+	local subtest=$1; shift
+	local fetch_counter=$1; shift
+	local should_fail=$1; shift
+	local base
+
+	mlxsw_only_on_spectrum 2+ || return
+
+	RET=0
+
+	start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \
+			  $h3_mac tos=0x01
+
+	# Create a bit of a backlog and observe no mirroring due to marks.
+	qevent_rule_install_$subtest
+
+	build_backlog $vlan $((2 * limit / 3)) tcp tos=0x01 >/dev/null
+
+	base=$($fetch_counter)
+	count=$(busywait 1100 until_counter_is ">= $((base + 1))" \
+		$fetch_counter)
+	check_fail $? "Spurious packets ($base -> $count) observed without buffer pressure"
+
+	# Above limit, everything should be mirrored, we should see lots of
+	# packets.
+	build_backlog $vlan $((3 * limit / 2)) tcp tos=0x01 >/dev/null
+	busywait_for_counter 1100 +10000 \
+		 $fetch_counter > /dev/null
+	check_err_fail "$should_fail" $? "ECN-marked packets $subtest'd"
+
+	# When the rule is uninstalled, there should be no mirroring.
+	qevent_rule_uninstall_$subtest
+	busywait_for_counter 1100 +10 \
+		 $fetch_counter > /dev/null
+	check_fail $? "Spurious packets observed after uninstall"
+
+	if ((should_fail)); then
+		log_test "TC $((vlan - 10)): marked packets not $subtest'd"
+	else
+		log_test "TC $((vlan - 10)): marked packets $subtest'd"
+	fi
+
+	stop_traffic
+	sleep 1
+}
+
 do_drop_test()
 {
 	local vlan=$1; shift
@@ -551,10 +629,10 @@ do_drop_test()
 	local trigger=$1; shift
 	local subtest=$1; shift
 	local fetch_counter=$1; shift
-	local backlog
 	local base
 	local now
-	local pct
+
+	mlxsw_only_on_spectrum 2+ || return
 
 	RET=0
 
@@ -628,6 +706,22 @@ do_drop_mirror_test()
 	tc filter del dev $h2 ingress pref 1 handle 101 flower
 }
 
+do_mark_mirror_test()
+{
+	local vlan=$1; shift
+	local limit=$1; shift
+
+	tc filter add dev $h2 ingress pref 1 handle 101 prot ip \
+	   flower skip_sw ip_proto tcp \
+	   action drop
+
+	do_mark_test "$vlan" "$limit" mirror \
+		     qevent_counter_fetch_mirror \
+		     $(: should_fail=)0
+
+	tc filter del dev $h2 ingress pref 1 handle 101 flower
+}
+
 qevent_rule_install_trap()
 {
 	tc filter add block 10 pref 1234 handle 102 matchall skip_sw \
@@ -655,3 +749,14 @@ do_drop_trap_test()
 	do_drop_test "$vlan" "$limit" "$trap_name" trap \
 		     "qevent_counter_fetch_trap $trap_name"
 }
+
+qevent_rule_install_trap_fwd()
+{
+	tc filter add block 10 pref 1234 handle 102 matchall skip_sw \
+	   action trap_fwd hw_stats disabled
+}
+
+qevent_rule_uninstall_trap_fwd()
+{
+	tc filter del block 10 pref 1234 handle 102 matchall
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh
index f3ef3274f9b3..1e5ad3209436 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh
@@ -4,11 +4,13 @@
 ALL_TESTS="
 	ping_ipv4
 	ecn_test
+	ecn_test_perband
 	ecn_nodrop_test
 	red_test
 	mc_backlog_test
 	red_mirror_test
 	red_trap_test
+	ecn_mirror_test
 "
 : ${QDISC:=ets}
 source sch_red_core.sh
@@ -21,28 +23,60 @@ source sch_red_core.sh
 BACKLOG1=200000
 BACKLOG2=500000
 
-install_qdisc()
+install_root_qdisc()
 {
-	local -a args=("$@")
-
 	tc qdisc add dev $swp3 root handle 10: $QDISC \
 	   bands 8 priomap 7 6 5 4 3 2 1 0
+}
+
+install_qdisc_tc0()
+{
+	local -a args=("$@")
+
 	tc qdisc add dev $swp3 parent 10:8 handle 108: red \
 	   limit 1000000 min $BACKLOG1 max $((BACKLOG1 + 1)) \
 	   probability 1.0 avpkt 8000 burst 38 "${args[@]}"
+}
+
+install_qdisc_tc1()
+{
+	local -a args=("$@")
+
 	tc qdisc add dev $swp3 parent 10:7 handle 107: red \
 	   limit 1000000 min $BACKLOG2 max $((BACKLOG2 + 1)) \
 	   probability 1.0 avpkt 8000 burst 63 "${args[@]}"
+}
+
+install_qdisc()
+{
+	install_root_qdisc
+	install_qdisc_tc0 "$@"
+	install_qdisc_tc1 "$@"
 	sleep 1
 }
 
-uninstall_qdisc()
+uninstall_qdisc_tc0()
 {
-	tc qdisc del dev $swp3 parent 10:7
 	tc qdisc del dev $swp3 parent 10:8
+}
+
+uninstall_qdisc_tc1()
+{
+	tc qdisc del dev $swp3 parent 10:7
+}
+
+uninstall_root_qdisc()
+{
 	tc qdisc del dev $swp3 root
 }
 
+uninstall_qdisc()
+{
+	uninstall_qdisc_tc0
+	uninstall_qdisc_tc1
+	uninstall_root_qdisc
+}
+
 ecn_test()
 {
 	install_qdisc ecn
@@ -53,6 +87,16 @@ ecn_test()
 	uninstall_qdisc
 }
 
+ecn_test_perband()
+{
+	install_qdisc ecn
+
+	do_ecn_test_perband 10 $BACKLOG1
+	do_ecn_test_perband 11 $BACKLOG2
+
+	uninstall_qdisc
+}
+
 ecn_nodrop_test()
 {
 	install_qdisc ecn nodrop
@@ -112,6 +156,16 @@ red_trap_test()
 	uninstall_qdisc
 }
 
+ecn_mirror_test()
+{
+	install_qdisc ecn qevent mark block 10
+
+	do_mark_mirror_test 10 $BACKLOG1
+	do_mark_mirror_test 11 $BACKLOG2
+
+	uninstall_qdisc
+}
+
 trap cleanup EXIT
 
 setup_prepare
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh
index ede9c38d3eff..d79a82f317d2 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh
@@ -4,6 +4,7 @@
 ALL_TESTS="
 	ping_ipv4
 	ecn_test
+	ecn_test_perband
 	ecn_nodrop_test
 	red_test
 	mc_backlog_test
@@ -35,6 +36,13 @@ ecn_test()
 	uninstall_qdisc
 }
 
+ecn_test_perband()
+{
+	install_qdisc ecn
+	do_ecn_test_perband 10 $BACKLOG
+	uninstall_qdisc
+}
+
 ecn_nodrop_test()
 {
 	install_qdisc ecn nodrop
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/devlink_trap_tunnel_ipip6.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/devlink_trap_tunnel_ipip6.sh
new file mode 100755
index 000000000000..f62ce479c266
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/devlink_trap_tunnel_ipip6.sh
@@ -0,0 +1,250 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test devlink-trap tunnel exceptions functionality over mlxsw.
+# Check all exception traps to make sure they are triggered under the right
+# conditions.
+
+# +-------------------------+
+# | H1                      |
+# |               $h1 +     |
+# |  2001:db8:1::1/64 |     |
+# +-------------------|-----+
+#                     |
+# +-------------------|-----+
+# | SW1               |     |
+# |             $swp1 +     |
+# |  2001:db8:1::2/64       |
+# |                         |
+# |  + g1 (ip6gre)          |
+# |    loc=2001:db8:3::1    |
+# |    rem=2001:db8:3::2    |
+# |    tos=inherit          |
+# |                         |
+# |  + $rp1                 |
+# |  | 2001:db8:10::1/64    |
+# +--|----------------------+
+#    |
+# +--|----------------------+
+# |  |                 VRF2 |
+# |  + $rp2                 |
+# |    2001:db8:10::2/64    |
+# +-------------------------+
+
+lib_dir=$(dirname $0)/../../../../net/forwarding
+
+ALL_TESTS="
+	decap_error_test
+"
+
+NUM_NETIFS=4
+source $lib_dir/lib.sh
+source $lib_dir/tc_common.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 2001:db8:1::1/64
+}
+
+vrf2_create()
+{
+	simple_if_init $rp2 2001:db8:10::2/64
+}
+
+vrf2_destroy()
+{
+	simple_if_fini $rp2 2001:db8:10::2/64
+}
+
+switch_create()
+{
+	ip link set dev $swp1 up
+	__addr_add_del $swp1 add 2001:db8:1::2/64
+	tc qdisc add dev $swp1 clsact
+
+	tunnel_create g1 ip6gre 2001:db8:3::1 2001:db8:3::2 tos inherit \
+		ttl inherit
+	ip link set dev g1 up
+	__addr_add_del g1 add 2001:db8:3::1/128
+
+	ip link set dev $rp1 up
+	__addr_add_del $rp1 add 2001:db8:10::1/64
+}
+
+switch_destroy()
+{
+	__addr_add_del $rp1 del 2001:db8:10::1/64
+	ip link set dev $rp1 down
+
+	__addr_add_del g1 del 2001:db8:3::1/128
+	ip link set dev g1 down
+	tunnel_destroy g1
+
+	tc qdisc del dev $swp1 clsact
+	__addr_add_del $swp1 del 2001:db8:1::2/64
+	ip link set dev $swp1 down
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	rp1=${NETIFS[p3]}
+	rp2=${NETIFS[p4]}
+
+	forwarding_enable
+	vrf_prepare
+	h1_create
+	switch_create
+	vrf2_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	vrf2_destroy
+	switch_destroy
+	h1_destroy
+	vrf_cleanup
+	forwarding_restore
+}
+
+ipip_payload_get()
+{
+	local saddr="20:01:0d:b8:00:02:00:00:00:00:00:00:00:00:00:01"
+	local daddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01"
+	local flags=$1; shift
+	local key=$1; shift
+
+	p=$(:
+		)"$flags"$(		      : GRE flags
+	        )"0:00:"$(                    : Reserved + version
+		)"86:dd:"$(		      : ETH protocol type
+		)"$key"$( 		      : Key
+		)"6"$(	                      : IP version
+		)"0:0"$(		      : Traffic class
+		)"0:00:00:"$(		      : Flow label
+		)"00:00:"$(                   : Payload length
+		)"3a:"$(                      : Next header
+		)"04:"$(                      : Hop limit
+		)"$saddr:"$(                  : IP saddr
+		)"$daddr:"$(                  : IP daddr
+		)
+	echo $p
+}
+
+ecn_payload_get()
+{
+	echo $(ipip_payload_get "0")
+}
+
+ecn_decap_test()
+{
+	local trap_name="decap_error"
+	local desc=$1; shift
+	local ecn_desc=$1; shift
+	local outer_tos=$1; shift
+	local mz_pid
+
+	RET=0
+
+	tc filter add dev $swp1 egress protocol ipv6 pref 1 handle 101 \
+		flower src_ip 2001:db8:2::1 dst_ip 2001:db8:1::1 skip_sw \
+		action pass
+
+	rp1_mac=$(mac_get $rp1)
+	rp2_mac=$(mac_get $rp2)
+	payload=$(ecn_payload_get)
+
+	ip vrf exec v$rp2 $MZ -6 $rp2 -c 0 -d 1msec -a $rp2_mac -b $rp1_mac \
+		-A 2001:db8:3::2 -B 2001:db8:3::1 -t ip \
+			tos=$outer_tos,next=47,p=$payload -q &
+	mz_pid=$!
+
+	devlink_trap_exception_test $trap_name
+
+	tc_check_packets "dev $swp1 egress" 101 0
+	check_err $? "Packets were not dropped"
+
+	log_test "$desc: Inner ECN is not ECT and outer is $ecn_desc"
+
+	kill $mz_pid && wait $mz_pid &> /dev/null
+	tc filter del dev $swp1 egress protocol ipv6 pref 1 handle 101 flower
+}
+
+no_matching_tunnel_test()
+{
+	local trap_name="decap_error"
+	local desc=$1; shift
+	local sip=$1; shift
+	local mz_pid
+
+	RET=0
+
+	tc filter add dev $swp1 egress protocol ipv6 pref 1 handle 101 \
+		flower src_ip 2001:db8:2::1 dst_ip 2001:db8:1::1 action pass
+
+	rp1_mac=$(mac_get $rp1)
+	rp2_mac=$(mac_get $rp2)
+	payload=$(ipip_payload_get "$@")
+
+	ip vrf exec v$rp2 $MZ -6 $rp2 -c 0 -d 1msec -a $rp2_mac -b $rp1_mac \
+		-A $sip -B 2001:db8:3::1 -t ip next=47,p=$payload -q &
+	mz_pid=$!
+
+	devlink_trap_exception_test $trap_name
+
+	tc_check_packets "dev $swp1 egress" 101 0
+	check_err $? "Packets were not dropped"
+
+	log_test "$desc"
+
+	kill $mz_pid && wait $mz_pid &> /dev/null
+	tc filter del dev $swp1 egress protocol ipv6 pref 1 handle 101 flower
+}
+
+decap_error_test()
+{
+	# Correct source IP - the remote address
+	local sip=2001:db8:3::2
+
+	ecn_decap_test "Decap error" "ECT(1)" 01
+	ecn_decap_test "Decap error" "ECT(0)" 02
+	ecn_decap_test "Decap error" "CE" 03
+
+	no_matching_tunnel_test "Decap error: Source IP check failed" \
+		2001:db8:4::2 "0"
+	no_matching_tunnel_test \
+		"Decap error: Key exists but was not expected" $sip "2" \
+		"00:00:00:E9:"
+
+	# Destroy the tunnel and create new one with key
+	__addr_add_del g1 del 2001:db8:3::1/128
+	tunnel_destroy g1
+
+	tunnel_create g1 ip6gre 2001:db8:3::1 2001:db8:3::2 tos inherit \
+		ttl inherit key 233
+	__addr_add_del g1 add 2001:db8:3::1/128
+
+	no_matching_tunnel_test \
+		"Decap error: Key does not exist but was expected" $sip "0"
+	no_matching_tunnel_test \
+		"Decap error: Packet has a wrong key field" $sip "2" \
+		"00:00:00:E8:"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
index 50654f8a8c37..e9f65bd2e299 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
@@ -7,12 +7,9 @@ NUM_NETIFS=6
 source $lib_dir/lib.sh
 source $lib_dir/tc_common.sh
 source $lib_dir/devlink_lib.sh
+source ../mlxsw_lib.sh
 
-if [[ "$DEVLINK_VIDDID" != "15b3:cf6c" && \
-	"$DEVLINK_VIDDID" != "15b3:cf70" ]]; then
-	echo "SKIP: test is tailored for Mellanox Spectrum-2 and Spectrum-3"
-	exit 1
-fi
+mlxsw_only_on_spectrum 2+ || exit 1
 
 current_test=""
 
@@ -28,7 +25,7 @@ cleanup()
 
 trap cleanup EXIT
 
-ALL_TESTS="router tc_flower mirror_gre tc_police port"
+ALL_TESTS="router tc_flower mirror_gre tc_police port rif_mac_profile"
 for current_test in ${TESTS:-$ALL_TESTS}; do
 	RET_FIN=0
 	source ${current_test}_scale.sh
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/rif_mac_profile_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/rif_mac_profile_scale.sh
new file mode 100644
index 000000000000..303d7cbe3c45
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/rif_mac_profile_scale.sh
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../rif_mac_profile_scale.sh
+
+rif_mac_profile_get_target()
+{
+	local should_fail=$1
+	local target
+
+	target=$(devlink_resource_size_get rif_mac_profiles)
+
+	if ((! should_fail)); then
+		echo $target
+	else
+		echo $((target + 1))
+	fi
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_lib_spectrum.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_lib_spectrum.sh
index 73035e25085d..06a80f40daa4 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_lib_spectrum.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_lib_spectrum.sh
@@ -2,11 +2,9 @@
 # SPDX-License-Identifier: GPL-2.0
 
 source "../../../../net/forwarding/devlink_lib.sh"
+source ../mlxsw_lib.sh
 
-if [ "$DEVLINK_VIDDID" != "15b3:cb84" ]; then
-	echo "SKIP: test is tailored for Mellanox Spectrum"
-	exit 1
-fi
+mlxsw_only_on_spectrum 1 || exit 1
 
 # Needed for returning to default
 declare -A KVD_DEFAULTS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
index 685dfb3478b3..bcb110e830ce 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
@@ -22,7 +22,7 @@ cleanup()
 devlink_sp_read_kvd_defaults
 trap cleanup EXIT
 
-ALL_TESTS="router tc_flower mirror_gre tc_police port"
+ALL_TESTS="router tc_flower mirror_gre tc_police port rif_mac_profile"
 for current_test in ${TESTS:-$ALL_TESTS}; do
 	RET_FIN=0
 	source ${current_test}_scale.sh
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/rif_mac_profile_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/rif_mac_profile_scale.sh
new file mode 100644
index 000000000000..303d7cbe3c45
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/rif_mac_profile_scale.sh
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../rif_mac_profile_scale.sh
+
+rif_mac_profile_get_target()
+{
+	local should_fail=$1
+	local target
+
+	target=$(devlink_resource_size_get rif_mac_profiles)
+
+	if ((! should_fail)); then
+		echo $target
+	else
+		echo $((target + 1))
+	fi
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh
index 5ec3beb637c8..0441a18f098b 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh
@@ -20,6 +20,7 @@ NUM_NETIFS=2
 source $lib_dir/tc_common.sh
 source $lib_dir/lib.sh
 source $lib_dir/devlink_lib.sh
+source mlxsw_lib.sh
 
 switch_create()
 {
@@ -169,7 +170,7 @@ matchall_sample_egress_test()
 
 	# It is forbidden in mlxsw driver to have matchall with sample action
 	# bound on egress. Spectrum-1 specific restriction
-	[[ "$DEVLINK_VIDDID" != "15b3:cb84" ]] && return
+	mlxsw_only_on_spectrum 1 || return
 
 	tc qdisc add dev $swp1 clsact
 
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh
index 373d5f2a846e..83a0210e7544 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh
@@ -51,6 +51,7 @@ NUM_NETIFS=8
 CAPTURE_FILE=$(mktemp)
 source $lib_dir/lib.sh
 source $lib_dir/devlink_lib.sh
+source mlxsw_lib.sh
 
 # Available at https://github.com/Mellanox/libpsample
 require_command psample
@@ -431,7 +432,7 @@ tc_sample_md_out_tc_test()
 	RET=0
 
 	# Output traffic class is not supported on Spectrum-1.
-	[[ "$DEVLINK_VIDDID" == "15b3:cb84" ]] && return
+	mlxsw_only_on_spectrum 2+ || return
 
 	tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
 		skip_sw action sample rate 5 group 1
@@ -477,7 +478,7 @@ tc_sample_md_out_tc_occ_test()
 	RET=0
 
 	# Output traffic class occupancy is not supported on Spectrum-1.
-	[[ "$DEVLINK_VIDDID" == "15b3:cb84" ]] && return
+	mlxsw_only_on_spectrum 2+ || return
 
 	tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
 		skip_sw action sample rate 1024 group 1
@@ -521,7 +522,7 @@ tc_sample_md_latency_test()
 	RET=0
 
 	# Egress sampling not supported on Spectrum-1.
-	[[ "$DEVLINK_VIDDID" == "15b3:cb84" ]] && return
+	mlxsw_only_on_spectrum 2+ || return
 
 	tc filter add dev $rp2 egress protocol all pref 1 handle 101 matchall \
 		skip_sw action sample rate 5 group 1
@@ -550,7 +551,7 @@ tc_sample_acl_group_conflict_test()
 	# port with different groups.
 
 	# Policy-based sampling is not supported on Spectrum-1.
-	[[ "$DEVLINK_VIDDID" == "15b3:cb84" ]] && return
+	mlxsw_only_on_spectrum 2+ || return
 
 	tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \
 		skip_sw action sample rate 1024 group 1
@@ -579,7 +580,7 @@ __tc_sample_acl_rate_test()
 	RET=0
 
 	# Policy-based sampling is not supported on Spectrum-1.
-	[[ "$DEVLINK_VIDDID" == "15b3:cb84" ]] && return
+	mlxsw_only_on_spectrum 2+ || return
 
 	tc filter add dev $port $bind protocol ip pref 1 handle 101 flower \
 		skip_sw dst_ip 198.51.100.1 action sample rate 32 group 1
@@ -631,7 +632,7 @@ tc_sample_acl_max_rate_test()
 	RET=0
 
 	# Policy-based sampling is not supported on Spectrum-1.
-	[[ "$DEVLINK_VIDDID" == "15b3:cb84" ]] && return
+	mlxsw_only_on_spectrum 2+ || return
 
 	tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \
 		skip_sw action sample rate $((2 ** 24 - 1)) group 1
diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh
index 7ca1f030d209..922744059aaa 100644
--- a/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh
@@ -50,7 +50,7 @@ function make_netdev {
 	modprobe netdevsim
     fi
 
-    echo $NSIM_ID > /sys/bus/netdevsim/new_device
+    echo $NSIM_ID $@ > /sys/bus/netdevsim/new_device
     # get new device name
     ls /sys/bus/netdevsim/devices/netdevsim${NSIM_ID}/net/
 }
diff --git a/tools/testing/selftests/drivers/net/netdevsim/tc-mq-visibility.sh b/tools/testing/selftests/drivers/net/netdevsim/tc-mq-visibility.sh
new file mode 100755
index 000000000000..fd13c8cfb7a8
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/tc-mq-visibility.sh
@@ -0,0 +1,77 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+
+source ethtool-common.sh
+
+set -o pipefail
+
+n_children() {
+    n=$(tc qdisc show dev $NDEV | grep '^qdisc' | wc -l)
+    echo $((n - 1))
+}
+
+tcq() {
+    tc qdisc $1 dev $NDEV ${@:2}
+}
+
+n_child_assert() {
+    n=$(n_children)
+    if [ $n -ne $1 ]; then
+	echo "ERROR ($root): ${@:2}, expected $1 have $n"
+	((num_errors++))
+    else
+	((num_passes++))
+    fi
+}
+
+
+for root in mq mqprio; do
+    NDEV=$(make_netdev 1 4)
+
+    opts=
+    [ $root == "mqprio" ] && opts='hw 0 num_tc 1 map 0 0 0 0  queues 1@0'
+
+    tcq add root handle 100: $root $opts
+    n_child_assert 4 'Init'
+
+    # All defaults
+
+    for n in 3 2 1 2 3 4 1 4; do
+	ethtool -L $NDEV combined $n
+	n_child_assert $n "Change queues to $n while down"
+    done
+
+    ip link set dev $NDEV up
+
+    for n in 3 2 1 2 3 4 1 4; do
+	ethtool -L $NDEV combined $n
+	n_child_assert $n "Change queues to $n while up"
+    done
+
+    # One real one
+    tcq replace parent 100:4 handle 204: pfifo_fast
+    n_child_assert 4 "One real queue"
+
+    ethtool -L $NDEV combined 1
+    n_child_assert 2 "One real queue, one default"
+
+    ethtool -L $NDEV combined 4
+    n_child_assert 4 "One real queue, rest default"
+
+    # Graft some
+    tcq replace parent 100:1 handle 204:
+    n_child_assert 3 "Grafted"
+
+    ethtool -L $NDEV combined 1
+    n_child_assert 1 "Grafted, one"
+
+    cleanup_nsim
+done
+
+if [ $num_errors -eq 0 ]; then
+    echo "PASSED all $((num_passes)) checks"
+    exit 0
+else
+    echo "FAILED $num_errors/$((num_errors+num_passes)) checks"
+    exit 1
+fi
diff --git a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
index f7d84549cc3e..eaf8a04a7ca5 100755
--- a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
+++ b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
@@ -156,6 +156,11 @@ create_tcam_skeleton()
 
 setup_prepare()
 {
+	ip link set $eth0 up
+	ip link set $eth1 up
+	ip link set $eth2 up
+	ip link set $eth3 up
+
 	create_tcam_skeleton $eth0
 
 	ip link add br0 type bridge
@@ -242,9 +247,9 @@ test_vlan_push()
 	tcpdump_cleanup
 }
 
-test_vlan_modify()
+test_vlan_ingress_modify()
 {
-	printf "Testing VLAN modification..		"
+	printf "Testing ingress VLAN modification..		"
 
 	ip link set br0 type bridge vlan_filtering 1
 	bridge vlan add dev $eth0 vid 200
@@ -280,6 +285,44 @@ test_vlan_modify()
 	ip link set br0 type bridge vlan_filtering 0
 }
 
+test_vlan_egress_modify()
+{
+	printf "Testing egress VLAN modification..		"
+
+	tc qdisc add dev $eth1 clsact
+
+	ip link set br0 type bridge vlan_filtering 1
+	bridge vlan add dev $eth0 vid 200
+	bridge vlan add dev $eth1 vid 200
+
+	tc filter add dev $eth1 egress chain $(ES0) pref 3 \
+		protocol 802.1Q flower skip_sw vlan_id 200 vlan_prio 0 \
+		action vlan modify id 300 priority 7
+
+	tcpdump_start $eth2
+
+	$MZ $eth3.200 -q -c 1 -p 64 -a $eth3_mac -b $eth2_mac -t ip
+
+	sleep 1
+
+	tcpdump_stop
+
+	if tcpdump_show | grep -q "$eth3_mac > $eth2_mac, .* vlan 300"; then
+		echo "OK"
+	else
+		echo "FAIL"
+	fi
+
+	tcpdump_cleanup
+
+	tc filter del dev $eth1 egress chain $(ES0) pref 3
+	tc qdisc del dev $eth1 clsact
+
+	bridge vlan del dev $eth0 vid 200
+	bridge vlan del dev $eth1 vid 200
+	ip link set br0 type bridge vlan_filtering 0
+}
+
 test_skbedit_priority()
 {
 	local num_pkts=100
@@ -304,7 +347,8 @@ trap cleanup EXIT
 ALL_TESTS="
 	test_vlan_pop
 	test_vlan_push
-	test_vlan_modify
+	test_vlan_ingress_modify
+	test_vlan_egress_modify
 	test_skbedit_priority
 "
 
diff --git a/tools/testing/selftests/ftrace/ftracetest b/tools/testing/selftests/ftrace/ftracetest
index 8ec1922e974e..c3311c8c4089 100755
--- a/tools/testing/selftests/ftrace/ftracetest
+++ b/tools/testing/selftests/ftrace/ftracetest
@@ -428,7 +428,7 @@ for t in $TEST_CASES; do
     exit 1
   fi
 done
-(cd $TRACING_DIR; initialize_ftrace) # for cleanup
+(cd $TRACING_DIR; finish_ftrace) # for cleanup
 
 prlog ""
 prlog "# of passed: " `echo $PASSED_CASES | wc -w`
diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions
index 000fd05e84b1..5f6cbec847fc 100644
--- a/tools/testing/selftests/ftrace/test.d/functions
+++ b/tools/testing/selftests/ftrace/test.d/functions
@@ -124,10 +124,22 @@ initialize_ftrace() { # Reset ftrace to initial-state
     [ -f uprobe_events ] && echo > uprobe_events
     [ -f synthetic_events ] && echo > synthetic_events
     [ -f snapshot ] && echo 0 > snapshot
+
+# Stop tracing while reading the trace file by default, to prevent
+# the test results while checking it and to avoid taking a long time
+# to check the result.
+    [ -f options/pause-on-trace ] && echo 1 > options/pause-on-trace
+
     clear_trace
     enable_tracing
 }
 
+finish_ftrace() {
+    initialize_ftrace
+# And recover it to default.
+    [ -f options/pause-on-trace ] && echo 0 > options/pause-on-trace
+}
+
 check_requires() { # Check required files and tracers
     for i in "$@" ; do
         r=${i%:README}
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-expressions.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-expressions.tc
new file mode 100644
index 000000000000..05ffba299dbf
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-expressions.tc
@@ -0,0 +1,63 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event trigger - test histogram expression parsing
+# requires: set_event events/sched/sched_process_fork/trigger events/sched/sched_process_fork/hist error_log "<var1>=<field|var_ref|numeric_literal>":README
+
+
+fail() { #msg
+    echo $1
+    exit_fail
+}
+
+test_hist_expr() { # test_name expression expected_val
+    trigger="events/sched/sched_process_fork/trigger"
+
+    reset_trigger_file $trigger
+
+    echo "Test hist trigger expressions - $1"
+
+    echo "hist:keys=common_pid:x=$2" > $trigger
+
+    for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done
+
+    actual=`grep -o 'x=[[:digit:]]*' $trigger | awk -F= '{ print $2 }'`
+
+    if [ $actual != $3 ]; then
+        fail "Failed hist trigger expression evaluation: Expression: $2 Expected: $3, Actual: $actual"
+    fi
+
+    reset_trigger_file $trigger
+}
+
+check_error() { # test_name command-with-error-pos-by-^
+    trigger="events/sched/sched_process_fork/trigger"
+
+    echo "Test hist trigger expressions - $1"
+    ftrace_errlog_check 'hist:sched:sched_process_fork' "$2" $trigger
+}
+
+test_hist_expr "Variable assignment" "123" "123"
+
+test_hist_expr "Subtraction not associative" "16-8-4-2" "2"
+
+test_hist_expr "Division not associative" "64/8/4/2" "1"
+
+test_hist_expr "Same precedence operators (+,-) evaluated left to right" "16-8+4+2" "14"
+
+test_hist_expr "Same precedence operators (*,/) evaluated left to right" "4*3/2*2" "12"
+
+test_hist_expr "Multiplication evaluated before addition/subtraction" "4+3*2-2" "8"
+
+test_hist_expr "Division evaluated before addition/subtraction" "4+6/2-2" "5"
+
+# err pos for "too many subexpressions" is dependent on where
+# the last subexpression was detected. This can vary depending
+# on how the expression tree was generated.
+check_error "Too many subexpressions" 'hist:keys=common_pid:x=32+^10*3/20-4'
+check_error "Too many subexpressions" 'hist:keys=common_pid:x=^1+2+3+4+5'
+
+check_error "Unary minus not supported in subexpression" 'hist:keys=common_pid:x=-(^1)+2'
+
+check_error "Division by zero" 'hist:keys=common_pid:x=3/^0'
+
+exit 0
diff --git a/tools/testing/selftests/futex/functional/.gitignore b/tools/testing/selftests/futex/functional/.gitignore
index 0e78b49d0f2f..fbcbdb6963b3 100644
--- a/tools/testing/selftests/futex/functional/.gitignore
+++ b/tools/testing/selftests/futex/functional/.gitignore
@@ -8,3 +8,4 @@ futex_wait_uninitialized_heap
 futex_wait_wouldblock
 futex_wait
 futex_requeue
+futex_waitv
diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile
index bd1fec59e010..5cc38de9d8ea 100644
--- a/tools/testing/selftests/futex/functional/Makefile
+++ b/tools/testing/selftests/futex/functional/Makefile
@@ -17,7 +17,8 @@ TEST_GEN_FILES := \
 	futex_wait_uninitialized_heap \
 	futex_wait_private_mapped_file \
 	futex_wait \
-	futex_requeue
+	futex_requeue \
+	futex_waitv
 
 TEST_PROGS := run.sh
 
diff --git a/tools/testing/selftests/futex/functional/futex_wait_timeout.c b/tools/testing/selftests/futex/functional/futex_wait_timeout.c
index 1f8f6daaf1e7..3651ce17beeb 100644
--- a/tools/testing/selftests/futex/functional/futex_wait_timeout.c
+++ b/tools/testing/selftests/futex/functional/futex_wait_timeout.c
@@ -17,6 +17,7 @@
 
 #include <pthread.h>
 #include "futextest.h"
+#include "futex2test.h"
 #include "logging.h"
 
 #define TEST_NAME "futex-wait-timeout"
@@ -96,6 +97,12 @@ int main(int argc, char *argv[])
 	struct timespec to;
 	pthread_t thread;
 	int c;
+	struct futex_waitv waitv = {
+			.uaddr = (uintptr_t)&f1,
+			.val = f1,
+			.flags = FUTEX_32,
+			.__reserved = 0
+		};
 
 	while ((c = getopt(argc, argv, "cht:v:")) != -1) {
 		switch (c) {
@@ -118,7 +125,7 @@ int main(int argc, char *argv[])
 	}
 
 	ksft_print_header();
-	ksft_set_plan(7);
+	ksft_set_plan(9);
 	ksft_print_msg("%s: Block on a futex and wait for timeout\n",
 	       basename(argv[0]));
 	ksft_print_msg("\tArguments: timeout=%ldns\n", timeout_ns);
@@ -175,6 +182,18 @@ int main(int argc, char *argv[])
 	res = futex_lock_pi(&futex_pi, NULL, 0, FUTEX_CLOCK_REALTIME);
 	test_timeout(res, &ret, "futex_lock_pi invalid timeout flag", ENOSYS);
 
+	/* futex_waitv with CLOCK_MONOTONIC */
+	if (futex_get_abs_timeout(CLOCK_MONOTONIC, &to, timeout_ns))
+		return RET_FAIL;
+	res = futex_waitv(&waitv, 1, 0, &to, CLOCK_MONOTONIC);
+	test_timeout(res, &ret, "futex_waitv monotonic", ETIMEDOUT);
+
+	/* futex_waitv with CLOCK_REALTIME */
+	if (futex_get_abs_timeout(CLOCK_REALTIME, &to, timeout_ns))
+		return RET_FAIL;
+	res = futex_waitv(&waitv, 1, 0, &to, CLOCK_REALTIME);
+	test_timeout(res, &ret, "futex_waitv realtime", ETIMEDOUT);
+
 	ksft_print_cnts();
 	return ret;
 }
diff --git a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c
index 0ae390ff8164..7d7a6a06cdb7 100644
--- a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c
+++ b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c
@@ -22,6 +22,7 @@
 #include <string.h>
 #include <time.h>
 #include "futextest.h"
+#include "futex2test.h"
 #include "logging.h"
 
 #define TEST_NAME "futex-wait-wouldblock"
@@ -42,6 +43,12 @@ int main(int argc, char *argv[])
 	futex_t f1 = FUTEX_INITIALIZER;
 	int res, ret = RET_PASS;
 	int c;
+	struct futex_waitv waitv = {
+			.uaddr = (uintptr_t)&f1,
+			.val = f1+1,
+			.flags = FUTEX_32,
+			.__reserved = 0
+		};
 
 	while ((c = getopt(argc, argv, "cht:v:")) != -1) {
 		switch (c) {
@@ -61,18 +68,44 @@ int main(int argc, char *argv[])
 	}
 
 	ksft_print_header();
-	ksft_set_plan(1);
+	ksft_set_plan(2);
 	ksft_print_msg("%s: Test the unexpected futex value in FUTEX_WAIT\n",
 	       basename(argv[0]));
 
 	info("Calling futex_wait on f1: %u @ %p with val=%u\n", f1, &f1, f1+1);
 	res = futex_wait(&f1, f1+1, &to, FUTEX_PRIVATE_FLAG);
 	if (!res || errno != EWOULDBLOCK) {
-		fail("futex_wait returned: %d %s\n",
-		     res ? errno : res, res ? strerror(errno) : "");
+		ksft_test_result_fail("futex_wait returned: %d %s\n",
+				      res ? errno : res,
+				      res ? strerror(errno) : "");
 		ret = RET_FAIL;
+	} else {
+		ksft_test_result_pass("futex_wait\n");
 	}
 
-	print_result(TEST_NAME, ret);
+	if (clock_gettime(CLOCK_MONOTONIC, &to)) {
+		error("clock_gettime failed\n", errno);
+		return errno;
+	}
+
+	to.tv_nsec += timeout_ns;
+
+	if (to.tv_nsec >= 1000000000) {
+		to.tv_sec++;
+		to.tv_nsec -= 1000000000;
+	}
+
+	info("Calling futex_waitv on f1: %u @ %p with val=%u\n", f1, &f1, f1+1);
+	res = futex_waitv(&waitv, 1, 0, &to, CLOCK_MONOTONIC);
+	if (!res || errno != EWOULDBLOCK) {
+		ksft_test_result_pass("futex_waitv returned: %d %s\n",
+				      res ? errno : res,
+				      res ? strerror(errno) : "");
+		ret = RET_FAIL;
+	} else {
+		ksft_test_result_pass("futex_waitv\n");
+	}
+
+	ksft_print_cnts();
 	return ret;
 }
diff --git a/tools/testing/selftests/futex/functional/futex_waitv.c b/tools/testing/selftests/futex/functional/futex_waitv.c
new file mode 100644
index 000000000000..a94337f677e1
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_waitv.c
@@ -0,0 +1,237 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * futex_waitv() test by André Almeida <andrealmeid@collabora.com>
+ *
+ * Copyright 2021 Collabora Ltd.
+ */
+
+#include <errno.h>
+#include <error.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <pthread.h>
+#include <stdint.h>
+#include <sys/shm.h>
+#include "futextest.h"
+#include "futex2test.h"
+#include "logging.h"
+
+#define TEST_NAME "futex-wait"
+#define WAKE_WAIT_US 10000
+#define NR_FUTEXES 30
+static struct futex_waitv waitv[NR_FUTEXES];
+u_int32_t futexes[NR_FUTEXES] = {0};
+
+void usage(char *prog)
+{
+	printf("Usage: %s\n", prog);
+	printf("  -c	Use color\n");
+	printf("  -h	Display this help message\n");
+	printf("  -v L	Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
+	       VQUIET, VCRITICAL, VINFO);
+}
+
+void *waiterfn(void *arg)
+{
+	struct timespec to;
+	int res;
+
+	/* setting absolute timeout for futex2 */
+	if (clock_gettime(CLOCK_MONOTONIC, &to))
+		error("gettime64 failed\n", errno);
+
+	to.tv_sec++;
+
+	res = futex_waitv(waitv, NR_FUTEXES, 0, &to, CLOCK_MONOTONIC);
+	if (res < 0) {
+		ksft_test_result_fail("futex_waitv returned: %d %s\n",
+				      errno, strerror(errno));
+	} else if (res != NR_FUTEXES - 1) {
+		ksft_test_result_fail("futex_waitv returned: %d, expecting %d\n",
+				      res, NR_FUTEXES - 1);
+	}
+
+	return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+	pthread_t waiter;
+	int res, ret = RET_PASS;
+	struct timespec to;
+	int c, i;
+
+	while ((c = getopt(argc, argv, "cht:v:")) != -1) {
+		switch (c) {
+		case 'c':
+			log_color(1);
+			break;
+		case 'h':
+			usage(basename(argv[0]));
+			exit(0);
+		case 'v':
+			log_verbosity(atoi(optarg));
+			break;
+		default:
+			usage(basename(argv[0]));
+			exit(1);
+		}
+	}
+
+	ksft_print_header();
+	ksft_set_plan(7);
+	ksft_print_msg("%s: Test FUTEX_WAITV\n",
+		       basename(argv[0]));
+
+	for (i = 0; i < NR_FUTEXES; i++) {
+		waitv[i].uaddr = (uintptr_t)&futexes[i];
+		waitv[i].flags = FUTEX_32 | FUTEX_PRIVATE_FLAG;
+		waitv[i].val = 0;
+		waitv[i].__reserved = 0;
+	}
+
+	/* Private waitv */
+	if (pthread_create(&waiter, NULL, waiterfn, NULL))
+		error("pthread_create failed\n", errno);
+
+	usleep(WAKE_WAIT_US);
+
+	res = futex_wake(u64_to_ptr(waitv[NR_FUTEXES - 1].uaddr), 1, FUTEX_PRIVATE_FLAG);
+	if (res != 1) {
+		ksft_test_result_fail("futex_wake private returned: %d %s\n",
+				      res ? errno : res,
+				      res ? strerror(errno) : "");
+		ret = RET_FAIL;
+	} else {
+		ksft_test_result_pass("futex_waitv private\n");
+	}
+
+	/* Shared waitv */
+	for (i = 0; i < NR_FUTEXES; i++) {
+		int shm_id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0666);
+
+		if (shm_id < 0) {
+			perror("shmget");
+			exit(1);
+		}
+
+		unsigned int *shared_data = shmat(shm_id, NULL, 0);
+
+		*shared_data = 0;
+		waitv[i].uaddr = (uintptr_t)shared_data;
+		waitv[i].flags = FUTEX_32;
+		waitv[i].val = 0;
+		waitv[i].__reserved = 0;
+	}
+
+	if (pthread_create(&waiter, NULL, waiterfn, NULL))
+		error("pthread_create failed\n", errno);
+
+	usleep(WAKE_WAIT_US);
+
+	res = futex_wake(u64_to_ptr(waitv[NR_FUTEXES - 1].uaddr), 1, 0);
+	if (res != 1) {
+		ksft_test_result_fail("futex_wake shared returned: %d %s\n",
+				      res ? errno : res,
+				      res ? strerror(errno) : "");
+		ret = RET_FAIL;
+	} else {
+		ksft_test_result_pass("futex_waitv shared\n");
+	}
+
+	for (i = 0; i < NR_FUTEXES; i++)
+		shmdt(u64_to_ptr(waitv[i].uaddr));
+
+	/* Testing a waiter without FUTEX_32 flag */
+	waitv[0].flags = FUTEX_PRIVATE_FLAG;
+
+	if (clock_gettime(CLOCK_MONOTONIC, &to))
+		error("gettime64 failed\n", errno);
+
+	to.tv_sec++;
+
+	res = futex_waitv(waitv, NR_FUTEXES, 0, &to, CLOCK_MONOTONIC);
+	if (res == EINVAL) {
+		ksft_test_result_fail("futex_waitv private returned: %d %s\n",
+				      res ? errno : res,
+				      res ? strerror(errno) : "");
+		ret = RET_FAIL;
+	} else {
+		ksft_test_result_pass("futex_waitv without FUTEX_32\n");
+	}
+
+	/* Testing a waiter with an unaligned address */
+	waitv[0].flags = FUTEX_PRIVATE_FLAG | FUTEX_32;
+	waitv[0].uaddr = 1;
+
+	if (clock_gettime(CLOCK_MONOTONIC, &to))
+		error("gettime64 failed\n", errno);
+
+	to.tv_sec++;
+
+	res = futex_waitv(waitv, NR_FUTEXES, 0, &to, CLOCK_MONOTONIC);
+	if (res == EINVAL) {
+		ksft_test_result_fail("futex_wake private returned: %d %s\n",
+				      res ? errno : res,
+				      res ? strerror(errno) : "");
+		ret = RET_FAIL;
+	} else {
+		ksft_test_result_pass("futex_waitv with an unaligned address\n");
+	}
+
+	/* Testing a NULL address for waiters.uaddr */
+	waitv[0].uaddr = 0x00000000;
+
+	if (clock_gettime(CLOCK_MONOTONIC, &to))
+		error("gettime64 failed\n", errno);
+
+	to.tv_sec++;
+
+	res = futex_waitv(waitv, NR_FUTEXES, 0, &to, CLOCK_MONOTONIC);
+	if (res == EINVAL) {
+		ksft_test_result_fail("futex_waitv private returned: %d %s\n",
+				      res ? errno : res,
+				      res ? strerror(errno) : "");
+		ret = RET_FAIL;
+	} else {
+		ksft_test_result_pass("futex_waitv NULL address in waitv.uaddr\n");
+	}
+
+	/* Testing a NULL address for *waiters */
+	if (clock_gettime(CLOCK_MONOTONIC, &to))
+		error("gettime64 failed\n", errno);
+
+	to.tv_sec++;
+
+	res = futex_waitv(NULL, NR_FUTEXES, 0, &to, CLOCK_MONOTONIC);
+	if (res == EINVAL) {
+		ksft_test_result_fail("futex_waitv private returned: %d %s\n",
+				      res ? errno : res,
+				      res ? strerror(errno) : "");
+		ret = RET_FAIL;
+	} else {
+		ksft_test_result_pass("futex_waitv NULL address in *waiters\n");
+	}
+
+	/* Testing an invalid clockid */
+	if (clock_gettime(CLOCK_MONOTONIC, &to))
+		error("gettime64 failed\n", errno);
+
+	to.tv_sec++;
+
+	res = futex_waitv(NULL, NR_FUTEXES, 0, &to, CLOCK_TAI);
+	if (res == EINVAL) {
+		ksft_test_result_fail("futex_waitv private returned: %d %s\n",
+				      res ? errno : res,
+				      res ? strerror(errno) : "");
+		ret = RET_FAIL;
+	} else {
+		ksft_test_result_pass("futex_waitv invalid clockid\n");
+	}
+
+	ksft_print_cnts();
+	return ret;
+}
diff --git a/tools/testing/selftests/futex/functional/run.sh b/tools/testing/selftests/futex/functional/run.sh
index 11a9d62290f5..5ccd599da6c3 100755
--- a/tools/testing/selftests/futex/functional/run.sh
+++ b/tools/testing/selftests/futex/functional/run.sh
@@ -79,3 +79,6 @@ echo
 
 echo
 ./futex_requeue $COLOR
+
+echo
+./futex_waitv $COLOR
diff --git a/tools/testing/selftests/futex/include/futex2test.h b/tools/testing/selftests/futex/include/futex2test.h
new file mode 100644
index 000000000000..9d305520e849
--- /dev/null
+++ b/tools/testing/selftests/futex/include/futex2test.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Futex2 library addons for futex tests
+ *
+ * Copyright 2021 Collabora Ltd.
+ */
+#include <stdint.h>
+
+#define u64_to_ptr(x) ((void *)(uintptr_t)(x))
+
+/**
+ * futex_waitv - Wait at multiple futexes, wake on any
+ * @waiters:    Array of waiters
+ * @nr_waiters: Length of waiters array
+ * @flags: Operation flags
+ * @timo:  Optional timeout for operation
+ */
+static inline int futex_waitv(volatile struct futex_waitv *waiters, unsigned long nr_waiters,
+			      unsigned long flags, struct timespec *timo, clockid_t clockid)
+{
+	return syscall(__NR_futex_waitv, waiters, nr_waiters, flags, timo, clockid);
+}
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index b8dbabe24ac2..d4a830139683 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -1,4 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0-only
+/aarch64/arch_timer
 /aarch64/debug-exceptions
 /aarch64/get-reg-list
 /aarch64/psci_cpu_on_test
@@ -12,6 +13,7 @@
 /x86_64/emulator_error_test
 /x86_64/get_cpuid_test
 /x86_64/get_msr_index_features
+/x86_64/kvm_clock_test
 /x86_64/kvm_pv_test
 /x86_64/hyperv_clock
 /x86_64/hyperv_cpuid
@@ -53,3 +55,4 @@
 /set_memory_region_test
 /steal_time
 /kvm_binary_stats_test
+/system_counter_offset_test
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index d1774f461393..c23e89dea0b6 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -35,7 +35,7 @@ endif
 
 LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/rbtree.c lib/sparsebit.c lib/test_util.c lib/guest_modes.c lib/perf_test_util.c
 LIBKVM_x86_64 = lib/x86_64/apic.c lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c lib/x86_64/handlers.S
-LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c lib/aarch64/handlers.S
+LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c lib/aarch64/handlers.S lib/aarch64/spinlock.c lib/aarch64/gic.c lib/aarch64/gic_v3.c lib/aarch64/vgic.c
 LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c
 
 TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test
@@ -46,6 +46,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/get_cpuid_test
 TEST_GEN_PROGS_x86_64 += x86_64/hyperv_clock
 TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid
 TEST_GEN_PROGS_x86_64 += x86_64/hyperv_features
+TEST_GEN_PROGS_x86_64 += x86_64/kvm_clock_test
 TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test
 TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test
 TEST_GEN_PROGS_x86_64 += x86_64/mmu_role_test
@@ -85,7 +86,9 @@ TEST_GEN_PROGS_x86_64 += rseq_test
 TEST_GEN_PROGS_x86_64 += set_memory_region_test
 TEST_GEN_PROGS_x86_64 += steal_time
 TEST_GEN_PROGS_x86_64 += kvm_binary_stats_test
+TEST_GEN_PROGS_x86_64 += system_counter_offset_test
 
+TEST_GEN_PROGS_aarch64 += aarch64/arch_timer
 TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions
 TEST_GEN_PROGS_aarch64 += aarch64/get-reg-list
 TEST_GEN_PROGS_aarch64 += aarch64/psci_cpu_on_test
@@ -95,6 +98,8 @@ TEST_GEN_PROGS_aarch64 += dirty_log_test
 TEST_GEN_PROGS_aarch64 += dirty_log_perf_test
 TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus
 TEST_GEN_PROGS_aarch64 += kvm_page_table_test
+TEST_GEN_PROGS_aarch64 += memslot_modification_stress_test
+TEST_GEN_PROGS_aarch64 += memslot_perf_test
 TEST_GEN_PROGS_aarch64 += rseq_test
 TEST_GEN_PROGS_aarch64 += set_memory_region_test
 TEST_GEN_PROGS_aarch64 += steal_time
diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer.c b/tools/testing/selftests/kvm/aarch64/arch_timer.c
new file mode 100644
index 000000000000..bf6a45b0b8dc
--- /dev/null
+++ b/tools/testing/selftests/kvm/aarch64/arch_timer.c
@@ -0,0 +1,479 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * arch_timer.c - Tests the aarch64 timer IRQ functionality
+ *
+ * The test validates both the virtual and physical timer IRQs using
+ * CVAL and TVAL registers. This consitutes the four stages in the test.
+ * The guest's main thread configures the timer interrupt for a stage
+ * and waits for it to fire, with a timeout equal to the timer period.
+ * It asserts that the timeout doesn't exceed the timer period.
+ *
+ * On the other hand, upon receipt of an interrupt, the guest's interrupt
+ * handler validates the interrupt by checking if the architectural state
+ * is in compliance with the specifications.
+ *
+ * The test provides command-line options to configure the timer's
+ * period (-p), number of vCPUs (-n), and iterations per stage (-i).
+ * To stress-test the timer stack even more, an option to migrate the
+ * vCPUs across pCPUs (-m), at a particular rate, is also provided.
+ *
+ * Copyright (c) 2021, Google LLC.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdlib.h>
+#include <pthread.h>
+#include <linux/kvm.h>
+#include <linux/sizes.h>
+#include <linux/bitmap.h>
+#include <sys/sysinfo.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "delay.h"
+#include "arch_timer.h"
+#include "gic.h"
+#include "vgic.h"
+
+#define NR_VCPUS_DEF			4
+#define NR_TEST_ITERS_DEF		5
+#define TIMER_TEST_PERIOD_MS_DEF	10
+#define TIMER_TEST_ERR_MARGIN_US	100
+#define TIMER_TEST_MIGRATION_FREQ_MS	2
+
+struct test_args {
+	int nr_vcpus;
+	int nr_iter;
+	int timer_period_ms;
+	int migration_freq_ms;
+};
+
+static struct test_args test_args = {
+	.nr_vcpus = NR_VCPUS_DEF,
+	.nr_iter = NR_TEST_ITERS_DEF,
+	.timer_period_ms = TIMER_TEST_PERIOD_MS_DEF,
+	.migration_freq_ms = TIMER_TEST_MIGRATION_FREQ_MS,
+};
+
+#define msecs_to_usecs(msec)		((msec) * 1000LL)
+
+#define GICD_BASE_GPA			0x8000000ULL
+#define GICR_BASE_GPA			0x80A0000ULL
+
+enum guest_stage {
+	GUEST_STAGE_VTIMER_CVAL = 1,
+	GUEST_STAGE_VTIMER_TVAL,
+	GUEST_STAGE_PTIMER_CVAL,
+	GUEST_STAGE_PTIMER_TVAL,
+	GUEST_STAGE_MAX,
+};
+
+/* Shared variables between host and guest */
+struct test_vcpu_shared_data {
+	int nr_iter;
+	enum guest_stage guest_stage;
+	uint64_t xcnt;
+};
+
+struct test_vcpu {
+	uint32_t vcpuid;
+	pthread_t pt_vcpu_run;
+	struct kvm_vm *vm;
+};
+
+static struct test_vcpu test_vcpu[KVM_MAX_VCPUS];
+static struct test_vcpu_shared_data vcpu_shared_data[KVM_MAX_VCPUS];
+
+static int vtimer_irq, ptimer_irq;
+
+static unsigned long *vcpu_done_map;
+static pthread_mutex_t vcpu_done_map_lock;
+
+static void
+guest_configure_timer_action(struct test_vcpu_shared_data *shared_data)
+{
+	switch (shared_data->guest_stage) {
+	case GUEST_STAGE_VTIMER_CVAL:
+		timer_set_next_cval_ms(VIRTUAL, test_args.timer_period_ms);
+		shared_data->xcnt = timer_get_cntct(VIRTUAL);
+		timer_set_ctl(VIRTUAL, CTL_ENABLE);
+		break;
+	case GUEST_STAGE_VTIMER_TVAL:
+		timer_set_next_tval_ms(VIRTUAL, test_args.timer_period_ms);
+		shared_data->xcnt = timer_get_cntct(VIRTUAL);
+		timer_set_ctl(VIRTUAL, CTL_ENABLE);
+		break;
+	case GUEST_STAGE_PTIMER_CVAL:
+		timer_set_next_cval_ms(PHYSICAL, test_args.timer_period_ms);
+		shared_data->xcnt = timer_get_cntct(PHYSICAL);
+		timer_set_ctl(PHYSICAL, CTL_ENABLE);
+		break;
+	case GUEST_STAGE_PTIMER_TVAL:
+		timer_set_next_tval_ms(PHYSICAL, test_args.timer_period_ms);
+		shared_data->xcnt = timer_get_cntct(PHYSICAL);
+		timer_set_ctl(PHYSICAL, CTL_ENABLE);
+		break;
+	default:
+		GUEST_ASSERT(0);
+	}
+}
+
+static void guest_validate_irq(unsigned int intid,
+				struct test_vcpu_shared_data *shared_data)
+{
+	enum guest_stage stage = shared_data->guest_stage;
+	uint64_t xcnt = 0, xcnt_diff_us, cval = 0;
+	unsigned long xctl = 0;
+	unsigned int timer_irq = 0;
+
+	if (stage == GUEST_STAGE_VTIMER_CVAL ||
+		stage == GUEST_STAGE_VTIMER_TVAL) {
+		xctl = timer_get_ctl(VIRTUAL);
+		timer_set_ctl(VIRTUAL, CTL_IMASK);
+		xcnt = timer_get_cntct(VIRTUAL);
+		cval = timer_get_cval(VIRTUAL);
+		timer_irq = vtimer_irq;
+	} else if (stage == GUEST_STAGE_PTIMER_CVAL ||
+		stage == GUEST_STAGE_PTIMER_TVAL) {
+		xctl = timer_get_ctl(PHYSICAL);
+		timer_set_ctl(PHYSICAL, CTL_IMASK);
+		xcnt = timer_get_cntct(PHYSICAL);
+		cval = timer_get_cval(PHYSICAL);
+		timer_irq = ptimer_irq;
+	} else {
+		GUEST_ASSERT(0);
+	}
+
+	xcnt_diff_us = cycles_to_usec(xcnt - shared_data->xcnt);
+
+	/* Make sure we are dealing with the correct timer IRQ */
+	GUEST_ASSERT_2(intid == timer_irq, intid, timer_irq);
+
+	/* Basic 'timer condition met' check */
+	GUEST_ASSERT_3(xcnt >= cval, xcnt, cval, xcnt_diff_us);
+	GUEST_ASSERT_1(xctl & CTL_ISTATUS, xctl);
+}
+
+static void guest_irq_handler(struct ex_regs *regs)
+{
+	unsigned int intid = gic_get_and_ack_irq();
+	uint32_t cpu = guest_get_vcpuid();
+	struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu];
+
+	guest_validate_irq(intid, shared_data);
+
+	WRITE_ONCE(shared_data->nr_iter, shared_data->nr_iter + 1);
+
+	gic_set_eoi(intid);
+}
+
+static void guest_run_stage(struct test_vcpu_shared_data *shared_data,
+				enum guest_stage stage)
+{
+	uint32_t irq_iter, config_iter;
+
+	shared_data->guest_stage = stage;
+	shared_data->nr_iter = 0;
+
+	for (config_iter = 0; config_iter < test_args.nr_iter; config_iter++) {
+		/* Setup the next interrupt */
+		guest_configure_timer_action(shared_data);
+
+		/* Setup a timeout for the interrupt to arrive */
+		udelay(msecs_to_usecs(test_args.timer_period_ms) +
+			TIMER_TEST_ERR_MARGIN_US);
+
+		irq_iter = READ_ONCE(shared_data->nr_iter);
+		GUEST_ASSERT_2(config_iter + 1 == irq_iter,
+				config_iter + 1, irq_iter);
+	}
+}
+
+static void guest_code(void)
+{
+	uint32_t cpu = guest_get_vcpuid();
+	struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu];
+
+	local_irq_disable();
+
+	gic_init(GIC_V3, test_args.nr_vcpus,
+		(void *)GICD_BASE_GPA, (void *)GICR_BASE_GPA);
+
+	timer_set_ctl(VIRTUAL, CTL_IMASK);
+	timer_set_ctl(PHYSICAL, CTL_IMASK);
+
+	gic_irq_enable(vtimer_irq);
+	gic_irq_enable(ptimer_irq);
+	local_irq_enable();
+
+	guest_run_stage(shared_data, GUEST_STAGE_VTIMER_CVAL);
+	guest_run_stage(shared_data, GUEST_STAGE_VTIMER_TVAL);
+	guest_run_stage(shared_data, GUEST_STAGE_PTIMER_CVAL);
+	guest_run_stage(shared_data, GUEST_STAGE_PTIMER_TVAL);
+
+	GUEST_DONE();
+}
+
+static void *test_vcpu_run(void *arg)
+{
+	struct ucall uc;
+	struct test_vcpu *vcpu = arg;
+	struct kvm_vm *vm = vcpu->vm;
+	uint32_t vcpuid = vcpu->vcpuid;
+	struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[vcpuid];
+
+	vcpu_run(vm, vcpuid);
+
+	/* Currently, any exit from guest is an indication of completion */
+	pthread_mutex_lock(&vcpu_done_map_lock);
+	set_bit(vcpuid, vcpu_done_map);
+	pthread_mutex_unlock(&vcpu_done_map_lock);
+
+	switch (get_ucall(vm, vcpuid, &uc)) {
+	case UCALL_SYNC:
+	case UCALL_DONE:
+		break;
+	case UCALL_ABORT:
+		sync_global_from_guest(vm, *shared_data);
+		TEST_FAIL("%s at %s:%ld\n\tvalues: %lu, %lu; %lu, vcpu: %u; stage: %u; iter: %u",
+			(const char *)uc.args[0], __FILE__, uc.args[1],
+			uc.args[2], uc.args[3], uc.args[4], vcpuid,
+			shared_data->guest_stage, shared_data->nr_iter);
+		break;
+	default:
+		TEST_FAIL("Unexpected guest exit\n");
+	}
+
+	return NULL;
+}
+
+static uint32_t test_get_pcpu(void)
+{
+	uint32_t pcpu;
+	unsigned int nproc_conf;
+	cpu_set_t online_cpuset;
+
+	nproc_conf = get_nprocs_conf();
+	sched_getaffinity(0, sizeof(cpu_set_t), &online_cpuset);
+
+	/* Randomly find an available pCPU to place a vCPU on */
+	do {
+		pcpu = rand() % nproc_conf;
+	} while (!CPU_ISSET(pcpu, &online_cpuset));
+
+	return pcpu;
+}
+
+static int test_migrate_vcpu(struct test_vcpu *vcpu)
+{
+	int ret;
+	cpu_set_t cpuset;
+	uint32_t new_pcpu = test_get_pcpu();
+
+	CPU_ZERO(&cpuset);
+	CPU_SET(new_pcpu, &cpuset);
+
+	pr_debug("Migrating vCPU: %u to pCPU: %u\n", vcpu->vcpuid, new_pcpu);
+
+	ret = pthread_setaffinity_np(vcpu->pt_vcpu_run,
+					sizeof(cpuset), &cpuset);
+
+	/* Allow the error where the vCPU thread is already finished */
+	TEST_ASSERT(ret == 0 || ret == ESRCH,
+			"Failed to migrate the vCPU:%u to pCPU: %u; ret: %d\n",
+			vcpu->vcpuid, new_pcpu, ret);
+
+	return ret;
+}
+
+static void *test_vcpu_migration(void *arg)
+{
+	unsigned int i, n_done;
+	bool vcpu_done;
+
+	do {
+		usleep(msecs_to_usecs(test_args.migration_freq_ms));
+
+		for (n_done = 0, i = 0; i < test_args.nr_vcpus; i++) {
+			pthread_mutex_lock(&vcpu_done_map_lock);
+			vcpu_done = test_bit(i, vcpu_done_map);
+			pthread_mutex_unlock(&vcpu_done_map_lock);
+
+			if (vcpu_done) {
+				n_done++;
+				continue;
+			}
+
+			test_migrate_vcpu(&test_vcpu[i]);
+		}
+	} while (test_args.nr_vcpus != n_done);
+
+	return NULL;
+}
+
+static void test_run(struct kvm_vm *vm)
+{
+	int i, ret;
+	pthread_t pt_vcpu_migration;
+
+	pthread_mutex_init(&vcpu_done_map_lock, NULL);
+	vcpu_done_map = bitmap_zalloc(test_args.nr_vcpus);
+	TEST_ASSERT(vcpu_done_map, "Failed to allocate vcpu done bitmap\n");
+
+	for (i = 0; i < test_args.nr_vcpus; i++) {
+		ret = pthread_create(&test_vcpu[i].pt_vcpu_run, NULL,
+				test_vcpu_run, &test_vcpu[i]);
+		TEST_ASSERT(!ret, "Failed to create vCPU-%d pthread\n", i);
+	}
+
+	/* Spawn a thread to control the vCPU migrations */
+	if (test_args.migration_freq_ms) {
+		srand(time(NULL));
+
+		ret = pthread_create(&pt_vcpu_migration, NULL,
+					test_vcpu_migration, NULL);
+		TEST_ASSERT(!ret, "Failed to create the migration pthread\n");
+	}
+
+
+	for (i = 0; i < test_args.nr_vcpus; i++)
+		pthread_join(test_vcpu[i].pt_vcpu_run, NULL);
+
+	if (test_args.migration_freq_ms)
+		pthread_join(pt_vcpu_migration, NULL);
+
+	bitmap_free(vcpu_done_map);
+}
+
+static void test_init_timer_irq(struct kvm_vm *vm)
+{
+	/* Timer initid should be same for all the vCPUs, so query only vCPU-0 */
+	int vcpu0_fd = vcpu_get_fd(vm, 0);
+
+	kvm_device_access(vcpu0_fd, KVM_ARM_VCPU_TIMER_CTRL,
+			KVM_ARM_VCPU_TIMER_IRQ_PTIMER, &ptimer_irq, false);
+	kvm_device_access(vcpu0_fd, KVM_ARM_VCPU_TIMER_CTRL,
+			KVM_ARM_VCPU_TIMER_IRQ_VTIMER, &vtimer_irq, false);
+
+	sync_global_to_guest(vm, ptimer_irq);
+	sync_global_to_guest(vm, vtimer_irq);
+
+	pr_debug("ptimer_irq: %d; vtimer_irq: %d\n", ptimer_irq, vtimer_irq);
+}
+
+static struct kvm_vm *test_vm_create(void)
+{
+	struct kvm_vm *vm;
+	unsigned int i;
+	int nr_vcpus = test_args.nr_vcpus;
+
+	vm = vm_create_default_with_vcpus(nr_vcpus, 0, 0, guest_code, NULL);
+
+	vm_init_descriptor_tables(vm);
+	vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, guest_irq_handler);
+
+	for (i = 0; i < nr_vcpus; i++) {
+		vcpu_init_descriptor_tables(vm, i);
+
+		test_vcpu[i].vcpuid = i;
+		test_vcpu[i].vm = vm;
+	}
+
+	ucall_init(vm, NULL);
+	test_init_timer_irq(vm);
+	vgic_v3_setup(vm, nr_vcpus, GICD_BASE_GPA, GICR_BASE_GPA);
+
+	/* Make all the test's cmdline args visible to the guest */
+	sync_global_to_guest(vm, test_args);
+
+	return vm;
+}
+
+static void test_print_help(char *name)
+{
+	pr_info("Usage: %s [-h] [-n nr_vcpus] [-i iterations] [-p timer_period_ms]\n",
+		name);
+	pr_info("\t-n: Number of vCPUs to configure (default: %u; max: %u)\n",
+		NR_VCPUS_DEF, KVM_MAX_VCPUS);
+	pr_info("\t-i: Number of iterations per stage (default: %u)\n",
+		NR_TEST_ITERS_DEF);
+	pr_info("\t-p: Periodicity (in ms) of the guest timer (default: %u)\n",
+		TIMER_TEST_PERIOD_MS_DEF);
+	pr_info("\t-m: Frequency (in ms) of vCPUs to migrate to different pCPU. 0 to turn off (default: %u)\n",
+		TIMER_TEST_MIGRATION_FREQ_MS);
+	pr_info("\t-h: print this help screen\n");
+}
+
+static bool parse_args(int argc, char *argv[])
+{
+	int opt;
+
+	while ((opt = getopt(argc, argv, "hn:i:p:m:")) != -1) {
+		switch (opt) {
+		case 'n':
+			test_args.nr_vcpus = atoi(optarg);
+			if (test_args.nr_vcpus <= 0) {
+				pr_info("Positive value needed for -n\n");
+				goto err;
+			} else if (test_args.nr_vcpus > KVM_MAX_VCPUS) {
+				pr_info("Max allowed vCPUs: %u\n",
+					KVM_MAX_VCPUS);
+				goto err;
+			}
+			break;
+		case 'i':
+			test_args.nr_iter = atoi(optarg);
+			if (test_args.nr_iter <= 0) {
+				pr_info("Positive value needed for -i\n");
+				goto err;
+			}
+			break;
+		case 'p':
+			test_args.timer_period_ms = atoi(optarg);
+			if (test_args.timer_period_ms <= 0) {
+				pr_info("Positive value needed for -p\n");
+				goto err;
+			}
+			break;
+		case 'm':
+			test_args.migration_freq_ms = atoi(optarg);
+			if (test_args.migration_freq_ms < 0) {
+				pr_info("0 or positive value needed for -m\n");
+				goto err;
+			}
+			break;
+		case 'h':
+		default:
+			goto err;
+		}
+	}
+
+	return true;
+
+err:
+	test_print_help(argv[0]);
+	return false;
+}
+
+int main(int argc, char *argv[])
+{
+	struct kvm_vm *vm;
+
+	/* Tell stdout not to buffer its content */
+	setbuf(stdout, NULL);
+
+	if (!parse_args(argc, argv))
+		exit(KSFT_SKIP);
+
+	if (test_args.migration_freq_ms && get_nprocs() < 2) {
+		print_skip("At least two physical CPUs needed for vCPU migration");
+		exit(KSFT_SKIP);
+	}
+
+	vm = test_vm_create();
+	test_run(vm);
+	kvm_vm_free(vm);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
index e5e6c92b60da..ea189d83abf7 100644
--- a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
+++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
@@ -34,16 +34,16 @@ static void reset_debug_state(void)
 {
 	asm volatile("msr daifset, #8");
 
-	write_sysreg(osdlr_el1, 0);
-	write_sysreg(oslar_el1, 0);
+	write_sysreg(0, osdlr_el1);
+	write_sysreg(0, oslar_el1);
 	isb();
 
-	write_sysreg(mdscr_el1, 0);
+	write_sysreg(0, mdscr_el1);
 	/* This test only uses the first bp and wp slot. */
-	write_sysreg(dbgbvr0_el1, 0);
-	write_sysreg(dbgbcr0_el1, 0);
-	write_sysreg(dbgwcr0_el1, 0);
-	write_sysreg(dbgwvr0_el1, 0);
+	write_sysreg(0, dbgbvr0_el1);
+	write_sysreg(0, dbgbcr0_el1);
+	write_sysreg(0, dbgwcr0_el1);
+	write_sysreg(0, dbgwvr0_el1);
 	isb();
 }
 
@@ -53,14 +53,14 @@ static void install_wp(uint64_t addr)
 	uint32_t mdscr;
 
 	wcr = DBGWCR_LEN8 | DBGWCR_RD | DBGWCR_WR | DBGWCR_EL1 | DBGWCR_E;
-	write_sysreg(dbgwcr0_el1, wcr);
-	write_sysreg(dbgwvr0_el1, addr);
+	write_sysreg(wcr, dbgwcr0_el1);
+	write_sysreg(addr, dbgwvr0_el1);
 	isb();
 
 	asm volatile("msr daifclr, #8");
 
 	mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_MDE;
-	write_sysreg(mdscr_el1, mdscr);
+	write_sysreg(mdscr, mdscr_el1);
 	isb();
 }
 
@@ -70,14 +70,14 @@ static void install_hw_bp(uint64_t addr)
 	uint32_t mdscr;
 
 	bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E;
-	write_sysreg(dbgbcr0_el1, bcr);
-	write_sysreg(dbgbvr0_el1, addr);
+	write_sysreg(bcr, dbgbcr0_el1);
+	write_sysreg(addr, dbgbvr0_el1);
 	isb();
 
 	asm volatile("msr daifclr, #8");
 
 	mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_MDE;
-	write_sysreg(mdscr_el1, mdscr);
+	write_sysreg(mdscr, mdscr_el1);
 	isb();
 }
 
@@ -88,7 +88,7 @@ static void install_ss(void)
 	asm volatile("msr daifclr, #8");
 
 	mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_SS;
-	write_sysreg(mdscr_el1, mdscr);
+	write_sysreg(mdscr, mdscr_el1);
 	isb();
 }
 
@@ -190,7 +190,7 @@ static int debug_version(struct kvm_vm *vm)
 {
 	uint64_t id_aa64dfr0;
 
-	get_reg(vm, VCPU_ID, ARM64_SYS_REG(ID_AA64DFR0_EL1), &id_aa64dfr0);
+	get_reg(vm, VCPU_ID, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1), &id_aa64dfr0);
 	return id_aa64dfr0 & 0xf;
 }
 
diff --git a/tools/testing/selftests/kvm/aarch64/psci_cpu_on_test.c b/tools/testing/selftests/kvm/aarch64/psci_cpu_on_test.c
index 018c269990e1..4c5f6814030f 100644
--- a/tools/testing/selftests/kvm/aarch64/psci_cpu_on_test.c
+++ b/tools/testing/selftests/kvm/aarch64/psci_cpu_on_test.c
@@ -91,7 +91,7 @@ int main(void)
 	init.features[0] |= (1 << KVM_ARM_VCPU_POWER_OFF);
 	aarch64_vcpu_add_default(vm, VCPU_ID_TARGET, &init, guest_main);
 
-	get_reg(vm, VCPU_ID_TARGET, ARM64_SYS_REG(MPIDR_EL1), &target_mpidr);
+	get_reg(vm, VCPU_ID_TARGET, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1), &target_mpidr);
 	vcpu_args_set(vm, VCPU_ID_SOURCE, 1, target_mpidr & MPIDR_HWID_BITMASK);
 	vcpu_run(vm, VCPU_ID_SOURCE);
 
diff --git a/tools/testing/selftests/kvm/aarch64/vgic_init.c b/tools/testing/selftests/kvm/aarch64/vgic_init.c
index 623f31a14326..34379c98d2f4 100644
--- a/tools/testing/selftests/kvm/aarch64/vgic_init.c
+++ b/tools/testing/selftests/kvm/aarch64/vgic_init.c
@@ -13,25 +13,28 @@
 #include "test_util.h"
 #include "kvm_util.h"
 #include "processor.h"
+#include "vgic.h"
 
 #define NR_VCPUS		4
 
-#define REDIST_REGION_ATTR_ADDR(count, base, flags, index) (((uint64_t)(count) << 52) | \
-	((uint64_t)((base) >> 16) << 16) | ((uint64_t)(flags) << 12) | index)
 #define REG_OFFSET(vcpu, offset) (((uint64_t)vcpu << 32) | offset)
 
 #define GICR_TYPER 0x8
 
+#define VGIC_DEV_IS_V2(_d) ((_d) == KVM_DEV_TYPE_ARM_VGIC_V2)
+#define VGIC_DEV_IS_V3(_d) ((_d) == KVM_DEV_TYPE_ARM_VGIC_V3)
+
 struct vm_gic {
 	struct kvm_vm *vm;
 	int gic_fd;
+	uint32_t gic_dev_type;
 };
 
-static int max_ipa_bits;
+static uint64_t max_phys_size;
 
 /* helper to access a redistributor register */
-static int access_redist_reg(int gicv3_fd, int vcpu, int offset,
-			     uint32_t *val, bool write)
+static int access_v3_redist_reg(int gicv3_fd, int vcpu, int offset,
+				uint32_t *val, bool write)
 {
 	uint64_t attr = REG_OFFSET(vcpu, offset);
 
@@ -58,12 +61,13 @@ static int run_vcpu(struct kvm_vm *vm, uint32_t vcpuid)
 	return 0;
 }
 
-static struct vm_gic vm_gic_create(void)
+static struct vm_gic vm_gic_create_with_vcpus(uint32_t gic_dev_type, uint32_t nr_vcpus)
 {
 	struct vm_gic v;
 
-	v.vm = vm_create_default_with_vcpus(NR_VCPUS, 0, 0, guest_code, NULL);
-	v.gic_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3, false);
+	v.gic_dev_type = gic_dev_type;
+	v.vm = vm_create_default_with_vcpus(nr_vcpus, 0, 0, guest_code, NULL);
+	v.gic_fd = kvm_create_device(v.vm, gic_dev_type, false);
 
 	return v;
 }
@@ -74,78 +78,129 @@ static void vm_gic_destroy(struct vm_gic *v)
 	kvm_vm_free(v->vm);
 }
 
+struct vgic_region_attr {
+	uint64_t attr;
+	uint64_t size;
+	uint64_t alignment;
+};
+
+struct vgic_region_attr gic_v3_dist_region = {
+	.attr = KVM_VGIC_V3_ADDR_TYPE_DIST,
+	.size = 0x10000,
+	.alignment = 0x10000,
+};
+
+struct vgic_region_attr gic_v3_redist_region = {
+	.attr = KVM_VGIC_V3_ADDR_TYPE_REDIST,
+	.size = NR_VCPUS * 0x20000,
+	.alignment = 0x10000,
+};
+
+struct vgic_region_attr gic_v2_dist_region = {
+	.attr = KVM_VGIC_V2_ADDR_TYPE_DIST,
+	.size = 0x1000,
+	.alignment = 0x1000,
+};
+
+struct vgic_region_attr gic_v2_cpu_region = {
+	.attr = KVM_VGIC_V2_ADDR_TYPE_CPU,
+	.size = 0x2000,
+	.alignment = 0x1000,
+};
+
 /**
- * Helper routine that performs KVM device tests in general and
- * especially ARM_VGIC_V3 ones. Eventually the ARM_VGIC_V3
- * device gets created, a legacy RDIST region is set at @0x0
- * and a DIST region is set @0x60000
+ * Helper routine that performs KVM device tests in general. Eventually the
+ * ARM_VGIC (GICv2 or GICv3) device gets created with an overlapping
+ * DIST/REDIST (or DIST/CPUIF for GICv2). Assumption is 4 vcpus are going to be
+ * used hence the overlap. In the case of GICv3, A RDIST region is set at @0x0
+ * and a DIST region is set @0x70000. The GICv2 case sets a CPUIF @0x0 and a
+ * DIST region @0x1000.
  */
 static void subtest_dist_rdist(struct vm_gic *v)
 {
 	int ret;
 	uint64_t addr;
+	struct vgic_region_attr rdist; /* CPU interface in GICv2*/
+	struct vgic_region_attr dist;
+
+	rdist = VGIC_DEV_IS_V3(v->gic_dev_type) ? gic_v3_redist_region
+						: gic_v2_cpu_region;
+	dist = VGIC_DEV_IS_V3(v->gic_dev_type) ? gic_v3_dist_region
+						: gic_v2_dist_region;
 
 	/* Check existing group/attributes */
 	kvm_device_check_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-			      KVM_VGIC_V3_ADDR_TYPE_DIST);
+			      dist.attr);
 
 	kvm_device_check_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-			      KVM_VGIC_V3_ADDR_TYPE_REDIST);
+			      rdist.attr);
 
 	/* check non existing attribute */
-	ret = _kvm_device_check_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, 0);
+	ret = _kvm_device_check_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, -1);
 	TEST_ASSERT(ret && errno == ENXIO, "attribute not supported");
 
 	/* misaligned DIST and REDIST address settings */
-	addr = 0x1000;
+	addr = dist.alignment / 0x10;
 	ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-				 KVM_VGIC_V3_ADDR_TYPE_DIST, &addr, true);
-	TEST_ASSERT(ret && errno == EINVAL, "GICv3 dist base not 64kB aligned");
+				 dist.attr, &addr, true);
+	TEST_ASSERT(ret && errno == EINVAL, "GIC dist base not aligned");
 
+	addr = rdist.alignment / 0x10;
 	ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-				 KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr, true);
-	TEST_ASSERT(ret && errno == EINVAL, "GICv3 redist base not 64kB aligned");
+				 rdist.attr, &addr, true);
+	TEST_ASSERT(ret && errno == EINVAL, "GIC redist/cpu base not aligned");
 
 	/* out of range address */
-	if (max_ipa_bits) {
-		addr = 1ULL << max_ipa_bits;
-		ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-					 KVM_VGIC_V3_ADDR_TYPE_DIST, &addr, true);
-		TEST_ASSERT(ret && errno == E2BIG, "dist address beyond IPA limit");
+	addr = max_phys_size;
+	ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+				 dist.attr, &addr, true);
+	TEST_ASSERT(ret && errno == E2BIG, "dist address beyond IPA limit");
 
-		ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-					 KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr, true);
-		TEST_ASSERT(ret && errno == E2BIG, "redist address beyond IPA limit");
-	}
+	ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+				 rdist.attr, &addr, true);
+	TEST_ASSERT(ret && errno == E2BIG, "redist address beyond IPA limit");
+
+	/* Space for half a rdist (a rdist is: 2 * rdist.alignment). */
+	addr = max_phys_size - dist.alignment;
+	ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+				 rdist.attr, &addr, true);
+	TEST_ASSERT(ret && errno == E2BIG,
+			"half of the redist is beyond IPA limit");
 
 	/* set REDIST base address @0x0*/
 	addr = 0x00000;
 	kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-			  KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr, true);
+			  rdist.attr, &addr, true);
 
 	/* Attempt to create a second legacy redistributor region */
 	addr = 0xE0000;
 	ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-				 KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr, true);
-	TEST_ASSERT(ret && errno == EEXIST, "GICv3 redist base set again");
+				 rdist.attr, &addr, true);
+	TEST_ASSERT(ret && errno == EEXIST, "GIC redist base set again");
 
-	/* Attempt to mix legacy and new redistributor regions */
-	addr = REDIST_REGION_ATTR_ADDR(NR_VCPUS, 0x100000, 0, 0);
-	ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
-				 KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true);
-	TEST_ASSERT(ret && errno == EINVAL, "attempt to mix GICv3 REDIST and REDIST_REGION");
+	ret = _kvm_device_check_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+				     KVM_VGIC_V3_ADDR_TYPE_REDIST);
+	if (!ret) {
+		/* Attempt to mix legacy and new redistributor regions */
+		addr = REDIST_REGION_ATTR_ADDR(NR_VCPUS, 0x100000, 0, 0);
+		ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+					 KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION,
+					 &addr, true);
+		TEST_ASSERT(ret && errno == EINVAL,
+			    "attempt to mix GICv3 REDIST and REDIST_REGION");
+	}
 
 	/*
 	 * Set overlapping DIST / REDIST, cannot be detected here. Will be detected
 	 * on first vcpu run instead.
 	 */
-	addr = 3 * 2 * 0x10000;
-	kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, KVM_VGIC_V3_ADDR_TYPE_DIST,
-			  &addr, true);
+	addr = rdist.size - rdist.alignment;
+	kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+			  dist.attr, &addr, true);
 }
 
 /* Test the new REDIST region API */
-static void subtest_redist_regions(struct vm_gic *v)
+static void subtest_v3_redist_regions(struct vm_gic *v)
 {
 	uint64_t addr, expected_addr;
 	int ret;
@@ -199,12 +254,19 @@ static void subtest_redist_regions(struct vm_gic *v)
 	kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
 			  KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true);
 
-	addr = REDIST_REGION_ATTR_ADDR(1, 1ULL << max_ipa_bits, 0, 2);
+	addr = REDIST_REGION_ATTR_ADDR(1, max_phys_size, 0, 2);
 	ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
 				 KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true);
 	TEST_ASSERT(ret && errno == E2BIG,
 		    "register redist region with base address beyond IPA range");
 
+	/* The last redist is above the pa range. */
+	addr = REDIST_REGION_ATTR_ADDR(2, max_phys_size - 0x30000, 0, 2);
+	ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+				 KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true);
+	TEST_ASSERT(ret && errno == E2BIG,
+		    "register redist region with top address beyond IPA range");
+
 	addr = 0x260000;
 	ret = _kvm_device_access(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
 				 KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr, true);
@@ -249,13 +311,12 @@ static void subtest_redist_regions(struct vm_gic *v)
  * VGIC KVM device is created and initialized before the secondary CPUs
  * get created
  */
-static void test_vgic_then_vcpus(void)
+static void test_vgic_then_vcpus(uint32_t gic_dev_type)
 {
 	struct vm_gic v;
 	int ret, i;
 
-	v.vm = vm_create_default(0, 0, guest_code);
-	v.gic_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3, false);
+	v = vm_gic_create_with_vcpus(gic_dev_type, 1);
 
 	subtest_dist_rdist(&v);
 
@@ -270,12 +331,12 @@ static void test_vgic_then_vcpus(void)
 }
 
 /* All the VCPUs are created before the VGIC KVM device gets initialized */
-static void test_vcpus_then_vgic(void)
+static void test_vcpus_then_vgic(uint32_t gic_dev_type)
 {
 	struct vm_gic v;
 	int ret;
 
-	v = vm_gic_create();
+	v = vm_gic_create_with_vcpus(gic_dev_type, NR_VCPUS);
 
 	subtest_dist_rdist(&v);
 
@@ -285,15 +346,15 @@ static void test_vcpus_then_vgic(void)
 	vm_gic_destroy(&v);
 }
 
-static void test_new_redist_regions(void)
+static void test_v3_new_redist_regions(void)
 {
 	void *dummy = NULL;
 	struct vm_gic v;
 	uint64_t addr;
 	int ret;
 
-	v = vm_gic_create();
-	subtest_redist_regions(&v);
+	v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS);
+	subtest_v3_redist_regions(&v);
 	kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
 			  KVM_DEV_ARM_VGIC_CTRL_INIT, NULL, true);
 
@@ -303,8 +364,8 @@ static void test_new_redist_regions(void)
 
 	/* step2 */
 
-	v = vm_gic_create();
-	subtest_redist_regions(&v);
+	v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS);
+	subtest_v3_redist_regions(&v);
 
 	addr = REDIST_REGION_ATTR_ADDR(1, 0x280000, 0, 2);
 	kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
@@ -317,8 +378,8 @@ static void test_new_redist_regions(void)
 
 	/* step 3 */
 
-	v = vm_gic_create();
-	subtest_redist_regions(&v);
+	v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS);
+	subtest_v3_redist_regions(&v);
 
 	_kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
 			  KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, dummy, true);
@@ -338,7 +399,7 @@ static void test_new_redist_regions(void)
 	vm_gic_destroy(&v);
 }
 
-static void test_typer_accesses(void)
+static void test_v3_typer_accesses(void)
 {
 	struct vm_gic v;
 	uint64_t addr;
@@ -351,12 +412,12 @@ static void test_typer_accesses(void)
 
 	vm_vcpu_add_default(v.vm, 3, guest_code);
 
-	ret = access_redist_reg(v.gic_fd, 1, GICR_TYPER, &val, false);
+	ret = access_v3_redist_reg(v.gic_fd, 1, GICR_TYPER, &val, false);
 	TEST_ASSERT(ret && errno == EINVAL, "attempting to read GICR_TYPER of non created vcpu");
 
 	vm_vcpu_add_default(v.vm, 1, guest_code);
 
-	ret = access_redist_reg(v.gic_fd, 1, GICR_TYPER, &val, false);
+	ret = access_v3_redist_reg(v.gic_fd, 1, GICR_TYPER, &val, false);
 	TEST_ASSERT(ret && errno == EBUSY, "read GICR_TYPER before GIC initialized");
 
 	vm_vcpu_add_default(v.vm, 2, guest_code);
@@ -365,7 +426,7 @@ static void test_typer_accesses(void)
 			  KVM_DEV_ARM_VGIC_CTRL_INIT, NULL, true);
 
 	for (i = 0; i < NR_VCPUS ; i++) {
-		ret = access_redist_reg(v.gic_fd, 0, GICR_TYPER, &val, false);
+		ret = access_v3_redist_reg(v.gic_fd, 0, GICR_TYPER, &val, false);
 		TEST_ASSERT(!ret && !val, "read GICR_TYPER before rdist region setting");
 	}
 
@@ -374,10 +435,10 @@ static void test_typer_accesses(void)
 			  KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true);
 
 	/* The 2 first rdists should be put there (vcpu 0 and 3) */
-	ret = access_redist_reg(v.gic_fd, 0, GICR_TYPER, &val, false);
+	ret = access_v3_redist_reg(v.gic_fd, 0, GICR_TYPER, &val, false);
 	TEST_ASSERT(!ret && !val, "read typer of rdist #0");
 
-	ret = access_redist_reg(v.gic_fd, 3, GICR_TYPER, &val, false);
+	ret = access_v3_redist_reg(v.gic_fd, 3, GICR_TYPER, &val, false);
 	TEST_ASSERT(!ret && val == 0x310, "read typer of rdist #1");
 
 	addr = REDIST_REGION_ATTR_ADDR(10, 0x100000, 0, 1);
@@ -385,11 +446,11 @@ static void test_typer_accesses(void)
 				 KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true);
 	TEST_ASSERT(ret && errno == EINVAL, "collision with previous rdist region");
 
-	ret = access_redist_reg(v.gic_fd, 1, GICR_TYPER, &val, false);
+	ret = access_v3_redist_reg(v.gic_fd, 1, GICR_TYPER, &val, false);
 	TEST_ASSERT(!ret && val == 0x100,
 		    "no redist region attached to vcpu #1 yet, last cannot be returned");
 
-	ret = access_redist_reg(v.gic_fd, 2, GICR_TYPER, &val, false);
+	ret = access_v3_redist_reg(v.gic_fd, 2, GICR_TYPER, &val, false);
 	TEST_ASSERT(!ret && val == 0x200,
 		    "no redist region attached to vcpu #2, last cannot be returned");
 
@@ -397,10 +458,10 @@ static void test_typer_accesses(void)
 	kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
 			  KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true);
 
-	ret = access_redist_reg(v.gic_fd, 1, GICR_TYPER, &val, false);
+	ret = access_v3_redist_reg(v.gic_fd, 1, GICR_TYPER, &val, false);
 	TEST_ASSERT(!ret && val == 0x100, "read typer of rdist #1");
 
-	ret = access_redist_reg(v.gic_fd, 2, GICR_TYPER, &val, false);
+	ret = access_v3_redist_reg(v.gic_fd, 2, GICR_TYPER, &val, false);
 	TEST_ASSERT(!ret && val == 0x210,
 		    "read typer of rdist #1, last properly returned");
 
@@ -417,7 +478,7 @@ static void test_typer_accesses(void)
  * rdist region #2 @0x200000 2 rdist capacity
  *     rdists: 1, 2
  */
-static void test_last_bit_redist_regions(void)
+static void test_v3_last_bit_redist_regions(void)
 {
 	uint32_t vcpuids[] = { 0, 3, 5, 4, 1, 2 };
 	struct vm_gic v;
@@ -444,29 +505,29 @@ static void test_last_bit_redist_regions(void)
 	kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
 			  KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr, true);
 
-	ret = access_redist_reg(v.gic_fd, 0, GICR_TYPER, &val, false);
+	ret = access_v3_redist_reg(v.gic_fd, 0, GICR_TYPER, &val, false);
 	TEST_ASSERT(!ret && val == 0x000, "read typer of rdist #0");
 
-	ret = access_redist_reg(v.gic_fd, 1, GICR_TYPER, &val, false);
+	ret = access_v3_redist_reg(v.gic_fd, 1, GICR_TYPER, &val, false);
 	TEST_ASSERT(!ret && val == 0x100, "read typer of rdist #1");
 
-	ret = access_redist_reg(v.gic_fd, 2, GICR_TYPER, &val, false);
+	ret = access_v3_redist_reg(v.gic_fd, 2, GICR_TYPER, &val, false);
 	TEST_ASSERT(!ret && val == 0x200, "read typer of rdist #2");
 
-	ret = access_redist_reg(v.gic_fd, 3, GICR_TYPER, &val, false);
+	ret = access_v3_redist_reg(v.gic_fd, 3, GICR_TYPER, &val, false);
 	TEST_ASSERT(!ret && val == 0x310, "read typer of rdist #3");
 
-	ret = access_redist_reg(v.gic_fd, 5, GICR_TYPER, &val, false);
+	ret = access_v3_redist_reg(v.gic_fd, 5, GICR_TYPER, &val, false);
 	TEST_ASSERT(!ret && val == 0x500, "read typer of rdist #5");
 
-	ret = access_redist_reg(v.gic_fd, 4, GICR_TYPER, &val, false);
+	ret = access_v3_redist_reg(v.gic_fd, 4, GICR_TYPER, &val, false);
 	TEST_ASSERT(!ret && val == 0x410, "read typer of rdist #4");
 
 	vm_gic_destroy(&v);
 }
 
 /* Test last bit with legacy region */
-static void test_last_bit_single_rdist(void)
+static void test_v3_last_bit_single_rdist(void)
 {
 	uint32_t vcpuids[] = { 0, 3, 5, 4, 1, 2 };
 	struct vm_gic v;
@@ -485,28 +546,106 @@ static void test_last_bit_single_rdist(void)
 	kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
 			  KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr, true);
 
-	ret = access_redist_reg(v.gic_fd, 0, GICR_TYPER, &val, false);
+	ret = access_v3_redist_reg(v.gic_fd, 0, GICR_TYPER, &val, false);
 	TEST_ASSERT(!ret && val == 0x000, "read typer of rdist #0");
 
-	ret = access_redist_reg(v.gic_fd, 3, GICR_TYPER, &val, false);
+	ret = access_v3_redist_reg(v.gic_fd, 3, GICR_TYPER, &val, false);
 	TEST_ASSERT(!ret && val == 0x300, "read typer of rdist #1");
 
-	ret = access_redist_reg(v.gic_fd, 5, GICR_TYPER, &val, false);
+	ret = access_v3_redist_reg(v.gic_fd, 5, GICR_TYPER, &val, false);
 	TEST_ASSERT(!ret && val == 0x500, "read typer of rdist #2");
 
-	ret = access_redist_reg(v.gic_fd, 1, GICR_TYPER, &val, false);
+	ret = access_v3_redist_reg(v.gic_fd, 1, GICR_TYPER, &val, false);
 	TEST_ASSERT(!ret && val == 0x100, "read typer of rdist #3");
 
-	ret = access_redist_reg(v.gic_fd, 2, GICR_TYPER, &val, false);
+	ret = access_v3_redist_reg(v.gic_fd, 2, GICR_TYPER, &val, false);
 	TEST_ASSERT(!ret && val == 0x210, "read typer of rdist #3");
 
 	vm_gic_destroy(&v);
 }
 
-void test_kvm_device(void)
+/* Uses the legacy REDIST region API. */
+static void test_v3_redist_ipa_range_check_at_vcpu_run(void)
+{
+	struct vm_gic v;
+	int ret, i;
+	uint64_t addr;
+
+	v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, 1);
+
+	/* Set space for 3 redists, we have 1 vcpu, so this succeeds. */
+	addr = max_phys_size - (3 * 2 * 0x10000);
+	kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+				 KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr, true);
+
+	addr = 0x00000;
+	kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+			KVM_VGIC_V3_ADDR_TYPE_DIST, &addr, true);
+
+	/* Add the rest of the VCPUs */
+	for (i = 1; i < NR_VCPUS; ++i)
+		vm_vcpu_add_default(v.vm, i, guest_code);
+
+	kvm_device_access(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+			  KVM_DEV_ARM_VGIC_CTRL_INIT, NULL, true);
+
+	/* Attempt to run a vcpu without enough redist space. */
+	ret = run_vcpu(v.vm, 2);
+	TEST_ASSERT(ret && errno == EINVAL,
+		"redist base+size above PA range detected on 1st vcpu run");
+
+	vm_gic_destroy(&v);
+}
+
+static void test_v3_its_region(void)
+{
+	struct vm_gic v;
+	uint64_t addr;
+	int its_fd, ret;
+
+	v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS);
+	its_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_ITS, false);
+
+	addr = 0x401000;
+	ret = _kvm_device_access(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+			  KVM_VGIC_ITS_ADDR_TYPE, &addr, true);
+	TEST_ASSERT(ret && errno == EINVAL,
+		"ITS region with misaligned address");
+
+	addr = max_phys_size;
+	ret = _kvm_device_access(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+			  KVM_VGIC_ITS_ADDR_TYPE, &addr, true);
+	TEST_ASSERT(ret && errno == E2BIG,
+		"register ITS region with base address beyond IPA range");
+
+	addr = max_phys_size - 0x10000;
+	ret = _kvm_device_access(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+			  KVM_VGIC_ITS_ADDR_TYPE, &addr, true);
+	TEST_ASSERT(ret && errno == E2BIG,
+		"Half of ITS region is beyond IPA range");
+
+	/* This one succeeds setting the ITS base */
+	addr = 0x400000;
+	kvm_device_access(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+			  KVM_VGIC_ITS_ADDR_TYPE, &addr, true);
+
+	addr = 0x300000;
+	ret = _kvm_device_access(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+			  KVM_VGIC_ITS_ADDR_TYPE, &addr, true);
+	TEST_ASSERT(ret && errno == EEXIST, "ITS base set again");
+
+	close(its_fd);
+	vm_gic_destroy(&v);
+}
+
+/*
+ * Returns 0 if it's possible to create GIC device of a given type (V2 or V3).
+ */
+int test_kvm_device(uint32_t gic_dev_type)
 {
 	struct vm_gic v;
 	int ret, fd;
+	uint32_t other;
 
 	v.vm = vm_create_default_with_vcpus(NR_VCPUS, 0, 0, guest_code, NULL);
 
@@ -514,38 +653,70 @@ void test_kvm_device(void)
 	ret = _kvm_create_device(v.vm, 0, true, &fd);
 	TEST_ASSERT(ret && errno == ENODEV, "unsupported device");
 
-	/* trial mode with VGIC_V3 device */
-	ret = _kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3, true, &fd);
-	if (ret) {
-		print_skip("GICv3 not supported");
-		exit(KSFT_SKIP);
-	}
-	v.gic_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3, false);
+	/* trial mode */
+	ret = _kvm_create_device(v.vm, gic_dev_type, true, &fd);
+	if (ret)
+		return ret;
+	v.gic_fd = kvm_create_device(v.vm, gic_dev_type, false);
+
+	ret = _kvm_create_device(v.vm, gic_dev_type, false, &fd);
+	TEST_ASSERT(ret && errno == EEXIST, "create GIC device twice");
 
-	ret = _kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3, false, &fd);
-	TEST_ASSERT(ret && errno == EEXIST, "create GICv3 device twice");
+	kvm_create_device(v.vm, gic_dev_type, true);
 
-	kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3, true);
+	/* try to create the other gic_dev_type */
+	other = VGIC_DEV_IS_V2(gic_dev_type) ? KVM_DEV_TYPE_ARM_VGIC_V3
+					     : KVM_DEV_TYPE_ARM_VGIC_V2;
 
-	if (!_kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V2, true, &fd)) {
-		ret = _kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V2, false, &fd);
-		TEST_ASSERT(ret && errno == EINVAL, "create GICv2 while v3 exists");
+	if (!_kvm_create_device(v.vm, other, true, &fd)) {
+		ret = _kvm_create_device(v.vm, other, false, &fd);
+		TEST_ASSERT(ret && errno == EINVAL,
+				"create GIC device while other version exists");
 	}
 
 	vm_gic_destroy(&v);
+
+	return 0;
+}
+
+void run_tests(uint32_t gic_dev_type)
+{
+	test_vcpus_then_vgic(gic_dev_type);
+	test_vgic_then_vcpus(gic_dev_type);
+
+	if (VGIC_DEV_IS_V3(gic_dev_type)) {
+		test_v3_new_redist_regions();
+		test_v3_typer_accesses();
+		test_v3_last_bit_redist_regions();
+		test_v3_last_bit_single_rdist();
+		test_v3_redist_ipa_range_check_at_vcpu_run();
+		test_v3_its_region();
+	}
 }
 
 int main(int ac, char **av)
 {
-	max_ipa_bits = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE);
+	int ret;
+	int pa_bits;
+
+	pa_bits = vm_guest_mode_params[VM_MODE_DEFAULT].pa_bits;
+	max_phys_size = 1ULL << pa_bits;
 
-	test_kvm_device();
-	test_vcpus_then_vgic();
-	test_vgic_then_vcpus();
-	test_new_redist_regions();
-	test_typer_accesses();
-	test_last_bit_redist_regions();
-	test_last_bit_single_rdist();
+	ret = test_kvm_device(KVM_DEV_TYPE_ARM_VGIC_V3);
+	if (!ret) {
+		pr_info("Running GIC_v3 tests.\n");
+		run_tests(KVM_DEV_TYPE_ARM_VGIC_V3);
+		return 0;
+	}
+
+	ret = test_kvm_device(KVM_DEV_TYPE_ARM_VGIC_V2);
+	if (!ret) {
+		pr_info("Running GIC_v2 tests.\n");
+		run_tests(KVM_DEV_TYPE_ARM_VGIC_V2);
+		return 0;
+	}
 
+	print_skip("No GICv2 nor GICv3 support");
+	exit(KSFT_SKIP);
 	return 0;
 }
diff --git a/tools/testing/selftests/kvm/include/aarch64/arch_timer.h b/tools/testing/selftests/kvm/include/aarch64/arch_timer.h
new file mode 100644
index 000000000000..cb7c03de3a21
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/aarch64/arch_timer.h
@@ -0,0 +1,142 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * ARM Generic Timer specific interface
+ */
+
+#ifndef SELFTEST_KVM_ARCH_TIMER_H
+#define SELFTEST_KVM_ARCH_TIMER_H
+
+#include "processor.h"
+
+enum arch_timer {
+	VIRTUAL,
+	PHYSICAL,
+};
+
+#define CTL_ENABLE	(1 << 0)
+#define CTL_IMASK	(1 << 1)
+#define CTL_ISTATUS	(1 << 2)
+
+#define msec_to_cycles(msec)	\
+	(timer_get_cntfrq() * (uint64_t)(msec) / 1000)
+
+#define usec_to_cycles(usec)	\
+	(timer_get_cntfrq() * (uint64_t)(usec) / 1000000)
+
+#define cycles_to_usec(cycles) \
+	((uint64_t)(cycles) * 1000000 / timer_get_cntfrq())
+
+static inline uint32_t timer_get_cntfrq(void)
+{
+	return read_sysreg(cntfrq_el0);
+}
+
+static inline uint64_t timer_get_cntct(enum arch_timer timer)
+{
+	isb();
+
+	switch (timer) {
+	case VIRTUAL:
+		return read_sysreg(cntvct_el0);
+	case PHYSICAL:
+		return read_sysreg(cntpct_el0);
+	default:
+		GUEST_ASSERT_1(0, timer);
+	}
+
+	/* We should not reach here */
+	return 0;
+}
+
+static inline void timer_set_cval(enum arch_timer timer, uint64_t cval)
+{
+	switch (timer) {
+	case VIRTUAL:
+		write_sysreg(cval, cntv_cval_el0);
+		break;
+	case PHYSICAL:
+		write_sysreg(cval, cntp_cval_el0);
+		break;
+	default:
+		GUEST_ASSERT_1(0, timer);
+	}
+
+	isb();
+}
+
+static inline uint64_t timer_get_cval(enum arch_timer timer)
+{
+	switch (timer) {
+	case VIRTUAL:
+		return read_sysreg(cntv_cval_el0);
+	case PHYSICAL:
+		return read_sysreg(cntp_cval_el0);
+	default:
+		GUEST_ASSERT_1(0, timer);
+	}
+
+	/* We should not reach here */
+	return 0;
+}
+
+static inline void timer_set_tval(enum arch_timer timer, uint32_t tval)
+{
+	switch (timer) {
+	case VIRTUAL:
+		write_sysreg(tval, cntv_tval_el0);
+		break;
+	case PHYSICAL:
+		write_sysreg(tval, cntp_tval_el0);
+		break;
+	default:
+		GUEST_ASSERT_1(0, timer);
+	}
+
+	isb();
+}
+
+static inline void timer_set_ctl(enum arch_timer timer, uint32_t ctl)
+{
+	switch (timer) {
+	case VIRTUAL:
+		write_sysreg(ctl, cntv_ctl_el0);
+		break;
+	case PHYSICAL:
+		write_sysreg(ctl, cntp_ctl_el0);
+		break;
+	default:
+		GUEST_ASSERT_1(0, timer);
+	}
+
+	isb();
+}
+
+static inline uint32_t timer_get_ctl(enum arch_timer timer)
+{
+	switch (timer) {
+	case VIRTUAL:
+		return read_sysreg(cntv_ctl_el0);
+	case PHYSICAL:
+		return read_sysreg(cntp_ctl_el0);
+	default:
+		GUEST_ASSERT_1(0, timer);
+	}
+
+	/* We should not reach here */
+	return 0;
+}
+
+static inline void timer_set_next_cval_ms(enum arch_timer timer, uint32_t msec)
+{
+	uint64_t now_ct = timer_get_cntct(timer);
+	uint64_t next_ct = now_ct + msec_to_cycles(msec);
+
+	timer_set_cval(timer, next_ct);
+}
+
+static inline void timer_set_next_tval_ms(enum arch_timer timer, uint32_t msec)
+{
+	timer_set_tval(timer, msec_to_cycles(msec));
+}
+
+#endif /* SELFTEST_KVM_ARCH_TIMER_H */
diff --git a/tools/testing/selftests/kvm/include/aarch64/delay.h b/tools/testing/selftests/kvm/include/aarch64/delay.h
new file mode 100644
index 000000000000..329e4f5079ea
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/aarch64/delay.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * ARM simple delay routines
+ */
+
+#ifndef SELFTEST_KVM_ARM_DELAY_H
+#define SELFTEST_KVM_ARM_DELAY_H
+
+#include "arch_timer.h"
+
+static inline void __delay(uint64_t cycles)
+{
+	enum arch_timer timer = VIRTUAL;
+	uint64_t start = timer_get_cntct(timer);
+
+	while ((timer_get_cntct(timer) - start) < cycles)
+		cpu_relax();
+}
+
+static inline void udelay(unsigned long usec)
+{
+	__delay(usec_to_cycles(usec));
+}
+
+#endif /* SELFTEST_KVM_ARM_DELAY_H */
diff --git a/tools/testing/selftests/kvm/include/aarch64/gic.h b/tools/testing/selftests/kvm/include/aarch64/gic.h
new file mode 100644
index 000000000000..85dd1e53048e
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/aarch64/gic.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * ARM Generic Interrupt Controller (GIC) specific defines
+ */
+
+#ifndef SELFTEST_KVM_GIC_H
+#define SELFTEST_KVM_GIC_H
+
+enum gic_type {
+	GIC_V3,
+	GIC_TYPE_MAX,
+};
+
+void gic_init(enum gic_type type, unsigned int nr_cpus,
+		void *dist_base, void *redist_base);
+void gic_irq_enable(unsigned int intid);
+void gic_irq_disable(unsigned int intid);
+unsigned int gic_get_and_ack_irq(void);
+void gic_set_eoi(unsigned int intid);
+
+#endif /* SELFTEST_KVM_GIC_H */
diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h
index c0273aefa63d..27d8e1bb5b36 100644
--- a/tools/testing/selftests/kvm/include/aarch64/processor.h
+++ b/tools/testing/selftests/kvm/include/aarch64/processor.h
@@ -9,20 +9,24 @@
 
 #include "kvm_util.h"
 #include <linux/stringify.h>
+#include <linux/types.h>
+#include <asm/sysreg.h>
 
 
 #define ARM64_CORE_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
 			   KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))
 
-#define CPACR_EL1               3, 0,  1, 0, 2
-#define TCR_EL1                 3, 0,  2, 0, 2
-#define MAIR_EL1                3, 0, 10, 2, 0
-#define MPIDR_EL1               3, 0,  0, 0, 5
-#define TTBR0_EL1               3, 0,  2, 0, 0
-#define SCTLR_EL1               3, 0,  1, 0, 0
-#define VBAR_EL1                3, 0, 12, 0, 0
-
-#define ID_AA64DFR0_EL1         3, 0,  0, 5, 0
+/*
+ * KVM_ARM64_SYS_REG(sys_reg_id): Helper macro to convert
+ * SYS_* register definitions in asm/sysreg.h to use in KVM
+ * calls such as get_reg() and set_reg().
+ */
+#define KVM_ARM64_SYS_REG(sys_reg_id)			\
+	ARM64_SYS_REG(sys_reg_Op0(sys_reg_id),		\
+			sys_reg_Op1(sys_reg_id),	\
+			sys_reg_CRn(sys_reg_id),	\
+			sys_reg_CRm(sys_reg_id),	\
+			sys_reg_Op2(sys_reg_id))
 
 /*
  * Default MAIR
@@ -59,7 +63,7 @@ static inline void set_reg(struct kvm_vm *vm, uint32_t vcpuid, uint64_t id, uint
 	vcpu_ioctl(vm, vcpuid, KVM_SET_ONE_REG, &reg);
 }
 
-void aarch64_vcpu_setup(struct kvm_vm *vm, int vcpuid, struct kvm_vcpu_init *init);
+void aarch64_vcpu_setup(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_vcpu_init *init);
 void aarch64_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid,
 			      struct kvm_vcpu_init *init, void *guest_code);
 
@@ -118,18 +122,64 @@ void vm_install_exception_handler(struct kvm_vm *vm,
 void vm_install_sync_handler(struct kvm_vm *vm,
 		int vector, int ec, handler_fn handler);
 
-#define write_sysreg(reg, val)						  \
-({									  \
-	u64 __val = (u64)(val);						  \
-	asm volatile("msr " __stringify(reg) ", %x0" : : "rZ" (__val));	  \
-})
+static inline void cpu_relax(void)
+{
+	asm volatile("yield" ::: "memory");
+}
 
-#define read_sysreg(reg)						  \
-({	u64 val;							  \
-	asm volatile("mrs %0, "__stringify(reg) : "=r"(val) : : "memory");\
-	val;								  \
+#define isb()		asm volatile("isb" : : : "memory")
+#define dsb(opt)	asm volatile("dsb " #opt : : : "memory")
+#define dmb(opt)	asm volatile("dmb " #opt : : : "memory")
+
+#define dma_wmb()	dmb(oshst)
+#define __iowmb()	dma_wmb()
+
+#define dma_rmb()	dmb(oshld)
+
+#define __iormb(v)							\
+({									\
+	unsigned long tmp;						\
+									\
+	dma_rmb();							\
+									\
+	/*								\
+	 * Courtesy of arch/arm64/include/asm/io.h:			\
+	 * Create a dummy control dependency from the IO read to any	\
+	 * later instructions. This ensures that a subsequent call	\
+	 * to udelay() will be ordered due to the ISB in __delay().	\
+	 */								\
+	asm volatile("eor	%0, %1, %1\n"				\
+		     "cbnz	%0, ."					\
+		     : "=r" (tmp) : "r" ((unsigned long)(v))		\
+		     : "memory");					\
 })
 
-#define isb()	asm volatile("isb" : : : "memory")
+static __always_inline void __raw_writel(u32 val, volatile void *addr)
+{
+	asm volatile("str %w0, [%1]" : : "rZ" (val), "r" (addr));
+}
+
+static __always_inline u32 __raw_readl(const volatile void *addr)
+{
+	u32 val;
+	asm volatile("ldr %w0, [%1]" : "=r" (val) : "r" (addr));
+	return val;
+}
+
+#define writel_relaxed(v,c)	((void)__raw_writel((__force u32)cpu_to_le32(v),(c)))
+#define readl_relaxed(c)	({ u32 __r = le32_to_cpu((__force __le32)__raw_readl(c)); __r; })
+
+#define writel(v,c)		({ __iowmb(); writel_relaxed((v),(c));})
+#define readl(c)		({ u32 __v = readl_relaxed(c); __iormb(__v); __v; })
+
+static inline void local_irq_enable(void)
+{
+	asm volatile("msr daifclr, #3" : : : "memory");
+}
+
+static inline void local_irq_disable(void)
+{
+	asm volatile("msr daifset, #3" : : : "memory");
+}
 
 #endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/include/aarch64/spinlock.h b/tools/testing/selftests/kvm/include/aarch64/spinlock.h
new file mode 100644
index 000000000000..cf0984106d14
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/aarch64/spinlock.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef SELFTEST_KVM_ARM64_SPINLOCK_H
+#define SELFTEST_KVM_ARM64_SPINLOCK_H
+
+struct spinlock {
+	int v;
+};
+
+extern void spin_lock(struct spinlock *lock);
+extern void spin_unlock(struct spinlock *lock);
+
+#endif /* SELFTEST_KVM_ARM64_SPINLOCK_H */
diff --git a/tools/testing/selftests/kvm/include/aarch64/vgic.h b/tools/testing/selftests/kvm/include/aarch64/vgic.h
new file mode 100644
index 000000000000..0ecfb253893c
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/aarch64/vgic.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * ARM Generic Interrupt Controller (GIC) host specific defines
+ */
+
+#ifndef SELFTEST_KVM_VGIC_H
+#define SELFTEST_KVM_VGIC_H
+
+#include <linux/kvm.h>
+
+#define REDIST_REGION_ATTR_ADDR(count, base, flags, index) \
+	(((uint64_t)(count) << 52) | \
+	((uint64_t)((base) >> 16) << 16) | \
+	((uint64_t)(flags) << 12) | \
+	index)
+
+int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus,
+		uint64_t gicd_base_gpa, uint64_t gicr_base_gpa);
+
+#endif /* SELFTEST_KVM_VGIC_H */
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index 010b59b13917..f6b3794f306b 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -19,6 +19,8 @@
 #define KVM_DEV_PATH "/dev/kvm"
 #define KVM_MAX_VCPUS 512
 
+#define NSEC_PER_SEC 1000000000L
+
 /*
  * Callers of kvm_util only have an incomplete/opaque description of the
  * structure kvm_util is using to maintain the state of a VM.
@@ -238,6 +240,15 @@ int _kvm_device_access(int dev_fd, uint32_t group, uint64_t attr,
 int kvm_device_access(int dev_fd, uint32_t group, uint64_t attr,
 		      void *val, bool write);
 
+int _vcpu_has_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group,
+			  uint64_t attr);
+int vcpu_has_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group,
+			 uint64_t attr);
+int _vcpu_access_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group,
+			  uint64_t attr, void *val, bool write);
+int vcpu_access_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group,
+			 uint64_t attr, void *val, bool write);
+
 const char *exit_reason_str(unsigned int exit_reason);
 
 void virt_pgd_alloc(struct kvm_vm *vm);
@@ -400,4 +411,6 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc);
 int vm_get_stats_fd(struct kvm_vm *vm);
 int vcpu_get_stats_fd(struct kvm_vm *vm, uint32_t vcpuid);
 
+uint32_t guest_get_vcpuid(void);
+
 #endif /* SELFTEST_KVM_UTIL_H */
diff --git a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
index 0299cd81b8ba..f968dfd4ee88 100644
--- a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
+++ b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
@@ -53,7 +53,7 @@ int main(int argc, char *argv[])
 		kvm_max_vcpu_id = kvm_max_vcpus;
 
 	TEST_ASSERT(kvm_max_vcpu_id >= kvm_max_vcpus,
-		    "KVM_MAX_VCPU_ID (%d) must be at least as large as KVM_MAX_VCPUS (%d).",
+		    "KVM_MAX_VCPU_IDS (%d) must be at least as large as KVM_MAX_VCPUS (%d).",
 		    kvm_max_vcpu_id, kvm_max_vcpus);
 
 	test_vcpu_creation(0, kvm_max_vcpus);
diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic.c b/tools/testing/selftests/kvm/lib/aarch64/gic.c
new file mode 100644
index 000000000000..fff4fc27504d
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/aarch64/gic.c
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARM Generic Interrupt Controller (GIC) support
+ */
+
+#include <errno.h>
+#include <linux/bits.h>
+#include <linux/sizes.h>
+
+#include "kvm_util.h"
+
+#include <gic.h>
+#include "gic_private.h"
+#include "processor.h"
+#include "spinlock.h"
+
+static const struct gic_common_ops *gic_common_ops;
+static struct spinlock gic_lock;
+
+static void gic_cpu_init(unsigned int cpu, void *redist_base)
+{
+	gic_common_ops->gic_cpu_init(cpu, redist_base);
+}
+
+static void
+gic_dist_init(enum gic_type type, unsigned int nr_cpus, void *dist_base)
+{
+	const struct gic_common_ops *gic_ops = NULL;
+
+	spin_lock(&gic_lock);
+
+	/* Distributor initialization is needed only once per VM */
+	if (gic_common_ops) {
+		spin_unlock(&gic_lock);
+		return;
+	}
+
+	if (type == GIC_V3)
+		gic_ops = &gicv3_ops;
+
+	GUEST_ASSERT(gic_ops);
+
+	gic_ops->gic_init(nr_cpus, dist_base);
+	gic_common_ops = gic_ops;
+
+	/* Make sure that the initialized data is visible to all the vCPUs */
+	dsb(sy);
+
+	spin_unlock(&gic_lock);
+}
+
+void gic_init(enum gic_type type, unsigned int nr_cpus,
+		void *dist_base, void *redist_base)
+{
+	uint32_t cpu = guest_get_vcpuid();
+
+	GUEST_ASSERT(type < GIC_TYPE_MAX);
+	GUEST_ASSERT(dist_base);
+	GUEST_ASSERT(redist_base);
+	GUEST_ASSERT(nr_cpus);
+
+	gic_dist_init(type, nr_cpus, dist_base);
+	gic_cpu_init(cpu, redist_base);
+}
+
+void gic_irq_enable(unsigned int intid)
+{
+	GUEST_ASSERT(gic_common_ops);
+	gic_common_ops->gic_irq_enable(intid);
+}
+
+void gic_irq_disable(unsigned int intid)
+{
+	GUEST_ASSERT(gic_common_ops);
+	gic_common_ops->gic_irq_disable(intid);
+}
+
+unsigned int gic_get_and_ack_irq(void)
+{
+	uint64_t irqstat;
+	unsigned int intid;
+
+	GUEST_ASSERT(gic_common_ops);
+
+	irqstat = gic_common_ops->gic_read_iar();
+	intid = irqstat & GENMASK(23, 0);
+
+	return intid;
+}
+
+void gic_set_eoi(unsigned int intid)
+{
+	GUEST_ASSERT(gic_common_ops);
+	gic_common_ops->gic_write_eoir(intid);
+}
diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_private.h b/tools/testing/selftests/kvm/lib/aarch64/gic_private.h
new file mode 100644
index 000000000000..d81d739433dc
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/aarch64/gic_private.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * ARM Generic Interrupt Controller (GIC) private defines that's only
+ * shared among the GIC library code.
+ */
+
+#ifndef SELFTEST_KVM_GIC_PRIVATE_H
+#define SELFTEST_KVM_GIC_PRIVATE_H
+
+struct gic_common_ops {
+	void (*gic_init)(unsigned int nr_cpus, void *dist_base);
+	void (*gic_cpu_init)(unsigned int cpu, void *redist_base);
+	void (*gic_irq_enable)(unsigned int intid);
+	void (*gic_irq_disable)(unsigned int intid);
+	uint64_t (*gic_read_iar)(void);
+	void (*gic_write_eoir)(uint32_t irq);
+};
+
+extern const struct gic_common_ops gicv3_ops;
+
+#endif /* SELFTEST_KVM_GIC_PRIVATE_H */
diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c b/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c
new file mode 100644
index 000000000000..2dbf3339b62e
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c
@@ -0,0 +1,240 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARM Generic Interrupt Controller (GIC) v3 support
+ */
+
+#include <linux/sizes.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "delay.h"
+
+#include "gic_v3.h"
+#include "gic_private.h"
+
+struct gicv3_data {
+	void *dist_base;
+	void *redist_base[GICV3_MAX_CPUS];
+	unsigned int nr_cpus;
+	unsigned int nr_spis;
+};
+
+#define sgi_base_from_redist(redist_base) (redist_base + SZ_64K)
+
+enum gicv3_intid_range {
+	SGI_RANGE,
+	PPI_RANGE,
+	SPI_RANGE,
+	INVALID_RANGE,
+};
+
+static struct gicv3_data gicv3_data;
+
+static void gicv3_gicd_wait_for_rwp(void)
+{
+	unsigned int count = 100000; /* 1s */
+
+	while (readl(gicv3_data.dist_base + GICD_CTLR) & GICD_CTLR_RWP) {
+		GUEST_ASSERT(count--);
+		udelay(10);
+	}
+}
+
+static void gicv3_gicr_wait_for_rwp(void *redist_base)
+{
+	unsigned int count = 100000; /* 1s */
+
+	while (readl(redist_base + GICR_CTLR) & GICR_CTLR_RWP) {
+		GUEST_ASSERT(count--);
+		udelay(10);
+	}
+}
+
+static enum gicv3_intid_range get_intid_range(unsigned int intid)
+{
+	switch (intid) {
+	case 0 ... 15:
+		return SGI_RANGE;
+	case 16 ... 31:
+		return PPI_RANGE;
+	case 32 ... 1019:
+		return SPI_RANGE;
+	}
+
+	/* We should not be reaching here */
+	GUEST_ASSERT(0);
+
+	return INVALID_RANGE;
+}
+
+static uint64_t gicv3_read_iar(void)
+{
+	uint64_t irqstat = read_sysreg_s(SYS_ICC_IAR1_EL1);
+
+	dsb(sy);
+	return irqstat;
+}
+
+static void gicv3_write_eoir(uint32_t irq)
+{
+	write_sysreg_s(irq, SYS_ICC_EOIR1_EL1);
+	isb();
+}
+
+static void
+gicv3_config_irq(unsigned int intid, unsigned int offset)
+{
+	uint32_t cpu = guest_get_vcpuid();
+	uint32_t mask = 1 << (intid % 32);
+	enum gicv3_intid_range intid_range = get_intid_range(intid);
+	void *reg;
+
+	/* We care about 'cpu' only for SGIs or PPIs */
+	if (intid_range == SGI_RANGE || intid_range == PPI_RANGE) {
+		GUEST_ASSERT(cpu < gicv3_data.nr_cpus);
+
+		reg = sgi_base_from_redist(gicv3_data.redist_base[cpu]) +
+			offset;
+		writel(mask, reg);
+		gicv3_gicr_wait_for_rwp(gicv3_data.redist_base[cpu]);
+	} else if (intid_range == SPI_RANGE) {
+		reg = gicv3_data.dist_base + offset + (intid / 32) * 4;
+		writel(mask, reg);
+		gicv3_gicd_wait_for_rwp();
+	} else {
+		GUEST_ASSERT(0);
+	}
+}
+
+static void gicv3_irq_enable(unsigned int intid)
+{
+	gicv3_config_irq(intid, GICD_ISENABLER);
+}
+
+static void gicv3_irq_disable(unsigned int intid)
+{
+	gicv3_config_irq(intid, GICD_ICENABLER);
+}
+
+static void gicv3_enable_redist(void *redist_base)
+{
+	uint32_t val = readl(redist_base + GICR_WAKER);
+	unsigned int count = 100000; /* 1s */
+
+	val &= ~GICR_WAKER_ProcessorSleep;
+	writel(val, redist_base + GICR_WAKER);
+
+	/* Wait until the processor is 'active' */
+	while (readl(redist_base + GICR_WAKER) & GICR_WAKER_ChildrenAsleep) {
+		GUEST_ASSERT(count--);
+		udelay(10);
+	}
+}
+
+static inline void *gicr_base_cpu(void *redist_base, uint32_t cpu)
+{
+	/* Align all the redistributors sequentially */
+	return redist_base + cpu * SZ_64K * 2;
+}
+
+static void gicv3_cpu_init(unsigned int cpu, void *redist_base)
+{
+	void *sgi_base;
+	unsigned int i;
+	void *redist_base_cpu;
+
+	GUEST_ASSERT(cpu < gicv3_data.nr_cpus);
+
+	redist_base_cpu = gicr_base_cpu(redist_base, cpu);
+	sgi_base = sgi_base_from_redist(redist_base_cpu);
+
+	gicv3_enable_redist(redist_base_cpu);
+
+	/*
+	 * Mark all the SGI and PPI interrupts as non-secure Group-1.
+	 * Also, deactivate and disable them.
+	 */
+	writel(~0, sgi_base + GICR_IGROUPR0);
+	writel(~0, sgi_base + GICR_ICACTIVER0);
+	writel(~0, sgi_base + GICR_ICENABLER0);
+
+	/* Set a default priority for all the SGIs and PPIs */
+	for (i = 0; i < 32; i += 4)
+		writel(GICD_INT_DEF_PRI_X4,
+				sgi_base + GICR_IPRIORITYR0 + i);
+
+	gicv3_gicr_wait_for_rwp(redist_base_cpu);
+
+	/* Enable the GIC system register (ICC_*) access */
+	write_sysreg_s(read_sysreg_s(SYS_ICC_SRE_EL1) | ICC_SRE_EL1_SRE,
+			SYS_ICC_SRE_EL1);
+
+	/* Set a default priority threshold */
+	write_sysreg_s(ICC_PMR_DEF_PRIO, SYS_ICC_PMR_EL1);
+
+	/* Enable non-secure Group-1 interrupts */
+	write_sysreg_s(ICC_IGRPEN1_EL1_ENABLE, SYS_ICC_GRPEN1_EL1);
+
+	gicv3_data.redist_base[cpu] = redist_base_cpu;
+}
+
+static void gicv3_dist_init(void)
+{
+	void *dist_base = gicv3_data.dist_base;
+	unsigned int i;
+
+	/* Disable the distributor until we set things up */
+	writel(0, dist_base + GICD_CTLR);
+	gicv3_gicd_wait_for_rwp();
+
+	/*
+	 * Mark all the SPI interrupts as non-secure Group-1.
+	 * Also, deactivate and disable them.
+	 */
+	for (i = 32; i < gicv3_data.nr_spis; i += 32) {
+		writel(~0, dist_base + GICD_IGROUPR + i / 8);
+		writel(~0, dist_base + GICD_ICACTIVER + i / 8);
+		writel(~0, dist_base + GICD_ICENABLER + i / 8);
+	}
+
+	/* Set a default priority for all the SPIs */
+	for (i = 32; i < gicv3_data.nr_spis; i += 4)
+		writel(GICD_INT_DEF_PRI_X4,
+				dist_base + GICD_IPRIORITYR + i);
+
+	/* Wait for the settings to sync-in */
+	gicv3_gicd_wait_for_rwp();
+
+	/* Finally, enable the distributor globally with ARE */
+	writel(GICD_CTLR_ARE_NS | GICD_CTLR_ENABLE_G1A |
+			GICD_CTLR_ENABLE_G1, dist_base + GICD_CTLR);
+	gicv3_gicd_wait_for_rwp();
+}
+
+static void gicv3_init(unsigned int nr_cpus, void *dist_base)
+{
+	GUEST_ASSERT(nr_cpus <= GICV3_MAX_CPUS);
+
+	gicv3_data.nr_cpus = nr_cpus;
+	gicv3_data.dist_base = dist_base;
+	gicv3_data.nr_spis = GICD_TYPER_SPIS(
+				readl(gicv3_data.dist_base + GICD_TYPER));
+	if (gicv3_data.nr_spis > 1020)
+		gicv3_data.nr_spis = 1020;
+
+	/*
+	 * Initialize only the distributor for now.
+	 * The redistributor and CPU interfaces are initialized
+	 * later for every PE.
+	 */
+	gicv3_dist_init();
+}
+
+const struct gic_common_ops gicv3_ops = {
+	.gic_init = gicv3_init,
+	.gic_cpu_init = gicv3_cpu_init,
+	.gic_irq_enable = gicv3_irq_enable,
+	.gic_irq_disable = gicv3_irq_disable,
+	.gic_read_iar = gicv3_read_iar,
+	.gic_write_eoir = gicv3_write_eoir,
+};
diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.h b/tools/testing/selftests/kvm/lib/aarch64/gic_v3.h
new file mode 100644
index 000000000000..b51536d469a6
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/aarch64/gic_v3.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * ARM Generic Interrupt Controller (GIC) v3 specific defines
+ */
+
+#ifndef SELFTEST_KVM_GICV3_H
+#define SELFTEST_KVM_GICV3_H
+
+#include <asm/sysreg.h>
+
+/*
+ * Distributor registers
+ */
+#define GICD_CTLR			0x0000
+#define GICD_TYPER			0x0004
+#define GICD_IGROUPR			0x0080
+#define GICD_ISENABLER			0x0100
+#define GICD_ICENABLER			0x0180
+#define GICD_ICACTIVER			0x0380
+#define GICD_IPRIORITYR			0x0400
+
+/*
+ * The assumption is that the guest runs in a non-secure mode.
+ * The following bits of GICD_CTLR are defined accordingly.
+ */
+#define GICD_CTLR_RWP			(1U << 31)
+#define GICD_CTLR_nASSGIreq		(1U << 8)
+#define GICD_CTLR_ARE_NS		(1U << 4)
+#define GICD_CTLR_ENABLE_G1A		(1U << 1)
+#define GICD_CTLR_ENABLE_G1		(1U << 0)
+
+#define GICD_TYPER_SPIS(typer)		((((typer) & 0x1f) + 1) * 32)
+#define GICD_INT_DEF_PRI_X4		0xa0a0a0a0
+
+/*
+ * Redistributor registers
+ */
+#define GICR_CTLR			0x000
+#define GICR_WAKER			0x014
+
+#define GICR_CTLR_RWP			(1U << 3)
+
+#define GICR_WAKER_ProcessorSleep	(1U << 1)
+#define GICR_WAKER_ChildrenAsleep	(1U << 2)
+
+/*
+ * Redistributor registers, offsets from SGI base
+ */
+#define GICR_IGROUPR0			GICD_IGROUPR
+#define GICR_ISENABLER0			GICD_ISENABLER
+#define GICR_ICENABLER0			GICD_ICENABLER
+#define GICR_ICACTIVER0			GICD_ICACTIVER
+#define GICR_IPRIORITYR0		GICD_IPRIORITYR
+
+/* CPU interface registers */
+#define SYS_ICC_PMR_EL1			sys_reg(3, 0, 4, 6, 0)
+#define SYS_ICC_IAR1_EL1		sys_reg(3, 0, 12, 12, 0)
+#define SYS_ICC_EOIR1_EL1		sys_reg(3, 0, 12, 12, 1)
+#define SYS_ICC_SRE_EL1			sys_reg(3, 0, 12, 12, 5)
+#define SYS_ICC_GRPEN1_EL1		sys_reg(3, 0, 12, 12, 7)
+
+#define ICC_PMR_DEF_PRIO		0xf0
+
+#define ICC_SRE_EL1_SRE			(1U << 0)
+
+#define ICC_IGRPEN1_EL1_ENABLE		(1U << 0)
+
+#define GICV3_MAX_CPUS			512
+
+#endif /* SELFTEST_KVM_GICV3_H */
diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c
index 632b74d6b3ca..b4eeeafd2a70 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/processor.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c
@@ -212,7 +212,7 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
 	}
 }
 
-void aarch64_vcpu_setup(struct kvm_vm *vm, int vcpuid, struct kvm_vcpu_init *init)
+void aarch64_vcpu_setup(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_vcpu_init *init)
 {
 	struct kvm_vcpu_init default_init = { .target = -1, };
 	uint64_t sctlr_el1, tcr_el1;
@@ -232,10 +232,10 @@ void aarch64_vcpu_setup(struct kvm_vm *vm, int vcpuid, struct kvm_vcpu_init *ini
 	 * Enable FP/ASIMD to avoid trapping when accessing Q0-Q15
 	 * registers, which the variable argument list macros do.
 	 */
-	set_reg(vm, vcpuid, ARM64_SYS_REG(CPACR_EL1), 3 << 20);
+	set_reg(vm, vcpuid, KVM_ARM64_SYS_REG(SYS_CPACR_EL1), 3 << 20);
 
-	get_reg(vm, vcpuid, ARM64_SYS_REG(SCTLR_EL1), &sctlr_el1);
-	get_reg(vm, vcpuid, ARM64_SYS_REG(TCR_EL1), &tcr_el1);
+	get_reg(vm, vcpuid, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), &sctlr_el1);
+	get_reg(vm, vcpuid, KVM_ARM64_SYS_REG(SYS_TCR_EL1), &tcr_el1);
 
 	switch (vm->mode) {
 	case VM_MODE_P52V48_4K:
@@ -273,10 +273,11 @@ void aarch64_vcpu_setup(struct kvm_vm *vm, int vcpuid, struct kvm_vcpu_init *ini
 	tcr_el1 |= (1 << 8) | (1 << 10) | (3 << 12);
 	tcr_el1 |= (64 - vm->va_bits) /* T0SZ */;
 
-	set_reg(vm, vcpuid, ARM64_SYS_REG(SCTLR_EL1), sctlr_el1);
-	set_reg(vm, vcpuid, ARM64_SYS_REG(TCR_EL1), tcr_el1);
-	set_reg(vm, vcpuid, ARM64_SYS_REG(MAIR_EL1), DEFAULT_MAIR_EL1);
-	set_reg(vm, vcpuid, ARM64_SYS_REG(TTBR0_EL1), vm->pgd);
+	set_reg(vm, vcpuid, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), sctlr_el1);
+	set_reg(vm, vcpuid, KVM_ARM64_SYS_REG(SYS_TCR_EL1), tcr_el1);
+	set_reg(vm, vcpuid, KVM_ARM64_SYS_REG(SYS_MAIR_EL1), DEFAULT_MAIR_EL1);
+	set_reg(vm, vcpuid, KVM_ARM64_SYS_REG(SYS_TTBR0_EL1), vm->pgd);
+	set_reg(vm, vcpuid, KVM_ARM64_SYS_REG(SYS_TPIDR_EL1), vcpuid);
 }
 
 void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
@@ -362,7 +363,7 @@ void vcpu_init_descriptor_tables(struct kvm_vm *vm, uint32_t vcpuid)
 {
 	extern char vectors;
 
-	set_reg(vm, vcpuid, ARM64_SYS_REG(VBAR_EL1), (uint64_t)&vectors);
+	set_reg(vm, vcpuid, KVM_ARM64_SYS_REG(SYS_VBAR_EL1), (uint64_t)&vectors);
 }
 
 void route_exception(struct ex_regs *regs, int vector)
@@ -426,3 +427,8 @@ void vm_install_exception_handler(struct kvm_vm *vm, int vector,
 	assert(vector < VECTOR_NUM);
 	handlers->exception_handlers[vector][0] = handler;
 }
+
+uint32_t guest_get_vcpuid(void)
+{
+	return read_sysreg(tpidr_el1);
+}
diff --git a/tools/testing/selftests/kvm/lib/aarch64/spinlock.c b/tools/testing/selftests/kvm/lib/aarch64/spinlock.c
new file mode 100644
index 000000000000..a076e780be5d
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/aarch64/spinlock.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARM64 Spinlock support
+ */
+#include <stdint.h>
+
+#include "spinlock.h"
+
+void spin_lock(struct spinlock *lock)
+{
+	int val, res;
+
+	asm volatile(
+	"1:	ldaxr	%w0, [%2]\n"
+	"	cbnz	%w0, 1b\n"
+	"	mov	%w0, #1\n"
+	"	stxr	%w1, %w0, [%2]\n"
+	"	cbnz	%w1, 1b\n"
+	: "=&r" (val), "=&r" (res)
+	: "r" (&lock->v)
+	: "memory");
+}
+
+void spin_unlock(struct spinlock *lock)
+{
+	asm volatile("stlr wzr, [%0]\n"	: : "r" (&lock->v) : "memory");
+}
diff --git a/tools/testing/selftests/kvm/lib/aarch64/vgic.c b/tools/testing/selftests/kvm/lib/aarch64/vgic.c
new file mode 100644
index 000000000000..b9b271ff520d
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/aarch64/vgic.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARM Generic Interrupt Controller (GIC) v3 host support
+ */
+
+#include <linux/kvm.h>
+#include <linux/sizes.h>
+#include <asm/kvm.h>
+
+#include "kvm_util.h"
+#include "../kvm_util_internal.h"
+#include "vgic.h"
+
+/*
+ * vGIC-v3 default host setup
+ *
+ * Input args:
+ *	vm - KVM VM
+ *	nr_vcpus - Number of vCPUs supported by this VM
+ *	gicd_base_gpa - Guest Physical Address of the Distributor region
+ *	gicr_base_gpa - Guest Physical Address of the Redistributor region
+ *
+ * Output args: None
+ *
+ * Return: GIC file-descriptor or negative error code upon failure
+ *
+ * The function creates a vGIC-v3 device and maps the distributor and
+ * redistributor regions of the guest. Since it depends on the number of
+ * vCPUs for the VM, it must be called after all the vCPUs have been created.
+ */
+int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus,
+		uint64_t gicd_base_gpa, uint64_t gicr_base_gpa)
+{
+	int gic_fd;
+	uint64_t redist_attr;
+	struct list_head *iter;
+	unsigned int nr_gic_pages, nr_vcpus_created = 0;
+
+	TEST_ASSERT(nr_vcpus, "Number of vCPUs cannot be empty\n");
+
+	/*
+	 * Make sure that the caller is infact calling this
+	 * function after all the vCPUs are added.
+	 */
+	list_for_each(iter, &vm->vcpus)
+		nr_vcpus_created++;
+	TEST_ASSERT(nr_vcpus == nr_vcpus_created,
+			"Number of vCPUs requested (%u) doesn't match with the ones created for the VM (%u)\n",
+			nr_vcpus, nr_vcpus_created);
+
+	/* Distributor setup */
+	gic_fd = kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3, false);
+	kvm_device_access(gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+			KVM_VGIC_V3_ADDR_TYPE_DIST, &gicd_base_gpa, true);
+	nr_gic_pages = vm_calc_num_guest_pages(vm->mode, KVM_VGIC_V3_DIST_SIZE);
+	virt_map(vm, gicd_base_gpa, gicd_base_gpa,  nr_gic_pages);
+
+	/* Redistributor setup */
+	redist_attr = REDIST_REGION_ATTR_ADDR(nr_vcpus, gicr_base_gpa, 0, 0);
+	kvm_device_access(gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+			KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &redist_attr, true);
+	nr_gic_pages = vm_calc_num_guest_pages(vm->mode,
+						KVM_VGIC_V3_REDIST_SIZE * nr_vcpus);
+	virt_map(vm, gicr_base_gpa, gicr_base_gpa,  nr_gic_pages);
+
+	kvm_device_access(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+				KVM_DEV_ARM_VGIC_CTRL_INIT, NULL, true);
+
+	return gic_fd;
+}
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 10a8ed691c66..041004c0fda7 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -1792,7 +1792,7 @@ void vcpu_sregs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs)
 void vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs)
 {
 	int ret = _vcpu_sregs_set(vm, vcpuid, sregs);
-	TEST_ASSERT(ret == 0, "KVM_RUN IOCTL failed, "
+	TEST_ASSERT(ret == 0, "KVM_SET_SREGS IOCTL failed, "
 		"rc: %i errno: %i", ret, errno);
 }
 
@@ -1984,7 +1984,7 @@ int kvm_device_check_attr(int dev_fd, uint32_t group, uint64_t attr)
 {
 	int ret = _kvm_device_check_attr(dev_fd, group, attr);
 
-	TEST_ASSERT(ret >= 0, "KVM_HAS_DEVICE_ATTR failed, rc: %i errno: %i", ret, errno);
+	TEST_ASSERT(!ret, "KVM_HAS_DEVICE_ATTR failed, rc: %i errno: %i", ret, errno);
 	return ret;
 }
 
@@ -2008,7 +2008,7 @@ int kvm_create_device(struct kvm_vm *vm, uint64_t type, bool test)
 	ret = _kvm_create_device(vm, type, test, &fd);
 
 	if (!test) {
-		TEST_ASSERT(ret >= 0,
+		TEST_ASSERT(!ret,
 			    "KVM_CREATE_DEVICE IOCTL failed, rc: %i errno: %i", ret, errno);
 		return fd;
 	}
@@ -2036,7 +2036,45 @@ int kvm_device_access(int dev_fd, uint32_t group, uint64_t attr,
 {
 	int ret = _kvm_device_access(dev_fd, group, attr, val, write);
 
-	TEST_ASSERT(ret >= 0, "KVM_SET|GET_DEVICE_ATTR IOCTL failed, rc: %i errno: %i", ret, errno);
+	TEST_ASSERT(!ret, "KVM_SET|GET_DEVICE_ATTR IOCTL failed, rc: %i errno: %i", ret, errno);
+	return ret;
+}
+
+int _vcpu_has_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group,
+			  uint64_t attr)
+{
+	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+
+	TEST_ASSERT(vcpu, "nonexistent vcpu id: %d", vcpuid);
+
+	return _kvm_device_check_attr(vcpu->fd, group, attr);
+}
+
+int vcpu_has_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group,
+				 uint64_t attr)
+{
+	int ret = _vcpu_has_device_attr(vm, vcpuid, group, attr);
+
+	TEST_ASSERT(!ret, "KVM_HAS_DEVICE_ATTR IOCTL failed, rc: %i errno: %i", ret, errno);
+	return ret;
+}
+
+int _vcpu_access_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group,
+			     uint64_t attr, void *val, bool write)
+{
+	struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+
+	TEST_ASSERT(vcpu, "nonexistent vcpu id: %d", vcpuid);
+
+	return _kvm_device_access(vcpu->fd, group, attr, val, write);
+}
+
+int vcpu_access_device_attr(struct kvm_vm *vm, uint32_t vcpuid, uint32_t group,
+			    uint64_t attr, void *val, bool write)
+{
+	int ret = _vcpu_access_device_attr(vm, vcpuid, group, attr, val, write);
+
+	TEST_ASSERT(!ret, "KVM_SET|GET_DEVICE_ATTR IOCTL failed, rc: %i errno: %i", ret, errno);
 	return ret;
 }
 
diff --git a/tools/testing/selftests/kvm/lib/sparsebit.c b/tools/testing/selftests/kvm/lib/sparsebit.c
index a0d0c83d83de..50e0cf41a7dd 100644
--- a/tools/testing/selftests/kvm/lib/sparsebit.c
+++ b/tools/testing/selftests/kvm/lib/sparsebit.c
@@ -1866,7 +1866,7 @@ void sparsebit_validate_internal(struct sparsebit *s)
 		 * of total bits set.
 		 */
 		if (s->num_set != total_bits_set) {
-			fprintf(stderr, "Number of bits set missmatch,\n"
+			fprintf(stderr, "Number of bits set mismatch,\n"
 				"  s->num_set: 0x%lx total_bits_set: 0x%lx",
 				s->num_set, total_bits_set);
 
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index 28cb881f440d..82c39db91369 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -660,6 +660,7 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
 
 	/* Create VCPU */
 	vm_vcpu_add(vm, vcpuid);
+	vcpu_set_cpuid(vm, vcpuid, kvm_get_supported_cpuid());
 	vcpu_setup(vm, vcpuid);
 
 	/* Setup guest general purpose registers */
@@ -672,9 +673,6 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
 	/* Setup the MP state */
 	mp_state.mp_state = 0;
 	vcpu_set_mp_state(vm, vcpuid, &mp_state);
-
-	/* Setup supported CPUIDs */
-	vcpu_set_cpuid(vm, vcpuid, kvm_get_supported_cpuid());
 }
 
 /*
diff --git a/tools/testing/selftests/kvm/lib/x86_64/svm.c b/tools/testing/selftests/kvm/lib/x86_64/svm.c
index 2ac98d70d02b..161eba7cd128 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/svm.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/svm.c
@@ -54,6 +54,18 @@ static void vmcb_set_seg(struct vmcb_seg *seg, u16 selector,
 	seg->base = base;
 }
 
+/*
+ * Avoid using memset to clear the vmcb, since libc may not be
+ * available in L1 (and, even if it is, features that libc memset may
+ * want to use, like AVX, may not be enabled).
+ */
+static void clear_vmcb(struct vmcb *vmcb)
+{
+	int n = sizeof(*vmcb) / sizeof(u32);
+
+	asm volatile ("rep stosl" : "+c"(n), "+D"(vmcb) : "a"(0) : "memory");
+}
+
 void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_rsp)
 {
 	struct vmcb *vmcb = svm->vmcb;
@@ -70,7 +82,7 @@ void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_r
 	wrmsr(MSR_EFER, efer | EFER_SVME);
 	wrmsr(MSR_VM_HSAVE_PA, svm->save_area_gpa);
 
-	memset(vmcb, 0, sizeof(*vmcb));
+	clear_vmcb(vmcb);
 	asm volatile ("vmsave %0\n\t" : : "a" (vmcb_gpa) : "memory");
 	vmcb_set_seg(&save->es, get_es(), 0, -1U, data_seg_attr);
 	vmcb_set_seg(&save->cs, get_cs(), 0, -1U, code_seg_attr);
diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c
index d6e381e01db7..1727f75e0c2c 100644
--- a/tools/testing/selftests/kvm/memslot_perf_test.c
+++ b/tools/testing/selftests/kvm/memslot_perf_test.c
@@ -127,43 +127,54 @@ static bool verbose;
 			pr_info(__VA_ARGS__);	\
 	} while (0)
 
+static void check_mmio_access(struct vm_data *vm, struct kvm_run *run)
+{
+	TEST_ASSERT(vm->mmio_ok, "Unexpected mmio exit");
+	TEST_ASSERT(run->mmio.is_write, "Unexpected mmio read");
+	TEST_ASSERT(run->mmio.len == 8,
+		    "Unexpected exit mmio size = %u", run->mmio.len);
+	TEST_ASSERT(run->mmio.phys_addr >= vm->mmio_gpa_min &&
+		    run->mmio.phys_addr <= vm->mmio_gpa_max,
+		    "Unexpected exit mmio address = 0x%llx",
+		    run->mmio.phys_addr);
+}
+
 static void *vcpu_worker(void *data)
 {
 	struct vm_data *vm = data;
 	struct kvm_run *run;
 	struct ucall uc;
-	uint64_t cmd;
 
 	run = vcpu_state(vm->vm, VCPU_ID);
 	while (1) {
 		vcpu_run(vm->vm, VCPU_ID);
 
-		if (run->exit_reason == KVM_EXIT_IO) {
-			cmd = get_ucall(vm->vm, VCPU_ID, &uc);
-			if (cmd != UCALL_SYNC)
-				break;
-
+		switch (get_ucall(vm->vm, VCPU_ID, &uc)) {
+		case UCALL_SYNC:
+			TEST_ASSERT(uc.args[1] == 0,
+				"Unexpected sync ucall, got %lx",
+				(ulong)uc.args[1]);
 			sem_post(&vcpu_ready);
 			continue;
-		}
-
-		if (run->exit_reason != KVM_EXIT_MMIO)
+		case UCALL_NONE:
+			if (run->exit_reason == KVM_EXIT_MMIO)
+				check_mmio_access(vm, run);
+			else
+				goto done;
 			break;
-
-		TEST_ASSERT(vm->mmio_ok, "Unexpected mmio exit");
-		TEST_ASSERT(run->mmio.is_write, "Unexpected mmio read");
-		TEST_ASSERT(run->mmio.len == 8,
-			    "Unexpected exit mmio size = %u", run->mmio.len);
-		TEST_ASSERT(run->mmio.phys_addr >= vm->mmio_gpa_min &&
-			    run->mmio.phys_addr <= vm->mmio_gpa_max,
-			    "Unexpected exit mmio address = 0x%llx",
-			    run->mmio.phys_addr);
+		case UCALL_ABORT:
+			TEST_FAIL("%s at %s:%ld, val = %lu",
+					(const char *)uc.args[0],
+					__FILE__, uc.args[1], uc.args[2]);
+			break;
+		case UCALL_DONE:
+			goto done;
+		default:
+			TEST_FAIL("Unknown ucall %lu", uc.cmd);
+		}
 	}
 
-	if (run->exit_reason == KVM_EXIT_IO && cmd == UCALL_ABORT)
-		TEST_FAIL("%s at %s:%ld, val = %lu", (const char *)uc.args[0],
-			  __FILE__, uc.args[1], uc.args[2]);
-
+done:
 	return NULL;
 }
 
@@ -268,6 +279,7 @@ static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots,
 	TEST_ASSERT(data->hva_slots, "malloc() fail");
 
 	data->vm = vm_create_default(VCPU_ID, mempages, guest_code);
+	ucall_init(data->vm, NULL);
 
 	pr_info_v("Adding slots 1..%i, each slot with %"PRIu64" pages + %"PRIu64" extra pages last\n",
 		max_mem_slots - 1, data->pages_per_slot, rempages);
diff --git a/tools/testing/selftests/kvm/system_counter_offset_test.c b/tools/testing/selftests/kvm/system_counter_offset_test.c
new file mode 100644
index 000000000000..b337bbbfa41f
--- /dev/null
+++ b/tools/testing/selftests/kvm/system_counter_offset_test.c
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021, Google LLC.
+ *
+ * Tests for adjusting the system counter from userspace
+ */
+#include <asm/kvm_para.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <time.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#define VCPU_ID 0
+
+#ifdef __x86_64__
+
+struct test_case {
+	uint64_t tsc_offset;
+};
+
+static struct test_case test_cases[] = {
+	{ 0 },
+	{ 180 * NSEC_PER_SEC },
+	{ -180 * NSEC_PER_SEC },
+};
+
+static void check_preconditions(struct kvm_vm *vm)
+{
+	if (!_vcpu_has_device_attr(vm, VCPU_ID, KVM_VCPU_TSC_CTRL, KVM_VCPU_TSC_OFFSET))
+		return;
+
+	print_skip("KVM_VCPU_TSC_OFFSET not supported; skipping test");
+	exit(KSFT_SKIP);
+}
+
+static void setup_system_counter(struct kvm_vm *vm, struct test_case *test)
+{
+	vcpu_access_device_attr(vm, VCPU_ID, KVM_VCPU_TSC_CTRL,
+				KVM_VCPU_TSC_OFFSET, &test->tsc_offset, true);
+}
+
+static uint64_t guest_read_system_counter(struct test_case *test)
+{
+	return rdtsc();
+}
+
+static uint64_t host_read_guest_system_counter(struct test_case *test)
+{
+	return rdtsc() + test->tsc_offset;
+}
+
+#else /* __x86_64__ */
+
+#error test not implemented for this architecture!
+
+#endif
+
+#define GUEST_SYNC_CLOCK(__stage, __val)			\
+		GUEST_SYNC_ARGS(__stage, __val, 0, 0, 0)
+
+static void guest_main(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
+		struct test_case *test = &test_cases[i];
+
+		GUEST_SYNC_CLOCK(i, guest_read_system_counter(test));
+	}
+}
+
+static void handle_sync(struct ucall *uc, uint64_t start, uint64_t end)
+{
+	uint64_t obs = uc->args[2];
+
+	TEST_ASSERT(start <= obs && obs <= end,
+		    "unexpected system counter value: %"PRIu64" expected range: [%"PRIu64", %"PRIu64"]",
+		    obs, start, end);
+
+	pr_info("system counter value: %"PRIu64" expected range [%"PRIu64", %"PRIu64"]\n",
+		obs, start, end);
+}
+
+static void handle_abort(struct ucall *uc)
+{
+	TEST_FAIL("%s at %s:%ld", (const char *)uc->args[0],
+		  __FILE__, uc->args[1]);
+}
+
+static void enter_guest(struct kvm_vm *vm)
+{
+	uint64_t start, end;
+	struct ucall uc;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
+		struct test_case *test = &test_cases[i];
+
+		setup_system_counter(vm, test);
+		start = host_read_guest_system_counter(test);
+		vcpu_run(vm, VCPU_ID);
+		end = host_read_guest_system_counter(test);
+
+		switch (get_ucall(vm, VCPU_ID, &uc)) {
+		case UCALL_SYNC:
+			handle_sync(&uc, start, end);
+			break;
+		case UCALL_ABORT:
+			handle_abort(&uc);
+			return;
+		default:
+			TEST_ASSERT(0, "unhandled ucall %ld\n",
+				    get_ucall(vm, VCPU_ID, &uc));
+		}
+	}
+}
+
+int main(void)
+{
+	struct kvm_vm *vm;
+
+	vm = vm_create_default(VCPU_ID, 0, guest_main);
+	check_preconditions(vm);
+	ucall_init(vm, NULL);
+
+	enter_guest(vm);
+	kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c
index f40fd097cb35..6f6fd189dda3 100644
--- a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c
+++ b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c
@@ -109,8 +109,7 @@ int main(int argc, char *argv[])
 		}
 	}
 
-	kvm_vm_free(vm);
-
 done:
+	kvm_vm_free(vm);
 	return 0;
 }
diff --git a/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c b/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c
new file mode 100644
index 000000000000..97731454f3f3
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c
@@ -0,0 +1,203 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021, Google LLC.
+ *
+ * Tests for adjusting the KVM clock from userspace
+ */
+#include <asm/kvm_para.h>
+#include <asm/pvclock.h>
+#include <asm/pvclock-abi.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <time.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#define VCPU_ID 0
+
+struct test_case {
+	uint64_t kvmclock_base;
+	int64_t realtime_offset;
+};
+
+static struct test_case test_cases[] = {
+	{ .kvmclock_base = 0 },
+	{ .kvmclock_base = 180 * NSEC_PER_SEC },
+	{ .kvmclock_base = 0, .realtime_offset = -180 * NSEC_PER_SEC },
+	{ .kvmclock_base = 0, .realtime_offset = 180 * NSEC_PER_SEC },
+};
+
+#define GUEST_SYNC_CLOCK(__stage, __val)			\
+		GUEST_SYNC_ARGS(__stage, __val, 0, 0, 0)
+
+static void guest_main(vm_paddr_t pvti_pa, struct pvclock_vcpu_time_info *pvti)
+{
+	int i;
+
+	wrmsr(MSR_KVM_SYSTEM_TIME_NEW, pvti_pa | KVM_MSR_ENABLED);
+	for (i = 0; i < ARRAY_SIZE(test_cases); i++)
+		GUEST_SYNC_CLOCK(i, __pvclock_read_cycles(pvti, rdtsc()));
+}
+
+#define EXPECTED_FLAGS (KVM_CLOCK_REALTIME | KVM_CLOCK_HOST_TSC)
+
+static inline void assert_flags(struct kvm_clock_data *data)
+{
+	TEST_ASSERT((data->flags & EXPECTED_FLAGS) == EXPECTED_FLAGS,
+		    "unexpected clock data flags: %x (want set: %x)",
+		    data->flags, EXPECTED_FLAGS);
+}
+
+static void handle_sync(struct ucall *uc, struct kvm_clock_data *start,
+			struct kvm_clock_data *end)
+{
+	uint64_t obs, exp_lo, exp_hi;
+
+	obs = uc->args[2];
+	exp_lo = start->clock;
+	exp_hi = end->clock;
+
+	assert_flags(start);
+	assert_flags(end);
+
+	TEST_ASSERT(exp_lo <= obs && obs <= exp_hi,
+		    "unexpected kvm-clock value: %"PRIu64" expected range: [%"PRIu64", %"PRIu64"]",
+		    obs, exp_lo, exp_hi);
+
+	pr_info("kvm-clock value: %"PRIu64" expected range [%"PRIu64", %"PRIu64"]\n",
+		obs, exp_lo, exp_hi);
+}
+
+static void handle_abort(struct ucall *uc)
+{
+	TEST_FAIL("%s at %s:%ld", (const char *)uc->args[0],
+		  __FILE__, uc->args[1]);
+}
+
+static void setup_clock(struct kvm_vm *vm, struct test_case *test_case)
+{
+	struct kvm_clock_data data;
+
+	memset(&data, 0, sizeof(data));
+
+	data.clock = test_case->kvmclock_base;
+	if (test_case->realtime_offset) {
+		struct timespec ts;
+		int r;
+
+		data.flags |= KVM_CLOCK_REALTIME;
+		do {
+			r = clock_gettime(CLOCK_REALTIME, &ts);
+			if (!r)
+				break;
+		} while (errno == EINTR);
+
+		TEST_ASSERT(!r, "clock_gettime() failed: %d\n", r);
+
+		data.realtime = ts.tv_sec * NSEC_PER_SEC;
+		data.realtime += ts.tv_nsec;
+		data.realtime += test_case->realtime_offset;
+	}
+
+	vm_ioctl(vm, KVM_SET_CLOCK, &data);
+}
+
+static void enter_guest(struct kvm_vm *vm)
+{
+	struct kvm_clock_data start, end;
+	struct kvm_run *run;
+	struct ucall uc;
+	int i, r;
+
+	run = vcpu_state(vm, VCPU_ID);
+
+	for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
+		setup_clock(vm, &test_cases[i]);
+
+		vm_ioctl(vm, KVM_GET_CLOCK, &start);
+
+		r = _vcpu_run(vm, VCPU_ID);
+		vm_ioctl(vm, KVM_GET_CLOCK, &end);
+
+		TEST_ASSERT(!r, "vcpu_run failed: %d\n", r);
+		TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+			    "unexpected exit reason: %u (%s)",
+			    run->exit_reason, exit_reason_str(run->exit_reason));
+
+		switch (get_ucall(vm, VCPU_ID, &uc)) {
+		case UCALL_SYNC:
+			handle_sync(&uc, &start, &end);
+			break;
+		case UCALL_ABORT:
+			handle_abort(&uc);
+			return;
+		default:
+			TEST_ASSERT(0, "unhandled ucall: %ld\n", uc.cmd);
+		}
+	}
+}
+
+#define CLOCKSOURCE_PATH "/sys/devices/system/clocksource/clocksource0/current_clocksource"
+
+static void check_clocksource(void)
+{
+	char *clk_name;
+	struct stat st;
+	FILE *fp;
+
+	fp = fopen(CLOCKSOURCE_PATH, "r");
+	if (!fp) {
+		pr_info("failed to open clocksource file: %d; assuming TSC.\n",
+			errno);
+		return;
+	}
+
+	if (fstat(fileno(fp), &st)) {
+		pr_info("failed to stat clocksource file: %d; assuming TSC.\n",
+			errno);
+		goto out;
+	}
+
+	clk_name = malloc(st.st_size);
+	TEST_ASSERT(clk_name, "failed to allocate buffer to read file\n");
+
+	if (!fgets(clk_name, st.st_size, fp)) {
+		pr_info("failed to read clocksource file: %d; assuming TSC.\n",
+			ferror(fp));
+		goto out;
+	}
+
+	TEST_ASSERT(!strncmp(clk_name, "tsc\n", st.st_size),
+		    "clocksource not supported: %s", clk_name);
+out:
+	fclose(fp);
+}
+
+int main(void)
+{
+	vm_vaddr_t pvti_gva;
+	vm_paddr_t pvti_gpa;
+	struct kvm_vm *vm;
+	int flags;
+
+	flags = kvm_check_cap(KVM_CAP_ADJUST_CLOCK);
+	if (!(flags & KVM_CLOCK_REALTIME)) {
+		print_skip("KVM_CLOCK_REALTIME not supported; flags: %x",
+			   flags);
+		exit(KSFT_SKIP);
+	}
+
+	check_clocksource();
+
+	vm = vm_create_default(VCPU_ID, 0, guest_main);
+
+	pvti_gva = vm_vaddr_alloc(vm, getpagesize(), 0x10000);
+	pvti_gpa = addr_gva2gpa(vm, pvti_gva);
+	vcpu_args_set(vm, VCPU_ID, 2, pvti_gpa, pvti_gva);
+
+	enter_guest(vm);
+	kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c b/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c
index 8039e1eff938..9f55ccd169a1 100644
--- a/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c
+++ b/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c
@@ -84,7 +84,7 @@ int get_warnings_count(void)
 	f = popen("dmesg | grep \"WARNING:\" | wc -l", "r");
 	if (fscanf(f, "%d", &warnings) < 1)
 		warnings = 0;
-	fclose(f);
+	pclose(f);
 
 	return warnings;
 }
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
index 7e33a350b053..e683d0ac3e45 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
@@ -161,7 +161,7 @@ int main(int argc, char *argv[])
 		}
 	}
 
-	kvm_vm_free(vm);
 done:
+	kvm_vm_free(vm);
 	return 0;
 }
diff --git a/tools/testing/selftests/lkdtm/config b/tools/testing/selftests/lkdtm/config
index 38edea25631b..a26a3fa9e925 100644
--- a/tools/testing/selftests/lkdtm/config
+++ b/tools/testing/selftests/lkdtm/config
@@ -8,3 +8,4 @@ CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT=y
 CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y
 CONFIG_UBSAN_BOUNDS=y
 CONFIG_UBSAN_TRAP=y
+CONFIG_STACKPROTECTOR_STRONG=y
diff --git a/tools/testing/selftests/lkdtm/run.sh b/tools/testing/selftests/lkdtm/run.sh
index e95e79bd3126..95e904959207 100755
--- a/tools/testing/selftests/lkdtm/run.sh
+++ b/tools/testing/selftests/lkdtm/run.sh
@@ -56,8 +56,14 @@ if echo "$test" | grep -q '^#' ; then
 fi
 
 # If no expected output given, assume an Oops with back trace is success.
+repeat=1
 if [ -z "$expect" ]; then
 	expect="call trace:"
+else
+	if echo "$expect" | grep -q '^repeat:' ; then
+		repeat=$(echo "$expect" | cut -d' ' -f1 | cut -d: -f2)
+		expect=$(echo "$expect" | cut -d' ' -f2-)
+	fi
 fi
 
 # Prepare log for report checking
@@ -83,7 +89,9 @@ dmesg > "$DMESG"
 # the signal that killed the subprocess, we must ignore the failure and
 # continue. However we don't silence stderr since there might be other
 # useful details reported there in the case of other unexpected conditions.
-echo "$test" | cat >"$TRIGGER" || true
+for i in $(seq 1 $repeat); do
+	echo "$test" | cat >"$TRIGGER" || true
+done
 
 # Record and dump the results
 dmesg | comm --nocheck-order -13 "$DMESG" - > "$LOG" || true
diff --git a/tools/testing/selftests/lkdtm/tests.txt b/tools/testing/selftests/lkdtm/tests.txt
index 09f7bfa383cc..6b36b7f5dcf9 100644
--- a/tools/testing/selftests/lkdtm/tests.txt
+++ b/tools/testing/selftests/lkdtm/tests.txt
@@ -12,6 +12,7 @@ CORRUPT_LIST_ADD list_add corruption
 CORRUPT_LIST_DEL list_del corruption
 STACK_GUARD_PAGE_LEADING
 STACK_GUARD_PAGE_TRAILING
+REPORT_STACK_CANARY repeat:2 ok: stack canaries differ
 UNSET_SMEP pinned CR4 bits changed:
 DOUBLE_FAULT
 CORRUPT_PAC
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 19deb9cdf72f..7581a7348e1b 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -31,3 +31,8 @@ rxtimestamp
 timestamping
 txtimestamp
 so_netns_cookie
+test_unix_oob
+gro
+ioam6_parser
+toeplitz
+cmsg_so_mark
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 492b273743b4..aee76d1bb9da 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -22,12 +22,14 @@ TEST_PROGS += devlink_port_split.py
 TEST_PROGS += drop_monitor_tests.sh
 TEST_PROGS += vrf_route_leaking.sh
 TEST_PROGS += bareudp.sh
+TEST_PROGS += amt.sh
 TEST_PROGS += unicast_extensions.sh
 TEST_PROGS += udpgro_fwd.sh
 TEST_PROGS += veth.sh
 TEST_PROGS += ioam6.sh
 TEST_PROGS += gro.sh
 TEST_PROGS += gre_gso.sh
+TEST_PROGS += cmsg_so_mark.sh
 TEST_PROGS_EXTENDED := in_netns.sh
 TEST_GEN_FILES =  socket nettest
 TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
@@ -44,6 +46,7 @@ TEST_GEN_FILES += gro
 TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
 TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls
 TEST_GEN_FILES += toeplitz
+TEST_GEN_FILES += cmsg_so_mark
 
 TEST_FILES := settings
 
diff --git a/tools/testing/selftests/net/amt.sh b/tools/testing/selftests/net/amt.sh
new file mode 100644
index 000000000000..75528788cb95
--- /dev/null
+++ b/tools/testing/selftests/net/amt.sh
@@ -0,0 +1,284 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+# Author: Taehee Yoo <ap420073@gmail.com>
+#
+# This script evaluates the AMT driver.
+# There are four network-namespaces, LISTENER, SOURCE, GATEWAY, RELAY.
+# The role of LISTENER is to listen multicast traffic.
+# In order to do that, it send IGMP group join message.
+# The role of SOURCE is to send multicast traffic to listener.
+# The role of GATEWAY is to work Gateway role of AMT interface.
+# The role of RELAY is to work Relay role of AMT interface.
+#
+#
+#       +------------------------+
+#       |    LISTENER netns      |
+#       |                        |
+#       |  +------------------+  |
+#       |  |       l_gw       |  |
+#       |  |  192.168.0.2/24  |  |
+#       |  |  2001:db8::2/64  |  |
+#       |  +------------------+  |
+#       |            .           |
+#       +------------------------+
+#                    .
+#                    .
+#       +-----------------------------------------------------+
+#       |            .         GATEWAY netns                  |
+#       |            .                                        |
+#       |+---------------------------------------------------+|
+#       ||           .          br0                          ||
+#       || +------------------+       +------------------+   ||
+#       || |       gw_l       |       |       amtg       |   ||
+#       || |  192.168.0.1/24  |       +--------+---------+   ||
+#       || |  2001:db8::1/64  |                |             ||
+#       || +------------------+                |             ||
+#       |+-------------------------------------|-------------+|
+#       |                                      |              |
+#       |                             +--------+---------+    |
+#       |                             |     gw_relay     |    |
+#       |                             |    10.0.0.1/24   |    |
+#       |                             +------------------+    |
+#       |                                      .              |
+#       +-----------------------------------------------------+
+#                                              .
+#                                              .
+#       +-----------------------------------------------------+
+#       |                       RELAY netns    .              |
+#       |                             +------------------+    |
+#       |                             |     relay_gw     |    |
+#       |                             |    10.0.0.2/24   |    |
+#       |                             +--------+---------+    |
+#       |                                      |              |
+#       |                                      |              |
+#       |  +------------------+       +--------+---------+    |
+#       |  |     relay_src    |       |       amtr       |    |
+#       |  |   172.17.0.1/24  |       +------------------+    |
+#       |  | 2001:db8:3::1/64 |                               |
+#       |  +------------------+                               |
+#       |            .                                        |
+#       |            .                                        |
+#       +-----------------------------------------------------+
+#                    .
+#                    .
+#       +------------------------+
+#       |            .           |
+#       |  +------------------+  |
+#       |  |     src_relay    |  |
+#       |  |   172.17.0.2/24  |  |
+#       |  | 2001:db8:3::2/64 |  |
+#       |  +------------------+  |
+#       |      SOURCE netns      |
+#       +------------------------+
+#==============================================================================
+
+readonly LISTENER=$(mktemp -u listener-XXXXXXXX)
+readonly GATEWAY=$(mktemp -u gateway-XXXXXXXX)
+readonly RELAY=$(mktemp -u relay-XXXXXXXX)
+readonly SOURCE=$(mktemp -u source-XXXXXXXX)
+ERR=4
+err=0
+
+exit_cleanup()
+{
+	for ns in "$@"; do
+		ip netns delete "${ns}" 2>/dev/null || true
+	done
+
+	exit $ERR
+}
+
+create_namespaces()
+{
+	ip netns add "${LISTENER}" || exit_cleanup
+	ip netns add "${GATEWAY}" || exit_cleanup "${LISTENER}"
+	ip netns add "${RELAY}" || exit_cleanup "${LISTENER}" "${GATEWAY}"
+	ip netns add "${SOURCE}" || exit_cleanup "${LISTENER}" "${GATEWAY}" \
+		"${RELAY}"
+}
+
+# The trap function handler
+#
+exit_cleanup_all()
+{
+	exit_cleanup "${LISTENER}" "${GATEWAY}" "${RELAY}" "${SOURCE}"
+}
+
+setup_interface()
+{
+	for ns in "${LISTENER}" "${GATEWAY}" "${RELAY}" "${SOURCE}"; do
+		ip -netns "${ns}" link set dev lo up
+	done;
+
+	ip link add l_gw type veth peer name gw_l
+	ip link add gw_relay type veth peer name relay_gw
+	ip link add relay_src type veth peer name src_relay
+
+	ip link set l_gw netns "${LISTENER}" up
+	ip link set gw_l netns "${GATEWAY}" up
+	ip link set gw_relay netns "${GATEWAY}" up
+	ip link set relay_gw netns "${RELAY}" up
+	ip link set relay_src netns "${RELAY}" up
+	ip link set src_relay netns "${SOURCE}" up mtu 1400
+
+	ip netns exec "${LISTENER}" ip a a 192.168.0.2/24 dev l_gw
+	ip netns exec "${LISTENER}" ip r a default via 192.168.0.1 dev l_gw
+	ip netns exec "${LISTENER}" ip a a 2001:db8::2/64 dev l_gw
+	ip netns exec "${LISTENER}" ip r a default via 2001:db8::1 dev l_gw
+	ip netns exec "${LISTENER}" ip a a 239.0.0.1/32 dev l_gw autojoin
+	ip netns exec "${LISTENER}" ip a a ff0e::5:6/128 dev l_gw autojoin
+
+	ip netns exec "${GATEWAY}" ip a a 192.168.0.1/24 dev gw_l
+	ip netns exec "${GATEWAY}" ip a a 2001:db8::1/64 dev gw_l
+	ip netns exec "${GATEWAY}" ip a a 10.0.0.1/24 dev gw_relay
+	ip netns exec "${GATEWAY}" ip link add br0 type bridge
+	ip netns exec "${GATEWAY}" ip link set br0 up
+	ip netns exec "${GATEWAY}" ip link set gw_l master br0
+	ip netns exec "${GATEWAY}" ip link set gw_l up
+	ip netns exec "${GATEWAY}" ip link add amtg master br0 type amt \
+		mode gateway local 10.0.0.1 discovery 10.0.0.2 dev gw_relay \
+		gateway_port 2268 relay_port 2268
+	ip netns exec "${RELAY}" ip a a 10.0.0.2/24 dev relay_gw
+	ip netns exec "${RELAY}" ip link add amtr type amt mode relay \
+		local 10.0.0.2 dev relay_gw relay_port 2268 max_tunnels 4
+	ip netns exec "${RELAY}" ip a a 172.17.0.1/24 dev relay_src
+	ip netns exec "${RELAY}" ip a a 2001:db8:3::1/64 dev relay_src
+	ip netns exec "${SOURCE}" ip a a 172.17.0.2/24 dev src_relay
+	ip netns exec "${SOURCE}" ip a a 2001:db8:3::2/64 dev src_relay
+	ip netns exec "${SOURCE}" ip r a default via 172.17.0.1 dev src_relay
+	ip netns exec "${SOURCE}" ip r a default via 2001:db8:3::1 dev src_relay
+	ip netns exec "${RELAY}" ip link set amtr up
+	ip netns exec "${GATEWAY}" ip link set amtg up
+}
+
+setup_sysctl()
+{
+	ip netns exec "${RELAY}" sysctl net.ipv4.ip_forward=1 -w -q
+}
+
+setup_iptables()
+{
+	ip netns exec "${RELAY}" iptables -t mangle -I PREROUTING \
+		-d 239.0.0.1 -j TTL --ttl-set 2
+	ip netns exec "${RELAY}" ip6tables -t mangle -I PREROUTING \
+		-j HL --hl-set 2
+}
+
+setup_mcast_routing()
+{
+	ip netns exec "${RELAY}" smcrouted
+	ip netns exec "${RELAY}" smcroutectl a relay_src \
+		172.17.0.2 239.0.0.1 amtr
+	ip netns exec "${RELAY}" smcroutectl a relay_src \
+		2001:db8:3::2 ff0e::5:6 amtr
+}
+
+test_remote_ip()
+{
+	REMOTE=$(ip netns exec "${GATEWAY}" \
+		ip -d -j link show amtg | jq .[0].linkinfo.info_data.remote)
+	if [ $REMOTE == "\"10.0.0.2\"" ]; then
+		printf "TEST: %-60s  [ OK ]\n" "amt discovery"
+	else
+		printf "TEST: %-60s  [FAIL]\n" "amt discovery"
+		ERR=1
+	fi
+}
+
+send_mcast_torture4()
+{
+	ip netns exec "${SOURCE}" bash -c \
+		'cat /dev/urandom | head -c 1G | nc -w 1 -u 239.0.0.1 4001'
+}
+
+
+send_mcast_torture6()
+{
+	ip netns exec "${SOURCE}" bash -c \
+		'cat /dev/urandom | head -c 1G | nc -w 1 -u ff0e::5:6 6001'
+}
+
+check_features()
+{
+        ip link help 2>&1 | grep -q amt
+        if [ $? -ne 0 ]; then
+                echo "Missing amt support in iproute2" >&2
+                exit_cleanup
+        fi
+}
+
+test_ipv4_forward()
+{
+	RESULT4=$(ip netns exec "${LISTENER}" nc -w 1 -l -u 239.0.0.1 4000)
+	if [ "$RESULT4" == "172.17.0.2" ]; then
+		printf "TEST: %-60s  [ OK ]\n" "IPv4 amt multicast forwarding"
+		exit 0
+	else
+		printf "TEST: %-60s  [FAIL]\n" "IPv4 amt multicast forwarding"
+		exit 1
+	fi
+}
+
+test_ipv6_forward()
+{
+	RESULT6=$(ip netns exec "${LISTENER}" nc -w 1 -l -u ff0e::5:6 6000)
+	if [ "$RESULT6" == "2001:db8:3::2" ]; then
+		printf "TEST: %-60s  [ OK ]\n" "IPv6 amt multicast forwarding"
+		exit 0
+	else
+		printf "TEST: %-60s  [FAIL]\n" "IPv6 amt multicast forwarding"
+		exit 1
+	fi
+}
+
+send_mcast4()
+{
+	sleep 2
+	ip netns exec "${SOURCE}" bash -c \
+		'echo 172.17.0.2 | nc -w 1 -u 239.0.0.1 4000' &
+}
+
+send_mcast6()
+{
+	sleep 2
+	ip netns exec "${SOURCE}" bash -c \
+		'echo 2001:db8:3::2 | nc -w 1 -u ff0e::5:6 6000' &
+}
+
+check_features
+
+create_namespaces
+
+set -e
+trap exit_cleanup_all EXIT
+
+setup_interface
+setup_sysctl
+setup_iptables
+setup_mcast_routing
+test_remote_ip
+test_ipv4_forward &
+pid=$!
+send_mcast4
+wait $pid || err=$?
+if [ $err -eq 1 ]; then
+	ERR=1
+fi
+test_ipv6_forward &
+pid=$!
+send_mcast6
+wait $pid || err=$?
+if [ $err -eq 1 ]; then
+	ERR=1
+fi
+send_mcast_torture4
+printf "TEST: %-60s  [ OK ]\n" "IPv4 amt traffic forwarding torture"
+send_mcast_torture6
+printf "TEST: %-60s  [ OK ]\n" "IPv6 amt traffic forwarding torture"
+sleep 5
+if [ "${ERR}" -eq 1 ]; then
+        echo "Some tests failed." >&2
+else
+        ERR=0
+fi
diff --git a/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh b/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh
new file mode 100755
index 000000000000..b5af08af8559
--- /dev/null
+++ b/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh
@@ -0,0 +1,220 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Tests sysctl options {arp,ndisc}_evict_nocarrier={0,1}
+#
+# Create a veth pair and set IPs/routes on both. Then ping to establish
+# an entry in the ARP/ND table. Depending on the test set sysctl option to
+# 1 or 0. Set remote veth down which will cause local veth to go into a no
+# carrier state. Depending on the test check the ARP/ND table:
+#
+# {arp,ndisc}_evict_nocarrier=1 should contain no ARP/ND after no carrier
+# {arp,ndisc}_evict_nocarrer=0 should still contain the single ARP/ND entry
+#
+
+readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
+readonly V4_ADDR0=10.0.10.1
+readonly V4_ADDR1=10.0.10.2
+readonly V6_ADDR0=2001:db8:91::1
+readonly V6_ADDR1=2001:db8:91::2
+nsid=100
+
+cleanup_v6()
+{
+    ip netns del me
+    ip netns del peer
+
+    sysctl -w net.ipv4.conf.veth0.ndisc_evict_nocarrier=1 >/dev/null 2>&1
+    sysctl -w net.ipv4.conf.all.ndisc_evict_nocarrier=1 >/dev/null 2>&1
+}
+
+create_ns()
+{
+    local n=${1}
+
+    ip netns del ${n} 2>/dev/null
+
+    ip netns add ${n}
+    ip netns set ${n} $((nsid++))
+    ip -netns ${n} link set lo up
+}
+
+
+setup_v6() {
+    create_ns me
+    create_ns peer
+
+    IP="ip -netns me"
+
+    $IP li add veth1 type veth peer name veth2
+    $IP li set veth1 up
+    $IP -6 addr add $V6_ADDR0/64 dev veth1 nodad
+    $IP li set veth2 netns peer up
+    ip -netns peer -6 addr add $V6_ADDR1/64 dev veth2 nodad
+
+    ip netns exec me sysctl -w $1 >/dev/null 2>&1
+
+    # Establish an ND cache entry
+    ip netns exec me ping -6 -c1 -Iveth1 $V6_ADDR1 >/dev/null 2>&1
+    # Should have the veth1 entry in ND table
+    ip netns exec me ip -6 neigh get $V6_ADDR1 dev veth1 >/dev/null 2>&1
+    if [ $? -ne 0 ]; then
+        cleanup_v6
+        echo "failed"
+        exit
+    fi
+
+    # Set veth2 down, which will put veth1 in NOCARRIER state
+    ip netns exec peer ip link set veth2 down
+}
+
+setup_v4() {
+    ip netns add "${PEER_NS}"
+    ip link add name veth0 type veth peer name veth1
+    ip link set dev veth0 up
+    ip link set dev veth1 netns "${PEER_NS}"
+    ip netns exec "${PEER_NS}" ip link set dev veth1 up
+    ip addr add $V4_ADDR0/24 dev veth0
+    ip netns exec "${PEER_NS}" ip addr add $V4_ADDR1/24 dev veth1
+    ip netns exec ${PEER_NS} ip route add default via $V4_ADDR1 dev veth1
+    ip route add default via $V4_ADDR0 dev veth0
+
+    sysctl -w "$1" >/dev/null 2>&1
+
+    # Establish an ARP cache entry
+    ping -c1 -I veth0 $V4_ADDR1 -q >/dev/null 2>&1
+    # Should have the veth1 entry in ARP table
+    ip neigh get $V4_ADDR1 dev veth0 >/dev/null 2>&1
+    if [ $? -ne 0 ]; then
+        cleanup_v4
+        echo "failed"
+        exit
+    fi
+
+    # Set veth1 down, which will put veth0 in NOCARRIER state
+    ip netns exec "${PEER_NS}" ip link set veth1 down
+}
+
+cleanup_v4() {
+    ip neigh flush dev veth0
+    ip link del veth0
+    local -r ns="$(ip netns list|grep $PEER_NS)"
+    [ -n "$ns" ] && ip netns del $ns 2>/dev/null
+
+    sysctl -w net.ipv4.conf.veth0.arp_evict_nocarrier=1 >/dev/null 2>&1
+    sysctl -w net.ipv4.conf.all.arp_evict_nocarrier=1 >/dev/null 2>&1
+}
+
+# Run test when arp_evict_nocarrier = 1 (default).
+run_arp_evict_nocarrier_enabled() {
+    echo "run arp_evict_nocarrier=1 test"
+    setup_v4 "net.ipv4.conf.veth0.arp_evict_nocarrier=1"
+
+    # ARP table should be empty
+    ip neigh get $V4_ADDR1 dev veth0 >/dev/null 2>&1
+
+    if [ $? -eq 0 ];then
+        echo "failed"
+    else
+        echo "ok"
+    fi
+
+    cleanup_v4
+}
+
+# Run test when arp_evict_nocarrier = 0
+run_arp_evict_nocarrier_disabled() {
+    echo "run arp_evict_nocarrier=0 test"
+    setup_v4 "net.ipv4.conf.veth0.arp_evict_nocarrier=0"
+
+    # ARP table should still contain the entry
+    ip neigh get $V4_ADDR1 dev veth0 >/dev/null 2>&1
+
+    if [ $? -eq 0 ];then
+        echo "ok"
+    else
+        echo "failed"
+    fi
+
+    cleanup_v4
+}
+
+run_arp_evict_nocarrier_disabled_all() {
+    echo "run all.arp_evict_nocarrier=0 test"
+    setup_v4 "net.ipv4.conf.all.arp_evict_nocarrier=0"
+
+    # ARP table should still contain the entry
+    ip neigh get $V4_ADDR1 dev veth0 >/dev/null 2>&1
+
+    if [ $? -eq 0 ];then
+        echo "ok"
+    else
+        echo "failed"
+    fi
+
+    cleanup_v4
+}
+
+run_ndisc_evict_nocarrier_enabled() {
+    echo "run ndisc_evict_nocarrier=1 test"
+
+    setup_v6 "net.ipv6.conf.veth1.ndisc_evict_nocarrier=1"
+
+    ip netns exec me ip -6 neigh get $V6_ADDR1 dev veth1 >/dev/null 2>&1
+
+    if [ $? -eq 0 ];then
+        echo "failed"
+    else
+        echo "ok"
+    fi
+
+    cleanup_v6
+}
+
+run_ndisc_evict_nocarrier_disabled() {
+    echo "run ndisc_evict_nocarrier=0 test"
+
+    setup_v6 "net.ipv6.conf.veth1.ndisc_evict_nocarrier=0"
+
+    ip netns exec me ip -6 neigh get $V6_ADDR1 dev veth1 >/dev/null 2>&1
+
+    if [ $? -eq 0 ];then
+        echo "ok"
+    else
+        echo "failed"
+    fi
+
+    cleanup_v6
+}
+
+run_ndisc_evict_nocarrier_disabled_all() {
+    echo "run all.ndisc_evict_nocarrier=0 test"
+
+    setup_v6 "net.ipv6.conf.all.ndisc_evict_nocarrier=0"
+
+    ip netns exec me ip -6 neigh get $V6_ADDR1 dev veth1 >/dev/null 2>&1
+
+    if [ $? -eq 0 ];then
+        echo "ok"
+    else
+        echo "failed"
+    fi
+
+    cleanup_v6
+}
+
+run_all_tests() {
+    run_arp_evict_nocarrier_enabled
+    run_arp_evict_nocarrier_disabled
+    run_arp_evict_nocarrier_disabled_all
+    run_ndisc_evict_nocarrier_enabled
+    run_ndisc_evict_nocarrier_disabled
+    run_ndisc_evict_nocarrier_disabled_all
+}
+
+if [ "$(id -u)" -ne 0 ];then
+	echo "SKIP: Need root privileges"
+	exit $ksft_skip;
+fi
+
+run_all_tests
diff --git a/tools/testing/selftests/net/cmsg_so_mark.c b/tools/testing/selftests/net/cmsg_so_mark.c
new file mode 100644
index 000000000000..27f2804892a7
--- /dev/null
+++ b/tools/testing/selftests/net/cmsg_so_mark.c
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <errno.h>
+#include <netdb.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <linux/types.h>
+#include <sys/socket.h>
+
+int main(int argc, const char **argv)
+{
+	char cbuf[CMSG_SPACE(sizeof(__u32))];
+	struct addrinfo hints, *ai;
+	struct cmsghdr *cmsg;
+	struct iovec iov[1];
+	struct msghdr msg;
+	int mark;
+	int err;
+	int fd;
+
+	if (argc != 4) {
+		fprintf(stderr, "Usage: %s <dst_ip> <port> <mark>\n", argv[0]);
+		return 1;
+	}
+	mark = atoi(argv[3]);
+
+	memset(&hints, 0, sizeof(hints));
+	hints.ai_family = AF_UNSPEC;
+	hints.ai_socktype = SOCK_DGRAM;
+
+	ai = NULL;
+	err = getaddrinfo(argv[1], argv[2], &hints, &ai);
+	if (err) {
+		fprintf(stderr, "Can't resolve address: %s\n", strerror(errno));
+		return 1;
+	}
+
+	fd = socket(ai->ai_family, SOCK_DGRAM, IPPROTO_UDP);
+	if (fd < 0) {
+		fprintf(stderr, "Can't open socket: %s\n", strerror(errno));
+		freeaddrinfo(ai);
+		return 1;
+	}
+
+	iov[0].iov_base = "bla";
+	iov[0].iov_len = 4;
+
+	msg.msg_name = ai->ai_addr;
+	msg.msg_namelen = ai->ai_addrlen;
+	msg.msg_iov = iov;
+	msg.msg_iovlen = 1;
+	msg.msg_control = cbuf;
+	msg.msg_controllen = sizeof(cbuf);
+
+	cmsg = CMSG_FIRSTHDR(&msg);
+	cmsg->cmsg_level = SOL_SOCKET;
+	cmsg->cmsg_type = SO_MARK;
+	cmsg->cmsg_len = CMSG_LEN(sizeof(__u32));
+	*(__u32 *)CMSG_DATA(cmsg) = mark;
+
+	err = sendmsg(fd, &msg, 0);
+
+	close(fd);
+	freeaddrinfo(ai);
+	return err != 4;
+}
diff --git a/tools/testing/selftests/net/cmsg_so_mark.sh b/tools/testing/selftests/net/cmsg_so_mark.sh
new file mode 100755
index 000000000000..19c6aab8d0e9
--- /dev/null
+++ b/tools/testing/selftests/net/cmsg_so_mark.sh
@@ -0,0 +1,61 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NS=ns
+IP4=172.16.0.1/24
+TGT4=172.16.0.2
+IP6=2001:db8:1::1/64
+TGT6=2001:db8:1::2
+MARK=1000
+
+cleanup()
+{
+    ip netns del $NS
+}
+
+trap cleanup EXIT
+
+# Namespaces
+ip netns add $NS
+
+# Connectivity
+ip -netns $NS link add type dummy
+ip -netns $NS link set dev dummy0 up
+ip -netns $NS addr add $IP4 dev dummy0
+ip -netns $NS addr add $IP6 dev dummy0
+
+ip -netns $NS rule add fwmark $MARK lookup 300
+ip -6 -netns $NS rule add fwmark $MARK lookup 300
+ip -netns $NS route add prohibit any table 300
+ip -6 -netns $NS route add prohibit any table 300
+
+# Test
+BAD=0
+TOTAL=0
+
+check_result() {
+    ((TOTAL++))
+    if [ $1 -ne $2 ]; then
+	echo "  Case $3 returned $1, expected $2"
+	((BAD++))
+    fi
+}
+
+ip netns exec $NS ./cmsg_so_mark $TGT4 1234 $((MARK + 1))
+check_result $? 0 "IPv4 pass"
+ip netns exec $NS ./cmsg_so_mark $TGT6 1234 $((MARK + 1))
+check_result $? 0 "IPv6 pass"
+
+ip netns exec $NS ./cmsg_so_mark $TGT4 1234 $MARK
+check_result $? 1 "IPv4 rejection"
+ip netns exec $NS ./cmsg_so_mark $TGT6 1234 $MARK
+check_result $? 1 "IPv6 rejection"
+
+# Summary
+if [ $BAD -ne 0 ]; then
+    echo "FAIL - $BAD/$TOTAL cases failed"
+    exit 1
+else
+    echo "OK"
+    exit 0
+fi
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 86ab429fe7f3..ead7963b9bf0 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -44,3 +44,4 @@ CONFIG_NET_ACT_MIRRED=m
 CONFIG_BAREUDP=m
 CONFIG_IPV6_IOAM6_LWTUNNEL=y
 CONFIG_CRYPTO_SM4=y
+CONFIG_AMT=m
diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index 0d293391e9a4..b5a69ad191b0 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -2078,6 +2078,7 @@ basic_res()
 		"id 101 index 0 nhid 2 id 101 index 1 nhid 2 id 101 index 2 nhid 1 id 101 index 3 nhid 1"
 	log_test $? 0 "Dump all nexthop buckets in a group"
 
+	sleep 0.1
 	(( $($IP -j nexthop bucket list id 101 |
 	     jq '[.[] | select(.bucket.idle_time > 0 and
 	                       .bucket.idle_time < 2)] | length') == 4 ))
diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
index 675eff45b037..1162836f8f32 100755
--- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
@@ -482,10 +482,15 @@ v3exc_timeout_test()
 	local X=("192.0.2.20" "192.0.2.30")
 
 	# GMI should be 3 seconds
-	ip link set dev br0 type bridge mcast_query_interval 100 mcast_query_response_interval 100
+	ip link set dev br0 type bridge mcast_query_interval 100 \
+					mcast_query_response_interval 100 \
+					mcast_membership_interval 300
 
 	v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP
-	ip link set dev br0 type bridge mcast_query_interval 500 mcast_query_response_interval 500
+	ip link set dev br0 type bridge mcast_query_interval 500 \
+					mcast_query_response_interval 500 \
+					mcast_membership_interval 1500
+
 	$MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_ALLOW2" -q
 	sleep 3
 	bridge -j -d -s mdb show dev br0 \
@@ -517,7 +522,8 @@ v3exc_timeout_test()
 	log_test "IGMPv3 group $TEST_GROUP exclude timeout"
 
 	ip link set dev br0 type bridge mcast_query_interval 12500 \
-					mcast_query_response_interval 1000
+					mcast_query_response_interval 1000 \
+					mcast_membership_interval 26000
 
 	v3cleanup $swp1 $TEST_GROUP
 }
diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh
index ffdcfa87ca2b..e2b9ff773c6b 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mld.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh
@@ -479,10 +479,15 @@ mldv2exc_timeout_test()
 	local X=("2001:db8:1::20" "2001:db8:1::30")
 
 	# GMI should be 3 seconds
-	ip link set dev br0 type bridge mcast_query_interval 100 mcast_query_response_interval 100
+	ip link set dev br0 type bridge mcast_query_interval 100 \
+					mcast_query_response_interval 100 \
+					mcast_membership_interval 300
 
 	mldv2exclude_prepare $h1
-	ip link set dev br0 type bridge mcast_query_interval 500 mcast_query_response_interval 500
+	ip link set dev br0 type bridge mcast_query_interval 500 \
+					mcast_query_response_interval 500 \
+					mcast_membership_interval 1500
+
 	$MZ $h1 -c 1 $MZPKT_ALLOW2 -q
 	sleep 3
 	bridge -j -d -s mdb show dev br0 \
@@ -514,7 +519,8 @@ mldv2exc_timeout_test()
 	log_test "MLDv2 group $TEST_GROUP exclude timeout"
 
 	ip link set dev br0 type bridge mcast_query_interval 12500 \
-					mcast_query_response_interval 1000
+					mcast_query_response_interval 1000 \
+					mcast_membership_interval 26000
 
 	mldv2cleanup $swp1
 }
diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh
index 2c14a86adaaa..de9944d42027 100644
--- a/tools/testing/selftests/net/forwarding/devlink_lib.sh
+++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh
@@ -563,12 +563,6 @@ devlink_trap_group_policer_get()
 		| jq '.[][][]["policer"]'
 }
 
-devlink_trap_policer_ids_get()
-{
-	devlink -j -p trap policer show \
-		| jq '.[]["'$DEVLINK_DEV'"][]["policer"]'
-}
-
 devlink_port_by_netdev()
 {
 	local if_name=$1
diff --git a/tools/testing/selftests/net/forwarding/forwarding.config.sample b/tools/testing/selftests/net/forwarding/forwarding.config.sample
index e5e2fbeca22e..bf17e485684f 100644
--- a/tools/testing/selftests/net/forwarding/forwarding.config.sample
+++ b/tools/testing/selftests/net/forwarding/forwarding.config.sample
@@ -39,5 +39,9 @@ NETIF_CREATE=yes
 # Timeout (in seconds) before ping exits regardless of how many packets have
 # been sent or received
 PING_TIMEOUT=5
+# Flag for tc match, supposed to be skip_sw/skip_hw which means do not process
+# filter by software/hardware
+TC_FLAG=skip_hw
 # IPv6 traceroute utility name.
 TROUTE6=traceroute6
+
diff --git a/tools/testing/selftests/net/forwarding/ip6gre_flat.sh b/tools/testing/selftests/net/forwarding/ip6gre_flat.sh
new file mode 100755
index 000000000000..96c97064f2d3
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ip6gre_flat.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test IP-in-IP GRE tunnel without key.
+# This test uses flat topology for IP tunneling tests. See ip6gre_lib.sh for
+# more details.
+
+ALL_TESTS="
+	gre_flat
+	gre_mtu_change
+"
+
+NUM_NETIFS=6
+source lib.sh
+source ip6gre_lib.sh
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	ol1=${NETIFS[p2]}
+
+	ul1=${NETIFS[p3]}
+	ul2=${NETIFS[p4]}
+
+	ol2=${NETIFS[p5]}
+	h2=${NETIFS[p6]}
+
+	forwarding_enable
+	vrf_prepare
+	h1_create
+	h2_create
+	sw1_flat_create $ol1 $ul1
+	sw2_flat_create $ol2 $ul2
+}
+
+gre_flat()
+{
+	test_traffic_ip4ip6 "GRE flat IPv4-in-IPv6"
+	test_traffic_ip6ip6 "GRE flat IPv6-in-IPv6"
+}
+
+gre_mtu_change()
+{
+	test_mtu_change
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	sw2_flat_destroy $ol2 $ul2
+	sw1_flat_destroy $ol1 $ul1
+	h2_destroy
+	h1_destroy
+	vrf_cleanup
+	forwarding_restore
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ip6gre_flat_key.sh b/tools/testing/selftests/net/forwarding/ip6gre_flat_key.sh
new file mode 100755
index 000000000000..ff9fb0db9bd1
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ip6gre_flat_key.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test IP-in-IP GRE tunnel with key.
+# This test uses flat topology for IP tunneling tests. See ip6gre_lib.sh for
+# more details.
+
+ALL_TESTS="
+	gre_flat
+	gre_mtu_change
+"
+
+NUM_NETIFS=6
+source lib.sh
+source ip6gre_lib.sh
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	ol1=${NETIFS[p2]}
+
+	ul1=${NETIFS[p3]}
+	ul2=${NETIFS[p4]}
+
+	ol2=${NETIFS[p5]}
+	h2=${NETIFS[p6]}
+
+	forwarding_enable
+	vrf_prepare
+	h1_create
+	h2_create
+	sw1_flat_create $ol1 $ul1 key 233
+	sw2_flat_create $ol2 $ul2 key 233
+}
+
+gre_flat()
+{
+	test_traffic_ip4ip6 "GRE flat IPv4-in-IPv6 with key"
+	test_traffic_ip6ip6 "GRE flat IPv6-in-IPv6 with key"
+}
+
+gre_mtu_change()
+{
+	test_mtu_change
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	sw2_flat_destroy $ol2 $ul2
+	sw1_flat_destroy $ol1 $ul1
+	h2_destroy
+	h1_destroy
+	vrf_cleanup
+	forwarding_restore
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ip6gre_flat_keys.sh b/tools/testing/selftests/net/forwarding/ip6gre_flat_keys.sh
new file mode 100755
index 000000000000..12c138785242
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ip6gre_flat_keys.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test IP-in-IP GRE tunnel with keys.
+# This test uses flat topology for IP tunneling tests. See ip6gre_lib.sh for
+# more details.
+
+ALL_TESTS="
+	gre_flat
+	gre_mtu_change
+"
+
+NUM_NETIFS=6
+source lib.sh
+source ip6gre_lib.sh
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	ol1=${NETIFS[p2]}
+
+	ul1=${NETIFS[p3]}
+	ul2=${NETIFS[p4]}
+
+	ol2=${NETIFS[p5]}
+	h2=${NETIFS[p6]}
+
+	forwarding_enable
+	vrf_prepare
+	h1_create
+	h2_create
+	sw1_flat_create $ol1 $ul1 ikey 111 okey 222
+	sw2_flat_create $ol2 $ul2 ikey 222 okey 111
+}
+
+gre_flat()
+{
+	test_traffic_ip4ip6 "GRE flat IPv4-in-IPv6 with ikey/okey"
+	test_traffic_ip6ip6 "GRE flat IPv6-in-IPv6 with ikey/okey"
+}
+
+gre_mtu_change()
+{
+	test_mtu_change	gre
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	sw2_flat_destroy $ol2 $ul2
+	sw1_flat_destroy $ol1 $ul1
+	h2_destroy
+	h1_destroy
+	vrf_cleanup
+	forwarding_restore
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ip6gre_hier.sh b/tools/testing/selftests/net/forwarding/ip6gre_hier.sh
new file mode 100755
index 000000000000..83b55c30a5c3
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ip6gre_hier.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test IP-in-IP GRE tunnels without key.
+# This test uses hierarchical topology for IP tunneling tests. See
+# ip6gre_lib.sh for more details.
+
+ALL_TESTS="
+	gre_hier
+	gre_mtu_change
+"
+
+NUM_NETIFS=6
+source lib.sh
+source ip6gre_lib.sh
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	ol1=${NETIFS[p2]}
+
+	ul1=${NETIFS[p3]}
+	ul2=${NETIFS[p4]}
+
+	ol2=${NETIFS[p5]}
+	h2=${NETIFS[p6]}
+
+	forwarding_enable
+	vrf_prepare
+	h1_create
+	h2_create
+	sw1_hierarchical_create $ol1 $ul1
+	sw2_hierarchical_create $ol2 $ul2
+}
+
+gre_hier()
+{
+	test_traffic_ip4ip6 "GRE hierarchical IPv4-in-IPv6"
+	test_traffic_ip6ip6 "GRE hierarchical IPv6-in-IPv6"
+}
+
+gre_mtu_change()
+{
+	test_mtu_change gre
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	sw2_hierarchical_destroy $ol2 $ul2
+	sw1_hierarchical_destroy $ol1 $ul1
+	h2_destroy
+	h1_destroy
+	vrf_cleanup
+	forwarding_restore
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ip6gre_hier_key.sh b/tools/testing/selftests/net/forwarding/ip6gre_hier_key.sh
new file mode 100755
index 000000000000..256607916d92
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ip6gre_hier_key.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test IP-in-IP GRE tunnels without key.
+# This test uses hierarchical topology for IP tunneling tests. See
+# ip6gre_lib.sh for more details.
+
+ALL_TESTS="
+	gre_hier
+	gre_mtu_change
+"
+
+NUM_NETIFS=6
+source lib.sh
+source ip6gre_lib.sh
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	ol1=${NETIFS[p2]}
+
+	ul1=${NETIFS[p3]}
+	ul2=${NETIFS[p4]}
+
+	ol2=${NETIFS[p5]}
+	h2=${NETIFS[p6]}
+
+	forwarding_enable
+	vrf_prepare
+	h1_create
+	h2_create
+	sw1_hierarchical_create $ol1 $ul1 key 22
+	sw2_hierarchical_create $ol2 $ul2 key 22
+}
+
+gre_hier()
+{
+	test_traffic_ip4ip6 "GRE hierarchical IPv4-in-IPv6 with key"
+	test_traffic_ip6ip6 "GRE hierarchical IPv6-in-IPv6 with key"
+}
+
+gre_mtu_change()
+{
+	test_mtu_change gre
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	sw2_hierarchical_destroy $ol2 $ul2
+	sw1_hierarchical_destroy $ol1 $ul1
+	h2_destroy
+	h1_destroy
+	vrf_cleanup
+	forwarding_restore
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ip6gre_hier_keys.sh b/tools/testing/selftests/net/forwarding/ip6gre_hier_keys.sh
new file mode 100755
index 000000000000..ad1bcd6334a8
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ip6gre_hier_keys.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test IP-in-IP GRE tunnels without key.
+# This test uses hierarchical topology for IP tunneling tests. See
+# ip6gre_lib.sh for more details.
+
+ALL_TESTS="
+	gre_hier
+	gre_mtu_change
+"
+
+NUM_NETIFS=6
+source lib.sh
+source ip6gre_lib.sh
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	ol1=${NETIFS[p2]}
+
+	ul1=${NETIFS[p3]}
+	ul2=${NETIFS[p4]}
+
+	ol2=${NETIFS[p5]}
+	h2=${NETIFS[p6]}
+
+	forwarding_enable
+	vrf_prepare
+	h1_create
+	h2_create
+	sw1_hierarchical_create $ol1 $ul1 ikey 111 okey 222
+	sw2_hierarchical_create $ol2 $ul2 ikey 222 okey 111
+}
+
+gre_hier()
+{
+	test_traffic_ip4ip6 "GRE hierarchical IPv4-in-IPv6 with ikey/okey"
+	test_traffic_ip6ip6 "GRE hierarchical IPv6-in-IPv6 with ikey/okey"
+}
+
+gre_mtu_change()
+{
+	test_mtu_change gre
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	sw2_hierarchical_destroy $ol2 $ul2
+	sw1_hierarchical_destroy $ol1 $ul1
+	h2_destroy
+	h1_destroy
+	vrf_cleanup
+	forwarding_restore
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ip6gre_lib.sh b/tools/testing/selftests/net/forwarding/ip6gre_lib.sh
new file mode 100644
index 000000000000..58a3597037b1
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ip6gre_lib.sh
@@ -0,0 +1,438 @@
+# SPDX-License-Identifier: GPL-2.0
+#!/bin/bash
+
+# Handles creation and destruction of IP-in-IP or GRE tunnels over the given
+# topology. Supports both flat and hierarchical models.
+#
+# Flat Model:
+# Overlay and underlay share the same VRF.
+# SW1 uses default VRF so tunnel has no bound dev.
+# SW2 uses non-default VRF tunnel has a bound dev.
+# +--------------------------------+
+# | H1                             |
+# |                     $h1 +      |
+# |        198.51.100.1/24  |      |
+# |        2001:db8:1::1/64 |      |
+# +-------------------------|------+
+#                           |
+# +-------------------------|-------------------+
+# | SW1                     |                   |
+# |                    $ol1 +                   |
+# |        198.51.100.2/24                      |
+# |        2001:db8:1::2/64                     |
+# |                                             |
+# |      + g1a (ip6gre)                         |
+# |        loc=2001:db8:3::1                    |
+# |        rem=2001:db8:3::2 --.                |
+# |        tos=inherit         |                |
+# |                            .                |
+# |      .---------------------                 |
+# |      |                                      |
+# |      v                                      |
+# |      + $ul1.111 (vlan)                      |
+# |      | 2001:db8:10::1/64                    |
+# |       \                                     |
+# |        \____________                        |
+# |                     |                       |
+# | VRF default         + $ul1                  |
+# +---------------------|-----------------------+
+#                       |
+# +---------------------|-----------------------+
+# | SW2                 |                       |
+# |                $ul2 +                       |
+# |          ___________|                       |
+# |         /                                   |
+# |        /                                    |
+# |       + $ul2.111 (vlan)                     |
+# |       ^ 2001:db8:10::2/64                   |
+# |       |                                     |
+# |       |                                     |
+# |       '----------------------.              |
+# |       + g2a (ip6gre)         |              |
+# |         loc=2001:db8:3::2    |              |
+# |         rem=2001:db8:3::1  --'              |
+# |         tos=inherit                         |
+# |                                             |
+# |                     + $ol2                  |
+# |                     | 203.0.113.2/24        |
+# | VRF v$ol2           | 2001:db8:2::2/64      |
+# +---------------------|-----------------------+
+# +---------------------|----------+
+# | H2                  |          |
+# |                 $h2 +          |
+# |    203.0.113.1/24              |
+# |    2001:db8:2::1/64            |
+# +--------------------------------+
+#
+# Hierarchical model:
+# The tunnel is bound to a device in a different VRF
+#
+# +--------------------------------+
+# | H1                             |
+# |                     $h1 +      |
+# |        198.51.100.1/24  |      |
+# |        2001:db8:1::1/64 |      |
+# +-------------------------|------+
+#                           |
+# +-------------------------|-------------------+
+# | SW1                     |                   |
+# | +-----------------------|-----------------+ |
+# | |                  $ol1 +                 | |
+# | |      198.51.100.2/24                    | |
+# | |      2001:db8:1::2/64                   | |
+# | |                                         | |
+# | |              + g1a (ip6gre)             | |
+# | |                loc=2001:db8:3::1        | |
+# | |                rem=2001:db8:3::2        | |
+# | |                tos=inherit              | |
+# | |                    ^                    | |
+# | |   VRF v$ol1        |                    | |
+# | +--------------------|--------------------+ |
+# |                      |                      |
+# | +--------------------|--------------------+ |
+# | |   VRF v$ul1        |                    | |
+# | |                    |                    | |
+# | |                    v                    | |
+# | |             dummy1 +                    | |
+# | |       2001:db8:3::1/64                  | |
+# | |         .-----------'                   | |
+# | |         |                               | |
+# | |         v                               | |
+# | |         + $ul1.111 (vlan)               | |
+# | |         | 2001:db8:10::1/64             | |
+# | |         \                               | |
+# | |          \__________                    | |
+# | |                     |                   | |
+# | |                     + $ul1              | |
+# | +---------------------|-------------------+ |
+# +-----------------------|---------------------+
+#                         |
+# +-----------------------|---------------------+
+# | SW2                   |                     |
+# | +---------------------|-------------------+ |
+# | |                     + $ul2              | |
+# | |                _____|                   | |
+# | |               /                         | |
+# | |              /                          | |
+# | |              | $ul2.111 (vlan)          | |
+# | |              + 2001:db8:10::2/64        | |
+# | |              ^                          | |
+# | |              |                          | |
+# | |              '------.                   | |
+# | |              dummy2 +                   | |
+# | |              2001:db8:3::2/64           | |
+# | |                     ^                   | |
+# | |                     |                   | |
+# | |                     |                   | |
+# | | VRF v$ul2           |                   | |
+# | +---------------------|-------------------+ |
+# |                       |                     |
+# | +---------------------|-------------------+ |
+# | | VRF v$ol2           |                   | |
+# | |                     |                   | |
+# | |                     v                   | |
+# | |        g2a (ip6gre) +                   | |
+# | |        loc=2001:db8:3::2                | |
+# | |        rem=2001:db8:3::1                | |
+# | |        tos=inherit                      | |
+# | |                                         | |
+# | |                $ol2 +                   | |
+# | |    203.0.113.2/24   |                   | |
+# | |    2001:db8:2::2/64 |                   | |
+# | +---------------------|-------------------+ |
+# +-----------------------|---------------------+
+#                         |
+# +-----------------------|--------+
+# | H2                    |        |
+# |                   $h2 +        |
+# |      203.0.113.1/24            |
+# |      2001:db8:2::1/64          |
+# +--------------------------------+
+
+source lib.sh
+source tc_common.sh
+
+h1_create()
+{
+	simple_if_init $h1 198.51.100.1/24 2001:db8:1::1/64
+	ip route add vrf v$h1 203.0.113.0/24 via 198.51.100.2
+	ip -6 route add vrf v$h1 2001:db8:2::/64 via 2001:db8:1::2
+}
+
+h1_destroy()
+{
+	ip -6 route del vrf v$h1 2001:db8:2::/64 via 2001:db8:1::2
+	ip route del vrf v$h1 203.0.113.0/24 via 198.51.100.2
+	simple_if_fini $h1 198.51.100.1/24 2001:db8:1::1/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 203.0.113.1/24 2001:db8:2::1/64
+	ip route add vrf v$h2 198.51.100.0/24 via 203.0.113.2
+	ip -6 route add vrf v$h2 2001:db8:1::/64 via 2001:db8:2::2
+}
+
+h2_destroy()
+{
+	ip -6 route del vrf v$h2 2001:db8:1::/64 via 2001:db8:2::2
+	ip route del vrf v$h2 198.51.100.0/24 via 203.0.113.2
+	simple_if_fini $h2 203.0.113.1/24 2001:db8:2::1/64
+}
+
+sw1_flat_create()
+{
+	local ol1=$1; shift
+	local ul1=$1; shift
+
+	ip link set dev $ol1 up
+        __addr_add_del $ol1 add 198.51.100.2/24 2001:db8:1::2/64
+
+	ip link set dev $ul1 up
+	vlan_create $ul1 111 "" 2001:db8:10::1/64
+
+	tunnel_create g1a ip6gre 2001:db8:3::1 2001:db8:3::2 tos inherit \
+		ttl inherit "$@"
+	ip link set dev g1a up
+        __addr_add_del g1a add "2001:db8:3::1/128"
+
+	ip -6 route add 2001:db8:3::2/128 via 2001:db8:10::2
+	ip route add 203.0.113.0/24 dev g1a
+	ip -6 route add 2001:db8:2::/64 dev g1a
+}
+
+sw1_flat_destroy()
+{
+	local ol1=$1; shift
+	local ul1=$1; shift
+
+	ip -6 route del 2001:db8:2::/64
+	ip route del 203.0.113.0/24
+	ip -6 route del 2001:db8:3::2/128 via 2001:db8:10::2
+
+	__simple_if_fini g1a 2001:db8:3::1/128
+	tunnel_destroy g1a
+
+	vlan_destroy $ul1 111
+	__simple_if_fini $ul1
+	__simple_if_fini $ol1 198.51.100.2/24 2001:db8:1::2/64
+}
+
+sw2_flat_create()
+{
+	local ol2=$1; shift
+	local ul2=$1; shift
+
+	simple_if_init $ol2 203.0.113.2/24 2001:db8:2::2/64
+	__simple_if_init $ul2 v$ol2
+	vlan_create $ul2 111 v$ol2 2001:db8:10::2/64
+
+	tunnel_create g2a ip6gre 2001:db8:3::2 2001:db8:3::1 tos inherit \
+		ttl inherit dev v$ol2 "$@"
+	__simple_if_init g2a v$ol2 2001:db8:3::2/128
+
+	# Replace neighbor to avoid 1 dropped packet due to "unresolved neigh"
+	ip neigh replace dev $ol2 203.0.113.1 lladdr $(mac_get $h2)
+	ip -6 neigh replace dev $ol2 2001:db8:2::1 lladdr $(mac_get $h2)
+
+	ip -6 route add vrf v$ol2 2001:db8:3::1/128 via 2001:db8:10::1
+	ip route add vrf v$ol2 198.51.100.0/24 dev g2a
+	ip -6 route add vrf v$ol2 2001:db8:1::/64 dev g2a
+}
+
+sw2_flat_destroy()
+{
+	local ol2=$1; shift
+	local ul2=$1; shift
+
+	ip -6 route del vrf v$ol2 2001:db8:2::/64
+	ip route del vrf v$ol2 198.51.100.0/24
+	ip -6 route del vrf v$ol2 2001:db8:3::1/128 via 2001:db8:10::1
+
+	__simple_if_fini g2a 2001:db8:3::2/128
+	tunnel_destroy g2a
+
+	vlan_destroy $ul2 111
+	__simple_if_fini $ul2
+	simple_if_fini $ol2 203.0.113.2/24 2001:db8:2::2/64
+}
+
+sw1_hierarchical_create()
+{
+	local ol1=$1; shift
+	local ul1=$1; shift
+
+	simple_if_init $ol1 198.51.100.2/24 2001:db8:1::2/64
+	simple_if_init $ul1
+	ip link add name dummy1 type dummy
+	__simple_if_init dummy1 v$ul1 2001:db8:3::1/64
+
+	vlan_create $ul1 111 v$ul1 2001:db8:10::1/64
+	tunnel_create g1a ip6gre 2001:db8:3::1 2001:db8:3::2 tos inherit \
+		ttl inherit dev dummy1 "$@"
+	ip link set dev g1a master v$ol1
+
+	ip -6 route add vrf v$ul1 2001:db8:3::2/128 via 2001:db8:10::2
+	ip route add vrf v$ol1 203.0.113.0/24 dev g1a
+	ip -6 route add vrf v$ol1 2001:db8:2::/64 dev g1a
+}
+
+sw1_hierarchical_destroy()
+{
+	local ol1=$1; shift
+	local ul1=$1; shift
+
+	ip -6 route del vrf v$ol1 2001:db8:2::/64
+	ip route del vrf v$ol1 203.0.113.0/24
+	ip -6 route del vrf v$ul1 2001:db8:3::2/128
+
+	tunnel_destroy g1a
+	vlan_destroy $ul1 111
+
+	__simple_if_fini dummy1 2001:db8:3::1/64
+	ip link del dev dummy1
+
+	simple_if_fini $ul1
+	simple_if_fini $ol1 198.51.100.2/24 2001:db8:1::2/64
+}
+
+sw2_hierarchical_create()
+{
+	local ol2=$1; shift
+	local ul2=$1; shift
+
+	simple_if_init $ol2 203.0.113.2/24 2001:db8:2::2/64
+	simple_if_init $ul2
+
+	ip link add name dummy2 type dummy
+	__simple_if_init dummy2 v$ul2 2001:db8:3::2/64
+
+	vlan_create $ul2 111 v$ul2 2001:db8:10::2/64
+	tunnel_create g2a ip6gre 2001:db8:3::2 2001:db8:3::1 tos inherit \
+		ttl inherit dev dummy2 "$@"
+	ip link set dev g2a master v$ol2
+
+	# Replace neighbor to avoid 1 dropped packet due to "unresolved neigh"
+	ip neigh replace dev $ol2 203.0.113.1 lladdr $(mac_get $h2)
+	ip -6 neigh replace dev $ol2 2001:db8:2::1 lladdr $(mac_get $h2)
+
+	ip -6 route add vrf v$ul2 2001:db8:3::1/128 via 2001:db8:10::1
+	ip route add vrf v$ol2 198.51.100.0/24 dev g2a
+	ip -6 route add vrf v$ol2 2001:db8:1::/64 dev g2a
+}
+
+sw2_hierarchical_destroy()
+{
+	local ol2=$1; shift
+	local ul2=$1; shift
+
+	ip -6 route del vrf v$ol2 2001:db8:2::/64
+	ip route del vrf v$ol2 198.51.100.0/24
+	ip -6 route del vrf v$ul2 2001:db8:3::1/128
+
+	tunnel_destroy g2a
+	vlan_destroy $ul2 111
+
+	__simple_if_fini dummy2 2001:db8:3::2/64
+	ip link del dev dummy2
+
+	simple_if_fini $ul2
+	simple_if_fini $ol2 203.0.113.2/24 2001:db8:2::2/64
+}
+
+test_traffic_ip4ip6()
+{
+	RET=0
+
+	h1mac=$(mac_get $h1)
+	ol1mac=$(mac_get $ol1)
+
+	tc qdisc add dev $ul1 clsact
+	tc filter add dev $ul1 egress proto all pref 1 handle 101 \
+		flower $TC_FLAG action pass
+
+	tc qdisc add dev $ol2 clsact
+	tc filter add dev $ol2 egress protocol ipv4 pref 1 handle 101 \
+		flower $TC_FLAG dst_ip 203.0.113.1 action pass
+
+	$MZ $h1 -c 1000 -p 64 -a $h1mac -b $ol1mac -A 198.51.100.1 \
+		-B 203.0.113.1 -t ip -q -d 1msec
+
+	# Check ports after encap and after decap.
+	tc_check_at_least_x_packets "dev $ul1 egress" 101 1000
+	check_err $? "Packets did not go through $ul1, tc_flag = $TC_FLAG"
+
+	tc_check_at_least_x_packets "dev $ol2 egress" 101 1000
+	check_err $? "Packets did not go through $ol2, tc_flag = $TC_FLAG"
+
+	log_test "$@"
+
+	tc filter del dev $ol2 egress protocol ipv4 pref 1 handle 101 flower
+	tc qdisc del dev $ol2 clsact
+	tc filter del dev $ul1 egress proto all pref 1 handle 101 flower
+	tc qdisc del dev $ul1 clsact
+}
+
+test_traffic_ip6ip6()
+{
+	RET=0
+
+	h1mac=$(mac_get $h1)
+	ol1mac=$(mac_get $ol1)
+
+	tc qdisc add dev $ul1 clsact
+	tc filter add dev $ul1 egress proto all pref 1 handle 101 \
+		flower $TC_FLAG action pass
+
+	tc qdisc add dev $ol2 clsact
+	tc filter add dev $ol2 egress protocol ipv6 pref 1 handle 101 \
+		flower $TC_FLAG dst_ip 2001:db8:2::1 action pass
+
+	$MZ -6 $h1 -c 1000 -p 64 -a $h1mac -b $ol1mac -A 2001:db8:1::1 \
+		-B 2001:db8:2::1 -t ip -q -d 1msec
+
+	# Check ports after encap and after decap.
+	tc_check_at_least_x_packets "dev $ul1 egress" 101 1000
+	check_err $? "Packets did not go through $ul1, tc_flag = $TC_FLAG"
+
+	tc_check_at_least_x_packets "dev $ol2 egress" 101 1000
+	check_err $? "Packets did not go through $ol2, tc_flag = $TC_FLAG"
+
+	log_test "$@"
+
+	tc filter del dev $ol2 egress protocol ipv6 pref 1 handle 101 flower
+	tc qdisc del dev $ol2 clsact
+	tc filter del dev $ul1 egress proto all pref 1 handle 101 flower
+	tc qdisc del dev $ul1 clsact
+}
+
+topo_mtu_change()
+{
+	local mtu=$1
+
+	ip link set mtu $mtu dev $h1
+	ip link set mtu $mtu dev $ol1
+	ip link set mtu $mtu dev g1a
+	ip link set mtu $mtu dev $ul1
+	ip link set mtu $mtu dev $ul1.111
+	ip link set mtu $mtu dev $h2
+	ip link set mtu $mtu dev $ol2
+	ip link set mtu $mtu dev g2a
+	ip link set mtu $mtu dev $ul2
+	ip link set mtu $mtu dev $ul2.111
+}
+
+test_mtu_change()
+{
+	RET=0
+
+	ping6_do $h1 2001:db8:2::1 "-s 1800 -w 3"
+	check_fail $? "ping GRE IPv6 should not pass with packet size 1800"
+
+	RET=0
+
+	topo_mtu_change	2000
+	ping6_do $h1 2001:db8:2::1 "-s 1800 -w 3"
+	check_err $?
+	log_test "ping GRE IPv6, packet size 1800 after MTU change"
+}
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 92087d423bcf..dfd827b7a9f9 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -23,6 +23,8 @@ MC_CLI=${MC_CLI:=smcroutectl}
 PING_TIMEOUT=${PING_TIMEOUT:=5}
 WAIT_TIMEOUT=${WAIT_TIMEOUT:=20}
 INTERFACE_TIMEOUT=${INTERFACE_TIMEOUT:=600}
+REQUIRE_JQ=${REQUIRE_JQ:=yes}
+REQUIRE_MZ=${REQUIRE_MZ:=yes}
 
 relative_path="${BASH_SOURCE%/*}"
 if [[ "$relative_path" == "${BASH_SOURCE}" ]]; then
@@ -141,8 +143,12 @@ require_command()
 	fi
 }
 
-require_command jq
-require_command $MZ
+if [[ "$REQUIRE_JQ" = "yes" ]]; then
+	require_command jq
+fi
+if [[ "$REQUIRE_MZ" = "yes" ]]; then
+	require_command $MZ
+fi
 
 if [[ ! -v NUM_NETIFS ]]; then
 	echo "SKIP: importer does not define \"NUM_NETIFS\""
@@ -280,6 +286,15 @@ log_test()
 	return 0
 }
 
+log_test_skip()
+{
+	local test_name=$1
+	local opt_str=$2
+
+	printf "TEST: %-60s  [SKIP]\n" "$test_name $opt_str"
+	return 0
+}
+
 log_info()
 {
 	local msg=$1
diff --git a/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh b/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh
index 8bd85da1905a..75a37c189ef3 100644
--- a/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh
+++ b/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh
@@ -4,9 +4,12 @@
 ALL_TESTS="
 	ping_ipv4
 	tbf_test
+	tbf_root_test
 "
 source $lib_dir/sch_tbf_core.sh
 
+QDISC_TYPE=${QDISC% *}
+
 tbf_test_one()
 {
 	local bs=$1; shift
@@ -22,6 +25,8 @@ tbf_test_one()
 
 tbf_test()
 {
+	log_info "Testing root-$QDISC_TYPE-tbf"
+
 	# This test is used for both ETS and PRIO. Even though we only need two
 	# bands, PRIO demands a minimum of three.
 	tc qdisc add dev $swp2 root handle 10: $QDISC 3 priomap 2 1 0
@@ -29,6 +34,29 @@ tbf_test()
 	tc qdisc del dev $swp2 root
 }
 
+tbf_root_test()
+{
+	local bs=128K
+
+	log_info "Testing root-tbf-$QDISC_TYPE"
+
+	tc qdisc replace dev $swp2 root handle 1: \
+		tbf rate 400Mbit burst $bs limit 1M
+	tc qdisc replace dev $swp2 parent 1:1 handle 10: \
+		$QDISC 3 priomap 2 1 0
+	tc qdisc replace dev $swp2 parent 10:3 handle 103: \
+		bfifo limit 1M
+	tc qdisc replace dev $swp2 parent 10:2 handle 102: \
+		bfifo limit 1M
+	tc qdisc replace dev $swp2 parent 10:1 handle 101: \
+		bfifo limit 1M
+
+	do_tbf_test 10 400 $bs
+	do_tbf_test 11 400 $bs
+
+	tc qdisc del dev $swp2 root
+}
+
 trap cleanup EXIT
 
 setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/tc_common.sh b/tools/testing/selftests/net/forwarding/tc_common.sh
index 0e18e8be6e2a..bce8bb8d2b6f 100644
--- a/tools/testing/selftests/net/forwarding/tc_common.sh
+++ b/tools/testing/selftests/net/forwarding/tc_common.sh
@@ -16,6 +16,16 @@ tc_check_packets()
 		 tc_rule_handle_stats_get "$id" "$handle" > /dev/null
 }
 
+tc_check_at_least_x_packets()
+{
+	local id=$1
+	local handle=$2
+	local count=$3
+
+	busywait "$TC_HIT_TIMEOUT" until_counter_is ">= $count" \
+		 tc_rule_handle_stats_get "$id" "$handle" > /dev/null
+}
+
 tc_check_packets_hitting()
 {
 	local id=$1
diff --git a/tools/testing/selftests/net/ioam6.sh b/tools/testing/selftests/net/ioam6.sh
index a2489ec398fe..a2b9fad5a9a6 100755
--- a/tools/testing/selftests/net/ioam6.sh
+++ b/tools/testing/selftests/net/ioam6.sh
@@ -6,7 +6,7 @@
 # This script evaluates the IOAM insertion for IPv6 by checking the IOAM data
 # consistency directly inside packets on the receiver side. Tests are divided
 # into three categories: OUTPUT (evaluates the IOAM processing by the sender),
-# INPUT (evaluates the IOAM processing by the receiver) and GLOBAL (evaluates
+# INPUT (evaluates the IOAM processing by a receiver) and GLOBAL (evaluates
 # wider use cases that do not fall into the other two categories). Both OUTPUT
 # and INPUT tests only use a two-node topology (alpha and beta), while GLOBAL
 # tests use the entire three-node topology (alpha, beta, gamma). Each test is
@@ -200,7 +200,7 @@ check_kernel_compatibility()
   ip -netns ioam-tmp-node link set veth0 up
   ip -netns ioam-tmp-node link set veth1 up
 
-  ip -netns ioam-tmp-node ioam namespace add 0 &>/dev/null
+  ip -netns ioam-tmp-node ioam namespace add 0
   ns_ad=$?
 
   ip -netns ioam-tmp-node ioam namespace show | grep -q "namespace 0"
@@ -214,11 +214,11 @@ check_kernel_compatibility()
     exit 1
   fi
 
-  ip -netns ioam-tmp-node route add db02::/64 encap ioam6 trace prealloc \
-         type 0x800000 ns 0 size 4 dev veth0 &>/dev/null
+  ip -netns ioam-tmp-node route add db02::/64 encap ioam6 mode inline \
+         trace prealloc type 0x800000 ns 0 size 4 dev veth0
   tr_ad=$?
 
-  ip -netns ioam-tmp-node -6 route | grep -q "encap ioam6 trace"
+  ip -netns ioam-tmp-node -6 route | grep -q "encap ioam6"
   tr_sh=$?
 
   if [[ $tr_ad != 0 || $tr_sh != 0 ]]
@@ -232,6 +232,30 @@ check_kernel_compatibility()
 
   ip link del veth0 2>/dev/null || true
   ip netns del ioam-tmp-node || true
+
+  lsmod | grep -q "ip6_tunnel"
+  ip6tnl_loaded=$?
+
+  if [ $ip6tnl_loaded = 0 ]
+  then
+    encap_tests=0
+  else
+    modprobe ip6_tunnel &>/dev/null
+    lsmod | grep -q "ip6_tunnel"
+    encap_tests=$?
+
+    if [ $encap_tests != 0 ]
+    then
+      ip a | grep -q "ip6tnl0"
+      encap_tests=$?
+
+      if [ $encap_tests != 0 ]
+      then
+        echo "Note: ip6_tunnel not found neither as a module nor inside the" \
+             "kernel, tests that require it (encap mode) will be omitted"
+      fi
+    fi
+  fi
 }
 
 cleanup()
@@ -242,6 +266,11 @@ cleanup()
   ip netns del ioam-node-alpha || true
   ip netns del ioam-node-beta || true
   ip netns del ioam-node-gamma || true
+
+  if [ $ip6tnl_loaded != 0 ]
+  then
+    modprobe -r ip6_tunnel 2>/dev/null || true
+  fi
 }
 
 setup()
@@ -329,6 +358,12 @@ log_test_failed()
   printf "TEST: %-60s  [FAIL]\n" "${desc}"
 }
 
+log_results()
+{
+  echo "- Tests passed: ${npassed}"
+  echo "- Tests failed: ${nfailed}"
+}
+
 run_test()
 {
   local name=$1
@@ -349,17 +384,27 @@ run_test()
   ip netns exec $node_src ping6 -t 64 -c 1 -W 1 $ip6_dst &>/dev/null
   if [ $? != 0 ]
   then
+    nfailed=$((nfailed+1))
     log_test_failed "${desc}"
     kill -2 $spid &>/dev/null
   else
     wait $spid
-    [ $? = 0 ] && log_test_passed "${desc}" || log_test_failed "${desc}"
+    if [ $? = 0 ]
+    then
+      npassed=$((npassed+1))
+      log_test_passed "${desc}"
+    else
+      nfailed=$((nfailed+1))
+      log_test_failed "${desc}"
+    fi
   fi
 }
 
 run()
 {
   echo
+  printf "%0.s-" {1..74}
+  echo
   echo "OUTPUT tests"
   printf "%0.s-" {1..74}
   echo
@@ -369,7 +414,8 @@ run()
 
   for t in $TESTS_OUTPUT
   do
-    $t
+    $t "inline"
+    [ $encap_tests = 0 ] && $t "encap"
   done
 
   # clean OUTPUT settings
@@ -378,6 +424,8 @@ run()
 
 
   echo
+  printf "%0.s-" {1..74}
+  echo
   echo "INPUT tests"
   printf "%0.s-" {1..74}
   echo
@@ -387,7 +435,8 @@ run()
 
   for t in $TESTS_INPUT
   do
-    $t
+    $t "inline"
+    [ $encap_tests = 0 ] && $t "encap"
   done
 
   # clean INPUT settings
@@ -396,7 +445,8 @@ run()
   ip -netns ioam-node-alpha ioam namespace set 123 schema ${ALPHA[8]}
   ip -netns ioam-node-alpha route change db01::/64 dev veth0
 
-
+  echo
+  printf "%0.s-" {1..74}
   echo
   echo "GLOBAL tests"
   printf "%0.s-" {1..74}
@@ -404,8 +454,12 @@ run()
 
   for t in $TESTS_GLOBAL
   do
-    $t
+    $t "inline"
+    [ $encap_tests = 0 ] && $t "encap"
   done
+
+  echo
+  log_results
 }
 
 bit2type=(
@@ -431,11 +485,16 @@ out_undef_ns()
   ##############################################################################
   local desc="Unknown IOAM namespace"
 
-  ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \
-         type 0x800000 ns 0 size 4 dev veth0
+  [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1"
+  [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up
 
-  run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \
-         db01::1 veth0 0x800000 0
+  ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \
+         trace prealloc type 0x800000 ns 0 size 4 dev veth0
+
+  run_test ${FUNCNAME[0]} "${desc} ($1 mode)" ioam-node-alpha ioam-node-beta \
+         db01::2 db01::1 veth0 0x800000 0
+
+  [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down
 }
 
 out_no_room()
@@ -446,11 +505,16 @@ out_no_room()
   ##############################################################################
   local desc="Missing trace room"
 
-  ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \
-         type 0xc00000 ns 123 size 4 dev veth0
+  [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1"
+  [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up
+
+  ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \
+         trace prealloc type 0xc00000 ns 123 size 4 dev veth0
 
-  run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \
-         db01::1 veth0 0xc00000 123
+  run_test ${FUNCNAME[0]} "${desc} ($1 mode)" ioam-node-alpha ioam-node-beta \
+         db01::2 db01::1 veth0 0xc00000 123
+
+  [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down
 }
 
 out_bits()
@@ -465,10 +529,13 @@ out_bits()
   local tmp=${bit2size[22]}
   bit2size[22]=$(( $tmp + ${#ALPHA[9]} + ((4 - (${#ALPHA[9]} % 4)) % 4) ))
 
+  [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1"
+  [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up
+
   for i in {0..22}
   do
-    ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace \
-           prealloc type ${bit2type[$i]} ns 123 size ${bit2size[$i]} \
+    ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \
+           trace prealloc type ${bit2type[$i]} ns 123 size ${bit2size[$i]} \
            dev veth0 &>/dev/null
 
     local cmd_res=$?
@@ -485,11 +552,13 @@ out_bits()
         log_test_failed "$descr"
       fi
     else
-      run_test "out_bit$i" "$descr" ioam-node-alpha ioam-node-beta \
-             db01::2 db01::1 veth0 ${bit2type[$i]} 123
+	run_test "out_bit$i" "$descr ($1 mode)" ioam-node-alpha \
+           ioam-node-beta db01::2 db01::1 veth0 ${bit2type[$i]} 123
     fi
   done
 
+  [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down
+
   bit2size[22]=$tmp
 }
 
@@ -501,11 +570,16 @@ out_full_supp_trace()
   ##############################################################################
   local desc="Full supported trace"
 
-  ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \
-         type 0xfff002 ns 123 size 100 dev veth0
+  [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1"
+  [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up
 
-  run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \
-         db01::1 veth0 0xfff002 123
+  ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \
+         trace prealloc type 0xfff002 ns 123 size 100 dev veth0
+
+  run_test ${FUNCNAME[0]} "${desc} ($1 mode)" ioam-node-alpha ioam-node-beta \
+         db01::2 db01::1 veth0 0xfff002 123
+
+  [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down
 }
 
 
@@ -526,11 +600,16 @@ in_undef_ns()
   ##############################################################################
   local desc="Unknown IOAM namespace"
 
-  ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \
-         type 0x800000 ns 0 size 4 dev veth0
+  [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1"
+  [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up
+
+  ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \
+         trace prealloc type 0x800000 ns 0 size 4 dev veth0
 
-  run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \
-         db01::1 veth0 0x800000 0
+  run_test ${FUNCNAME[0]} "${desc} ($1 mode)" ioam-node-alpha ioam-node-beta \
+         db01::2 db01::1 veth0 0x800000 0
+
+  [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down
 }
 
 in_no_room()
@@ -541,11 +620,16 @@ in_no_room()
   ##############################################################################
   local desc="Missing trace room"
 
-  ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \
-         type 0xc00000 ns 123 size 4 dev veth0
+  [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1"
+  [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up
+
+  ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \
+         trace prealloc type 0xc00000 ns 123 size 4 dev veth0
+
+  run_test ${FUNCNAME[0]} "${desc} ($1 mode)" ioam-node-alpha ioam-node-beta \
+         db01::2 db01::1 veth0 0xc00000 123
 
-  run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \
-         db01::1 veth0 0xc00000 123
+  [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down
 }
 
 in_bits()
@@ -560,15 +644,21 @@ in_bits()
   local tmp=${bit2size[22]}
   bit2size[22]=$(( $tmp + ${#BETA[9]} + ((4 - (${#BETA[9]} % 4)) % 4) ))
 
+  [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1"
+  [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up
+
   for i in {0..11} {22..22}
   do
-    ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace \
-           prealloc type ${bit2type[$i]} ns 123 size ${bit2size[$i]} dev veth0
+    ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \
+           trace prealloc type ${bit2type[$i]} ns 123 size ${bit2size[$i]} \
+           dev veth0
 
-    run_test "in_bit$i" "${desc/<n>/$i}" ioam-node-alpha ioam-node-beta \
-           db01::2 db01::1 veth0 ${bit2type[$i]} 123
+    run_test "in_bit$i" "${desc/<n>/$i} ($1 mode)" ioam-node-alpha \
+           ioam-node-beta db01::2 db01::1 veth0 ${bit2type[$i]} 123
   done
 
+  [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down
+
   bit2size[22]=$tmp
 }
 
@@ -585,11 +675,16 @@ in_oflag()
   #   back the IOAM namespace that was previously configured on the sender.
   ip -netns ioam-node-alpha ioam namespace add 123
 
-  ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \
-         type 0xc00000 ns 123 size 4 dev veth0
+  [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1"
+  [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up
+
+  ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \
+         trace prealloc type 0xc00000 ns 123 size 4 dev veth0
 
-  run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \
-         db01::1 veth0 0xc00000 123
+  run_test ${FUNCNAME[0]} "${desc} ($1 mode)" ioam-node-alpha ioam-node-beta \
+         db01::2 db01::1 veth0 0xc00000 123
+
+  [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down
 
   # And we clean the exception for this test to get things back to normal for
   # other INPUT tests
@@ -604,11 +699,16 @@ in_full_supp_trace()
   ##############################################################################
   local desc="Full supported trace"
 
-  ip -netns ioam-node-alpha route change db01::/64 encap ioam6 trace prealloc \
-         type 0xfff002 ns 123 size 80 dev veth0
+  [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1"
+  [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up
+
+  ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \
+         trace prealloc type 0xfff002 ns 123 size 80 dev veth0
+
+  run_test ${FUNCNAME[0]} "${desc} ($1 mode)" ioam-node-alpha ioam-node-beta \
+         db01::2 db01::1 veth0 0xfff002 123
 
-  run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-beta db01::2 \
-         db01::1 veth0 0xfff002 123
+  [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down
 }
 
 
@@ -627,11 +727,16 @@ fwd_full_supp_trace()
   ##############################################################################
   local desc="Forward - Full supported trace"
 
-  ip -netns ioam-node-alpha route change db02::/64 encap ioam6 trace prealloc \
-         type 0xfff002 ns 123 size 244 via db01::1 dev veth0
+  [ "$1" = "encap" ] && mode="$1 tundst db02::2" || mode="$1"
+  [ "$1" = "encap" ] && ip -netns ioam-node-gamma link set ip6tnl0 up
 
-  run_test ${FUNCNAME[0]} "${desc}" ioam-node-alpha ioam-node-gamma db01::2 \
-         db02::2 veth0 0xfff002 123
+  ip -netns ioam-node-alpha route change db02::/64 encap ioam6 mode $mode \
+         trace prealloc type 0xfff002 ns 123 size 244 via db01::1 dev veth0
+
+  run_test ${FUNCNAME[0]} "${desc} ($1 mode)" ioam-node-alpha ioam-node-gamma \
+         db01::2 db02::2 veth0 0xfff002 123
+
+  [ "$1" = "encap" ] && ip -netns ioam-node-gamma link set ip6tnl0 down
 }
 
 
@@ -641,6 +746,9 @@ fwd_full_supp_trace()
 #                                                                              #
 ################################################################################
 
+npassed=0
+nfailed=0
+
 if [ "$(id -u)" -ne 0 ]
 then
   echo "SKIP: Need root privileges"
diff --git a/tools/testing/selftests/net/mptcp/.gitignore b/tools/testing/selftests/net/mptcp/.gitignore
index 260336d5f0b1..7569d892967a 100644
--- a/tools/testing/selftests/net/mptcp/.gitignore
+++ b/tools/testing/selftests/net/mptcp/.gitignore
@@ -1,4 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0-only
 mptcp_connect
+mptcp_sockopt
 pm_nl_ctl
 *.pcap
diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile
index f1464f09b080..bbf4e448bad9 100644
--- a/tools/testing/selftests/net/mptcp/Makefile
+++ b/tools/testing/selftests/net/mptcp/Makefile
@@ -8,7 +8,7 @@ CFLAGS =  -Wall -Wl,--no-as-needed -O2 -g  -I$(top_srcdir)/usr/include
 TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh \
 	      simult_flows.sh mptcp_sockopt.sh
 
-TEST_GEN_FILES = mptcp_connect pm_nl_ctl
+TEST_GEN_FILES = mptcp_connect pm_nl_ctl mptcp_sockopt
 
 TEST_FILES := settings
 
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index 89c4753c2760..95e81d557b08 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -14,6 +14,7 @@
 #include <strings.h>
 #include <signal.h>
 #include <unistd.h>
+#include <time.h>
 
 #include <sys/poll.h>
 #include <sys/sendfile.h>
@@ -64,6 +65,7 @@ static int cfg_sndbuf;
 static int cfg_rcvbuf;
 static bool cfg_join;
 static bool cfg_remove;
+static unsigned int cfg_time;
 static unsigned int cfg_do_w;
 static int cfg_wait;
 static uint32_t cfg_mark;
@@ -78,9 +80,10 @@ static struct cfg_cmsg_types cfg_cmsg_types;
 static void die_usage(void)
 {
 	fprintf(stderr, "Usage: mptcp_connect [-6] [-u] [-s MPTCP|TCP] [-p port] [-m mode]"
-		"[-l] [-w sec] connect_address\n");
+		"[-l] [-w sec] [-t num] [-T num] connect_address\n");
 	fprintf(stderr, "\t-6 use ipv6\n");
 	fprintf(stderr, "\t-t num -- set poll timeout to num\n");
+	fprintf(stderr, "\t-T num -- set expected runtime to num ms\n");
 	fprintf(stderr, "\t-S num -- set SO_SNDBUF to num\n");
 	fprintf(stderr, "\t-R num -- set SO_RCVBUF to num\n");
 	fprintf(stderr, "\t-p num -- use port num\n");
@@ -448,7 +451,7 @@ static void set_nonblock(int fd)
 	fcntl(fd, F_SETFL, flags | O_NONBLOCK);
 }
 
-static int copyfd_io_poll(int infd, int peerfd, int outfd)
+static int copyfd_io_poll(int infd, int peerfd, int outfd, bool *in_closed_after_out)
 {
 	struct pollfd fds = {
 		.fd = peerfd,
@@ -487,9 +490,11 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd)
 				 */
 				fds.events &= ~POLLIN;
 
-				if ((fds.events & POLLOUT) == 0)
+				if ((fds.events & POLLOUT) == 0) {
+					*in_closed_after_out = true;
 					/* and nothing more to send */
 					break;
+				}
 
 			/* Else, still have data to transmit */
 			} else if (len < 0) {
@@ -547,7 +552,7 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd)
 	}
 
 	/* leave some time for late join/announce */
-	if (cfg_join || cfg_remove)
+	if (cfg_remove)
 		usleep(cfg_wait);
 
 	close(peerfd);
@@ -646,7 +651,7 @@ static int do_sendfile(int infd, int outfd, unsigned int count)
 }
 
 static int copyfd_io_mmap(int infd, int peerfd, int outfd,
-			  unsigned int size)
+			  unsigned int size, bool *in_closed_after_out)
 {
 	int err;
 
@@ -664,13 +669,14 @@ static int copyfd_io_mmap(int infd, int peerfd, int outfd,
 		shutdown(peerfd, SHUT_WR);
 
 		err = do_recvfile(peerfd, outfd);
+		*in_closed_after_out = true;
 	}
 
 	return err;
 }
 
 static int copyfd_io_sendfile(int infd, int peerfd, int outfd,
-			      unsigned int size)
+			      unsigned int size, bool *in_closed_after_out)
 {
 	int err;
 
@@ -685,6 +691,7 @@ static int copyfd_io_sendfile(int infd, int peerfd, int outfd,
 		if (err)
 			return err;
 		err = do_recvfile(peerfd, outfd);
+		*in_closed_after_out = true;
 	}
 
 	return err;
@@ -692,27 +699,62 @@ static int copyfd_io_sendfile(int infd, int peerfd, int outfd,
 
 static int copyfd_io(int infd, int peerfd, int outfd)
 {
+	bool in_closed_after_out = false;
+	struct timespec start, end;
 	int file_size;
+	int ret;
+
+	if (cfg_time && (clock_gettime(CLOCK_MONOTONIC, &start) < 0))
+		xerror("can not fetch start time %d", errno);
 
 	switch (cfg_mode) {
 	case CFG_MODE_POLL:
-		return copyfd_io_poll(infd, peerfd, outfd);
+		ret = copyfd_io_poll(infd, peerfd, outfd, &in_closed_after_out);
+		break;
+
 	case CFG_MODE_MMAP:
 		file_size = get_infd_size(infd);
 		if (file_size < 0)
 			return file_size;
-		return copyfd_io_mmap(infd, peerfd, outfd, file_size);
+		ret = copyfd_io_mmap(infd, peerfd, outfd, file_size, &in_closed_after_out);
+		break;
+
 	case CFG_MODE_SENDFILE:
 		file_size = get_infd_size(infd);
 		if (file_size < 0)
 			return file_size;
-		return copyfd_io_sendfile(infd, peerfd, outfd, file_size);
+		ret = copyfd_io_sendfile(infd, peerfd, outfd, file_size, &in_closed_after_out);
+		break;
+
+	default:
+		fprintf(stderr, "Invalid mode %d\n", cfg_mode);
+
+		die_usage();
+		return 1;
 	}
 
-	fprintf(stderr, "Invalid mode %d\n", cfg_mode);
+	if (ret)
+		return ret;
 
-	die_usage();
-	return 1;
+	if (cfg_time) {
+		unsigned int delta_ms;
+
+		if (clock_gettime(CLOCK_MONOTONIC, &end) < 0)
+			xerror("can not fetch end time %d", errno);
+		delta_ms = (end.tv_sec - start.tv_sec) * 1000 + (end.tv_nsec - start.tv_nsec) / 1000000;
+		if (delta_ms > cfg_time) {
+			xerror("transfer slower than expected! runtime %d ms, expected %d ms",
+			       delta_ms, cfg_time);
+		}
+
+		/* show the runtime only if this end shutdown(wr) before receiving the EOF,
+		 * (that is, if this end got the longer runtime)
+		 */
+		if (in_closed_after_out)
+			fprintf(stderr, "%d", delta_ms);
+	}
+
+	return 0;
 }
 
 static void check_sockaddr(int pf, struct sockaddr_storage *ss,
@@ -1005,12 +1047,11 @@ static void parse_opts(int argc, char **argv)
 {
 	int c;
 
-	while ((c = getopt(argc, argv, "6jr:lp:s:hut:m:S:R:w:M:P:c:")) != -1) {
+	while ((c = getopt(argc, argv, "6jr:lp:s:hut:T:m:S:R:w:M:P:c:")) != -1) {
 		switch (c) {
 		case 'j':
 			cfg_join = true;
 			cfg_mode = CFG_MODE_POLL;
-			cfg_wait = 400000;
 			break;
 		case 'r':
 			cfg_remove = true;
@@ -1043,6 +1084,9 @@ static void parse_opts(int argc, char **argv)
 			if (poll_timeout <= 0)
 				poll_timeout = -1;
 			break;
+		case 'T':
+			cfg_time = atoi(optarg);
+			break;
 		case 'm':
 			cfg_mode = parse_mode(optarg);
 			break;
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 255793c5ac4f..7ef639a9d4a6 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -297,7 +297,7 @@ do_transfer()
 	if [ "$test_link_fail" -eq 2 ];then
 		timeout ${timeout_test} \
 			ip netns exec ${listener_ns} \
-				$mptcp_connect -t ${timeout_poll} -l -p $port -s ${cl_proto} \
+				$mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
 					${local_addr} < "$sinfail" > "$sout" &
 	else
 		timeout ${timeout_test} \
@@ -945,12 +945,15 @@ subflows_tests()
 
 	# subflow limited by client
 	reset
+	ip netns exec $ns1 ./pm_nl_ctl limits 0 0
+	ip netns exec $ns2 ./pm_nl_ctl limits 0 0
 	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
 	run_tests $ns1 $ns2 10.0.1.1
 	chk_join_nr "single subflow, limited by client" 0 0 0
 
 	# subflow limited by server
 	reset
+	ip netns exec $ns1 ./pm_nl_ctl limits 0 0
 	ip netns exec $ns2 ./pm_nl_ctl limits 0 1
 	ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
 	run_tests $ns1 $ns2 10.0.1.1
@@ -973,7 +976,7 @@ subflows_tests()
 	run_tests $ns1 $ns2 10.0.1.1
 	chk_join_nr "multiple subflows" 2 2 2
 
-	# multiple subflows limited by serverf
+	# multiple subflows limited by server
 	reset
 	ip netns exec $ns1 ./pm_nl_ctl limits 0 1
 	ip netns exec $ns2 ./pm_nl_ctl limits 0 2
diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c
new file mode 100644
index 000000000000..417b11cafafe
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c
@@ -0,0 +1,683 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <assert.h>
+#include <errno.h>
+#include <limits.h>
+#include <string.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <unistd.h>
+
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#include <netdb.h>
+#include <netinet/in.h>
+
+#include <linux/tcp.h>
+
+static int pf = AF_INET;
+
+#ifndef IPPROTO_MPTCP
+#define IPPROTO_MPTCP 262
+#endif
+#ifndef SOL_MPTCP
+#define SOL_MPTCP 284
+#endif
+
+#ifndef MPTCP_INFO
+struct mptcp_info {
+	__u8	mptcpi_subflows;
+	__u8	mptcpi_add_addr_signal;
+	__u8	mptcpi_add_addr_accepted;
+	__u8	mptcpi_subflows_max;
+	__u8	mptcpi_add_addr_signal_max;
+	__u8	mptcpi_add_addr_accepted_max;
+	__u32	mptcpi_flags;
+	__u32	mptcpi_token;
+	__u64	mptcpi_write_seq;
+	__u64	mptcpi_snd_una;
+	__u64	mptcpi_rcv_nxt;
+	__u8	mptcpi_local_addr_used;
+	__u8	mptcpi_local_addr_max;
+	__u8	mptcpi_csum_enabled;
+};
+
+struct mptcp_subflow_data {
+	__u32		size_subflow_data;		/* size of this structure in userspace */
+	__u32		num_subflows;			/* must be 0, set by kernel */
+	__u32		size_kernel;			/* must be 0, set by kernel */
+	__u32		size_user;			/* size of one element in data[] */
+} __attribute__((aligned(8)));
+
+struct mptcp_subflow_addrs {
+	union {
+		__kernel_sa_family_t sa_family;
+		struct sockaddr sa_local;
+		struct sockaddr_in sin_local;
+		struct sockaddr_in6 sin6_local;
+		struct __kernel_sockaddr_storage ss_local;
+	};
+	union {
+		struct sockaddr sa_remote;
+		struct sockaddr_in sin_remote;
+		struct sockaddr_in6 sin6_remote;
+		struct __kernel_sockaddr_storage ss_remote;
+	};
+};
+
+#define MPTCP_INFO		1
+#define MPTCP_TCPINFO		2
+#define MPTCP_SUBFLOW_ADDRS	3
+#endif
+
+struct so_state {
+	struct mptcp_info mi;
+	uint64_t mptcpi_rcv_delta;
+	uint64_t tcpi_rcv_delta;
+};
+
+static void die_perror(const char *msg)
+{
+	perror(msg);
+	exit(1);
+}
+
+static void die_usage(int r)
+{
+	fprintf(stderr, "Usage: mptcp_sockopt [-6]\n");
+	exit(r);
+}
+
+static void xerror(const char *fmt, ...)
+{
+	va_list ap;
+
+	va_start(ap, fmt);
+	vfprintf(stderr, fmt, ap);
+	va_end(ap);
+	fputc('\n', stderr);
+	exit(1);
+}
+
+static const char *getxinfo_strerr(int err)
+{
+	if (err == EAI_SYSTEM)
+		return strerror(errno);
+
+	return gai_strerror(err);
+}
+
+static void xgetaddrinfo(const char *node, const char *service,
+			 const struct addrinfo *hints,
+			 struct addrinfo **res)
+{
+	int err = getaddrinfo(node, service, hints, res);
+
+	if (err) {
+		const char *errstr = getxinfo_strerr(err);
+
+		fprintf(stderr, "Fatal: getaddrinfo(%s:%s): %s\n",
+			node ? node : "", service ? service : "", errstr);
+		exit(1);
+	}
+}
+
+static int sock_listen_mptcp(const char * const listenaddr,
+			     const char * const port)
+{
+	int sock;
+	struct addrinfo hints = {
+		.ai_protocol = IPPROTO_TCP,
+		.ai_socktype = SOCK_STREAM,
+		.ai_flags = AI_PASSIVE | AI_NUMERICHOST
+	};
+
+	hints.ai_family = pf;
+
+	struct addrinfo *a, *addr;
+	int one = 1;
+
+	xgetaddrinfo(listenaddr, port, &hints, &addr);
+	hints.ai_family = pf;
+
+	for (a = addr; a; a = a->ai_next) {
+		sock = socket(a->ai_family, a->ai_socktype, IPPROTO_MPTCP);
+		if (sock < 0)
+			continue;
+
+		if (-1 == setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &one,
+				     sizeof(one)))
+			perror("setsockopt");
+
+		if (bind(sock, a->ai_addr, a->ai_addrlen) == 0)
+			break; /* success */
+
+		perror("bind");
+		close(sock);
+		sock = -1;
+	}
+
+	freeaddrinfo(addr);
+
+	if (sock < 0)
+		xerror("could not create listen socket");
+
+	if (listen(sock, 20))
+		die_perror("listen");
+
+	return sock;
+}
+
+static int sock_connect_mptcp(const char * const remoteaddr,
+			      const char * const port, int proto)
+{
+	struct addrinfo hints = {
+		.ai_protocol = IPPROTO_TCP,
+		.ai_socktype = SOCK_STREAM,
+	};
+	struct addrinfo *a, *addr;
+	int sock = -1;
+
+	hints.ai_family = pf;
+
+	xgetaddrinfo(remoteaddr, port, &hints, &addr);
+	for (a = addr; a; a = a->ai_next) {
+		sock = socket(a->ai_family, a->ai_socktype, proto);
+		if (sock < 0)
+			continue;
+
+		if (connect(sock, a->ai_addr, a->ai_addrlen) == 0)
+			break; /* success */
+
+		die_perror("connect");
+	}
+
+	if (sock < 0)
+		xerror("could not create connect socket");
+
+	freeaddrinfo(addr);
+	return sock;
+}
+
+static void parse_opts(int argc, char **argv)
+{
+	int c;
+
+	while ((c = getopt(argc, argv, "h6")) != -1) {
+		switch (c) {
+		case 'h':
+			die_usage(0);
+			break;
+		case '6':
+			pf = AF_INET6;
+			break;
+		default:
+			die_usage(1);
+			break;
+		}
+	}
+}
+
+static void do_getsockopt_bogus_sf_data(int fd, int optname)
+{
+	struct mptcp_subflow_data good_data;
+	struct bogus_data {
+		struct mptcp_subflow_data d;
+		char buf[2];
+	} bd;
+	socklen_t olen, _olen;
+	int ret;
+
+	memset(&bd, 0, sizeof(bd));
+	memset(&good_data, 0, sizeof(good_data));
+
+	olen = sizeof(good_data);
+	good_data.size_subflow_data = olen;
+
+	ret = getsockopt(fd, SOL_MPTCP, optname, &bd, &olen);
+	assert(ret < 0); /* 0 size_subflow_data */
+	assert(olen == sizeof(good_data));
+
+	bd.d = good_data;
+
+	ret = getsockopt(fd, SOL_MPTCP, optname, &bd, &olen);
+	assert(ret == 0);
+	assert(olen == sizeof(good_data));
+	assert(bd.d.num_subflows == 1);
+	assert(bd.d.size_kernel > 0);
+	assert(bd.d.size_user == 0);
+
+	bd.d = good_data;
+	_olen = rand() % olen;
+	olen = _olen;
+	ret = getsockopt(fd, SOL_MPTCP, optname, &bd, &olen);
+	assert(ret < 0);	/* bogus olen */
+	assert(olen == _olen);	/* must be unchanged */
+
+	bd.d = good_data;
+	olen = sizeof(good_data);
+	bd.d.size_kernel = 1;
+	ret = getsockopt(fd, SOL_MPTCP, optname, &bd, &olen);
+	assert(ret < 0); /* size_kernel not 0 */
+
+	bd.d = good_data;
+	olen = sizeof(good_data);
+	bd.d.num_subflows = 1;
+	ret = getsockopt(fd, SOL_MPTCP, optname, &bd, &olen);
+	assert(ret < 0); /* num_subflows not 0 */
+
+	/* forward compat check: larger struct mptcp_subflow_data on 'old' kernel */
+	bd.d = good_data;
+	olen = sizeof(bd);
+	bd.d.size_subflow_data = sizeof(bd);
+
+	ret = getsockopt(fd, SOL_MPTCP, optname, &bd, &olen);
+	assert(ret == 0);
+
+	/* olen must be truncated to real data size filled by kernel: */
+	assert(olen == sizeof(good_data));
+
+	assert(bd.d.size_subflow_data == sizeof(bd));
+
+	bd.d = good_data;
+	bd.d.size_subflow_data += 1;
+	bd.d.size_user = 1;
+	olen = bd.d.size_subflow_data + 1;
+	_olen = olen;
+
+	ret = getsockopt(fd, SOL_MPTCP, optname, &bd, &_olen);
+	assert(ret == 0);
+
+	/* no truncation, kernel should have filled 1 byte of optname payload in buf[1]: */
+	assert(olen == _olen);
+
+	assert(bd.d.size_subflow_data == sizeof(good_data) + 1);
+	assert(bd.buf[0] == 0);
+}
+
+static void do_getsockopt_mptcp_info(struct so_state *s, int fd, size_t w)
+{
+	struct mptcp_info i;
+	socklen_t olen;
+	int ret;
+
+	olen = sizeof(i);
+	ret = getsockopt(fd, SOL_MPTCP, MPTCP_INFO, &i, &olen);
+
+	if (ret < 0)
+		die_perror("getsockopt MPTCP_INFO");
+
+	assert(olen == sizeof(i));
+
+	if (s->mi.mptcpi_write_seq == 0)
+		s->mi = i;
+
+	assert(s->mi.mptcpi_write_seq + w == i.mptcpi_write_seq);
+
+	s->mptcpi_rcv_delta = i.mptcpi_rcv_nxt - s->mi.mptcpi_rcv_nxt;
+}
+
+static void do_getsockopt_tcp_info(struct so_state *s, int fd, size_t r, size_t w)
+{
+	struct my_tcp_info {
+		struct mptcp_subflow_data d;
+		struct tcp_info ti[2];
+	} ti;
+	int ret, tries = 5;
+	socklen_t olen;
+
+	do {
+		memset(&ti, 0, sizeof(ti));
+
+		ti.d.size_subflow_data = sizeof(struct mptcp_subflow_data);
+		ti.d.size_user = sizeof(struct tcp_info);
+		olen = sizeof(ti);
+
+		ret = getsockopt(fd, SOL_MPTCP, MPTCP_TCPINFO, &ti, &olen);
+		if (ret < 0)
+			xerror("getsockopt MPTCP_TCPINFO (tries %d, %m)");
+
+		assert(olen <= sizeof(ti));
+		assert(ti.d.size_user == ti.d.size_kernel);
+		assert(ti.d.size_user == sizeof(struct tcp_info));
+		assert(ti.d.num_subflows == 1);
+
+		assert(olen > (socklen_t)sizeof(struct mptcp_subflow_data));
+		olen -= sizeof(struct mptcp_subflow_data);
+		assert(olen == sizeof(struct tcp_info));
+
+		if (ti.ti[0].tcpi_bytes_sent == w &&
+		    ti.ti[0].tcpi_bytes_received == r)
+			goto done;
+
+		if (r == 0 && ti.ti[0].tcpi_bytes_sent == w &&
+		    ti.ti[0].tcpi_bytes_received) {
+			s->tcpi_rcv_delta = ti.ti[0].tcpi_bytes_received;
+			goto done;
+		}
+
+		/* wait and repeat, might be that tx is still ongoing */
+		sleep(1);
+	} while (tries-- > 0);
+
+	xerror("tcpi_bytes_sent %" PRIu64 ", want %zu. tcpi_bytes_received %" PRIu64 ", want %zu",
+		ti.ti[0].tcpi_bytes_sent, w, ti.ti[0].tcpi_bytes_received, r);
+
+done:
+	do_getsockopt_bogus_sf_data(fd, MPTCP_TCPINFO);
+}
+
+static void do_getsockopt_subflow_addrs(int fd)
+{
+	struct sockaddr_storage remote, local;
+	socklen_t olen, rlen, llen;
+	int ret;
+	struct my_addrs {
+		struct mptcp_subflow_data d;
+		struct mptcp_subflow_addrs addr[2];
+	} addrs;
+
+	memset(&addrs, 0, sizeof(addrs));
+	memset(&local, 0, sizeof(local));
+	memset(&remote, 0, sizeof(remote));
+
+	addrs.d.size_subflow_data = sizeof(struct mptcp_subflow_data);
+	addrs.d.size_user = sizeof(struct mptcp_subflow_addrs);
+	olen = sizeof(addrs);
+
+	ret = getsockopt(fd, SOL_MPTCP, MPTCP_SUBFLOW_ADDRS, &addrs, &olen);
+	if (ret < 0)
+		die_perror("getsockopt MPTCP_SUBFLOW_ADDRS");
+
+	assert(olen <= sizeof(addrs));
+	assert(addrs.d.size_user == addrs.d.size_kernel);
+	assert(addrs.d.size_user == sizeof(struct mptcp_subflow_addrs));
+	assert(addrs.d.num_subflows == 1);
+
+	assert(olen > (socklen_t)sizeof(struct mptcp_subflow_data));
+	olen -= sizeof(struct mptcp_subflow_data);
+	assert(olen == sizeof(struct mptcp_subflow_addrs));
+
+	llen = sizeof(local);
+	ret = getsockname(fd, (struct sockaddr *)&local, &llen);
+	if (ret < 0)
+		die_perror("getsockname");
+	rlen = sizeof(remote);
+	ret = getpeername(fd, (struct sockaddr *)&remote, &rlen);
+	if (ret < 0)
+		die_perror("getpeername");
+
+	assert(rlen > 0);
+	assert(rlen == llen);
+
+	assert(remote.ss_family == local.ss_family);
+
+	assert(memcmp(&local, &addrs.addr[0].ss_local, sizeof(local)) == 0);
+	assert(memcmp(&remote, &addrs.addr[0].ss_remote, sizeof(remote)) == 0);
+
+	memset(&addrs, 0, sizeof(addrs));
+
+	addrs.d.size_subflow_data = sizeof(struct mptcp_subflow_data);
+	addrs.d.size_user = sizeof(sa_family_t);
+	olen = sizeof(addrs.d) + sizeof(sa_family_t);
+
+	ret = getsockopt(fd, SOL_MPTCP, MPTCP_SUBFLOW_ADDRS, &addrs, &olen);
+	assert(ret == 0);
+	assert(olen == sizeof(addrs.d) + sizeof(sa_family_t));
+
+	assert(addrs.addr[0].sa_family == pf);
+	assert(addrs.addr[0].sa_family == local.ss_family);
+
+	assert(memcmp(&local, &addrs.addr[0].ss_local, sizeof(local)) != 0);
+	assert(memcmp(&remote, &addrs.addr[0].ss_remote, sizeof(remote)) != 0);
+
+	do_getsockopt_bogus_sf_data(fd, MPTCP_SUBFLOW_ADDRS);
+}
+
+static void do_getsockopts(struct so_state *s, int fd, size_t r, size_t w)
+{
+	do_getsockopt_mptcp_info(s, fd, w);
+
+	do_getsockopt_tcp_info(s, fd, r, w);
+
+	do_getsockopt_subflow_addrs(fd);
+}
+
+static void connect_one_server(int fd, int pipefd)
+{
+	char buf[4096], buf2[4096];
+	size_t len, i, total;
+	struct so_state s;
+	bool eof = false;
+	ssize_t ret;
+
+	memset(&s, 0, sizeof(s));
+
+	len = rand() % (sizeof(buf) - 1);
+
+	if (len < 128)
+		len = 128;
+
+	for (i = 0; i < len ; i++) {
+		buf[i] = rand() % 26;
+		buf[i] += 'A';
+	}
+
+	buf[i] = '\n';
+
+	do_getsockopts(&s, fd, 0, 0);
+
+	/* un-block server */
+	ret = read(pipefd, buf2, 4);
+	assert(ret == 4);
+	close(pipefd);
+
+	assert(strncmp(buf2, "xmit", 4) == 0);
+
+	ret = write(fd, buf, len);
+	if (ret < 0)
+		die_perror("write");
+
+	if (ret != (ssize_t)len)
+		xerror("short write");
+
+	total = 0;
+	do {
+		ret = read(fd, buf2 + total, sizeof(buf2) - total);
+		if (ret < 0)
+			die_perror("read");
+		if (ret == 0) {
+			eof = true;
+			break;
+		}
+
+		total += ret;
+	} while (total < len);
+
+	if (total != len)
+		xerror("total %lu, len %lu eof %d\n", total, len, eof);
+
+	if (memcmp(buf, buf2, len))
+		xerror("data corruption");
+
+	if (s.tcpi_rcv_delta)
+		assert(s.tcpi_rcv_delta <= total);
+
+	do_getsockopts(&s, fd, ret, ret);
+
+	if (eof)
+		total += 1; /* sequence advances due to FIN */
+
+	assert(s.mptcpi_rcv_delta == (uint64_t)total);
+	close(fd);
+}
+
+static void process_one_client(int fd, int pipefd)
+{
+	ssize_t ret, ret2, ret3;
+	struct so_state s;
+	char buf[4096];
+
+	memset(&s, 0, sizeof(s));
+	do_getsockopts(&s, fd, 0, 0);
+
+	ret = write(pipefd, "xmit", 4);
+	assert(ret == 4);
+
+	ret = read(fd, buf, sizeof(buf));
+	if (ret < 0)
+		die_perror("read");
+
+	assert(s.mptcpi_rcv_delta <= (uint64_t)ret);
+
+	if (s.tcpi_rcv_delta)
+		assert(s.tcpi_rcv_delta == (uint64_t)ret);
+
+	ret2 = write(fd, buf, ret);
+	if (ret2 < 0)
+		die_perror("write");
+
+	/* wait for hangup */
+	ret3 = read(fd, buf, 1);
+	if (ret3 != 0)
+		xerror("expected EOF, got %lu", ret3);
+
+	do_getsockopts(&s, fd, ret, ret2);
+	if (s.mptcpi_rcv_delta != (uint64_t)ret + 1)
+		xerror("mptcpi_rcv_delta %" PRIu64 ", expect %" PRIu64, s.mptcpi_rcv_delta, ret + 1, s.mptcpi_rcv_delta - ret);
+	close(fd);
+}
+
+static int xaccept(int s)
+{
+	int fd = accept(s, NULL, 0);
+
+	if (fd < 0)
+		die_perror("accept");
+
+	return fd;
+}
+
+static int server(int pipefd)
+{
+	int fd = -1, r;
+
+	switch (pf) {
+	case AF_INET:
+		fd = sock_listen_mptcp("127.0.0.1", "15432");
+		break;
+	case AF_INET6:
+		fd = sock_listen_mptcp("::1", "15432");
+		break;
+	default:
+		xerror("Unknown pf %d\n", pf);
+		break;
+	}
+
+	r = write(pipefd, "conn", 4);
+	assert(r == 4);
+
+	alarm(15);
+	r = xaccept(fd);
+
+	process_one_client(r, pipefd);
+
+	return 0;
+}
+
+static int client(int pipefd)
+{
+	int fd = -1;
+
+	alarm(15);
+
+	switch (pf) {
+	case AF_INET:
+		fd = sock_connect_mptcp("127.0.0.1", "15432", IPPROTO_MPTCP);
+		break;
+	case AF_INET6:
+		fd = sock_connect_mptcp("::1", "15432", IPPROTO_MPTCP);
+		break;
+	default:
+		xerror("Unknown pf %d\n", pf);
+	}
+
+	connect_one_server(fd, pipefd);
+
+	return 0;
+}
+
+static pid_t xfork(void)
+{
+	pid_t p = fork();
+
+	if (p < 0)
+		die_perror("fork");
+
+	return p;
+}
+
+static int rcheck(int wstatus, const char *what)
+{
+	if (WIFEXITED(wstatus)) {
+		if (WEXITSTATUS(wstatus) == 0)
+			return 0;
+		fprintf(stderr, "%s exited, status=%d\n", what, WEXITSTATUS(wstatus));
+		return WEXITSTATUS(wstatus);
+	} else if (WIFSIGNALED(wstatus)) {
+		xerror("%s killed by signal %d\n", what, WTERMSIG(wstatus));
+	} else if (WIFSTOPPED(wstatus)) {
+		xerror("%s stopped by signal %d\n", what, WSTOPSIG(wstatus));
+	}
+
+	return 111;
+}
+
+int main(int argc, char *argv[])
+{
+	int e1, e2, wstatus;
+	pid_t s, c, ret;
+	int pipefds[2];
+
+	parse_opts(argc, argv);
+
+	e1 = pipe(pipefds);
+	if (e1 < 0)
+		die_perror("pipe");
+
+	s = xfork();
+	if (s == 0)
+		return server(pipefds[1]);
+
+	close(pipefds[1]);
+
+	/* wait until server bound a socket */
+	e1 = read(pipefds[0], &e1, 4);
+	assert(e1 == 4);
+
+	c = xfork();
+	if (c == 0)
+		return client(pipefds[0]);
+
+	close(pipefds[0]);
+
+	ret = waitpid(s, &wstatus, 0);
+	if (ret == -1)
+		die_perror("waitpid");
+	e1 = rcheck(wstatus, "server");
+	ret = waitpid(c, &wstatus, 0);
+	if (ret == -1)
+		die_perror("waitpid");
+	e2 = rcheck(wstatus, "client");
+
+	return e1 ? e1 : e2;
+}
diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
index 1579e471a5e7..41de643788b8 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
@@ -239,12 +239,35 @@ make_file()
 	echo "Created $name (size $size KB) containing data sent by $who"
 }
 
+do_mptcp_sockopt_tests()
+{
+	local lret=0
+
+	./mptcp_sockopt
+	lret=$?
+
+	if [ $lret -ne 0 ]; then
+		echo "FAIL: SOL_MPTCP getsockopt" 1>&2
+		ret=$lret
+		return
+	fi
+
+	./mptcp_sockopt -6
+	lret=$?
+
+	if [ $lret -ne 0 ]; then
+		echo "FAIL: SOL_MPTCP getsockopt (ipv6)" 1>&2
+		ret=$lret
+		return
+	fi
+}
+
 run_tests()
 {
 	listener_ns="$1"
 	connector_ns="$2"
 	connect_addr="$3"
-	lret=0
+	local lret=0
 
 	do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr}
 
@@ -268,9 +291,13 @@ trap cleanup EXIT
 run_tests $ns1 $ns2 10.0.1.1
 run_tests $ns1 $ns2 dead:beef:1::1
 
-
 if [ $ret -eq 0 ];then
 	echo "PASS: all packets had packet mark set"
 fi
 
+do_mptcp_sockopt_tests
+if [ $ret -eq 0 ];then
+	echo "PASS: SOL_MPTCP getsockopt has expected information"
+fi
+
 exit $ret
diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh
index 3c741abe034e..cbacf9f6538b 100755
--- a/tools/testing/selftests/net/mptcp/pm_netlink.sh
+++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh
@@ -70,7 +70,7 @@ check()
 
 check "ip netns exec $ns1 ./pm_nl_ctl dump" "" "defaults addr list"
 check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0
-subflows 0" "defaults limits"
+subflows 2" "defaults limits"
 
 ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.1
 ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.2 flags subflow dev lo
@@ -118,11 +118,11 @@ check "ip netns exec $ns1 ./pm_nl_ctl dump" "" "flush addrs"
 
 ip netns exec $ns1 ./pm_nl_ctl limits 9 1
 check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0
-subflows 0" "rcv addrs above hard limit"
+subflows 2" "rcv addrs above hard limit"
 
 ip netns exec $ns1 ./pm_nl_ctl limits 1 9
 check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0
-subflows 0" "subflows above hard limit"
+subflows 2" "subflows above hard limit"
 
 ip netns exec $ns1 ./pm_nl_ctl limits 8 8
 check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 8
diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh
index 910d8126af8f..f441ff7904fc 100755
--- a/tools/testing/selftests/net/mptcp/simult_flows.sh
+++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
@@ -51,7 +51,7 @@ setup()
 	sout=$(mktemp)
 	cout=$(mktemp)
 	capout=$(mktemp)
-	size=$((2048 * 4096))
+	size=$((2 * 2048 * 4096))
 	dd if=/dev/zero of=$small bs=4096 count=20 >/dev/null 2>&1
 	dd if=/dev/zero of=$large bs=4096 count=$((size / 4096)) >/dev/null 2>&1
 
@@ -161,17 +161,15 @@ do_transfer()
 
 	timeout ${timeout_test} \
 		ip netns exec ${ns3} \
-			./mptcp_connect -jt ${timeout_poll} -l -p $port \
+			./mptcp_connect -jt ${timeout_poll} -l -p $port -T $time \
 				0.0.0.0 < "$sin" > "$sout" &
 	local spid=$!
 
 	wait_local_port_listen "${ns3}" "${port}"
 
-	local start
-	start=$(date +%s%3N)
 	timeout ${timeout_test} \
 		ip netns exec ${ns1} \
-			./mptcp_connect -jt ${timeout_poll} -p $port \
+			./mptcp_connect -jt ${timeout_poll} -p $port -T $time \
 				10.0.3.3 < "$cin" > "$cout" &
 	local cpid=$!
 
@@ -180,27 +178,20 @@ do_transfer()
 	wait $spid
 	local rets=$?
 
-	local stop
-	stop=$(date +%s%3N)
-
 	if $capture; then
 		sleep 1
 		kill ${cappid_listener}
 		kill ${cappid_connector}
 	fi
 
-	local duration
-	duration=$((stop-start))
-
 	cmp $sin $cout > /dev/null 2>&1
 	local cmps=$?
 	cmp $cin $sout > /dev/null 2>&1
 	local cmpc=$?
 
-	printf "%16s" "$duration max $max_time "
+	printf "%-16s" " max $max_time "
 	if [ $retc -eq 0 ] && [ $rets -eq 0 ] && \
-	   [ $cmpc -eq 0 ] && [ $cmps -eq 0 ] && \
-	   [ $duration -lt $max_time ]; then
+	   [ $cmpc -eq 0 ] && [ $cmps -eq 0 ]; then
 		echo "[ OK ]"
 		cat "$capout"
 		return 0
@@ -244,23 +235,24 @@ run_test()
 	tc -n $ns2 qdisc add dev ns2eth1 root netem rate ${rate1}mbit $delay1
 	tc -n $ns2 qdisc add dev ns2eth2 root netem rate ${rate2}mbit $delay2
 
-	# time is measure in ms
-	local time=$((size * 8 * 1000 / (( $rate1 + $rate2) * 1024 *1024) ))
+	# time is measured in ms, account for transfer size, affegated link speed
+	# and header overhead (10%)
+	local time=$((size * 8 * 1000 * 10 / (( $rate1 + $rate2) * 1024 *1024 * 9) ))
 
 	# mptcp_connect will do some sleeps to allow the mp_join handshake
-	# completion
-	time=$((time + 1350))
+	# completion (see mptcp_connect): 200ms on each side, add some slack
+	time=$((time + 450))
 
-	printf "%-50s" "$msg"
-	do_transfer $small $large $((time * 11 / 10))
+	printf "%-60s" "$msg"
+	do_transfer $small $large $time
 	lret=$?
 	if [ $lret -ne 0 ]; then
 		ret=$lret
 		[ $bail -eq 0 ] || exit $ret
 	fi
 
-	printf "%-50s" "$msg - reverse direction"
-	do_transfer $large $small $((time * 11 / 10))
+	printf "%-60s" "$msg - reverse direction"
+	do_transfer $large $small $time
 	lret=$?
 	if [ $lret -ne 0 ]; then
 		ret=$lret
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index 97fceb9be9ed..d3047e251fe9 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -29,6 +29,8 @@ struct tls_crypto_info_keys {
 	union {
 		struct tls12_crypto_info_aes_gcm_128 aes128;
 		struct tls12_crypto_info_chacha20_poly1305 chacha20;
+		struct tls12_crypto_info_sm4_gcm sm4gcm;
+		struct tls12_crypto_info_sm4_ccm sm4ccm;
 	};
 	size_t len;
 };
@@ -49,6 +51,16 @@ static void tls_crypto_info_init(uint16_t tls_version, uint16_t cipher_type,
 		tls12->aes128.info.version = tls_version;
 		tls12->aes128.info.cipher_type = cipher_type;
 		break;
+	case TLS_CIPHER_SM4_GCM:
+		tls12->len = sizeof(struct tls12_crypto_info_sm4_gcm);
+		tls12->sm4gcm.info.version = tls_version;
+		tls12->sm4gcm.info.cipher_type = cipher_type;
+		break;
+	case TLS_CIPHER_SM4_CCM:
+		tls12->len = sizeof(struct tls12_crypto_info_sm4_ccm);
+		tls12->sm4ccm.info.version = tls_version;
+		tls12->sm4ccm.info.cipher_type = cipher_type;
+		break;
 	default:
 		break;
 	}
@@ -148,13 +160,13 @@ FIXTURE_VARIANT(tls)
 	uint16_t cipher_type;
 };
 
-FIXTURE_VARIANT_ADD(tls, 12_gcm)
+FIXTURE_VARIANT_ADD(tls, 12_aes_gcm)
 {
 	.tls_version = TLS_1_2_VERSION,
 	.cipher_type = TLS_CIPHER_AES_GCM_128,
 };
 
-FIXTURE_VARIANT_ADD(tls, 13_gcm)
+FIXTURE_VARIANT_ADD(tls, 13_aes_gcm)
 {
 	.tls_version = TLS_1_3_VERSION,
 	.cipher_type = TLS_CIPHER_AES_GCM_128,
@@ -172,6 +184,18 @@ FIXTURE_VARIANT_ADD(tls, 13_chacha)
 	.cipher_type = TLS_CIPHER_CHACHA20_POLY1305,
 };
 
+FIXTURE_VARIANT_ADD(tls, 13_sm4_gcm)
+{
+	.tls_version = TLS_1_3_VERSION,
+	.cipher_type = TLS_CIPHER_SM4_GCM,
+};
+
+FIXTURE_VARIANT_ADD(tls, 13_sm4_ccm)
+{
+	.tls_version = TLS_1_3_VERSION,
+	.cipher_type = TLS_CIPHER_SM4_CCM,
+};
+
 FIXTURE_SETUP(tls)
 {
 	struct tls_crypto_info_keys tls12;
diff --git a/tools/testing/selftests/powerpc/security/mitigation-patching.sh b/tools/testing/selftests/powerpc/security/mitigation-patching.sh
index 00197acb7ff1..b0b20e0b4e30 100755
--- a/tools/testing/selftests/powerpc/security/mitigation-patching.sh
+++ b/tools/testing/selftests/powerpc/security/mitigation-patching.sh
@@ -13,7 +13,7 @@ function do_one
 
     orig=$(cat "$mitigation")
 
-    start=$EPOCHSECONDS
+    start=$(date +%s)
     now=$start
 
     while [[ $((now-start)) -lt "$TIMEOUT" ]]
@@ -21,7 +21,7 @@ function do_one
         echo 0 > "$mitigation"
         echo 1 > "$mitigation"
 
-        now=$EPOCHSECONDS
+        now=$(date +%s)
     done
 
     echo "$orig" > "$mitigation"
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
index 03126eb6ec5a..c7d42ef80c53 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
@@ -149,6 +149,7 @@ do
 done
 
 # Download and expand the tarball on all systems.
+echo Build-products tarball: `du -h $T/binres.tgz` | tee -a "$oldrun/remote-log"
 for i in $systems
 do
 	echo Downloading tarball to $i `date` | tee -a "$oldrun/remote-log"
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index f442d84fb2a3..6cf9ec6a3d1c 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -184,7 +184,7 @@ do
 		TORTURE_KCONFIG_KASAN_ARG="CONFIG_DEBUG_INFO=y CONFIG_KASAN=y"; export TORTURE_KCONFIG_KASAN_ARG
 		;;
 	--kcsan)
-		TORTURE_KCONFIG_KCSAN_ARG="CONFIG_DEBUG_INFO=y CONFIG_KCSAN=y CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC=n CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY=n CONFIG_KCSAN_REPORT_ONCE_IN_MS=100000 CONFIG_KCSAN_INTERRUPT_WATCHER=y CONFIG_KCSAN_VERBOSE=y CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y"; export TORTURE_KCONFIG_KCSAN_ARG
+		TORTURE_KCONFIG_KCSAN_ARG="CONFIG_DEBUG_INFO=y CONFIG_KCSAN=y CONFIG_KCSAN_STRICT=y CONFIG_KCSAN_REPORT_ONCE_IN_MS=100000 CONFIG_KCSAN_VERBOSE=y CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y"; export TORTURE_KCONFIG_KCSAN_ARG
 		;;
 	--kmake-arg|--kmake-args)
 		checkarg --kmake-arg "(kernel make arguments)" $# "$2" '.*' '^error$'
diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh
index 363f56081eff..eae88aacca2a 100755
--- a/tools/testing/selftests/rcutorture/bin/torture.sh
+++ b/tools/testing/selftests/rcutorture/bin/torture.sh
@@ -351,7 +351,7 @@ fi
 if test "$do_scftorture" = "yes"
 then
 	torture_bootargs="scftorture.nthreads=$HALF_ALLOTED_CPUS torture.disable_onoff_at_boot"
-	torture_set "scftorture" tools/testing/selftests/rcutorture/bin/kvm.sh --torture scf --allcpus --duration "$duration_scftorture" --configs "$configs_scftorture" --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --trust-make
+	torture_set "scftorture" tools/testing/selftests/rcutorture/bin/kvm.sh --torture scf --allcpus --duration "$duration_scftorture" --configs "$configs_scftorture" --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory 1G --trust-make
 fi
 
 if test "$do_refscale" = yes
@@ -434,7 +434,12 @@ then
 	batchno=1
 	if test -s $T/xz-todo
 	then
-		echo Size before compressing: `du -sh $tdir | awk '{ print $1 }'` `date` 2>&1 | tee -a "$tdir/log-xz" | tee -a $T/log
+		for i in `cat $T/xz-todo`
+		do
+			find $i -name 'vmlinux*' -print
+		done | wc -l | awk '{ print $1 }' > $T/xz-todo-count
+		n2compress="`cat $T/xz-todo-count`"
+		echo Size before compressing $n2compress files: `du -sh $tdir | awk '{ print $1 }'` `date` 2>&1 | tee -a "$tdir/log-xz" | tee -a $T/log
 		for i in `cat $T/xz-todo`
 		do
 			echo Compressing vmlinux files in ${i}: `date` >> "$tdir/log-xz" 2>&1
@@ -456,7 +461,7 @@ then
 			echo Waiting for final batch $batchno of $ncompresses compressions `date` | tee -a "$tdir/log-xz" | tee -a $T/log
 		fi
 		wait
-		echo Size after compressing: `du -sh $tdir | awk '{ print $1 }'` `date` 2>&1 | tee -a "$tdir/log-xz" | tee -a $T/log
+		echo Size after compressing $n2compress files: `du -sh $tdir | awk '{ print $1 }'` `date` 2>&1 | tee -a "$tdir/log-xz" | tee -a $T/log
 		echo Total duration `get_starttime_duration $starttime`. | tee -a $T/log
 	else
 		echo No compression needed: `date` >> "$tdir/log-xz" 2>&1
diff --git a/tools/testing/selftests/sched/cs_prctl_test.c b/tools/testing/selftests/sched/cs_prctl_test.c
index 7db9cf822dc7..8109b17dc764 100644
--- a/tools/testing/selftests/sched/cs_prctl_test.c
+++ b/tools/testing/selftests/sched/cs_prctl_test.c
@@ -62,6 +62,17 @@ enum pid_type {PIDTYPE_PID = 0, PIDTYPE_TGID, PIDTYPE_PGID};
 
 const int THREAD_CLONE_FLAGS = CLONE_THREAD | CLONE_SIGHAND | CLONE_FS | CLONE_VM | CLONE_FILES;
 
+struct child_args {
+	int num_threads;
+	int pfd[2];
+	int cpid;
+	int thr_tids[MAX_THREADS];
+};
+
+static struct child_args procs[MAX_PROCESSES];
+static int num_processes = 2;
+static int need_cleanup = 0;
+
 static int _prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4,
 		  unsigned long arg5)
 {
@@ -78,8 +89,14 @@ static int _prctl(int option, unsigned long arg2, unsigned long arg3, unsigned l
 #define handle_error(msg) __handle_error(__FILE__, __LINE__, msg)
 static void __handle_error(char *fn, int ln, char *msg)
 {
+	int pidx;
 	printf("(%s:%d) - ", fn, ln);
 	perror(msg);
+	if (need_cleanup) {
+		for (pidx = 0; pidx < num_processes; ++pidx)
+			kill(procs[pidx].cpid, 15);
+		need_cleanup = 0;
+	}
 	exit(EXIT_FAILURE);
 }
 
@@ -106,13 +123,6 @@ static unsigned long get_cs_cookie(int pid)
 	return cookie;
 }
 
-struct child_args {
-	int num_threads;
-	int pfd[2];
-	int cpid;
-	int thr_tids[MAX_THREADS];
-};
-
 static int child_func_thread(void __attribute__((unused))*arg)
 {
 	while (1)
@@ -212,10 +222,7 @@ void _validate(int line, int val, char *msg)
 
 int main(int argc, char *argv[])
 {
-	struct child_args procs[MAX_PROCESSES];
-
 	int keypress = 0;
-	int num_processes = 2;
 	int num_threads = 3;
 	int delay = 0;
 	int res = 0;
@@ -262,6 +269,7 @@ int main(int argc, char *argv[])
 
 	printf("\n## Create a thread/process/process group hiearchy\n");
 	create_processes(num_processes, num_threads, procs);
+	need_cleanup = 1;
 	disp_processes(num_processes, procs);
 	validate(get_cs_cookie(0) == 0);
 
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 1d64891e6492..d425688cf59c 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -276,12 +276,12 @@ int seccomp(unsigned int op, unsigned int flags, void *args)
 }
 #endif
 
-#if __BYTE_ORDER == __LITTLE_ENDIAN
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
 #define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]))
-#elif __BYTE_ORDER == __BIG_ENDIAN
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
 #define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]) + sizeof(__u32))
 #else
-#error "wut? Unknown __BYTE_ORDER?!"
+#error "wut? Unknown __BYTE_ORDER__?!"
 #endif
 
 #define SIBLING_EXIT_UNKILLED	0xbadbeef
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index b4142cd1c5c2..8a1f62ab3c8e 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -18,7 +18,7 @@ TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \
 			test_FCMOV test_FCOMI test_FISTTP \
 			vdso_restorer
 TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip syscall_numbering \
-			corrupt_xstate_header
+			corrupt_xstate_header amx
 # Some selftests require 32bit support enabled also on 64bit systems
 TARGETS_C_32BIT_NEEDED := ldt_gdt ptrace_syscall
 
diff --git a/tools/testing/selftests/x86/amx.c b/tools/testing/selftests/x86/amx.c
new file mode 100644
index 000000000000..3615ef4a48bb
--- /dev/null
+++ b/tools/testing/selftests/x86/amx.c
@@ -0,0 +1,851 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <err.h>
+#include <errno.h>
+#include <pthread.h>
+#include <setjmp.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdbool.h>
+#include <unistd.h>
+#include <x86intrin.h>
+
+#include <sys/auxv.h>
+#include <sys/mman.h>
+#include <sys/shm.h>
+#include <sys/syscall.h>
+#include <sys/wait.h>
+
+#ifndef __x86_64__
+# error This test is 64-bit only
+#endif
+
+#define XSAVE_HDR_OFFSET	512
+#define XSAVE_HDR_SIZE		64
+
+struct xsave_buffer {
+	union {
+		struct {
+			char legacy[XSAVE_HDR_OFFSET];
+			char header[XSAVE_HDR_SIZE];
+			char extended[0];
+		};
+		char bytes[0];
+	};
+};
+
+static inline uint64_t xgetbv(uint32_t index)
+{
+	uint32_t eax, edx;
+
+	asm volatile("xgetbv;"
+		     : "=a" (eax), "=d" (edx)
+		     : "c" (index));
+	return eax + ((uint64_t)edx << 32);
+}
+
+static inline void cpuid(uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
+{
+	asm volatile("cpuid;"
+		     : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
+		     : "0" (*eax), "2" (*ecx));
+}
+
+static inline void xsave(struct xsave_buffer *xbuf, uint64_t rfbm)
+{
+	uint32_t rfbm_lo = rfbm;
+	uint32_t rfbm_hi = rfbm >> 32;
+
+	asm volatile("xsave (%%rdi)"
+		     : : "D" (xbuf), "a" (rfbm_lo), "d" (rfbm_hi)
+		     : "memory");
+}
+
+static inline void xrstor(struct xsave_buffer *xbuf, uint64_t rfbm)
+{
+	uint32_t rfbm_lo = rfbm;
+	uint32_t rfbm_hi = rfbm >> 32;
+
+	asm volatile("xrstor (%%rdi)"
+		     : : "D" (xbuf), "a" (rfbm_lo), "d" (rfbm_hi));
+}
+
+/* err() exits and will not return */
+#define fatal_error(msg, ...)	err(1, "[FAIL]\t" msg, ##__VA_ARGS__)
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+		       int flags)
+{
+	struct sigaction sa;
+
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_sigaction = handler;
+	sa.sa_flags = SA_SIGINFO | flags;
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(sig, &sa, 0))
+		fatal_error("sigaction");
+}
+
+static void clearhandler(int sig)
+{
+	struct sigaction sa;
+
+	memset(&sa, 0, sizeof(sa));
+	sa.sa_handler = SIG_DFL;
+	sigemptyset(&sa.sa_mask);
+	if (sigaction(sig, &sa, 0))
+		fatal_error("sigaction");
+}
+
+#define XFEATURE_XTILECFG	17
+#define XFEATURE_XTILEDATA	18
+#define XFEATURE_MASK_XTILECFG	(1 << XFEATURE_XTILECFG)
+#define XFEATURE_MASK_XTILEDATA	(1 << XFEATURE_XTILEDATA)
+#define XFEATURE_MASK_XTILE	(XFEATURE_MASK_XTILECFG | XFEATURE_MASK_XTILEDATA)
+
+#define CPUID_LEAF1_ECX_XSAVE_MASK	(1 << 26)
+#define CPUID_LEAF1_ECX_OSXSAVE_MASK	(1 << 27)
+static inline void check_cpuid_xsave(void)
+{
+	uint32_t eax, ebx, ecx, edx;
+
+	/*
+	 * CPUID.1:ECX.XSAVE[bit 26] enumerates general
+	 * support for the XSAVE feature set, including
+	 * XGETBV.
+	 */
+	eax = 1;
+	ecx = 0;
+	cpuid(&eax, &ebx, &ecx, &edx);
+	if (!(ecx & CPUID_LEAF1_ECX_XSAVE_MASK))
+		fatal_error("cpuid: no CPU xsave support");
+	if (!(ecx & CPUID_LEAF1_ECX_OSXSAVE_MASK))
+		fatal_error("cpuid: no OS xsave support");
+}
+
+static uint32_t xbuf_size;
+
+static struct {
+	uint32_t xbuf_offset;
+	uint32_t size;
+} xtiledata;
+
+#define CPUID_LEAF_XSTATE		0xd
+#define CPUID_SUBLEAF_XSTATE_USER	0x0
+#define TILE_CPUID			0x1d
+#define TILE_PALETTE_ID			0x1
+
+static void check_cpuid_xtiledata(void)
+{
+	uint32_t eax, ebx, ecx, edx;
+
+	eax = CPUID_LEAF_XSTATE;
+	ecx = CPUID_SUBLEAF_XSTATE_USER;
+	cpuid(&eax, &ebx, &ecx, &edx);
+
+	/*
+	 * EBX enumerates the size (in bytes) required by the XSAVE
+	 * instruction for an XSAVE area containing all the user state
+	 * components corresponding to bits currently set in XCR0.
+	 *
+	 * Stash that off so it can be used to allocate buffers later.
+	 */
+	xbuf_size = ebx;
+
+	eax = CPUID_LEAF_XSTATE;
+	ecx = XFEATURE_XTILEDATA;
+
+	cpuid(&eax, &ebx, &ecx, &edx);
+	/*
+	 * eax: XTILEDATA state component size
+	 * ebx: XTILEDATA state component offset in user buffer
+	 */
+	if (!eax || !ebx)
+		fatal_error("xstate cpuid: invalid tile data size/offset: %d/%d",
+				eax, ebx);
+
+	xtiledata.size	      = eax;
+	xtiledata.xbuf_offset = ebx;
+}
+
+/* The helpers for managing XSAVE buffer and tile states: */
+
+struct xsave_buffer *alloc_xbuf(void)
+{
+	struct xsave_buffer *xbuf;
+
+	/* XSAVE buffer should be 64B-aligned. */
+	xbuf = aligned_alloc(64, xbuf_size);
+	if (!xbuf)
+		fatal_error("aligned_alloc()");
+	return xbuf;
+}
+
+static inline void clear_xstate_header(struct xsave_buffer *buffer)
+{
+	memset(&buffer->header, 0, sizeof(buffer->header));
+}
+
+static inline uint64_t get_xstatebv(struct xsave_buffer *buffer)
+{
+	/* XSTATE_BV is at the beginning of the header: */
+	return *(uint64_t *)&buffer->header;
+}
+
+static inline void set_xstatebv(struct xsave_buffer *buffer, uint64_t bv)
+{
+	/* XSTATE_BV is at the beginning of the header: */
+	*(uint64_t *)(&buffer->header) = bv;
+}
+
+static void set_rand_tiledata(struct xsave_buffer *xbuf)
+{
+	int *ptr = (int *)&xbuf->bytes[xtiledata.xbuf_offset];
+	int data;
+	int i;
+
+	/*
+	 * Ensure that 'data' is never 0.  This ensures that
+	 * the registers are never in their initial configuration
+	 * and thus never tracked as being in the init state.
+	 */
+	data = rand() | 1;
+
+	for (i = 0; i < xtiledata.size / sizeof(int); i++, ptr++)
+		*ptr = data;
+}
+
+struct xsave_buffer *stashed_xsave;
+
+static void init_stashed_xsave(void)
+{
+	stashed_xsave = alloc_xbuf();
+	if (!stashed_xsave)
+		fatal_error("failed to allocate stashed_xsave\n");
+	clear_xstate_header(stashed_xsave);
+}
+
+static void free_stashed_xsave(void)
+{
+	free(stashed_xsave);
+}
+
+/* See 'struct _fpx_sw_bytes' at sigcontext.h */
+#define SW_BYTES_OFFSET		464
+/* N.B. The struct's field name varies so read from the offset. */
+#define SW_BYTES_BV_OFFSET	(SW_BYTES_OFFSET + 8)
+
+static inline struct _fpx_sw_bytes *get_fpx_sw_bytes(void *buffer)
+{
+	return (struct _fpx_sw_bytes *)(buffer + SW_BYTES_OFFSET);
+}
+
+static inline uint64_t get_fpx_sw_bytes_features(void *buffer)
+{
+	return *(uint64_t *)(buffer + SW_BYTES_BV_OFFSET);
+}
+
+/* Work around printf() being unsafe in signals: */
+#define SIGNAL_BUF_LEN 1000
+char signal_message_buffer[SIGNAL_BUF_LEN];
+void sig_print(char *msg)
+{
+	int left = SIGNAL_BUF_LEN - strlen(signal_message_buffer) - 1;
+
+	strncat(signal_message_buffer, msg, left);
+}
+
+static volatile bool noperm_signaled;
+static int noperm_errs;
+/*
+ * Signal handler for when AMX is used but
+ * permission has not been obtained.
+ */
+static void handle_noperm(int sig, siginfo_t *si, void *ctx_void)
+{
+	ucontext_t *ctx = (ucontext_t *)ctx_void;
+	void *xbuf = ctx->uc_mcontext.fpregs;
+	struct _fpx_sw_bytes *sw_bytes;
+	uint64_t features;
+
+	/* Reset the signal message buffer: */
+	signal_message_buffer[0] = '\0';
+	sig_print("\tAt SIGILL handler,\n");
+
+	if (si->si_code != ILL_ILLOPC) {
+		noperm_errs++;
+		sig_print("[FAIL]\tInvalid signal code.\n");
+	} else {
+		sig_print("[OK]\tValid signal code (ILL_ILLOPC).\n");
+	}
+
+	sw_bytes = get_fpx_sw_bytes(xbuf);
+	/*
+	 * Without permission, the signal XSAVE buffer should not
+	 * have room for AMX register state (aka. xtiledata).
+	 * Check that the size does not overlap with where xtiledata
+	 * will reside.
+	 *
+	 * This also implies that no state components *PAST*
+	 * XTILEDATA (features >=19) can be present in the buffer.
+	 */
+	if (sw_bytes->xstate_size <= xtiledata.xbuf_offset) {
+		sig_print("[OK]\tValid xstate size\n");
+	} else {
+		noperm_errs++;
+		sig_print("[FAIL]\tInvalid xstate size\n");
+	}
+
+	features = get_fpx_sw_bytes_features(xbuf);
+	/*
+	 * Without permission, the XTILEDATA feature
+	 * bit should not be set.
+	 */
+	if ((features & XFEATURE_MASK_XTILEDATA) == 0) {
+		sig_print("[OK]\tValid xstate mask\n");
+	} else {
+		noperm_errs++;
+		sig_print("[FAIL]\tInvalid xstate mask\n");
+	}
+
+	noperm_signaled = true;
+	ctx->uc_mcontext.gregs[REG_RIP] += 3; /* Skip the faulting XRSTOR */
+}
+
+/* Return true if XRSTOR is successful; otherwise, false. */
+static inline bool xrstor_safe(struct xsave_buffer *xbuf, uint64_t mask)
+{
+	noperm_signaled = false;
+	xrstor(xbuf, mask);
+
+	/* Print any messages produced by the signal code: */
+	printf("%s", signal_message_buffer);
+	/*
+	 * Reset the buffer to make sure any future printing
+	 * only outputs new messages:
+	 */
+	signal_message_buffer[0] = '\0';
+
+	if (noperm_errs)
+		fatal_error("saw %d errors in noperm signal handler\n", noperm_errs);
+
+	return !noperm_signaled;
+}
+
+/*
+ * Use XRSTOR to populate the XTILEDATA registers with
+ * random data.
+ *
+ * Return true if successful; otherwise, false.
+ */
+static inline bool load_rand_tiledata(struct xsave_buffer *xbuf)
+{
+	clear_xstate_header(xbuf);
+	set_xstatebv(xbuf, XFEATURE_MASK_XTILEDATA);
+	set_rand_tiledata(xbuf);
+	return xrstor_safe(xbuf, XFEATURE_MASK_XTILEDATA);
+}
+
+/* Return XTILEDATA to its initial configuration. */
+static inline void init_xtiledata(void)
+{
+	clear_xstate_header(stashed_xsave);
+	xrstor_safe(stashed_xsave, XFEATURE_MASK_XTILEDATA);
+}
+
+enum expected_result { FAIL_EXPECTED, SUCCESS_EXPECTED };
+
+/* arch_prctl() and sigaltstack() test */
+
+#define ARCH_GET_XCOMP_PERM	0x1022
+#define ARCH_REQ_XCOMP_PERM	0x1023
+
+static void req_xtiledata_perm(void)
+{
+	syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_PERM, XFEATURE_XTILEDATA);
+}
+
+static void validate_req_xcomp_perm(enum expected_result exp)
+{
+	unsigned long bitmask;
+	long rc;
+
+	rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_PERM, XFEATURE_XTILEDATA);
+	if (exp == FAIL_EXPECTED) {
+		if (rc) {
+			printf("[OK]\tARCH_REQ_XCOMP_PERM saw expected failure..\n");
+			return;
+		}
+
+		fatal_error("ARCH_REQ_XCOMP_PERM saw unexpected success.\n");
+	} else if (rc) {
+		fatal_error("ARCH_REQ_XCOMP_PERM saw unexpected failure.\n");
+	}
+
+	rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_PERM, &bitmask);
+	if (rc) {
+		fatal_error("prctl(ARCH_GET_XCOMP_PERM) error: %ld", rc);
+	} else if (bitmask & XFEATURE_MASK_XTILE) {
+		printf("\tARCH_REQ_XCOMP_PERM is successful.\n");
+	}
+}
+
+static void validate_xcomp_perm(enum expected_result exp)
+{
+	bool load_success = load_rand_tiledata(stashed_xsave);
+
+	if (exp == FAIL_EXPECTED) {
+		if (load_success) {
+			noperm_errs++;
+			printf("[FAIL]\tLoad tiledata succeeded.\n");
+		} else {
+			printf("[OK]\tLoad tiledata failed.\n");
+		}
+	} else if (exp == SUCCESS_EXPECTED) {
+		if (load_success) {
+			printf("[OK]\tLoad tiledata succeeded.\n");
+		} else {
+			noperm_errs++;
+			printf("[FAIL]\tLoad tiledata failed.\n");
+		}
+	}
+}
+
+#ifndef AT_MINSIGSTKSZ
+#  define AT_MINSIGSTKSZ	51
+#endif
+
+static void *alloc_altstack(unsigned int size)
+{
+	void *altstack;
+
+	altstack = mmap(NULL, size, PROT_READ | PROT_WRITE,
+			MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
+
+	if (altstack == MAP_FAILED)
+		fatal_error("mmap() for altstack");
+
+	return altstack;
+}
+
+static void setup_altstack(void *addr, unsigned long size, enum expected_result exp)
+{
+	stack_t ss;
+	int rc;
+
+	memset(&ss, 0, sizeof(ss));
+	ss.ss_size = size;
+	ss.ss_sp = addr;
+
+	rc = sigaltstack(&ss, NULL);
+
+	if (exp == FAIL_EXPECTED) {
+		if (rc) {
+			printf("[OK]\tsigaltstack() failed.\n");
+		} else {
+			fatal_error("sigaltstack() succeeded unexpectedly.\n");
+		}
+	} else if (rc) {
+		fatal_error("sigaltstack()");
+	}
+}
+
+static void test_dynamic_sigaltstack(void)
+{
+	unsigned int small_size, enough_size;
+	unsigned long minsigstksz;
+	void *altstack;
+
+	minsigstksz = getauxval(AT_MINSIGSTKSZ);
+	printf("\tAT_MINSIGSTKSZ = %lu\n", minsigstksz);
+	/*
+	 * getauxval() itself can return 0 for failure or
+	 * success.  But, in this case, AT_MINSIGSTKSZ
+	 * will always return a >=0 value if implemented.
+	 * Just check for 0.
+	 */
+	if (minsigstksz == 0) {
+		printf("no support for AT_MINSIGSTKSZ, skipping sigaltstack tests\n");
+		return;
+	}
+
+	enough_size = minsigstksz * 2;
+
+	altstack = alloc_altstack(enough_size);
+	printf("\tAllocate memory for altstack (%u bytes).\n", enough_size);
+
+	/*
+	 * Try setup_altstack() with a size which can not fit
+	 * XTILEDATA.  ARCH_REQ_XCOMP_PERM should fail.
+	 */
+	small_size = minsigstksz - xtiledata.size;
+	printf("\tAfter sigaltstack() with small size (%u bytes).\n", small_size);
+	setup_altstack(altstack, small_size, SUCCESS_EXPECTED);
+	validate_req_xcomp_perm(FAIL_EXPECTED);
+
+	/*
+	 * Try setup_altstack() with a size derived from
+	 * AT_MINSIGSTKSZ.  It should be more than large enough
+	 * and thus ARCH_REQ_XCOMP_PERM should succeed.
+	 */
+	printf("\tAfter sigaltstack() with enough size (%u bytes).\n", enough_size);
+	setup_altstack(altstack, enough_size, SUCCESS_EXPECTED);
+	validate_req_xcomp_perm(SUCCESS_EXPECTED);
+
+	/*
+	 * Try to coerce setup_altstack() to again accept a
+	 * too-small altstack.  This ensures that big-enough
+	 * sigaltstacks can not shrink to a too-small value
+	 * once XTILEDATA permission is established.
+	 */
+	printf("\tThen, sigaltstack() with small size (%u bytes).\n", small_size);
+	setup_altstack(altstack, small_size, FAIL_EXPECTED);
+}
+
+static void test_dynamic_state(void)
+{
+	pid_t parent, child, grandchild;
+
+	parent = fork();
+	if (parent < 0) {
+		/* fork() failed */
+		fatal_error("fork");
+	} else if (parent > 0) {
+		int status;
+		/* fork() succeeded.  Now in the parent. */
+
+		wait(&status);
+		if (!WIFEXITED(status) || WEXITSTATUS(status))
+			fatal_error("arch_prctl test parent exit");
+		return;
+	}
+	/* fork() succeeded.  Now in the child . */
+
+	printf("[RUN]\tCheck ARCH_REQ_XCOMP_PERM around process fork() and sigaltack() test.\n");
+
+	printf("\tFork a child.\n");
+	child = fork();
+	if (child < 0) {
+		fatal_error("fork");
+	} else if (child > 0) {
+		int status;
+
+		wait(&status);
+		if (!WIFEXITED(status) || WEXITSTATUS(status))
+			fatal_error("arch_prctl test child exit");
+		_exit(0);
+	}
+
+	/*
+	 * The permission request should fail without an
+	 * XTILEDATA-compatible signal stack
+	 */
+	printf("\tTest XCOMP_PERM at child.\n");
+	validate_xcomp_perm(FAIL_EXPECTED);
+
+	/*
+	 * Set up an XTILEDATA-compatible signal stack and
+	 * also obtain permission to populate XTILEDATA.
+	 */
+	printf("\tTest dynamic sigaltstack at child:\n");
+	test_dynamic_sigaltstack();
+
+	/* Ensure that XTILEDATA can be populated. */
+	printf("\tTest XCOMP_PERM again at child.\n");
+	validate_xcomp_perm(SUCCESS_EXPECTED);
+
+	printf("\tFork a grandchild.\n");
+	grandchild = fork();
+	if (grandchild < 0) {
+		/* fork() failed */
+		fatal_error("fork");
+	} else if (!grandchild) {
+		/* fork() succeeded.  Now in the (grand)child. */
+		printf("\tTest XCOMP_PERM at grandchild.\n");
+
+		/*
+		 * Ensure that the grandchild inherited
+		 * permission and a compatible sigaltstack:
+		 */
+		validate_xcomp_perm(SUCCESS_EXPECTED);
+	} else {
+		int status;
+		/* fork() succeeded.  Now in the parent. */
+
+		wait(&status);
+		if (!WIFEXITED(status) || WEXITSTATUS(status))
+			fatal_error("fork test grandchild");
+	}
+
+	_exit(0);
+}
+
+/*
+ * Save current register state and compare it to @xbuf1.'
+ *
+ * Returns false if @xbuf1 matches the registers.
+ * Returns true  if @xbuf1 differs from the registers.
+ */
+static inline bool __validate_tiledata_regs(struct xsave_buffer *xbuf1)
+{
+	struct xsave_buffer *xbuf2;
+	int ret;
+
+	xbuf2 = alloc_xbuf();
+	if (!xbuf2)
+		fatal_error("failed to allocate XSAVE buffer\n");
+
+	xsave(xbuf2, XFEATURE_MASK_XTILEDATA);
+	ret = memcmp(&xbuf1->bytes[xtiledata.xbuf_offset],
+		     &xbuf2->bytes[xtiledata.xbuf_offset],
+		     xtiledata.size);
+
+	free(xbuf2);
+
+	if (ret == 0)
+		return false;
+	return true;
+}
+
+static inline void validate_tiledata_regs_same(struct xsave_buffer *xbuf)
+{
+	int ret = __validate_tiledata_regs(xbuf);
+
+	if (ret != 0)
+		fatal_error("TILEDATA registers changed");
+}
+
+static inline void validate_tiledata_regs_changed(struct xsave_buffer *xbuf)
+{
+	int ret = __validate_tiledata_regs(xbuf);
+
+	if (ret == 0)
+		fatal_error("TILEDATA registers did not change");
+}
+
+/* tiledata inheritance test */
+
+static void test_fork(void)
+{
+	pid_t child, grandchild;
+
+	child = fork();
+	if (child < 0) {
+		/* fork() failed */
+		fatal_error("fork");
+	} else if (child > 0) {
+		/* fork() succeeded.  Now in the parent. */
+		int status;
+
+		wait(&status);
+		if (!WIFEXITED(status) || WEXITSTATUS(status))
+			fatal_error("fork test child");
+		return;
+	}
+	/* fork() succeeded.  Now in the child. */
+	printf("[RUN]\tCheck tile data inheritance.\n\tBefore fork(), load tiledata\n");
+
+	load_rand_tiledata(stashed_xsave);
+
+	grandchild = fork();
+	if (grandchild < 0) {
+		/* fork() failed */
+		fatal_error("fork");
+	} else if (grandchild > 0) {
+		/* fork() succeeded.  Still in the first child. */
+		int status;
+
+		wait(&status);
+		if (!WIFEXITED(status) || WEXITSTATUS(status))
+			fatal_error("fork test grand child");
+		_exit(0);
+	}
+	/* fork() succeeded.  Now in the (grand)child. */
+
+	/*
+	 * TILEDATA registers are not preserved across fork().
+	 * Ensure that their value has changed:
+	 */
+	validate_tiledata_regs_changed(stashed_xsave);
+
+	_exit(0);
+}
+
+/* Context switching test */
+
+static struct _ctxtswtest_cfg {
+	unsigned int iterations;
+	unsigned int num_threads;
+} ctxtswtest_config;
+
+struct futex_info {
+	pthread_t thread;
+	int nr;
+	pthread_mutex_t mutex;
+	struct futex_info *next;
+};
+
+static void *check_tiledata(void *info)
+{
+	struct futex_info *finfo = (struct futex_info *)info;
+	struct xsave_buffer *xbuf;
+	int i;
+
+	xbuf = alloc_xbuf();
+	if (!xbuf)
+		fatal_error("unable to allocate XSAVE buffer");
+
+	/*
+	 * Load random data into 'xbuf' and then restore
+	 * it to the tile registers themselves.
+	 */
+	load_rand_tiledata(xbuf);
+	for (i = 0; i < ctxtswtest_config.iterations; i++) {
+		pthread_mutex_lock(&finfo->mutex);
+
+		/*
+		 * Ensure the register values have not
+		 * diverged from those recorded in 'xbuf'.
+		 */
+		validate_tiledata_regs_same(xbuf);
+
+		/* Load new, random values into xbuf and registers */
+		load_rand_tiledata(xbuf);
+
+		/*
+		 * The last thread's last unlock will be for
+		 * thread 0's mutex.  However, thread 0 will
+		 * have already exited the loop and the mutex
+		 * will already be unlocked.
+		 *
+		 * Because this is not an ERRORCHECK mutex,
+		 * that inconsistency will be silently ignored.
+		 */
+		pthread_mutex_unlock(&finfo->next->mutex);
+	}
+
+	free(xbuf);
+	/*
+	 * Return this thread's finfo, which is
+	 * a unique value for this thread.
+	 */
+	return finfo;
+}
+
+static int create_threads(int num, struct futex_info *finfo)
+{
+	int i;
+
+	for (i = 0; i < num; i++) {
+		int next_nr;
+
+		finfo[i].nr = i;
+		/*
+		 * Thread 'i' will wait on this mutex to
+		 * be unlocked.  Lock it immediately after
+		 * initialization:
+		 */
+		pthread_mutex_init(&finfo[i].mutex, NULL);
+		pthread_mutex_lock(&finfo[i].mutex);
+
+		next_nr = (i + 1) % num;
+		finfo[i].next = &finfo[next_nr];
+
+		if (pthread_create(&finfo[i].thread, NULL, check_tiledata, &finfo[i]))
+			fatal_error("pthread_create()");
+	}
+	return 0;
+}
+
+static void affinitize_cpu0(void)
+{
+	cpu_set_t cpuset;
+
+	CPU_ZERO(&cpuset);
+	CPU_SET(0, &cpuset);
+
+	if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
+		fatal_error("sched_setaffinity to CPU 0");
+}
+
+static void test_context_switch(void)
+{
+	struct futex_info *finfo;
+	int i;
+
+	/* Affinitize to one CPU to force context switches */
+	affinitize_cpu0();
+
+	req_xtiledata_perm();
+
+	printf("[RUN]\tCheck tiledata context switches, %d iterations, %d threads.\n",
+	       ctxtswtest_config.iterations,
+	       ctxtswtest_config.num_threads);
+
+
+	finfo = malloc(sizeof(*finfo) * ctxtswtest_config.num_threads);
+	if (!finfo)
+		fatal_error("malloc()");
+
+	create_threads(ctxtswtest_config.num_threads, finfo);
+
+	/*
+	 * This thread wakes up thread 0
+	 * Thread 0 will wake up 1
+	 * Thread 1 will wake up 2
+	 * ...
+	 * the last thread will wake up 0
+	 *
+	 * ... this will repeat for the configured
+	 * number of iterations.
+	 */
+	pthread_mutex_unlock(&finfo[0].mutex);
+
+	/* Wait for all the threads to finish: */
+	for (i = 0; i < ctxtswtest_config.num_threads; i++) {
+		void *thread_retval;
+		int rc;
+
+		rc = pthread_join(finfo[i].thread, &thread_retval);
+
+		if (rc)
+			fatal_error("pthread_join() failed for thread %d err: %d\n",
+					i, rc);
+
+		if (thread_retval != &finfo[i])
+			fatal_error("unexpected thread retval for thread %d: %p\n",
+					i, thread_retval);
+
+	}
+
+	printf("[OK]\tNo incorrect case was found.\n");
+
+	free(finfo);
+}
+
+int main(void)
+{
+	/* Check hardware availability at first */
+	check_cpuid_xsave();
+	check_cpuid_xtiledata();
+
+	init_stashed_xsave();
+	sethandler(SIGILL, handle_noperm, 0);
+
+	test_dynamic_state();
+
+	/* Request permission for the following tests */
+	req_xtiledata_perm();
+
+	test_fork();
+
+	ctxtswtest_config.iterations = 10;
+	ctxtswtest_config.num_threads = 5;
+	test_context_switch();
+
+	clearhandler(SIGILL);
+	free_stashed_xsave();
+
+	return 0;
+}
diff --git a/tools/testing/selftests/x86/iopl.c b/tools/testing/selftests/x86/iopl.c
index bab2f6e06b63..7e3e09c1abac 100644
--- a/tools/testing/selftests/x86/iopl.c
+++ b/tools/testing/selftests/x86/iopl.c
@@ -85,48 +85,88 @@ static void expect_gp_outb(unsigned short port)
 	printf("[OK]\toutb to 0x%02hx failed\n", port);
 }
 
-static bool try_cli(void)
+#define RET_FAULTED	0
+#define RET_FAIL	1
+#define RET_EMUL	2
+
+static int try_cli(void)
 {
+	unsigned long flags;
+
 	sethandler(SIGSEGV, sigsegv, SA_RESETHAND);
 	if (sigsetjmp(jmpbuf, 1) != 0) {
-		return false;
+		return RET_FAULTED;
 	} else {
-		asm volatile ("cli");
-		return true;
+		asm volatile("cli; pushf; pop %[flags]"
+				: [flags] "=rm" (flags));
+
+		/* X86_FLAGS_IF */
+		if (!(flags & (1 << 9)))
+			return RET_FAIL;
+		else
+			return RET_EMUL;
 	}
 	clearhandler(SIGSEGV);
 }
 
-static bool try_sti(void)
+static int try_sti(bool irqs_off)
 {
+	unsigned long flags;
+
 	sethandler(SIGSEGV, sigsegv, SA_RESETHAND);
 	if (sigsetjmp(jmpbuf, 1) != 0) {
-		return false;
+		return RET_FAULTED;
 	} else {
-		asm volatile ("sti");
-		return true;
+		asm volatile("sti; pushf; pop %[flags]"
+				: [flags] "=rm" (flags));
+
+		/* X86_FLAGS_IF */
+		if (irqs_off && (flags & (1 << 9)))
+			return RET_FAIL;
+		else
+			return RET_EMUL;
 	}
 	clearhandler(SIGSEGV);
 }
 
-static void expect_gp_sti(void)
+static void expect_gp_sti(bool irqs_off)
 {
-	if (try_sti()) {
+	int ret = try_sti(irqs_off);
+
+	switch (ret) {
+	case RET_FAULTED:
+		printf("[OK]\tSTI faulted\n");
+		break;
+	case RET_EMUL:
+		printf("[OK]\tSTI NOPped\n");
+		break;
+	default:
 		printf("[FAIL]\tSTI worked\n");
 		nerrs++;
-	} else {
-		printf("[OK]\tSTI faulted\n");
 	}
 }
 
-static void expect_gp_cli(void)
+/*
+ * Returns whether it managed to disable interrupts.
+ */
+static bool test_cli(void)
 {
-	if (try_cli()) {
+	int ret = try_cli();
+
+	switch (ret) {
+	case RET_FAULTED:
+		printf("[OK]\tCLI faulted\n");
+		break;
+	case RET_EMUL:
+		printf("[OK]\tCLI NOPped\n");
+		break;
+	default:
 		printf("[FAIL]\tCLI worked\n");
 		nerrs++;
-	} else {
-		printf("[OK]\tCLI faulted\n");
+		return true;
 	}
+
+	return false;
 }
 
 int main(void)
@@ -152,8 +192,7 @@ int main(void)
 	}
 
 	/* Make sure that CLI/STI are blocked even with IOPL level 3 */
-	expect_gp_cli();
-	expect_gp_sti();
+	expect_gp_sti(test_cli());
 	expect_ok_outb(0x80);
 
 	/* Establish an I/O bitmap to test the restore */
@@ -204,8 +243,7 @@ int main(void)
 	printf("[RUN]\tparent: write to 0x80 (should fail)\n");
 
 	expect_gp_outb(0x80);
-	expect_gp_cli();
-	expect_gp_sti();
+	expect_gp_sti(test_cli());
 
 	/* Test the capability checks. */
 	printf("\tiopl(3)\n");
diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c
index 65c141ebfbbd..5b45e6986aea 100644
--- a/tools/testing/selftests/x86/test_vsyscall.c
+++ b/tools/testing/selftests/x86/test_vsyscall.c
@@ -497,7 +497,7 @@ static int test_process_vm_readv(void)
 	}
 
 	if (vsyscall_map_r) {
-		if (!memcmp(buf, (const void *)0xffffffffff600000, 4096)) {
+		if (!memcmp(buf, remote.iov_base, sizeof(buf))) {
 			printf("[OK]\tIt worked and read correct data\n");
 		} else {
 			printf("[FAIL]\tIt worked but returned incorrect data\n");