364 files changed, 23733 insertions, 2432 deletions
diff --git a/tools/testing/kunit/configs/broken_on_uml.config b/tools/testing/kunit/configs/broken_on_uml.config
index 239b9f03da2c..a7f0603d33f6 100644
--- a/tools/testing/kunit/configs/broken_on_uml.config
+++ b/tools/testing/kunit/configs/broken_on_uml.config
@@ -39,3 +39,4 @@
 # CONFIG_QCOM_CPR is not set
 # CONFIG_RESET_BRCMSTB_RESCAL is not set
 # CONFIG_RESET_INTEL_GW is not set
+# CONFIG_ADI_AXI_ADC is not set
diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py
index 425ef40067e7..ebf5f5763dee 100755
--- a/tools/testing/kunit/kunit.py
+++ b/tools/testing/kunit/kunit.py
@@ -17,6 +17,7 @@ from collections import namedtuple
 from enum import Enum, auto
 
 import kunit_config
+import kunit_json
 import kunit_kernel
 import kunit_parser
 
@@ -30,9 +31,9 @@ KunitBuildRequest = namedtuple('KunitBuildRequest',
 KunitExecRequest = namedtuple('KunitExecRequest',
 			      ['timeout', 'build_dir', 'alltests'])
 KunitParseRequest = namedtuple('KunitParseRequest',
-			       ['raw_output', 'input_data'])
+			       ['raw_output', 'input_data', 'build_dir', 'json'])
 KunitRequest = namedtuple('KunitRequest', ['raw_output','timeout', 'jobs',
-					   'build_dir', 'alltests',
+					   'build_dir', 'alltests', 'json',
 					   'make_options'])
 
 KernelDirectoryPath = sys.argv[0].split('tools/testing/kunit/')[0]
@@ -113,12 +114,22 @@ def parse_tests(request: KunitParseRequest) -> KunitResult:
 	test_result = kunit_parser.TestResult(kunit_parser.TestStatus.SUCCESS,
 					      [],
 					      'Tests not Parsed.')
+
 	if request.raw_output:
 		kunit_parser.raw_output(request.input_data)
 	else:
 		test_result = kunit_parser.parse_run_tests(request.input_data)
 	parse_end = time.time()
 
+	if request.json:
+		json_obj = kunit_json.get_json_result(
+					test_result=test_result,
+					def_config='kunit_defconfig',
+					build_dir=request.build_dir,
+					json_path=request.json)
+		if request.json == 'stdout':
+			print(json_obj)
+
 	if test_result.status != kunit_parser.TestStatus.SUCCESS:
 		return KunitResult(KunitStatus.TEST_FAILURE, test_result,
 				   parse_end - parse_start)
@@ -151,7 +162,9 @@ def run_tests(linux: kunit_kernel.LinuxSourceTree,
 		return exec_result
 
 	parse_request = KunitParseRequest(request.raw_output,
-					  exec_result.result)
+					  exec_result.result,
+					  request.build_dir,
+					  request.json)
 	parse_result = parse_tests(parse_request)
 
 	run_end = time.time()
@@ -195,7 +208,12 @@ def add_exec_opts(parser):
 def add_parse_opts(parser):
 	parser.add_argument('--raw_output', help='don\'t format output from kernel',
 			    action='store_true')
-
+	parser.add_argument('--json',
+			    nargs='?',
+			    help='Stores test results in a JSON, and either '
+			    'prints to stdout or saves to file if a '
+			    'filename is specified',
+			    type=str, const='stdout', default=None)
 
 def main(argv, linux=None):
 	parser = argparse.ArgumentParser(
@@ -237,10 +255,16 @@ def main(argv, linux=None):
 
 	cli_args = parser.parse_args(argv)
 
+	if get_kernel_root_path():
+		os.chdir(get_kernel_root_path())
+
 	if cli_args.subcommand == 'run':
 		if not os.path.exists(cli_args.build_dir):
 			os.mkdir(cli_args.build_dir)
 
+		if not os.path.exists(kunit_kernel.kunitconfig_path):
+			create_default_kunitconfig()
+
 		if not linux:
 			linux = kunit_kernel.LinuxSourceTree()
 
@@ -249,14 +273,18 @@ def main(argv, linux=None):
 				       cli_args.jobs,
 				       cli_args.build_dir,
 				       cli_args.alltests,
+				       cli_args.json,
 				       cli_args.make_options)
 		result = run_tests(linux, request)
 		if result.status != KunitStatus.SUCCESS:
 			sys.exit(1)
 	elif cli_args.subcommand == 'config':
-		if cli_args.build_dir:
-			if not os.path.exists(cli_args.build_dir):
-				os.mkdir(cli_args.build_dir)
+		if cli_args.build_dir and (
+				not os.path.exists(cli_args.build_dir)):
+			os.mkdir(cli_args.build_dir)
+
+		if not os.path.exists(kunit_kernel.kunitconfig_path):
+			create_default_kunitconfig()
 
 		if not linux:
 			linux = kunit_kernel.LinuxSourceTree()
@@ -270,10 +298,6 @@ def main(argv, linux=None):
 		if result.status != KunitStatus.SUCCESS:
 			sys.exit(1)
 	elif cli_args.subcommand == 'build':
-		if cli_args.build_dir:
-			if not os.path.exists(cli_args.build_dir):
-				os.mkdir(cli_args.build_dir)
-
 		if not linux:
 			linux = kunit_kernel.LinuxSourceTree()
 
@@ -288,10 +312,6 @@ def main(argv, linux=None):
 		if result.status != KunitStatus.SUCCESS:
 			sys.exit(1)
 	elif cli_args.subcommand == 'exec':
-		if cli_args.build_dir:
-			if not os.path.exists(cli_args.build_dir):
-				os.mkdir(cli_args.build_dir)
-
 		if not linux:
 			linux = kunit_kernel.LinuxSourceTree()
 
@@ -300,7 +320,9 @@ def main(argv, linux=None):
 						cli_args.alltests)
 		exec_result = exec_tests(linux, exec_request)
 		parse_request = KunitParseRequest(cli_args.raw_output,
-						  exec_result.result)
+						  exec_result.result,
+						  cli_args.build_dir,
+						  cli_args.json)
 		result = parse_tests(parse_request)
 		kunit_parser.print_with_timestamp((
 			'Elapsed time: %.3fs\n') % (
@@ -314,7 +336,9 @@ def main(argv, linux=None):
 			with open(cli_args.file, 'r') as f:
 				kunit_output = f.read().splitlines()
 		request = KunitParseRequest(cli_args.raw_output,
-					    kunit_output)
+					    kunit_output,
+					    cli_args.build_dir,
+					    cli_args.json)
 		result = parse_tests(request)
 		if result.status != KunitStatus.SUCCESS:
 			sys.exit(1)
diff --git a/tools/testing/kunit/kunit_json.py b/tools/testing/kunit/kunit_json.py
new file mode 100644
index 000000000000..624b31b2dbd6
--- /dev/null
+++ b/tools/testing/kunit/kunit_json.py
@@ -0,0 +1,63 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Generates JSON from KUnit results according to
+# KernelCI spec: https://github.com/kernelci/kernelci-doc/wiki/Test-API
+#
+# Copyright (C) 2020, Google LLC.
+# Author: Heidi Fahim <heidifahim@google.com>
+
+import json
+import os
+
+import kunit_parser
+
+from kunit_parser import TestStatus
+
+def get_json_result(test_result, def_config, build_dir, json_path):
+	sub_groups = []
+
+	# Each test suite is mapped to a KernelCI sub_group
+	for test_suite in test_result.suites:
+		sub_group = {
+			"name": test_suite.name,
+			"arch": "UM",
+			"defconfig": def_config,
+			"build_environment": build_dir,
+			"test_cases": [],
+			"lab_name": None,
+			"kernel": None,
+			"job": None,
+			"git_branch": "kselftest",
+		}
+		test_cases = []
+		# TODO: Add attachments attribute in test_case with detailed
+		#  failure message, see https://api.kernelci.org/schema-test-case.html#get
+		for case in test_suite.cases:
+			test_case = {"name": case.name, "status": "FAIL"}
+			if case.status == TestStatus.SUCCESS:
+				test_case["status"] = "PASS"
+			elif case.status == TestStatus.TEST_CRASHED:
+				test_case["status"] = "ERROR"
+			test_cases.append(test_case)
+		sub_group["test_cases"] = test_cases
+		sub_groups.append(sub_group)
+	test_group = {
+		"name": "KUnit Test Group",
+		"arch": "UM",
+		"defconfig": def_config,
+		"build_environment": build_dir,
+		"sub_groups": sub_groups,
+		"lab_name": None,
+		"kernel": None,
+		"job": None,
+		"git_branch": "kselftest",
+	}
+	json_obj = json.dumps(test_group, indent=4)
+	if json_path != 'stdout':
+		with open(json_path, 'w') as result_path:
+			result_path.write(json_obj)
+		root = __file__.split('tools/testing/kunit/')[0]
+		kunit_parser.print_with_timestamp(
+			"Test results stored in %s" %
+			os.path.join(root, result_path.name))
+	return json_obj
diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py
index e20e2056cb38..b557b1e93f98 100644
--- a/tools/testing/kunit/kunit_kernel.py
+++ b/tools/testing/kunit/kunit_kernel.py
@@ -36,9 +36,9 @@ class LinuxSourceTreeOperations(object):
 		try:
 			subprocess.check_output(['make', 'mrproper'], stderr=subprocess.STDOUT)
 		except OSError as e:
-			raise ConfigError('Could not call make command: ' + e)
+			raise ConfigError('Could not call make command: ' + str(e))
 		except subprocess.CalledProcessError as e:
-			raise ConfigError(e.output)
+			raise ConfigError(e.output.decode())
 
 	def make_olddefconfig(self, build_dir, make_options):
 		command = ['make', 'ARCH=um', 'olddefconfig']
@@ -49,22 +49,27 @@ class LinuxSourceTreeOperations(object):
 		try:
 			subprocess.check_output(command, stderr=subprocess.STDOUT)
 		except OSError as e:
-			raise ConfigError('Could not call make command: ' + e)
+			raise ConfigError('Could not call make command: ' + str(e))
 		except subprocess.CalledProcessError as e:
-			raise ConfigError(e.output)
+			raise ConfigError(e.output.decode())
 
-	def make_allyesconfig(self):
+	def make_allyesconfig(self, build_dir, make_options):
 		kunit_parser.print_with_timestamp(
 			'Enabling all CONFIGs for UML...')
+		command = ['make', 'ARCH=um', 'allyesconfig']
+		if make_options:
+			command.extend(make_options)
+		if build_dir:
+			command += ['O=' + build_dir]
 		process = subprocess.Popen(
-			['make', 'ARCH=um', 'allyesconfig'],
+			command,
 			stdout=subprocess.DEVNULL,
 			stderr=subprocess.STDOUT)
 		process.wait()
 		kunit_parser.print_with_timestamp(
 			'Disabling broken configs to run KUnit tests...')
 		with ExitStack() as es:
-			config = open(KCONFIG_PATH, 'a')
+			config = open(get_kconfig_path(build_dir), 'a')
 			disable = open(BROKEN_ALLCONFIG_PATH, 'r').read()
 			config.write(disable)
 		kunit_parser.print_with_timestamp(
@@ -79,9 +84,9 @@ class LinuxSourceTreeOperations(object):
 		try:
 			subprocess.check_output(command, stderr=subprocess.STDOUT)
 		except OSError as e:
-			raise BuildError('Could not call execute make: ' + e)
+			raise BuildError('Could not call execute make: ' + str(e))
 		except subprocess.CalledProcessError as e:
-			raise BuildError(e.output)
+			raise BuildError(e.output.decode())
 
 	def linux_bin(self, params, timeout, build_dir, outfile):
 		"""Runs the Linux UML binary. Must be named 'linux'."""
@@ -161,9 +166,9 @@ class LinuxSourceTree(object):
 			return self.build_config(build_dir, make_options)
 
 	def build_um_kernel(self, alltests, jobs, build_dir, make_options):
-		if alltests:
-			self._ops.make_allyesconfig()
 		try:
+			if alltests:
+				self._ops.make_allyesconfig(build_dir, make_options)
 			self._ops.make_olddefconfig(build_dir, make_options)
 			self._ops.make(jobs, build_dir, make_options)
 		except (ConfigError, BuildError) as e:
diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py
index f13e0c0d6663..8019e3dd4c32 100644
--- a/tools/testing/kunit/kunit_parser.py
+++ b/tools/testing/kunit/kunit_parser.py
@@ -45,10 +45,11 @@ class TestStatus(Enum):
 	FAILURE = auto()
 	TEST_CRASHED = auto()
 	NO_TESTS = auto()
+	FAILURE_TO_PARSE_TESTS = auto()
 
 kunit_start_re = re.compile(r'TAP version [0-9]+$')
 kunit_end_re = re.compile('(List of all partitions:|'
-			  'Kernel panic - not syncing: VFS:|reboot: System halted)')
+			  'Kernel panic - not syncing: VFS:)')
 
 def isolate_kunit_output(kernel_output):
 	started = False
@@ -109,7 +110,7 @@ OkNotOkResult = namedtuple('OkNotOkResult', ['is_ok','description', 'text'])
 
 OK_NOT_OK_SUBTEST = re.compile(r'^[\s]+(ok|not ok) [0-9]+ - (.*)$')
 
-OK_NOT_OK_MODULE = re.compile(r'^(ok|not ok) [0-9]+ - (.*)$')
+OK_NOT_OK_MODULE = re.compile(r'^(ok|not ok) ([0-9]+) - (.*)$')
 
 def parse_ok_not_ok_test_case(lines: List[str], test_case: TestCase) -> bool:
 	save_non_diagnositic(lines, test_case)
@@ -197,7 +198,9 @@ def max_status(left: TestStatus, right: TestStatus) -> TestStatus:
 	else:
 		return TestStatus.SUCCESS
 
-def parse_ok_not_ok_test_suite(lines: List[str], test_suite: TestSuite) -> bool:
+def parse_ok_not_ok_test_suite(lines: List[str],
+			       test_suite: TestSuite,
+			       expected_suite_index: int) -> bool:
 	consume_non_diagnositic(lines)
 	if not lines:
 		test_suite.status = TestStatus.TEST_CRASHED
@@ -210,6 +213,12 @@ def parse_ok_not_ok_test_suite(lines: List[str], test_suite: TestSuite) -> bool:
 			test_suite.status = TestStatus.SUCCESS
 		else:
 			test_suite.status = TestStatus.FAILURE
+		suite_index = int(match.group(2))
+		if suite_index != expected_suite_index:
+			print_with_timestamp(
+				red('[ERROR] ') + 'expected_suite_index ' +
+				str(expected_suite_index) + ', but got ' +
+				str(suite_index))
 		return True
 	else:
 		return False
@@ -222,7 +231,7 @@ def bubble_up_test_case_errors(test_suite: TestSuite) -> TestStatus:
 	max_test_case_status = bubble_up_errors(lambda x: x.status, test_suite.cases)
 	return max_status(max_test_case_status, test_suite.status)
 
-def parse_test_suite(lines: List[str]) -> TestSuite:
+def parse_test_suite(lines: List[str], expected_suite_index: int) -> TestSuite:
 	if not lines:
 		return None
 	consume_non_diagnositic(lines)
@@ -241,7 +250,7 @@ def parse_test_suite(lines: List[str]) -> TestSuite:
 			break
 		test_suite.cases.append(test_case)
 		expected_test_case_num -= 1
-	if parse_ok_not_ok_test_suite(lines, test_suite):
+	if parse_ok_not_ok_test_suite(lines, test_suite, expected_suite_index):
 		test_suite.status = bubble_up_test_case_errors(test_suite)
 		return test_suite
 	elif not lines:
@@ -261,6 +270,17 @@ def parse_tap_header(lines: List[str]) -> bool:
 	else:
 		return False
 
+TEST_PLAN = re.compile(r'[0-9]+\.\.([0-9]+)')
+
+def parse_test_plan(lines: List[str]) -> int:
+	consume_non_diagnositic(lines)
+	match = TEST_PLAN.match(lines[0])
+	if match:
+		lines.pop(0)
+		return int(match.group(1))
+	else:
+		return None
+
 def bubble_up_suite_errors(test_suite_list: List[TestSuite]) -> TestStatus:
 	return bubble_up_errors(lambda x: x.status, test_suite_list)
 
@@ -268,20 +288,33 @@ def parse_test_result(lines: List[str]) -> TestResult:
 	consume_non_diagnositic(lines)
 	if not lines or not parse_tap_header(lines):
 		return TestResult(TestStatus.NO_TESTS, [], lines)
+	expected_test_suite_num = parse_test_plan(lines)
+	if not expected_test_suite_num:
+		return TestResult(TestStatus.FAILURE_TO_PARSE_TESTS, [], lines)
 	test_suites = []
-	test_suite = parse_test_suite(lines)
-	while test_suite:
-		test_suites.append(test_suite)
-		test_suite = parse_test_suite(lines)
-	return TestResult(bubble_up_suite_errors(test_suites), test_suites, lines)
+	for i in range(1, expected_test_suite_num + 1):
+		test_suite = parse_test_suite(lines, i)
+		if test_suite:
+			test_suites.append(test_suite)
+		else:
+			print_with_timestamp(
+				red('[ERROR] ') + ' expected ' +
+				str(expected_test_suite_num) +
+				' test suites, but got ' + str(i - 2))
+			break
+	test_suite = parse_test_suite(lines, -1)
+	if test_suite:
+		print_with_timestamp(red('[ERROR] ') +
+			'got unexpected test suite: ' + test_suite.name)
+	if test_suites:
+		return TestResult(bubble_up_suite_errors(test_suites), test_suites, lines)
+	else:
+		return TestResult(TestStatus.NO_TESTS, [], lines)
 
-def parse_run_tests(kernel_output) -> TestResult:
+def print_and_count_results(test_result: TestResult) -> None:
 	total_tests = 0
 	failed_tests = 0
 	crashed_tests = 0
-	test_result = parse_test_result(list(isolate_kunit_output(kernel_output)))
-	if test_result.status == TestStatus.NO_TESTS:
-		print_with_timestamp(red('[ERROR] ') + 'no kunit output detected')
 	for test_suite in test_result.suites:
 		if test_suite.status == TestStatus.SUCCESS:
 			print_suite_divider(green('[PASSED] ') + test_suite.name)
@@ -303,6 +336,21 @@ def parse_run_tests(kernel_output) -> TestResult:
 				print_with_timestamp(red('[FAILED] ') + test_case.name)
 				print_log(map(yellow, test_case.log))
 				print_with_timestamp('')
+	return total_tests, failed_tests, crashed_tests
+
+def parse_run_tests(kernel_output) -> TestResult:
+	total_tests = 0
+	failed_tests = 0
+	crashed_tests = 0
+	test_result = parse_test_result(list(isolate_kunit_output(kernel_output)))
+	if test_result.status == TestStatus.NO_TESTS:
+		print(red('[ERROR] ') + yellow('no tests run!'))
+	elif test_result.status == TestStatus.FAILURE_TO_PARSE_TESTS:
+		print(red('[ERROR] ') + yellow('could not parse test results!'))
+	else:
+		(total_tests,
+		 failed_tests,
+		 crashed_tests) = print_and_count_results(test_result)
 	print_with_timestamp(DIVIDER)
 	fmt = green if test_result.status == TestStatus.SUCCESS else red
 	print_with_timestamp(
diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py
index 287c74d821c3..99c3c5671ea4 100755
--- a/tools/testing/kunit/kunit_tool_test.py
+++ b/tools/testing/kunit/kunit_tool_test.py
@@ -11,11 +11,13 @@ from unittest import mock
 
 import tempfile, shutil # Handling test_tmpdir
 
+import json
 import os
 
 import kunit_config
 import kunit_parser
 import kunit_kernel
+import kunit_json
 import kunit
 
 test_tmpdir = ''
@@ -230,6 +232,37 @@ class KUnitParserTest(unittest.TestCase):
 			result = kunit_parser.parse_run_tests(file.readlines())
 		self.assertEqual('kunit-resource-test', result.suites[0].name)
 
+class KUnitJsonTest(unittest.TestCase):
+
+	def _json_for(self, log_file):
+		with(open(get_absolute_path(log_file))) as file:
+			test_result = kunit_parser.parse_run_tests(file)
+			json_obj = kunit_json.get_json_result(
+				test_result=test_result,
+				def_config='kunit_defconfig',
+				build_dir=None,
+				json_path='stdout')
+		return json.loads(json_obj)
+
+	def test_failed_test_json(self):
+		result = self._json_for(
+			'test_data/test_is_test_passed-failure.log')
+		self.assertEqual(
+			{'name': 'example_simple_test', 'status': 'FAIL'},
+			result["sub_groups"][1]["test_cases"][0])
+
+	def test_crashed_test_json(self):
+		result = self._json_for(
+			'test_data/test_is_test_passed-crash.log')
+		self.assertEqual(
+			{'name': 'example_simple_test', 'status': 'ERROR'},
+			result["sub_groups"][1]["test_cases"][0])
+
+	def test_no_tests_json(self):
+		result = self._json_for(
+			'test_data/test_is_test_passed-no_tests_run.log')
+		self.assertEqual(0, len(result['sub_groups']))
+
 class StrContains(str):
 	def __eq__(self, other):
 		return self in other
diff --git a/tools/testing/kunit/test_data/test_is_test_passed-all_passed.log b/tools/testing/kunit/test_data/test_is_test_passed-all_passed.log
index 62ebc0288355..bc0dc8fe35b7 100644
--- a/tools/testing/kunit/test_data/test_is_test_passed-all_passed.log
+++ b/tools/testing/kunit/test_data/test_is_test_passed-all_passed.log
@@ -1,4 +1,5 @@
 TAP version 14
+1..2
 	# Subtest: sysctl_test
 	1..8
 	# sysctl_test_dointvec_null_tbl_data: sysctl_test_dointvec_null_tbl_data passed
diff --git a/tools/testing/kunit/test_data/test_is_test_passed-crash.log b/tools/testing/kunit/test_data/test_is_test_passed-crash.log
index 0b249870c8be..4d97f6708c4a 100644
--- a/tools/testing/kunit/test_data/test_is_test_passed-crash.log
+++ b/tools/testing/kunit/test_data/test_is_test_passed-crash.log
@@ -1,6 +1,7 @@
 printk: console [tty0] enabled
 printk: console [mc-1] enabled
 TAP version 14
+1..2
 	# Subtest: sysctl_test
 	1..8
 	# sysctl_test_dointvec_null_tbl_data: sysctl_test_dointvec_null_tbl_data passed
diff --git a/tools/testing/kunit/test_data/test_is_test_passed-failure.log b/tools/testing/kunit/test_data/test_is_test_passed-failure.log
index 9e89d32d5667..7a416497e3be 100644
--- a/tools/testing/kunit/test_data/test_is_test_passed-failure.log
+++ b/tools/testing/kunit/test_data/test_is_test_passed-failure.log
@@ -1,4 +1,5 @@
 TAP version 14
+1..2
 	# Subtest: sysctl_test
 	1..8
 	# sysctl_test_dointvec_null_tbl_data: sysctl_test_dointvec_null_tbl_data passed
diff --git a/tools/testing/nvdimm/dax-dev.c b/tools/testing/nvdimm/dax-dev.c
index 7e5d979e73cb..fb342a8c98d3 100644
--- a/tools/testing/nvdimm/dax-dev.c
+++ b/tools/testing/nvdimm/dax-dev.c
@@ -9,12 +9,19 @@
 phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff,
 		unsigned long size)
 {
-	struct resource *res = &dev_dax->region->res;
-	phys_addr_t addr;
+	int i;
 
-	addr = pgoff * PAGE_SIZE + res->start;
-	if (addr >= res->start && addr <= res->end) {
-		if (addr + size - 1 <= res->end) {
+	for (i = 0; i < dev_dax->nr_range; i++) {
+		struct dev_dax_range *dax_range = &dev_dax->ranges[i];
+		struct range *range = &dax_range->range;
+		unsigned long long pgoff_end;
+		phys_addr_t addr;
+
+		pgoff_end = dax_range->pgoff + PHYS_PFN(range_len(range)) - 1;
+		if (pgoff < dax_range->pgoff || pgoff > pgoff_end)
+			continue;
+		addr = PFN_PHYS(pgoff - dax_range->pgoff) + range->start;
+		if (addr + size - 1 <= range->end) {
 			if (get_nfit_res(addr)) {
 				struct page *page;
 
@@ -23,9 +30,10 @@ phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff,
 
 				page = vmalloc_to_page((void *)addr);
 				return PFN_PHYS(page_to_pfn(page));
-			} else
-				return addr;
+			}
+			return addr;
 		}
+		break;
 	}
 	return -1;
 }
diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c
index 03e40b3b0106..c62d372d426f 100644
--- a/tools/testing/nvdimm/test/iomap.c
+++ b/tools/testing/nvdimm/test/iomap.c
@@ -126,7 +126,7 @@ static void dev_pagemap_percpu_release(struct percpu_ref *ref)
 void *__wrap_devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
 {
 	int error;
-	resource_size_t offset = pgmap->res.start;
+	resource_size_t offset = pgmap->range.start;
 	struct nfit_test_resource *nfit_res = get_nfit_res(offset);
 
 	if (!nfit_res)
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index a1a5dc645b40..2ac0fff6dad8 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -23,7 +23,8 @@
 #include "nfit_test.h"
 #include "../watermark.h"
 
-#include <asm/mcsafe_test.h>
+#include <asm/copy_mc_test.h>
+#include <asm/mce.h>
 
 /*
  * Generate an NFIT table to describe the following topology:
@@ -3283,7 +3284,7 @@ static struct platform_driver nfit_test_driver = {
 	.id_table = nfit_test_id,
 };
 
-static char mcsafe_buf[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE)));
+static char copy_mc_buf[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE)));
 
 enum INJECT {
 	INJECT_NONE,
@@ -3291,7 +3292,7 @@ enum INJECT {
 	INJECT_DST,
 };
 
-static void mcsafe_test_init(char *dst, char *src, size_t size)
+static void copy_mc_test_init(char *dst, char *src, size_t size)
 {
 	size_t i;
 
@@ -3300,7 +3301,7 @@ static void mcsafe_test_init(char *dst, char *src, size_t size)
 		src[i] = (char) i;
 }
 
-static bool mcsafe_test_validate(unsigned char *dst, unsigned char *src,
+static bool copy_mc_test_validate(unsigned char *dst, unsigned char *src,
 		size_t size, unsigned long rem)
 {
 	size_t i;
@@ -3321,12 +3322,12 @@ static bool mcsafe_test_validate(unsigned char *dst, unsigned char *src,
 	return true;
 }
 
-void mcsafe_test(void)
+void copy_mc_test(void)
 {
 	char *inject_desc[] = { "none", "source", "destination" };
 	enum INJECT inj;
 
-	if (IS_ENABLED(CONFIG_MCSAFE_TEST)) {
+	if (IS_ENABLED(CONFIG_COPY_MC_TEST)) {
 		pr_info("%s: run...\n", __func__);
 	} else {
 		pr_info("%s: disabled, skip.\n", __func__);
@@ -3344,31 +3345,31 @@ void mcsafe_test(void)
 
 			switch (inj) {
 			case INJECT_NONE:
-				mcsafe_inject_src(NULL);
-				mcsafe_inject_dst(NULL);
-				dst = &mcsafe_buf[2048];
-				src = &mcsafe_buf[1024 - i];
+				copy_mc_inject_src(NULL);
+				copy_mc_inject_dst(NULL);
+				dst = &copy_mc_buf[2048];
+				src = &copy_mc_buf[1024 - i];
 				expect = 0;
 				break;
 			case INJECT_SRC:
-				mcsafe_inject_src(&mcsafe_buf[1024]);
-				mcsafe_inject_dst(NULL);
-				dst = &mcsafe_buf[2048];
-				src = &mcsafe_buf[1024 - i];
+				copy_mc_inject_src(&copy_mc_buf[1024]);
+				copy_mc_inject_dst(NULL);
+				dst = &copy_mc_buf[2048];
+				src = &copy_mc_buf[1024 - i];
 				expect = 512 - i;
 				break;
 			case INJECT_DST:
-				mcsafe_inject_src(NULL);
-				mcsafe_inject_dst(&mcsafe_buf[2048]);
-				dst = &mcsafe_buf[2048 - i];
-				src = &mcsafe_buf[1024];
+				copy_mc_inject_src(NULL);
+				copy_mc_inject_dst(&copy_mc_buf[2048]);
+				dst = &copy_mc_buf[2048 - i];
+				src = &copy_mc_buf[1024];
 				expect = 512 - i;
 				break;
 			}
 
-			mcsafe_test_init(dst, src, 512);
-			rem = __memcpy_mcsafe(dst, src, 512);
-			valid = mcsafe_test_validate(dst, src, 512, expect);
+			copy_mc_test_init(dst, src, 512);
+			rem = copy_mc_fragile(dst, src, 512);
+			valid = copy_mc_test_validate(dst, src, 512, expect);
 			if (rem == expect && valid)
 				continue;
 			pr_info("%s: copy(%#lx, %#lx, %d) off: %d rem: %ld %s expect: %ld\n",
@@ -3380,8 +3381,8 @@ void mcsafe_test(void)
 		}
 	}
 
-	mcsafe_inject_src(NULL);
-	mcsafe_inject_dst(NULL);
+	copy_mc_inject_src(NULL);
+	copy_mc_inject_dst(NULL);
 }
 
 static __init int nfit_test_init(void)
@@ -3392,7 +3393,7 @@ static __init int nfit_test_init(void)
 	libnvdimm_test();
 	acpi_nfit_test();
 	device_dax_test();
-	mcsafe_test();
+	copy_mc_test();
 	dax_pmem_test();
 	dax_pmem_core_test();
 #ifdef CONFIG_DEV_DAX_PMEM_COMPAT
diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c
index 8995092d541e..3b796dd5e577 100644
--- a/tools/testing/radix-tree/idr-test.c
+++ b/tools/testing/radix-tree/idr-test.c
@@ -523,8 +523,27 @@ static void *ida_random_fn(void *arg)
 	return NULL;
 }
 
+static void *ida_leak_fn(void *arg)
+{
+	struct ida *ida = arg;
+	time_t s = time(NULL);
+	int i, ret;
+
+	rcu_register_thread();
+
+	do for (i = 0; i < 1000; i++) {
+		ret = ida_alloc_range(ida, 128, 128, GFP_KERNEL);
+		if (ret >= 0)
+			ida_free(ida, 128);
+	} while (time(NULL) < s + 2);
+
+	rcu_unregister_thread();
+	return NULL;
+}
+
 void ida_thread_tests(void)
 {
+	DEFINE_IDA(ida);
 	pthread_t threads[20];
 	int i;
 
@@ -536,6 +555,16 @@ void ida_thread_tests(void)
 
 	while (i--)
 		pthread_join(threads[i], NULL);
+
+	for (i = 0; i < ARRAY_SIZE(threads); i++)
+		if (pthread_create(&threads[i], NULL, ida_leak_fn, &ida)) {
+			perror("creating ida thread");
+			exit(1);
+		}
+
+	while (i--)
+		pthread_join(threads[i], NULL);
+	assert(ida_is_empty(&ida));
 }
 
 void ida_tests(void)
diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h
index 4568248222ae..39867fd80c8f 100644
--- a/tools/testing/radix-tree/linux/kernel.h
+++ b/tools/testing/radix-tree/linux/kernel.h
@@ -22,4 +22,5 @@
 #define __releases(x)
 #define __must_hold(x)
 
+#define EXPORT_PER_CPU_SYMBOL_GPL(x)
 #endif /* _KERNEL_H */
diff --git a/tools/testing/radix-tree/linux/local_lock.h b/tools/testing/radix-tree/linux/local_lock.h
new file mode 100644
index 000000000000..b3cf8b233ca4
--- /dev/null
+++ b/tools/testing/radix-tree/linux/local_lock.h
@@ -0,0 +1,8 @@
+#ifndef _LINUX_LOCAL_LOCK
+#define _LINUX_LOCAL_LOCK
+typedef struct { } local_lock_t;
+
+static inline void local_lock(local_lock_t *lock) { }
+static inline void local_unlock(local_lock_t *lock) { }
+#define INIT_LOCAL_LOCK(x) { }
+#endif
diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h
index 34dab4d18744..7ef7067e942c 100644
--- a/tools/testing/radix-tree/test.h
+++ b/tools/testing/radix-tree/test.h
@@ -56,8 +56,4 @@ int root_tag_get(struct radix_tree_root *root, unsigned int tag);
 unsigned long node_maxindex(struct radix_tree_node *);
 unsigned long shift_maxindex(unsigned int shift);
 int radix_tree_cpu_dead(unsigned int cpu);
-struct radix_tree_preload {
-	unsigned nr;
-	struct radix_tree_node *nodes;
-};
 extern struct radix_tree_preload radix_tree_preloads;
diff --git a/tools/testing/scatterlist/Makefile b/tools/testing/scatterlist/Makefile
index cbb003d9305e..c65233876622 100644
--- a/tools/testing/scatterlist/Makefile
+++ b/tools/testing/scatterlist/Makefile
@@ -14,7 +14,7 @@ targets: include $(TARGETS)
 main: $(OFILES)
 
 clean:
-	$(RM) $(TARGETS) $(OFILES) scatterlist.c linux/scatterlist.h linux/highmem.h linux/kmemleak.h asm/io.h
+	$(RM) $(TARGETS) $(OFILES) scatterlist.c linux/scatterlist.h linux/highmem.h linux/kmemleak.h linux/slab.h asm/io.h
 	@rmdir asm
 
 scatterlist.c: ../../../lib/scatterlist.c
@@ -28,4 +28,5 @@ include: ../../../include/linux/scatterlist.h
 	@touch asm/io.h
 	@touch linux/highmem.h
 	@touch linux/kmemleak.h
+	@touch linux/slab.h
 	@cp $< linux/scatterlist.h
diff --git a/tools/testing/scatterlist/linux/mm.h b/tools/testing/scatterlist/linux/mm.h
index 6f9ac14aa800..6ae907f375d2 100644
--- a/tools/testing/scatterlist/linux/mm.h
+++ b/tools/testing/scatterlist/linux/mm.h
@@ -114,6 +114,12 @@ static inline void *kmalloc(unsigned int size, unsigned int flags)
 	return malloc(size);
 }
 
+static inline void *
+kmalloc_array(unsigned int n, unsigned int size, unsigned int flags)
+{
+	return malloc(n * size);
+}
+
 #define kfree(x) free(x)
 
 #define kmemleak_alloc(a, b, c, d)
@@ -122,4 +128,33 @@ static inline void *kmalloc(unsigned int size, unsigned int flags)
 #define PageSlab(p) (0)
 #define flush_kernel_dcache_page(p)
 
+#define MAX_ERRNO	4095
+
+#define IS_ERR_VALUE(x) unlikely((unsigned long)(void *)(x) >= (unsigned long)-MAX_ERRNO)
+
+static inline void * __must_check ERR_PTR(long error)
+{
+	return (void *) error;
+}
+
+static inline long __must_check PTR_ERR(__force const void *ptr)
+{
+	return (long) ptr;
+}
+
+static inline bool __must_check IS_ERR(__force const void *ptr)
+{
+	return IS_ERR_VALUE((unsigned long)ptr);
+}
+
+static inline int __must_check PTR_ERR_OR_ZERO(__force const void *ptr)
+{
+	if (IS_ERR(ptr))
+		return PTR_ERR(ptr);
+	else
+		return 0;
+}
+
+#define IS_ENABLED(x) (0)
+
 #endif
diff --git a/tools/testing/scatterlist/main.c b/tools/testing/scatterlist/main.c
index 0a1464181226..b2c7e9f7b8d3 100644
--- a/tools/testing/scatterlist/main.c
+++ b/tools/testing/scatterlist/main.c
@@ -5,6 +5,15 @@
 
 #define MAX_PAGES (64)
 
+struct test {
+	int alloc_ret;
+	unsigned num_pages;
+	unsigned *pfn;
+	unsigned size;
+	unsigned int max_seg;
+	unsigned int expected_segments;
+};
+
 static void set_pages(struct page **pages, const unsigned *array, unsigned num)
 {
 	unsigned int i;
@@ -17,17 +26,32 @@ static void set_pages(struct page **pages, const unsigned *array, unsigned num)
 
 #define pfn(...) (unsigned []){ __VA_ARGS__ }
 
+static void fail(struct test *test, struct sg_table *st, const char *cond)
+{
+	unsigned int i;
+
+	fprintf(stderr, "Failed on '%s'!\n\n", cond);
+
+	printf("size = %u, max segment = %u, expected nents = %u\nst->nents = %u, st->orig_nents= %u\n",
+	       test->size, test->max_seg, test->expected_segments, st->nents,
+	       st->orig_nents);
+
+	printf("%u input PFNs:", test->num_pages);
+	for (i = 0; i < test->num_pages; i++)
+		printf(" %x", test->pfn[i]);
+	printf("\n");
+
+	exit(1);
+}
+
+#define VALIDATE(cond, st, test) \
+	if (!(cond)) \
+		fail((test), (st), #cond);
+
 int main(void)
 {
 	const unsigned int sgmax = SCATTERLIST_MAX_SEGMENT;
-	struct test {
-		int alloc_ret;
-		unsigned num_pages;
-		unsigned *pfn;
-		unsigned size;
-		unsigned int max_seg;
-		unsigned int expected_segments;
-	} *test, tests[] = {
+	struct test *test, tests[] = {
 		{ -EINVAL, 1, pfn(0), PAGE_SIZE, PAGE_SIZE + 1, 1 },
 		{ -EINVAL, 1, pfn(0), PAGE_SIZE, 0, 1 },
 		{ -EINVAL, 1, pfn(0), PAGE_SIZE, sgmax + 1, 1 },
@@ -55,20 +79,19 @@ int main(void)
 	for (i = 0, test = tests; test->expected_segments; test++, i++) {
 		struct page *pages[MAX_PAGES];
 		struct sg_table st;
-		int ret;
+		struct scatterlist *sg;
 
 		set_pages(pages, test->pfn, test->num_pages);
 
-		ret = __sg_alloc_table_from_pages(&st, pages, test->num_pages,
-						  0, test->size, test->max_seg,
-						  GFP_KERNEL);
-		assert(ret == test->alloc_ret);
+		sg = __sg_alloc_table_from_pages(&st, pages, test->num_pages, 0,
+				test->size, test->max_seg, NULL, 0, GFP_KERNEL);
+		assert(PTR_ERR_OR_ZERO(sg) == test->alloc_ret);
 
 		if (test->alloc_ret)
 			continue;
 
-		assert(st.nents == test->expected_segments);
-		assert(st.orig_nents == test->expected_segments);
+		VALIDATE(st.nents == test->expected_segments, &st, test);
+		VALIDATE(st.orig_nents == test->expected_segments, &st, test);
 
 		sg_free_table(&st);
 	}
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 9018f45d631d..d9c283503159 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -88,10 +88,10 @@ endif
 # of the targets gets built.
 FORCE_TARGETS ?=
 
-# Clear LDFLAGS and MAKEFLAGS if called from main
-# Makefile to avoid test build failures when test
-# Makefile doesn't have explicit build rules.
-ifeq (1,$(MAKELEVEL))
+# Clear LDFLAGS and MAKEFLAGS when implicit rules are missing.  This provides
+# implicit rules to sub-test Makefiles which avoids build failures in test
+# Makefile that don't have explicit build rules.
+ifeq (,$(LINK.c))
 override LDFLAGS =
 override MAKEFLAGS =
 endif
@@ -206,6 +206,7 @@ KSFT_INSTALL_PATH := $(abspath $(KSFT_INSTALL_PATH))
 # Avoid changing the rest of the logic here and lib.mk.
 INSTALL_PATH := $(KSFT_INSTALL_PATH)
 ALL_SCRIPT := $(INSTALL_PATH)/run_kselftest.sh
+TEST_LIST := $(INSTALL_PATH)/kselftest-list.txt
 
 install: all
 ifdef INSTALL_PATH
@@ -214,6 +215,8 @@ ifdef INSTALL_PATH
 	install -m 744 kselftest/module.sh $(INSTALL_PATH)/kselftest/
 	install -m 744 kselftest/runner.sh $(INSTALL_PATH)/kselftest/
 	install -m 744 kselftest/prefix.pl $(INSTALL_PATH)/kselftest/
+	install -m 744 run_kselftest.sh $(INSTALL_PATH)/
+	rm -f $(TEST_LIST)
 	@ret=1;	\
 	for TARGET in $(TARGETS); do \
 		BUILD_TARGET=$$BUILD/$$TARGET;	\
@@ -222,33 +225,18 @@ ifdef INSTALL_PATH
 		ret=$$((ret * $$?));		\
 	done; exit $$ret;
 
-	@# Ask all targets to emit their test scripts
-	echo "#!/bin/sh" > $(ALL_SCRIPT)
-	echo "BASE_DIR=\$$(realpath \$$(dirname \$$0))" >> $(ALL_SCRIPT)
-	echo "cd \$$BASE_DIR" >> $(ALL_SCRIPT)
-	echo ". ./kselftest/runner.sh" >> $(ALL_SCRIPT)
-	echo "ROOT=\$$PWD" >> $(ALL_SCRIPT)
-	echo "if [ \"\$$1\" = \"--summary\" ]; then" >> $(ALL_SCRIPT)
-	echo "  logfile=\$$BASE_DIR/output.log" >> $(ALL_SCRIPT)
-	echo "  cat /dev/null > \$$logfile" >> $(ALL_SCRIPT)
-	echo "fi" >> $(ALL_SCRIPT)
 
-	@# While building run_kselftest.sh skip also non-existent TARGET dirs:
+	@# Ask all targets to emit their test scripts
+	@# While building kselftest-list.text skip also non-existent TARGET dirs:
 	@# they could be the result of a build failure and should NOT be
 	@# included in the generated runlist.
 	for TARGET in $(TARGETS); do \
 		BUILD_TARGET=$$BUILD/$$TARGET;	\
 		[ ! -d $(INSTALL_PATH)/$$TARGET ] && echo "Skipping non-existent dir: $$TARGET" && continue; \
-		echo "[ -w /dev/kmsg ] && echo \"kselftest: Running tests in $$TARGET\" >> /dev/kmsg" >> $(ALL_SCRIPT); \
-		echo "cd $$TARGET" >> $(ALL_SCRIPT); \
-		echo -n "run_many" >> $(ALL_SCRIPT); \
 		echo -n "Emit Tests for $$TARGET\n"; \
-		$(MAKE) -s --no-print-directory OUTPUT=$$BUILD_TARGET -C $$TARGET emit_tests >> $(ALL_SCRIPT); \
-		echo "" >> $(ALL_SCRIPT);	    \
-		echo "cd \$$ROOT" >> $(ALL_SCRIPT); \
+		$(MAKE) -s --no-print-directory OUTPUT=$$BUILD_TARGET COLLECTION=$$TARGET \
+			-C $$TARGET emit_tests >> $(TEST_LIST); \
 	done;
-
-	chmod u+x $(ALL_SCRIPT)
 else
 	$(error Error: set INSTALL_PATH to use install)
 endif
diff --git a/tools/testing/selftests/arm64/Makefile b/tools/testing/selftests/arm64/Makefile
index 93b567d23c8b..2c9d012797a7 100644
--- a/tools/testing/selftests/arm64/Makefile
+++ b/tools/testing/selftests/arm64/Makefile
@@ -4,7 +4,7 @@
 ARCH ?= $(shell uname -m 2>/dev/null || echo not)
 
 ifneq (,$(filter $(ARCH),aarch64 arm64))
-ARM64_SUBTARGETS ?= tags signal
+ARM64_SUBTARGETS ?= tags signal pauth fp mte
 else
 ARM64_SUBTARGETS :=
 endif
diff --git a/tools/testing/selftests/arm64/fp/.gitignore b/tools/testing/selftests/arm64/fp/.gitignore
new file mode 100644
index 000000000000..d66f76d2a650
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/.gitignore
@@ -0,0 +1,5 @@
+fpsimd-test
+sve-probe-vls
+sve-ptrace
+sve-test
+vlset
diff --git a/tools/testing/selftests/arm64/fp/Makefile b/tools/testing/selftests/arm64/fp/Makefile
new file mode 100644
index 000000000000..a57009d3a0dc
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/Makefile
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: GPL-2.0
+
+CFLAGS += -I../../../../../usr/include/
+TEST_GEN_PROGS := sve-ptrace sve-probe-vls
+TEST_PROGS_EXTENDED := fpsimd-test fpsimd-stress sve-test sve-stress vlset
+
+all: $(TEST_GEN_PROGS) $(TEST_PROGS_EXTENDED)
+
+fpsimd-test: fpsimd-test.o
+	$(CC) -nostdlib $^ -o $@
+sve-ptrace: sve-ptrace.o sve-ptrace-asm.o
+sve-probe-vls: sve-probe-vls.o
+sve-test: sve-test.o
+	$(CC) -nostdlib $^ -o $@
+vlset: vlset.o
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/arm64/fp/README b/tools/testing/selftests/arm64/fp/README
new file mode 100644
index 000000000000..03e3dad865d8
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/README
@@ -0,0 +1,100 @@
+This directory contains a mix of tests integrated with kselftest and
+standalone stress tests.
+
+kselftest tests
+===============
+
+sve-probe-vls - Checks the SVE vector length enumeration interface
+sve-ptrace - Checks the SVE ptrace interface
+
+Running the non-kselftest tests
+===============================
+
+sve-stress performs an SVE context switch stress test, as described
+below.
+
+(The fpsimd-stress test works the same way; just substitute "fpsimd" for
+"sve" in the following commands.)
+
+
+The test runs until killed by the user.
+
+If no context switch error was detected, you will see output such as
+the following:
+
+$ ./sve-stress
+(wait for some time)
+^C
+Vector length:        512 bits
+PID:    1573
+Terminated by signal 15, no error, iterations=9467, signals=1014
+Vector length:  512 bits
+PID:    1575
+Terminated by signal 15, no error, iterations=9448, signals=1028
+Vector length:  512 bits
+PID:    1577
+Terminated by signal 15, no error, iterations=9436, signals=1039
+Vector length:  512 bits
+PID:    1579
+Terminated by signal 15, no error, iterations=9421, signals=1039
+Vector length:  512 bits
+PID:    1581
+Terminated by signal 15, no error, iterations=9403, signals=1039
+Vector length:  512 bits
+PID:    1583
+Terminated by signal 15, no error, iterations=9385, signals=1036
+Vector length:  512 bits
+PID:    1585
+Terminated by signal 15, no error, iterations=9376, signals=1039
+Vector length:  512 bits
+PID:    1587
+Terminated by signal 15, no error, iterations=9361, signals=1039
+Vector length:  512 bits
+PID:    1589
+Terminated by signal 15, no error, iterations=9350, signals=1039
+
+
+If an error was detected, details of the mismatch will be printed
+instead of "no error".
+
+Ideally, the test should be allowed to run for many minutes or hours
+to maximise test coverage.
+
+
+KVM stress testing
+==================
+
+To try to reproduce the bugs that we have been observing, sve-stress
+should be run in parallel in two KVM guests, while simultaneously
+running on the host.
+
+1) Start 2 guests, using the following command for each:
+
+$ lkvm run --console=virtio -pconsole=hvc0 --sve Image
+
+(Depending on the hardware GIC implementation, you may also need
+--irqchip=gicv3.  New kvmtool defaults to that if appropriate, but I
+can't remember whether my branch is new enough for that.  Try without
+the option first.)
+
+Kvmtool occupies the terminal until you kill it (Ctrl+A x),
+or until the guest terminates.  It is therefore recommended to run
+each instance in separate terminal (use screen or ssh etc.)  This
+allows multiple guests to be run in parallel while running other
+commands on the host.
+
+Within the guest, the host filesystem is accessible, mounted on /host.
+
+2) Run the sve-stress on *each* guest with the Vector-Length set to 32:
+guest$ ./vlset --inherit 32 ./sve-stress
+
+3) Run the sve-stress on the host with the maximum Vector-Length:
+host$ ./vlset --inherit --max ./sve-stress
+
+
+Again, the test should be allowed to run for many minutes or hours to
+maximise test coverage.
+
+If no error is detected, you will see output from each sve-stress
+instance similar to that illustrated above; otherwise details of the
+observed mismatches will be printed.
diff --git a/tools/testing/selftests/arm64/fp/asm-offsets.h b/tools/testing/selftests/arm64/fp/asm-offsets.h
new file mode 100644
index 000000000000..a180851496ec
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/asm-offsets.h
@@ -0,0 +1,11 @@
+#define sa_sz 32
+#define sa_flags 8
+#define sa_handler 0
+#define sa_mask_sz 8
+#define SIGUSR1 10
+#define SIGTERM 15
+#define SIGINT 2
+#define SIGABRT 6
+#define SA_NODEFER 1073741824
+#define SA_SIGINFO 4
+#define ucontext_regs 184
diff --git a/tools/testing/selftests/arm64/fp/assembler.h b/tools/testing/selftests/arm64/fp/assembler.h
new file mode 100644
index 000000000000..8944f2189206
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/assembler.h
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2015-2019 ARM Limited.
+// Original author: Dave Martin <Dave.Martin@arm.com>
+
+#ifndef ASSEMBLER_H
+#define ASSEMBLER_H
+
+.macro __for from:req, to:req
+	.if (\from) == (\to)
+		_for__body %\from
+	.else
+		__for \from, %(\from) + ((\to) - (\from)) / 2
+		__for %(\from) + ((\to) - (\from)) / 2 + 1, \to
+	.endif
+.endm
+
+.macro _for var:req, from:req, to:req, insn:vararg
+	.macro _for__body \var:req
+		.noaltmacro
+		\insn
+		.altmacro
+	.endm
+
+	.altmacro
+	__for \from, \to
+	.noaltmacro
+
+	.purgem _for__body
+.endm
+
+.macro function name
+	.macro endfunction
+		.type \name, @function
+		.purgem endfunction
+	.endm
+\name:
+.endm
+
+.macro define_accessor name, num, insn
+	.macro \name\()_entry n
+		\insn \n, 1
+		ret
+	.endm
+
+function \name
+	adr	x2, .L__accessor_tbl\@
+	add	x2, x2, x0, lsl #3
+	br	x2
+
+.L__accessor_tbl\@:
+	_for x, 0, (\num) - 1, \name\()_entry \x
+endfunction
+
+	.purgem \name\()_entry
+.endm
+
+#endif /* ! ASSEMBLER_H */
diff --git a/tools/testing/selftests/arm64/fp/fpsimd-stress b/tools/testing/selftests/arm64/fp/fpsimd-stress
new file mode 100755
index 000000000000..781b5b022eaf
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/fpsimd-stress
@@ -0,0 +1,60 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+# Copyright (C) 2015-2019 ARM Limited.
+# Original author: Dave Martin <Dave.Martin@arm.com>
+
+set -ue
+
+NR_CPUS=`nproc`
+
+pids=
+logs=
+
+cleanup () {
+	trap - INT TERM CHLD
+	set +e
+
+	if [ -n "$pids" ]; then
+		kill $pids
+		wait $pids
+		pids=
+	fi
+
+	if [ -n "$logs" ]; then
+		cat $logs
+		rm $logs
+		logs=
+	fi
+}
+
+interrupt () {
+	cleanup
+	exit 0
+}
+
+child_died () {
+	cleanup
+	exit 1
+}
+
+trap interrupt INT TERM EXIT
+trap child_died CHLD
+
+for x in `seq 0 $((NR_CPUS * 4))`; do
+	log=`mktemp`
+	logs=$logs\ $log
+	./fpsimd-test >$log &
+	pids=$pids\ $!
+done
+
+# Wait for all child processes to be created:
+sleep 10
+
+while :; do
+	kill -USR1 $pids
+done &
+pids=$pids\ $!
+
+wait
+
+exit 1
diff --git a/tools/testing/selftests/arm64/fp/fpsimd-test.S b/tools/testing/selftests/arm64/fp/fpsimd-test.S
new file mode 100644
index 000000000000..1c5556bdd11d
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/fpsimd-test.S
@@ -0,0 +1,482 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2015-2019 ARM Limited.
+// Original author: Dave Martin <Dave.Martin@arm.com>
+//
+// Simple FPSIMD context switch test
+// Repeatedly writes unique test patterns into each FPSIMD register
+// and reads them back to verify integrity.
+//
+// for x in `seq 1 NR_CPUS`; do fpsimd-test & pids=$pids\ $! ; done
+// (leave it running for as long as you want...)
+// kill $pids
+
+#include <asm/unistd.h>
+#include "assembler.h"
+#include "asm-offsets.h"
+
+#define NVR	32
+#define MAXVL_B	(128 / 8)
+
+.macro _vldr Vn:req, Xt:req
+	ld1	{v\Vn\().2d}, [x\Xt]
+.endm
+
+.macro _vstr Vn:req, Xt:req
+	st1	{v\Vn\().2d}, [x\Xt]
+.endm
+
+// Generate accessor functions to read/write programmatically selected
+// FPSIMD registers.
+// x0 is the register index to access
+// x1 is the memory address to read from (getv,setp) or store to (setv,setp)
+// All clobber x0-x2
+define_accessor setv, NVR, _vldr
+define_accessor getv, NVR, _vstr
+
+// Print a single character x0 to stdout
+// Clobbers x0-x2,x8
+function putc
+	str	x0, [sp, #-16]!
+
+	mov	x0, #1			// STDOUT_FILENO
+	mov	x1, sp
+	mov	x2, #1
+	mov	x8, #__NR_write
+	svc	#0
+
+	add	sp, sp, #16
+	ret
+endfunction
+
+// Print a NUL-terminated string starting at address x0 to stdout
+// Clobbers x0-x3,x8
+function puts
+	mov	x1, x0
+
+	mov	x2, #0
+0:	ldrb	w3, [x0], #1
+	cbz	w3, 1f
+	add	x2, x2, #1
+	b	0b
+
+1:	mov	w0, #1			// STDOUT_FILENO
+	mov	x8, #__NR_write
+	svc	#0
+
+	ret
+endfunction
+
+// Utility macro to print a literal string
+// Clobbers x0-x4,x8
+.macro puts string
+	.pushsection .rodata.str1.1, "aMS", 1
+.L__puts_literal\@: .string "\string"
+	.popsection
+
+	ldr	x0, =.L__puts_literal\@
+	bl	puts
+.endm
+
+// Print an unsigned decimal number x0 to stdout
+// Clobbers x0-x4,x8
+function putdec
+	mov	x1, sp
+	str	x30, [sp, #-32]!	// Result can't be > 20 digits
+
+	mov	x2, #0
+	strb	w2, [x1, #-1]!		// Write the NUL terminator
+
+	mov	x2, #10
+0:	udiv	x3, x0, x2		// div-mod loop to generate the digits
+	msub	x0, x3, x2, x0
+	add	w0, w0, #'0'
+	strb	w0, [x1, #-1]!
+	mov	x0, x3
+	cbnz	x3, 0b
+
+	ldrb	w0, [x1]
+	cbnz	w0, 1f
+	mov	w0, #'0'		// Print "0" for 0, not ""
+	strb	w0, [x1, #-1]!
+
+1:	mov	x0, x1
+	bl	puts
+
+	ldr	x30, [sp], #32
+	ret
+endfunction
+
+// Print an unsigned decimal number x0 to stdout, followed by a newline
+// Clobbers x0-x5,x8
+function putdecn
+	mov	x5, x30
+
+	bl	putdec
+	mov	x0, #'\n'
+	bl	putc
+
+	ret	x5
+endfunction
+
+
+// Clobbers x0-x3,x8
+function puthexb
+	str	x30, [sp, #-0x10]!
+
+	mov	w3, w0
+	lsr	w0, w0, #4
+	bl	puthexnibble
+	mov	w0, w3
+
+	ldr	x30, [sp], #0x10
+	// fall through to puthexnibble
+endfunction
+// Clobbers x0-x2,x8
+function puthexnibble
+	and	w0, w0, #0xf
+	cmp	w0, #10
+	blo	1f
+	add	w0, w0, #'a' - ('9' + 1)
+1:	add	w0, w0, #'0'
+	b	putc
+endfunction
+
+// x0=data in, x1=size in, clobbers x0-x5,x8
+function dumphex
+	str	x30, [sp, #-0x10]!
+
+	mov	x4, x0
+	mov	x5, x1
+
+0:	subs	x5, x5, #1
+	b.lo	1f
+	ldrb	w0, [x4], #1
+	bl	puthexb
+	b	0b
+
+1:	ldr	x30, [sp], #0x10
+	ret
+endfunction
+
+// Declare some storate space to shadow the SVE register contents:
+.pushsection .text
+.data
+.align 4
+vref:
+	.space	MAXVL_B * NVR
+scratch:
+	.space	MAXVL_B
+.popsection
+
+// Trivial memory copy: copy x2 bytes, starting at address x1, to address x0.
+// Clobbers x0-x3
+function memcpy
+	cmp	x2, #0
+	b.eq	1f
+0:	ldrb	w3, [x1], #1
+	strb	w3, [x0], #1
+	subs	x2, x2, #1
+	b.ne	0b
+1:	ret
+endfunction
+
+// Generate a test pattern for storage in SVE registers
+// x0: pid	(16 bits)
+// x1: register number (6 bits)
+// x2: generation (4 bits)
+function pattern
+	orr	w1, w0, w1, lsl #16
+	orr	w2, w1, w2, lsl #28
+
+	ldr	x0, =scratch
+	mov	w1, #MAXVL_B / 4
+
+0:	str	w2, [x0], #4
+	add	w2, w2, #(1 << 22)
+	subs	w1, w1, #1
+	bne	0b
+
+	ret
+endfunction
+
+// Get the address of shadow data for FPSIMD V-register V<xn>
+.macro _adrv xd, xn, nrtmp
+	ldr	\xd, =vref
+	mov	x\nrtmp, #16
+	madd	\xd, x\nrtmp, \xn, \xd
+.endm
+
+// Set up test pattern in a FPSIMD V-register
+// x0: pid
+// x1: register number
+// x2: generation
+function setup_vreg
+	mov	x4, x30
+
+	mov	x6, x1
+	bl	pattern
+	_adrv	x0, x6, 2
+	mov	x5, x0
+	ldr	x1, =scratch
+	bl	memcpy
+
+	mov	x0, x6
+	mov	x1, x5
+	bl	setv
+
+	ret	x4
+endfunction
+
+// Fill x1 bytes starting at x0 with 0xae (for canary purposes)
+// Clobbers x1, x2.
+function memfill_ae
+	mov	w2, #0xae
+	b	memfill
+endfunction
+
+// Fill x1 bytes starting at x0 with 0.
+// Clobbers x1, x2.
+function memclr
+	mov	w2, #0
+endfunction
+	// fall through to memfill
+
+// Trivial memory fill: fill x1 bytes starting at address x0 with byte w2
+// Clobbers x1
+function memfill
+	cmp	x1, #0
+	b.eq	1f
+
+0:	strb	w2, [x0], #1
+	subs	x1, x1, #1
+	b.ne	0b
+
+1:	ret
+endfunction
+
+// Trivial memory compare: compare x2 bytes starting at address x0 with
+// bytes starting at address x1.
+// Returns only if all bytes match; otherwise, the program is aborted.
+// Clobbers x0-x5.
+function memcmp
+	cbz	x2, 1f
+
+	mov	x5, #0
+0:	ldrb	w3, [x0, x5]
+	ldrb	w4, [x1, x5]
+	add	x5, x5, #1
+	cmp	w3, w4
+	b.ne	barf
+	subs	x2, x2, #1
+	b.ne	0b
+
+1:	ret
+endfunction
+
+// Verify that a FPSIMD V-register matches its shadow in memory, else abort
+// x0: reg number
+// Clobbers x0-x5.
+function check_vreg
+	mov	x3, x30
+
+	_adrv	x5, x0, 6
+	mov	x4, x0
+	ldr	x7, =scratch
+
+	mov	x0, x7
+	mov	x1, x6
+	bl	memfill_ae
+
+	mov	x0, x4
+	mov	x1, x7
+	bl	getv
+
+	mov	x0, x5
+	mov	x1, x7
+	mov	x2, x6
+	mov	x30, x3
+	b	memcmp
+endfunction
+
+// Any SVE register modified here can cause corruption in the main
+// thread -- but *only* the registers modified here.
+function irritator_handler
+	// Increment the irritation signal count (x23):
+	ldr	x0, [x2, #ucontext_regs + 8 * 23]
+	add	x0, x0, #1
+	str	x0, [x2, #ucontext_regs + 8 * 23]
+
+	// Corrupt some random V-regs
+	adr	x0, .text + (irritator_handler - .text) / 16 * 16
+	movi	v0.8b, #7
+	movi	v9.16b, #9
+	movi	v31.8b, #31
+
+	ret
+endfunction
+
+function terminate_handler
+	mov	w21, w0
+	mov	x20, x2
+
+	puts	"Terminated by signal "
+	mov	w0, w21
+	bl	putdec
+	puts	", no error, iterations="
+	ldr	x0, [x20, #ucontext_regs + 8 * 22]
+	bl	putdec
+	puts	", signals="
+	ldr	x0, [x20, #ucontext_regs + 8 * 23]
+	bl	putdecn
+
+	mov	x0, #0
+	mov	x8, #__NR_exit
+	svc	#0
+endfunction
+
+// w0: signal number
+// x1: sa_action
+// w2: sa_flags
+// Clobbers x0-x6,x8
+function setsignal
+	str	x30, [sp, #-((sa_sz + 15) / 16 * 16 + 16)]!
+
+	mov	w4, w0
+	mov	x5, x1
+	mov	w6, w2
+
+	add	x0, sp, #16
+	mov	x1, #sa_sz
+	bl	memclr
+
+	mov	w0, w4
+	add	x1, sp, #16
+	str	w6, [x1, #sa_flags]
+	str	x5, [x1, #sa_handler]
+	mov	x2, #0
+	mov	x3, #sa_mask_sz
+	mov	x8, #__NR_rt_sigaction
+	svc	#0
+
+	cbz	w0, 1f
+
+	puts	"sigaction failure\n"
+	b	.Labort
+
+1:	ldr	x30, [sp], #((sa_sz + 15) / 16 * 16 + 16)
+	ret
+endfunction
+
+// Main program entry point
+.globl _start
+function _start
+_start:
+	// Sanity-check and report the vector length
+
+	mov	x19, #128
+	cmp	x19, #128
+	b.lo	1f
+	cmp	x19, #2048
+	b.hi	1f
+	tst	x19, #(8 - 1)
+	b.eq	2f
+
+1:	puts	"Bad vector length: "
+	mov	x0, x19
+	bl	putdecn
+	b	.Labort
+
+2:	puts	"Vector length:\t"
+	mov	x0, x19
+	bl	putdec
+	puts	" bits\n"
+
+	// Obtain our PID, to ensure test pattern uniqueness between processes
+
+	mov	x8, #__NR_getpid
+	svc	#0
+	mov	x20, x0
+
+	puts	"PID:\t"
+	mov	x0, x20
+	bl	putdecn
+
+	mov	x23, #0		// Irritation signal count
+
+	mov	w0, #SIGINT
+	adr	x1, terminate_handler
+	mov	w2, #SA_SIGINFO
+	bl	setsignal
+
+	mov	w0, #SIGTERM
+	adr	x1, terminate_handler
+	mov	w2, #SA_SIGINFO
+	bl	setsignal
+
+	mov	w0, #SIGUSR1
+	adr	x1, irritator_handler
+	mov	w2, #SA_SIGINFO
+	orr	w2, w2, #SA_NODEFER
+	bl	setsignal
+
+	mov	x22, #0		// generation number, increments per iteration
+.Ltest_loop:
+
+	mov	x21, #0		// Set up V-regs & shadow with test pattern
+0:	mov	x0, x20
+	mov	x1, x21
+	and	x2, x22, #0xf
+	bl	setup_vreg
+	add	x21, x21, #1
+	cmp	x21, #NVR
+	b.lo	0b
+
+// Can't do this when SVE state is volatile across SVC:
+	mov	x8, #__NR_sched_yield	// Encourage preemption
+	svc	#0
+
+	mov	x21, #0
+0:	mov	x0, x21
+	bl	check_vreg
+	add	x21, x21, #1
+	cmp	x21, #NVR
+	b.lo	0b
+
+	add	x22, x22, #1
+	b	.Ltest_loop
+
+.Labort:
+	mov	x0, #0
+	mov	x1, #SIGABRT
+	mov	x8, #__NR_kill
+	svc	#0
+endfunction
+
+function barf
+	mov	x10, x0	// expected data
+	mov	x11, x1	// actual data
+	mov	x12, x2	// data size
+
+	puts	"Mistatch: PID="
+	mov	x0, x20
+	bl	putdec
+	puts	", iteration="
+	mov	x0, x22
+	bl	putdec
+	puts	", reg="
+	mov	x0, x21
+	bl	putdecn
+	puts	"\tExpected ["
+	mov	x0, x10
+	mov	x1, x12
+	bl	dumphex
+	puts	"]\n\tGot      ["
+	mov	x0, x11
+	mov	x1, x12
+	bl	dumphex
+	puts	"]\n"
+
+	mov	x8, #__NR_exit
+	mov	x1, #1
+	svc	#0
+endfunction
diff --git a/tools/testing/selftests/arm64/fp/sve-probe-vls.c b/tools/testing/selftests/arm64/fp/sve-probe-vls.c
new file mode 100644
index 000000000000..b29cbc642c57
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/sve-probe-vls.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2015-2020 ARM Limited.
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+#include <assert.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#include <asm/sigcontext.h>
+
+#include "../../kselftest.h"
+
+int main(int argc, char **argv)
+{
+	unsigned int vq;
+	int vl;
+	static unsigned int vqs[SVE_VQ_MAX];
+	unsigned int nvqs = 0;
+
+	ksft_print_header();
+	ksft_set_plan(2);
+
+	if (!(getauxval(AT_HWCAP) & HWCAP_SVE))
+		ksft_exit_skip("SVE not available");
+
+	/*
+	 * Enumerate up to SVE_VQ_MAX vector lengths
+	 */
+	for (vq = SVE_VQ_MAX; vq > 0; --vq) {
+		vl = prctl(PR_SVE_SET_VL, vq * 16);
+		if (vl == -1)
+			ksft_exit_fail_msg("PR_SVE_SET_VL failed: %s (%d)\n",
+					   strerror(errno), errno);
+
+		vl &= PR_SVE_VL_LEN_MASK;
+
+		if (!sve_vl_valid(vl))
+			ksft_exit_fail_msg("VL %d invalid\n", vl);
+		vq = sve_vq_from_vl(vl);
+
+		if (!(nvqs < SVE_VQ_MAX))
+			ksft_exit_fail_msg("Too many VLs %u >= SVE_VQ_MAX\n",
+					   nvqs);
+		vqs[nvqs++] = vq;
+	}
+	ksft_test_result_pass("Enumerated %d vector lengths\n", nvqs);
+	ksft_test_result_pass("All vector lengths valid\n");
+
+	/* Print out the vector lengths in ascending order: */
+	while (nvqs--)
+		ksft_print_msg("%u\n", 16 * vqs[nvqs]);
+
+	ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace-asm.S b/tools/testing/selftests/arm64/fp/sve-ptrace-asm.S
new file mode 100644
index 000000000000..3e81f9fab574
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/sve-ptrace-asm.S
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2015-2019 ARM Limited.
+// Original author: Dave Martin <Dave.Martin@arm.com>
+#include <asm/unistd.h>
+
+.arch_extension sve
+
+.globl sve_store_patterns
+
+sve_store_patterns:
+	mov	x1, x0
+
+	index	z0.b, #0, #1
+	str	q0, [x1]
+
+	mov	w8, #__NR_getpid
+	svc	#0
+	str	q0, [x1, #0x10]
+
+	mov	z1.d, z0.d
+	str	q0, [x1, #0x20]
+
+	mov	w8, #__NR_getpid
+	svc	#0
+	str	q0, [x1, #0x30]
+
+	mov	z1.d, z0.d
+	str	q0, [x1, #0x40]
+
+	ret
+
+.size	sve_store_patterns, . - sve_store_patterns
+.type	sve_store_patterns, @function
diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c
new file mode 100644
index 000000000000..b2282be6f938
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c
@@ -0,0 +1,336 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2015-2020 ARM Limited.
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+#include <errno.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/auxv.h>
+#include <sys/ptrace.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <sys/wait.h>
+#include <asm/sigcontext.h>
+#include <asm/ptrace.h>
+
+#include "../../kselftest.h"
+
+/* <linux/elf.h> and <sys/auxv.h> don't like each other, so: */
+#ifndef NT_ARM_SVE
+#define NT_ARM_SVE 0x405
+#endif
+
+/* Number of registers filled in by sve_store_patterns */
+#define NR_VREGS 5
+
+void sve_store_patterns(__uint128_t v[NR_VREGS]);
+
+static void dump(const void *buf, size_t size)
+{
+	size_t i;
+	const unsigned char *p = buf;
+
+	for (i = 0; i < size; ++i)
+		printf(" %.2x", *p++);
+}
+
+static int check_vregs(const __uint128_t vregs[NR_VREGS])
+{
+	int i;
+	int ok = 1;
+
+	for (i = 0; i < NR_VREGS; ++i) {
+		printf("# v[%d]:", i);
+		dump(&vregs[i], sizeof vregs[i]);
+		putchar('\n');
+
+		if (vregs[i] != vregs[0])
+			ok = 0;
+	}
+
+	return ok;
+}
+
+static int do_child(void)
+{
+	if (ptrace(PTRACE_TRACEME, -1, NULL, NULL))
+		ksft_exit_fail_msg("PTRACE_TRACEME", strerror(errno));
+
+	if (raise(SIGSTOP))
+		ksft_exit_fail_msg("raise(SIGSTOP)", strerror(errno));
+
+	return EXIT_SUCCESS;
+}
+
+static struct user_sve_header *get_sve(pid_t pid, void **buf, size_t *size)
+{
+	struct user_sve_header *sve;
+	void *p;
+	size_t sz = sizeof *sve;
+	struct iovec iov;
+
+	while (1) {
+		if (*size < sz) {
+			p = realloc(*buf, sz);
+			if (!p) {
+				errno = ENOMEM;
+				goto error;
+			}
+
+			*buf = p;
+			*size = sz;
+		}
+
+		iov.iov_base = *buf;
+		iov.iov_len = sz;
+		if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_SVE, &iov))
+			goto error;
+
+		sve = *buf;
+		if (sve->size <= sz)
+			break;
+
+		sz = sve->size;
+	}
+
+	return sve;
+
+error:
+	return NULL;
+}
+
+static int set_sve(pid_t pid, const struct user_sve_header *sve)
+{
+	struct iovec iov;
+
+	iov.iov_base = (void *)sve;
+	iov.iov_len = sve->size;
+	return ptrace(PTRACE_SETREGSET, pid, NT_ARM_SVE, &iov);
+}
+
+static void dump_sve_regs(const struct user_sve_header *sve, unsigned int num,
+			  unsigned int vlmax)
+{
+	unsigned int vq;
+	unsigned int i;
+
+	if ((sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_SVE)
+		ksft_exit_fail_msg("Dumping non-SVE register\n");
+
+	if (vlmax > sve->vl)
+		vlmax = sve->vl;
+
+	vq = sve_vq_from_vl(sve->vl);
+	for (i = 0; i < num; ++i) {
+		printf("# z%u:", i);
+		dump((const char *)sve + SVE_PT_SVE_ZREG_OFFSET(vq, i),
+		     vlmax);
+		printf("%s\n", vlmax == sve->vl ? "" : " ...");
+	}
+}
+
+static int do_parent(pid_t child)
+{
+	int ret = EXIT_FAILURE;
+	pid_t pid;
+	int status;
+	siginfo_t si;
+	void *svebuf = NULL, *newsvebuf;
+	size_t svebufsz = 0, newsvebufsz;
+	struct user_sve_header *sve, *new_sve;
+	struct user_fpsimd_state *fpsimd;
+	unsigned int i, j;
+	unsigned char *p;
+	unsigned int vq;
+
+	/* Attach to the child */
+	while (1) {
+		int sig;
+
+		pid = wait(&status);
+		if (pid == -1) {
+			perror("wait");
+			goto error;
+		}
+
+		/*
+		 * This should never happen but it's hard to flag in
+		 * the framework.
+		 */
+		if (pid != child)
+			continue;
+
+		if (WIFEXITED(status) || WIFSIGNALED(status))
+			ksft_exit_fail_msg("Child died unexpectedly\n");
+
+		ksft_test_result(WIFSTOPPED(status), "WIFSTOPPED(%d)\n",
+				 status);
+		if (!WIFSTOPPED(status))
+			goto error;
+
+		sig = WSTOPSIG(status);
+
+		if (ptrace(PTRACE_GETSIGINFO, pid, NULL, &si)) {
+			if (errno == ESRCH)
+				goto disappeared;
+
+			if (errno == EINVAL) {
+				sig = 0; /* bust group-stop */
+				goto cont;
+			}
+
+			ksft_test_result_fail("PTRACE_GETSIGINFO: %s\n",
+					      strerror(errno));
+			goto error;
+		}
+
+		if (sig == SIGSTOP && si.si_code == SI_TKILL &&
+		    si.si_pid == pid)
+			break;
+
+	cont:
+		if (ptrace(PTRACE_CONT, pid, NULL, sig)) {
+			if (errno == ESRCH)
+				goto disappeared;
+
+			ksft_test_result_fail("PTRACE_CONT: %s\n",
+					      strerror(errno));
+			goto error;
+		}
+	}
+
+	sve = get_sve(pid, &svebuf, &svebufsz);
+	if (!sve) {
+		int e = errno;
+
+		ksft_test_result_fail("get_sve: %s\n", strerror(errno));
+		if (e == ESRCH)
+			goto disappeared;
+
+		goto error;
+	} else {
+		ksft_test_result_pass("get_sve\n");
+	}
+
+	ksft_test_result((sve->flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD,
+			 "FPSIMD registers\n");
+	if ((sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_FPSIMD)
+		goto error;
+
+	fpsimd = (struct user_fpsimd_state *)((char *)sve +
+					      SVE_PT_FPSIMD_OFFSET);
+	for (i = 0; i < 32; ++i) {
+		p = (unsigned char *)&fpsimd->vregs[i];
+
+		for (j = 0; j < sizeof fpsimd->vregs[i]; ++j)
+			p[j] = j;
+	}
+
+	if (set_sve(pid, sve)) {
+		int e = errno;
+
+		ksft_test_result_fail("set_sve(FPSIMD): %s\n",
+				      strerror(errno));
+		if (e == ESRCH)
+			goto disappeared;
+
+		goto error;
+	}
+
+	vq = sve_vq_from_vl(sve->vl);
+
+	newsvebufsz = SVE_PT_SVE_ZREG_OFFSET(vq, 1);
+	new_sve = newsvebuf = malloc(newsvebufsz);
+	if (!new_sve) {
+		errno = ENOMEM;
+		perror(NULL);
+		goto error;
+	}
+
+	*new_sve = *sve;
+	new_sve->flags &= ~SVE_PT_REGS_MASK;
+	new_sve->flags |= SVE_PT_REGS_SVE;
+	memset((char *)new_sve + SVE_PT_SVE_ZREG_OFFSET(vq, 0),
+	       0, SVE_PT_SVE_ZREG_SIZE(vq));
+	new_sve->size = SVE_PT_SVE_ZREG_OFFSET(vq, 1);
+	if (set_sve(pid, new_sve)) {
+		int e = errno;
+
+		ksft_test_result_fail("set_sve(ZREG): %s\n", strerror(errno));
+		if (e == ESRCH)
+			goto disappeared;
+
+		goto error;
+	}
+
+	new_sve = get_sve(pid, &newsvebuf, &newsvebufsz);
+	if (!new_sve) {
+		int e = errno;
+
+		ksft_test_result_fail("get_sve(ZREG): %s\n", strerror(errno));
+		if (e == ESRCH)
+			goto disappeared;
+
+		goto error;
+	}
+
+	ksft_test_result((new_sve->flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_SVE,
+			 "SVE registers\n");
+	if ((new_sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_SVE)
+		goto error;
+
+	dump_sve_regs(new_sve, 3, sizeof fpsimd->vregs[0]);
+
+	p = (unsigned char *)new_sve + SVE_PT_SVE_ZREG_OFFSET(vq, 1);
+	for (i = 0; i < sizeof fpsimd->vregs[0]; ++i) {
+		unsigned char expected = i;
+
+		if (__BYTE_ORDER == __BIG_ENDIAN)
+			expected = sizeof fpsimd->vregs[0] - 1 - expected;
+
+		ksft_test_result(p[i] == expected, "p[%d] == expected\n", i);
+		if (p[i] != expected)
+			goto error;
+	}
+
+	ret = EXIT_SUCCESS;
+
+error:
+	kill(child, SIGKILL);
+
+disappeared:
+	return ret;
+}
+
+int main(void)
+{
+	int ret = EXIT_SUCCESS;
+	__uint128_t v[NR_VREGS];
+	pid_t child;
+
+	ksft_print_header();
+	ksft_set_plan(20);
+
+	if (!(getauxval(AT_HWCAP) & HWCAP_SVE))
+		ksft_exit_skip("SVE not available\n");
+
+	sve_store_patterns(v);
+
+	if (!check_vregs(v))
+		ksft_exit_fail_msg("Initial check_vregs() failed\n");
+
+	child = fork();
+	if (!child)
+		return do_child();
+
+	if (do_parent(child))
+		ret = EXIT_FAILURE;
+
+	ksft_print_cnts();
+
+	return 0;
+}
diff --git a/tools/testing/selftests/arm64/fp/sve-stress b/tools/testing/selftests/arm64/fp/sve-stress
new file mode 100755
index 000000000000..24dd0922cc02
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/sve-stress
@@ -0,0 +1,59 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+# Copyright (C) 2015-2019 ARM Limited.
+# Original author: Dave Martin <Dave.Martin@arm.com>
+
+set -ue
+
+NR_CPUS=`nproc`
+
+pids=
+logs=
+
+cleanup () {
+	trap - INT TERM CHLD
+	set +e
+
+	if [ -n "$pids" ]; then
+		kill $pids
+		wait $pids
+		pids=
+	fi
+
+	if [ -n "$logs" ]; then
+		cat $logs
+		rm $logs
+		logs=
+	fi
+}
+
+interrupt () {
+	cleanup
+	exit 0
+}
+
+child_died () {
+	cleanup
+	exit 1
+}
+
+trap interrupt INT TERM EXIT
+
+for x in `seq 0 $((NR_CPUS * 4))`; do
+	log=`mktemp`
+	logs=$logs\ $log
+	./sve-test >$log &
+	pids=$pids\ $!
+done
+
+# Wait for all child processes to be created:
+sleep 10
+
+while :; do
+	kill -USR1 $pids
+done &
+pids=$pids\ $!
+
+wait
+
+exit 1
diff --git a/tools/testing/selftests/arm64/fp/sve-test.S b/tools/testing/selftests/arm64/fp/sve-test.S
new file mode 100644
index 000000000000..f95074c9b48b
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/sve-test.S
@@ -0,0 +1,672 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2015-2019 ARM Limited.
+// Original author: Dave Martin <Dave.Martin@arm.com>
+//
+// Simple Scalable Vector Extension context switch test
+// Repeatedly writes unique test patterns into each SVE register
+// and reads them back to verify integrity.
+//
+// for x in `seq 1 NR_CPUS`; do sve-test & pids=$pids\ $! ; done
+// (leave it running for as long as you want...)
+// kill $pids
+
+#include <asm/unistd.h>
+#include "assembler.h"
+#include "asm-offsets.h"
+
+#define NZR	32
+#define NPR	16
+#define MAXVL_B	(2048 / 8)
+
+.arch_extension sve
+
+.macro _sve_ldr_v zt, xn
+	ldr	z\zt, [x\xn]
+.endm
+
+.macro _sve_str_v zt, xn
+	str	z\zt, [x\xn]
+.endm
+
+.macro _sve_ldr_p pt, xn
+	ldr	p\pt, [x\xn]
+.endm
+
+.macro _sve_str_p pt, xn
+	str	p\pt, [x\xn]
+.endm
+
+// Generate accessor functions to read/write programmatically selected
+// SVE registers.
+// x0 is the register index to access
+// x1 is the memory address to read from (getz,setp) or store to (setz,setp)
+// All clobber x0-x2
+define_accessor setz, NZR, _sve_ldr_v
+define_accessor getz, NZR, _sve_str_v
+define_accessor setp, NPR, _sve_ldr_p
+define_accessor getp, NPR, _sve_str_p
+
+// Print a single character x0 to stdout
+// Clobbers x0-x2,x8
+function putc
+	str	x0, [sp, #-16]!
+
+	mov	x0, #1			// STDOUT_FILENO
+	mov	x1, sp
+	mov	x2, #1
+	mov	x8, #__NR_write
+	svc	#0
+
+	add	sp, sp, #16
+	ret
+endfunction
+
+// Print a NUL-terminated string starting at address x0 to stdout
+// Clobbers x0-x3,x8
+function puts
+	mov	x1, x0
+
+	mov	x2, #0
+0:	ldrb	w3, [x0], #1
+	cbz	w3, 1f
+	add	x2, x2, #1
+	b	0b
+
+1:	mov	w0, #1			// STDOUT_FILENO
+	mov	x8, #__NR_write
+	svc	#0
+
+	ret
+endfunction
+
+// Utility macro to print a literal string
+// Clobbers x0-x4,x8
+.macro puts string
+	.pushsection .rodata.str1.1, "aMS", 1
+.L__puts_literal\@: .string "\string"
+	.popsection
+
+	ldr	x0, =.L__puts_literal\@
+	bl	puts
+.endm
+
+// Print an unsigned decimal number x0 to stdout
+// Clobbers x0-x4,x8
+function putdec
+	mov	x1, sp
+	str	x30, [sp, #-32]!	// Result can't be > 20 digits
+
+	mov	x2, #0
+	strb	w2, [x1, #-1]!		// Write the NUL terminator
+
+	mov	x2, #10
+0:	udiv	x3, x0, x2		// div-mod loop to generate the digits
+	msub	x0, x3, x2, x0
+	add	w0, w0, #'0'
+	strb	w0, [x1, #-1]!
+	mov	x0, x3
+	cbnz	x3, 0b
+
+	ldrb	w0, [x1]
+	cbnz	w0, 1f
+	mov	w0, #'0'		// Print "0" for 0, not ""
+	strb	w0, [x1, #-1]!
+
+1:	mov	x0, x1
+	bl	puts
+
+	ldr	x30, [sp], #32
+	ret
+endfunction
+
+// Print an unsigned decimal number x0 to stdout, followed by a newline
+// Clobbers x0-x5,x8
+function putdecn
+	mov	x5, x30
+
+	bl	putdec
+	mov	x0, #'\n'
+	bl	putc
+
+	ret	x5
+endfunction
+
+// Clobbers x0-x3,x8
+function puthexb
+	str	x30, [sp, #-0x10]!
+
+	mov	w3, w0
+	lsr	w0, w0, #4
+	bl	puthexnibble
+	mov	w0, w3
+
+	ldr	x30, [sp], #0x10
+	// fall through to puthexnibble
+endfunction
+// Clobbers x0-x2,x8
+function puthexnibble
+	and	w0, w0, #0xf
+	cmp	w0, #10
+	blo	1f
+	add	w0, w0, #'a' - ('9' + 1)
+1:	add	w0, w0, #'0'
+	b	putc
+endfunction
+
+// x0=data in, x1=size in, clobbers x0-x5,x8
+function dumphex
+	str	x30, [sp, #-0x10]!
+
+	mov	x4, x0
+	mov	x5, x1
+
+0:	subs	x5, x5, #1
+	b.lo	1f
+	ldrb	w0, [x4], #1
+	bl	puthexb
+	b	0b
+
+1:	ldr	x30, [sp], #0x10
+	ret
+endfunction
+
+// Declare some storate space to shadow the SVE register contents:
+.pushsection .text
+.data
+.align 4
+zref:
+	.space	MAXVL_B * NZR
+pref:
+	.space	MAXVL_B / 8 * NPR
+ffrref:
+	.space	MAXVL_B / 8
+scratch:
+	.space	MAXVL_B
+.popsection
+
+// Trivial memory copy: copy x2 bytes, starting at address x1, to address x0.
+// Clobbers x0-x3
+function memcpy
+	cmp	x2, #0
+	b.eq	1f
+0:	ldrb	w3, [x1], #1
+	strb	w3, [x0], #1
+	subs	x2, x2, #1
+	b.ne	0b
+1:	ret
+endfunction
+
+// Generate a test pattern for storage in SVE registers
+// x0: pid	(16 bits)
+// x1: register number (6 bits)
+// x2: generation (4 bits)
+
+// These values are used to constuct a 32-bit pattern that is repeated in the
+// scratch buffer as many times as will fit:
+// bits 31:28	generation number (increments once per test_loop)
+// bits 27:22	32-bit lane index
+// bits 21:16	register number
+// bits 15: 0	pid
+
+function pattern
+	orr	w1, w0, w1, lsl #16
+	orr	w2, w1, w2, lsl #28
+
+	ldr	x0, =scratch
+	mov	w1, #MAXVL_B / 4
+
+0:	str	w2, [x0], #4
+	add	w2, w2, #(1 << 22)
+	subs	w1, w1, #1
+	bne	0b
+
+	ret
+endfunction
+
+// Get the address of shadow data for SVE Z-register Z<xn>
+.macro _adrz xd, xn, nrtmp
+	ldr	\xd, =zref
+	rdvl	x\nrtmp, #1
+	madd	\xd, x\nrtmp, \xn, \xd
+.endm
+
+// Get the address of shadow data for SVE P-register P<xn - NZR>
+.macro _adrp xd, xn, nrtmp
+	ldr	\xd, =pref
+	rdvl	x\nrtmp, #1
+	lsr	x\nrtmp, x\nrtmp, #3
+	sub	\xn, \xn, #NZR
+	madd	\xd, x\nrtmp, \xn, \xd
+.endm
+
+// Set up test pattern in a SVE Z-register
+// x0: pid
+// x1: register number
+// x2: generation
+function setup_zreg
+	mov	x4, x30
+
+	mov	x6, x1
+	bl	pattern
+	_adrz	x0, x6, 2
+	mov	x5, x0
+	ldr	x1, =scratch
+	bl	memcpy
+
+	mov	x0, x6
+	mov	x1, x5
+	bl	setz
+
+	ret	x4
+endfunction
+
+// Set up test pattern in a SVE P-register
+// x0: pid
+// x1: register number
+// x2: generation
+function setup_preg
+	mov	x4, x30
+
+	mov	x6, x1
+	bl	pattern
+	_adrp	x0, x6, 2
+	mov	x5, x0
+	ldr	x1, =scratch
+	bl	memcpy
+
+	mov	x0, x6
+	mov	x1, x5
+	bl	setp
+
+	ret	x4
+endfunction
+
+// Set up test pattern in the FFR
+// x0: pid
+// x2: generation
+// Beware: corrupts P0.
+function setup_ffr
+	mov	x4, x30
+
+	bl	pattern
+	ldr	x0, =ffrref
+	ldr	x1, =scratch
+	rdvl	x2, #1
+	lsr	x2, x2, #3
+	bl	memcpy
+
+	mov	x0, #0
+	ldr	x1, =ffrref
+	bl	setp
+
+	wrffr	p0.b
+
+	ret	x4
+endfunction
+
+// Fill x1 bytes starting at x0 with 0xae (for canary purposes)
+// Clobbers x1, x2.
+function memfill_ae
+	mov	w2, #0xae
+	b	memfill
+endfunction
+
+// Fill x1 bytes starting at x0 with 0.
+// Clobbers x1, x2.
+function memclr
+	mov	w2, #0
+endfunction
+	// fall through to memfill
+
+// Trivial memory fill: fill x1 bytes starting at address x0 with byte w2
+// Clobbers x1
+function memfill
+	cmp	x1, #0
+	b.eq	1f
+
+0:	strb	w2, [x0], #1
+	subs	x1, x1, #1
+	b.ne	0b
+
+1:	ret
+endfunction
+
+// Trivial memory compare: compare x2 bytes starting at address x0 with
+// bytes starting at address x1.
+// Returns only if all bytes match; otherwise, the program is aborted.
+// Clobbers x0-x5.
+function memcmp
+	cbz	x2, 2f
+
+	stp	x0, x1, [sp, #-0x20]!
+	str	x2, [sp, #0x10]
+
+	mov	x5, #0
+0:	ldrb	w3, [x0, x5]
+	ldrb	w4, [x1, x5]
+	add	x5, x5, #1
+	cmp	w3, w4
+	b.ne	1f
+	subs	x2, x2, #1
+	b.ne	0b
+
+1:	ldr	x2, [sp, #0x10]
+	ldp	x0, x1, [sp], #0x20
+	b.ne	barf
+
+2:	ret
+endfunction
+
+// Verify that a SVE Z-register matches its shadow in memory, else abort
+// x0: reg number
+// Clobbers x0-x7.
+function check_zreg
+	mov	x3, x30
+
+	_adrz	x5, x0, 6
+	mov	x4, x0
+	ldr	x7, =scratch
+
+	mov	x0, x7
+	mov	x1, x6
+	bl	memfill_ae
+
+	mov	x0, x4
+	mov	x1, x7
+	bl	getz
+
+	mov	x0, x5
+	mov	x1, x7
+	mov	x2, x6
+	mov	x30, x3
+	b	memcmp
+endfunction
+
+// Verify that a SVE P-register matches its shadow in memory, else abort
+// x0: reg number
+// Clobbers x0-x7.
+function check_preg
+	mov	x3, x30
+
+	_adrp	x5, x0, 6
+	mov	x4, x0
+	ldr	x7, =scratch
+
+	mov	x0, x7
+	mov	x1, x6
+	bl	memfill_ae
+
+	mov	x0, x4
+	mov	x1, x7
+	bl	getp
+
+	mov	x0, x5
+	mov	x1, x7
+	mov	x2, x6
+	mov	x30, x3
+	b	memcmp
+endfunction
+
+// Verify that the FFR matches its shadow in memory, else abort
+// Beware -- corrupts P0.
+// Clobbers x0-x5.
+function check_ffr
+	mov	x3, x30
+
+	ldr	x4, =scratch
+	rdvl	x5, #1
+	lsr	x5, x5, #3
+
+	mov	x0, x4
+	mov	x1, x5
+	bl	memfill_ae
+
+	rdffr	p0.b
+	mov	x0, #0
+	mov	x1, x4
+	bl	getp
+
+	ldr	x0, =ffrref
+	mov	x1, x4
+	mov	x2, x5
+	mov	x30, x3
+	b	memcmp
+endfunction
+
+// Any SVE register modified here can cause corruption in the main
+// thread -- but *only* the registers modified here.
+function irritator_handler
+	// Increment the irritation signal count (x23):
+	ldr	x0, [x2, #ucontext_regs + 8 * 23]
+	add	x0, x0, #1
+	str	x0, [x2, #ucontext_regs + 8 * 23]
+
+	// Corrupt some random Z-regs
+	adr	x0, .text + (irritator_handler - .text) / 16 * 16
+	movi	v0.8b, #1
+	movi	v9.16b, #2
+	movi	v31.8b, #3
+	// And P0
+	rdffr	p0.b
+	// And FFR
+	wrffr	p15.b
+
+	ret
+endfunction
+
+function terminate_handler
+	mov	w21, w0
+	mov	x20, x2
+
+	puts	"Terminated by signal "
+	mov	w0, w21
+	bl	putdec
+	puts	", no error, iterations="
+	ldr	x0, [x20, #ucontext_regs + 8 * 22]
+	bl	putdec
+	puts	", signals="
+	ldr	x0, [x20, #ucontext_regs + 8 * 23]
+	bl	putdecn
+
+	mov	x0, #0
+	mov	x8, #__NR_exit
+	svc	#0
+endfunction
+
+// w0: signal number
+// x1: sa_action
+// w2: sa_flags
+// Clobbers x0-x6,x8
+function setsignal
+	str	x30, [sp, #-((sa_sz + 15) / 16 * 16 + 16)]!
+
+	mov	w4, w0
+	mov	x5, x1
+	mov	w6, w2
+
+	add	x0, sp, #16
+	mov	x1, #sa_sz
+	bl	memclr
+
+	mov	w0, w4
+	add	x1, sp, #16
+	str	w6, [x1, #sa_flags]
+	str	x5, [x1, #sa_handler]
+	mov	x2, #0
+	mov	x3, #sa_mask_sz
+	mov	x8, #__NR_rt_sigaction
+	svc	#0
+
+	cbz	w0, 1f
+
+	puts	"sigaction failure\n"
+	b	.Labort
+
+1:	ldr	x30, [sp], #((sa_sz + 15) / 16 * 16 + 16)
+	ret
+endfunction
+
+// Main program entry point
+.globl _start
+function _start
+_start:
+	// Sanity-check and report the vector length
+
+	rdvl	x19, #8
+	cmp	x19, #128
+	b.lo	1f
+	cmp	x19, #2048
+	b.hi	1f
+	tst	x19, #(8 - 1)
+	b.eq	2f
+
+1:	puts	"Bad vector length: "
+	mov	x0, x19
+	bl	putdecn
+	b	.Labort
+
+2:	puts	"Vector length:\t"
+	mov	x0, x19
+	bl	putdec
+	puts	" bits\n"
+
+	// Obtain our PID, to ensure test pattern uniqueness between processes
+
+	mov	x8, #__NR_getpid
+	svc	#0
+	mov	x20, x0
+
+	puts	"PID:\t"
+	mov	x0, x20
+	bl	putdecn
+
+	mov	x23, #0		// Irritation signal count
+
+	mov	w0, #SIGINT
+	adr	x1, terminate_handler
+	mov	w2, #SA_SIGINFO
+	bl	setsignal
+
+	mov	w0, #SIGTERM
+	adr	x1, terminate_handler
+	mov	w2, #SA_SIGINFO
+	bl	setsignal
+
+	mov	w0, #SIGUSR1
+	adr	x1, irritator_handler
+	mov	w2, #SA_SIGINFO
+	orr	w2, w2, #SA_NODEFER
+	bl	setsignal
+
+	mov	x22, #0		// generation number, increments per iteration
+.Ltest_loop:
+	rdvl	x0, #8
+	cmp	x0, x19
+	b.ne	vl_barf
+
+	mov	x21, #0		// Set up Z-regs & shadow with test pattern
+0:	mov	x0, x20
+	mov	x1, x21
+	and	x2, x22, #0xf
+	bl	setup_zreg
+	add	x21, x21, #1
+	cmp	x21, #NZR
+	b.lo	0b
+
+	mov	x0, x20		// Set up FFR & shadow with test pattern
+	mov	x1, #NZR + NPR
+	and	x2, x22, #0xf
+	bl	setup_ffr
+
+0:	mov	x0, x20		// Set up P-regs & shadow with test pattern
+	mov	x1, x21
+	and	x2, x22, #0xf
+	bl	setup_preg
+	add	x21, x21, #1
+	cmp	x21, #NZR + NPR
+	b.lo	0b
+
+// Can't do this when SVE state is volatile across SVC:
+//	mov	x8, #__NR_sched_yield	// Encourage preemption
+//	svc	#0
+
+	mov	x21, #0
+0:	mov	x0, x21
+	bl	check_zreg
+	add	x21, x21, #1
+	cmp	x21, #NZR
+	b.lo	0b
+
+0:	mov	x0, x21
+	bl	check_preg
+	add	x21, x21, #1
+	cmp	x21, #NZR + NPR
+	b.lo	0b
+
+	bl	check_ffr
+
+	add	x22, x22, #1
+	b	.Ltest_loop
+
+.Labort:
+	mov	x0, #0
+	mov	x1, #SIGABRT
+	mov	x8, #__NR_kill
+	svc	#0
+endfunction
+
+function barf
+// fpsimd.c acitivty log dump hack
+//	ldr	w0, =0xdeadc0de
+//	mov	w8, #__NR_exit
+//	svc	#0
+// end hack
+	mov	x10, x0	// expected data
+	mov	x11, x1	// actual data
+	mov	x12, x2	// data size
+
+	puts	"Mistatch: PID="
+	mov	x0, x20
+	bl	putdec
+	puts	", iteration="
+	mov	x0, x22
+	bl	putdec
+	puts	", reg="
+	mov	x0, x21
+	bl	putdecn
+	puts	"\tExpected ["
+	mov	x0, x10
+	mov	x1, x12
+	bl	dumphex
+	puts	"]\n\tGot      ["
+	mov	x0, x11
+	mov	x1, x12
+	bl	dumphex
+	puts	"]\n"
+
+	mov	x8, #__NR_getpid
+	svc	#0
+// fpsimd.c acitivty log dump hack
+//	ldr	w0, =0xdeadc0de
+//	mov	w8, #__NR_exit
+//	svc	#0
+// ^ end of hack
+	mov	x1, #SIGABRT
+	mov	x8, #__NR_kill
+	svc	#0
+//	mov	x8, #__NR_exit
+//	mov	x1, #1
+//	svc	#0
+endfunction
+
+function vl_barf
+	mov	x10, x0
+
+	puts	"Bad active VL: "
+	mov	x0, x10
+	bl	putdecn
+
+	mov	x8, #__NR_exit
+	mov	x1, #1
+	svc	#0
+endfunction
diff --git a/tools/testing/selftests/arm64/fp/vlset.c b/tools/testing/selftests/arm64/fp/vlset.c
new file mode 100644
index 000000000000..308d27a68226
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/vlset.c
@@ -0,0 +1,155 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2015-2019 ARM Limited.
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+#define _GNU_SOURCE
+#include <assert.h>
+#include <errno.h>
+#include <limits.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <getopt.h>
+#include <unistd.h>
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#include <asm/hwcap.h>
+#include <asm/sigcontext.h>
+
+static int inherit = 0;
+static int no_inherit = 0;
+static int force = 0;
+static unsigned long vl;
+
+static const struct option options[] = {
+	{ "force",	no_argument, NULL, 'f' },
+	{ "inherit",	no_argument, NULL, 'i' },
+	{ "max",	no_argument, NULL, 'M' },
+	{ "no-inherit",	no_argument, &no_inherit, 1 },
+	{ "help",	no_argument, NULL, '?' },
+	{}
+};
+
+static char const *program_name;
+
+static int parse_options(int argc, char **argv)
+{
+	int c;
+	char *rest;
+
+	program_name = strrchr(argv[0], '/');
+	if (program_name)
+		++program_name;
+	else
+		program_name = argv[0];
+
+	while ((c = getopt_long(argc, argv, "Mfhi", options, NULL)) != -1)
+		switch (c) {
+		case 'M':	vl = SVE_VL_MAX; break;
+		case 'f':	force = 1; break;
+		case 'i':	inherit = 1; break;
+		case 0:		break;
+		default:	goto error;
+		}
+
+	if (inherit && no_inherit)
+		goto error;
+
+	if (!vl) {
+		/* vector length */
+		if (optind >= argc)
+			goto error;
+
+		errno = 0;
+		vl = strtoul(argv[optind], &rest, 0);
+		if (*rest) {
+			vl = ULONG_MAX;
+			errno = EINVAL;
+		}
+		if (vl == ULONG_MAX && errno) {
+			fprintf(stderr, "%s: %s: %s\n",
+				program_name, argv[optind], strerror(errno));
+			goto error;
+		}
+
+		++optind;
+	}
+
+	/* command */
+	if (optind >= argc)
+		goto error;
+
+	return 0;
+
+error:
+	fprintf(stderr,
+		"Usage: %s [-f | --force] "
+		"[-i | --inherit | --no-inherit] "
+		"{-M | --max | <vector length>} "
+		"<command> [<arguments> ...]\n",
+		program_name);
+	return -1;
+}
+
+int main(int argc, char **argv)
+{
+	int ret = 126;	/* same as sh(1) command-not-executable error */
+	long flags;
+	char *path;
+	int t, e;
+
+	if (parse_options(argc, argv))
+		return 2;	/* same as sh(1) builtin incorrect-usage */
+
+	if (vl & ~(vl & PR_SVE_VL_LEN_MASK)) {
+		fprintf(stderr, "%s: Invalid vector length %lu\n",
+			program_name, vl);
+		return 2;	/* same as sh(1) builtin incorrect-usage */
+	}
+
+	if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) {
+		fprintf(stderr, "%s: Scalable Vector Extension not present\n",
+			program_name);
+
+		if (!force)
+			goto error;
+
+		fputs("Going ahead anyway (--force):  "
+		      "This is a debug option.  Don't rely on it.\n",
+		      stderr);
+	}
+
+	flags = PR_SVE_SET_VL_ONEXEC;
+	if (inherit)
+		flags |= PR_SVE_VL_INHERIT;
+
+	t = prctl(PR_SVE_SET_VL, vl | flags);
+	if (t < 0) {
+		fprintf(stderr, "%s: PR_SVE_SET_VL: %s\n",
+			program_name, strerror(errno));
+		goto error;
+	}
+
+	t = prctl(PR_SVE_GET_VL);
+	if (t == -1) {
+		fprintf(stderr, "%s: PR_SVE_GET_VL: %s\n",
+			program_name, strerror(errno));
+		goto error;
+	}
+	flags = PR_SVE_VL_LEN_MASK;
+	flags = t & ~flags;
+
+	assert(optind < argc);
+	path = argv[optind];
+
+	execvp(path, &argv[optind]);
+	e = errno;
+	if (errno == ENOENT)
+		ret = 127;	/* same as sh(1) not-found error */
+	fprintf(stderr, "%s: %s: %s\n", program_name, path, strerror(e));
+
+error:
+	return ret;		/* same as sh(1) not-executable error */
+}
diff --git a/tools/testing/selftests/arm64/mte/.gitignore b/tools/testing/selftests/arm64/mte/.gitignore
new file mode 100644
index 000000000000..bc3ac63f3314
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/.gitignore
@@ -0,0 +1,6 @@
+check_buffer_fill
+check_tags_inclusion
+check_child_memory
+check_mmap_options
+check_ksm_options
+check_user_mem
diff --git a/tools/testing/selftests/arm64/mte/Makefile b/tools/testing/selftests/arm64/mte/Makefile
new file mode 100644
index 000000000000..2480226dfe57
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/Makefile
@@ -0,0 +1,29 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2020 ARM Limited
+
+CFLAGS += -std=gnu99 -I.
+SRCS := $(filter-out mte_common_util.c,$(wildcard *.c))
+PROGS := $(patsubst %.c,%,$(SRCS))
+
+#Add mte compiler option
+ifneq ($(shell $(CC) --version 2>&1 | head -n 1 | grep gcc),)
+CFLAGS += -march=armv8.5-a+memtag
+endif
+
+#check if the compiler works well
+mte_cc_support := $(shell if ($(CC) $(CFLAGS) -E -x c /dev/null -o /dev/null 2>&1) then echo "1"; fi)
+
+ifeq ($(mte_cc_support),1)
+# Generated binaries to be installed by top KSFT script
+TEST_GEN_PROGS := $(PROGS)
+
+# Get Kernel headers installed and use them.
+KSFT_KHDR_INSTALL := 1
+endif
+
+# Include KSFT lib.mk.
+include ../../lib.mk
+
+ifeq ($(mte_cc_support),1)
+$(TEST_GEN_PROGS): mte_common_util.c mte_common_util.h mte_helper.S
+endif
diff --git a/tools/testing/selftests/arm64/mte/check_buffer_fill.c b/tools/testing/selftests/arm64/mte/check_buffer_fill.c
new file mode 100644
index 000000000000..242635d79035
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/check_buffer_fill.c
@@ -0,0 +1,475 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#define _GNU_SOURCE
+
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "kselftest.h"
+#include "mte_common_util.h"
+#include "mte_def.h"
+
+#define OVERFLOW_RANGE MT_GRANULE_SIZE
+
+static int sizes[] = {
+	1, 555, 1033, MT_GRANULE_SIZE - 1, MT_GRANULE_SIZE,
+	/* page size - 1*/ 0, /* page_size */ 0, /* page size + 1 */ 0
+};
+
+enum mte_block_test_alloc {
+	UNTAGGED_TAGGED,
+	TAGGED_UNTAGGED,
+	TAGGED_TAGGED,
+	BLOCK_ALLOC_MAX,
+};
+
+static int check_buffer_by_byte(int mem_type, int mode)
+{
+	char *ptr;
+	int i, j, item;
+	bool err;
+
+	mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+	item = sizeof(sizes)/sizeof(int);
+
+	for (i = 0; i < item; i++) {
+		ptr = (char *)mte_allocate_memory(sizes[i], mem_type, 0, true);
+		if (check_allocated_memory(ptr, sizes[i], mem_type, true) != KSFT_PASS)
+			return KSFT_FAIL;
+		mte_initialize_current_context(mode, (uintptr_t)ptr, sizes[i]);
+		/* Set some value in tagged memory */
+		for (j = 0; j < sizes[i]; j++)
+			ptr[j] = '1';
+		mte_wait_after_trig();
+		err = cur_mte_cxt.fault_valid;
+		/* Check the buffer whether it is filled. */
+		for (j = 0; j < sizes[i] && !err; j++) {
+			if (ptr[j] != '1')
+				err = true;
+		}
+		mte_free_memory((void *)ptr, sizes[i], mem_type, true);
+
+		if (err)
+			break;
+	}
+	if (!err)
+		return KSFT_PASS;
+	else
+		return KSFT_FAIL;
+}
+
+static int check_buffer_underflow_by_byte(int mem_type, int mode,
+					  int underflow_range)
+{
+	char *ptr;
+	int i, j, item, last_index;
+	bool err;
+	char *und_ptr = NULL;
+
+	mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+	item = sizeof(sizes)/sizeof(int);
+	for (i = 0; i < item; i++) {
+		ptr = (char *)mte_allocate_memory_tag_range(sizes[i], mem_type, 0,
+							    underflow_range, 0);
+		if (check_allocated_memory_range(ptr, sizes[i], mem_type,
+					       underflow_range, 0) != KSFT_PASS)
+			return KSFT_FAIL;
+
+		mte_initialize_current_context(mode, (uintptr_t)ptr, -underflow_range);
+		last_index = 0;
+		/* Set some value in tagged memory and make the buffer underflow */
+		for (j = sizes[i] - 1; (j >= -underflow_range) &&
+				       (cur_mte_cxt.fault_valid == false); j--) {
+			ptr[j] = '1';
+			last_index = j;
+		}
+		mte_wait_after_trig();
+		err = false;
+		/* Check whether the buffer is filled */
+		for (j = 0; j < sizes[i]; j++) {
+			if (ptr[j] != '1') {
+				err = true;
+				ksft_print_msg("Buffer is not filled at index:%d of ptr:0x%lx\n",
+						j, ptr);
+				break;
+			}
+		}
+		if (err)
+			goto check_buffer_underflow_by_byte_err;
+
+		switch (mode) {
+		case MTE_NONE_ERR:
+			if (cur_mte_cxt.fault_valid == true || last_index != -underflow_range) {
+				err = true;
+				break;
+			}
+			/* There were no fault so the underflow area should be filled */
+			und_ptr = (char *) MT_CLEAR_TAG((size_t) ptr - underflow_range);
+			for (j = 0 ; j < underflow_range; j++) {
+				if (und_ptr[j] != '1') {
+					err = true;
+					break;
+				}
+			}
+			break;
+		case MTE_ASYNC_ERR:
+			/* Imprecise fault should occur otherwise return error */
+			if (cur_mte_cxt.fault_valid == false) {
+				err = true;
+				break;
+			}
+			/*
+			 * The imprecise fault is checked after the write to the buffer,
+			 * so the underflow area before the fault should be filled.
+			 */
+			und_ptr = (char *) MT_CLEAR_TAG((size_t) ptr);
+			for (j = last_index ; j < 0 ; j++) {
+				if (und_ptr[j] != '1') {
+					err = true;
+					break;
+				}
+			}
+			break;
+		case MTE_SYNC_ERR:
+			/* Precise fault should occur otherwise return error */
+			if (!cur_mte_cxt.fault_valid || (last_index != (-1))) {
+				err = true;
+				break;
+			}
+			/* Underflow area should not be filled */
+			und_ptr = (char *) MT_CLEAR_TAG((size_t) ptr);
+			if (und_ptr[-1] == '1')
+				err = true;
+			break;
+		default:
+			err = true;
+		break;
+		}
+check_buffer_underflow_by_byte_err:
+		mte_free_memory_tag_range((void *)ptr, sizes[i], mem_type, underflow_range, 0);
+		if (err)
+			break;
+	}
+	return (err ? KSFT_FAIL : KSFT_PASS);
+}
+
+static int check_buffer_overflow_by_byte(int mem_type, int mode,
+					  int overflow_range)
+{
+	char *ptr;
+	int i, j, item, last_index;
+	bool err;
+	size_t tagged_size, overflow_size;
+	char *over_ptr = NULL;
+
+	mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+	item = sizeof(sizes)/sizeof(int);
+	for (i = 0; i < item; i++) {
+		ptr = (char *)mte_allocate_memory_tag_range(sizes[i], mem_type, 0,
+							    0, overflow_range);
+		if (check_allocated_memory_range(ptr, sizes[i], mem_type,
+						 0, overflow_range) != KSFT_PASS)
+			return KSFT_FAIL;
+
+		tagged_size = MT_ALIGN_UP(sizes[i]);
+
+		mte_initialize_current_context(mode, (uintptr_t)ptr, sizes[i] + overflow_range);
+
+		/* Set some value in tagged memory and make the buffer underflow */
+		for (j = 0, last_index = 0 ; (j < (sizes[i] + overflow_range)) &&
+					     (cur_mte_cxt.fault_valid == false); j++) {
+			ptr[j] = '1';
+			last_index = j;
+		}
+		mte_wait_after_trig();
+		err = false;
+		/* Check whether the buffer is filled */
+		for (j = 0; j < sizes[i]; j++) {
+			if (ptr[j] != '1') {
+				err = true;
+				ksft_print_msg("Buffer is not filled at index:%d of ptr:0x%lx\n",
+						j, ptr);
+				break;
+			}
+		}
+		if (err)
+			goto check_buffer_overflow_by_byte_err;
+
+		overflow_size = overflow_range - (tagged_size - sizes[i]);
+
+		switch (mode) {
+		case MTE_NONE_ERR:
+			if ((cur_mte_cxt.fault_valid == true) ||
+			    (last_index != (sizes[i] + overflow_range - 1))) {
+				err = true;
+				break;
+			}
+			/* There were no fault so the overflow area should be filled */
+			over_ptr = (char *) MT_CLEAR_TAG((size_t) ptr + tagged_size);
+			for (j = 0 ; j < overflow_size; j++) {
+				if (over_ptr[j] != '1') {
+					err = true;
+					break;
+				}
+			}
+			break;
+		case MTE_ASYNC_ERR:
+			/* Imprecise fault should occur otherwise return error */
+			if (cur_mte_cxt.fault_valid == false) {
+				err = true;
+				break;
+			}
+			/*
+			 * The imprecise fault is checked after the write to the buffer,
+			 * so the overflow area should be filled before the fault.
+			 */
+			over_ptr = (char *) MT_CLEAR_TAG((size_t) ptr);
+			for (j = tagged_size ; j < last_index; j++) {
+				if (over_ptr[j] != '1') {
+					err = true;
+					break;
+				}
+			}
+			break;
+		case MTE_SYNC_ERR:
+			/* Precise fault should occur otherwise return error */
+			if (!cur_mte_cxt.fault_valid || (last_index != tagged_size)) {
+				err = true;
+				break;
+			}
+			/* Underflow area should not be filled */
+			over_ptr = (char *) MT_CLEAR_TAG((size_t) ptr + tagged_size);
+			for (j = 0 ; j < overflow_size; j++) {
+				if (over_ptr[j] == '1')
+					err = true;
+			}
+			break;
+		default:
+			err = true;
+		break;
+		}
+check_buffer_overflow_by_byte_err:
+		mte_free_memory_tag_range((void *)ptr, sizes[i], mem_type, 0, overflow_range);
+		if (err)
+			break;
+	}
+	return (err ? KSFT_FAIL : KSFT_PASS);
+}
+
+static int check_buffer_by_block_iterate(int mem_type, int mode, size_t size)
+{
+	char *src, *dst;
+	int j, result = KSFT_PASS;
+	enum mte_block_test_alloc alloc_type = UNTAGGED_TAGGED;
+
+	for (alloc_type = UNTAGGED_TAGGED; alloc_type < (int) BLOCK_ALLOC_MAX; alloc_type++) {
+		switch (alloc_type) {
+		case UNTAGGED_TAGGED:
+			src = (char *)mte_allocate_memory(size, mem_type, 0, false);
+			if (check_allocated_memory(src, size, mem_type, false) != KSFT_PASS)
+				return KSFT_FAIL;
+
+			dst = (char *)mte_allocate_memory(size, mem_type, 0, true);
+			if (check_allocated_memory(dst, size, mem_type, true) != KSFT_PASS) {
+				mte_free_memory((void *)src, size, mem_type, false);
+				return KSFT_FAIL;
+			}
+
+			break;
+		case TAGGED_UNTAGGED:
+			dst = (char *)mte_allocate_memory(size, mem_type, 0, false);
+			if (check_allocated_memory(dst, size, mem_type, false) != KSFT_PASS)
+				return KSFT_FAIL;
+
+			src = (char *)mte_allocate_memory(size, mem_type, 0, true);
+			if (check_allocated_memory(src, size, mem_type, true) != KSFT_PASS) {
+				mte_free_memory((void *)dst, size, mem_type, false);
+				return KSFT_FAIL;
+			}
+			break;
+		case TAGGED_TAGGED:
+			src = (char *)mte_allocate_memory(size, mem_type, 0, true);
+			if (check_allocated_memory(src, size, mem_type, true) != KSFT_PASS)
+				return KSFT_FAIL;
+
+			dst = (char *)mte_allocate_memory(size, mem_type, 0, true);
+			if (check_allocated_memory(dst, size, mem_type, true) != KSFT_PASS) {
+				mte_free_memory((void *)src, size, mem_type, true);
+				return KSFT_FAIL;
+			}
+			break;
+		default:
+			return KSFT_FAIL;
+		}
+
+		cur_mte_cxt.fault_valid = false;
+		result = KSFT_PASS;
+		mte_initialize_current_context(mode, (uintptr_t)dst, size);
+		/* Set some value in memory and copy*/
+		memset((void *)src, (int)'1', size);
+		memcpy((void *)dst, (void *)src, size);
+		mte_wait_after_trig();
+		if (cur_mte_cxt.fault_valid) {
+			result = KSFT_FAIL;
+			goto check_buffer_by_block_err;
+		}
+		/* Check the buffer whether it is filled. */
+		for (j = 0; j < size; j++) {
+			if (src[j] != dst[j] || src[j] != '1') {
+				result = KSFT_FAIL;
+				break;
+			}
+		}
+check_buffer_by_block_err:
+		mte_free_memory((void *)src, size, mem_type,
+				MT_FETCH_TAG((uintptr_t)src) ? true : false);
+		mte_free_memory((void *)dst, size, mem_type,
+				MT_FETCH_TAG((uintptr_t)dst) ? true : false);
+		if (result != KSFT_PASS)
+			return result;
+	}
+	return result;
+}
+
+static int check_buffer_by_block(int mem_type, int mode)
+{
+	int i, item, result = KSFT_PASS;
+
+	mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+	item = sizeof(sizes)/sizeof(int);
+	cur_mte_cxt.fault_valid = false;
+	for (i = 0; i < item; i++) {
+		result = check_buffer_by_block_iterate(mem_type, mode, sizes[i]);
+		if (result != KSFT_PASS)
+			break;
+	}
+	return result;
+}
+
+static int compare_memory_tags(char *ptr, size_t size, int tag)
+{
+	int i, new_tag;
+
+	for (i = 0 ; i < size ; i += MT_GRANULE_SIZE) {
+		new_tag = MT_FETCH_TAG((uintptr_t)(mte_get_tag_address(ptr + i)));
+		if (tag != new_tag) {
+			ksft_print_msg("FAIL: child mte tag mismatch\n");
+			return KSFT_FAIL;
+		}
+	}
+	return KSFT_PASS;
+}
+
+static int check_memory_initial_tags(int mem_type, int mode, int mapping)
+{
+	char *ptr;
+	int run, fd;
+	int total = sizeof(sizes)/sizeof(int);
+
+	mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+	for (run = 0; run < total; run++) {
+		/* check initial tags for anonymous mmap */
+		ptr = (char *)mte_allocate_memory(sizes[run], mem_type, mapping, false);
+		if (check_allocated_memory(ptr, sizes[run], mem_type, false) != KSFT_PASS)
+			return KSFT_FAIL;
+		if (compare_memory_tags(ptr, sizes[run], 0) != KSFT_PASS) {
+			mte_free_memory((void *)ptr, sizes[run], mem_type, false);
+			return KSFT_FAIL;
+		}
+		mte_free_memory((void *)ptr, sizes[run], mem_type, false);
+
+		/* check initial tags for file mmap */
+		fd = create_temp_file();
+		if (fd == -1)
+			return KSFT_FAIL;
+		ptr = (char *)mte_allocate_file_memory(sizes[run], mem_type, mapping, false, fd);
+		if (check_allocated_memory(ptr, sizes[run], mem_type, false) != KSFT_PASS) {
+			close(fd);
+			return KSFT_FAIL;
+		}
+		if (compare_memory_tags(ptr, sizes[run], 0) != KSFT_PASS) {
+			mte_free_memory((void *)ptr, sizes[run], mem_type, false);
+			close(fd);
+			return KSFT_FAIL;
+		}
+		mte_free_memory((void *)ptr, sizes[run], mem_type, false);
+		close(fd);
+	}
+	return KSFT_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+	int err;
+	size_t page_size = getpagesize();
+	int item = sizeof(sizes)/sizeof(int);
+
+	sizes[item - 3] = page_size - 1;
+	sizes[item - 2] = page_size;
+	sizes[item - 1] = page_size + 1;
+
+	err = mte_default_setup();
+	if (err)
+		return err;
+
+	/* Register SIGSEGV handler */
+	mte_register_signal(SIGSEGV, mte_default_handler);
+
+	/* Buffer by byte tests */
+	evaluate_test(check_buffer_by_byte(USE_MMAP, MTE_SYNC_ERR),
+	"Check buffer correctness by byte with sync err mode and mmap memory\n");
+	evaluate_test(check_buffer_by_byte(USE_MMAP, MTE_ASYNC_ERR),
+	"Check buffer correctness by byte with async err mode and mmap memory\n");
+	evaluate_test(check_buffer_by_byte(USE_MPROTECT, MTE_SYNC_ERR),
+	"Check buffer correctness by byte with sync err mode and mmap/mprotect memory\n");
+	evaluate_test(check_buffer_by_byte(USE_MPROTECT, MTE_ASYNC_ERR),
+	"Check buffer correctness by byte with async err mode and mmap/mprotect memory\n");
+
+	/* Check buffer underflow with underflow size as 16 */
+	evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_SYNC_ERR, MT_GRANULE_SIZE),
+	"Check buffer write underflow by byte with sync mode and mmap memory\n");
+	evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_ASYNC_ERR, MT_GRANULE_SIZE),
+	"Check buffer write underflow by byte with async mode and mmap memory\n");
+	evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_NONE_ERR, MT_GRANULE_SIZE),
+	"Check buffer write underflow by byte with tag check fault ignore and mmap memory\n");
+
+	/* Check buffer underflow with underflow size as page size */
+	evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_SYNC_ERR, page_size),
+	"Check buffer write underflow by byte with sync mode and mmap memory\n");
+	evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_ASYNC_ERR, page_size),
+	"Check buffer write underflow by byte with async mode and mmap memory\n");
+	evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_NONE_ERR, page_size),
+	"Check buffer write underflow by byte with tag check fault ignore and mmap memory\n");
+
+	/* Check buffer overflow with overflow size as 16 */
+	evaluate_test(check_buffer_overflow_by_byte(USE_MMAP, MTE_SYNC_ERR, MT_GRANULE_SIZE),
+	"Check buffer write overflow by byte with sync mode and mmap memory\n");
+	evaluate_test(check_buffer_overflow_by_byte(USE_MMAP, MTE_ASYNC_ERR, MT_GRANULE_SIZE),
+	"Check buffer write overflow by byte with async mode and mmap memory\n");
+	evaluate_test(check_buffer_overflow_by_byte(USE_MMAP, MTE_NONE_ERR, MT_GRANULE_SIZE),
+	"Check buffer write overflow by byte with tag fault ignore mode and mmap memory\n");
+
+	/* Buffer by block tests */
+	evaluate_test(check_buffer_by_block(USE_MMAP, MTE_SYNC_ERR),
+	"Check buffer write correctness by block with sync mode and mmap memory\n");
+	evaluate_test(check_buffer_by_block(USE_MMAP, MTE_ASYNC_ERR),
+	"Check buffer write correctness by block with async mode and mmap memory\n");
+	evaluate_test(check_buffer_by_block(USE_MMAP, MTE_NONE_ERR),
+	"Check buffer write correctness by block with tag fault ignore and mmap memory\n");
+
+	/* Initial tags are supposed to be 0 */
+	evaluate_test(check_memory_initial_tags(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE),
+	"Check initial tags with private mapping, sync error mode and mmap memory\n");
+	evaluate_test(check_memory_initial_tags(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE),
+	"Check initial tags with private mapping, sync error mode and mmap/mprotect memory\n");
+	evaluate_test(check_memory_initial_tags(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED),
+	"Check initial tags with shared mapping, sync error mode and mmap memory\n");
+	evaluate_test(check_memory_initial_tags(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED),
+	"Check initial tags with shared mapping, sync error mode and mmap/mprotect memory\n");
+
+	mte_restore_setup();
+	ksft_print_cnts();
+	return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL;
+}
diff --git a/tools/testing/selftests/arm64/mte/check_child_memory.c b/tools/testing/selftests/arm64/mte/check_child_memory.c
new file mode 100644
index 000000000000..97bebdecd29e
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/check_child_memory.c
@@ -0,0 +1,195 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ucontext.h>
+#include <sys/wait.h>
+
+#include "kselftest.h"
+#include "mte_common_util.h"
+#include "mte_def.h"
+
+#define BUFFER_SIZE		(5 * MT_GRANULE_SIZE)
+#define RUNS			(MT_TAG_COUNT)
+#define UNDERFLOW		MT_GRANULE_SIZE
+#define OVERFLOW		MT_GRANULE_SIZE
+
+static size_t page_size;
+static int sizes[] = {
+	1, 537, 989, 1269, MT_GRANULE_SIZE - 1, MT_GRANULE_SIZE,
+	/* page size - 1*/ 0, /* page_size */ 0, /* page size + 1 */ 0
+};
+
+static int check_child_tag_inheritance(char *ptr, int size, int mode)
+{
+	int i, parent_tag, child_tag, fault, child_status;
+	pid_t child;
+
+	parent_tag = MT_FETCH_TAG((uintptr_t)ptr);
+	fault = 0;
+
+	child = fork();
+	if (child == -1) {
+		ksft_print_msg("FAIL: child process creation\n");
+		return KSFT_FAIL;
+	} else if (child == 0) {
+		mte_initialize_current_context(mode, (uintptr_t)ptr, size);
+		/* Do copy on write */
+		memset(ptr, '1', size);
+		mte_wait_after_trig();
+		if (cur_mte_cxt.fault_valid == true) {
+			fault = 1;
+			goto check_child_tag_inheritance_err;
+		}
+		for (i = 0 ; i < size ; i += MT_GRANULE_SIZE) {
+			child_tag = MT_FETCH_TAG((uintptr_t)(mte_get_tag_address(ptr + i)));
+			if (parent_tag != child_tag) {
+				ksft_print_msg("FAIL: child mte tag mismatch\n");
+				fault = 1;
+				goto check_child_tag_inheritance_err;
+			}
+		}
+		mte_initialize_current_context(mode, (uintptr_t)ptr, -UNDERFLOW);
+		memset(ptr - UNDERFLOW, '2', UNDERFLOW);
+		mte_wait_after_trig();
+		if (cur_mte_cxt.fault_valid == false) {
+			fault = 1;
+			goto check_child_tag_inheritance_err;
+		}
+		mte_initialize_current_context(mode, (uintptr_t)ptr, size + OVERFLOW);
+		memset(ptr + size, '3', OVERFLOW);
+		mte_wait_after_trig();
+		if (cur_mte_cxt.fault_valid == false) {
+			fault = 1;
+			goto check_child_tag_inheritance_err;
+		}
+check_child_tag_inheritance_err:
+		_exit(fault);
+	}
+	/* Wait for child process to terminate */
+	wait(&child_status);
+	if (WIFEXITED(child_status))
+		fault = WEXITSTATUS(child_status);
+	else
+		fault = 1;
+	return (fault) ? KSFT_FAIL : KSFT_PASS;
+}
+
+static int check_child_memory_mapping(int mem_type, int mode, int mapping)
+{
+	char *ptr;
+	int run, result;
+	int item = sizeof(sizes)/sizeof(int);
+
+	item = sizeof(sizes)/sizeof(int);
+	mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+	for (run = 0; run < item; run++) {
+		ptr = (char *)mte_allocate_memory_tag_range(sizes[run], mem_type, mapping,
+							    UNDERFLOW, OVERFLOW);
+		if (check_allocated_memory_range(ptr, sizes[run], mem_type,
+						 UNDERFLOW, OVERFLOW) != KSFT_PASS)
+			return KSFT_FAIL;
+		result = check_child_tag_inheritance(ptr, sizes[run], mode);
+		mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type, UNDERFLOW, OVERFLOW);
+		if (result == KSFT_FAIL)
+			return result;
+	}
+	return KSFT_PASS;
+}
+
+static int check_child_file_mapping(int mem_type, int mode, int mapping)
+{
+	char *ptr, *map_ptr;
+	int run, fd, map_size, result = KSFT_PASS;
+	int total = sizeof(sizes)/sizeof(int);
+
+	mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+	for (run = 0; run < total; run++) {
+		fd = create_temp_file();
+		if (fd == -1)
+			return KSFT_FAIL;
+
+		map_size = sizes[run] + OVERFLOW + UNDERFLOW;
+		map_ptr = (char *)mte_allocate_file_memory(map_size, mem_type, mapping, false, fd);
+		if (check_allocated_memory(map_ptr, map_size, mem_type, false) != KSFT_PASS) {
+			close(fd);
+			return KSFT_FAIL;
+		}
+		ptr = map_ptr + UNDERFLOW;
+		mte_initialize_current_context(mode, (uintptr_t)ptr, sizes[run]);
+		/* Only mte enabled memory will allow tag insertion */
+		ptr = mte_insert_tags((void *)ptr, sizes[run]);
+		if (!ptr || cur_mte_cxt.fault_valid == true) {
+			ksft_print_msg("FAIL: Insert tags on file based memory\n");
+			munmap((void *)map_ptr, map_size);
+			close(fd);
+			return KSFT_FAIL;
+		}
+		result = check_child_tag_inheritance(ptr, sizes[run], mode);
+		mte_clear_tags((void *)ptr, sizes[run]);
+		munmap((void *)map_ptr, map_size);
+		close(fd);
+		if (result != KSFT_PASS)
+			return KSFT_FAIL;
+	}
+	return KSFT_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+	int err;
+	int item = sizeof(sizes)/sizeof(int);
+
+	page_size = getpagesize();
+	if (!page_size) {
+		ksft_print_msg("ERR: Unable to get page size\n");
+		return KSFT_FAIL;
+	}
+	sizes[item - 3] = page_size - 1;
+	sizes[item - 2] = page_size;
+	sizes[item - 1] = page_size + 1;
+
+	err = mte_default_setup();
+	if (err)
+		return err;
+
+	/* Register SIGSEGV handler */
+	mte_register_signal(SIGSEGV, mte_default_handler);
+	mte_register_signal(SIGBUS, mte_default_handler);
+
+	evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE),
+		"Check child anonymous memory with private mapping, precise mode and mmap memory\n");
+	evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED),
+		"Check child anonymous memory with shared mapping, precise mode and mmap memory\n");
+	evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE),
+		"Check child anonymous memory with private mapping, imprecise mode and mmap memory\n");
+	evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED),
+		"Check child anonymous memory with shared mapping, imprecise mode and mmap memory\n");
+	evaluate_test(check_child_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE),
+		"Check child anonymous memory with private mapping, precise mode and mmap/mprotect memory\n");
+	evaluate_test(check_child_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED),
+		"Check child anonymous memory with shared mapping, precise mode and mmap/mprotect memory\n");
+
+	evaluate_test(check_child_file_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE),
+		"Check child file memory with private mapping, precise mode and mmap memory\n");
+	evaluate_test(check_child_file_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED),
+		"Check child file memory with shared mapping, precise mode and mmap memory\n");
+	evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE),
+		"Check child file memory with private mapping, imprecise mode and mmap memory\n");
+	evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED),
+		"Check child file memory with shared mapping, imprecise mode and mmap memory\n");
+	evaluate_test(check_child_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE),
+		"Check child file memory with private mapping, precise mode and mmap/mprotect memory\n");
+	evaluate_test(check_child_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED),
+		"Check child file memory with shared mapping, precise mode and mmap/mprotect memory\n");
+
+	mte_restore_setup();
+	ksft_print_cnts();
+	return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL;
+}
diff --git a/tools/testing/selftests/arm64/mte/check_ksm_options.c b/tools/testing/selftests/arm64/mte/check_ksm_options.c
new file mode 100644
index 000000000000..bc41ae630c86
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/check_ksm_options.c
@@ -0,0 +1,159 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ucontext.h>
+#include <sys/mman.h>
+
+#include "kselftest.h"
+#include "mte_common_util.h"
+#include "mte_def.h"
+
+#define TEST_UNIT	10
+#define PATH_KSM	"/sys/kernel/mm/ksm/"
+#define MAX_LOOP	4
+
+static size_t page_sz;
+static unsigned long ksm_sysfs[5];
+
+static unsigned long read_sysfs(char *str)
+{
+	FILE *f;
+	unsigned long val = 0;
+
+	f = fopen(str, "r");
+	if (!f) {
+		ksft_print_msg("ERR: missing %s\n", str);
+		return 0;
+	}
+	fscanf(f, "%lu", &val);
+	fclose(f);
+	return val;
+}
+
+static void write_sysfs(char *str, unsigned long val)
+{
+	FILE *f;
+
+	f = fopen(str, "w");
+	if (!f) {
+		ksft_print_msg("ERR: missing %s\n", str);
+		return;
+	}
+	fprintf(f, "%lu", val);
+	fclose(f);
+}
+
+static void mte_ksm_setup(void)
+{
+	ksm_sysfs[0] = read_sysfs(PATH_KSM "merge_across_nodes");
+	write_sysfs(PATH_KSM "merge_across_nodes", 1);
+	ksm_sysfs[1] = read_sysfs(PATH_KSM "sleep_millisecs");
+	write_sysfs(PATH_KSM "sleep_millisecs", 0);
+	ksm_sysfs[2] = read_sysfs(PATH_KSM "run");
+	write_sysfs(PATH_KSM "run", 1);
+	ksm_sysfs[3] = read_sysfs(PATH_KSM "max_page_sharing");
+	write_sysfs(PATH_KSM "max_page_sharing", ksm_sysfs[3] + TEST_UNIT);
+	ksm_sysfs[4] = read_sysfs(PATH_KSM "pages_to_scan");
+	write_sysfs(PATH_KSM "pages_to_scan", ksm_sysfs[4] + TEST_UNIT);
+}
+
+static void mte_ksm_restore(void)
+{
+	write_sysfs(PATH_KSM "merge_across_nodes", ksm_sysfs[0]);
+	write_sysfs(PATH_KSM "sleep_millisecs", ksm_sysfs[1]);
+	write_sysfs(PATH_KSM "run", ksm_sysfs[2]);
+	write_sysfs(PATH_KSM "max_page_sharing", ksm_sysfs[3]);
+	write_sysfs(PATH_KSM "pages_to_scan", ksm_sysfs[4]);
+}
+
+static void mte_ksm_scan(void)
+{
+	int cur_count = read_sysfs(PATH_KSM "full_scans");
+	int scan_count = cur_count + 1;
+	int max_loop_count = MAX_LOOP;
+
+	while ((cur_count < scan_count) && max_loop_count) {
+		sleep(1);
+		cur_count = read_sysfs(PATH_KSM "full_scans");
+		max_loop_count--;
+	}
+#ifdef DEBUG
+	ksft_print_msg("INFO: pages_shared=%lu pages_sharing=%lu\n",
+			read_sysfs(PATH_KSM "pages_shared"),
+			read_sysfs(PATH_KSM "pages_sharing"));
+#endif
+}
+
+static int check_madvise_options(int mem_type, int mode, int mapping)
+{
+	char *ptr;
+	int err, ret;
+
+	err = KSFT_FAIL;
+	if (access(PATH_KSM, F_OK) == -1) {
+		ksft_print_msg("ERR: Kernel KSM config not enabled\n");
+		return err;
+	}
+
+	mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+	ptr = mte_allocate_memory(TEST_UNIT * page_sz, mem_type, mapping, true);
+	if (check_allocated_memory(ptr, TEST_UNIT * page_sz, mem_type, false) != KSFT_PASS)
+		return KSFT_FAIL;
+
+	/* Insert same data in all the pages */
+	memset(ptr, 'A', TEST_UNIT * page_sz);
+	ret = madvise(ptr, TEST_UNIT * page_sz, MADV_MERGEABLE);
+	if (ret) {
+		ksft_print_msg("ERR: madvise failed to set MADV_UNMERGEABLE\n");
+		goto madvise_err;
+	}
+	mte_ksm_scan();
+	/* Tagged pages should not merge */
+	if ((read_sysfs(PATH_KSM "pages_shared") < 1) ||
+	    (read_sysfs(PATH_KSM "pages_sharing") < (TEST_UNIT - 1)))
+		err = KSFT_PASS;
+madvise_err:
+	mte_free_memory(ptr, TEST_UNIT * page_sz, mem_type, true);
+	return err;
+}
+
+int main(int argc, char *argv[])
+{
+	int err;
+
+	err = mte_default_setup();
+	if (err)
+		return err;
+	page_sz = getpagesize();
+	if (!page_sz) {
+		ksft_print_msg("ERR: Unable to get page size\n");
+		return KSFT_FAIL;
+	}
+	/* Register signal handlers */
+	mte_register_signal(SIGBUS, mte_default_handler);
+	mte_register_signal(SIGSEGV, mte_default_handler);
+	/* Enable KSM */
+	mte_ksm_setup();
+
+	evaluate_test(check_madvise_options(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE),
+		"Check KSM mte page merge for private mapping, sync mode and mmap memory\n");
+	evaluate_test(check_madvise_options(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE),
+		"Check KSM mte page merge for private mapping, async mode and mmap memory\n");
+	evaluate_test(check_madvise_options(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED),
+		"Check KSM mte page merge for shared mapping, sync mode and mmap memory\n");
+	evaluate_test(check_madvise_options(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED),
+		"Check KSM mte page merge for shared mapping, async mode and mmap memory\n");
+
+	mte_ksm_restore();
+	mte_restore_setup();
+	ksft_print_cnts();
+	return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL;
+}
diff --git a/tools/testing/selftests/arm64/mte/check_mmap_options.c b/tools/testing/selftests/arm64/mte/check_mmap_options.c
new file mode 100644
index 000000000000..33b13b86199b
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/check_mmap_options.c
@@ -0,0 +1,262 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ucontext.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "kselftest.h"
+#include "mte_common_util.h"
+#include "mte_def.h"
+
+#define RUNS			(MT_TAG_COUNT)
+#define UNDERFLOW		MT_GRANULE_SIZE
+#define OVERFLOW		MT_GRANULE_SIZE
+#define TAG_CHECK_ON		0
+#define TAG_CHECK_OFF		1
+
+static size_t page_size;
+static int sizes[] = {
+	1, 537, 989, 1269, MT_GRANULE_SIZE - 1, MT_GRANULE_SIZE,
+	/* page size - 1*/ 0, /* page_size */ 0, /* page size + 1 */ 0
+};
+
+static int check_mte_memory(char *ptr, int size, int mode, int tag_check)
+{
+	mte_initialize_current_context(mode, (uintptr_t)ptr, size);
+	memset(ptr, '1', size);
+	mte_wait_after_trig();
+	if (cur_mte_cxt.fault_valid == true)
+		return KSFT_FAIL;
+
+	mte_initialize_current_context(mode, (uintptr_t)ptr, -UNDERFLOW);
+	memset(ptr - UNDERFLOW, '2', UNDERFLOW);
+	mte_wait_after_trig();
+	if (cur_mte_cxt.fault_valid == false && tag_check == TAG_CHECK_ON)
+		return KSFT_FAIL;
+	if (cur_mte_cxt.fault_valid == true && tag_check == TAG_CHECK_OFF)
+		return KSFT_FAIL;
+
+	mte_initialize_current_context(mode, (uintptr_t)ptr, size + OVERFLOW);
+	memset(ptr + size, '3', OVERFLOW);
+	mte_wait_after_trig();
+	if (cur_mte_cxt.fault_valid == false && tag_check == TAG_CHECK_ON)
+		return KSFT_FAIL;
+	if (cur_mte_cxt.fault_valid == true && tag_check == TAG_CHECK_OFF)
+		return KSFT_FAIL;
+
+	return KSFT_PASS;
+}
+
+static int check_anonymous_memory_mapping(int mem_type, int mode, int mapping, int tag_check)
+{
+	char *ptr, *map_ptr;
+	int run, result, map_size;
+	int item = sizeof(sizes)/sizeof(int);
+
+	item = sizeof(sizes)/sizeof(int);
+	mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+	for (run = 0; run < item; run++) {
+		map_size = sizes[run] + OVERFLOW + UNDERFLOW;
+		map_ptr = (char *)mte_allocate_memory(map_size, mem_type, mapping, false);
+		if (check_allocated_memory(map_ptr, map_size, mem_type, false) != KSFT_PASS)
+			return KSFT_FAIL;
+
+		ptr = map_ptr + UNDERFLOW;
+		mte_initialize_current_context(mode, (uintptr_t)ptr, sizes[run]);
+		/* Only mte enabled memory will allow tag insertion */
+		ptr = mte_insert_tags((void *)ptr, sizes[run]);
+		if (!ptr || cur_mte_cxt.fault_valid == true) {
+			ksft_print_msg("FAIL: Insert tags on anonymous mmap memory\n");
+			munmap((void *)map_ptr, map_size);
+			return KSFT_FAIL;
+		}
+		result = check_mte_memory(ptr, sizes[run], mode, tag_check);
+		mte_clear_tags((void *)ptr, sizes[run]);
+		mte_free_memory((void *)map_ptr, map_size, mem_type, false);
+		if (result == KSFT_FAIL)
+			return KSFT_FAIL;
+	}
+	return KSFT_PASS;
+}
+
+static int check_file_memory_mapping(int mem_type, int mode, int mapping, int tag_check)
+{
+	char *ptr, *map_ptr;
+	int run, fd, map_size;
+	int total = sizeof(sizes)/sizeof(int);
+	int result = KSFT_PASS;
+
+	mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+	for (run = 0; run < total; run++) {
+		fd = create_temp_file();
+		if (fd == -1)
+			return KSFT_FAIL;
+
+		map_size = sizes[run] + UNDERFLOW + OVERFLOW;
+		map_ptr = (char *)mte_allocate_file_memory(map_size, mem_type, mapping, false, fd);
+		if (check_allocated_memory(map_ptr, map_size, mem_type, false) != KSFT_PASS) {
+			close(fd);
+			return KSFT_FAIL;
+		}
+		ptr = map_ptr + UNDERFLOW;
+		mte_initialize_current_context(mode, (uintptr_t)ptr, sizes[run]);
+		/* Only mte enabled memory will allow tag insertion */
+		ptr = mte_insert_tags((void *)ptr, sizes[run]);
+		if (!ptr || cur_mte_cxt.fault_valid == true) {
+			ksft_print_msg("FAIL: Insert tags on file based memory\n");
+			munmap((void *)map_ptr, map_size);
+			close(fd);
+			return KSFT_FAIL;
+		}
+		result = check_mte_memory(ptr, sizes[run], mode, tag_check);
+		mte_clear_tags((void *)ptr, sizes[run]);
+		munmap((void *)map_ptr, map_size);
+		close(fd);
+		if (result == KSFT_FAIL)
+			break;
+	}
+	return result;
+}
+
+static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping)
+{
+	char *ptr, *map_ptr;
+	int run, prot_flag, result, fd, map_size;
+	int total = sizeof(sizes)/sizeof(int);
+
+	prot_flag = PROT_READ | PROT_WRITE;
+	mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+	for (run = 0; run < total; run++) {
+		map_size = sizes[run] + OVERFLOW + UNDERFLOW;
+		ptr = (char *)mte_allocate_memory_tag_range(sizes[run], mem_type, mapping,
+							    UNDERFLOW, OVERFLOW);
+		if (check_allocated_memory_range(ptr, sizes[run], mem_type,
+						 UNDERFLOW, OVERFLOW) != KSFT_PASS)
+			return KSFT_FAIL;
+		map_ptr = ptr - UNDERFLOW;
+		/* Try to clear PROT_MTE property and verify it by tag checking */
+		if (mprotect(map_ptr, map_size, prot_flag)) {
+			mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type,
+						  UNDERFLOW, OVERFLOW);
+			ksft_print_msg("FAIL: mprotect not ignoring clear PROT_MTE property\n");
+			return KSFT_FAIL;
+		}
+		result = check_mte_memory(ptr, sizes[run], mode, TAG_CHECK_ON);
+		mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type, UNDERFLOW, OVERFLOW);
+		if (result != KSFT_PASS)
+			return KSFT_FAIL;
+
+		fd = create_temp_file();
+		if (fd == -1)
+			return KSFT_FAIL;
+		ptr = (char *)mte_allocate_file_memory_tag_range(sizes[run], mem_type, mapping,
+								 UNDERFLOW, OVERFLOW, fd);
+		if (check_allocated_memory_range(ptr, sizes[run], mem_type,
+						 UNDERFLOW, OVERFLOW) != KSFT_PASS) {
+			close(fd);
+			return KSFT_FAIL;
+		}
+		map_ptr = ptr - UNDERFLOW;
+		/* Try to clear PROT_MTE property and verify it by tag checking */
+		if (mprotect(map_ptr, map_size, prot_flag)) {
+			ksft_print_msg("FAIL: mprotect not ignoring clear PROT_MTE property\n");
+			mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type,
+						  UNDERFLOW, OVERFLOW);
+			close(fd);
+			return KSFT_FAIL;
+		}
+		result = check_mte_memory(ptr, sizes[run], mode, TAG_CHECK_ON);
+		mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type, UNDERFLOW, OVERFLOW);
+		close(fd);
+		if (result != KSFT_PASS)
+			return KSFT_FAIL;
+	}
+	return KSFT_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+	int err;
+	int item = sizeof(sizes)/sizeof(int);
+
+	err = mte_default_setup();
+	if (err)
+		return err;
+	page_size = getpagesize();
+	if (!page_size) {
+		ksft_print_msg("ERR: Unable to get page size\n");
+		return KSFT_FAIL;
+	}
+	sizes[item - 3] = page_size - 1;
+	sizes[item - 2] = page_size;
+	sizes[item - 1] = page_size + 1;
+
+	/* Register signal handlers */
+	mte_register_signal(SIGBUS, mte_default_handler);
+	mte_register_signal(SIGSEGV, mte_default_handler);
+
+	mte_enable_pstate_tco();
+	evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_OFF),
+	"Check anonymous memory with private mapping, sync error mode, mmap memory and tag check off\n");
+	evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_OFF),
+	"Check file memory with private mapping, sync error mode, mmap/mprotect memory and tag check off\n");
+
+	mte_disable_pstate_tco();
+	evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_NONE_ERR, MAP_PRIVATE, TAG_CHECK_OFF),
+	"Check anonymous memory with private mapping, no error mode, mmap memory and tag check off\n");
+	evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_NONE_ERR, MAP_PRIVATE, TAG_CHECK_OFF),
+	"Check file memory with private mapping, no error mode, mmap/mprotect memory and tag check off\n");
+
+	evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
+	"Check anonymous memory with private mapping, sync error mode, mmap memory and tag check on\n");
+	evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
+	"Check anonymous memory with private mapping, sync error mode, mmap/mprotect memory and tag check on\n");
+	evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
+	"Check anonymous memory with shared mapping, sync error mode, mmap memory and tag check on\n");
+	evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
+	"Check anonymous memory with shared mapping, sync error mode, mmap/mprotect memory and tag check on\n");
+	evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
+	"Check anonymous memory with private mapping, async error mode, mmap memory and tag check on\n");
+	evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
+	"Check anonymous memory with private mapping, async error mode, mmap/mprotect memory and tag check on\n");
+	evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
+	"Check anonymous memory with shared mapping, async error mode, mmap memory and tag check on\n");
+	evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
+	"Check anonymous memory with shared mapping, async error mode, mmap/mprotect memory and tag check on\n");
+
+	evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
+	"Check file memory with private mapping, sync error mode, mmap memory and tag check on\n");
+	evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
+	"Check file memory with private mapping, sync error mode, mmap/mprotect memory and tag check on\n");
+	evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
+	"Check file memory with shared mapping, sync error mode, mmap memory and tag check on\n");
+	evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
+	"Check file memory with shared mapping, sync error mode, mmap/mprotect memory and tag check on\n");
+	evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
+	"Check file memory with private mapping, async error mode, mmap memory and tag check on\n");
+	evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
+	"Check file memory with private mapping, async error mode, mmap/mprotect memory and tag check on\n");
+	evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
+	"Check file memory with shared mapping, async error mode, mmap memory and tag check on\n");
+	evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
+	"Check file memory with shared mapping, async error mode, mmap/mprotect memory and tag check on\n");
+
+	evaluate_test(check_clear_prot_mte_flag(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE),
+	"Check clear PROT_MTE flags with private mapping, sync error mode and mmap memory\n");
+	evaluate_test(check_clear_prot_mte_flag(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE),
+	"Check clear PROT_MTE flags with private mapping and sync error mode and mmap/mprotect memory\n");
+
+	mte_restore_setup();
+	ksft_print_cnts();
+	return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL;
+}
diff --git a/tools/testing/selftests/arm64/mte/check_tags_inclusion.c b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c
new file mode 100644
index 000000000000..94d245a0ed56
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c
@@ -0,0 +1,185 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ucontext.h>
+#include <sys/wait.h>
+
+#include "kselftest.h"
+#include "mte_common_util.h"
+#include "mte_def.h"
+
+#define BUFFER_SIZE		(5 * MT_GRANULE_SIZE)
+#define RUNS			(MT_TAG_COUNT * 2)
+#define MTE_LAST_TAG_MASK	(0x7FFF)
+
+static int verify_mte_pointer_validity(char *ptr, int mode)
+{
+	mte_initialize_current_context(mode, (uintptr_t)ptr, BUFFER_SIZE);
+	/* Check the validity of the tagged pointer */
+	memset((void *)ptr, '1', BUFFER_SIZE);
+	mte_wait_after_trig();
+	if (cur_mte_cxt.fault_valid)
+		return KSFT_FAIL;
+	/* Proceed further for nonzero tags */
+	if (!MT_FETCH_TAG((uintptr_t)ptr))
+		return KSFT_PASS;
+	mte_initialize_current_context(mode, (uintptr_t)ptr, BUFFER_SIZE + 1);
+	/* Check the validity outside the range */
+	ptr[BUFFER_SIZE] = '2';
+	mte_wait_after_trig();
+	if (!cur_mte_cxt.fault_valid)
+		return KSFT_FAIL;
+	else
+		return KSFT_PASS;
+}
+
+static int check_single_included_tags(int mem_type, int mode)
+{
+	char *ptr;
+	int tag, run, result = KSFT_PASS;
+
+	ptr = (char *)mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false);
+	if (check_allocated_memory(ptr, BUFFER_SIZE + MT_GRANULE_SIZE,
+				   mem_type, false) != KSFT_PASS)
+		return KSFT_FAIL;
+
+	for (tag = 0; (tag < MT_TAG_COUNT) && (result == KSFT_PASS); tag++) {
+		mte_switch_mode(mode, MT_INCLUDE_VALID_TAG(tag));
+		/* Try to catch a excluded tag by a number of tries. */
+		for (run = 0; (run < RUNS) && (result == KSFT_PASS); run++) {
+			ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE);
+			/* Check tag value */
+			if (MT_FETCH_TAG((uintptr_t)ptr) == tag) {
+				ksft_print_msg("FAIL: wrong tag = 0x%x with include mask=0x%x\n",
+					       MT_FETCH_TAG((uintptr_t)ptr),
+					       MT_INCLUDE_VALID_TAG(tag));
+				result = KSFT_FAIL;
+				break;
+			}
+			result = verify_mte_pointer_validity(ptr, mode);
+		}
+	}
+	mte_free_memory_tag_range((void *)ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE);
+	return result;
+}
+
+static int check_multiple_included_tags(int mem_type, int mode)
+{
+	char *ptr;
+	int tag, run, result = KSFT_PASS;
+	unsigned long excl_mask = 0;
+
+	ptr = (char *)mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false);
+	if (check_allocated_memory(ptr, BUFFER_SIZE + MT_GRANULE_SIZE,
+				   mem_type, false) != KSFT_PASS)
+		return KSFT_FAIL;
+
+	for (tag = 0; (tag < MT_TAG_COUNT - 1) && (result == KSFT_PASS); tag++) {
+		excl_mask |= 1 << tag;
+		mte_switch_mode(mode, MT_INCLUDE_VALID_TAGS(excl_mask));
+		/* Try to catch a excluded tag by a number of tries. */
+		for (run = 0; (run < RUNS) && (result == KSFT_PASS); run++) {
+			ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE);
+			/* Check tag value */
+			if (MT_FETCH_TAG((uintptr_t)ptr) < tag) {
+				ksft_print_msg("FAIL: wrong tag = 0x%x with include mask=0x%x\n",
+					       MT_FETCH_TAG((uintptr_t)ptr),
+					       MT_INCLUDE_VALID_TAGS(excl_mask));
+				result = KSFT_FAIL;
+				break;
+			}
+			result = verify_mte_pointer_validity(ptr, mode);
+		}
+	}
+	mte_free_memory_tag_range((void *)ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE);
+	return result;
+}
+
+static int check_all_included_tags(int mem_type, int mode)
+{
+	char *ptr;
+	int run, result = KSFT_PASS;
+
+	ptr = (char *)mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false);
+	if (check_allocated_memory(ptr, BUFFER_SIZE + MT_GRANULE_SIZE,
+				   mem_type, false) != KSFT_PASS)
+		return KSFT_FAIL;
+
+	mte_switch_mode(mode, MT_INCLUDE_TAG_MASK);
+	/* Try to catch a excluded tag by a number of tries. */
+	for (run = 0; (run < RUNS) && (result == KSFT_PASS); run++) {
+		ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE);
+		/*
+		 * Here tag byte can be between 0x0 to 0xF (full allowed range)
+		 * so no need to match so just verify if it is writable.
+		 */
+		result = verify_mte_pointer_validity(ptr, mode);
+	}
+	mte_free_memory_tag_range((void *)ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE);
+	return result;
+}
+
+static int check_none_included_tags(int mem_type, int mode)
+{
+	char *ptr;
+	int run;
+
+	ptr = (char *)mte_allocate_memory(BUFFER_SIZE, mem_type, 0, false);
+	if (check_allocated_memory(ptr, BUFFER_SIZE, mem_type, false) != KSFT_PASS)
+		return KSFT_FAIL;
+
+	mte_switch_mode(mode, MT_EXCLUDE_TAG_MASK);
+	/* Try to catch a excluded tag by a number of tries. */
+	for (run = 0; run < RUNS; run++) {
+		ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE);
+		/* Here all tags exluded so tag value generated should be 0 */
+		if (MT_FETCH_TAG((uintptr_t)ptr)) {
+			ksft_print_msg("FAIL: included tag value found\n");
+			mte_free_memory((void *)ptr, BUFFER_SIZE, mem_type, true);
+			return KSFT_FAIL;
+		}
+		mte_initialize_current_context(mode, (uintptr_t)ptr, BUFFER_SIZE);
+		/* Check the write validity of the untagged pointer */
+		memset((void *)ptr, '1', BUFFER_SIZE);
+		mte_wait_after_trig();
+		if (cur_mte_cxt.fault_valid)
+			break;
+	}
+	mte_free_memory((void *)ptr, BUFFER_SIZE, mem_type, false);
+	if (cur_mte_cxt.fault_valid)
+		return KSFT_FAIL;
+	else
+		return KSFT_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+	int err;
+
+	err = mte_default_setup();
+	if (err)
+		return err;
+
+	/* Register SIGSEGV handler */
+	mte_register_signal(SIGSEGV, mte_default_handler);
+
+	evaluate_test(check_single_included_tags(USE_MMAP, MTE_SYNC_ERR),
+		      "Check an included tag value with sync mode\n");
+	evaluate_test(check_multiple_included_tags(USE_MMAP, MTE_SYNC_ERR),
+		      "Check different included tags value with sync mode\n");
+	evaluate_test(check_none_included_tags(USE_MMAP, MTE_SYNC_ERR),
+		      "Check none included tags value with sync mode\n");
+	evaluate_test(check_all_included_tags(USE_MMAP, MTE_SYNC_ERR),
+		      "Check all included tags value with sync mode\n");
+
+	mte_restore_setup();
+	ksft_print_cnts();
+	return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL;
+}
diff --git a/tools/testing/selftests/arm64/mte/check_user_mem.c b/tools/testing/selftests/arm64/mte/check_user_mem.c
new file mode 100644
index 000000000000..594e98e76880
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/check_user_mem.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <ucontext.h>
+#include <unistd.h>
+#include <sys/mman.h>
+
+#include "kselftest.h"
+#include "mte_common_util.h"
+#include "mte_def.h"
+
+static size_t page_sz;
+
+static int check_usermem_access_fault(int mem_type, int mode, int mapping)
+{
+	int fd, i, err;
+	char val = 'A';
+	size_t len, read_len;
+	void *ptr, *ptr_next;
+
+	err = KSFT_FAIL;
+	len = 2 * page_sz;
+	mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+	fd = create_temp_file();
+	if (fd == -1)
+		return KSFT_FAIL;
+	for (i = 0; i < len; i++)
+		write(fd, &val, sizeof(val));
+	lseek(fd, 0, 0);
+	ptr = mte_allocate_memory(len, mem_type, mapping, true);
+	if (check_allocated_memory(ptr, len, mem_type, true) != KSFT_PASS) {
+		close(fd);
+		return KSFT_FAIL;
+	}
+	mte_initialize_current_context(mode, (uintptr_t)ptr, len);
+	/* Copy from file into buffer with valid tag */
+	read_len = read(fd, ptr, len);
+	mte_wait_after_trig();
+	if (cur_mte_cxt.fault_valid || read_len < len)
+		goto usermem_acc_err;
+	/* Verify same pattern is read */
+	for (i = 0; i < len; i++)
+		if (*(char *)(ptr + i) != val)
+			break;
+	if (i < len)
+		goto usermem_acc_err;
+
+	/* Tag the next half of memory with different value */
+	ptr_next = (void *)((unsigned long)ptr + page_sz);
+	ptr_next = mte_insert_new_tag(ptr_next);
+	mte_set_tag_address_range(ptr_next, page_sz);
+
+	lseek(fd, 0, 0);
+	/* Copy from file into buffer with invalid tag */
+	read_len = read(fd, ptr, len);
+	mte_wait_after_trig();
+	/*
+	 * Accessing user memory in kernel with invalid tag should fail in sync
+	 * mode without fault but may not fail in async mode as per the
+	 * implemented MTE userspace support in Arm64 kernel.
+	 */
+	if (mode == MTE_SYNC_ERR &&
+	    !cur_mte_cxt.fault_valid && read_len < len) {
+		err = KSFT_PASS;
+	} else if (mode == MTE_ASYNC_ERR &&
+		   !cur_mte_cxt.fault_valid && read_len == len) {
+		err = KSFT_PASS;
+	}
+usermem_acc_err:
+	mte_free_memory((void *)ptr, len, mem_type, true);
+	close(fd);
+	return err;
+}
+
+int main(int argc, char *argv[])
+{
+	int err;
+
+	page_sz = getpagesize();
+	if (!page_sz) {
+		ksft_print_msg("ERR: Unable to get page size\n");
+		return KSFT_FAIL;
+	}
+	err = mte_default_setup();
+	if (err)
+		return err;
+	/* Register signal handlers */
+	mte_register_signal(SIGSEGV, mte_default_handler);
+
+	evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE),
+		"Check memory access from kernel in sync mode, private mapping and mmap memory\n");
+	evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED),
+		"Check memory access from kernel in sync mode, shared mapping and mmap memory\n");
+
+	evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE),
+		"Check memory access from kernel in async mode, private mapping and mmap memory\n");
+	evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED),
+		"Check memory access from kernel in async mode, shared mapping and mmap memory\n");
+
+	mte_restore_setup();
+	ksft_print_cnts();
+	return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL;
+}
diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.c b/tools/testing/selftests/arm64/mte/mte_common_util.c
new file mode 100644
index 000000000000..39f8908988ea
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/mte_common_util.c
@@ -0,0 +1,341 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#include <fcntl.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <linux/auxvec.h>
+#include <sys/auxv.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+
+#include <asm/hwcap.h>
+
+#include "kselftest.h"
+#include "mte_common_util.h"
+#include "mte_def.h"
+
+#define INIT_BUFFER_SIZE       256
+
+struct mte_fault_cxt cur_mte_cxt;
+static unsigned int mte_cur_mode;
+static unsigned int mte_cur_pstate_tco;
+
+void mte_default_handler(int signum, siginfo_t *si, void *uc)
+{
+	unsigned long addr = (unsigned long)si->si_addr;
+
+	if (signum == SIGSEGV) {
+#ifdef DEBUG
+		ksft_print_msg("INFO: SIGSEGV signal at pc=%lx, fault addr=%lx, si_code=%lx\n",
+				((ucontext_t *)uc)->uc_mcontext.pc, addr, si->si_code);
+#endif
+		if (si->si_code == SEGV_MTEAERR) {
+			if (cur_mte_cxt.trig_si_code == si->si_code)
+				cur_mte_cxt.fault_valid = true;
+			return;
+		}
+		/* Compare the context for precise error */
+		else if (si->si_code == SEGV_MTESERR) {
+			if (cur_mte_cxt.trig_si_code == si->si_code &&
+			    ((cur_mte_cxt.trig_range >= 0 &&
+			      addr >= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) &&
+			      addr <= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)) ||
+			     (cur_mte_cxt.trig_range < 0 &&
+			      addr <= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) &&
+			      addr >= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)))) {
+				cur_mte_cxt.fault_valid = true;
+				/* Adjust the pc by 4 */
+				((ucontext_t *)uc)->uc_mcontext.pc += 4;
+			} else {
+				ksft_print_msg("Invalid MTE synchronous exception caught!\n");
+				exit(1);
+			}
+		} else {
+			ksft_print_msg("Unknown SIGSEGV exception caught!\n");
+			exit(1);
+		}
+	} else if (signum == SIGBUS) {
+		ksft_print_msg("INFO: SIGBUS signal at pc=%lx, fault addr=%lx, si_code=%lx\n",
+				((ucontext_t *)uc)->uc_mcontext.pc, addr, si->si_code);
+		if ((cur_mte_cxt.trig_range >= 0 &&
+		     addr >= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) &&
+		     addr <= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)) ||
+		    (cur_mte_cxt.trig_range < 0 &&
+		     addr <= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) &&
+		     addr >= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range))) {
+			cur_mte_cxt.fault_valid = true;
+			/* Adjust the pc by 4 */
+			((ucontext_t *)uc)->uc_mcontext.pc += 4;
+		}
+	}
+}
+
+void mte_register_signal(int signal, void (*handler)(int, siginfo_t *, void *))
+{
+	struct sigaction sa;
+
+	sa.sa_sigaction = handler;
+	sa.sa_flags = SA_SIGINFO;
+	sigemptyset(&sa.sa_mask);
+	sigaction(signal, &sa, NULL);
+}
+
+void mte_wait_after_trig(void)
+{
+	sched_yield();
+}
+
+void *mte_insert_tags(void *ptr, size_t size)
+{
+	void *tag_ptr;
+	int align_size;
+
+	if (!ptr || (unsigned long)(ptr) & MT_ALIGN_GRANULE) {
+		ksft_print_msg("FAIL: Addr=%lx: invalid\n", ptr);
+		return NULL;
+	}
+	align_size = MT_ALIGN_UP(size);
+	tag_ptr = mte_insert_random_tag(ptr);
+	mte_set_tag_address_range(tag_ptr, align_size);
+	return tag_ptr;
+}
+
+void mte_clear_tags(void *ptr, size_t size)
+{
+	if (!ptr || (unsigned long)(ptr) & MT_ALIGN_GRANULE) {
+		ksft_print_msg("FAIL: Addr=%lx: invalid\n", ptr);
+		return;
+	}
+	size = MT_ALIGN_UP(size);
+	ptr = (void *)MT_CLEAR_TAG((unsigned long)ptr);
+	mte_clear_tag_address_range(ptr, size);
+}
+
+static void *__mte_allocate_memory_range(size_t size, int mem_type, int mapping,
+					 size_t range_before, size_t range_after,
+					 bool tags, int fd)
+{
+	void *ptr;
+	int prot_flag, map_flag;
+	size_t entire_size = size + range_before + range_after;
+
+	if (mem_type != USE_MALLOC && mem_type != USE_MMAP &&
+	    mem_type != USE_MPROTECT) {
+		ksft_print_msg("FAIL: Invalid allocate request\n");
+		return NULL;
+	}
+	if (mem_type == USE_MALLOC)
+		return malloc(entire_size) + range_before;
+
+	prot_flag = PROT_READ | PROT_WRITE;
+	if (mem_type == USE_MMAP)
+		prot_flag |= PROT_MTE;
+
+	map_flag = mapping;
+	if (fd == -1)
+		map_flag = MAP_ANONYMOUS | map_flag;
+	if (!(mapping & MAP_SHARED))
+		map_flag |= MAP_PRIVATE;
+	ptr = mmap(NULL, entire_size, prot_flag, map_flag, fd, 0);
+	if (ptr == MAP_FAILED) {
+		ksft_print_msg("FAIL: mmap allocation\n");
+		return NULL;
+	}
+	if (mem_type == USE_MPROTECT) {
+		if (mprotect(ptr, entire_size, prot_flag | PROT_MTE)) {
+			munmap(ptr, size);
+			ksft_print_msg("FAIL: mprotect PROT_MTE property\n");
+			return NULL;
+		}
+	}
+	if (tags)
+		ptr = mte_insert_tags(ptr + range_before, size);
+	return ptr;
+}
+
+void *mte_allocate_memory_tag_range(size_t size, int mem_type, int mapping,
+				    size_t range_before, size_t range_after)
+{
+	return __mte_allocate_memory_range(size, mem_type, mapping, range_before,
+					   range_after, true, -1);
+}
+
+void *mte_allocate_memory(size_t size, int mem_type, int mapping, bool tags)
+{
+	return __mte_allocate_memory_range(size, mem_type, mapping, 0, 0, tags, -1);
+}
+
+void *mte_allocate_file_memory(size_t size, int mem_type, int mapping, bool tags, int fd)
+{
+	int index;
+	char buffer[INIT_BUFFER_SIZE];
+
+	if (mem_type != USE_MPROTECT && mem_type != USE_MMAP) {
+		ksft_print_msg("FAIL: Invalid mmap file request\n");
+		return NULL;
+	}
+	/* Initialize the file for mappable size */
+	lseek(fd, 0, SEEK_SET);
+	for (index = INIT_BUFFER_SIZE; index < size; index += INIT_BUFFER_SIZE)
+		write(fd, buffer, INIT_BUFFER_SIZE);
+	index -= INIT_BUFFER_SIZE;
+	write(fd, buffer, size - index);
+	return __mte_allocate_memory_range(size, mem_type, mapping, 0, 0, tags, fd);
+}
+
+void *mte_allocate_file_memory_tag_range(size_t size, int mem_type, int mapping,
+					 size_t range_before, size_t range_after, int fd)
+{
+	int index;
+	char buffer[INIT_BUFFER_SIZE];
+	int map_size = size + range_before + range_after;
+
+	if (mem_type != USE_MPROTECT && mem_type != USE_MMAP) {
+		ksft_print_msg("FAIL: Invalid mmap file request\n");
+		return NULL;
+	}
+	/* Initialize the file for mappable size */
+	lseek(fd, 0, SEEK_SET);
+	for (index = INIT_BUFFER_SIZE; index < map_size; index += INIT_BUFFER_SIZE)
+		write(fd, buffer, INIT_BUFFER_SIZE);
+	index -= INIT_BUFFER_SIZE;
+	write(fd, buffer, map_size - index);
+	return __mte_allocate_memory_range(size, mem_type, mapping, range_before,
+					   range_after, true, fd);
+}
+
+static void __mte_free_memory_range(void *ptr, size_t size, int mem_type,
+				    size_t range_before, size_t range_after, bool tags)
+{
+	switch (mem_type) {
+	case USE_MALLOC:
+		free(ptr - range_before);
+		break;
+	case USE_MMAP:
+	case USE_MPROTECT:
+		if (tags)
+			mte_clear_tags(ptr, size);
+		munmap(ptr - range_before, size + range_before + range_after);
+		break;
+	default:
+		ksft_print_msg("FAIL: Invalid free request\n");
+		break;
+	}
+}
+
+void mte_free_memory_tag_range(void *ptr, size_t size, int mem_type,
+			       size_t range_before, size_t range_after)
+{
+	__mte_free_memory_range(ptr, size, mem_type, range_before, range_after, true);
+}
+
+void mte_free_memory(void *ptr, size_t size, int mem_type, bool tags)
+{
+	__mte_free_memory_range(ptr, size, mem_type, 0, 0, tags);
+}
+
+void mte_initialize_current_context(int mode, uintptr_t ptr, ssize_t range)
+{
+	cur_mte_cxt.fault_valid = false;
+	cur_mte_cxt.trig_addr = ptr;
+	cur_mte_cxt.trig_range = range;
+	if (mode == MTE_SYNC_ERR)
+		cur_mte_cxt.trig_si_code = SEGV_MTESERR;
+	else if (mode == MTE_ASYNC_ERR)
+		cur_mte_cxt.trig_si_code = SEGV_MTEAERR;
+	else
+		cur_mte_cxt.trig_si_code = 0;
+}
+
+int mte_switch_mode(int mte_option, unsigned long incl_mask)
+{
+	unsigned long en = 0;
+
+	if (!(mte_option == MTE_SYNC_ERR || mte_option == MTE_ASYNC_ERR ||
+	      mte_option == MTE_NONE_ERR || incl_mask <= MTE_ALLOW_NON_ZERO_TAG)) {
+		ksft_print_msg("FAIL: Invalid mte config option\n");
+		return -EINVAL;
+	}
+	en = PR_TAGGED_ADDR_ENABLE;
+	if (mte_option == MTE_SYNC_ERR)
+		en |= PR_MTE_TCF_SYNC;
+	else if (mte_option == MTE_ASYNC_ERR)
+		en |= PR_MTE_TCF_ASYNC;
+	else if (mte_option == MTE_NONE_ERR)
+		en |= PR_MTE_TCF_NONE;
+
+	en |= (incl_mask << PR_MTE_TAG_SHIFT);
+	/* Enable address tagging ABI, mte error reporting mode and tag inclusion mask. */
+	if (!prctl(PR_SET_TAGGED_ADDR_CTRL, en, 0, 0, 0) == 0) {
+		ksft_print_msg("FAIL:prctl PR_SET_TAGGED_ADDR_CTRL for mte mode\n");
+		return -EINVAL;
+	}
+	return 0;
+}
+
+#define ID_AA64PFR1_MTE_SHIFT		8
+#define ID_AA64PFR1_MTE			2
+
+int mte_default_setup(void)
+{
+	unsigned long hwcaps = getauxval(AT_HWCAP);
+	unsigned long en = 0;
+	int ret;
+
+	if (!(hwcaps & HWCAP_CPUID)) {
+		ksft_print_msg("FAIL: CPUID registers unavailable\n");
+		return KSFT_FAIL;
+	}
+	/* Read ID_AA64PFR1_EL1 register */
+	asm volatile("mrs %0, id_aa64pfr1_el1" : "=r"(hwcaps) : : "memory");
+	if (((hwcaps >> ID_AA64PFR1_MTE_SHIFT) & MT_TAG_MASK) != ID_AA64PFR1_MTE) {
+		ksft_print_msg("FAIL: MTE features unavailable\n");
+		return KSFT_SKIP;
+	}
+	/* Get current mte mode */
+	ret = prctl(PR_GET_TAGGED_ADDR_CTRL, en, 0, 0, 0);
+	if (ret < 0) {
+		ksft_print_msg("FAIL:prctl PR_GET_TAGGED_ADDR_CTRL with error =%d\n", ret);
+		return KSFT_FAIL;
+	}
+	if (ret & PR_MTE_TCF_SYNC)
+		mte_cur_mode = MTE_SYNC_ERR;
+	else if (ret & PR_MTE_TCF_ASYNC)
+		mte_cur_mode = MTE_ASYNC_ERR;
+	else if (ret & PR_MTE_TCF_NONE)
+		mte_cur_mode = MTE_NONE_ERR;
+
+	mte_cur_pstate_tco = mte_get_pstate_tco();
+	/* Disable PSTATE.TCO */
+	mte_disable_pstate_tco();
+	return 0;
+}
+
+void mte_restore_setup(void)
+{
+	mte_switch_mode(mte_cur_mode, MTE_ALLOW_NON_ZERO_TAG);
+	if (mte_cur_pstate_tco == MT_PSTATE_TCO_EN)
+		mte_enable_pstate_tco();
+	else if (mte_cur_pstate_tco == MT_PSTATE_TCO_DIS)
+		mte_disable_pstate_tco();
+}
+
+int create_temp_file(void)
+{
+	int fd;
+	char filename[] = "/dev/shm/tmp_XXXXXX";
+
+	/* Create a file in the tmpfs filesystem */
+	fd = mkstemp(&filename[0]);
+	if (fd == -1) {
+		ksft_print_msg("FAIL: Unable to open temporary file\n");
+		return 0;
+	}
+	unlink(&filename[0]);
+	return fd;
+}
diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.h b/tools/testing/selftests/arm64/mte/mte_common_util.h
new file mode 100644
index 000000000000..195a7d1879e6
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/mte_common_util.h
@@ -0,0 +1,118 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2020 ARM Limited */
+
+#ifndef _MTE_COMMON_UTIL_H
+#define _MTE_COMMON_UTIL_H
+
+#include <signal.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <sys/auxv.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+#include "mte_def.h"
+#include "kselftest.h"
+
+enum mte_mem_type {
+	USE_MALLOC,
+	USE_MMAP,
+	USE_MPROTECT,
+};
+
+enum mte_mode {
+	MTE_NONE_ERR,
+	MTE_SYNC_ERR,
+	MTE_ASYNC_ERR,
+};
+
+struct mte_fault_cxt {
+	/* Address start which triggers mte tag fault */
+	unsigned long trig_addr;
+	/* Address range for mte tag fault and negative value means underflow */
+	ssize_t trig_range;
+	/* siginfo si code */
+	unsigned long trig_si_code;
+	/* Flag to denote if correct fault caught */
+	bool fault_valid;
+};
+
+extern struct mte_fault_cxt cur_mte_cxt;
+
+/* MTE utility functions */
+void mte_default_handler(int signum, siginfo_t *si, void *uc);
+void mte_register_signal(int signal, void (*handler)(int, siginfo_t *, void *));
+void mte_wait_after_trig(void);
+void *mte_allocate_memory(size_t size, int mem_type, int mapping, bool tags);
+void *mte_allocate_memory_tag_range(size_t size, int mem_type, int mapping,
+				    size_t range_before, size_t range_after);
+void *mte_allocate_file_memory(size_t size, int mem_type, int mapping,
+			       bool tags, int fd);
+void *mte_allocate_file_memory_tag_range(size_t size, int mem_type, int mapping,
+					 size_t range_before, size_t range_after, int fd);
+void mte_free_memory(void *ptr, size_t size, int mem_type, bool tags);
+void mte_free_memory_tag_range(void *ptr, size_t size, int mem_type,
+			       size_t range_before, size_t range_after);
+void *mte_insert_tags(void *ptr, size_t size);
+void mte_clear_tags(void *ptr, size_t size);
+int mte_default_setup(void);
+void mte_restore_setup(void);
+int mte_switch_mode(int mte_option, unsigned long incl_mask);
+void mte_initialize_current_context(int mode, uintptr_t ptr, ssize_t range);
+
+/* Common utility functions */
+int create_temp_file(void);
+
+/* Assembly MTE utility functions */
+void *mte_insert_random_tag(void *ptr);
+void *mte_insert_new_tag(void *ptr);
+void *mte_get_tag_address(void *ptr);
+void mte_set_tag_address_range(void *ptr, int range);
+void mte_clear_tag_address_range(void *ptr, int range);
+void mte_disable_pstate_tco(void);
+void mte_enable_pstate_tco(void);
+unsigned int mte_get_pstate_tco(void);
+
+/* Test framework static inline functions/macros */
+static inline void evaluate_test(int err, const char *msg)
+{
+	if (err == KSFT_PASS)
+		ksft_test_result_pass(msg);
+	else if (err == KSFT_FAIL)
+		ksft_test_result_fail(msg);
+}
+
+static inline int check_allocated_memory(void *ptr, size_t size,
+					 int mem_type, bool tags)
+{
+	if (ptr == NULL) {
+		ksft_print_msg("FAIL: memory allocation\n");
+		return KSFT_FAIL;
+	}
+
+	if (tags && !MT_FETCH_TAG((uintptr_t)ptr)) {
+		ksft_print_msg("FAIL: tag not found at addr(%p)\n", ptr);
+		mte_free_memory((void *)ptr, size, mem_type, false);
+		return KSFT_FAIL;
+	}
+
+	return KSFT_PASS;
+}
+
+static inline int check_allocated_memory_range(void *ptr, size_t size, int mem_type,
+					       size_t range_before, size_t range_after)
+{
+	if (ptr == NULL) {
+		ksft_print_msg("FAIL: memory allocation\n");
+		return KSFT_FAIL;
+	}
+
+	if (!MT_FETCH_TAG((uintptr_t)ptr)) {
+		ksft_print_msg("FAIL: tag not found at addr(%p)\n", ptr);
+		mte_free_memory_tag_range((void *)ptr, size, mem_type, range_before,
+					  range_after);
+		return KSFT_FAIL;
+	}
+	return KSFT_PASS;
+}
+
+#endif /* _MTE_COMMON_UTIL_H */
diff --git a/tools/testing/selftests/arm64/mte/mte_def.h b/tools/testing/selftests/arm64/mte/mte_def.h
new file mode 100644
index 000000000000..9b188254b61a
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/mte_def.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2020 ARM Limited */
+
+/*
+ * Below definitions may be found in kernel headers, However, they are
+ * redefined here to decouple the MTE selftests compilations from them.
+ */
+#ifndef SEGV_MTEAERR
+#define	SEGV_MTEAERR	8
+#endif
+#ifndef SEGV_MTESERR
+#define	SEGV_MTESERR	9
+#endif
+#ifndef PROT_MTE
+#define PROT_MTE	 0x20
+#endif
+#ifndef HWCAP2_MTE
+#define HWCAP2_MTE	(1 << 18)
+#endif
+
+#ifndef PR_MTE_TCF_SHIFT
+#define PR_MTE_TCF_SHIFT	1
+#endif
+#ifndef PR_MTE_TCF_NONE
+#define PR_MTE_TCF_NONE		(0UL << PR_MTE_TCF_SHIFT)
+#endif
+#ifndef PR_MTE_TCF_SYNC
+#define	PR_MTE_TCF_SYNC		(1UL << PR_MTE_TCF_SHIFT)
+#endif
+#ifndef PR_MTE_TCF_ASYNC
+#define PR_MTE_TCF_ASYNC	(2UL << PR_MTE_TCF_SHIFT)
+#endif
+#ifndef PR_MTE_TAG_SHIFT
+#define	PR_MTE_TAG_SHIFT	3
+#endif
+
+/* MTE Hardware feature definitions below. */
+#define MT_TAG_SHIFT		56
+#define MT_TAG_MASK		0xFUL
+#define MT_FREE_TAG		0x0UL
+#define MT_GRANULE_SIZE         16
+#define MT_TAG_COUNT		16
+#define MT_INCLUDE_TAG_MASK	0xFFFF
+#define MT_EXCLUDE_TAG_MASK	0x0
+
+#define MT_ALIGN_GRANULE	(MT_GRANULE_SIZE - 1)
+#define MT_CLEAR_TAG(x)		((x) & ~(MT_TAG_MASK << MT_TAG_SHIFT))
+#define MT_SET_TAG(x, y)	((x) | (y << MT_TAG_SHIFT))
+#define MT_FETCH_TAG(x)		((x >> MT_TAG_SHIFT) & (MT_TAG_MASK))
+#define MT_ALIGN_UP(x)		((x + MT_ALIGN_GRANULE) & ~(MT_ALIGN_GRANULE))
+
+#define MT_PSTATE_TCO_SHIFT	25
+#define MT_PSTATE_TCO_MASK	~(0x1 << MT_PSTATE_TCO_SHIFT)
+#define MT_PSTATE_TCO_EN	1
+#define MT_PSTATE_TCO_DIS	0
+
+#define MT_EXCLUDE_TAG(x)		(1 << (x))
+#define MT_INCLUDE_VALID_TAG(x)		(MT_INCLUDE_TAG_MASK ^ MT_EXCLUDE_TAG(x))
+#define MT_INCLUDE_VALID_TAGS(x)	(MT_INCLUDE_TAG_MASK ^ (x))
+#define MTE_ALLOW_NON_ZERO_TAG		MT_INCLUDE_VALID_TAG(0)
diff --git a/tools/testing/selftests/arm64/mte/mte_helper.S b/tools/testing/selftests/arm64/mte/mte_helper.S
new file mode 100644
index 000000000000..a02c04cd0aac
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/mte_helper.S
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2020 ARM Limited */
+
+#include "mte_def.h"
+
+#define ENTRY(name) \
+	.globl name ;\
+	.p2align 2;\
+	.type name, @function ;\
+name:
+
+#define ENDPROC(name) \
+	.size name, .-name ;
+
+	.text
+/*
+ * mte_insert_random_tag: Insert random tag and might be same as the source tag if
+ *			  the source pointer has it.
+ * Input:
+ *		x0 - source pointer with a tag/no-tag
+ * Return:
+ *		x0 - pointer with random tag
+ */
+ENTRY(mte_insert_random_tag)
+	irg	x0, x0, xzr
+	ret
+ENDPROC(mte_insert_random_tag)
+
+/*
+ * mte_insert_new_tag: Insert new tag and different from the source tag if
+ *		       source pointer has it.
+ * Input:
+ *		x0 - source pointer with a tag/no-tag
+ * Return:
+ *		x0 - pointer with random tag
+ */
+ENTRY(mte_insert_new_tag)
+	gmi	x1, x0, xzr
+	irg	x0, x0, x1
+	ret
+ENDPROC(mte_insert_new_tag)
+
+/*
+ * mte_get_tag_address: Get the tag from given address.
+ * Input:
+ *		x0 - source pointer
+ * Return:
+ *		x0 - pointer with appended tag
+ */
+ENTRY(mte_get_tag_address)
+	ldg	x0, [x0]
+	ret
+ENDPROC(mte_get_tag_address)
+
+/*
+ * mte_set_tag_address_range: Set the tag range from the given address
+ * Input:
+ *		x0 - source pointer with tag data
+ *		x1 - range
+ * Return:
+ *		none
+ */
+ENTRY(mte_set_tag_address_range)
+	cbz	x1, 2f
+1:
+	stg	x0, [x0, #0x0]
+	add	x0, x0, #MT_GRANULE_SIZE
+	sub	x1, x1, #MT_GRANULE_SIZE
+	cbnz	x1, 1b
+2:
+	ret
+ENDPROC(mte_set_tag_address_range)
+
+/*
+ * mt_clear_tag_address_range: Clear the tag range from the given address
+ * Input:
+ *		x0 - source pointer with tag data
+ *		x1 - range
+ * Return:
+ *		none
+ */
+ENTRY(mte_clear_tag_address_range)
+	cbz	x1, 2f
+1:
+	stzg	x0, [x0, #0x0]
+	add	x0, x0, #MT_GRANULE_SIZE
+	sub	x1, x1, #MT_GRANULE_SIZE
+	cbnz	x1, 1b
+2:
+	ret
+ENDPROC(mte_clear_tag_address_range)
+
+/*
+ * mte_enable_pstate_tco: Enable PSTATE.TCO (tag check override) field
+ * Input:
+ *		none
+ * Return:
+ *		none
+ */
+ENTRY(mte_enable_pstate_tco)
+	msr	tco, #MT_PSTATE_TCO_EN
+	ret
+ENDPROC(mte_enable_pstate_tco)
+
+/*
+ * mte_disable_pstate_tco: Disable PSTATE.TCO (tag check override) field
+ * Input:
+ *		none
+ * Return:
+ *		none
+ */
+ENTRY(mte_disable_pstate_tco)
+	msr	tco, #MT_PSTATE_TCO_DIS
+	ret
+ENDPROC(mte_disable_pstate_tco)
+
+/*
+ * mte_get_pstate_tco: Get PSTATE.TCO (tag check override) field
+ * Input:
+ *		none
+ * Return:
+ *		x0
+ */
+ENTRY(mte_get_pstate_tco)
+	mrs	x0, tco
+	ubfx	x0, x0, #MT_PSTATE_TCO_SHIFT, #1
+	ret
+ENDPROC(mte_get_pstate_tco)
diff --git a/tools/testing/selftests/arm64/pauth/.gitignore b/tools/testing/selftests/arm64/pauth/.gitignore
new file mode 100644
index 000000000000..155137d92722
--- /dev/null
+++ b/tools/testing/selftests/arm64/pauth/.gitignore
@@ -0,0 +1,2 @@
+exec_target
+pac
diff --git a/tools/testing/selftests/arm64/pauth/Makefile b/tools/testing/selftests/arm64/pauth/Makefile
new file mode 100644
index 000000000000..72e290b0b10c
--- /dev/null
+++ b/tools/testing/selftests/arm64/pauth/Makefile
@@ -0,0 +1,39 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2020 ARM Limited
+
+# preserve CC value from top level Makefile
+ifeq ($(CC),cc)
+CC := $(CROSS_COMPILE)gcc
+endif
+
+CFLAGS += -mbranch-protection=pac-ret
+# check if the compiler supports ARMv8.3 and branch protection with PAuth
+pauth_cc_support := $(shell if ($(CC) $(CFLAGS) -march=armv8.3-a -E -x c /dev/null -o /dev/null 2>&1) then echo "1"; fi)
+
+ifeq ($(pauth_cc_support),1)
+TEST_GEN_PROGS := pac
+TEST_GEN_FILES := pac_corruptor.o helper.o
+TEST_GEN_PROGS_EXTENDED := exec_target
+endif
+
+include ../../lib.mk
+
+ifeq ($(pauth_cc_support),1)
+# pac* and aut* instructions are not available on architectures berfore
+# ARMv8.3. Therefore target ARMv8.3 wherever they are used directly
+$(OUTPUT)/pac_corruptor.o: pac_corruptor.S
+	$(CC) -c $^ -o $@ $(CFLAGS) -march=armv8.3-a
+
+$(OUTPUT)/helper.o: helper.c
+	$(CC) -c $^ -o $@ $(CFLAGS) -march=armv8.3-a
+
+# when -mbranch-protection is enabled and the target architecture is ARMv8.3 or
+# greater, gcc emits pac* instructions which are not in HINT NOP space,
+# preventing the tests from occurring at all. Compile for ARMv8.2 so tests can
+# run on earlier targets and print a meaningful error messages
+$(OUTPUT)/exec_target: exec_target.c $(OUTPUT)/helper.o
+	$(CC) $^ -o $@ $(CFLAGS) -march=armv8.2-a
+
+$(OUTPUT)/pac: pac.c $(OUTPUT)/pac_corruptor.o $(OUTPUT)/helper.o
+	$(CC) $^ -o $@ $(CFLAGS) -march=armv8.2-a
+endif
diff --git a/tools/testing/selftests/arm64/pauth/exec_target.c b/tools/testing/selftests/arm64/pauth/exec_target.c
new file mode 100644
index 000000000000..4435600ca400
--- /dev/null
+++ b/tools/testing/selftests/arm64/pauth/exec_target.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/auxv.h>
+
+#include "helper.h"
+
+int main(void)
+{
+	struct signatures signed_vals;
+	unsigned long hwcaps;
+	size_t val;
+
+	fread(&val, sizeof(size_t), 1, stdin);
+
+	/* don't try to execute illegal (unimplemented) instructions) caller
+	 * should have checked this and keep worker simple
+	 */
+	hwcaps = getauxval(AT_HWCAP);
+
+	if (hwcaps & HWCAP_PACA) {
+		signed_vals.keyia = keyia_sign(val);
+		signed_vals.keyib = keyib_sign(val);
+		signed_vals.keyda = keyda_sign(val);
+		signed_vals.keydb = keydb_sign(val);
+	}
+	signed_vals.keyg = (hwcaps & HWCAP_PACG) ?  keyg_sign(val) : 0;
+
+	fwrite(&signed_vals, sizeof(struct signatures), 1, stdout);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/arm64/pauth/helper.c b/tools/testing/selftests/arm64/pauth/helper.c
new file mode 100644
index 000000000000..2c201e7d0d50
--- /dev/null
+++ b/tools/testing/selftests/arm64/pauth/helper.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#include "helper.h"
+
+size_t keyia_sign(size_t ptr)
+{
+	asm volatile("paciza %0" : "+r" (ptr));
+	return ptr;
+}
+
+size_t keyib_sign(size_t ptr)
+{
+	asm volatile("pacizb %0" : "+r" (ptr));
+	return ptr;
+}
+
+size_t keyda_sign(size_t ptr)
+{
+	asm volatile("pacdza %0" : "+r" (ptr));
+	return ptr;
+}
+
+size_t keydb_sign(size_t ptr)
+{
+	asm volatile("pacdzb %0" : "+r" (ptr));
+	return ptr;
+}
+
+size_t keyg_sign(size_t ptr)
+{
+	/* output is encoded in the upper 32 bits */
+	size_t dest = 0;
+	size_t modifier = 0;
+
+	asm volatile("pacga %0, %1, %2" : "=r" (dest) : "r" (ptr), "r" (modifier));
+
+	return dest;
+}
diff --git a/tools/testing/selftests/arm64/pauth/helper.h b/tools/testing/selftests/arm64/pauth/helper.h
new file mode 100644
index 000000000000..652496c7b411
--- /dev/null
+++ b/tools/testing/selftests/arm64/pauth/helper.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2020 ARM Limited */
+
+#ifndef _HELPER_H_
+#define _HELPER_H_
+
+#include <stdlib.h>
+
+#define NKEYS 5
+
+struct signatures {
+	size_t keyia;
+	size_t keyib;
+	size_t keyda;
+	size_t keydb;
+	size_t keyg;
+};
+
+void pac_corruptor(void);
+
+/* PAuth sign a value with key ia and modifier value 0 */
+size_t keyia_sign(size_t val);
+size_t keyib_sign(size_t val);
+size_t keyda_sign(size_t val);
+size_t keydb_sign(size_t val);
+size_t keyg_sign(size_t val);
+
+#endif
diff --git a/tools/testing/selftests/arm64/pauth/pac.c b/tools/testing/selftests/arm64/pauth/pac.c
new file mode 100644
index 000000000000..592fe538506e
--- /dev/null
+++ b/tools/testing/selftests/arm64/pauth/pac.c
@@ -0,0 +1,368 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#define _GNU_SOURCE
+
+#include <sys/auxv.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <signal.h>
+#include <setjmp.h>
+#include <sched.h>
+
+#include "../../kselftest_harness.h"
+#include "helper.h"
+
+#define PAC_COLLISION_ATTEMPTS 10
+/*
+ * The kernel sets TBID by default. So bits 55 and above should remain
+ * untouched no matter what.
+ * The VA space size is 48 bits. Bigger is opt-in.
+ */
+#define PAC_MASK (~0xff80ffffffffffff)
+#define ARBITRARY_VALUE (0x1234)
+#define ASSERT_PAUTH_ENABLED() \
+do { \
+	unsigned long hwcaps = getauxval(AT_HWCAP); \
+	/* data key instructions are not in NOP space. This prevents a SIGILL */ \
+	ASSERT_NE(0, hwcaps & HWCAP_PACA) TH_LOG("PAUTH not enabled"); \
+} while (0)
+#define ASSERT_GENERIC_PAUTH_ENABLED() \
+do { \
+	unsigned long hwcaps = getauxval(AT_HWCAP); \
+	/* generic key instructions are not in NOP space. This prevents a SIGILL */ \
+	ASSERT_NE(0, hwcaps & HWCAP_PACG) TH_LOG("Generic PAUTH not enabled"); \
+} while (0)
+
+void sign_specific(struct signatures *sign, size_t val)
+{
+	sign->keyia = keyia_sign(val);
+	sign->keyib = keyib_sign(val);
+	sign->keyda = keyda_sign(val);
+	sign->keydb = keydb_sign(val);
+}
+
+void sign_all(struct signatures *sign, size_t val)
+{
+	sign->keyia = keyia_sign(val);
+	sign->keyib = keyib_sign(val);
+	sign->keyda = keyda_sign(val);
+	sign->keydb = keydb_sign(val);
+	sign->keyg  = keyg_sign(val);
+}
+
+int n_same(struct signatures *old, struct signatures *new, int nkeys)
+{
+	int res = 0;
+
+	res += old->keyia == new->keyia;
+	res += old->keyib == new->keyib;
+	res += old->keyda == new->keyda;
+	res += old->keydb == new->keydb;
+	if (nkeys == NKEYS)
+		res += old->keyg == new->keyg;
+
+	return res;
+}
+
+int n_same_single_set(struct signatures *sign, int nkeys)
+{
+	size_t vals[nkeys];
+	int same = 0;
+
+	vals[0] = sign->keyia & PAC_MASK;
+	vals[1] = sign->keyib & PAC_MASK;
+	vals[2] = sign->keyda & PAC_MASK;
+	vals[3] = sign->keydb & PAC_MASK;
+
+	if (nkeys >= 4)
+		vals[4] = sign->keyg & PAC_MASK;
+
+	for (int i = 0; i < nkeys - 1; i++) {
+		for (int j = i + 1; j < nkeys; j++) {
+			if (vals[i] == vals[j])
+				same += 1;
+		}
+	}
+	return same;
+}
+
+int exec_sign_all(struct signatures *signed_vals, size_t val)
+{
+	int new_stdin[2];
+	int new_stdout[2];
+	int status;
+	int i;
+	ssize_t ret;
+	pid_t pid;
+	cpu_set_t mask;
+
+	ret = pipe(new_stdin);
+	if (ret == -1) {
+		perror("pipe returned error");
+		return -1;
+	}
+
+	ret = pipe(new_stdout);
+	if (ret == -1) {
+		perror("pipe returned error");
+		return -1;
+	}
+
+	/*
+	 * pin this process and all its children to a single CPU, so it can also
+	 * guarantee a context switch with its child
+	 */
+	sched_getaffinity(0, sizeof(mask), &mask);
+
+	for (i = 0; i < sizeof(cpu_set_t); i++)
+		if (CPU_ISSET(i, &mask))
+			break;
+
+	CPU_ZERO(&mask);
+	CPU_SET(i, &mask);
+	sched_setaffinity(0, sizeof(mask), &mask);
+
+	pid = fork();
+	// child
+	if (pid == 0) {
+		dup2(new_stdin[0], STDIN_FILENO);
+		if (ret == -1) {
+			perror("dup2 returned error");
+			exit(1);
+		}
+
+		dup2(new_stdout[1], STDOUT_FILENO);
+		if (ret == -1) {
+			perror("dup2 returned error");
+			exit(1);
+		}
+
+		close(new_stdin[0]);
+		close(new_stdin[1]);
+		close(new_stdout[0]);
+		close(new_stdout[1]);
+
+		ret = execl("exec_target", "exec_target", (char *)NULL);
+		if (ret == -1) {
+			perror("exec returned error");
+			exit(1);
+		}
+	}
+
+	close(new_stdin[0]);
+	close(new_stdout[1]);
+
+	ret = write(new_stdin[1], &val, sizeof(size_t));
+	if (ret == -1) {
+		perror("write returned error");
+		return -1;
+	}
+
+	/*
+	 * wait for the worker to finish, so that read() reads all data
+	 * will also context switch with worker so that this function can be used
+	 * for context switch tests
+	 */
+	waitpid(pid, &status, 0);
+	if (WIFEXITED(status) == 0) {
+		fprintf(stderr, "worker exited unexpectedly\n");
+		return -1;
+	}
+	if (WEXITSTATUS(status) != 0) {
+		fprintf(stderr, "worker exited with error\n");
+		return -1;
+	}
+
+	ret = read(new_stdout[0], signed_vals, sizeof(struct signatures));
+	if (ret == -1) {
+		perror("read returned error");
+		return -1;
+	}
+
+	return 0;
+}
+
+sigjmp_buf jmpbuf;
+void pac_signal_handler(int signum, siginfo_t *si, void *uc)
+{
+	if (signum == SIGSEGV || signum == SIGILL)
+		siglongjmp(jmpbuf, 1);
+}
+
+/* check that a corrupted PAC results in SIGSEGV or SIGILL */
+TEST(corrupt_pac)
+{
+	struct sigaction sa;
+
+	ASSERT_PAUTH_ENABLED();
+	if (sigsetjmp(jmpbuf, 1) == 0) {
+		sa.sa_sigaction = pac_signal_handler;
+		sa.sa_flags = SA_SIGINFO | SA_RESETHAND;
+		sigemptyset(&sa.sa_mask);
+
+		sigaction(SIGSEGV, &sa, NULL);
+		sigaction(SIGILL, &sa, NULL);
+
+		pac_corruptor();
+		ASSERT_TRUE(0) TH_LOG("SIGSEGV/SIGILL signal did not occur");
+	}
+}
+
+/*
+ * There are no separate pac* and aut* controls so checking only the pac*
+ * instructions is sufficient
+ */
+TEST(pac_instructions_not_nop)
+{
+	size_t keyia = 0;
+	size_t keyib = 0;
+	size_t keyda = 0;
+	size_t keydb = 0;
+
+	ASSERT_PAUTH_ENABLED();
+
+	for (int i = 0; i < PAC_COLLISION_ATTEMPTS; i++) {
+		keyia |= keyia_sign(i) & PAC_MASK;
+		keyib |= keyib_sign(i) & PAC_MASK;
+		keyda |= keyda_sign(i) & PAC_MASK;
+		keydb |= keydb_sign(i) & PAC_MASK;
+	}
+
+	ASSERT_NE(0, keyia) TH_LOG("keyia instructions did nothing");
+	ASSERT_NE(0, keyib) TH_LOG("keyib instructions did nothing");
+	ASSERT_NE(0, keyda) TH_LOG("keyda instructions did nothing");
+	ASSERT_NE(0, keydb) TH_LOG("keydb instructions did nothing");
+}
+
+TEST(pac_instructions_not_nop_generic)
+{
+	size_t keyg = 0;
+
+	ASSERT_GENERIC_PAUTH_ENABLED();
+
+	for (int i = 0; i < PAC_COLLISION_ATTEMPTS; i++)
+		keyg |= keyg_sign(i) & PAC_MASK;
+
+	ASSERT_NE(0, keyg)  TH_LOG("keyg instructions did nothing");
+}
+
+TEST(single_thread_different_keys)
+{
+	int same = 10;
+	int nkeys = NKEYS;
+	int tmp;
+	struct signatures signed_vals;
+	unsigned long hwcaps = getauxval(AT_HWCAP);
+
+	/* generic and data key instructions are not in NOP space. This prevents a SIGILL */
+	ASSERT_NE(0, hwcaps & HWCAP_PACA) TH_LOG("PAUTH not enabled");
+	if (!(hwcaps & HWCAP_PACG)) {
+		TH_LOG("WARNING: Generic PAUTH not enabled. Skipping generic key checks");
+		nkeys = NKEYS - 1;
+	}
+
+	/*
+	 * In Linux the PAC field can be up to 7 bits wide. Even if keys are
+	 * different, there is about 5% chance for PACs to collide with
+	 * different addresses. This chance rapidly increases with fewer bits
+	 * allocated for the PAC (e.g. wider address). A comparison of the keys
+	 * directly will be more reliable.
+	 * All signed values need to be different at least once out of n
+	 * attempts to be certain that the keys are different
+	 */
+	for (int i = 0; i < PAC_COLLISION_ATTEMPTS; i++) {
+		if (nkeys == NKEYS)
+			sign_all(&signed_vals, i);
+		else
+			sign_specific(&signed_vals, i);
+
+		tmp = n_same_single_set(&signed_vals, nkeys);
+		if (tmp < same)
+			same = tmp;
+	}
+
+	ASSERT_EQ(0, same) TH_LOG("%d keys clashed every time", same);
+}
+
+/*
+ * fork() does not change keys. Only exec() does so call a worker program.
+ * Its only job is to sign a value and report back the resutls
+ */
+TEST(exec_changed_keys)
+{
+	struct signatures new_keys;
+	struct signatures old_keys;
+	int ret;
+	int same = 10;
+	int nkeys = NKEYS;
+	unsigned long hwcaps = getauxval(AT_HWCAP);
+
+	/* generic and data key instructions are not in NOP space. This prevents a SIGILL */
+	ASSERT_NE(0, hwcaps & HWCAP_PACA) TH_LOG("PAUTH not enabled");
+	if (!(hwcaps & HWCAP_PACG)) {
+		TH_LOG("WARNING: Generic PAUTH not enabled. Skipping generic key checks");
+		nkeys = NKEYS - 1;
+	}
+
+	for (int i = 0; i < PAC_COLLISION_ATTEMPTS; i++) {
+		ret = exec_sign_all(&new_keys, i);
+		ASSERT_EQ(0, ret) TH_LOG("failed to run worker");
+
+		if (nkeys == NKEYS)
+			sign_all(&old_keys, i);
+		else
+			sign_specific(&old_keys, i);
+
+		ret = n_same(&old_keys, &new_keys, nkeys);
+		if (ret < same)
+			same = ret;
+	}
+
+	ASSERT_EQ(0, same) TH_LOG("exec() did not change %d keys", same);
+}
+
+TEST(context_switch_keep_keys)
+{
+	int ret;
+	struct signatures trash;
+	struct signatures before;
+	struct signatures after;
+
+	ASSERT_PAUTH_ENABLED();
+
+	sign_specific(&before, ARBITRARY_VALUE);
+
+	/* will context switch with a process with different keys at least once */
+	ret = exec_sign_all(&trash, ARBITRARY_VALUE);
+	ASSERT_EQ(0, ret) TH_LOG("failed to run worker");
+
+	sign_specific(&after, ARBITRARY_VALUE);
+
+	ASSERT_EQ(before.keyia, after.keyia) TH_LOG("keyia changed after context switching");
+	ASSERT_EQ(before.keyib, after.keyib) TH_LOG("keyib changed after context switching");
+	ASSERT_EQ(before.keyda, after.keyda) TH_LOG("keyda changed after context switching");
+	ASSERT_EQ(before.keydb, after.keydb) TH_LOG("keydb changed after context switching");
+}
+
+TEST(context_switch_keep_keys_generic)
+{
+	int ret;
+	struct signatures trash;
+	size_t before;
+	size_t after;
+
+	ASSERT_GENERIC_PAUTH_ENABLED();
+
+	before = keyg_sign(ARBITRARY_VALUE);
+
+	/* will context switch with a process with different keys at least once */
+	ret = exec_sign_all(&trash, ARBITRARY_VALUE);
+	ASSERT_EQ(0, ret) TH_LOG("failed to run worker");
+
+	after = keyg_sign(ARBITRARY_VALUE);
+
+	ASSERT_EQ(before, after) TH_LOG("keyg changed after context switching");
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/arm64/pauth/pac_corruptor.S b/tools/testing/selftests/arm64/pauth/pac_corruptor.S
new file mode 100644
index 000000000000..aa6588050752
--- /dev/null
+++ b/tools/testing/selftests/arm64/pauth/pac_corruptor.S
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2020 ARM Limited */
+
+.global pac_corruptor
+
+.text
+/*
+ * Corrupting a single bit of the PAC ensures the authentication will fail.  It
+ * also guarantees no possible collision. TCR_EL1.TBI0 is set by default so no
+ * top byte PAC is tested
+ */
+ pac_corruptor:
+	paciasp
+
+	/* corrupt the top bit of the PAC */
+	eor lr, lr, #1 << 53
+
+	autiasp
+	ret
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 9a0946ddb705..3ab1200e172f 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -13,9 +13,7 @@ test_verifier_log
 feature
 test_sock
 test_sock_addr
-test_sock_fields
 urandom_read
-test_btf
 test_sockmap
 test_lirc_mode2_user
 get_cgroup_id_user
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index daf186f88a63..542768f5195b 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -33,9 +33,9 @@ LDLIBS += -lcap -lelf -lz -lrt -lpthread
 # Order correspond to 'make run_tests' order
 TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
 	test_verifier_log test_dev_cgroup test_tcpbpf_user \
-	test_sock test_btf test_sockmap get_cgroup_id_user test_socket_cookie \
+	test_sock test_sockmap get_cgroup_id_user test_socket_cookie \
 	test_cgroup_storage \
-	test_netcnt test_tcpnotify_user test_sock_fields test_sysctl \
+	test_netcnt test_tcpnotify_user test_sysctl \
 	test_progs-no_alu32 \
 	test_current_pid_tgid_new_ns
 
@@ -68,7 +68,8 @@ TEST_PROGS := test_kmod.sh \
 	test_tc_edt.sh \
 	test_xdping.sh \
 	test_bpftool_build.sh \
-	test_bpftool.sh
+	test_bpftool.sh \
+	test_bpftool_metadata.sh \
 
 TEST_PROGS_EXTENDED := with_addr.sh \
 	with_tunnels.sh \
@@ -176,6 +177,11 @@ $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile)    \
 	$(Q)$(MAKE) $(submake_extras)  -C $(BPFTOOLDIR)			       \
 		    OUTPUT=$(BUILD_DIR)/bpftool/			       \
 		    prefix= DESTDIR=$(SCRATCH_DIR)/ install
+	$(Q)mkdir -p $(BUILD_DIR)/bpftool/Documentation
+	$(Q)RST2MAN_OPTS="--exit-status=1" $(MAKE) $(submake_extras)	       \
+		    -C $(BPFTOOLDIR)/Documentation			       \
+		    OUTPUT=$(BUILD_DIR)/bpftool/Documentation/		       \
+		    prefix= DESTDIR=$(SCRATCH_DIR)/ install
 
 $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile)		       \
 	   ../../../include/uapi/linux/bpf.h                                   \
@@ -316,7 +322,7 @@ $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.o:				\
 		     $(TRUNNER_BPF_PROGS_DIR)/%.c			\
 		     $(TRUNNER_BPF_PROGS_DIR)/*.h			\
 		     $$(INCLUDE_DIR)/vmlinux.h				\
-		     $$(BPFOBJ) | $(TRUNNER_OUTPUT)
+		     $(wildcard $(BPFDIR)/bpf_*.h) | $(TRUNNER_OUTPUT)
 	$$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@,			\
 					  $(TRUNNER_BPF_CFLAGS),	\
 					  $(TRUNNER_BPF_LDFLAGS))
diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst
index e885d351595f..ac9eda830187 100644
--- a/tools/testing/selftests/bpf/README.rst
+++ b/tools/testing/selftests/bpf/README.rst
@@ -7,6 +7,44 @@ General instructions on running selftests can be found in
 Additional information about selftest failures are
 documented here.
 
+profiler[23] test failures with clang/llvm <12.0.0
+==================================================
+
+With clang/llvm <12.0.0, the profiler[23] test may fail.
+The symptom looks like
+
+.. code-block:: c
+
+  // r9 is a pointer to map_value
+  // r7 is a scalar
+  17:       bf 96 00 00 00 00 00 00 r6 = r9
+  18:       0f 76 00 00 00 00 00 00 r6 += r7
+  math between map_value pointer and register with unbounded min value is not allowed
+
+  // the instructions below will not be seen in the verifier log
+  19:       a5 07 01 00 01 01 00 00 if r7 < 257 goto +1
+  20:       bf 96 00 00 00 00 00 00 r6 = r9
+  // r6 is used here
+
+The verifier will reject such code with above error.
+At insn 18 the r7 is indeed unbounded. The later insn 19 checks the bounds and
+the insn 20 undoes map_value addition. It is currently impossible for the
+verifier to understand such speculative pointer arithmetic.
+Hence
+    https://reviews.llvm.org/D85570
+addresses it on the compiler side. It was committed on llvm 12.
+
+The corresponding C code
+.. code-block:: c
+
+  for (int i = 0; i < MAX_CGROUPS_PATH_DEPTH; i++) {
+          filepart_length = bpf_probe_read_str(payload, ...);
+          if (filepart_length <= MAX_PATH) {
+                  barrier_var(filepart_length); // workaround
+                  payload += filepart_length;
+          }
+  }
+
 bpf_iter test failures with clang/llvm 10.0.0
 =============================================
 
@@ -43,3 +81,24 @@ This is due to a llvm BPF backend bug. The fix
   https://reviews.llvm.org/D78466
 has been pushed to llvm 10.x release branch and will be
 available in 10.0.1. The fix is available in llvm 11.0.0 trunk.
+
+BPF CO-RE-based tests and Clang version
+=======================================
+
+A set of selftests use BPF target-specific built-ins, which might require
+bleeding-edge Clang versions (Clang 12 nightly at this time).
+
+Few sub-tests of core_reloc test suit (part of test_progs test runner) require
+the following built-ins, listed with corresponding Clang diffs introducing
+them to Clang/LLVM. These sub-tests are going to be skipped if Clang is too
+old to support them, they shouldn't cause build failures or runtime test
+failures:
+
+  - __builtin_btf_type_id() ([0], [1], [2]);
+  - __builtin_preserve_type_info(), __builtin_preserve_enum_value() ([3], [4]).
+
+  [0] https://reviews.llvm.org/D74572
+  [1] https://reviews.llvm.org/D74668
+  [2] https://reviews.llvm.org/D85174
+  [3] https://reviews.llvm.org/D83878
+  [4] https://reviews.llvm.org/D83242
diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index 944ad4721c83..332ed2f7b402 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -311,12 +311,12 @@ extern const struct bench bench_rename_kretprobe;
 extern const struct bench bench_rename_rawtp;
 extern const struct bench bench_rename_fentry;
 extern const struct bench bench_rename_fexit;
-extern const struct bench bench_rename_fmodret;
 extern const struct bench bench_trig_base;
 extern const struct bench bench_trig_tp;
 extern const struct bench bench_trig_rawtp;
 extern const struct bench bench_trig_kprobe;
 extern const struct bench bench_trig_fentry;
+extern const struct bench bench_trig_fentry_sleep;
 extern const struct bench bench_trig_fmodret;
 extern const struct bench bench_rb_libbpf;
 extern const struct bench bench_rb_custom;
@@ -332,12 +332,12 @@ static const struct bench *benchs[] = {
 	&bench_rename_rawtp,
 	&bench_rename_fentry,
 	&bench_rename_fexit,
-	&bench_rename_fmodret,
 	&bench_trig_base,
 	&bench_trig_tp,
 	&bench_trig_rawtp,
 	&bench_trig_kprobe,
 	&bench_trig_fentry,
+	&bench_trig_fentry_sleep,
 	&bench_trig_fmodret,
 	&bench_rb_libbpf,
 	&bench_rb_custom,
@@ -462,4 +462,3 @@ int main(int argc, char **argv)
 
 	return 0;
 }
-
diff --git a/tools/testing/selftests/bpf/benchs/bench_rename.c b/tools/testing/selftests/bpf/benchs/bench_rename.c
index e74cff40f4fe..a967674098ad 100644
--- a/tools/testing/selftests/bpf/benchs/bench_rename.c
+++ b/tools/testing/selftests/bpf/benchs/bench_rename.c
@@ -106,12 +106,6 @@ static void setup_fexit()
 	attach_bpf(ctx.skel->progs.prog5);
 }
 
-static void setup_fmodret()
-{
-	setup_ctx();
-	attach_bpf(ctx.skel->progs.prog6);
-}
-
 static void *consumer(void *input)
 {
 	return NULL;
@@ -182,14 +176,3 @@ const struct bench bench_rename_fexit = {
 	.report_progress = hits_drops_report_progress,
 	.report_final = hits_drops_report_final,
 };
-
-const struct bench bench_rename_fmodret = {
-	.name = "rename-fmodret",
-	.validate = validate,
-	.setup = setup_fmodret,
-	.producer_thread = producer,
-	.consumer_thread = consumer,
-	.measure = measure,
-	.report_progress = hits_drops_report_progress,
-	.report_final = hits_drops_report_final,
-};
diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c
index 49c22832f216..2a0b6c9885a4 100644
--- a/tools/testing/selftests/bpf/benchs/bench_trigger.c
+++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c
@@ -90,6 +90,12 @@ static void trigger_fentry_setup()
 	attach_bpf(ctx.skel->progs.bench_trigger_fentry);
 }
 
+static void trigger_fentry_sleep_setup()
+{
+	setup_ctx();
+	attach_bpf(ctx.skel->progs.bench_trigger_fentry_sleep);
+}
+
 static void trigger_fmodret_setup()
 {
 	setup_ctx();
@@ -155,6 +161,17 @@ const struct bench bench_trig_fentry = {
 	.report_final = hits_drops_report_final,
 };
 
+const struct bench bench_trig_fentry_sleep = {
+	.name = "trig-fentry-sleep",
+	.validate = trigger_validate,
+	.setup = trigger_fentry_sleep_setup,
+	.producer_thread = trigger_producer,
+	.consumer_thread = trigger_consumer,
+	.measure = trigger_measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
+
 const struct bench bench_trig_fmodret = {
 	.name = "trig-fmodret",
 	.validate = trigger_validate,
diff --git a/tools/testing/selftests/bpf/bpf_tcp_helpers.h b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
index 5bf2fe9b1efa..2915664c335d 100644
--- a/tools/testing/selftests/bpf/bpf_tcp_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
@@ -16,6 +16,7 @@ BPF_PROG(name, args)
 
 struct sock_common {
 	unsigned char	skc_state;
+	__u16		skc_num;
 } __attribute__((preserve_access_index));
 
 enum sk_pacing {
@@ -45,6 +46,10 @@ struct inet_connection_sock {
 	__u64			  icsk_ca_priv[104 / sizeof(__u64)];
 } __attribute__((preserve_access_index));
 
+struct request_sock {
+	struct sock_common		__req_common;
+} __attribute__((preserve_access_index));
+
 struct tcp_sock {
 	struct inet_connection_sock	inet_conn;
 
@@ -115,14 +120,6 @@ enum tcp_ca_event {
 	CA_EVENT_ECN_IS_CE = 5,
 };
 
-enum tcp_ca_state {
-	TCP_CA_Open = 0,
-	TCP_CA_Disorder = 1,
-	TCP_CA_CWR = 2,
-	TCP_CA_Recovery = 3,
-	TCP_CA_Loss = 4
-};
-
 struct ack_sample {
 	__u32 pkts_acked;
 	__s32 rtt_us;
diff --git a/tools/testing/selftests/bpf/flow_dissector_load.h b/tools/testing/selftests/bpf/flow_dissector_load.h
index daeaeb518894..7290401ec172 100644
--- a/tools/testing/selftests/bpf/flow_dissector_load.h
+++ b/tools/testing/selftests/bpf/flow_dissector_load.h
@@ -23,7 +23,13 @@ static inline int bpf_flow_load(struct bpf_object **obj,
 	if (ret)
 		return ret;
 
-	main_prog = bpf_object__find_program_by_title(*obj, section_name);
+	main_prog = NULL;
+	bpf_object__for_each_program(prog, *obj) {
+		if (strcmp(section_name, bpf_program__section_name(prog)) == 0) {
+			main_prog = prog;
+			break;
+		}
+	}
 	if (!main_prog)
 		return -1;
 
diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
index f56655690f9b..12ee40284da0 100644
--- a/tools/testing/selftests/bpf/network_helpers.c
+++ b/tools/testing/selftests/bpf/network_helpers.c
@@ -104,6 +104,43 @@ error_close:
 	return -1;
 }
 
+int fastopen_connect(int server_fd, const char *data, unsigned int data_len,
+		     int timeout_ms)
+{
+	struct sockaddr_storage addr;
+	socklen_t addrlen = sizeof(addr);
+	struct sockaddr_in *addr_in;
+	int fd, ret;
+
+	if (getsockname(server_fd, (struct sockaddr *)&addr, &addrlen)) {
+		log_err("Failed to get server addr");
+		return -1;
+	}
+
+	addr_in = (struct sockaddr_in *)&addr;
+	fd = socket(addr_in->sin_family, SOCK_STREAM, 0);
+	if (fd < 0) {
+		log_err("Failed to create client socket");
+		return -1;
+	}
+
+	if (settimeo(fd, timeout_ms))
+		goto error_close;
+
+	ret = sendto(fd, data, data_len, MSG_FASTOPEN, (struct sockaddr *)&addr,
+		     addrlen);
+	if (ret != data_len) {
+		log_err("sendto(data, %u) != %d\n", data_len, ret);
+		goto error_close;
+	}
+
+	return fd;
+
+error_close:
+	save_errno_close(fd);
+	return -1;
+}
+
 static int connect_fd_to_addr(int fd,
 			      const struct sockaddr_storage *addr,
 			      socklen_t addrlen)
diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h
index c3728f6667e4..7205f8afdba1 100644
--- a/tools/testing/selftests/bpf/network_helpers.h
+++ b/tools/testing/selftests/bpf/network_helpers.h
@@ -37,6 +37,8 @@ int start_server(int family, int type, const char *addr, __u16 port,
 		 int timeout_ms);
 int connect_to_fd(int server_fd, int timeout_ms);
 int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms);
+int fastopen_connect(int server_fd, const char *data, unsigned int data_len,
+		     int timeout_ms);
 int make_sockaddr(int family, const char *addr_str, __u16 port,
 		  struct sockaddr_storage *addr, socklen_t *len);
 
diff --git a/tools/testing/selftests/bpf/prog_tests/align.c b/tools/testing/selftests/bpf/prog_tests/align.c
index c548aded6585..52414058a627 100644
--- a/tools/testing/selftests/bpf/prog_tests/align.c
+++ b/tools/testing/selftests/bpf/prog_tests/align.c
@@ -195,13 +195,13 @@ static struct bpf_align_test tests[] = {
 		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 		.matches = {
 			{7, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
-			{8, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+			{8, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"},
 			{9, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
-			{10, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+			{10, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"},
 			{11, "R4_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
-			{12, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+			{12, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"},
 			{13, "R4_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
-			{14, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+			{14, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"},
 			{15, "R4_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
 			{16, "R4_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
 		},
@@ -518,7 +518,7 @@ static struct bpf_align_test tests[] = {
 			 * the total offset is 4-byte aligned and meets the
 			 * load's requirements.
 			 */
-			{20, "R5=pkt(id=1,off=0,r=4,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc)"},
+			{20, "R5=pkt(id=2,off=0,r=4,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc)"},
 
 		},
 	},
@@ -561,18 +561,18 @@ static struct bpf_align_test tests[] = {
 			/* Adding 14 makes R6 be (4n+2) */
 			{11, "R6_w=inv(id=0,umin_value=14,umax_value=74,var_off=(0x2; 0x7c))"},
 			/* Subtracting from packet pointer overflows ubounds */
-			{13, "R5_w=pkt(id=1,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c)"},
+			{13, "R5_w=pkt(id=2,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c)"},
 			/* New unknown value in R7 is (4n), >= 76 */
 			{15, "R7_w=inv(id=0,umin_value=76,umax_value=1096,var_off=(0x0; 0x7fc))"},
 			/* Adding it to packet pointer gives nice bounds again */
-			{16, "R5_w=pkt(id=2,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0xfffffffc)"},
+			{16, "R5_w=pkt(id=3,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0xfffffffc)"},
 			/* At the time the word size load is performed from R5,
 			 * its total fixed offset is NET_IP_ALIGN + reg->off (0)
 			 * which is 2.  Then the variable offset is (4n+2), so
 			 * the total offset is 4-byte aligned and meets the
 			 * load's requirements.
 			 */
-			{20, "R5=pkt(id=2,off=0,r=4,umin_value=2,umax_value=1082,var_off=(0x2; 0xfffffffc)"},
+			{20, "R5=pkt(id=3,off=0,r=4,umin_value=2,umax_value=1082,var_off=(0x2; 0xfffffffc)"},
 		},
 	},
 };
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 7375d9a6d242..448885b95eed 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -7,6 +7,7 @@
 #include "bpf_iter_task.skel.h"
 #include "bpf_iter_task_stack.skel.h"
 #include "bpf_iter_task_file.skel.h"
+#include "bpf_iter_task_btf.skel.h"
 #include "bpf_iter_tcp4.skel.h"
 #include "bpf_iter_tcp6.skel.h"
 #include "bpf_iter_udp4.skel.h"
@@ -132,20 +133,118 @@ static void test_task_stack(void)
 	bpf_iter_task_stack__destroy(skel);
 }
 
+static void *do_nothing(void *arg)
+{
+	pthread_exit(arg);
+}
+
 static void test_task_file(void)
 {
 	struct bpf_iter_task_file *skel;
+	pthread_t thread_id;
+	void *ret;
 
 	skel = bpf_iter_task_file__open_and_load();
 	if (CHECK(!skel, "bpf_iter_task_file__open_and_load",
 		  "skeleton open_and_load failed\n"))
 		return;
 
+	skel->bss->tgid = getpid();
+
+	if (CHECK(pthread_create(&thread_id, NULL, &do_nothing, NULL),
+		  "pthread_create", "pthread_create failed\n"))
+		goto done;
+
 	do_dummy_read(skel->progs.dump_task_file);
 
+	if (CHECK(pthread_join(thread_id, &ret) || ret != NULL,
+		  "pthread_join", "pthread_join failed\n"))
+		goto done;
+
+	CHECK(skel->bss->count != 0, "check_count",
+	      "invalid non pthread file visit count %d\n", skel->bss->count);
+
+done:
 	bpf_iter_task_file__destroy(skel);
 }
 
+#define TASKBUFSZ		32768
+
+static char taskbuf[TASKBUFSZ];
+
+static int do_btf_read(struct bpf_iter_task_btf *skel)
+{
+	struct bpf_program *prog = skel->progs.dump_task_struct;
+	struct bpf_iter_task_btf__bss *bss = skel->bss;
+	int iter_fd = -1, len = 0, bufleft = TASKBUFSZ;
+	struct bpf_link *link;
+	char *buf = taskbuf;
+	int ret = 0;
+
+	link = bpf_program__attach_iter(prog, NULL);
+	if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+		return ret;
+
+	iter_fd = bpf_iter_create(bpf_link__fd(link));
+	if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+		goto free_link;
+
+	do {
+		len = read(iter_fd, buf, bufleft);
+		if (len > 0) {
+			buf += len;
+			bufleft -= len;
+		}
+	} while (len > 0);
+
+	if (bss->skip) {
+		printf("%s:SKIP:no __builtin_btf_type_id\n", __func__);
+		ret = 1;
+		test__skip();
+		goto free_link;
+	}
+
+	if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+		goto free_link;
+
+	CHECK(strstr(taskbuf, "(struct task_struct)") == NULL,
+	      "check for btf representation of task_struct in iter data",
+	      "struct task_struct not found");
+free_link:
+	if (iter_fd > 0)
+		close(iter_fd);
+	bpf_link__destroy(link);
+	return ret;
+}
+
+static void test_task_btf(void)
+{
+	struct bpf_iter_task_btf__bss *bss;
+	struct bpf_iter_task_btf *skel;
+	int ret;
+
+	skel = bpf_iter_task_btf__open_and_load();
+	if (CHECK(!skel, "bpf_iter_task_btf__open_and_load",
+		  "skeleton open_and_load failed\n"))
+		return;
+
+	bss = skel->bss;
+
+	ret = do_btf_read(skel);
+	if (ret)
+		goto cleanup;
+
+	if (CHECK(bss->tasks == 0, "check if iterated over tasks",
+		  "no task iteration, did BPF program run?\n"))
+		goto cleanup;
+
+	CHECK(bss->seq_err != 0, "check for unexpected err",
+	      "bpf_seq_printf_btf returned %ld", bss->seq_err);
+
+cleanup:
+	bpf_iter_task_btf__destroy(skel);
+}
+
 static void test_tcp4(void)
 {
 	struct bpf_iter_tcp4 *skel;
@@ -331,7 +430,7 @@ static void test_overflow(bool test_e2big_overflow, bool ret1)
 	struct bpf_map_info map_info = {};
 	struct bpf_iter_test_kern4 *skel;
 	struct bpf_link *link;
-	__u32 page_size;
+	__u32 iter_size;
 	char *buf;
 
 	skel = bpf_iter_test_kern4__open();
@@ -353,19 +452,19 @@ static void test_overflow(bool test_e2big_overflow, bool ret1)
 		  "map_creation failed: %s\n", strerror(errno)))
 		goto free_map1;
 
-	/* bpf_seq_printf kernel buffer is one page, so one map
+	/* bpf_seq_printf kernel buffer is 8 pages, so one map
 	 * bpf_seq_write will mostly fill it, and the other map
 	 * will partially fill and then trigger overflow and need
 	 * bpf_seq_read restart.
 	 */
-	page_size = sysconf(_SC_PAGE_SIZE);
+	iter_size = sysconf(_SC_PAGE_SIZE) << 3;
 
 	if (test_e2big_overflow) {
-		skel->rodata->print_len = (page_size + 8) / 8;
-		expected_read_len = 2 * (page_size + 8);
+		skel->rodata->print_len = (iter_size + 8) / 8;
+		expected_read_len = 2 * (iter_size + 8);
 	} else if (!ret1) {
-		skel->rodata->print_len = (page_size - 8) / 8;
-		expected_read_len = 2 * (page_size - 8);
+		skel->rodata->print_len = (iter_size - 8) / 8;
+		expected_read_len = 2 * (iter_size - 8);
 	} else {
 		skel->rodata->print_len = 1;
 		expected_read_len = 2 * 8;
@@ -936,6 +1035,8 @@ void test_bpf_iter(void)
 		test_task_stack();
 	if (test__start_subtest("task_file"))
 		test_task_file();
+	if (test__start_subtest("task_btf"))
+		test_task_btf();
 	if (test__start_subtest("tcp4"))
 		test_tcp4();
 	if (test__start_subtest("tcp6"))
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
index e9f2f12ba06b..e698ee6bb6c2 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
@@ -49,6 +49,7 @@ void test_bpf_verif_scale(void)
 		{ "test_verif_scale3.o", BPF_PROG_TYPE_SCHED_CLS },
 
 		{ "pyperf_global.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
+		{ "pyperf_subprogs.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
 
 		/* full unroll by llvm */
 		{ "pyperf50.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
@@ -86,6 +87,9 @@ void test_bpf_verif_scale(void)
 		{ "strobemeta_nounroll1.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
 		{ "strobemeta_nounroll2.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
 
+		/* non-inlined subprogs */
+		{ "strobemeta_subprogs.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
+
 		{ "test_sysctl_loop1.o", BPF_PROG_TYPE_CGROUP_SYSCTL },
 		{ "test_sysctl_loop2.o", BPF_PROG_TYPE_CGROUP_SYSCTL },
 
diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index c75fc6447186..93162484c2ca 100644
--- a/tools/testing/selftests/bpf/test_btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -24,40 +24,17 @@
 
 #include "bpf_rlimit.h"
 #include "bpf_util.h"
-#include "test_btf.h"
+#include "../test_btf.h"
+#include "test_progs.h"
 
 #define MAX_INSNS	512
 #define MAX_SUBPROGS	16
 
-static uint32_t pass_cnt;
-static uint32_t error_cnt;
-static uint32_t skip_cnt;
+static int duration = 0;
+static bool always_log;
 
-#define CHECK(condition, format...) ({					\
-	int __ret = !!(condition);					\
-	if (__ret) {							\
-		fprintf(stderr, "%s:%d:FAIL ", __func__, __LINE__);	\
-		fprintf(stderr, format);				\
-	}								\
-	__ret;								\
-})
-
-static int count_result(int err)
-{
-	if (err)
-		error_cnt++;
-	else
-		pass_cnt++;
-
-	fprintf(stderr, "\n");
-	return err;
-}
-
-static int __base_pr(enum libbpf_print_level level __attribute__((unused)),
-		     const char *format, va_list args)
-{
-	return vfprintf(stderr, format, args);
-}
+#undef CHECK
+#define CHECK(condition, format...) _CHECK(condition, "check", duration, format)
 
 #define BTF_END_RAW 0xdeadbeef
 #define NAME_TBD 0xdeadb33f
@@ -69,21 +46,6 @@ static int __base_pr(enum libbpf_print_level level __attribute__((unused)),
 #define MAX_NR_RAW_U32 1024
 #define BTF_LOG_BUF_SIZE 65535
 
-static struct args {
-	unsigned int raw_test_num;
-	unsigned int file_test_num;
-	unsigned int get_info_test_num;
-	unsigned int info_raw_test_num;
-	unsigned int dedup_test_num;
-	bool raw_test;
-	bool file_test;
-	bool get_info_test;
-	bool pprint_test;
-	bool always_log;
-	bool info_raw_test;
-	bool dedup_test;
-} args;
-
 static char btf_log_buf[BTF_LOG_BUF_SIZE];
 
 static struct btf_header hdr_tmpl = {
@@ -3664,7 +3626,7 @@ done:
 	return raw_btf;
 }
 
-static int do_test_raw(unsigned int test_num)
+static void do_test_raw(unsigned int test_num)
 {
 	struct btf_raw_test *test = &raw_tests[test_num - 1];
 	struct bpf_create_map_attr create_attr = {};
@@ -3674,15 +3636,16 @@ static int do_test_raw(unsigned int test_num)
 	void *raw_btf;
 	int err;
 
-	fprintf(stderr, "BTF raw test[%u] (%s): ", test_num, test->descr);
+	if (!test__start_subtest(test->descr))
+		return;
+
 	raw_btf = btf_raw_create(&hdr_tmpl,
 				 test->raw_types,
 				 test->str_sec,
 				 test->str_sec_size,
 				 &raw_btf_size, NULL);
-
 	if (!raw_btf)
-		return -1;
+		return;
 
 	hdr = raw_btf;
 
@@ -3694,7 +3657,7 @@ static int do_test_raw(unsigned int test_num)
 	*btf_log_buf = '\0';
 	btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
 			      btf_log_buf, BTF_LOG_BUF_SIZE,
-			      args.always_log);
+			      always_log);
 	free(raw_btf);
 
 	err = ((btf_fd == -1) != test->btf_load_err);
@@ -3725,32 +3688,12 @@ static int do_test_raw(unsigned int test_num)
 	      map_fd, test->map_create_err);
 
 done:
-	if (!err)
-		fprintf(stderr, "OK");
-
-	if (*btf_log_buf && (err || args.always_log))
+	if (*btf_log_buf && (err || always_log))
 		fprintf(stderr, "\n%s", btf_log_buf);
-
 	if (btf_fd != -1)
 		close(btf_fd);
 	if (map_fd != -1)
 		close(map_fd);
-
-	return err;
-}
-
-static int test_raw(void)
-{
-	unsigned int i;
-	int err = 0;
-
-	if (args.raw_test_num)
-		return count_result(do_test_raw(args.raw_test_num));
-
-	for (i = 1; i <= ARRAY_SIZE(raw_tests); i++)
-		err |= count_result(do_test_raw(i));
-
-	return err;
 }
 
 struct btf_get_info_test {
@@ -3814,11 +3757,6 @@ const struct btf_get_info_test get_info_tests[] = {
 },
 };
 
-static inline __u64 ptr_to_u64(const void *ptr)
-{
-	return (__u64)(unsigned long)ptr;
-}
-
 static int test_big_btf_info(unsigned int test_num)
 {
 	const struct btf_get_info_test *test = &get_info_tests[test_num - 1];
@@ -3851,7 +3789,7 @@ static int test_big_btf_info(unsigned int test_num)
 
 	btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
 			      btf_log_buf, BTF_LOG_BUF_SIZE,
-			      args.always_log);
+			      always_log);
 	if (CHECK(btf_fd == -1, "errno:%d", errno)) {
 		err = -1;
 		goto done;
@@ -3892,7 +3830,7 @@ static int test_big_btf_info(unsigned int test_num)
 	fprintf(stderr, "OK");
 
 done:
-	if (*btf_log_buf && (err || args.always_log))
+	if (*btf_log_buf && (err || always_log))
 		fprintf(stderr, "\n%s", btf_log_buf);
 
 	free(raw_btf);
@@ -3939,7 +3877,7 @@ static int test_btf_id(unsigned int test_num)
 
 	btf_fd[0] = bpf_load_btf(raw_btf, raw_btf_size,
 				 btf_log_buf, BTF_LOG_BUF_SIZE,
-				 args.always_log);
+				 always_log);
 	if (CHECK(btf_fd[0] == -1, "errno:%d", errno)) {
 		err = -1;
 		goto done;
@@ -4024,7 +3962,7 @@ static int test_btf_id(unsigned int test_num)
 	fprintf(stderr, "OK");
 
 done:
-	if (*btf_log_buf && (err || args.always_log))
+	if (*btf_log_buf && (err || always_log))
 		fprintf(stderr, "\n%s", btf_log_buf);
 
 	free(raw_btf);
@@ -4039,7 +3977,7 @@ done:
 	return err;
 }
 
-static int do_test_get_info(unsigned int test_num)
+static void do_test_get_info(unsigned int test_num)
 {
 	const struct btf_get_info_test *test = &get_info_tests[test_num - 1];
 	unsigned int raw_btf_size, user_btf_size, expected_nbytes;
@@ -4048,11 +3986,14 @@ static int do_test_get_info(unsigned int test_num)
 	int btf_fd = -1, err, ret;
 	uint32_t info_len;
 
-	fprintf(stderr, "BTF GET_INFO test[%u] (%s): ",
-		test_num, test->descr);
+	if (!test__start_subtest(test->descr))
+		return;
 
-	if (test->special_test)
-		return test->special_test(test_num);
+	if (test->special_test) {
+		err = test->special_test(test_num);
+		if (CHECK(err, "failed: %d\n", err))
+			return;
+	}
 
 	raw_btf = btf_raw_create(&hdr_tmpl,
 				 test->raw_types,
@@ -4061,7 +4002,7 @@ static int do_test_get_info(unsigned int test_num)
 				 &raw_btf_size, NULL);
 
 	if (!raw_btf)
-		return -1;
+		return;
 
 	*btf_log_buf = '\0';
 
@@ -4073,7 +4014,7 @@ static int do_test_get_info(unsigned int test_num)
 
 	btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
 			      btf_log_buf, BTF_LOG_BUF_SIZE,
-			      args.always_log);
+			      always_log);
 	if (CHECK(btf_fd == -1, "errno:%d", errno)) {
 		err = -1;
 		goto done;
@@ -4114,7 +4055,7 @@ static int do_test_get_info(unsigned int test_num)
 	fprintf(stderr, "OK");
 
 done:
-	if (*btf_log_buf && (err || args.always_log))
+	if (*btf_log_buf && (err || always_log))
 		fprintf(stderr, "\n%s", btf_log_buf);
 
 	free(raw_btf);
@@ -4122,22 +4063,6 @@ done:
 
 	if (btf_fd != -1)
 		close(btf_fd);
-
-	return err;
-}
-
-static int test_get_info(void)
-{
-	unsigned int i;
-	int err = 0;
-
-	if (args.get_info_test_num)
-		return count_result(do_test_get_info(args.get_info_test_num));
-
-	for (i = 1; i <= ARRAY_SIZE(get_info_tests); i++)
-		err |= count_result(do_test_get_info(i));
-
-	return err;
 }
 
 struct btf_file_test {
@@ -4151,7 +4076,7 @@ static struct btf_file_test file_tests[] = {
 	{ .file = "test_btf_nokv.o", .btf_kv_notfound = true, },
 };
 
-static int do_test_file(unsigned int test_num)
+static void do_test_file(unsigned int test_num)
 {
 	const struct btf_file_test *test = &file_tests[test_num - 1];
 	const char *expected_fnames[] = {"_dummy_tracepoint",
@@ -4169,17 +4094,17 @@ static int do_test_file(unsigned int test_num)
 	struct bpf_map *map;
 	int i, err, prog_fd;
 
-	fprintf(stderr, "BTF libbpf test[%u] (%s): ", test_num,
-		test->file);
+	if (!test__start_subtest(test->file))
+		return;
 
 	btf = btf__parse_elf(test->file, &btf_ext);
 	if (IS_ERR(btf)) {
 		if (PTR_ERR(btf) == -ENOENT) {
-			fprintf(stderr, "SKIP. No ELF %s found", BTF_ELF_SEC);
-			skip_cnt++;
-			return 0;
+			printf("%s:SKIP: No ELF %s found", __func__, BTF_ELF_SEC);
+			test__skip();
+			return;
 		}
-		return PTR_ERR(btf);
+		return;
 	}
 	btf__free(btf);
 
@@ -4188,7 +4113,7 @@ static int do_test_file(unsigned int test_num)
 
 	obj = bpf_object__open(test->file);
 	if (CHECK(IS_ERR(obj), "obj: %ld", PTR_ERR(obj)))
-		return PTR_ERR(obj);
+		return;
 
 	prog = bpf_program__next(NULL, obj);
 	if (CHECK(!prog, "Cannot find bpf_prog")) {
@@ -4310,21 +4235,6 @@ skip:
 done:
 	free(func_info);
 	bpf_object__close(obj);
-	return err;
-}
-
-static int test_file(void)
-{
-	unsigned int i;
-	int err = 0;
-
-	if (args.file_test_num)
-		return count_result(do_test_file(args.file_test_num));
-
-	for (i = 1; i <= ARRAY_SIZE(file_tests); i++)
-		err |= count_result(do_test_file(i));
-
-	return err;
 }
 
 const char *pprint_enum_str[] = {
@@ -4428,7 +4338,7 @@ static struct btf_raw_test pprint_test_template[] = {
 	.value_size = sizeof(struct pprint_mapv),
 	.key_type_id = 3,	/* unsigned int */
 	.value_type_id = 16,	/* struct pprint_mapv */
-	.max_entries = 128 * 1024,
+	.max_entries = 128,
 },
 
 {
@@ -4493,7 +4403,7 @@ static struct btf_raw_test pprint_test_template[] = {
 	.value_size = sizeof(struct pprint_mapv),
 	.key_type_id = 3,	/* unsigned int */
 	.value_type_id = 16,	/* struct pprint_mapv */
-	.max_entries = 128 * 1024,
+	.max_entries = 128,
 },
 
 {
@@ -4564,7 +4474,7 @@ static struct btf_raw_test pprint_test_template[] = {
 	.value_size = sizeof(struct pprint_mapv),
 	.key_type_id = 3,	/* unsigned int */
 	.value_type_id = 16,	/* struct pprint_mapv */
-	.max_entries = 128 * 1024,
+	.max_entries = 128,
 },
 
 #ifdef __SIZEOF_INT128__
@@ -4591,7 +4501,7 @@ static struct btf_raw_test pprint_test_template[] = {
 	.value_size = sizeof(struct pprint_mapv_int128),
 	.key_type_id = 1,
 	.value_type_id = 4,
-	.max_entries = 128 * 1024,
+	.max_entries = 128,
 	.mapv_kind = PPRINT_MAPV_KIND_INT128,
 },
 #endif
@@ -4790,7 +4700,7 @@ static int check_line(const char *expected_line, int nexpected_line,
 }
 
 
-static int do_test_pprint(int test_num)
+static void do_test_pprint(int test_num)
 {
 	const struct btf_raw_test *test = &pprint_test_template[test_num];
 	enum pprint_mapv_kind_t mapv_kind = test->mapv_kind;
@@ -4809,18 +4719,20 @@ static int do_test_pprint(int test_num)
 	uint8_t *raw_btf;
 	ssize_t nread;
 
-	fprintf(stderr, "%s(#%d)......", test->descr, test_num);
+	if (!test__start_subtest(test->descr))
+		return;
+
 	raw_btf = btf_raw_create(&hdr_tmpl, test->raw_types,
 				 test->str_sec, test->str_sec_size,
 				 &raw_btf_size, NULL);
 
 	if (!raw_btf)
-		return -1;
+		return;
 
 	*btf_log_buf = '\0';
 	btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
 			      btf_log_buf, BTF_LOG_BUF_SIZE,
-			      args.always_log);
+			      always_log);
 	free(raw_btf);
 
 	if (CHECK(btf_fd == -1, "errno:%d", errno)) {
@@ -4971,7 +4883,7 @@ done:
 		free(mapv);
 	if (!err)
 		fprintf(stderr, "OK");
-	if (*btf_log_buf && (err || args.always_log))
+	if (*btf_log_buf && (err || always_log))
 		fprintf(stderr, "\n%s", btf_log_buf);
 	if (btf_fd != -1)
 		close(btf_fd);
@@ -4981,14 +4893,11 @@ done:
 		fclose(pin_file);
 	unlink(pin_path);
 	free(line);
-
-	return err;
 }
 
-static int test_pprint(void)
+static void test_pprint(void)
 {
 	unsigned int i;
-	int err = 0;
 
 	/* test various maps with the first test template */
 	for (i = 0; i < ARRAY_SIZE(pprint_tests_meta); i++) {
@@ -4999,7 +4908,7 @@ static int test_pprint(void)
 		pprint_test_template[0].lossless_map = pprint_tests_meta[i].lossless_map;
 		pprint_test_template[0].percpu_map = pprint_tests_meta[i].percpu_map;
 
-		err |= count_result(do_test_pprint(0));
+		do_test_pprint(0);
 	}
 
 	/* test rest test templates with the first map */
@@ -5010,10 +4919,8 @@ static int test_pprint(void)
 		pprint_test_template[i].ordered_map = pprint_tests_meta[0].ordered_map;
 		pprint_test_template[i].lossless_map = pprint_tests_meta[0].lossless_map;
 		pprint_test_template[i].percpu_map = pprint_tests_meta[0].percpu_map;
-		err |= count_result(do_test_pprint(i));
+		do_test_pprint(i);
 	}
-
-	return err;
 }
 
 #define BPF_LINE_INFO_ENC(insn_off, file_off, line_off, line_num, line_col) \
@@ -6178,7 +6085,7 @@ done:
 	return err;
 }
 
-static int do_test_info_raw(unsigned int test_num)
+static void do_test_info_raw(unsigned int test_num)
 {
 	const struct prog_info_raw_test *test = &info_raw_tests[test_num - 1];
 	unsigned int raw_btf_size, linfo_str_off, linfo_size;
@@ -6187,18 +6094,19 @@ static int do_test_info_raw(unsigned int test_num)
 	const char *ret_next_str;
 	union bpf_attr attr = {};
 
-	fprintf(stderr, "BTF prog info raw test[%u] (%s): ", test_num, test->descr);
+	if (!test__start_subtest(test->descr))
+		return;
+
 	raw_btf = btf_raw_create(&hdr_tmpl, test->raw_types,
 				 test->str_sec, test->str_sec_size,
 				 &raw_btf_size, &ret_next_str);
-
 	if (!raw_btf)
-		return -1;
+		return;
 
 	*btf_log_buf = '\0';
 	btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
 			      btf_log_buf, BTF_LOG_BUF_SIZE,
-			      args.always_log);
+			      always_log);
 	free(raw_btf);
 
 	if (CHECK(btf_fd == -1, "invalid btf_fd errno:%d", errno)) {
@@ -6206,7 +6114,7 @@ static int do_test_info_raw(unsigned int test_num)
 		goto done;
 	}
 
-	if (*btf_log_buf && args.always_log)
+	if (*btf_log_buf && always_log)
 		fprintf(stderr, "\n%s", btf_log_buf);
 	*btf_log_buf = '\0';
 
@@ -6261,10 +6169,7 @@ static int do_test_info_raw(unsigned int test_num)
 		goto done;
 
 done:
-	if (!err)
-		fprintf(stderr, "OK");
-
-	if (*btf_log_buf && (err || args.always_log))
+	if (*btf_log_buf && (err || always_log))
 		fprintf(stderr, "\n%s", btf_log_buf);
 
 	if (btf_fd != -1)
@@ -6274,22 +6179,6 @@ done:
 
 	if (!IS_ERR(patched_linfo))
 		free(patched_linfo);
-
-	return err;
-}
-
-static int test_info_raw(void)
-{
-	unsigned int i;
-	int err = 0;
-
-	if (args.info_raw_test_num)
-		return count_result(do_test_info_raw(args.info_raw_test_num));
-
-	for (i = 1; i <= ARRAY_SIZE(info_raw_tests); i++)
-		err |= count_result(do_test_info_raw(i));
-
-	return err;
 }
 
 struct btf_raw_data {
@@ -6754,7 +6643,7 @@ static void dump_btf_strings(const char *strs, __u32 len)
 	}
 }
 
-static int do_test_dedup(unsigned int test_num)
+static void do_test_dedup(unsigned int test_num)
 {
 	const struct btf_dedup_test *test = &dedup_tests[test_num - 1];
 	__u32 test_nr_types, expect_nr_types, test_btf_size, expect_btf_size;
@@ -6769,13 +6658,15 @@ static int do_test_dedup(unsigned int test_num)
 	void *raw_btf;
 	int err = 0, i;
 
-	fprintf(stderr, "BTF dedup test[%u] (%s):", test_num, test->descr);
+	if (!test__start_subtest(test->descr))
+		return;
 
 	raw_btf = btf_raw_create(&hdr_tmpl, test->input.raw_types,
 				 test->input.str_sec, test->input.str_sec_size,
 				 &raw_btf_size, &ret_test_next_str);
 	if (!raw_btf)
-		return -1;
+		return;
+
 	test_btf = btf__new((__u8 *)raw_btf, raw_btf_size);
 	free(raw_btf);
 	if (CHECK(IS_ERR(test_btf), "invalid test_btf errno:%ld",
@@ -6789,7 +6680,7 @@ static int do_test_dedup(unsigned int test_num)
 				 test->expect.str_sec_size,
 				 &raw_btf_size, &ret_expect_next_str);
 	if (!raw_btf)
-		return -1;
+		return;
 	expect_btf = btf__new((__u8 *)raw_btf, raw_btf_size);
 	free(raw_btf);
 	if (CHECK(IS_ERR(expect_btf), "invalid expect_btf errno:%ld",
@@ -6894,174 +6785,27 @@ static int do_test_dedup(unsigned int test_num)
 	}
 
 done:
-	if (!err)
-		fprintf(stderr, "OK");
 	if (!IS_ERR(test_btf))
 		btf__free(test_btf);
 	if (!IS_ERR(expect_btf))
 		btf__free(expect_btf);
-
-	return err;
 }
 
-static int test_dedup(void)
+void test_btf(void)
 {
-	unsigned int i;
-	int err = 0;
+	int i;
 
-	if (args.dedup_test_num)
-		return count_result(do_test_dedup(args.dedup_test_num));
+	always_log = env.verbosity > VERBOSE_NONE;
 
+	for (i = 1; i <= ARRAY_SIZE(raw_tests); i++)
+		do_test_raw(i);
+	for (i = 1; i <= ARRAY_SIZE(get_info_tests); i++)
+		do_test_get_info(i);
+	for (i = 1; i <= ARRAY_SIZE(file_tests); i++)
+		do_test_file(i);
+	for (i = 1; i <= ARRAY_SIZE(info_raw_tests); i++)
+		do_test_info_raw(i);
 	for (i = 1; i <= ARRAY_SIZE(dedup_tests); i++)
-		err |= count_result(do_test_dedup(i));
-
-	return err;
-}
-
-static void usage(const char *cmd)
-{
-	fprintf(stderr, "Usage: %s [-l] [[-r btf_raw_test_num (1 - %zu)] |\n"
-			"\t[-g btf_get_info_test_num (1 - %zu)] |\n"
-			"\t[-f btf_file_test_num (1 - %zu)] |\n"
-			"\t[-k btf_prog_info_raw_test_num (1 - %zu)] |\n"
-			"\t[-p (pretty print test)] |\n"
-			"\t[-d btf_dedup_test_num (1 - %zu)]]\n",
-		cmd, ARRAY_SIZE(raw_tests), ARRAY_SIZE(get_info_tests),
-		ARRAY_SIZE(file_tests), ARRAY_SIZE(info_raw_tests),
-		ARRAY_SIZE(dedup_tests));
-}
-
-static int parse_args(int argc, char **argv)
-{
-	const char *optstr = "hlpk:f:r:g:d:";
-	int opt;
-
-	while ((opt = getopt(argc, argv, optstr)) != -1) {
-		switch (opt) {
-		case 'l':
-			args.always_log = true;
-			break;
-		case 'f':
-			args.file_test_num = atoi(optarg);
-			args.file_test = true;
-			break;
-		case 'r':
-			args.raw_test_num = atoi(optarg);
-			args.raw_test = true;
-			break;
-		case 'g':
-			args.get_info_test_num = atoi(optarg);
-			args.get_info_test = true;
-			break;
-		case 'p':
-			args.pprint_test = true;
-			break;
-		case 'k':
-			args.info_raw_test_num = atoi(optarg);
-			args.info_raw_test = true;
-			break;
-		case 'd':
-			args.dedup_test_num = atoi(optarg);
-			args.dedup_test = true;
-			break;
-		case 'h':
-			usage(argv[0]);
-			exit(0);
-		default:
-			usage(argv[0]);
-			return -1;
-		}
-	}
-
-	if (args.raw_test_num &&
-	    (args.raw_test_num < 1 ||
-	     args.raw_test_num > ARRAY_SIZE(raw_tests))) {
-		fprintf(stderr, "BTF raw test number must be [1 - %zu]\n",
-			ARRAY_SIZE(raw_tests));
-		return -1;
-	}
-
-	if (args.file_test_num &&
-	    (args.file_test_num < 1 ||
-	     args.file_test_num > ARRAY_SIZE(file_tests))) {
-		fprintf(stderr, "BTF file test number must be [1 - %zu]\n",
-			ARRAY_SIZE(file_tests));
-		return -1;
-	}
-
-	if (args.get_info_test_num &&
-	    (args.get_info_test_num < 1 ||
-	     args.get_info_test_num > ARRAY_SIZE(get_info_tests))) {
-		fprintf(stderr, "BTF get info test number must be [1 - %zu]\n",
-			ARRAY_SIZE(get_info_tests));
-		return -1;
-	}
-
-	if (args.info_raw_test_num &&
-	    (args.info_raw_test_num < 1 ||
-	     args.info_raw_test_num > ARRAY_SIZE(info_raw_tests))) {
-		fprintf(stderr, "BTF prog info raw test number must be [1 - %zu]\n",
-			ARRAY_SIZE(info_raw_tests));
-		return -1;
-	}
-
-	if (args.dedup_test_num &&
-	    (args.dedup_test_num < 1 ||
-	     args.dedup_test_num > ARRAY_SIZE(dedup_tests))) {
-		fprintf(stderr, "BTF dedup test number must be [1 - %zu]\n",
-			ARRAY_SIZE(dedup_tests));
-		return -1;
-	}
-
-	return 0;
-}
-
-static void print_summary(void)
-{
-	fprintf(stderr, "PASS:%u SKIP:%u FAIL:%u\n",
-		pass_cnt - skip_cnt, skip_cnt, error_cnt);
-}
-
-int main(int argc, char **argv)
-{
-	int err = 0;
-
-	err = parse_args(argc, argv);
-	if (err)
-		return err;
-
-	if (args.always_log)
-		libbpf_set_print(__base_pr);
-
-	if (args.raw_test)
-		err |= test_raw();
-
-	if (args.get_info_test)
-		err |= test_get_info();
-
-	if (args.file_test)
-		err |= test_file();
-
-	if (args.pprint_test)
-		err |= test_pprint();
-
-	if (args.info_raw_test)
-		err |= test_info_raw();
-
-	if (args.dedup_test)
-		err |= test_dedup();
-
-	if (args.raw_test || args.get_info_test || args.file_test ||
-	    args.pprint_test || args.info_raw_test || args.dedup_test)
-		goto done;
-
-	err |= test_raw();
-	err |= test_get_info();
-	err |= test_file();
-	err |= test_info_raw();
-	err |= test_dedup();
-
-done:
-	print_summary();
-	return err;
+		do_test_dedup(i);
+	test_pprint();
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
index 39fb81d9daeb..c60091ee8a21 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
@@ -129,6 +129,109 @@ done:
 	return err;
 }
 
+static char *dump_buf;
+static size_t dump_buf_sz;
+static FILE *dump_buf_file;
+
+void test_btf_dump_incremental(void)
+{
+	struct btf *btf = NULL;
+	struct btf_dump *d = NULL;
+	struct btf_dump_opts opts;
+	int id, err, i;
+
+	dump_buf_file = open_memstream(&dump_buf, &dump_buf_sz);
+	if (!ASSERT_OK_PTR(dump_buf_file, "dump_memstream"))
+		return;
+	btf = btf__new_empty();
+	if (!ASSERT_OK_PTR(btf, "new_empty"))
+		goto err_out;
+	opts.ctx = dump_buf_file;
+	d = btf_dump__new(btf, NULL, &opts, btf_dump_printf);
+	if (!ASSERT_OK(libbpf_get_error(d), "btf_dump__new"))
+		goto err_out;
+
+	/* First, generate BTF corresponding to the following C code:
+	 *
+	 * enum { VAL = 1 };
+	 *
+	 * struct s { int x; };
+	 *
+	 */
+	id = btf__add_enum(btf, NULL, 4);
+	ASSERT_EQ(id, 1, "enum_id");
+	err = btf__add_enum_value(btf, "VAL", 1);
+	ASSERT_OK(err, "enum_val_ok");
+
+	id = btf__add_int(btf, "int", 4, BTF_INT_SIGNED);
+	ASSERT_EQ(id, 2, "int_id");
+
+	id = btf__add_struct(btf, "s", 4);
+	ASSERT_EQ(id, 3, "struct_id");
+	err = btf__add_field(btf, "x", 2, 0, 0);
+	ASSERT_OK(err, "field_ok");
+
+	for (i = 1; i <= btf__get_nr_types(btf); i++) {
+		err = btf_dump__dump_type(d, i);
+		ASSERT_OK(err, "dump_type_ok");
+	}
+
+	fflush(dump_buf_file);
+	dump_buf[dump_buf_sz] = 0; /* some libc implementations don't do this */
+	ASSERT_STREQ(dump_buf,
+"enum {\n"
+"	VAL = 1,\n"
+"};\n"
+"\n"
+"struct s {\n"
+"	int x;\n"
+"};\n\n", "c_dump1");
+
+	/* Now, after dumping original BTF, append another struct that embeds
+	 * anonymous enum. It also has a name conflict with the first struct:
+	 *
+	 * struct s___2 {
+	 *     enum { VAL___2 = 1 } x;
+	 *     struct s s;
+	 * };
+	 *
+	 * This will test that btf_dump'er maintains internal state properly.
+	 * Note that VAL___2 enum value. It's because we've already emitted
+	 * that enum as a global anonymous enum, so btf_dump will ensure that
+	 * enum values don't conflict;
+	 *
+	 */
+	fseek(dump_buf_file, 0, SEEK_SET);
+
+	id = btf__add_struct(btf, "s", 4);
+	ASSERT_EQ(id, 4, "struct_id");
+	err = btf__add_field(btf, "x", 1, 0, 0);
+	ASSERT_OK(err, "field_ok");
+	err = btf__add_field(btf, "s", 3, 32, 0);
+	ASSERT_OK(err, "field_ok");
+
+	for (i = 1; i <= btf__get_nr_types(btf); i++) {
+		err = btf_dump__dump_type(d, i);
+		ASSERT_OK(err, "dump_type_ok");
+	}
+
+	fflush(dump_buf_file);
+	dump_buf[dump_buf_sz] = 0; /* some libc implementations don't do this */
+	ASSERT_STREQ(dump_buf,
+"struct s___2 {\n"
+"	enum {\n"
+"		VAL___2 = 1,\n"
+"	} x;\n"
+"	struct s s;\n"
+"};\n\n" , "c_dump1");
+
+err_out:
+	fclose(dump_buf_file);
+	free(dump_buf);
+	btf_dump__free(d);
+	btf__free(btf);
+}
+
 void test_btf_dump() {
 	int i;
 
@@ -140,4 +243,6 @@ void test_btf_dump() {
 
 		test_btf_dump_case(i, &btf_dump_test_cases[i]);
 	}
+	if (test__start_subtest("btf_dump: incremental"))
+		test_btf_dump_incremental();
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_endian.c b/tools/testing/selftests/bpf/prog_tests/btf_endian.c
new file mode 100644
index 000000000000..8c52d72c876e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/btf_endian.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#define _GNU_SOURCE
+#include <string.h>
+#include <byteswap.h>
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+static int duration = 0;
+
+void test_btf_endian() {
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+	enum btf_endianness endian = BTF_LITTLE_ENDIAN;
+#elif __BYTE_ORDER == __BIG_ENDIAN
+	enum btf_endianness endian = BTF_BIG_ENDIAN;
+#else
+#error "Unrecognized __BYTE_ORDER"
+#endif
+	enum btf_endianness swap_endian = 1 - endian;
+	struct btf *btf = NULL, *swap_btf = NULL;
+	const void *raw_data, *swap_raw_data;
+	const struct btf_type *t;
+	const struct btf_header *hdr;
+	__u32 raw_sz, swap_raw_sz;
+	int var_id;
+
+	/* Load BTF in native endianness */
+	btf = btf__parse_elf("btf_dump_test_case_syntax.o", NULL);
+	if (!ASSERT_OK_PTR(btf, "parse_native_btf"))
+		goto err_out;
+
+	ASSERT_EQ(btf__endianness(btf), endian, "endian");
+	btf__set_endianness(btf, swap_endian);
+	ASSERT_EQ(btf__endianness(btf), swap_endian, "endian");
+
+	/* Get raw BTF data in non-native endianness... */
+	raw_data = btf__get_raw_data(btf, &raw_sz);
+	if (!ASSERT_OK_PTR(raw_data, "raw_data_inverted"))
+		goto err_out;
+
+	/* ...and open it as a new BTF instance */
+	swap_btf = btf__new(raw_data, raw_sz);
+	if (!ASSERT_OK_PTR(swap_btf, "parse_swap_btf"))
+		goto err_out;
+
+	ASSERT_EQ(btf__endianness(swap_btf), swap_endian, "endian");
+	ASSERT_EQ(btf__get_nr_types(swap_btf), btf__get_nr_types(btf), "nr_types");
+
+	swap_raw_data = btf__get_raw_data(swap_btf, &swap_raw_sz);
+	if (!ASSERT_OK_PTR(swap_raw_data, "swap_raw_data"))
+		goto err_out;
+
+	/* both raw data should be identical (with non-native endianness) */
+	ASSERT_OK(memcmp(raw_data, swap_raw_data, raw_sz), "mem_identical");
+
+	/* make sure that at least BTF header data is really swapped */
+	hdr = swap_raw_data;
+	ASSERT_EQ(bswap_16(hdr->magic), BTF_MAGIC, "btf_magic_swapped");
+	ASSERT_EQ(raw_sz, swap_raw_sz, "raw_sizes");
+
+	/* swap it back to native endianness */
+	btf__set_endianness(swap_btf, endian);
+	swap_raw_data = btf__get_raw_data(swap_btf, &swap_raw_sz);
+	if (!ASSERT_OK_PTR(swap_raw_data, "swap_raw_data"))
+		goto err_out;
+
+	/* now header should have native BTF_MAGIC */
+	hdr = swap_raw_data;
+	ASSERT_EQ(hdr->magic, BTF_MAGIC, "btf_magic_native");
+	ASSERT_EQ(raw_sz, swap_raw_sz, "raw_sizes");
+
+	/* now modify original BTF */
+	var_id = btf__add_var(btf, "some_var", BTF_VAR_GLOBAL_ALLOCATED, 1);
+	CHECK(var_id <= 0, "var_id", "failed %d\n", var_id);
+
+	btf__free(swap_btf);
+	swap_btf = NULL;
+
+	btf__set_endianness(btf, swap_endian);
+	raw_data = btf__get_raw_data(btf, &raw_sz);
+	if (!ASSERT_OK_PTR(raw_data, "raw_data_inverted"))
+		goto err_out;
+
+	/* and re-open swapped raw data again */
+	swap_btf = btf__new(raw_data, raw_sz);
+	if (!ASSERT_OK_PTR(swap_btf, "parse_swap_btf"))
+		goto err_out;
+
+	ASSERT_EQ(btf__endianness(swap_btf), swap_endian, "endian");
+	ASSERT_EQ(btf__get_nr_types(swap_btf), btf__get_nr_types(btf), "nr_types");
+
+	/* the type should appear as if it was stored in native endianness */
+	t = btf__type_by_id(swap_btf, var_id);
+	ASSERT_STREQ(btf__str_by_offset(swap_btf, t->name_off), "some_var", "var_name");
+	ASSERT_EQ(btf_var(t)->linkage, BTF_VAR_GLOBAL_ALLOCATED, "var_linkage");
+	ASSERT_EQ(t->type, 1, "var_type");
+
+err_out:
+	btf__free(btf);
+	btf__free(swap_btf);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c b/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c
index 6ccecbd39476..76ebe4c250f1 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c
@@ -53,12 +53,12 @@ static int kern_sync_rcu(void)
 	return err;
 }
 
-void test_btf_map_in_map(void)
+static void test_lookup_update(void)
 {
-	int err, key = 0, val, i;
+	int map1_fd, map2_fd, map3_fd, map4_fd, map5_fd, map1_id, map2_id;
+	int outer_arr_fd, outer_hash_fd, outer_arr_dyn_fd;
 	struct test_btf_map_in_map *skel;
-	int outer_arr_fd, outer_hash_fd;
-	int fd, map1_fd, map2_fd, map1_id, map2_id;
+	int err, key = 0, val, i, fd;
 
 	skel = test_btf_map_in_map__open_and_load();
 	if (CHECK(!skel, "skel_open", "failed to open&load skeleton\n"))
@@ -70,32 +70,45 @@ void test_btf_map_in_map(void)
 
 	map1_fd = bpf_map__fd(skel->maps.inner_map1);
 	map2_fd = bpf_map__fd(skel->maps.inner_map2);
+	map3_fd = bpf_map__fd(skel->maps.inner_map3);
+	map4_fd = bpf_map__fd(skel->maps.inner_map4);
+	map5_fd = bpf_map__fd(skel->maps.inner_map5);
+	outer_arr_dyn_fd = bpf_map__fd(skel->maps.outer_arr_dyn);
 	outer_arr_fd = bpf_map__fd(skel->maps.outer_arr);
 	outer_hash_fd = bpf_map__fd(skel->maps.outer_hash);
 
-	/* inner1 = input, inner2 = input + 1 */
-	map1_fd = bpf_map__fd(skel->maps.inner_map1);
+	/* inner1 = input, inner2 = input + 1, inner3 = input + 2 */
 	bpf_map_update_elem(outer_arr_fd, &key, &map1_fd, 0);
-	map2_fd = bpf_map__fd(skel->maps.inner_map2);
 	bpf_map_update_elem(outer_hash_fd, &key, &map2_fd, 0);
+	bpf_map_update_elem(outer_arr_dyn_fd, &key, &map3_fd, 0);
 	skel->bss->input = 1;
 	usleep(1);
-
 	bpf_map_lookup_elem(map1_fd, &key, &val);
 	CHECK(val != 1, "inner1", "got %d != exp %d\n", val, 1);
 	bpf_map_lookup_elem(map2_fd, &key, &val);
 	CHECK(val != 2, "inner2", "got %d != exp %d\n", val, 2);
+	bpf_map_lookup_elem(map3_fd, &key, &val);
+	CHECK(val != 3, "inner3", "got %d != exp %d\n", val, 3);
 
-	/* inner1 = input + 1, inner2 = input */
+	/* inner2 = input, inner1 = input + 1, inner4 = input + 2 */
 	bpf_map_update_elem(outer_arr_fd, &key, &map2_fd, 0);
 	bpf_map_update_elem(outer_hash_fd, &key, &map1_fd, 0);
+	bpf_map_update_elem(outer_arr_dyn_fd, &key, &map4_fd, 0);
 	skel->bss->input = 3;
 	usleep(1);
-
 	bpf_map_lookup_elem(map1_fd, &key, &val);
 	CHECK(val != 4, "inner1", "got %d != exp %d\n", val, 4);
 	bpf_map_lookup_elem(map2_fd, &key, &val);
 	CHECK(val != 3, "inner2", "got %d != exp %d\n", val, 3);
+	bpf_map_lookup_elem(map4_fd, &key, &val);
+	CHECK(val != 5, "inner4", "got %d != exp %d\n", val, 5);
+
+	/* inner5 = input + 2 */
+	bpf_map_update_elem(outer_arr_dyn_fd, &key, &map5_fd, 0);
+	skel->bss->input = 5;
+	usleep(1);
+	bpf_map_lookup_elem(map5_fd, &key, &val);
+	CHECK(val != 7, "inner5", "got %d != exp %d\n", val, 7);
 
 	for (i = 0; i < 5; i++) {
 		val = i % 2 ? map1_fd : map2_fd;
@@ -106,7 +119,13 @@ void test_btf_map_in_map(void)
 		}
 		err = bpf_map_update_elem(outer_arr_fd, &key, &val, 0);
 		if (CHECK_FAIL(err)) {
-			printf("failed to update hash_of_maps on iter #%d\n", i);
+			printf("failed to update array_of_maps on iter #%d\n", i);
+			goto cleanup;
+		}
+		val = i % 2 ? map4_fd : map5_fd;
+		err = bpf_map_update_elem(outer_arr_dyn_fd, &key, &val, 0);
+		if (CHECK_FAIL(err)) {
+			printf("failed to update array_of_maps (dyn) on iter #%d\n", i);
 			goto cleanup;
 		}
 	}
@@ -143,3 +162,36 @@ void test_btf_map_in_map(void)
 cleanup:
 	test_btf_map_in_map__destroy(skel);
 }
+
+static void test_diff_size(void)
+{
+	struct test_btf_map_in_map *skel;
+	int err, inner_map_fd, zero = 0;
+
+	skel = test_btf_map_in_map__open_and_load();
+	if (CHECK(!skel, "skel_open", "failed to open&load skeleton\n"))
+		return;
+
+	inner_map_fd = bpf_map__fd(skel->maps.sockarr_sz2);
+	err = bpf_map_update_elem(bpf_map__fd(skel->maps.outer_sockarr), &zero,
+				  &inner_map_fd, 0);
+	CHECK(err, "outer_sockarr inner map size check",
+	      "cannot use a different size inner_map\n");
+
+	inner_map_fd = bpf_map__fd(skel->maps.inner_map_sz2);
+	err = bpf_map_update_elem(bpf_map__fd(skel->maps.outer_arr), &zero,
+				  &inner_map_fd, 0);
+	CHECK(!err, "outer_arr inner map size check",
+	      "incorrectly updated with a different size inner_map\n");
+
+	test_btf_map_in_map__destroy(skel);
+}
+
+void test_btf_map_in_map(void)
+{
+	if (test__start_subtest("lookup_update"))
+		test_lookup_update();
+
+	if (test__start_subtest("diff_size"))
+		test_diff_size();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c b/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c
new file mode 100644
index 000000000000..86ccf37e26b3
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c
@@ -0,0 +1,234 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#define _GNU_SOURCE
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <sched.h>
+#include <linux/compiler.h>
+#include <bpf/libbpf.h>
+
+#include "network_helpers.h"
+#include "test_progs.h"
+#include "test_btf_skc_cls_ingress.skel.h"
+
+static struct test_btf_skc_cls_ingress *skel;
+struct sockaddr_in6 srv_sa6;
+static __u32 duration;
+
+#define PROG_PIN_FILE "/sys/fs/bpf/btf_skc_cls_ingress"
+
+static int write_sysctl(const char *sysctl, const char *value)
+{
+	int fd, err, len;
+
+	fd = open(sysctl, O_WRONLY);
+	if (CHECK(fd == -1, "open sysctl", "open(%s): %s (%d)\n",
+		  sysctl, strerror(errno), errno))
+		return -1;
+
+	len = strlen(value);
+	err = write(fd, value, len);
+	close(fd);
+	if (CHECK(err != len, "write sysctl",
+		  "write(%s, %s, %d): err:%d %s (%d)\n",
+		  sysctl, value, len, err, strerror(errno), errno))
+		return -1;
+
+	return 0;
+}
+
+static int prepare_netns(void)
+{
+	if (CHECK(unshare(CLONE_NEWNET), "create netns",
+		  "unshare(CLONE_NEWNET): %s (%d)",
+		  strerror(errno), errno))
+		return -1;
+
+	if (CHECK(system("ip link set dev lo up"),
+		  "ip link set dev lo up", "failed\n"))
+		return -1;
+
+	if (CHECK(system("tc qdisc add dev lo clsact"),
+		  "tc qdisc add dev lo clsact", "failed\n"))
+		return -1;
+
+	if (CHECK(system("tc filter add dev lo ingress bpf direct-action object-pinned " PROG_PIN_FILE),
+		  "install tc cls-prog at ingress", "failed\n"))
+		return -1;
+
+	/* Ensure 20 bytes options (i.e. in total 40 bytes tcp header) for the
+	 * bpf_tcp_gen_syncookie() helper.
+	 */
+	if (write_sysctl("/proc/sys/net/ipv4/tcp_window_scaling", "1") ||
+	    write_sysctl("/proc/sys/net/ipv4/tcp_timestamps", "1") ||
+	    write_sysctl("/proc/sys/net/ipv4/tcp_sack", "1"))
+		return -1;
+
+	return 0;
+}
+
+static void reset_test(void)
+{
+	memset(&skel->bss->srv_sa6, 0, sizeof(skel->bss->srv_sa6));
+	skel->bss->listen_tp_sport = 0;
+	skel->bss->req_sk_sport = 0;
+	skel->bss->recv_cookie = 0;
+	skel->bss->gen_cookie = 0;
+	skel->bss->linum = 0;
+}
+
+static void print_err_line(void)
+{
+	if (skel->bss->linum)
+		printf("bpf prog error at line %u\n", skel->bss->linum);
+}
+
+static void test_conn(void)
+{
+	int listen_fd = -1, cli_fd = -1, err;
+	socklen_t addrlen = sizeof(srv_sa6);
+	int srv_port;
+
+	if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "1"))
+		return;
+
+	listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
+	if (CHECK_FAIL(listen_fd == -1))
+		return;
+
+	err = getsockname(listen_fd, (struct sockaddr *)&srv_sa6, &addrlen);
+	if (CHECK(err, "getsockname(listen_fd)", "err:%d errno:%d\n", err,
+		  errno))
+		goto done;
+	memcpy(&skel->bss->srv_sa6, &srv_sa6, sizeof(srv_sa6));
+	srv_port = ntohs(srv_sa6.sin6_port);
+
+	cli_fd = connect_to_fd(listen_fd, 0);
+	if (CHECK_FAIL(cli_fd == -1))
+		goto done;
+
+	if (CHECK(skel->bss->listen_tp_sport != srv_port ||
+		  skel->bss->req_sk_sport != srv_port,
+		  "Unexpected sk src port",
+		  "listen_tp_sport:%u req_sk_sport:%u expected:%u\n",
+		  skel->bss->listen_tp_sport, skel->bss->req_sk_sport,
+		  srv_port))
+		goto done;
+
+	if (CHECK(skel->bss->gen_cookie || skel->bss->recv_cookie,
+		  "Unexpected syncookie states",
+		  "gen_cookie:%u recv_cookie:%u\n",
+		  skel->bss->gen_cookie, skel->bss->recv_cookie))
+		goto done;
+
+	CHECK(skel->bss->linum, "bpf prog detected error", "at line %u\n",
+	      skel->bss->linum);
+
+done:
+	if (listen_fd != -1)
+		close(listen_fd);
+	if (cli_fd != -1)
+		close(cli_fd);
+}
+
+static void test_syncookie(void)
+{
+	int listen_fd = -1, cli_fd = -1, err;
+	socklen_t addrlen = sizeof(srv_sa6);
+	int srv_port;
+
+	/* Enforce syncookie mode */
+	if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "2"))
+		return;
+
+	listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
+	if (CHECK_FAIL(listen_fd == -1))
+		return;
+
+	err = getsockname(listen_fd, (struct sockaddr *)&srv_sa6, &addrlen);
+	if (CHECK(err, "getsockname(listen_fd)", "err:%d errno:%d\n", err,
+		  errno))
+		goto done;
+	memcpy(&skel->bss->srv_sa6, &srv_sa6, sizeof(srv_sa6));
+	srv_port = ntohs(srv_sa6.sin6_port);
+
+	cli_fd = connect_to_fd(listen_fd, 0);
+	if (CHECK_FAIL(cli_fd == -1))
+		goto done;
+
+	if (CHECK(skel->bss->listen_tp_sport != srv_port,
+		  "Unexpected tp src port",
+		  "listen_tp_sport:%u expected:%u\n",
+		  skel->bss->listen_tp_sport, srv_port))
+		goto done;
+
+	if (CHECK(skel->bss->req_sk_sport,
+		  "Unexpected req_sk src port",
+		  "req_sk_sport:%u expected:0\n",
+		   skel->bss->req_sk_sport))
+		goto done;
+
+	if (CHECK(!skel->bss->gen_cookie ||
+		  skel->bss->gen_cookie != skel->bss->recv_cookie,
+		  "Unexpected syncookie states",
+		  "gen_cookie:%u recv_cookie:%u\n",
+		  skel->bss->gen_cookie, skel->bss->recv_cookie))
+		goto done;
+
+	CHECK(skel->bss->linum, "bpf prog detected error", "at line %u\n",
+	      skel->bss->linum);
+
+done:
+	if (listen_fd != -1)
+		close(listen_fd);
+	if (cli_fd != -1)
+		close(cli_fd);
+}
+
+struct test {
+	const char *desc;
+	void (*run)(void);
+};
+
+#define DEF_TEST(name) { #name, test_##name }
+static struct test tests[] = {
+	DEF_TEST(conn),
+	DEF_TEST(syncookie),
+};
+
+void test_btf_skc_cls_ingress(void)
+{
+	int i, err;
+
+	skel = test_btf_skc_cls_ingress__open_and_load();
+	if (CHECK(!skel, "test_btf_skc_cls_ingress__open_and_load", "failed\n"))
+		return;
+
+	err = bpf_program__pin(skel->progs.cls_ingress, PROG_PIN_FILE);
+	if (CHECK(err, "bpf_program__pin",
+		  "cannot pin bpf prog to %s. err:%d\n", PROG_PIN_FILE, err)) {
+		test_btf_skc_cls_ingress__destroy(skel);
+		return;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(tests); i++) {
+		if (!test__start_subtest(tests[i].desc))
+			continue;
+
+		if (prepare_netns())
+			break;
+
+		tests[i].run();
+
+		print_err_line();
+		reset_test();
+	}
+
+	bpf_program__unpin(skel->progs.cls_ingress, PROG_PIN_FILE);
+	test_btf_skc_cls_ingress__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_write.c b/tools/testing/selftests/bpf/prog_tests/btf_write.c
new file mode 100644
index 000000000000..314e1e7c36df
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/btf_write.c
@@ -0,0 +1,244 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+static int duration = 0;
+
+void test_btf_write() {
+	const struct btf_var_secinfo *vi;
+	const struct btf_type *t;
+	const struct btf_member *m;
+	const struct btf_enum *v;
+	const struct btf_param *p;
+	struct btf *btf;
+	int id, err, str_off;
+
+	btf = btf__new_empty();
+	if (CHECK(IS_ERR(btf), "new_empty", "failed: %ld\n", PTR_ERR(btf)))
+		return;
+
+	str_off = btf__find_str(btf, "int");
+	ASSERT_EQ(str_off, -ENOENT, "int_str_missing_off");
+
+	str_off = btf__add_str(btf, "int");
+	ASSERT_EQ(str_off, 1, "int_str_off");
+
+	str_off = btf__find_str(btf, "int");
+	ASSERT_EQ(str_off, 1, "int_str_found_off");
+
+	/* BTF_KIND_INT */
+	id = btf__add_int(btf, "int", 4,  BTF_INT_SIGNED);
+	ASSERT_EQ(id, 1, "int_id");
+
+	t = btf__type_by_id(btf, 1);
+	/* should re-use previously added "int" string */
+	ASSERT_EQ(t->name_off, str_off, "int_name_off");
+	ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "int", "int_name");
+	ASSERT_EQ(btf_kind(t), BTF_KIND_INT, "int_kind");
+	ASSERT_EQ(t->size, 4, "int_sz");
+	ASSERT_EQ(btf_int_encoding(t), BTF_INT_SIGNED, "int_enc");
+	ASSERT_EQ(btf_int_bits(t), 32, "int_bits");
+
+	/* invalid int size */
+	id = btf__add_int(btf, "bad sz int", 7, 0);
+	ASSERT_ERR(id, "int_bad_sz");
+	/* invalid encoding */
+	id = btf__add_int(btf, "bad enc int", 4, 123);
+	ASSERT_ERR(id, "int_bad_enc");
+	/* NULL name */
+	id = btf__add_int(btf, NULL, 4, 0);
+	ASSERT_ERR(id, "int_bad_null_name");
+	/* empty name */
+	id = btf__add_int(btf, "", 4, 0);
+	ASSERT_ERR(id, "int_bad_empty_name");
+
+	/* PTR/CONST/VOLATILE/RESTRICT */
+	id = btf__add_ptr(btf, 1);
+	ASSERT_EQ(id, 2, "ptr_id");
+	t = btf__type_by_id(btf, 2);
+	ASSERT_EQ(btf_kind(t), BTF_KIND_PTR, "ptr_kind");
+	ASSERT_EQ(t->type, 1, "ptr_type");
+
+	id = btf__add_const(btf, 5); /* points forward to restrict */
+	ASSERT_EQ(id, 3, "const_id");
+	t = btf__type_by_id(btf, 3);
+	ASSERT_EQ(btf_kind(t), BTF_KIND_CONST, "const_kind");
+	ASSERT_EQ(t->type, 5, "const_type");
+
+	id = btf__add_volatile(btf, 3);
+	ASSERT_EQ(id, 4, "volatile_id");
+	t = btf__type_by_id(btf, 4);
+	ASSERT_EQ(btf_kind(t), BTF_KIND_VOLATILE, "volatile_kind");
+	ASSERT_EQ(t->type, 3, "volatile_type");
+
+	id = btf__add_restrict(btf, 4);
+	ASSERT_EQ(id, 5, "restrict_id");
+	t = btf__type_by_id(btf, 5);
+	ASSERT_EQ(btf_kind(t), BTF_KIND_RESTRICT, "restrict_kind");
+	ASSERT_EQ(t->type, 4, "restrict_type");
+
+	/* ARRAY */
+	id = btf__add_array(btf, 1, 2, 10); /* int *[10] */
+	ASSERT_EQ(id, 6, "array_id");
+	t = btf__type_by_id(btf, 6);
+	ASSERT_EQ(btf_kind(t), BTF_KIND_ARRAY, "array_kind");
+	ASSERT_EQ(btf_array(t)->index_type, 1, "array_index_type");
+	ASSERT_EQ(btf_array(t)->type, 2, "array_elem_type");
+	ASSERT_EQ(btf_array(t)->nelems, 10, "array_nelems");
+
+	/* STRUCT */
+	err = btf__add_field(btf, "field", 1, 0, 0);
+	ASSERT_ERR(err, "no_struct_field");
+	id = btf__add_struct(btf, "s1", 8);
+	ASSERT_EQ(id, 7, "struct_id");
+	err = btf__add_field(btf, "f1", 1, 0, 0);
+	ASSERT_OK(err, "f1_res");
+	err = btf__add_field(btf, "f2", 1, 32, 16);
+	ASSERT_OK(err, "f2_res");
+
+	t = btf__type_by_id(btf, 7);
+	ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "s1", "struct_name");
+	ASSERT_EQ(btf_kind(t), BTF_KIND_STRUCT, "struct_kind");
+	ASSERT_EQ(btf_vlen(t), 2, "struct_vlen");
+	ASSERT_EQ(btf_kflag(t), true, "struct_kflag");
+	ASSERT_EQ(t->size, 8, "struct_sz");
+	m = btf_members(t) + 0;
+	ASSERT_STREQ(btf__str_by_offset(btf, m->name_off), "f1", "f1_name");
+	ASSERT_EQ(m->type, 1, "f1_type");
+	ASSERT_EQ(btf_member_bit_offset(t, 0), 0, "f1_bit_off");
+	ASSERT_EQ(btf_member_bitfield_size(t, 0), 0, "f1_bit_sz");
+	m = btf_members(t) + 1;
+	ASSERT_STREQ(btf__str_by_offset(btf, m->name_off), "f2", "f2_name");
+	ASSERT_EQ(m->type, 1, "f2_type");
+	ASSERT_EQ(btf_member_bit_offset(t, 1), 32, "f2_bit_off");
+	ASSERT_EQ(btf_member_bitfield_size(t, 1), 16, "f2_bit_sz");
+
+	/* UNION */
+	id = btf__add_union(btf, "u1", 8);
+	ASSERT_EQ(id, 8, "union_id");
+
+	/* invalid, non-zero offset */
+	err = btf__add_field(btf, "field", 1, 1, 0);
+	ASSERT_ERR(err, "no_struct_field");
+
+	err = btf__add_field(btf, "f1", 1, 0, 16);
+	ASSERT_OK(err, "f1_res");
+
+	t = btf__type_by_id(btf, 8);
+	ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "u1", "union_name");
+	ASSERT_EQ(btf_kind(t), BTF_KIND_UNION, "union_kind");
+	ASSERT_EQ(btf_vlen(t), 1, "union_vlen");
+	ASSERT_EQ(btf_kflag(t), true, "union_kflag");
+	ASSERT_EQ(t->size, 8, "union_sz");
+	m = btf_members(t) + 0;
+	ASSERT_STREQ(btf__str_by_offset(btf, m->name_off), "f1", "f1_name");
+	ASSERT_EQ(m->type, 1, "f1_type");
+	ASSERT_EQ(btf_member_bit_offset(t, 0), 0, "f1_bit_off");
+	ASSERT_EQ(btf_member_bitfield_size(t, 0), 16, "f1_bit_sz");
+
+	/* ENUM */
+	id = btf__add_enum(btf, "e1", 4);
+	ASSERT_EQ(id, 9, "enum_id");
+	err = btf__add_enum_value(btf, "v1", 1);
+	ASSERT_OK(err, "v1_res");
+	err = btf__add_enum_value(btf, "v2", 2);
+	ASSERT_OK(err, "v2_res");
+
+	t = btf__type_by_id(btf, 9);
+	ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "e1", "enum_name");
+	ASSERT_EQ(btf_kind(t), BTF_KIND_ENUM, "enum_kind");
+	ASSERT_EQ(btf_vlen(t), 2, "enum_vlen");
+	ASSERT_EQ(t->size, 4, "enum_sz");
+	v = btf_enum(t) + 0;
+	ASSERT_STREQ(btf__str_by_offset(btf, v->name_off), "v1", "v1_name");
+	ASSERT_EQ(v->val, 1, "v1_val");
+	v = btf_enum(t) + 1;
+	ASSERT_STREQ(btf__str_by_offset(btf, v->name_off), "v2", "v2_name");
+	ASSERT_EQ(v->val, 2, "v2_val");
+
+	/* FWDs */
+	id = btf__add_fwd(btf, "struct_fwd", BTF_FWD_STRUCT);
+	ASSERT_EQ(id, 10, "struct_fwd_id");
+	t = btf__type_by_id(btf, 10);
+	ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "struct_fwd", "fwd_name");
+	ASSERT_EQ(btf_kind(t), BTF_KIND_FWD, "fwd_kind");
+	ASSERT_EQ(btf_kflag(t), 0, "fwd_kflag");
+
+	id = btf__add_fwd(btf, "union_fwd", BTF_FWD_UNION);
+	ASSERT_EQ(id, 11, "union_fwd_id");
+	t = btf__type_by_id(btf, 11);
+	ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "union_fwd", "fwd_name");
+	ASSERT_EQ(btf_kind(t), BTF_KIND_FWD, "fwd_kind");
+	ASSERT_EQ(btf_kflag(t), 1, "fwd_kflag");
+
+	id = btf__add_fwd(btf, "enum_fwd", BTF_FWD_ENUM);
+	ASSERT_EQ(id, 12, "enum_fwd_id");
+	t = btf__type_by_id(btf, 12);
+	ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "enum_fwd", "fwd_name");
+	ASSERT_EQ(btf_kind(t), BTF_KIND_ENUM, "enum_fwd_kind");
+	ASSERT_EQ(btf_vlen(t), 0, "enum_fwd_kind");
+	ASSERT_EQ(t->size, 4, "enum_fwd_sz");
+
+	/* TYPEDEF */
+	id = btf__add_typedef(btf, "typedef1", 1);
+	ASSERT_EQ(id, 13, "typedef_fwd_id");
+	t = btf__type_by_id(btf, 13);
+	ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "typedef1", "typedef_name");
+	ASSERT_EQ(btf_kind(t), BTF_KIND_TYPEDEF, "typedef_kind");
+	ASSERT_EQ(t->type, 1, "typedef_type");
+
+	/* FUNC & FUNC_PROTO */
+	id = btf__add_func(btf, "func1", BTF_FUNC_GLOBAL, 15);
+	ASSERT_EQ(id, 14, "func_id");
+	t = btf__type_by_id(btf, 14);
+	ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "func1", "func_name");
+	ASSERT_EQ(t->type, 15, "func_type");
+	ASSERT_EQ(btf_kind(t), BTF_KIND_FUNC, "func_kind");
+	ASSERT_EQ(btf_vlen(t), BTF_FUNC_GLOBAL, "func_vlen");
+
+	id = btf__add_func_proto(btf, 1);
+	ASSERT_EQ(id, 15, "func_proto_id");
+	err = btf__add_func_param(btf, "p1", 1);
+	ASSERT_OK(err, "p1_res");
+	err = btf__add_func_param(btf, "p2", 2);
+	ASSERT_OK(err, "p2_res");
+
+	t = btf__type_by_id(btf, 15);
+	ASSERT_EQ(btf_kind(t), BTF_KIND_FUNC_PROTO, "func_proto_kind");
+	ASSERT_EQ(btf_vlen(t), 2, "func_proto_vlen");
+	ASSERT_EQ(t->type, 1, "func_proto_ret_type");
+	p = btf_params(t) + 0;
+	ASSERT_STREQ(btf__str_by_offset(btf, p->name_off), "p1", "p1_name");
+	ASSERT_EQ(p->type, 1, "p1_type");
+	p = btf_params(t) + 1;
+	ASSERT_STREQ(btf__str_by_offset(btf, p->name_off), "p2", "p2_name");
+	ASSERT_EQ(p->type, 2, "p2_type");
+
+	/* VAR */
+	id = btf__add_var(btf, "var1", BTF_VAR_GLOBAL_ALLOCATED, 1);
+	ASSERT_EQ(id, 16, "var_id");
+	t = btf__type_by_id(btf, 16);
+	ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "var1", "var_name");
+	ASSERT_EQ(btf_kind(t), BTF_KIND_VAR, "var_kind");
+	ASSERT_EQ(t->type, 1, "var_type");
+	ASSERT_EQ(btf_var(t)->linkage, BTF_VAR_GLOBAL_ALLOCATED, "var_type");
+
+	/* DATASECT */
+	id = btf__add_datasec(btf, "datasec1", 12);
+	ASSERT_EQ(id, 17, "datasec_id");
+	err = btf__add_datasec_var_info(btf, 1, 4, 8);
+	ASSERT_OK(err, "v1_res");
+
+	t = btf__type_by_id(btf, 17);
+	ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "datasec1", "datasec_name");
+	ASSERT_EQ(t->size, 12, "datasec_sz");
+	ASSERT_EQ(btf_kind(t), BTF_KIND_DATASEC, "datasec_kind");
+	ASSERT_EQ(btf_vlen(t), 1, "datasec_vlen");
+	vi = btf_var_secinfos(t) + 0;
+	ASSERT_EQ(vi->type, 1, "v1_type");
+	ASSERT_EQ(vi->offset, 4, "v1_off");
+	ASSERT_EQ(vi->size, 8, "v1_sz");
+
+	btf__free(btf);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c
index f259085cca6a..9781d85cb223 100644
--- a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c
@@ -12,10 +12,13 @@
 
 #include "progs/test_cls_redirect.h"
 #include "test_cls_redirect.skel.h"
+#include "test_cls_redirect_subprogs.skel.h"
 
 #define ENCAP_IP INADDR_LOOPBACK
 #define ENCAP_PORT (1234)
 
+static int duration = 0;
+
 struct addr_port {
 	in_port_t port;
 	union {
@@ -361,30 +364,18 @@ static void close_fds(int *fds, int n)
 			close(fds[i]);
 }
 
-void test_cls_redirect(void)
+static void test_cls_redirect_common(struct bpf_program *prog)
 {
-	struct test_cls_redirect *skel = NULL;
 	struct bpf_prog_test_run_attr tattr = {};
 	int families[] = { AF_INET, AF_INET6 };
 	struct sockaddr_storage ss;
 	struct sockaddr *addr;
 	socklen_t slen;
 	int i, j, err;
-
 	int servers[__NR_KIND][ARRAY_SIZE(families)] = {};
 	int conns[__NR_KIND][ARRAY_SIZE(families)] = {};
 	struct tuple tuples[__NR_KIND][ARRAY_SIZE(families)];
 
-	skel = test_cls_redirect__open();
-	if (CHECK_FAIL(!skel))
-		return;
-
-	skel->rodata->ENCAPSULATION_IP = htonl(ENCAP_IP);
-	skel->rodata->ENCAPSULATION_PORT = htons(ENCAP_PORT);
-
-	if (CHECK_FAIL(test_cls_redirect__load(skel)))
-		goto cleanup;
-
 	addr = (struct sockaddr *)&ss;
 	for (i = 0; i < ARRAY_SIZE(families); i++) {
 		slen = prepare_addr(&ss, families[i]);
@@ -402,7 +393,7 @@ void test_cls_redirect(void)
 			goto cleanup;
 	}
 
-	tattr.prog_fd = bpf_program__fd(skel->progs.cls_redirect);
+	tattr.prog_fd = bpf_program__fd(prog);
 	for (i = 0; i < ARRAY_SIZE(tests); i++) {
 		struct test_cfg *test = &tests[i];
 
@@ -450,7 +441,58 @@ void test_cls_redirect(void)
 	}
 
 cleanup:
-	test_cls_redirect__destroy(skel);
 	close_fds((int *)servers, sizeof(servers) / sizeof(servers[0][0]));
 	close_fds((int *)conns, sizeof(conns) / sizeof(conns[0][0]));
 }
+
+static void test_cls_redirect_inlined(void)
+{
+	struct test_cls_redirect *skel;
+	int err;
+
+	skel = test_cls_redirect__open();
+	if (CHECK(!skel, "skel_open", "failed\n"))
+		return;
+
+	skel->rodata->ENCAPSULATION_IP = htonl(ENCAP_IP);
+	skel->rodata->ENCAPSULATION_PORT = htons(ENCAP_PORT);
+
+	err = test_cls_redirect__load(skel);
+	if (CHECK(err, "skel_load", "failed: %d\n", err))
+		goto cleanup;
+
+	test_cls_redirect_common(skel->progs.cls_redirect);
+
+cleanup:
+	test_cls_redirect__destroy(skel);
+}
+
+static void test_cls_redirect_subprogs(void)
+{
+	struct test_cls_redirect_subprogs *skel;
+	int err;
+
+	skel = test_cls_redirect_subprogs__open();
+	if (CHECK(!skel, "skel_open", "failed\n"))
+		return;
+
+	skel->rodata->ENCAPSULATION_IP = htonl(ENCAP_IP);
+	skel->rodata->ENCAPSULATION_PORT = htons(ENCAP_PORT);
+
+	err = test_cls_redirect_subprogs__load(skel);
+	if (CHECK(err, "skel_load", "failed: %d\n", err))
+		goto cleanup;
+
+	test_cls_redirect_common(skel->progs.cls_redirect);
+
+cleanup:
+	test_cls_redirect_subprogs__destroy(skel);
+}
+
+void test_cls_redirect(void)
+{
+	if (test__start_subtest("cls_redirect_inlined"))
+		test_cls_redirect_inlined();
+	if (test__start_subtest("cls_redirect_subprogs"))
+		test_cls_redirect_subprogs();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/core_autosize.c b/tools/testing/selftests/bpf/prog_tests/core_autosize.c
new file mode 100644
index 000000000000..981c251453d9
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/core_autosize.c
@@ -0,0 +1,225 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+/* real layout and sizes according to test's (32-bit) BTF
+ * needs to be defined before skeleton is included */
+struct test_struct___real {
+	unsigned int ptr; /* can't use `void *`, it is always 8 byte in BPF target */
+	unsigned int val2;
+	unsigned long long val1;
+	unsigned short val3;
+	unsigned char val4;
+	unsigned char _pad;
+};
+
+#include "test_core_autosize.skel.h"
+
+static int duration = 0;
+
+static struct {
+	unsigned long long ptr_samesized;
+	unsigned long long val1_samesized;
+	unsigned long long val2_samesized;
+	unsigned long long val3_samesized;
+	unsigned long long val4_samesized;
+	struct test_struct___real output_samesized;
+
+	unsigned long long ptr_downsized;
+	unsigned long long val1_downsized;
+	unsigned long long val2_downsized;
+	unsigned long long val3_downsized;
+	unsigned long long val4_downsized;
+	struct test_struct___real output_downsized;
+
+	unsigned long long ptr_probed;
+	unsigned long long val1_probed;
+	unsigned long long val2_probed;
+	unsigned long long val3_probed;
+	unsigned long long val4_probed;
+
+	unsigned long long ptr_signed;
+	unsigned long long val1_signed;
+	unsigned long long val2_signed;
+	unsigned long long val3_signed;
+	unsigned long long val4_signed;
+	struct test_struct___real output_signed;
+} out;
+
+void test_core_autosize(void)
+{
+	char btf_file[] = "/tmp/core_autosize.btf.XXXXXX";
+	int err, fd = -1, zero = 0;
+	int char_id, short_id, int_id, long_long_id, void_ptr_id, id;
+	struct test_core_autosize* skel = NULL;
+	struct bpf_object_load_attr load_attr = {};
+	struct bpf_program *prog;
+	struct bpf_map *bss_map;
+	struct btf *btf = NULL;
+	size_t written;
+	const void *raw_data;
+	__u32 raw_sz;
+	FILE *f = NULL;
+
+	btf = btf__new_empty();
+	if (!ASSERT_OK_PTR(btf, "empty_btf"))
+		return;
+	/* Emit the following struct with 32-bit pointer size:
+	 *
+	 * struct test_struct {
+	 *     void *ptr;
+	 *     unsigned long val2;
+	 *     unsigned long long val1;
+	 *     unsigned short val3;
+	 *     unsigned char val4;
+	 *     char: 8;
+	 * };
+	 *
+	 * This struct is going to be used as the "kernel BTF" for this test.
+	 * It's equivalent memory-layout-wise to test_struct__real above.
+	 */
+
+	/* force 32-bit pointer size */
+	btf__set_pointer_size(btf, 4);
+
+	char_id = btf__add_int(btf, "unsigned char", 1, 0);
+	ASSERT_EQ(char_id, 1, "char_id");
+	short_id = btf__add_int(btf, "unsigned short", 2, 0);
+	ASSERT_EQ(short_id, 2, "short_id");
+	/* "long unsigned int" of 4 byte size tells BTF that sizeof(void *) == 4 */
+	int_id = btf__add_int(btf, "long unsigned int", 4, 0);
+	ASSERT_EQ(int_id, 3, "int_id");
+	long_long_id = btf__add_int(btf, "unsigned long long", 8, 0);
+	ASSERT_EQ(long_long_id, 4, "long_long_id");
+	void_ptr_id = btf__add_ptr(btf, 0);
+	ASSERT_EQ(void_ptr_id, 5, "void_ptr_id");
+
+	id = btf__add_struct(btf, "test_struct", 20 /* bytes */);
+	ASSERT_EQ(id, 6, "struct_id");
+	err = btf__add_field(btf, "ptr", void_ptr_id, 0, 0);
+	err = err ?: btf__add_field(btf, "val2", int_id, 32, 0);
+	err = err ?: btf__add_field(btf, "val1", long_long_id, 64, 0);
+	err = err ?: btf__add_field(btf, "val3", short_id, 128, 0);
+	err = err ?: btf__add_field(btf, "val4", char_id, 144, 0);
+	ASSERT_OK(err, "struct_fields");
+
+	fd = mkstemp(btf_file);
+	if (CHECK(fd < 0, "btf_tmp", "failed to create file: %d\n", fd))
+		goto cleanup;
+	f = fdopen(fd, "w");
+	if (!ASSERT_OK_PTR(f, "btf_fdopen"))
+		goto cleanup;
+
+	raw_data = btf__get_raw_data(btf, &raw_sz);
+	if (!ASSERT_OK_PTR(raw_data, "raw_data"))
+		goto cleanup;
+	written = fwrite(raw_data, 1, raw_sz, f);
+	if (CHECK(written != raw_sz, "btf_write", "written: %zu, errno: %d\n", written, errno))
+		goto cleanup;
+	fflush(f);
+	fclose(f);
+	f = NULL;
+	close(fd);
+	fd = -1;
+
+	/* open and load BPF program with custom BTF as the kernel BTF */
+	skel = test_core_autosize__open();
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		return;
+
+	/* disable handle_signed() for now */
+	prog = bpf_object__find_program_by_name(skel->obj, "handle_signed");
+	if (!ASSERT_OK_PTR(prog, "prog_find"))
+		goto cleanup;
+	bpf_program__set_autoload(prog, false);
+
+	load_attr.obj = skel->obj;
+	load_attr.target_btf_path = btf_file;
+	err = bpf_object__load_xattr(&load_attr);
+	if (!ASSERT_OK(err, "prog_load"))
+		goto cleanup;
+
+	prog = bpf_object__find_program_by_name(skel->obj, "handle_samesize");
+	if (!ASSERT_OK_PTR(prog, "prog_find"))
+		goto cleanup;
+	skel->links.handle_samesize = bpf_program__attach(prog);
+	if (!ASSERT_OK_PTR(skel->links.handle_samesize, "prog_attach"))
+		goto cleanup;
+
+	prog = bpf_object__find_program_by_name(skel->obj, "handle_downsize");
+	if (!ASSERT_OK_PTR(prog, "prog_find"))
+		goto cleanup;
+	skel->links.handle_downsize = bpf_program__attach(prog);
+	if (!ASSERT_OK_PTR(skel->links.handle_downsize, "prog_attach"))
+		goto cleanup;
+
+	prog = bpf_object__find_program_by_name(skel->obj, "handle_probed");
+	if (!ASSERT_OK_PTR(prog, "prog_find"))
+		goto cleanup;
+	skel->links.handle_probed = bpf_program__attach(prog);
+	if (!ASSERT_OK_PTR(skel->links.handle_probed, "prog_attach"))
+		goto cleanup;
+
+	usleep(1);
+
+	bss_map = bpf_object__find_map_by_name(skel->obj, "test_cor.bss");
+	if (!ASSERT_OK_PTR(bss_map, "bss_map_find"))
+		goto cleanup;
+
+	err = bpf_map_lookup_elem(bpf_map__fd(bss_map), &zero, (void *)&out);
+	if (!ASSERT_OK(err, "bss_lookup"))
+		goto cleanup;
+
+	ASSERT_EQ(out.ptr_samesized, 0x01020304, "ptr_samesized");
+	ASSERT_EQ(out.val1_samesized, 0x1020304050607080, "val1_samesized");
+	ASSERT_EQ(out.val2_samesized, 0x0a0b0c0d, "val2_samesized");
+	ASSERT_EQ(out.val3_samesized, 0xfeed, "val3_samesized");
+	ASSERT_EQ(out.val4_samesized, 0xb9, "val4_samesized");
+	ASSERT_EQ(out.output_samesized.ptr, 0x01020304, "ptr_samesized");
+	ASSERT_EQ(out.output_samesized.val1, 0x1020304050607080, "val1_samesized");
+	ASSERT_EQ(out.output_samesized.val2, 0x0a0b0c0d, "val2_samesized");
+	ASSERT_EQ(out.output_samesized.val3, 0xfeed, "val3_samesized");
+	ASSERT_EQ(out.output_samesized.val4, 0xb9, "val4_samesized");
+
+	ASSERT_EQ(out.ptr_downsized, 0x01020304, "ptr_downsized");
+	ASSERT_EQ(out.val1_downsized, 0x1020304050607080, "val1_downsized");
+	ASSERT_EQ(out.val2_downsized, 0x0a0b0c0d, "val2_downsized");
+	ASSERT_EQ(out.val3_downsized, 0xfeed, "val3_downsized");
+	ASSERT_EQ(out.val4_downsized, 0xb9, "val4_downsized");
+	ASSERT_EQ(out.output_downsized.ptr, 0x01020304, "ptr_downsized");
+	ASSERT_EQ(out.output_downsized.val1, 0x1020304050607080, "val1_downsized");
+	ASSERT_EQ(out.output_downsized.val2, 0x0a0b0c0d, "val2_downsized");
+	ASSERT_EQ(out.output_downsized.val3, 0xfeed, "val3_downsized");
+	ASSERT_EQ(out.output_downsized.val4, 0xb9, "val4_downsized");
+
+	ASSERT_EQ(out.ptr_probed, 0x01020304, "ptr_probed");
+	ASSERT_EQ(out.val1_probed, 0x1020304050607080, "val1_probed");
+	ASSERT_EQ(out.val2_probed, 0x0a0b0c0d, "val2_probed");
+	ASSERT_EQ(out.val3_probed, 0xfeed, "val3_probed");
+	ASSERT_EQ(out.val4_probed, 0xb9, "val4_probed");
+
+	test_core_autosize__destroy(skel);
+	skel = NULL;
+
+	/* now re-load with handle_signed() enabled, it should fail loading */
+	skel = test_core_autosize__open();
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		return;
+
+	load_attr.obj = skel->obj;
+	load_attr.target_btf_path = btf_file;
+	err = bpf_object__load_xattr(&load_attr);
+	if (!ASSERT_ERR(err, "bad_prog_load"))
+		goto cleanup;
+
+cleanup:
+	if (f)
+		fclose(f);
+	if (fd >= 0)
+		close(fd);
+	remove(btf_file);
+	btf__free(btf);
+	test_core_autosize__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
index a54eafc5e4b3..30e40ff4b0d8 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
@@ -3,6 +3,9 @@
 #include "progs/core_reloc_types.h"
 #include <sys/mman.h>
 #include <sys/syscall.h>
+#include <bpf/btf.h>
+
+static int duration = 0;
 
 #define STRUCT_TO_CHAR_PTR(struct_name) (const char *)&(struct struct_name)
 
@@ -177,14 +180,13 @@
 	.fails = true,							\
 }
 
-#define EXISTENCE_CASE_COMMON(name)					\
+#define FIELD_EXISTS_CASE_COMMON(name)					\
 	.case_name = #name,						\
 	.bpf_obj_file = "test_core_reloc_existence.o",			\
-	.btf_src_file = "btf__core_reloc_" #name ".o",			\
-	.relaxed_core_relocs = true
+	.btf_src_file = "btf__core_reloc_" #name ".o"			\
 
-#define EXISTENCE_ERR_CASE(name) {					\
-	EXISTENCE_CASE_COMMON(name),					\
+#define FIELD_EXISTS_ERR_CASE(name) {					\
+	FIELD_EXISTS_CASE_COMMON(name),					\
 	.fails = true,							\
 }
 
@@ -253,6 +255,61 @@
 	.fails = true,							\
 }
 
+#define TYPE_BASED_CASE_COMMON(name)					\
+	.case_name = #name,						\
+	.bpf_obj_file = "test_core_reloc_type_based.o",		\
+	.btf_src_file = "btf__core_reloc_" #name ".o"			\
+
+#define TYPE_BASED_CASE(name, ...) {					\
+	TYPE_BASED_CASE_COMMON(name),					\
+	.output = STRUCT_TO_CHAR_PTR(core_reloc_type_based_output)	\
+			__VA_ARGS__,					\
+	.output_len = sizeof(struct core_reloc_type_based_output),	\
+}
+
+#define TYPE_BASED_ERR_CASE(name) {					\
+	TYPE_BASED_CASE_COMMON(name),					\
+	.fails = true,							\
+}
+
+#define TYPE_ID_CASE_COMMON(name)					\
+	.case_name = #name,						\
+	.bpf_obj_file = "test_core_reloc_type_id.o",			\
+	.btf_src_file = "btf__core_reloc_" #name ".o"			\
+
+#define TYPE_ID_CASE(name, setup_fn) {					\
+	TYPE_ID_CASE_COMMON(name),					\
+	.output = STRUCT_TO_CHAR_PTR(core_reloc_type_id_output) {},	\
+	.output_len = sizeof(struct core_reloc_type_id_output),		\
+	.setup = setup_fn,						\
+}
+
+#define TYPE_ID_ERR_CASE(name) {					\
+	TYPE_ID_CASE_COMMON(name),					\
+	.fails = true,							\
+}
+
+#define ENUMVAL_CASE_COMMON(name)					\
+	.case_name = #name,						\
+	.bpf_obj_file = "test_core_reloc_enumval.o",			\
+	.btf_src_file = "btf__core_reloc_" #name ".o"			\
+
+#define ENUMVAL_CASE(name, ...) {					\
+	ENUMVAL_CASE_COMMON(name),					\
+	.output = STRUCT_TO_CHAR_PTR(core_reloc_enumval_output)		\
+			__VA_ARGS__,					\
+	.output_len = sizeof(struct core_reloc_enumval_output),		\
+}
+
+#define ENUMVAL_ERR_CASE(name) {					\
+	ENUMVAL_CASE_COMMON(name),					\
+	.fails = true,							\
+}
+
+struct core_reloc_test_case;
+
+typedef int (*setup_test_fn)(struct core_reloc_test_case *test);
+
 struct core_reloc_test_case {
 	const char *case_name;
 	const char *bpf_obj_file;
@@ -264,8 +321,136 @@ struct core_reloc_test_case {
 	bool fails;
 	bool relaxed_core_relocs;
 	bool direct_raw_tp;
+	setup_test_fn setup;
 };
 
+static int find_btf_type(const struct btf *btf, const char *name, __u32 kind)
+{
+	int id;
+
+	id = btf__find_by_name_kind(btf, name, kind);
+	if (CHECK(id <= 0, "find_type_id", "failed to find '%s', kind %d: %d\n", name, kind, id))
+		return -1;
+
+	return id;
+}
+
+static int setup_type_id_case_local(struct core_reloc_test_case *test)
+{
+	struct core_reloc_type_id_output *exp = (void *)test->output;
+	struct btf *local_btf = btf__parse(test->bpf_obj_file, NULL);
+	struct btf *targ_btf = btf__parse(test->btf_src_file, NULL);
+	const struct btf_type *t;
+	const char *name;
+	int i;
+
+	if (CHECK(IS_ERR(local_btf), "local_btf", "failed: %ld\n", PTR_ERR(local_btf)) ||
+	    CHECK(IS_ERR(targ_btf), "targ_btf", "failed: %ld\n", PTR_ERR(targ_btf))) {
+		btf__free(local_btf);
+		btf__free(targ_btf);
+		return -EINVAL;
+	}
+
+	exp->local_anon_struct = -1;
+	exp->local_anon_union = -1;
+	exp->local_anon_enum = -1;
+	exp->local_anon_func_proto_ptr = -1;
+	exp->local_anon_void_ptr = -1;
+	exp->local_anon_arr = -1;
+
+	for (i = 1; i <= btf__get_nr_types(local_btf); i++)
+	{
+		t = btf__type_by_id(local_btf, i);
+		/* we are interested only in anonymous types */
+		if (t->name_off)
+			continue;
+
+		if (btf_is_struct(t) && btf_vlen(t) &&
+		    (name = btf__name_by_offset(local_btf, btf_members(t)[0].name_off)) &&
+		    strcmp(name, "marker_field") == 0) {
+			exp->local_anon_struct = i;
+		} else if (btf_is_union(t) && btf_vlen(t) &&
+			 (name = btf__name_by_offset(local_btf, btf_members(t)[0].name_off)) &&
+			 strcmp(name, "marker_field") == 0) {
+			exp->local_anon_union = i;
+		} else if (btf_is_enum(t) && btf_vlen(t) &&
+			 (name = btf__name_by_offset(local_btf, btf_enum(t)[0].name_off)) &&
+			 strcmp(name, "MARKER_ENUM_VAL") == 0) {
+			exp->local_anon_enum = i;
+		} else if (btf_is_ptr(t) && (t = btf__type_by_id(local_btf, t->type))) {
+			if (btf_is_func_proto(t) && (t = btf__type_by_id(local_btf, t->type)) &&
+			    btf_is_int(t) && (name = btf__name_by_offset(local_btf, t->name_off)) &&
+			    strcmp(name, "_Bool") == 0) {
+				/* ptr -> func_proto -> _Bool */
+				exp->local_anon_func_proto_ptr = i;
+			} else if (btf_is_void(t)) {
+				/* ptr -> void */
+				exp->local_anon_void_ptr = i;
+			}
+		} else if (btf_is_array(t) && (t = btf__type_by_id(local_btf, btf_array(t)->type)) &&
+			   btf_is_int(t) && (name = btf__name_by_offset(local_btf, t->name_off)) &&
+			   strcmp(name, "_Bool") == 0) {
+			/* _Bool[] */
+			exp->local_anon_arr = i;
+		}
+	}
+
+	exp->local_struct = find_btf_type(local_btf, "a_struct", BTF_KIND_STRUCT);
+	exp->local_union = find_btf_type(local_btf, "a_union", BTF_KIND_UNION);
+	exp->local_enum = find_btf_type(local_btf, "an_enum", BTF_KIND_ENUM);
+	exp->local_int = find_btf_type(local_btf, "int", BTF_KIND_INT);
+	exp->local_struct_typedef = find_btf_type(local_btf, "named_struct_typedef", BTF_KIND_TYPEDEF);
+	exp->local_func_proto_typedef = find_btf_type(local_btf, "func_proto_typedef", BTF_KIND_TYPEDEF);
+	exp->local_arr_typedef = find_btf_type(local_btf, "arr_typedef", BTF_KIND_TYPEDEF);
+
+	btf__free(local_btf);
+	btf__free(targ_btf);
+	return 0;
+}
+
+static int setup_type_id_case_success(struct core_reloc_test_case *test) {
+	struct core_reloc_type_id_output *exp = (void *)test->output;
+	struct btf *targ_btf = btf__parse(test->btf_src_file, NULL);
+	int err;
+
+	err = setup_type_id_case_local(test);
+	if (err)
+		return err;
+
+	targ_btf = btf__parse(test->btf_src_file, NULL);
+
+	exp->targ_struct = find_btf_type(targ_btf, "a_struct", BTF_KIND_STRUCT);
+	exp->targ_union = find_btf_type(targ_btf, "a_union", BTF_KIND_UNION);
+	exp->targ_enum = find_btf_type(targ_btf, "an_enum", BTF_KIND_ENUM);
+	exp->targ_int = find_btf_type(targ_btf, "int", BTF_KIND_INT);
+	exp->targ_struct_typedef = find_btf_type(targ_btf, "named_struct_typedef", BTF_KIND_TYPEDEF);
+	exp->targ_func_proto_typedef = find_btf_type(targ_btf, "func_proto_typedef", BTF_KIND_TYPEDEF);
+	exp->targ_arr_typedef = find_btf_type(targ_btf, "arr_typedef", BTF_KIND_TYPEDEF);
+
+	btf__free(targ_btf);
+	return 0;
+}
+
+static int setup_type_id_case_failure(struct core_reloc_test_case *test)
+{
+	struct core_reloc_type_id_output *exp = (void *)test->output;
+	int err;
+
+	err = setup_type_id_case_local(test);
+	if (err)
+		return err;
+
+	exp->targ_struct = 0;
+	exp->targ_union = 0;
+	exp->targ_enum = 0;
+	exp->targ_int = 0;
+	exp->targ_struct_typedef = 0;
+	exp->targ_func_proto_typedef = 0;
+	exp->targ_arr_typedef = 0;
+
+	return 0;
+}
+
 static struct core_reloc_test_case test_cases[] = {
 	/* validate we can find kernel image and use its BTF for relocs */
 	{
@@ -364,7 +549,7 @@ static struct core_reloc_test_case test_cases[] = {
 
 	/* validate field existence checks */
 	{
-		EXISTENCE_CASE_COMMON(existence),
+		FIELD_EXISTS_CASE_COMMON(existence),
 		.input = STRUCT_TO_CHAR_PTR(core_reloc_existence) {
 			.a = 1,
 			.b = 2,
@@ -388,7 +573,7 @@ static struct core_reloc_test_case test_cases[] = {
 		.output_len = sizeof(struct core_reloc_existence_output),
 	},
 	{
-		EXISTENCE_CASE_COMMON(existence___minimal),
+		FIELD_EXISTS_CASE_COMMON(existence___minimal),
 		.input = STRUCT_TO_CHAR_PTR(core_reloc_existence___minimal) {
 			.a = 42,
 		},
@@ -408,12 +593,12 @@ static struct core_reloc_test_case test_cases[] = {
 		.output_len = sizeof(struct core_reloc_existence_output),
 	},
 
-	EXISTENCE_ERR_CASE(existence__err_int_sz),
-	EXISTENCE_ERR_CASE(existence__err_int_type),
-	EXISTENCE_ERR_CASE(existence__err_int_kind),
-	EXISTENCE_ERR_CASE(existence__err_arr_kind),
-	EXISTENCE_ERR_CASE(existence__err_arr_value_type),
-	EXISTENCE_ERR_CASE(existence__err_struct_type),
+	FIELD_EXISTS_ERR_CASE(existence__err_int_sz),
+	FIELD_EXISTS_ERR_CASE(existence__err_int_type),
+	FIELD_EXISTS_ERR_CASE(existence__err_int_kind),
+	FIELD_EXISTS_ERR_CASE(existence__err_arr_kind),
+	FIELD_EXISTS_ERR_CASE(existence__err_arr_value_type),
+	FIELD_EXISTS_ERR_CASE(existence__err_struct_type),
 
 	/* bitfield relocation checks */
 	BITFIELDS_CASE(bitfields, {
@@ -452,11 +637,117 @@ static struct core_reloc_test_case test_cases[] = {
 	/* size relocation checks */
 	SIZE_CASE(size),
 	SIZE_CASE(size___diff_sz),
+	SIZE_ERR_CASE(size___err_ambiguous),
+
+	/* validate type existence and size relocations */
+	TYPE_BASED_CASE(type_based, {
+		.struct_exists = 1,
+		.union_exists = 1,
+		.enum_exists = 1,
+		.typedef_named_struct_exists = 1,
+		.typedef_anon_struct_exists = 1,
+		.typedef_struct_ptr_exists = 1,
+		.typedef_int_exists = 1,
+		.typedef_enum_exists = 1,
+		.typedef_void_ptr_exists = 1,
+		.typedef_func_proto_exists = 1,
+		.typedef_arr_exists = 1,
+		.struct_sz = sizeof(struct a_struct),
+		.union_sz = sizeof(union a_union),
+		.enum_sz = sizeof(enum an_enum),
+		.typedef_named_struct_sz = sizeof(named_struct_typedef),
+		.typedef_anon_struct_sz = sizeof(anon_struct_typedef),
+		.typedef_struct_ptr_sz = sizeof(struct_ptr_typedef),
+		.typedef_int_sz = sizeof(int_typedef),
+		.typedef_enum_sz = sizeof(enum_typedef),
+		.typedef_void_ptr_sz = sizeof(void_ptr_typedef),
+		.typedef_func_proto_sz = sizeof(func_proto_typedef),
+		.typedef_arr_sz = sizeof(arr_typedef),
+	}),
+	TYPE_BASED_CASE(type_based___all_missing, {
+		/* all zeros */
+	}),
+	TYPE_BASED_CASE(type_based___diff_sz, {
+		.struct_exists = 1,
+		.union_exists = 1,
+		.enum_exists = 1,
+		.typedef_named_struct_exists = 1,
+		.typedef_anon_struct_exists = 1,
+		.typedef_struct_ptr_exists = 1,
+		.typedef_int_exists = 1,
+		.typedef_enum_exists = 1,
+		.typedef_void_ptr_exists = 1,
+		.typedef_func_proto_exists = 1,
+		.typedef_arr_exists = 1,
+		.struct_sz = sizeof(struct a_struct___diff_sz),
+		.union_sz = sizeof(union a_union___diff_sz),
+		.enum_sz = sizeof(enum an_enum___diff_sz),
+		.typedef_named_struct_sz = sizeof(named_struct_typedef___diff_sz),
+		.typedef_anon_struct_sz = sizeof(anon_struct_typedef___diff_sz),
+		.typedef_struct_ptr_sz = sizeof(struct_ptr_typedef___diff_sz),
+		.typedef_int_sz = sizeof(int_typedef___diff_sz),
+		.typedef_enum_sz = sizeof(enum_typedef___diff_sz),
+		.typedef_void_ptr_sz = sizeof(void_ptr_typedef___diff_sz),
+		.typedef_func_proto_sz = sizeof(func_proto_typedef___diff_sz),
+		.typedef_arr_sz = sizeof(arr_typedef___diff_sz),
+	}),
+	TYPE_BASED_CASE(type_based___incompat, {
+		.enum_exists = 1,
+		.enum_sz = sizeof(enum an_enum),
+	}),
+	TYPE_BASED_CASE(type_based___fn_wrong_args, {
+		.struct_exists = 1,
+		.struct_sz = sizeof(struct a_struct),
+	}),
+
+	/* BTF_TYPE_ID_LOCAL/BTF_TYPE_ID_TARGET tests */
+	TYPE_ID_CASE(type_id, setup_type_id_case_success),
+	TYPE_ID_CASE(type_id___missing_targets, setup_type_id_case_failure),
+
+	/* Enumerator value existence and value relocations */
+	ENUMVAL_CASE(enumval, {
+		.named_val1_exists = true,
+		.named_val2_exists = true,
+		.named_val3_exists = true,
+		.anon_val1_exists = true,
+		.anon_val2_exists = true,
+		.anon_val3_exists = true,
+		.named_val1 = 1,
+		.named_val2 = 2,
+		.anon_val1 = 0x10,
+		.anon_val2 = 0x20,
+	}),
+	ENUMVAL_CASE(enumval___diff, {
+		.named_val1_exists = true,
+		.named_val2_exists = true,
+		.named_val3_exists = true,
+		.anon_val1_exists = true,
+		.anon_val2_exists = true,
+		.anon_val3_exists = true,
+		.named_val1 = 101,
+		.named_val2 = 202,
+		.anon_val1 = 0x11,
+		.anon_val2 = 0x22,
+	}),
+	ENUMVAL_CASE(enumval___val3_missing, {
+		.named_val1_exists = true,
+		.named_val2_exists = true,
+		.named_val3_exists = false,
+		.anon_val1_exists = true,
+		.anon_val2_exists = true,
+		.anon_val3_exists = false,
+		.named_val1 = 111,
+		.named_val2 = 222,
+		.anon_val1 = 0x111,
+		.anon_val2 = 0x222,
+	}),
+	ENUMVAL_ERR_CASE(enumval___err_missing),
 };
 
 struct data {
 	char in[256];
 	char out[256];
+	bool skip;
 	uint64_t my_pid_tgid;
 };
 
@@ -472,7 +763,7 @@ void test_core_reloc(void)
 	struct bpf_object_load_attr load_attr = {};
 	struct core_reloc_test_case *test_case;
 	const char *tp_name, *probe_name;
-	int err, duration = 0, i, equal;
+	int err, i, equal;
 	struct bpf_link *link = NULL;
 	struct bpf_map *data_map;
 	struct bpf_program *prog;
@@ -488,11 +779,13 @@ void test_core_reloc(void)
 		if (!test__start_subtest(test_case->case_name))
 			continue;
 
-		DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
-			.relaxed_core_relocs = test_case->relaxed_core_relocs,
-		);
+		if (test_case->setup) {
+			err = test_case->setup(test_case);
+			if (CHECK(err, "test_setup", "test #%d setup failed: %d\n", i, err))
+				continue;
+		}
 
-		obj = bpf_object__open_file(test_case->bpf_obj_file, &opts);
+		obj = bpf_object__open_file(test_case->bpf_obj_file, NULL);
 		if (CHECK(IS_ERR(obj), "obj_open", "failed to open '%s': %ld\n",
 			  test_case->bpf_obj_file, PTR_ERR(obj)))
 			continue;
@@ -515,15 +808,10 @@ void test_core_reloc(void)
 		load_attr.log_level = 0;
 		load_attr.target_btf_path = test_case->btf_src_file;
 		err = bpf_object__load_xattr(&load_attr);
-		if (test_case->fails) {
-			CHECK(!err, "obj_load_fail",
-			      "should fail to load prog '%s'\n", probe_name);
+		if (err) {
+			if (!test_case->fails)
+				CHECK(false, "obj_load", "failed to load prog '%s': %d\n", probe_name, err);
 			goto cleanup;
-		} else {
-			if (CHECK(err, "obj_load",
-				  "failed to load prog '%s': %d\n",
-				  probe_name, err))
-				goto cleanup;
 		}
 
 		data_map = bpf_object__find_map_by_name(obj, "test_cor.bss");
@@ -551,6 +839,16 @@ void test_core_reloc(void)
 		/* trigger test run */
 		usleep(1);
 
+		if (data->skip) {
+			test__skip();
+			goto cleanup;
+		}
+
+		if (test_case->fails) {
+			CHECK(false, "obj_load_fail", "should fail to load prog '%s'\n", probe_name);
+			goto cleanup;
+		}
+
 		equal = memcmp(data->out, test_case->output,
 			       test_case->output_len) == 0;
 		if (CHECK(!equal, "check_result",
diff --git a/tools/testing/selftests/bpf/prog_tests/d_path.c b/tools/testing/selftests/bpf/prog_tests/d_path.c
new file mode 100644
index 000000000000..0a577a248d34
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/d_path.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <test_progs.h>
+#include <sys/stat.h>
+#include <linux/sched.h>
+#include <sys/syscall.h>
+
+#define MAX_PATH_LEN		128
+#define MAX_FILES		7
+
+#include "test_d_path.skel.h"
+
+static int duration;
+
+static struct {
+	__u32 cnt;
+	char paths[MAX_FILES][MAX_PATH_LEN];
+} src;
+
+static int set_pathname(int fd, pid_t pid)
+{
+	char buf[MAX_PATH_LEN];
+
+	snprintf(buf, MAX_PATH_LEN, "/proc/%d/fd/%d", pid, fd);
+	return readlink(buf, src.paths[src.cnt++], MAX_PATH_LEN);
+}
+
+static int trigger_fstat_events(pid_t pid)
+{
+	int sockfd = -1, procfd = -1, devfd = -1;
+	int localfd = -1, indicatorfd = -1;
+	int pipefd[2] = { -1, -1 };
+	struct stat fileStat;
+	int ret = -1;
+
+	/* unmountable pseudo-filesystems */
+	if (CHECK(pipe(pipefd) < 0, "trigger", "pipe failed\n"))
+		return ret;
+	/* unmountable pseudo-filesystems */
+	sockfd = socket(AF_INET, SOCK_STREAM, 0);
+	if (CHECK(sockfd < 0, "trigger", "socket failed\n"))
+		goto out_close;
+	/* mountable pseudo-filesystems */
+	procfd = open("/proc/self/comm", O_RDONLY);
+	if (CHECK(procfd < 0, "trigger", "open /proc/self/comm failed\n"))
+		goto out_close;
+	devfd = open("/dev/urandom", O_RDONLY);
+	if (CHECK(devfd < 0, "trigger", "open /dev/urandom failed\n"))
+		goto out_close;
+	localfd = open("/tmp/d_path_loadgen.txt", O_CREAT | O_RDONLY, 0644);
+	if (CHECK(localfd < 0, "trigger", "open /tmp/d_path_loadgen.txt failed\n"))
+		goto out_close;
+	/* bpf_d_path will return path with (deleted) */
+	remove("/tmp/d_path_loadgen.txt");
+	indicatorfd = open("/tmp/", O_PATH);
+	if (CHECK(indicatorfd < 0, "trigger", "open /tmp/ failed\n"))
+		goto out_close;
+
+	ret = set_pathname(pipefd[0], pid);
+	if (CHECK(ret < 0, "trigger", "set_pathname failed for pipe[0]\n"))
+		goto out_close;
+	ret = set_pathname(pipefd[1], pid);
+	if (CHECK(ret < 0, "trigger", "set_pathname failed for pipe[1]\n"))
+		goto out_close;
+	ret = set_pathname(sockfd, pid);
+	if (CHECK(ret < 0, "trigger", "set_pathname failed for socket\n"))
+		goto out_close;
+	ret = set_pathname(procfd, pid);
+	if (CHECK(ret < 0, "trigger", "set_pathname failed for proc\n"))
+		goto out_close;
+	ret = set_pathname(devfd, pid);
+	if (CHECK(ret < 0, "trigger", "set_pathname failed for dev\n"))
+		goto out_close;
+	ret = set_pathname(localfd, pid);
+	if (CHECK(ret < 0, "trigger", "set_pathname failed for file\n"))
+		goto out_close;
+	ret = set_pathname(indicatorfd, pid);
+	if (CHECK(ret < 0, "trigger", "set_pathname failed for dir\n"))
+		goto out_close;
+
+	/* triggers vfs_getattr */
+	fstat(pipefd[0], &fileStat);
+	fstat(pipefd[1], &fileStat);
+	fstat(sockfd, &fileStat);
+	fstat(procfd, &fileStat);
+	fstat(devfd, &fileStat);
+	fstat(localfd, &fileStat);
+	fstat(indicatorfd, &fileStat);
+
+out_close:
+	/* triggers filp_close */
+	close(pipefd[0]);
+	close(pipefd[1]);
+	close(sockfd);
+	close(procfd);
+	close(devfd);
+	close(localfd);
+	close(indicatorfd);
+	return ret;
+}
+
+void test_d_path(void)
+{
+	struct test_d_path__bss *bss;
+	struct test_d_path *skel;
+	int err;
+
+	skel = test_d_path__open_and_load();
+	if (CHECK(!skel, "setup", "d_path skeleton failed\n"))
+		goto cleanup;
+
+	err = test_d_path__attach(skel);
+	if (CHECK(err, "setup", "attach failed: %d\n", err))
+		goto cleanup;
+
+	bss = skel->bss;
+	bss->my_pid = getpid();
+
+	err = trigger_fstat_events(bss->my_pid);
+	if (err < 0)
+		goto cleanup;
+
+	if (CHECK(!bss->called_stat,
+		  "stat",
+		  "trampoline for security_inode_getattr was not called\n"))
+		goto cleanup;
+
+	if (CHECK(!bss->called_close,
+		  "close",
+		  "trampoline for filp_close was not called\n"))
+		goto cleanup;
+
+	for (int i = 0; i < MAX_FILES; i++) {
+		CHECK(strncmp(src.paths[i], bss->paths_stat[i], MAX_PATH_LEN),
+		      "check",
+		      "failed to get stat path[%d]: %s vs %s\n",
+		      i, src.paths[i], bss->paths_stat[i]);
+		CHECK(strncmp(src.paths[i], bss->paths_close[i], MAX_PATH_LEN),
+		      "check",
+		      "failed to get close path[%d]: %s vs %s\n",
+		      i, src.paths[i], bss->paths_close[i]);
+		/* The d_path helper returns size plus NUL char, hence + 1 */
+		CHECK(bss->rets_stat[i] != strlen(bss->paths_stat[i]) + 1,
+		      "check",
+		      "failed to match stat return [%d]: %d vs %zd [%s]\n",
+		      i, bss->rets_stat[i], strlen(bss->paths_stat[i]) + 1,
+		      bss->paths_stat[i]);
+		CHECK(bss->rets_close[i] != strlen(bss->paths_stat[i]) + 1,
+		      "check",
+		      "failed to match stat return [%d]: %d vs %zd [%s]\n",
+		      i, bss->rets_close[i], strlen(bss->paths_close[i]) + 1,
+		      bss->paths_stat[i]);
+	}
+
+cleanup:
+	test_d_path__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
index 197d0d217b56..5c0448910426 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
@@ -2,36 +2,79 @@
 /* Copyright (c) 2019 Facebook */
 #include <test_progs.h>
 #include <network_helpers.h>
+#include <bpf/btf.h>
+
+typedef int (*test_cb)(struct bpf_object *obj);
+
+static int check_data_map(struct bpf_object *obj, int prog_cnt, bool reset)
+{
+	struct bpf_map *data_map = NULL, *map;
+	__u64 *result = NULL;
+	const int zero = 0;
+	__u32 duration = 0;
+	int ret = -1, i;
+
+	result = malloc((prog_cnt + 32 /* spare */) * sizeof(__u64));
+	if (CHECK(!result, "alloc_memory", "failed to alloc memory"))
+		return -ENOMEM;
+
+	bpf_object__for_each_map(map, obj)
+		if (bpf_map__is_internal(map)) {
+			data_map = map;
+			break;
+		}
+	if (CHECK(!data_map, "find_data_map", "data map not found\n"))
+		goto out;
+
+	ret = bpf_map_lookup_elem(bpf_map__fd(data_map), &zero, result);
+	if (CHECK(ret, "get_result",
+		  "failed to get output data: %d\n", ret))
+		goto out;
+
+	for (i = 0; i < prog_cnt; i++) {
+		if (CHECK(result[i] != 1, "result",
+			  "fexit_bpf2bpf result[%d] failed err %llu\n",
+			  i, result[i]))
+			goto out;
+		result[i] = 0;
+	}
+	if (reset) {
+		ret = bpf_map_update_elem(bpf_map__fd(data_map), &zero, result, 0);
+		if (CHECK(ret, "reset_result", "failed to reset result\n"))
+			goto out;
+	}
+
+	ret = 0;
+out:
+	free(result);
+	return ret;
+}
 
 static void test_fexit_bpf2bpf_common(const char *obj_file,
 				      const char *target_obj_file,
 				      int prog_cnt,
 				      const char **prog_name,
-				      bool run_prog)
+				      bool run_prog,
+				      test_cb cb)
 {
-	struct bpf_object *obj = NULL, *pkt_obj;
-	int err, pkt_fd, i;
-	struct bpf_link **link = NULL;
+	struct bpf_object *obj = NULL, *tgt_obj;
 	struct bpf_program **prog = NULL;
+	struct bpf_link **link = NULL;
 	__u32 duration = 0, retval;
-	struct bpf_map *data_map;
-	const int zero = 0;
-	__u64 *result = NULL;
+	int err, tgt_fd, i;
 
 	err = bpf_prog_load(target_obj_file, BPF_PROG_TYPE_UNSPEC,
-			    &pkt_obj, &pkt_fd);
+			    &tgt_obj, &tgt_fd);
 	if (CHECK(err, "tgt_prog_load", "file %s err %d errno %d\n",
 		  target_obj_file, err, errno))
 		return;
 	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
-			    .attach_prog_fd = pkt_fd,
+			    .attach_prog_fd = tgt_fd,
 			   );
 
 	link = calloc(sizeof(struct bpf_link *), prog_cnt);
 	prog = calloc(sizeof(struct bpf_program *), prog_cnt);
-	result = malloc((prog_cnt + 32 /* spare */) * sizeof(__u64));
-	if (CHECK(!link || !prog || !result, "alloc_memory",
-		  "failed to alloc memory"))
+	if (CHECK(!link || !prog, "alloc_memory", "failed to alloc memory"))
 		goto close_prog;
 
 	obj = bpf_object__open_file(obj_file, &opts);
@@ -53,39 +96,33 @@ static void test_fexit_bpf2bpf_common(const char *obj_file,
 			goto close_prog;
 	}
 
-	if (!run_prog)
-		goto close_prog;
+	if (cb) {
+		err = cb(obj);
+		if (err)
+			goto close_prog;
+	}
 
-	data_map = bpf_object__find_map_by_name(obj, "fexit_bp.bss");
-	if (CHECK(!data_map, "find_data_map", "data map not found\n"))
+	if (!run_prog)
 		goto close_prog;
 
-	err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6),
+	err = bpf_prog_test_run(tgt_fd, 1, &pkt_v6, sizeof(pkt_v6),
 				NULL, NULL, &retval, &duration);
 	CHECK(err || retval, "ipv6",
 	      "err %d errno %d retval %d duration %d\n",
 	      err, errno, retval, duration);
 
-	err = bpf_map_lookup_elem(bpf_map__fd(data_map), &zero, result);
-	if (CHECK(err, "get_result",
-		  "failed to get output data: %d\n", err))
+	if (check_data_map(obj, prog_cnt, false))
 		goto close_prog;
 
-	for (i = 0; i < prog_cnt; i++)
-		if (CHECK(result[i] != 1, "result", "fexit_bpf2bpf failed err %llu\n",
-			  result[i]))
-			goto close_prog;
-
 close_prog:
 	for (i = 0; i < prog_cnt; i++)
 		if (!IS_ERR_OR_NULL(link[i]))
 			bpf_link__destroy(link[i]);
 	if (!IS_ERR_OR_NULL(obj))
 		bpf_object__close(obj);
-	bpf_object__close(pkt_obj);
+	bpf_object__close(tgt_obj);
 	free(link);
 	free(prog);
-	free(result);
 }
 
 static void test_target_no_callees(void)
@@ -96,7 +133,7 @@ static void test_target_no_callees(void)
 	test_fexit_bpf2bpf_common("./fexit_bpf2bpf_simple.o",
 				  "./test_pkt_md_access.o",
 				  ARRAY_SIZE(prog_name),
-				  prog_name, true);
+				  prog_name, true, NULL);
 }
 
 static void test_target_yes_callees(void)
@@ -110,7 +147,7 @@ static void test_target_yes_callees(void)
 	test_fexit_bpf2bpf_common("./fexit_bpf2bpf.o",
 				  "./test_pkt_access.o",
 				  ARRAY_SIZE(prog_name),
-				  prog_name, true);
+				  prog_name, true, NULL);
 }
 
 static void test_func_replace(void)
@@ -123,11 +160,12 @@ static void test_func_replace(void)
 		"freplace/get_skb_len",
 		"freplace/get_skb_ifindex",
 		"freplace/get_constant",
+		"freplace/test_pkt_write_access_subprog",
 	};
 	test_fexit_bpf2bpf_common("./fexit_bpf2bpf.o",
 				  "./test_pkt_access.o",
 				  ARRAY_SIZE(prog_name),
-				  prog_name, true);
+				  prog_name, true, NULL);
 }
 
 static void test_func_replace_verify(void)
@@ -138,13 +176,198 @@ static void test_func_replace_verify(void)
 	test_fexit_bpf2bpf_common("./freplace_connect4.o",
 				  "./connect4_prog.o",
 				  ARRAY_SIZE(prog_name),
-				  prog_name, false);
+				  prog_name, false, NULL);
+}
+
+static int test_second_attach(struct bpf_object *obj)
+{
+	const char *prog_name = "freplace/get_constant";
+	const char *tgt_name = prog_name + 9; /* cut off freplace/ */
+	const char *tgt_obj_file = "./test_pkt_access.o";
+	struct bpf_program *prog = NULL;
+	struct bpf_object *tgt_obj;
+	__u32 duration = 0, retval;
+	struct bpf_link *link;
+	int err = 0, tgt_fd;
+
+	prog = bpf_object__find_program_by_title(obj, prog_name);
+	if (CHECK(!prog, "find_prog", "prog %s not found\n", prog_name))
+		return -ENOENT;
+
+	err = bpf_prog_load(tgt_obj_file, BPF_PROG_TYPE_UNSPEC,
+			    &tgt_obj, &tgt_fd);
+	if (CHECK(err, "second_prog_load", "file %s err %d errno %d\n",
+		  tgt_obj_file, err, errno))
+		return err;
+
+	link = bpf_program__attach_freplace(prog, tgt_fd, tgt_name);
+	if (CHECK(IS_ERR(link), "second_link", "failed to attach second link prog_fd %d tgt_fd %d\n", bpf_program__fd(prog), tgt_fd))
+		goto out;
+
+	err = bpf_prog_test_run(tgt_fd, 1, &pkt_v6, sizeof(pkt_v6),
+				NULL, NULL, &retval, &duration);
+	if (CHECK(err || retval, "ipv6",
+		  "err %d errno %d retval %d duration %d\n",
+		  err, errno, retval, duration))
+		goto out;
+
+	err = check_data_map(obj, 1, true);
+	if (err)
+		goto out;
+
+out:
+	bpf_link__destroy(link);
+	bpf_object__close(tgt_obj);
+	return err;
+}
+
+static void test_func_replace_multi(void)
+{
+	const char *prog_name[] = {
+		"freplace/get_constant",
+	};
+	test_fexit_bpf2bpf_common("./freplace_get_constant.o",
+				  "./test_pkt_access.o",
+				  ARRAY_SIZE(prog_name),
+				  prog_name, true, test_second_attach);
+}
+
+static void test_fmod_ret_freplace(void)
+{
+	struct bpf_object *freplace_obj = NULL, *pkt_obj, *fmod_obj = NULL;
+	const char *freplace_name = "./freplace_get_constant.o";
+	const char *fmod_ret_name = "./fmod_ret_freplace.o";
+	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts);
+	const char *tgt_name = "./test_pkt_access.o";
+	struct bpf_link *freplace_link = NULL;
+	struct bpf_program *prog;
+	__u32 duration = 0;
+	int err, pkt_fd;
+
+	err = bpf_prog_load(tgt_name, BPF_PROG_TYPE_UNSPEC,
+			    &pkt_obj, &pkt_fd);
+	/* the target prog should load fine */
+	if (CHECK(err, "tgt_prog_load", "file %s err %d errno %d\n",
+		  tgt_name, err, errno))
+		return;
+	opts.attach_prog_fd = pkt_fd;
+
+	freplace_obj = bpf_object__open_file(freplace_name, &opts);
+	if (CHECK(IS_ERR_OR_NULL(freplace_obj), "freplace_obj_open",
+		  "failed to open %s: %ld\n", freplace_name,
+		  PTR_ERR(freplace_obj)))
+		goto out;
+
+	err = bpf_object__load(freplace_obj);
+	if (CHECK(err, "freplace_obj_load", "err %d\n", err))
+		goto out;
+
+	prog = bpf_program__next(NULL, freplace_obj);
+	freplace_link = bpf_program__attach_trace(prog);
+	if (CHECK(IS_ERR(freplace_link), "freplace_attach_trace", "failed to link\n"))
+		goto out;
+
+	opts.attach_prog_fd = bpf_program__fd(prog);
+	fmod_obj = bpf_object__open_file(fmod_ret_name, &opts);
+	if (CHECK(IS_ERR_OR_NULL(fmod_obj), "fmod_obj_open",
+		  "failed to open %s: %ld\n", fmod_ret_name,
+		  PTR_ERR(fmod_obj)))
+		goto out;
+
+	err = bpf_object__load(fmod_obj);
+	if (CHECK(!err, "fmod_obj_load", "loading fmod_ret should fail\n"))
+		goto out;
+
+out:
+	bpf_link__destroy(freplace_link);
+	bpf_object__close(freplace_obj);
+	bpf_object__close(fmod_obj);
+	bpf_object__close(pkt_obj);
+}
+
+
+static void test_func_sockmap_update(void)
+{
+	const char *prog_name[] = {
+		"freplace/cls_redirect",
+	};
+	test_fexit_bpf2bpf_common("./freplace_cls_redirect.o",
+				  "./test_cls_redirect.o",
+				  ARRAY_SIZE(prog_name),
+				  prog_name, false, NULL);
+}
+
+static void test_obj_load_failure_common(const char *obj_file,
+					  const char *target_obj_file)
+
+{
+	/*
+	 * standalone test that asserts failure to load freplace prog
+	 * because of invalid return code.
+	 */
+	struct bpf_object *obj = NULL, *pkt_obj;
+	int err, pkt_fd;
+	__u32 duration = 0;
+
+	err = bpf_prog_load(target_obj_file, BPF_PROG_TYPE_UNSPEC,
+			    &pkt_obj, &pkt_fd);
+	/* the target prog should load fine */
+	if (CHECK(err, "tgt_prog_load", "file %s err %d errno %d\n",
+		  target_obj_file, err, errno))
+		return;
+	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
+			    .attach_prog_fd = pkt_fd,
+			   );
+
+	obj = bpf_object__open_file(obj_file, &opts);
+	if (CHECK(IS_ERR_OR_NULL(obj), "obj_open",
+		  "failed to open %s: %ld\n", obj_file,
+		  PTR_ERR(obj)))
+		goto close_prog;
+
+	/* It should fail to load the program */
+	err = bpf_object__load(obj);
+	if (CHECK(!err, "bpf_obj_load should fail", "err %d\n", err))
+		goto close_prog;
+
+close_prog:
+	if (!IS_ERR_OR_NULL(obj))
+		bpf_object__close(obj);
+	bpf_object__close(pkt_obj);
+}
+
+static void test_func_replace_return_code(void)
+{
+	/* test invalid return code in the replaced program */
+	test_obj_load_failure_common("./freplace_connect_v4_prog.o",
+				     "./connect4_prog.o");
+}
+
+static void test_func_map_prog_compatibility(void)
+{
+	/* test with spin lock map value in the replaced program */
+	test_obj_load_failure_common("./freplace_attach_probe.o",
+				     "./test_attach_probe.o");
 }
 
 void test_fexit_bpf2bpf(void)
 {
-	test_target_no_callees();
-	test_target_yes_callees();
-	test_func_replace();
-	test_func_replace_verify();
+	if (test__start_subtest("target_no_callees"))
+		test_target_no_callees();
+	if (test__start_subtest("target_yes_callees"))
+		test_target_yes_callees();
+	if (test__start_subtest("func_replace"))
+		test_func_replace();
+	if (test__start_subtest("func_replace_verify"))
+		test_func_replace_verify();
+	if (test__start_subtest("func_sockmap_update"))
+		test_func_sockmap_update();
+	if (test__start_subtest("func_replace_return_code"))
+		test_func_replace_return_code();
+	if (test__start_subtest("func_map_prog_compatibility"))
+		test_func_map_prog_compatibility();
+	if (test__start_subtest("func_replace_multi"))
+		test_func_replace_multi();
+	if (test__start_subtest("fmod_ret_freplace"))
+		test_fmod_ret_freplace();
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/global_data_init.c b/tools/testing/selftests/bpf/prog_tests/global_data_init.c
index 3bdaa5a40744..ee46b11f1f9a 100644
--- a/tools/testing/selftests/bpf/prog_tests/global_data_init.c
+++ b/tools/testing/selftests/bpf/prog_tests/global_data_init.c
@@ -12,7 +12,8 @@ void test_global_data_init(void)
 	size_t sz;
 
 	obj = bpf_object__open_file(file, NULL);
-	if (CHECK_FAIL(!obj))
+	err = libbpf_get_error(obj);
+	if (CHECK_FAIL(err))
 		return;
 
 	map = bpf_object__find_map_by_name(obj, "test_glo.rodata");
diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms.c b/tools/testing/selftests/bpf/prog_tests/ksyms.c
index e3d6777226a8..b295969b263b 100644
--- a/tools/testing/selftests/bpf/prog_tests/ksyms.c
+++ b/tools/testing/selftests/bpf/prog_tests/ksyms.c
@@ -7,39 +7,28 @@
 
 static int duration;
 
-static __u64 kallsyms_find(const char *sym)
-{
-	char type, name[500];
-	__u64 addr, res = 0;
-	FILE *f;
-
-	f = fopen("/proc/kallsyms", "r");
-	if (CHECK(!f, "kallsyms_fopen", "failed to open: %d\n", errno))
-		return 0;
-
-	while (fscanf(f, "%llx %c %499s%*[^\n]\n", &addr, &type, name) > 0) {
-		if (strcmp(name, sym) == 0) {
-			res = addr;
-			goto out;
-		}
-	}
-
-	CHECK(false, "not_found", "symbol %s not found\n", sym);
-out:
-	fclose(f);
-	return res;
-}
-
 void test_ksyms(void)
 {
-	__u64 link_fops_addr = kallsyms_find("bpf_link_fops");
 	const char *btf_path = "/sys/kernel/btf/vmlinux";
 	struct test_ksyms *skel;
 	struct test_ksyms__data *data;
+	__u64 link_fops_addr, per_cpu_start_addr;
 	struct stat st;
 	__u64 btf_size;
 	int err;
 
+	err = kallsyms_find("bpf_link_fops", &link_fops_addr);
+	if (CHECK(err == -EINVAL, "kallsyms_fopen", "failed to open: %d\n", errno))
+		return;
+	if (CHECK(err == -ENOENT, "ksym_find", "symbol 'bpf_link_fops' not found\n"))
+		return;
+
+	err = kallsyms_find("__per_cpu_start", &per_cpu_start_addr);
+	if (CHECK(err == -EINVAL, "kallsyms_fopen", "failed to open: %d\n", errno))
+		return;
+	if (CHECK(err == -ENOENT, "ksym_find", "symbol 'per_cpu_start' not found\n"))
+		return;
+
 	if (CHECK(stat(btf_path, &st), "stat_btf", "err %d\n", errno))
 		return;
 	btf_size = st.st_size;
@@ -63,8 +52,9 @@ void test_ksyms(void)
 	      "got %llu, exp %llu\n", data->out__bpf_link_fops1, (__u64)0);
 	CHECK(data->out__btf_size != btf_size, "btf_size",
 	      "got %llu, exp %llu\n", data->out__btf_size, btf_size);
-	CHECK(data->out__per_cpu_start != 0, "__per_cpu_start",
-	      "got %llu, exp %llu\n", data->out__per_cpu_start, (__u64)0);
+	CHECK(data->out__per_cpu_start != per_cpu_start_addr, "__per_cpu_start",
+	      "got %llu, exp %llu\n", data->out__per_cpu_start,
+	      per_cpu_start_addr);
 
 cleanup:
 	test_ksyms__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
new file mode 100644
index 000000000000..28e26bd3e0ca
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Google */
+
+#include <test_progs.h>
+#include <bpf/libbpf.h>
+#include <bpf/btf.h>
+#include "test_ksyms_btf.skel.h"
+
+static int duration;
+
+void test_ksyms_btf(void)
+{
+	__u64 runqueues_addr, bpf_prog_active_addr;
+	__u32 this_rq_cpu;
+	int this_bpf_prog_active;
+	struct test_ksyms_btf *skel = NULL;
+	struct test_ksyms_btf__data *data;
+	struct btf *btf;
+	int percpu_datasec;
+	int err;
+
+	err = kallsyms_find("runqueues", &runqueues_addr);
+	if (CHECK(err == -EINVAL, "kallsyms_fopen", "failed to open: %d\n", errno))
+		return;
+	if (CHECK(err == -ENOENT, "ksym_find", "symbol 'runqueues' not found\n"))
+		return;
+
+	err = kallsyms_find("bpf_prog_active", &bpf_prog_active_addr);
+	if (CHECK(err == -EINVAL, "kallsyms_fopen", "failed to open: %d\n", errno))
+		return;
+	if (CHECK(err == -ENOENT, "ksym_find", "symbol 'bpf_prog_active' not found\n"))
+		return;
+
+	btf = libbpf_find_kernel_btf();
+	if (CHECK(IS_ERR(btf), "btf_exists", "failed to load kernel BTF: %ld\n",
+		  PTR_ERR(btf)))
+		return;
+
+	percpu_datasec = btf__find_by_name_kind(btf, ".data..percpu",
+						BTF_KIND_DATASEC);
+	if (percpu_datasec < 0) {
+		printf("%s:SKIP:no PERCPU DATASEC in kernel btf\n",
+		       __func__);
+		test__skip();
+		goto cleanup;
+	}
+
+	skel = test_ksyms_btf__open_and_load();
+	if (CHECK(!skel, "skel_open", "failed to open and load skeleton\n"))
+		goto cleanup;
+
+	err = test_ksyms_btf__attach(skel);
+	if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+		goto cleanup;
+
+	/* trigger tracepoint */
+	usleep(1);
+
+	data = skel->data;
+	CHECK(data->out__runqueues_addr != runqueues_addr, "runqueues_addr",
+	      "got %llu, exp %llu\n",
+	      (unsigned long long)data->out__runqueues_addr,
+	      (unsigned long long)runqueues_addr);
+	CHECK(data->out__bpf_prog_active_addr != bpf_prog_active_addr, "bpf_prog_active_addr",
+	      "got %llu, exp %llu\n",
+	      (unsigned long long)data->out__bpf_prog_active_addr,
+	      (unsigned long long)bpf_prog_active_addr);
+
+	CHECK(data->out__rq_cpu == -1, "rq_cpu",
+	      "got %u, exp != -1\n", data->out__rq_cpu);
+	CHECK(data->out__bpf_prog_active < 0, "bpf_prog_active",
+	      "got %d, exp >= 0\n", data->out__bpf_prog_active);
+	CHECK(data->out__cpu_0_rq_cpu != 0, "cpu_rq(0)->cpu",
+	      "got %u, exp 0\n", data->out__cpu_0_rq_cpu);
+
+	this_rq_cpu = data->out__this_rq_cpu;
+	CHECK(this_rq_cpu != data->out__rq_cpu, "this_rq_cpu",
+	      "got %u, exp %u\n", this_rq_cpu, data->out__rq_cpu);
+
+	this_bpf_prog_active = data->out__this_bpf_prog_active;
+	CHECK(this_bpf_prog_active != data->out__bpf_prog_active, "this_bpf_prog_active",
+	      "got %d, exp %d\n", this_bpf_prog_active,
+	      data->out__bpf_prog_active);
+
+cleanup:
+	btf__free(btf);
+	test_ksyms_btf__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c
index c2d373e294bb..8073105548ff 100644
--- a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c
+++ b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c
@@ -80,9 +80,8 @@ out:
 
 void test_l4lb_all(void)
 {
-	const char *file1 = "./test_l4lb.o";
-	const char *file2 = "./test_l4lb_noinline.o";
-
-	test_l4lb(file1);
-	test_l4lb(file2);
+	if (test__start_subtest("l4lb_inline"))
+		test_l4lb("test_l4lb.o");
+	if (test__start_subtest("l4lb_noinline"))
+		test_l4lb("test_l4lb_noinline.o");
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/metadata.c b/tools/testing/selftests/bpf/prog_tests/metadata.c
new file mode 100644
index 000000000000..2c53eade88e3
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/metadata.c
@@ -0,0 +1,141 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2020 Google LLC.
+ */
+
+#include <test_progs.h>
+#include <cgroup_helpers.h>
+#include <network_helpers.h>
+
+#include "metadata_unused.skel.h"
+#include "metadata_used.skel.h"
+
+static int duration;
+
+static int prog_holds_map(int prog_fd, int map_fd)
+{
+	struct bpf_prog_info prog_info = {};
+	struct bpf_prog_info map_info = {};
+	__u32 prog_info_len;
+	__u32 map_info_len;
+	__u32 *map_ids;
+	int nr_maps;
+	int ret;
+	int i;
+
+	map_info_len = sizeof(map_info);
+	ret = bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len);
+	if (ret)
+		return -errno;
+
+	prog_info_len = sizeof(prog_info);
+	ret = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &prog_info_len);
+	if (ret)
+		return -errno;
+
+	map_ids = calloc(prog_info.nr_map_ids, sizeof(__u32));
+	if (!map_ids)
+		return -ENOMEM;
+
+	nr_maps = prog_info.nr_map_ids;
+	memset(&prog_info, 0, sizeof(prog_info));
+	prog_info.nr_map_ids = nr_maps;
+	prog_info.map_ids = ptr_to_u64(map_ids);
+	prog_info_len = sizeof(prog_info);
+
+	ret = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &prog_info_len);
+	if (ret) {
+		ret = -errno;
+		goto free_map_ids;
+	}
+
+	ret = -ENOENT;
+	for (i = 0; i < prog_info.nr_map_ids; i++) {
+		if (map_ids[i] == map_info.id) {
+			ret = 0;
+			break;
+		}
+	}
+
+free_map_ids:
+	free(map_ids);
+	return ret;
+}
+
+static void test_metadata_unused(void)
+{
+	struct metadata_unused *obj;
+	int err;
+
+	obj = metadata_unused__open_and_load();
+	if (CHECK(!obj, "skel-load", "errno %d", errno))
+		return;
+
+	err = prog_holds_map(bpf_program__fd(obj->progs.prog),
+			     bpf_map__fd(obj->maps.rodata));
+	if (CHECK(err, "prog-holds-rodata", "errno: %d", err))
+		return;
+
+	/* Assert that we can access the metadata in skel and the values are
+	 * what we expect.
+	 */
+	if (CHECK(strncmp(obj->rodata->bpf_metadata_a, "foo",
+			  sizeof(obj->rodata->bpf_metadata_a)),
+		  "bpf_metadata_a", "expected \"foo\", value differ"))
+		goto close_bpf_object;
+	if (CHECK(obj->rodata->bpf_metadata_b != 1, "bpf_metadata_b",
+		  "expected 1, got %d", obj->rodata->bpf_metadata_b))
+		goto close_bpf_object;
+
+	/* Assert that binding metadata map to prog again succeeds. */
+	err = bpf_prog_bind_map(bpf_program__fd(obj->progs.prog),
+				bpf_map__fd(obj->maps.rodata), NULL);
+	CHECK(err, "rebind_map", "errno %d, expected 0", errno);
+
+close_bpf_object:
+	metadata_unused__destroy(obj);
+}
+
+static void test_metadata_used(void)
+{
+	struct metadata_used *obj;
+	int err;
+
+	obj = metadata_used__open_and_load();
+	if (CHECK(!obj, "skel-load", "errno %d", errno))
+		return;
+
+	err = prog_holds_map(bpf_program__fd(obj->progs.prog),
+			     bpf_map__fd(obj->maps.rodata));
+	if (CHECK(err, "prog-holds-rodata", "errno: %d", err))
+		return;
+
+	/* Assert that we can access the metadata in skel and the values are
+	 * what we expect.
+	 */
+	if (CHECK(strncmp(obj->rodata->bpf_metadata_a, "bar",
+			  sizeof(obj->rodata->bpf_metadata_a)),
+		  "metadata_a", "expected \"bar\", value differ"))
+		goto close_bpf_object;
+	if (CHECK(obj->rodata->bpf_metadata_b != 2, "metadata_b",
+		  "expected 2, got %d", obj->rodata->bpf_metadata_b))
+		goto close_bpf_object;
+
+	/* Assert that binding metadata map to prog again succeeds. */
+	err = bpf_prog_bind_map(bpf_program__fd(obj->progs.prog),
+				bpf_map__fd(obj->maps.rodata), NULL);
+	CHECK(err, "rebind_map", "errno %d, expected 0", errno);
+
+close_bpf_object:
+	metadata_used__destroy(obj);
+}
+
+void test_metadata(void)
+{
+	if (test__start_subtest("unused"))
+		test_metadata_unused();
+
+	if (test__start_subtest("used"))
+		test_metadata_used();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/pe_preserve_elems.c b/tools/testing/selftests/bpf/prog_tests/pe_preserve_elems.c
new file mode 100644
index 000000000000..673d38395253
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/pe_preserve_elems.c
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2019 Facebook */
+#include <test_progs.h>
+#include <linux/bpf.h>
+#include "test_pe_preserve_elems.skel.h"
+
+static int duration;
+
+static void test_one_map(struct bpf_map *map, struct bpf_program *prog,
+			 bool has_share_pe)
+{
+	int err, key = 0, pfd = -1, mfd = bpf_map__fd(map);
+	DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts);
+	struct perf_event_attr attr = {
+		.size = sizeof(struct perf_event_attr),
+		.type = PERF_TYPE_SOFTWARE,
+		.config = PERF_COUNT_SW_CPU_CLOCK,
+	};
+
+	pfd = syscall(__NR_perf_event_open, &attr, 0 /* pid */,
+		      -1 /* cpu 0 */, -1 /* group id */, 0 /* flags */);
+	if (CHECK(pfd < 0, "perf_event_open", "failed\n"))
+		return;
+
+	err = bpf_map_update_elem(mfd, &key, &pfd, BPF_ANY);
+	close(pfd);
+	if (CHECK(err < 0, "bpf_map_update_elem", "failed\n"))
+		return;
+
+	err = bpf_prog_test_run_opts(bpf_program__fd(prog), &opts);
+	if (CHECK(err < 0, "bpf_prog_test_run_opts", "failed\n"))
+		return;
+	if (CHECK(opts.retval != 0, "bpf_perf_event_read_value",
+		  "failed with %d\n", opts.retval))
+		return;
+
+	/* closing mfd, prog still holds a reference on map */
+	close(mfd);
+
+	err = bpf_prog_test_run_opts(bpf_program__fd(prog), &opts);
+	if (CHECK(err < 0, "bpf_prog_test_run_opts", "failed\n"))
+		return;
+
+	if (has_share_pe) {
+		CHECK(opts.retval != 0, "bpf_perf_event_read_value",
+		      "failed with %d\n", opts.retval);
+	} else {
+		CHECK(opts.retval != -ENOENT, "bpf_perf_event_read_value",
+		      "should have failed with %d, but got %d\n", -ENOENT,
+		      opts.retval);
+	}
+}
+
+void test_pe_preserve_elems(void)
+{
+	struct test_pe_preserve_elems *skel;
+
+	skel = test_pe_preserve_elems__open_and_load();
+	if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+		return;
+
+	test_one_map(skel->maps.array_1, skel->progs.read_array_1, false);
+	test_one_map(skel->maps.array_2, skel->progs.read_array_2, true);
+
+	test_pe_preserve_elems__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
index c33ec180b3f2..ca9f0895ec84 100644
--- a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
+++ b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
@@ -7,6 +7,8 @@
 #include "test_perf_buffer.skel.h"
 #include "bpf/libbpf_internal.h"
 
+static int duration;
+
 /* AddressSanitizer sometimes crashes due to data dereference below, due to
  * this being mmap()'ed memory. Disable instrumentation with
  * no_sanitize_address attribute
@@ -24,13 +26,31 @@ static void on_sample(void *ctx, int cpu, void *data, __u32 size)
 	CPU_SET(cpu, cpu_seen);
 }
 
+int trigger_on_cpu(int cpu)
+{
+	cpu_set_t cpu_set;
+	int err;
+
+	CPU_ZERO(&cpu_set);
+	CPU_SET(cpu, &cpu_set);
+
+	err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set);
+	if (err && CHECK(err, "set_affinity", "cpu #%d, err %d\n", cpu, err))
+		return err;
+
+	usleep(1);
+
+	return 0;
+}
+
 void test_perf_buffer(void)
 {
-	int err, on_len, nr_on_cpus = 0,  nr_cpus, i, duration = 0;
+	int err, on_len, nr_on_cpus = 0, nr_cpus, i;
 	struct perf_buffer_opts pb_opts = {};
 	struct test_perf_buffer *skel;
-	cpu_set_t cpu_set, cpu_seen;
+	cpu_set_t cpu_seen;
 	struct perf_buffer *pb;
+	int last_fd = -1, fd;
 	bool *online;
 
 	nr_cpus = libbpf_num_possible_cpus();
@@ -63,6 +83,9 @@ void test_perf_buffer(void)
 	if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb)))
 		goto out_close;
 
+	CHECK(perf_buffer__epoll_fd(pb) < 0, "epoll_fd",
+	      "bad fd: %d\n", perf_buffer__epoll_fd(pb));
+
 	/* trigger kprobe on every CPU */
 	CPU_ZERO(&cpu_seen);
 	for (i = 0; i < nr_cpus; i++) {
@@ -71,16 +94,8 @@ void test_perf_buffer(void)
 			continue;
 		}
 
-		CPU_ZERO(&cpu_set);
-		CPU_SET(i, &cpu_set);
-
-		err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set),
-					     &cpu_set);
-		if (err && CHECK(err, "set_affinity", "cpu #%d, err %d\n",
-				 i, err))
+		if (trigger_on_cpu(i))
 			goto out_close;
-
-		usleep(1);
 	}
 
 	/* read perf buffer */
@@ -92,6 +107,34 @@ void test_perf_buffer(void)
 		  "expect %d, seen %d\n", nr_on_cpus, CPU_COUNT(&cpu_seen)))
 		goto out_free_pb;
 
+	if (CHECK(perf_buffer__buffer_cnt(pb) != nr_cpus, "buf_cnt",
+		  "got %zu, expected %d\n", perf_buffer__buffer_cnt(pb), nr_cpus))
+		goto out_close;
+
+	for (i = 0; i < nr_cpus; i++) {
+		if (i >= on_len || !online[i])
+			continue;
+
+		fd = perf_buffer__buffer_fd(pb, i);
+		CHECK(fd < 0 || last_fd == fd, "fd_check", "last fd %d == fd %d\n", last_fd, fd);
+		last_fd = fd;
+
+		err = perf_buffer__consume_buffer(pb, i);
+		if (CHECK(err, "drain_buf", "cpu %d, err %d\n", i, err))
+			goto out_close;
+
+		CPU_CLR(i, &cpu_seen);
+		if (trigger_on_cpu(i))
+			goto out_close;
+
+		err = perf_buffer__consume_buffer(pb, i);
+		if (CHECK(err, "consume_buf", "cpu %d, err %d\n", i, err))
+			goto out_close;
+
+		if (CHECK(!CPU_ISSET(i, &cpu_seen), "cpu_seen", "cpu %d not seen\n", i))
+			goto out_close;
+	}
+
 out_free_pb:
 	perf_buffer__free(pb);
 out_close:
diff --git a/tools/testing/selftests/bpf/prog_tests/pinning.c b/tools/testing/selftests/bpf/prog_tests/pinning.c
index 041952524c55..fcf54b3a1dd0 100644
--- a/tools/testing/selftests/bpf/prog_tests/pinning.c
+++ b/tools/testing/selftests/bpf/prog_tests/pinning.c
@@ -37,7 +37,7 @@ void test_pinning(void)
 	struct stat statbuf = {};
 	struct bpf_object *obj;
 	struct bpf_map *map;
-	int err;
+	int err, map_fd;
 	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
 		.pin_root_path = custpath,
 	);
@@ -213,6 +213,53 @@ void test_pinning(void)
 	if (CHECK(err, "stat custpinpath", "err %d errno %d\n", err, errno))
 		goto out;
 
+	/* remove the custom pin path to re-test it with reuse fd below */
+	err = unlink(custpinpath);
+	if (CHECK(err, "unlink custpinpath", "err %d errno %d\n", err, errno))
+		goto out;
+
+	err = rmdir(custpath);
+	if (CHECK(err, "rmdir custpindir", "err %d errno %d\n", err, errno))
+		goto out;
+
+	bpf_object__close(obj);
+
+	/* test pinning at custom path with reuse fd */
+	obj = bpf_object__open_file(file, NULL);
+	err = libbpf_get_error(obj);
+	if (CHECK(err, "default open", "err %d errno %d\n", err, errno)) {
+		obj = NULL;
+		goto out;
+	}
+
+	map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(__u32),
+				sizeof(__u64), 1, 0);
+	if (CHECK(map_fd < 0, "create pinmap manually", "fd %d\n", map_fd))
+		goto out;
+
+	map = bpf_object__find_map_by_name(obj, "pinmap");
+	if (CHECK(!map, "find map", "NULL map"))
+		goto close_map_fd;
+
+	err = bpf_map__reuse_fd(map, map_fd);
+	if (CHECK(err, "reuse pinmap fd", "err %d errno %d\n", err, errno))
+		goto close_map_fd;
+
+	err = bpf_map__set_pin_path(map, custpinpath);
+	if (CHECK(err, "set pin path", "err %d errno %d\n", err, errno))
+		goto close_map_fd;
+
+	err = bpf_object__load(obj);
+	if (CHECK(err, "custom load", "err %d errno %d\n", err, errno))
+		goto close_map_fd;
+
+	/* check that pinmap was pinned at the custom path */
+	err = stat(custpinpath, &statbuf);
+	if (CHECK(err, "stat custpinpath", "err %d errno %d\n", err, errno))
+		goto close_map_fd;
+
+close_map_fd:
+	close(map_fd);
 out:
 	unlink(pinpath);
 	unlink(nopinpath);
diff --git a/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c b/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c
new file mode 100644
index 000000000000..c5fb191874ac
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2019 Facebook */
+#include <test_progs.h>
+#include <linux/bpf.h>
+#include "bpf/libbpf_internal.h"
+#include "test_raw_tp_test_run.skel.h"
+
+static int duration;
+
+void test_raw_tp_test_run(void)
+{
+	struct bpf_prog_test_run_attr test_attr = {};
+	int comm_fd = -1, err, nr_online, i, prog_fd;
+	__u64 args[2] = {0x1234ULL, 0x5678ULL};
+	int expected_retval = 0x1234 + 0x5678;
+	struct test_raw_tp_test_run *skel;
+	char buf[] = "new_name";
+	bool *online = NULL;
+	DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
+			    .ctx_in = args,
+			    .ctx_size_in = sizeof(args),
+			    .flags = BPF_F_TEST_RUN_ON_CPU,
+		);
+
+	err = parse_cpu_mask_file("/sys/devices/system/cpu/online", &online,
+				  &nr_online);
+	if (CHECK(err, "parse_cpu_mask_file", "err %d\n", err))
+		return;
+
+	skel = test_raw_tp_test_run__open_and_load();
+	if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+		goto cleanup;
+
+	err = test_raw_tp_test_run__attach(skel);
+	if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+		goto cleanup;
+
+	comm_fd = open("/proc/self/comm", O_WRONLY|O_TRUNC);
+	if (CHECK(comm_fd < 0, "open /proc/self/comm", "err %d\n", errno))
+		goto cleanup;
+
+	err = write(comm_fd, buf, sizeof(buf));
+	CHECK(err < 0, "task rename", "err %d", errno);
+
+	CHECK(skel->bss->count == 0, "check_count", "didn't increase\n");
+	CHECK(skel->data->on_cpu != 0xffffffff, "check_on_cpu", "got wrong value\n");
+
+	prog_fd = bpf_program__fd(skel->progs.rename);
+	test_attr.prog_fd = prog_fd;
+	test_attr.ctx_in = args;
+	test_attr.ctx_size_in = sizeof(__u64);
+
+	err = bpf_prog_test_run_xattr(&test_attr);
+	CHECK(err == 0, "test_run", "should fail for too small ctx\n");
+
+	test_attr.ctx_size_in = sizeof(args);
+	err = bpf_prog_test_run_xattr(&test_attr);
+	CHECK(err < 0, "test_run", "err %d\n", errno);
+	CHECK(test_attr.retval != expected_retval, "check_retval",
+	      "expect 0x%x, got 0x%x\n", expected_retval, test_attr.retval);
+
+	for (i = 0; i < nr_online; i++) {
+		if (!online[i])
+			continue;
+
+		opts.cpu = i;
+		opts.retval = 0;
+		err = bpf_prog_test_run_opts(prog_fd, &opts);
+		CHECK(err < 0, "test_run_opts", "err %d\n", errno);
+		CHECK(skel->data->on_cpu != i, "check_on_cpu",
+		      "expect %d got %d\n", i, skel->data->on_cpu);
+		CHECK(opts.retval != expected_retval,
+		      "check_retval", "expect 0x%x, got 0x%x\n",
+		      expected_retval, opts.retval);
+	}
+
+	/* invalid cpu ID should fail with ENXIO */
+	opts.cpu = 0xffffffff;
+	err = bpf_prog_test_run_opts(prog_fd, &opts);
+	CHECK(err != -1 || errno != ENXIO,
+	      "test_run_opts_fail",
+	      "should failed with ENXIO\n");
+
+	/* non-zero cpu w/o BPF_F_TEST_RUN_ON_CPU should fail with EINVAL */
+	opts.cpu = 1;
+	opts.flags = 0;
+	err = bpf_prog_test_run_opts(prog_fd, &opts);
+	CHECK(err != -1 || errno != EINVAL,
+	      "test_run_opts_fail",
+	      "should failed with EINVAL\n");
+
+cleanup:
+	close(comm_fd);
+	test_raw_tp_test_run__destroy(skel);
+	free(online);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
index fc0d7f4f02cf..ac1ee10cffd8 100644
--- a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
+++ b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
@@ -27,7 +27,7 @@ void test_reference_tracking(void)
 		const char *title;
 
 		/* Ignore .text sections */
-		title = bpf_program__title(prog, false);
+		title = bpf_program__section_name(prog);
 		if (strstr(title, ".text") != NULL)
 			continue;
 
diff --git a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
index 3b127cab4864..6ace5e9efec1 100644
--- a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
+++ b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
@@ -28,6 +28,12 @@ struct symbol test_symbols[] = {
 	{ "func",    BTF_KIND_FUNC,    -1 },
 };
 
+/* Align the .BTF_ids section to 4 bytes */
+asm (
+".pushsection " BTF_IDS_SECTION " ,\"a\"; \n"
+".balign 4, 0;                            \n"
+".popsection;                             \n");
+
 BTF_ID_LIST(test_list_local)
 BTF_ID_UNUSED
 BTF_ID(typedef, S)
@@ -47,6 +53,15 @@ BTF_ID(struct,  S)
 BTF_ID(union,   U)
 BTF_ID(func,    func)
 
+BTF_SET_START(test_set)
+BTF_ID(typedef, S)
+BTF_ID(typedef, T)
+BTF_ID(typedef, U)
+BTF_ID(struct,  S)
+BTF_ID(union,   U)
+BTF_ID(func,    func)
+BTF_SET_END(test_set)
+
 static int
 __resolve_symbol(struct btf *btf, int type_id)
 {
@@ -116,12 +131,40 @@ int test_resolve_btfids(void)
 	 */
 	for (j = 0; j < ARRAY_SIZE(test_lists); j++) {
 		test_list = test_lists[j];
-		for (i = 0; i < ARRAY_SIZE(test_symbols) && !ret; i++) {
+		for (i = 0; i < ARRAY_SIZE(test_symbols); i++) {
 			ret = CHECK(test_list[i] != test_symbols[i].id,
 				    "id_check",
 				    "wrong ID for %s (%d != %d)\n",
 				    test_symbols[i].name,
 				    test_list[i], test_symbols[i].id);
+			if (ret)
+				return ret;
+		}
+	}
+
+	/* Check BTF_SET_START(test_set) IDs */
+	for (i = 0; i < test_set.cnt; i++) {
+		bool found = false;
+
+		for (j = 0; j < ARRAY_SIZE(test_symbols); j++) {
+			if (test_symbols[j].id != test_set.ids[i])
+				continue;
+			found = true;
+			break;
+		}
+
+		ret = CHECK(!found, "id_check",
+			    "ID %d not found in test_symbols\n",
+			    test_set.ids[i]);
+		if (ret)
+			break;
+
+		if (i > 0) {
+			ret = CHECK(test_set.ids[i - 1] > test_set.ids[i],
+				    "sort_check",
+				    "test_set is not sorted\n");
+			if (ret)
+				break;
 		}
 	}
 
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_assign.c b/tools/testing/selftests/bpf/prog_tests/sk_assign.c
index 47fa04adc147..3a469099f30d 100644
--- a/tools/testing/selftests/bpf/prog_tests/sk_assign.c
+++ b/tools/testing/selftests/bpf/prog_tests/sk_assign.c
@@ -49,7 +49,7 @@ configure_stack(void)
 	sprintf(tc_cmd, "%s %s %s %s", "tc filter add dev lo ingress bpf",
 		       "direct-action object-file ./test_sk_assign.o",
 		       "section classifier/sk_assign_test",
-		       (env.verbosity < VERBOSE_VERY) ? " 2>/dev/null" : "");
+		       (env.verbosity < VERBOSE_VERY) ? " 2>/dev/null" : "verbose");
 	if (CHECK(system(tc_cmd), "BPF load failed;",
 		  "run with -vv for more info\n"))
 		return false;
@@ -265,9 +265,10 @@ void test_sk_assign(void)
 		TEST("ipv6 udp port redir", AF_INET6, SOCK_DGRAM, false),
 		TEST("ipv6 udp addr redir", AF_INET6, SOCK_DGRAM, true),
 	};
-	int server = -1;
+	__s64 server = -1;
 	int server_map;
 	int self_net;
+	int i;
 
 	self_net = open(NS_SELF, O_RDONLY);
 	if (CHECK_FAIL(self_net < 0)) {
@@ -286,7 +287,7 @@ void test_sk_assign(void)
 		goto cleanup;
 	}
 
-	for (int i = 0; i < ARRAY_SIZE(tests) && !READ_ONCE(stop); i++) {
+	for (i = 0; i < ARRAY_SIZE(tests) && !READ_ONCE(stop); i++) {
 		struct test_sk_cfg *test = &tests[i];
 		const struct sockaddr *addr;
 		const int zero = 0;
diff --git a/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c b/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c
new file mode 100644
index 000000000000..686b40f11a45
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <linux/btf.h>
+#include "netif_receive_skb.skel.h"
+
+/* Demonstrate that bpf_snprintf_btf succeeds and that various data types
+ * are formatted correctly.
+ */
+void test_snprintf_btf(void)
+{
+	struct netif_receive_skb *skel;
+	struct netif_receive_skb__bss *bss;
+	int err, duration = 0;
+
+	skel = netif_receive_skb__open();
+	if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+		return;
+
+	err = netif_receive_skb__load(skel);
+	if (CHECK(err, "skel_load", "failed to load skeleton: %d\n", err))
+		goto cleanup;
+
+	bss = skel->bss;
+
+	err = netif_receive_skb__attach(skel);
+	if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+		goto cleanup;
+
+	/* generate receive event */
+	err = system("ping -c 1 127.0.0.1 > /dev/null");
+	if (CHECK(err, "system", "ping failed: %d\n", err))
+		goto cleanup;
+
+	if (bss->skip) {
+		printf("%s:SKIP:no __builtin_btf_type_id\n", __func__);
+		test__skip();
+		goto cleanup;
+	}
+
+	/*
+	 * Make sure netif_receive_skb program was triggered
+	 * and it set expected return values from bpf_trace_printk()s
+	 * and all tests ran.
+	 */
+	if (CHECK(bss->ret <= 0,
+		  "bpf_snprintf_btf: got return value",
+		  "ret <= 0 %ld test %d\n", bss->ret, bss->ran_subtests))
+		goto cleanup;
+
+	if (CHECK(bss->ran_subtests == 0, "check if subtests ran",
+		  "no subtests ran, did BPF program run?"))
+		goto cleanup;
+
+	if (CHECK(bss->num_subtests != bss->ran_subtests,
+		  "check all subtests ran",
+		  "only ran %d of %d tests\n", bss->num_subtests,
+		  bss->ran_subtests))
+		goto cleanup;
+
+cleanup:
+	netif_receive_skb__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/sock_fields.c b/tools/testing/selftests/bpf/prog_tests/sock_fields.c
new file mode 100644
index 000000000000..af87118e748e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sock_fields.c
@@ -0,0 +1,382 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#include <linux/compiler.h>
+
+#include "network_helpers.h"
+#include "cgroup_helpers.h"
+#include "test_progs.h"
+#include "bpf_rlimit.h"
+#include "test_sock_fields.skel.h"
+
+enum bpf_linum_array_idx {
+	EGRESS_LINUM_IDX,
+	INGRESS_LINUM_IDX,
+	__NR_BPF_LINUM_ARRAY_IDX,
+};
+
+struct bpf_spinlock_cnt {
+	struct bpf_spin_lock lock;
+	__u32 cnt;
+};
+
+#define PARENT_CGROUP	"/test-bpf-sock-fields"
+#define CHILD_CGROUP	"/test-bpf-sock-fields/child"
+#define DATA "Hello BPF!"
+#define DATA_LEN sizeof(DATA)
+
+static struct sockaddr_in6 srv_sa6, cli_sa6;
+static int sk_pkt_out_cnt10_fd;
+static struct test_sock_fields *skel;
+static int sk_pkt_out_cnt_fd;
+static __u64 parent_cg_id;
+static __u64 child_cg_id;
+static int linum_map_fd;
+static __u32 duration;
+
+static __u32 egress_linum_idx = EGRESS_LINUM_IDX;
+static __u32 ingress_linum_idx = INGRESS_LINUM_IDX;
+
+static void print_sk(const struct bpf_sock *sk, const char *prefix)
+{
+	char src_ip4[24], dst_ip4[24];
+	char src_ip6[64], dst_ip6[64];
+
+	inet_ntop(AF_INET, &sk->src_ip4, src_ip4, sizeof(src_ip4));
+	inet_ntop(AF_INET6, &sk->src_ip6, src_ip6, sizeof(src_ip6));
+	inet_ntop(AF_INET, &sk->dst_ip4, dst_ip4, sizeof(dst_ip4));
+	inet_ntop(AF_INET6, &sk->dst_ip6, dst_ip6, sizeof(dst_ip6));
+
+	printf("%s: state:%u bound_dev_if:%u family:%u type:%u protocol:%u mark:%u priority:%u "
+	       "src_ip4:%x(%s) src_ip6:%x:%x:%x:%x(%s) src_port:%u "
+	       "dst_ip4:%x(%s) dst_ip6:%x:%x:%x:%x(%s) dst_port:%u\n",
+	       prefix,
+	       sk->state, sk->bound_dev_if, sk->family, sk->type, sk->protocol,
+	       sk->mark, sk->priority,
+	       sk->src_ip4, src_ip4,
+	       sk->src_ip6[0], sk->src_ip6[1], sk->src_ip6[2], sk->src_ip6[3],
+	       src_ip6, sk->src_port,
+	       sk->dst_ip4, dst_ip4,
+	       sk->dst_ip6[0], sk->dst_ip6[1], sk->dst_ip6[2], sk->dst_ip6[3],
+	       dst_ip6, ntohs(sk->dst_port));
+}
+
+static void print_tp(const struct bpf_tcp_sock *tp, const char *prefix)
+{
+	printf("%s: snd_cwnd:%u srtt_us:%u rtt_min:%u snd_ssthresh:%u rcv_nxt:%u "
+	       "snd_nxt:%u snd:una:%u mss_cache:%u ecn_flags:%u "
+	       "rate_delivered:%u rate_interval_us:%u packets_out:%u "
+	       "retrans_out:%u total_retrans:%u segs_in:%u data_segs_in:%u "
+	       "segs_out:%u data_segs_out:%u lost_out:%u sacked_out:%u "
+	       "bytes_received:%llu bytes_acked:%llu\n",
+	       prefix,
+	       tp->snd_cwnd, tp->srtt_us, tp->rtt_min, tp->snd_ssthresh,
+	       tp->rcv_nxt, tp->snd_nxt, tp->snd_una, tp->mss_cache,
+	       tp->ecn_flags, tp->rate_delivered, tp->rate_interval_us,
+	       tp->packets_out, tp->retrans_out, tp->total_retrans,
+	       tp->segs_in, tp->data_segs_in, tp->segs_out,
+	       tp->data_segs_out, tp->lost_out, tp->sacked_out,
+	       tp->bytes_received, tp->bytes_acked);
+}
+
+static void check_result(void)
+{
+	struct bpf_tcp_sock srv_tp, cli_tp, listen_tp;
+	struct bpf_sock srv_sk, cli_sk, listen_sk;
+	__u32 ingress_linum, egress_linum;
+	int err;
+
+	err = bpf_map_lookup_elem(linum_map_fd, &egress_linum_idx,
+				  &egress_linum);
+	CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)",
+	      "err:%d errno:%d\n", err, errno);
+
+	err = bpf_map_lookup_elem(linum_map_fd, &ingress_linum_idx,
+				  &ingress_linum);
+	CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)",
+	      "err:%d errno:%d\n", err, errno);
+
+	memcpy(&srv_sk, &skel->bss->srv_sk, sizeof(srv_sk));
+	memcpy(&srv_tp, &skel->bss->srv_tp, sizeof(srv_tp));
+	memcpy(&cli_sk, &skel->bss->cli_sk, sizeof(cli_sk));
+	memcpy(&cli_tp, &skel->bss->cli_tp, sizeof(cli_tp));
+	memcpy(&listen_sk, &skel->bss->listen_sk, sizeof(listen_sk));
+	memcpy(&listen_tp, &skel->bss->listen_tp, sizeof(listen_tp));
+
+	print_sk(&listen_sk, "listen_sk");
+	print_sk(&srv_sk, "srv_sk");
+	print_sk(&cli_sk, "cli_sk");
+	print_tp(&listen_tp, "listen_tp");
+	print_tp(&srv_tp, "srv_tp");
+	print_tp(&cli_tp, "cli_tp");
+
+	CHECK(listen_sk.state != 10 ||
+	      listen_sk.family != AF_INET6 ||
+	      listen_sk.protocol != IPPROTO_TCP ||
+	      memcmp(listen_sk.src_ip6, &in6addr_loopback,
+		     sizeof(listen_sk.src_ip6)) ||
+	      listen_sk.dst_ip6[0] || listen_sk.dst_ip6[1] ||
+	      listen_sk.dst_ip6[2] || listen_sk.dst_ip6[3] ||
+	      listen_sk.src_port != ntohs(srv_sa6.sin6_port) ||
+	      listen_sk.dst_port,
+	      "listen_sk",
+	      "Unexpected. Check listen_sk output. ingress_linum:%u\n",
+	      ingress_linum);
+
+	CHECK(srv_sk.state == 10 ||
+	      !srv_sk.state ||
+	      srv_sk.family != AF_INET6 ||
+	      srv_sk.protocol != IPPROTO_TCP ||
+	      memcmp(srv_sk.src_ip6, &in6addr_loopback,
+		     sizeof(srv_sk.src_ip6)) ||
+	      memcmp(srv_sk.dst_ip6, &in6addr_loopback,
+		     sizeof(srv_sk.dst_ip6)) ||
+	      srv_sk.src_port != ntohs(srv_sa6.sin6_port) ||
+	      srv_sk.dst_port != cli_sa6.sin6_port,
+	      "srv_sk", "Unexpected. Check srv_sk output. egress_linum:%u\n",
+	      egress_linum);
+
+	CHECK(!skel->bss->lsndtime, "srv_tp", "Unexpected lsndtime:0\n");
+
+	CHECK(cli_sk.state == 10 ||
+	      !cli_sk.state ||
+	      cli_sk.family != AF_INET6 ||
+	      cli_sk.protocol != IPPROTO_TCP ||
+	      memcmp(cli_sk.src_ip6, &in6addr_loopback,
+		     sizeof(cli_sk.src_ip6)) ||
+	      memcmp(cli_sk.dst_ip6, &in6addr_loopback,
+		     sizeof(cli_sk.dst_ip6)) ||
+	      cli_sk.src_port != ntohs(cli_sa6.sin6_port) ||
+	      cli_sk.dst_port != srv_sa6.sin6_port,
+	      "cli_sk", "Unexpected. Check cli_sk output. egress_linum:%u\n",
+	      egress_linum);
+
+	CHECK(listen_tp.data_segs_out ||
+	      listen_tp.data_segs_in ||
+	      listen_tp.total_retrans ||
+	      listen_tp.bytes_acked,
+	      "listen_tp",
+	      "Unexpected. Check listen_tp output. ingress_linum:%u\n",
+	      ingress_linum);
+
+	CHECK(srv_tp.data_segs_out != 2 ||
+	      srv_tp.data_segs_in ||
+	      srv_tp.snd_cwnd != 10 ||
+	      srv_tp.total_retrans ||
+	      srv_tp.bytes_acked < 2 * DATA_LEN,
+	      "srv_tp", "Unexpected. Check srv_tp output. egress_linum:%u\n",
+	      egress_linum);
+
+	CHECK(cli_tp.data_segs_out ||
+	      cli_tp.data_segs_in != 2 ||
+	      cli_tp.snd_cwnd != 10 ||
+	      cli_tp.total_retrans ||
+	      cli_tp.bytes_received < 2 * DATA_LEN,
+	      "cli_tp", "Unexpected. Check cli_tp output. egress_linum:%u\n",
+	      egress_linum);
+
+	CHECK(skel->bss->parent_cg_id != parent_cg_id,
+	      "parent_cg_id", "%zu != %zu\n",
+	      (size_t)skel->bss->parent_cg_id, (size_t)parent_cg_id);
+
+	CHECK(skel->bss->child_cg_id != child_cg_id,
+	      "child_cg_id", "%zu != %zu\n",
+	       (size_t)skel->bss->child_cg_id, (size_t)child_cg_id);
+}
+
+static void check_sk_pkt_out_cnt(int accept_fd, int cli_fd)
+{
+	struct bpf_spinlock_cnt pkt_out_cnt = {}, pkt_out_cnt10 = {};
+	int err;
+
+	pkt_out_cnt.cnt = ~0;
+	pkt_out_cnt10.cnt = ~0;
+	err = bpf_map_lookup_elem(sk_pkt_out_cnt_fd, &accept_fd, &pkt_out_cnt);
+	if (!err)
+		err = bpf_map_lookup_elem(sk_pkt_out_cnt10_fd, &accept_fd,
+					  &pkt_out_cnt10);
+
+	/* The bpf prog only counts for fullsock and
+	 * passive connection did not become fullsock until 3WHS
+	 * had been finished, so the bpf prog only counted two data
+	 * packet out.
+	 */
+	CHECK(err || pkt_out_cnt.cnt < 0xeB9F + 2 ||
+	      pkt_out_cnt10.cnt < 0xeB9F + 20,
+	      "bpf_map_lookup_elem(sk_pkt_out_cnt, &accept_fd)",
+	      "err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u\n",
+	      err, errno, pkt_out_cnt.cnt, pkt_out_cnt10.cnt);
+
+	pkt_out_cnt.cnt = ~0;
+	pkt_out_cnt10.cnt = ~0;
+	err = bpf_map_lookup_elem(sk_pkt_out_cnt_fd, &cli_fd, &pkt_out_cnt);
+	if (!err)
+		err = bpf_map_lookup_elem(sk_pkt_out_cnt10_fd, &cli_fd,
+					  &pkt_out_cnt10);
+	/* Active connection is fullsock from the beginning.
+	 * 1 SYN and 1 ACK during 3WHS
+	 * 2 Acks on data packet.
+	 *
+	 * The bpf_prog initialized it to 0xeB9F.
+	 */
+	CHECK(err || pkt_out_cnt.cnt < 0xeB9F + 4 ||
+	      pkt_out_cnt10.cnt < 0xeB9F + 40,
+	      "bpf_map_lookup_elem(sk_pkt_out_cnt, &cli_fd)",
+	      "err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u\n",
+	      err, errno, pkt_out_cnt.cnt, pkt_out_cnt10.cnt);
+}
+
+static int init_sk_storage(int sk_fd, __u32 pkt_out_cnt)
+{
+	struct bpf_spinlock_cnt scnt = {};
+	int err;
+
+	scnt.cnt = pkt_out_cnt;
+	err = bpf_map_update_elem(sk_pkt_out_cnt_fd, &sk_fd, &scnt,
+				  BPF_NOEXIST);
+	if (CHECK(err, "bpf_map_update_elem(sk_pkt_out_cnt_fd)",
+		  "err:%d errno:%d\n", err, errno))
+		return err;
+
+	err = bpf_map_update_elem(sk_pkt_out_cnt10_fd, &sk_fd, &scnt,
+				  BPF_NOEXIST);
+	if (CHECK(err, "bpf_map_update_elem(sk_pkt_out_cnt10_fd)",
+		  "err:%d errno:%d\n", err, errno))
+		return err;
+
+	return 0;
+}
+
+static void test(void)
+{
+	int listen_fd = -1, cli_fd = -1, accept_fd = -1, err, i;
+	socklen_t addrlen = sizeof(struct sockaddr_in6);
+	char buf[DATA_LEN];
+
+	/* Prepare listen_fd */
+	listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
+	/* start_server() has logged the error details */
+	if (CHECK_FAIL(listen_fd == -1))
+		goto done;
+
+	err = getsockname(listen_fd, (struct sockaddr *)&srv_sa6, &addrlen);
+	if (CHECK(err, "getsockname(listen_fd)", "err:%d errno:%d\n", err,
+		  errno))
+		goto done;
+	memcpy(&skel->bss->srv_sa6, &srv_sa6, sizeof(srv_sa6));
+
+	cli_fd = connect_to_fd(listen_fd, 0);
+	if (CHECK_FAIL(cli_fd == -1))
+		goto done;
+
+	err = getsockname(cli_fd, (struct sockaddr *)&cli_sa6, &addrlen);
+	if (CHECK(err, "getsockname(cli_fd)", "err:%d errno:%d\n",
+		  err, errno))
+		goto done;
+
+	accept_fd = accept(listen_fd, NULL, NULL);
+	if (CHECK(accept_fd == -1, "accept(listen_fd)",
+		  "accept_fd:%d errno:%d\n",
+		  accept_fd, errno))
+		goto done;
+
+	if (init_sk_storage(accept_fd, 0xeB9F))
+		goto done;
+
+	for (i = 0; i < 2; i++) {
+		/* Send some data from accept_fd to cli_fd.
+		 * MSG_EOR to stop kernel from coalescing two pkts.
+		 */
+		err = send(accept_fd, DATA, DATA_LEN, MSG_EOR);
+		if (CHECK(err != DATA_LEN, "send(accept_fd)",
+			  "err:%d errno:%d\n", err, errno))
+			goto done;
+
+		err = recv(cli_fd, buf, DATA_LEN, 0);
+		if (CHECK(err != DATA_LEN, "recv(cli_fd)", "err:%d errno:%d\n",
+			  err, errno))
+			goto done;
+	}
+
+	shutdown(cli_fd, SHUT_WR);
+	err = recv(accept_fd, buf, 1, 0);
+	if (CHECK(err, "recv(accept_fd) for fin", "err:%d errno:%d\n",
+		  err, errno))
+		goto done;
+	shutdown(accept_fd, SHUT_WR);
+	err = recv(cli_fd, buf, 1, 0);
+	if (CHECK(err, "recv(cli_fd) for fin", "err:%d errno:%d\n",
+		  err, errno))
+		goto done;
+	check_sk_pkt_out_cnt(accept_fd, cli_fd);
+	check_result();
+
+done:
+	if (accept_fd != -1)
+		close(accept_fd);
+	if (cli_fd != -1)
+		close(cli_fd);
+	if (listen_fd != -1)
+		close(listen_fd);
+}
+
+void test_sock_fields(void)
+{
+	struct bpf_link *egress_link = NULL, *ingress_link = NULL;
+	int parent_cg_fd = -1, child_cg_fd = -1;
+
+	/* Create a cgroup, get fd, and join it */
+	parent_cg_fd = test__join_cgroup(PARENT_CGROUP);
+	if (CHECK_FAIL(parent_cg_fd < 0))
+		return;
+	parent_cg_id = get_cgroup_id(PARENT_CGROUP);
+	if (CHECK_FAIL(!parent_cg_id))
+		goto done;
+
+	child_cg_fd = test__join_cgroup(CHILD_CGROUP);
+	if (CHECK_FAIL(child_cg_fd < 0))
+		goto done;
+	child_cg_id = get_cgroup_id(CHILD_CGROUP);
+	if (CHECK_FAIL(!child_cg_id))
+		goto done;
+
+	skel = test_sock_fields__open_and_load();
+	if (CHECK(!skel, "test_sock_fields__open_and_load", "failed\n"))
+		goto done;
+
+	egress_link = bpf_program__attach_cgroup(skel->progs.egress_read_sock_fields,
+						 child_cg_fd);
+	if (CHECK(IS_ERR(egress_link), "attach_cgroup(egress)", "err:%ld\n",
+		  PTR_ERR(egress_link)))
+		goto done;
+
+	ingress_link = bpf_program__attach_cgroup(skel->progs.ingress_read_sock_fields,
+						  child_cg_fd);
+	if (CHECK(IS_ERR(ingress_link), "attach_cgroup(ingress)", "err:%ld\n",
+		  PTR_ERR(ingress_link)))
+		goto done;
+
+	linum_map_fd = bpf_map__fd(skel->maps.linum_map);
+	sk_pkt_out_cnt_fd = bpf_map__fd(skel->maps.sk_pkt_out_cnt);
+	sk_pkt_out_cnt10_fd = bpf_map__fd(skel->maps.sk_pkt_out_cnt10);
+
+	test();
+
+done:
+	bpf_link__destroy(egress_link);
+	bpf_link__destroy(ingress_link);
+	test_sock_fields__destroy(skel);
+	if (child_cg_fd != -1)
+		close(child_cg_fd);
+	if (parent_cg_fd != -1)
+		close(parent_cg_fd);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
index 96e7b7f84c65..85f73261fab0 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
@@ -4,6 +4,9 @@
 
 #include "test_progs.h"
 #include "test_skmsg_load_helpers.skel.h"
+#include "test_sockmap_update.skel.h"
+#include "test_sockmap_invalid_update.skel.h"
+#include "bpf_iter_sockmap.skel.h"
 
 #define TCP_REPAIR		19	/* TCP sock is under repair right now */
 
@@ -45,6 +48,37 @@ error:
 	return -1;
 }
 
+static void compare_cookies(struct bpf_map *src, struct bpf_map *dst)
+{
+	__u32 i, max_entries = bpf_map__max_entries(src);
+	int err, duration = 0, src_fd, dst_fd;
+
+	src_fd = bpf_map__fd(src);
+	dst_fd = bpf_map__fd(dst);
+
+	for (i = 0; i < max_entries; i++) {
+		__u64 src_cookie, dst_cookie;
+
+		err = bpf_map_lookup_elem(src_fd, &i, &src_cookie);
+		if (err && errno == ENOENT) {
+			err = bpf_map_lookup_elem(dst_fd, &i, &dst_cookie);
+			CHECK(!err, "map_lookup_elem(dst)", "element %u not deleted\n", i);
+			CHECK(err && errno != ENOENT, "map_lookup_elem(dst)", "%s\n",
+			      strerror(errno));
+			continue;
+		}
+		if (CHECK(err, "lookup_elem(src)", "%s\n", strerror(errno)))
+			continue;
+
+		err = bpf_map_lookup_elem(dst_fd, &i, &dst_cookie);
+		if (CHECK(err, "lookup_elem(dst)", "%s\n", strerror(errno)))
+			continue;
+
+		CHECK(dst_cookie != src_cookie, "cookie mismatch",
+		      "%llu != %llu (pos %u)\n", dst_cookie, src_cookie, i);
+	}
+}
+
 /* Create a map, populate it with one socket, and free the map. */
 static void test_sockmap_create_update_free(enum bpf_map_type map_type)
 {
@@ -101,6 +135,151 @@ out:
 	test_skmsg_load_helpers__destroy(skel);
 }
 
+static void test_sockmap_update(enum bpf_map_type map_type)
+{
+	struct bpf_prog_test_run_attr tattr;
+	int err, prog, src, duration = 0;
+	struct test_sockmap_update *skel;
+	struct bpf_map *dst_map;
+	const __u32 zero = 0;
+	char dummy[14] = {0};
+	__s64 sk;
+
+	sk = connected_socket_v4();
+	if (CHECK(sk == -1, "connected_socket_v4", "cannot connect\n"))
+		return;
+
+	skel = test_sockmap_update__open_and_load();
+	if (CHECK(!skel, "open_and_load", "cannot load skeleton\n"))
+		goto close_sk;
+
+	prog = bpf_program__fd(skel->progs.copy_sock_map);
+	src = bpf_map__fd(skel->maps.src);
+	if (map_type == BPF_MAP_TYPE_SOCKMAP)
+		dst_map = skel->maps.dst_sock_map;
+	else
+		dst_map = skel->maps.dst_sock_hash;
+
+	err = bpf_map_update_elem(src, &zero, &sk, BPF_NOEXIST);
+	if (CHECK(err, "update_elem(src)", "errno=%u\n", errno))
+		goto out;
+
+	tattr = (struct bpf_prog_test_run_attr){
+		.prog_fd = prog,
+		.repeat = 1,
+		.data_in = dummy,
+		.data_size_in = sizeof(dummy),
+	};
+
+	err = bpf_prog_test_run_xattr(&tattr);
+	if (CHECK_ATTR(err || !tattr.retval, "bpf_prog_test_run",
+		       "errno=%u retval=%u\n", errno, tattr.retval))
+		goto out;
+
+	compare_cookies(skel->maps.src, dst_map);
+
+out:
+	test_sockmap_update__destroy(skel);
+close_sk:
+	close(sk);
+}
+
+static void test_sockmap_invalid_update(void)
+{
+	struct test_sockmap_invalid_update *skel;
+	int duration = 0;
+
+	skel = test_sockmap_invalid_update__open_and_load();
+	if (CHECK(skel, "open_and_load", "verifier accepted map_update\n"))
+		test_sockmap_invalid_update__destroy(skel);
+}
+
+static void test_sockmap_copy(enum bpf_map_type map_type)
+{
+	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+	int err, len, src_fd, iter_fd, duration = 0;
+	union bpf_iter_link_info linfo = {};
+	__u32 i, num_sockets, num_elems;
+	struct bpf_iter_sockmap *skel;
+	__s64 *sock_fd = NULL;
+	struct bpf_link *link;
+	struct bpf_map *src;
+	char buf[64];
+
+	skel = bpf_iter_sockmap__open_and_load();
+	if (CHECK(!skel, "bpf_iter_sockmap__open_and_load", "skeleton open_and_load failed\n"))
+		return;
+
+	if (map_type == BPF_MAP_TYPE_SOCKMAP) {
+		src = skel->maps.sockmap;
+		num_elems = bpf_map__max_entries(src);
+		num_sockets = num_elems - 1;
+	} else {
+		src = skel->maps.sockhash;
+		num_elems = bpf_map__max_entries(src) - 1;
+		num_sockets = num_elems;
+	}
+
+	sock_fd = calloc(num_sockets, sizeof(*sock_fd));
+	if (CHECK(!sock_fd, "calloc(sock_fd)", "failed to allocate\n"))
+		goto out;
+
+	for (i = 0; i < num_sockets; i++)
+		sock_fd[i] = -1;
+
+	src_fd = bpf_map__fd(src);
+
+	for (i = 0; i < num_sockets; i++) {
+		sock_fd[i] = connected_socket_v4();
+		if (CHECK(sock_fd[i] == -1, "connected_socket_v4", "cannot connect\n"))
+			goto out;
+
+		err = bpf_map_update_elem(src_fd, &i, &sock_fd[i], BPF_NOEXIST);
+		if (CHECK(err, "map_update", "failed: %s\n", strerror(errno)))
+			goto out;
+	}
+
+	linfo.map.map_fd = src_fd;
+	opts.link_info = &linfo;
+	opts.link_info_len = sizeof(linfo);
+	link = bpf_program__attach_iter(skel->progs.copy, &opts);
+	if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+		goto out;
+
+	iter_fd = bpf_iter_create(bpf_link__fd(link));
+	if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+		goto free_link;
+
+	/* do some tests */
+	while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
+		;
+	if (CHECK(len < 0, "read", "failed: %s\n", strerror(errno)))
+		goto close_iter;
+
+	/* test results */
+	if (CHECK(skel->bss->elems != num_elems, "elems", "got %u expected %u\n",
+		  skel->bss->elems, num_elems))
+		goto close_iter;
+
+	if (CHECK(skel->bss->socks != num_sockets, "socks", "got %u expected %u\n",
+		  skel->bss->socks, num_sockets))
+		goto close_iter;
+
+	compare_cookies(src, skel->maps.dst);
+
+close_iter:
+	close(iter_fd);
+free_link:
+	bpf_link__destroy(link);
+out:
+	for (i = 0; sock_fd && i < num_sockets; i++)
+		if (sock_fd[i] >= 0)
+			close(sock_fd[i]);
+	if (sock_fd)
+		free(sock_fd);
+	bpf_iter_sockmap__destroy(skel);
+}
+
 void test_sockmap_basic(void)
 {
 	if (test__start_subtest("sockmap create_update_free"))
@@ -111,4 +290,14 @@ void test_sockmap_basic(void)
 		test_skmsg_helpers(BPF_MAP_TYPE_SOCKMAP);
 	if (test__start_subtest("sockhash sk_msg load helpers"))
 		test_skmsg_helpers(BPF_MAP_TYPE_SOCKHASH);
+	if (test__start_subtest("sockmap update"))
+		test_sockmap_update(BPF_MAP_TYPE_SOCKMAP);
+	if (test__start_subtest("sockhash update"))
+		test_sockmap_update(BPF_MAP_TYPE_SOCKHASH);
+	if (test__start_subtest("sockmap update in unsafe context"))
+		test_sockmap_invalid_update();
+	if (test__start_subtest("sockmap copy"))
+		test_sockmap_copy(BPF_MAP_TYPE_SOCKMAP);
+	if (test__start_subtest("sockhash copy"))
+		test_sockmap_copy(BPF_MAP_TYPE_SOCKHASH);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
index 5f54c6aec7f0..b25c9c45c148 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
@@ -45,9 +45,9 @@ static int getsetsockopt(void)
 		goto err;
 	}
 
-	if (*(int *)big_buf != 0x08) {
+	if (*big_buf != 0x08) {
 		log_err("Unexpected getsockopt(IP_TOS) optval 0x%x != 0x08",
-			*(int *)big_buf);
+			(int)*big_buf);
 		goto err;
 	}
 
diff --git a/tools/testing/selftests/bpf/prog_tests/subprogs.c b/tools/testing/selftests/bpf/prog_tests/subprogs.c
new file mode 100644
index 000000000000..a00abf58c037
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/subprogs.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <test_progs.h>
+#include <time.h>
+#include "test_subprogs.skel.h"
+
+static int duration;
+
+void test_subprogs(void)
+{
+	struct test_subprogs *skel;
+	int err;
+
+	skel = test_subprogs__open_and_load();
+	if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+		return;
+
+	err = test_subprogs__attach(skel);
+	if (CHECK(err, "skel_attach", "failed to attach skeleton: %d\n", err))
+		goto cleanup;
+
+	usleep(1);
+
+	CHECK(skel->bss->res1 != 12, "res1", "got %d, exp %d\n", skel->bss->res1, 12);
+	CHECK(skel->bss->res2 != 17, "res2", "got %d, exp %d\n", skel->bss->res2, 17);
+	CHECK(skel->bss->res3 != 19, "res3", "got %d, exp %d\n", skel->bss->res3, 19);
+	CHECK(skel->bss->res4 != 36, "res4", "got %d, exp %d\n", skel->bss->res4, 36);
+
+cleanup:
+	test_subprogs__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
index bb8fe646dd9f..ee27d68d2a1c 100644
--- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c
+++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
+#include <network_helpers.h>
 
 /* test_tailcall_1 checks basic functionality by patching multiple locations
  * in a single program for a single tail call slot with nop->jmp, jmp->nop
@@ -472,6 +473,329 @@ out:
 	bpf_object__close(obj);
 }
 
+/* test_tailcall_bpf2bpf_1 purpose is to make sure that tailcalls are working
+ * correctly in correlation with BPF subprograms
+ */
+static void test_tailcall_bpf2bpf_1(void)
+{
+	int err, map_fd, prog_fd, main_fd, i;
+	struct bpf_map *prog_array;
+	struct bpf_program *prog;
+	struct bpf_object *obj;
+	__u32 retval, duration;
+	char prog_name[32];
+
+	err = bpf_prog_load("tailcall_bpf2bpf1.o", BPF_PROG_TYPE_SCHED_CLS,
+			    &obj, &prog_fd);
+	if (CHECK_FAIL(err))
+		return;
+
+	prog = bpf_object__find_program_by_title(obj, "classifier");
+	if (CHECK_FAIL(!prog))
+		goto out;
+
+	main_fd = bpf_program__fd(prog);
+	if (CHECK_FAIL(main_fd < 0))
+		goto out;
+
+	prog_array = bpf_object__find_map_by_name(obj, "jmp_table");
+	if (CHECK_FAIL(!prog_array))
+		goto out;
+
+	map_fd = bpf_map__fd(prog_array);
+	if (CHECK_FAIL(map_fd < 0))
+		goto out;
+
+	/* nop -> jmp */
+	for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+		snprintf(prog_name, sizeof(prog_name), "classifier/%i", i);
+
+		prog = bpf_object__find_program_by_title(obj, prog_name);
+		if (CHECK_FAIL(!prog))
+			goto out;
+
+		prog_fd = bpf_program__fd(prog);
+		if (CHECK_FAIL(prog_fd < 0))
+			goto out;
+
+		err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY);
+		if (CHECK_FAIL(err))
+			goto out;
+	}
+
+	err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0,
+				0, &retval, &duration);
+	CHECK(err || retval != 1, "tailcall",
+	      "err %d errno %d retval %d\n", err, errno, retval);
+
+	/* jmp -> nop, call subprog that will do tailcall */
+	i = 1;
+	err = bpf_map_delete_elem(map_fd, &i);
+	if (CHECK_FAIL(err))
+		goto out;
+
+	err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0,
+				0, &retval, &duration);
+	CHECK(err || retval != 0, "tailcall", "err %d errno %d retval %d\n",
+	      err, errno, retval);
+
+	/* make sure that subprog can access ctx and entry prog that
+	 * called this subprog can properly return
+	 */
+	i = 0;
+	err = bpf_map_delete_elem(map_fd, &i);
+	if (CHECK_FAIL(err))
+		goto out;
+
+	err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0,
+				0, &retval, &duration);
+	CHECK(err || retval != sizeof(pkt_v4) * 2,
+	      "tailcall", "err %d errno %d retval %d\n",
+	      err, errno, retval);
+out:
+	bpf_object__close(obj);
+}
+
+/* test_tailcall_bpf2bpf_2 checks that the count value of the tail call limit
+ * enforcement matches with expectations when tailcall is preceded with
+ * bpf2bpf call.
+ */
+static void test_tailcall_bpf2bpf_2(void)
+{
+	int err, map_fd, prog_fd, main_fd, data_fd, i, val;
+	struct bpf_map *prog_array, *data_map;
+	struct bpf_program *prog;
+	struct bpf_object *obj;
+	__u32 retval, duration;
+	char buff[128] = {};
+
+	err = bpf_prog_load("tailcall_bpf2bpf2.o", BPF_PROG_TYPE_SCHED_CLS,
+			    &obj, &prog_fd);
+	if (CHECK_FAIL(err))
+		return;
+
+	prog = bpf_object__find_program_by_title(obj, "classifier");
+	if (CHECK_FAIL(!prog))
+		goto out;
+
+	main_fd = bpf_program__fd(prog);
+	if (CHECK_FAIL(main_fd < 0))
+		goto out;
+
+	prog_array = bpf_object__find_map_by_name(obj, "jmp_table");
+	if (CHECK_FAIL(!prog_array))
+		goto out;
+
+	map_fd = bpf_map__fd(prog_array);
+	if (CHECK_FAIL(map_fd < 0))
+		goto out;
+
+	prog = bpf_object__find_program_by_title(obj, "classifier/0");
+	if (CHECK_FAIL(!prog))
+		goto out;
+
+	prog_fd = bpf_program__fd(prog);
+	if (CHECK_FAIL(prog_fd < 0))
+		goto out;
+
+	i = 0;
+	err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY);
+	if (CHECK_FAIL(err))
+		goto out;
+
+	err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
+				&duration, &retval, NULL);
+	CHECK(err || retval != 1, "tailcall", "err %d errno %d retval %d\n",
+	      err, errno, retval);
+
+	data_map = bpf_object__find_map_by_name(obj, "tailcall.bss");
+	if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map)))
+		return;
+
+	data_fd = bpf_map__fd(data_map);
+	if (CHECK_FAIL(map_fd < 0))
+		return;
+
+	i = 0;
+	err = bpf_map_lookup_elem(data_fd, &i, &val);
+	CHECK(err || val != 33, "tailcall count", "err %d errno %d count %d\n",
+	      err, errno, val);
+
+	i = 0;
+	err = bpf_map_delete_elem(map_fd, &i);
+	if (CHECK_FAIL(err))
+		goto out;
+
+	err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
+				&duration, &retval, NULL);
+	CHECK(err || retval != 0, "tailcall", "err %d errno %d retval %d\n",
+	      err, errno, retval);
+out:
+	bpf_object__close(obj);
+}
+
+/* test_tailcall_bpf2bpf_3 checks that non-trivial amount of stack (up to
+ * 256 bytes) can be used within bpf subprograms that have the tailcalls
+ * in them
+ */
+static void test_tailcall_bpf2bpf_3(void)
+{
+	int err, map_fd, prog_fd, main_fd, i;
+	struct bpf_map *prog_array;
+	struct bpf_program *prog;
+	struct bpf_object *obj;
+	__u32 retval, duration;
+	char prog_name[32];
+
+	err = bpf_prog_load("tailcall_bpf2bpf3.o", BPF_PROG_TYPE_SCHED_CLS,
+			    &obj, &prog_fd);
+	if (CHECK_FAIL(err))
+		return;
+
+	prog = bpf_object__find_program_by_title(obj, "classifier");
+	if (CHECK_FAIL(!prog))
+		goto out;
+
+	main_fd = bpf_program__fd(prog);
+	if (CHECK_FAIL(main_fd < 0))
+		goto out;
+
+	prog_array = bpf_object__find_map_by_name(obj, "jmp_table");
+	if (CHECK_FAIL(!prog_array))
+		goto out;
+
+	map_fd = bpf_map__fd(prog_array);
+	if (CHECK_FAIL(map_fd < 0))
+		goto out;
+
+	for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+		snprintf(prog_name, sizeof(prog_name), "classifier/%i", i);
+
+		prog = bpf_object__find_program_by_title(obj, prog_name);
+		if (CHECK_FAIL(!prog))
+			goto out;
+
+		prog_fd = bpf_program__fd(prog);
+		if (CHECK_FAIL(prog_fd < 0))
+			goto out;
+
+		err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY);
+		if (CHECK_FAIL(err))
+			goto out;
+	}
+
+	err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0,
+				&duration, &retval, NULL);
+	CHECK(err || retval != sizeof(pkt_v4) * 3,
+	      "tailcall", "err %d errno %d retval %d\n",
+	      err, errno, retval);
+
+	i = 1;
+	err = bpf_map_delete_elem(map_fd, &i);
+	if (CHECK_FAIL(err))
+		goto out;
+
+	err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0,
+				&duration, &retval, NULL);
+	CHECK(err || retval != sizeof(pkt_v4),
+	      "tailcall", "err %d errno %d retval %d\n",
+	      err, errno, retval);
+
+	i = 0;
+	err = bpf_map_delete_elem(map_fd, &i);
+	if (CHECK_FAIL(err))
+		goto out;
+
+	err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0,
+				&duration, &retval, NULL);
+	CHECK(err || retval != sizeof(pkt_v4) * 2,
+	      "tailcall", "err %d errno %d retval %d\n",
+	      err, errno, retval);
+out:
+	bpf_object__close(obj);
+}
+
+/* test_tailcall_bpf2bpf_4 checks that tailcall counter is correctly preserved
+ * across tailcalls combined with bpf2bpf calls. for making sure that tailcall
+ * counter behaves correctly, bpf program will go through following flow:
+ *
+ * entry -> entry_subprog -> tailcall0 -> bpf_func0 -> subprog0 ->
+ * -> tailcall1 -> bpf_func1 -> subprog1 -> tailcall2 -> bpf_func2 ->
+ * subprog2 [here bump global counter] --------^
+ *
+ * We go through first two tailcalls and start counting from the subprog2 where
+ * the loop begins. At the end of the test make sure that the global counter is
+ * equal to 31, because tailcall counter includes the first two tailcalls
+ * whereas global counter is incremented only on loop presented on flow above.
+ */
+static void test_tailcall_bpf2bpf_4(void)
+{
+	int err, map_fd, prog_fd, main_fd, data_fd, i, val;
+	struct bpf_map *prog_array, *data_map;
+	struct bpf_program *prog;
+	struct bpf_object *obj;
+	__u32 retval, duration;
+	char prog_name[32];
+
+	err = bpf_prog_load("tailcall_bpf2bpf4.o", BPF_PROG_TYPE_SCHED_CLS,
+			    &obj, &prog_fd);
+	if (CHECK_FAIL(err))
+		return;
+
+	prog = bpf_object__find_program_by_title(obj, "classifier");
+	if (CHECK_FAIL(!prog))
+		goto out;
+
+	main_fd = bpf_program__fd(prog);
+	if (CHECK_FAIL(main_fd < 0))
+		goto out;
+
+	prog_array = bpf_object__find_map_by_name(obj, "jmp_table");
+	if (CHECK_FAIL(!prog_array))
+		goto out;
+
+	map_fd = bpf_map__fd(prog_array);
+	if (CHECK_FAIL(map_fd < 0))
+		goto out;
+
+	for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+		snprintf(prog_name, sizeof(prog_name), "classifier/%i", i);
+
+		prog = bpf_object__find_program_by_title(obj, prog_name);
+		if (CHECK_FAIL(!prog))
+			goto out;
+
+		prog_fd = bpf_program__fd(prog);
+		if (CHECK_FAIL(prog_fd < 0))
+			goto out;
+
+		err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY);
+		if (CHECK_FAIL(err))
+			goto out;
+	}
+
+	err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0,
+				&duration, &retval, NULL);
+	CHECK(err || retval != sizeof(pkt_v4) * 3, "tailcall", "err %d errno %d retval %d\n",
+	      err, errno, retval);
+
+	data_map = bpf_object__find_map_by_name(obj, "tailcall.bss");
+	if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map)))
+		return;
+
+	data_fd = bpf_map__fd(data_map);
+	if (CHECK_FAIL(map_fd < 0))
+		return;
+
+	i = 0;
+	err = bpf_map_lookup_elem(data_fd, &i, &val);
+	CHECK(err || val != 31, "tailcall count", "err %d errno %d count %d\n",
+	      err, errno, val);
+
+out:
+	bpf_object__close(obj);
+}
+
 void test_tailcalls(void)
 {
 	if (test__start_subtest("tailcall_1"))
@@ -484,4 +808,12 @@ void test_tailcalls(void)
 		test_tailcall_4();
 	if (test__start_subtest("tailcall_5"))
 		test_tailcall_5();
+	if (test__start_subtest("tailcall_bpf2bpf_1"))
+		test_tailcall_bpf2bpf_1();
+	if (test__start_subtest("tailcall_bpf2bpf_2"))
+		test_tailcall_bpf2bpf_2();
+	if (test__start_subtest("tailcall_bpf2bpf_3"))
+		test_tailcall_bpf2bpf_3();
+	if (test__start_subtest("tailcall_bpf2bpf_4"))
+		test_tailcall_bpf2bpf_4();
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c
new file mode 100644
index 000000000000..c85174cdcb77
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c
@@ -0,0 +1,610 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#define _GNU_SOURCE
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/socket.h>
+#include <linux/compiler.h>
+
+#include "test_progs.h"
+#include "cgroup_helpers.h"
+#include "network_helpers.h"
+#include "test_tcp_hdr_options.h"
+#include "test_tcp_hdr_options.skel.h"
+#include "test_misc_tcp_hdr_options.skel.h"
+
+#define LO_ADDR6 "::1"
+#define CG_NAME "/tcpbpf-hdr-opt-test"
+
+struct bpf_test_option exp_passive_estab_in;
+struct bpf_test_option exp_active_estab_in;
+struct bpf_test_option exp_passive_fin_in;
+struct bpf_test_option exp_active_fin_in;
+struct hdr_stg exp_passive_hdr_stg;
+struct hdr_stg exp_active_hdr_stg = { .active = true, };
+
+static struct test_misc_tcp_hdr_options *misc_skel;
+static struct test_tcp_hdr_options *skel;
+static int lport_linum_map_fd;
+static int hdr_stg_map_fd;
+static __u32 duration;
+static int cg_fd;
+
+struct sk_fds {
+	int srv_fd;
+	int passive_fd;
+	int active_fd;
+	int passive_lport;
+	int active_lport;
+};
+
+static int create_netns(void)
+{
+	if (CHECK(unshare(CLONE_NEWNET), "create netns",
+		  "unshare(CLONE_NEWNET): %s (%d)",
+		  strerror(errno), errno))
+		return -1;
+
+	if (CHECK(system("ip link set dev lo up"), "run ip cmd",
+		  "failed to bring lo link up\n"))
+		return -1;
+
+	return 0;
+}
+
+static int write_sysctl(const char *sysctl, const char *value)
+{
+	int fd, err, len;
+
+	fd = open(sysctl, O_WRONLY);
+	if (CHECK(fd == -1, "open sysctl", "open(%s): %s (%d)\n",
+		  sysctl, strerror(errno), errno))
+		return -1;
+
+	len = strlen(value);
+	err = write(fd, value, len);
+	close(fd);
+	if (CHECK(err != len, "write sysctl",
+		  "write(%s, %s): err:%d %s (%d)\n",
+		  sysctl, value, err, strerror(errno), errno))
+		return -1;
+
+	return 0;
+}
+
+static void print_hdr_stg(const struct hdr_stg *hdr_stg, const char *prefix)
+{
+	fprintf(stderr, "%s{active:%u, resend_syn:%u, syncookie:%u, fastopen:%u}\n",
+		prefix ? : "", hdr_stg->active, hdr_stg->resend_syn,
+		hdr_stg->syncookie, hdr_stg->fastopen);
+}
+
+static void print_option(const struct bpf_test_option *opt, const char *prefix)
+{
+	fprintf(stderr, "%s{flags:0x%x, max_delack_ms:%u, rand:0x%x}\n",
+		prefix ? : "", opt->flags, opt->max_delack_ms, opt->rand);
+}
+
+static void sk_fds_close(struct sk_fds *sk_fds)
+{
+	close(sk_fds->srv_fd);
+	close(sk_fds->passive_fd);
+	close(sk_fds->active_fd);
+}
+
+static int sk_fds_shutdown(struct sk_fds *sk_fds)
+{
+	int ret, abyte;
+
+	shutdown(sk_fds->active_fd, SHUT_WR);
+	ret = read(sk_fds->passive_fd, &abyte, sizeof(abyte));
+	if (CHECK(ret != 0, "read-after-shutdown(passive_fd):",
+		  "ret:%d %s (%d)\n",
+		  ret, strerror(errno), errno))
+		return -1;
+
+	shutdown(sk_fds->passive_fd, SHUT_WR);
+	ret = read(sk_fds->active_fd, &abyte, sizeof(abyte));
+	if (CHECK(ret != 0, "read-after-shutdown(active_fd):",
+		  "ret:%d %s (%d)\n",
+		  ret, strerror(errno), errno))
+		return -1;
+
+	return 0;
+}
+
+static int sk_fds_connect(struct sk_fds *sk_fds, bool fast_open)
+{
+	const char fast[] = "FAST!!!";
+	struct sockaddr_in6 addr6;
+	socklen_t len;
+
+	sk_fds->srv_fd = start_server(AF_INET6, SOCK_STREAM, LO_ADDR6, 0, 0);
+	if (CHECK(sk_fds->srv_fd == -1, "start_server", "%s (%d)\n",
+		  strerror(errno), errno))
+		goto error;
+
+	if (fast_open)
+		sk_fds->active_fd = fastopen_connect(sk_fds->srv_fd, fast,
+						     sizeof(fast), 0);
+	else
+		sk_fds->active_fd = connect_to_fd(sk_fds->srv_fd, 0);
+
+	if (CHECK_FAIL(sk_fds->active_fd == -1)) {
+		close(sk_fds->srv_fd);
+		goto error;
+	}
+
+	len = sizeof(addr6);
+	if (CHECK(getsockname(sk_fds->srv_fd, (struct sockaddr *)&addr6,
+			      &len), "getsockname(srv_fd)", "%s (%d)\n",
+		  strerror(errno), errno))
+		goto error_close;
+	sk_fds->passive_lport = ntohs(addr6.sin6_port);
+
+	len = sizeof(addr6);
+	if (CHECK(getsockname(sk_fds->active_fd, (struct sockaddr *)&addr6,
+			      &len), "getsockname(active_fd)", "%s (%d)\n",
+		  strerror(errno), errno))
+		goto error_close;
+	sk_fds->active_lport = ntohs(addr6.sin6_port);
+
+	sk_fds->passive_fd = accept(sk_fds->srv_fd, NULL, 0);
+	if (CHECK(sk_fds->passive_fd == -1, "accept(srv_fd)", "%s (%d)\n",
+		  strerror(errno), errno))
+		goto error_close;
+
+	if (fast_open) {
+		char bytes_in[sizeof(fast)];
+		int ret;
+
+		ret = read(sk_fds->passive_fd, bytes_in, sizeof(bytes_in));
+		if (CHECK(ret != sizeof(fast), "read fastopen syn data",
+			  "expected=%lu actual=%d\n", sizeof(fast), ret)) {
+			close(sk_fds->passive_fd);
+			goto error_close;
+		}
+	}
+
+	return 0;
+
+error_close:
+	close(sk_fds->active_fd);
+	close(sk_fds->srv_fd);
+
+error:
+	memset(sk_fds, -1, sizeof(*sk_fds));
+	return -1;
+}
+
+static int check_hdr_opt(const struct bpf_test_option *exp,
+			 const struct bpf_test_option *act,
+			 const char *hdr_desc)
+{
+	if (CHECK(memcmp(exp, act, sizeof(*exp)),
+		  "expected-vs-actual", "unexpected %s\n", hdr_desc)) {
+		print_option(exp, "expected: ");
+		print_option(act, "  actual: ");
+		return -1;
+	}
+
+	return 0;
+}
+
+static int check_hdr_stg(const struct hdr_stg *exp, int fd,
+			 const char *stg_desc)
+{
+	struct hdr_stg act;
+
+	if (CHECK(bpf_map_lookup_elem(hdr_stg_map_fd, &fd, &act),
+		  "map_lookup(hdr_stg_map_fd)", "%s %s (%d)\n",
+		  stg_desc, strerror(errno), errno))
+		return -1;
+
+	if (CHECK(memcmp(exp, &act, sizeof(*exp)),
+		  "expected-vs-actual", "unexpected %s\n", stg_desc)) {
+		print_hdr_stg(exp, "expected: ");
+		print_hdr_stg(&act, "  actual: ");
+		return -1;
+	}
+
+	return 0;
+}
+
+static int check_error_linum(const struct sk_fds *sk_fds)
+{
+	unsigned int nr_errors = 0;
+	struct linum_err linum_err;
+	int lport;
+
+	lport = sk_fds->passive_lport;
+	if (!bpf_map_lookup_elem(lport_linum_map_fd, &lport, &linum_err)) {
+		fprintf(stderr,
+			"bpf prog error out at lport:passive(%d), linum:%u err:%d\n",
+			lport, linum_err.linum, linum_err.err);
+		nr_errors++;
+	}
+
+	lport = sk_fds->active_lport;
+	if (!bpf_map_lookup_elem(lport_linum_map_fd, &lport, &linum_err)) {
+		fprintf(stderr,
+			"bpf prog error out at lport:active(%d), linum:%u err:%d\n",
+			lport, linum_err.linum, linum_err.err);
+		nr_errors++;
+	}
+
+	return nr_errors;
+}
+
+static void check_hdr_and_close_fds(struct sk_fds *sk_fds)
+{
+	const __u32 expected_inherit_cb_flags =
+		BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG |
+		BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG |
+		BPF_SOCK_OPS_STATE_CB_FLAG;
+
+	if (sk_fds_shutdown(sk_fds))
+		goto check_linum;
+
+	if (CHECK(expected_inherit_cb_flags != skel->bss->inherit_cb_flags,
+		  "Unexpected inherit_cb_flags", "0x%x != 0x%x\n",
+		  skel->bss->inherit_cb_flags, expected_inherit_cb_flags))
+		goto check_linum;
+
+	if (check_hdr_stg(&exp_passive_hdr_stg, sk_fds->passive_fd,
+			  "passive_hdr_stg"))
+		goto check_linum;
+
+	if (check_hdr_stg(&exp_active_hdr_stg, sk_fds->active_fd,
+			  "active_hdr_stg"))
+		goto check_linum;
+
+	if (check_hdr_opt(&exp_passive_estab_in, &skel->bss->passive_estab_in,
+			  "passive_estab_in"))
+		goto check_linum;
+
+	if (check_hdr_opt(&exp_active_estab_in, &skel->bss->active_estab_in,
+			  "active_estab_in"))
+		goto check_linum;
+
+	if (check_hdr_opt(&exp_passive_fin_in, &skel->bss->passive_fin_in,
+			  "passive_fin_in"))
+		goto check_linum;
+
+	check_hdr_opt(&exp_active_fin_in, &skel->bss->active_fin_in,
+		      "active_fin_in");
+
+check_linum:
+	CHECK_FAIL(check_error_linum(sk_fds));
+	sk_fds_close(sk_fds);
+}
+
+static void prepare_out(void)
+{
+	skel->bss->active_syn_out = exp_passive_estab_in;
+	skel->bss->passive_synack_out = exp_active_estab_in;
+
+	skel->bss->active_fin_out = exp_passive_fin_in;
+	skel->bss->passive_fin_out = exp_active_fin_in;
+}
+
+static void reset_test(void)
+{
+	size_t optsize = sizeof(struct bpf_test_option);
+	int lport, err;
+
+	memset(&skel->bss->passive_synack_out, 0, optsize);
+	memset(&skel->bss->passive_fin_out, 0, optsize);
+
+	memset(&skel->bss->passive_estab_in, 0, optsize);
+	memset(&skel->bss->passive_fin_in, 0, optsize);
+
+	memset(&skel->bss->active_syn_out, 0, optsize);
+	memset(&skel->bss->active_fin_out, 0, optsize);
+
+	memset(&skel->bss->active_estab_in, 0, optsize);
+	memset(&skel->bss->active_fin_in, 0, optsize);
+
+	skel->bss->inherit_cb_flags = 0;
+
+	skel->data->test_kind = TCPOPT_EXP;
+	skel->data->test_magic = 0xeB9F;
+
+	memset(&exp_passive_estab_in, 0, optsize);
+	memset(&exp_active_estab_in, 0, optsize);
+	memset(&exp_passive_fin_in, 0, optsize);
+	memset(&exp_active_fin_in, 0, optsize);
+
+	memset(&exp_passive_hdr_stg, 0, sizeof(exp_passive_hdr_stg));
+	memset(&exp_active_hdr_stg, 0, sizeof(exp_active_hdr_stg));
+	exp_active_hdr_stg.active = true;
+
+	err = bpf_map_get_next_key(lport_linum_map_fd, NULL, &lport);
+	while (!err) {
+		bpf_map_delete_elem(lport_linum_map_fd, &lport);
+		err = bpf_map_get_next_key(lport_linum_map_fd, &lport, &lport);
+	}
+}
+
+static void fastopen_estab(void)
+{
+	struct bpf_link *link;
+	struct sk_fds sk_fds;
+
+	hdr_stg_map_fd = bpf_map__fd(skel->maps.hdr_stg_map);
+	lport_linum_map_fd = bpf_map__fd(skel->maps.lport_linum_map);
+
+	exp_passive_estab_in.flags = OPTION_F_RAND | OPTION_F_MAX_DELACK_MS;
+	exp_passive_estab_in.rand = 0xfa;
+	exp_passive_estab_in.max_delack_ms = 11;
+
+	exp_active_estab_in.flags = OPTION_F_RAND | OPTION_F_MAX_DELACK_MS;
+	exp_active_estab_in.rand = 0xce;
+	exp_active_estab_in.max_delack_ms = 22;
+
+	exp_passive_hdr_stg.fastopen = true;
+
+	prepare_out();
+
+	/* Allow fastopen without fastopen cookie */
+	if (write_sysctl("/proc/sys/net/ipv4/tcp_fastopen", "1543"))
+		return;
+
+	link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd);
+	if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n",
+		  PTR_ERR(link)))
+		return;
+
+	if (sk_fds_connect(&sk_fds, true)) {
+		bpf_link__destroy(link);
+		return;
+	}
+
+	check_hdr_and_close_fds(&sk_fds);
+	bpf_link__destroy(link);
+}
+
+static void syncookie_estab(void)
+{
+	struct bpf_link *link;
+	struct sk_fds sk_fds;
+
+	hdr_stg_map_fd = bpf_map__fd(skel->maps.hdr_stg_map);
+	lport_linum_map_fd = bpf_map__fd(skel->maps.lport_linum_map);
+
+	exp_passive_estab_in.flags = OPTION_F_RAND | OPTION_F_MAX_DELACK_MS;
+	exp_passive_estab_in.rand = 0xfa;
+	exp_passive_estab_in.max_delack_ms = 11;
+
+	exp_active_estab_in.flags = OPTION_F_RAND | OPTION_F_MAX_DELACK_MS |
+					OPTION_F_RESEND;
+	exp_active_estab_in.rand = 0xce;
+	exp_active_estab_in.max_delack_ms = 22;
+
+	exp_passive_hdr_stg.syncookie = true;
+	exp_active_hdr_stg.resend_syn = true,
+
+	prepare_out();
+
+	/* Clear the RESEND to ensure the bpf prog can learn
+	 * want_cookie and set the RESEND by itself.
+	 */
+	skel->bss->passive_synack_out.flags &= ~OPTION_F_RESEND;
+
+	/* Enforce syncookie mode */
+	if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "2"))
+		return;
+
+	link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd);
+	if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n",
+		  PTR_ERR(link)))
+		return;
+
+	if (sk_fds_connect(&sk_fds, false)) {
+		bpf_link__destroy(link);
+		return;
+	}
+
+	check_hdr_and_close_fds(&sk_fds);
+	bpf_link__destroy(link);
+}
+
+static void fin(void)
+{
+	struct bpf_link *link;
+	struct sk_fds sk_fds;
+
+	hdr_stg_map_fd = bpf_map__fd(skel->maps.hdr_stg_map);
+	lport_linum_map_fd = bpf_map__fd(skel->maps.lport_linum_map);
+
+	exp_passive_fin_in.flags = OPTION_F_RAND;
+	exp_passive_fin_in.rand = 0xfa;
+
+	exp_active_fin_in.flags = OPTION_F_RAND;
+	exp_active_fin_in.rand = 0xce;
+
+	prepare_out();
+
+	if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "1"))
+		return;
+
+	link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd);
+	if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n",
+		  PTR_ERR(link)))
+		return;
+
+	if (sk_fds_connect(&sk_fds, false)) {
+		bpf_link__destroy(link);
+		return;
+	}
+
+	check_hdr_and_close_fds(&sk_fds);
+	bpf_link__destroy(link);
+}
+
+static void __simple_estab(bool exprm)
+{
+	struct bpf_link *link;
+	struct sk_fds sk_fds;
+
+	hdr_stg_map_fd = bpf_map__fd(skel->maps.hdr_stg_map);
+	lport_linum_map_fd = bpf_map__fd(skel->maps.lport_linum_map);
+
+	exp_passive_estab_in.flags = OPTION_F_RAND | OPTION_F_MAX_DELACK_MS;
+	exp_passive_estab_in.rand = 0xfa;
+	exp_passive_estab_in.max_delack_ms = 11;
+
+	exp_active_estab_in.flags = OPTION_F_RAND | OPTION_F_MAX_DELACK_MS;
+	exp_active_estab_in.rand = 0xce;
+	exp_active_estab_in.max_delack_ms = 22;
+
+	prepare_out();
+
+	if (!exprm) {
+		skel->data->test_kind = 0xB9;
+		skel->data->test_magic = 0;
+	}
+
+	if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "1"))
+		return;
+
+	link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd);
+	if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n",
+		  PTR_ERR(link)))
+		return;
+
+	if (sk_fds_connect(&sk_fds, false)) {
+		bpf_link__destroy(link);
+		return;
+	}
+
+	check_hdr_and_close_fds(&sk_fds);
+	bpf_link__destroy(link);
+}
+
+static void no_exprm_estab(void)
+{
+	__simple_estab(false);
+}
+
+static void simple_estab(void)
+{
+	__simple_estab(true);
+}
+
+static void misc(void)
+{
+	const char send_msg[] = "MISC!!!";
+	char recv_msg[sizeof(send_msg)];
+	const unsigned int nr_data = 2;
+	struct bpf_link *link;
+	struct sk_fds sk_fds;
+	int i, ret;
+
+	lport_linum_map_fd = bpf_map__fd(misc_skel->maps.lport_linum_map);
+
+	if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "1"))
+		return;
+
+	link = bpf_program__attach_cgroup(misc_skel->progs.misc_estab, cg_fd);
+	if (CHECK(IS_ERR(link), "attach_cgroup(misc_estab)", "err: %ld\n",
+		  PTR_ERR(link)))
+		return;
+
+	if (sk_fds_connect(&sk_fds, false)) {
+		bpf_link__destroy(link);
+		return;
+	}
+
+	for (i = 0; i < nr_data; i++) {
+		/* MSG_EOR to ensure skb will not be combined */
+		ret = send(sk_fds.active_fd, send_msg, sizeof(send_msg),
+			   MSG_EOR);
+		if (CHECK(ret != sizeof(send_msg), "send(msg)", "ret:%d\n",
+			  ret))
+			goto check_linum;
+
+		ret = read(sk_fds.passive_fd, recv_msg, sizeof(recv_msg));
+		if (CHECK(ret != sizeof(send_msg), "read(msg)", "ret:%d\n",
+			  ret))
+			goto check_linum;
+	}
+
+	if (sk_fds_shutdown(&sk_fds))
+		goto check_linum;
+
+	CHECK(misc_skel->bss->nr_syn != 1, "unexpected nr_syn",
+	      "expected (1) != actual (%u)\n",
+		misc_skel->bss->nr_syn);
+
+	CHECK(misc_skel->bss->nr_data != nr_data, "unexpected nr_data",
+	      "expected (%u) != actual (%u)\n",
+	      nr_data, misc_skel->bss->nr_data);
+
+	/* The last ACK may have been delayed, so it is either 1 or 2. */
+	CHECK(misc_skel->bss->nr_pure_ack != 1 &&
+	      misc_skel->bss->nr_pure_ack != 2,
+	      "unexpected nr_pure_ack",
+	      "expected (1 or 2) != actual (%u)\n",
+		misc_skel->bss->nr_pure_ack);
+
+	CHECK(misc_skel->bss->nr_fin != 1, "unexpected nr_fin",
+	      "expected (1) != actual (%u)\n",
+	      misc_skel->bss->nr_fin);
+
+check_linum:
+	CHECK_FAIL(check_error_linum(&sk_fds));
+	sk_fds_close(&sk_fds);
+	bpf_link__destroy(link);
+}
+
+struct test {
+	const char *desc;
+	void (*run)(void);
+};
+
+#define DEF_TEST(name) { #name, name }
+static struct test tests[] = {
+	DEF_TEST(simple_estab),
+	DEF_TEST(no_exprm_estab),
+	DEF_TEST(syncookie_estab),
+	DEF_TEST(fastopen_estab),
+	DEF_TEST(fin),
+	DEF_TEST(misc),
+};
+
+void test_tcp_hdr_options(void)
+{
+	int i;
+
+	skel = test_tcp_hdr_options__open_and_load();
+	if (CHECK(!skel, "open and load skel", "failed"))
+		return;
+
+	misc_skel = test_misc_tcp_hdr_options__open_and_load();
+	if (CHECK(!misc_skel, "open and load misc test skel", "failed"))
+		goto skel_destroy;
+
+	cg_fd = test__join_cgroup(CG_NAME);
+	if (CHECK_FAIL(cg_fd < 0))
+		goto skel_destroy;
+
+	for (i = 0; i < ARRAY_SIZE(tests); i++) {
+		if (!test__start_subtest(tests[i].desc))
+			continue;
+
+		if (create_netns())
+			break;
+
+		tests[i].run();
+
+		reset_test();
+	}
+
+	close(cg_fd);
+skel_destroy:
+	test_misc_tcp_hdr_options__destroy(misc_skel);
+	test_tcp_hdr_options__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_bpffs.c b/tools/testing/selftests/bpf/prog_tests/test_bpffs.c
new file mode 100644
index 000000000000..172c999e523c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_bpffs.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#define _GNU_SOURCE
+#include <sched.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <test_progs.h>
+
+#define TDIR "/sys/kernel/debug"
+
+static int read_iter(char *file)
+{
+	/* 1024 should be enough to get contiguous 4 "iter" letters at some point */
+	char buf[1024];
+	int fd, len;
+
+	fd = open(file, 0);
+	if (fd < 0)
+		return -1;
+	while ((len = read(fd, buf, sizeof(buf))) > 0)
+		if (strstr(buf, "iter")) {
+			close(fd);
+			return 0;
+		}
+	close(fd);
+	return -1;
+}
+
+static int fn(void)
+{
+	int err, duration = 0;
+
+	err = unshare(CLONE_NEWNS);
+	if (CHECK(err, "unshare", "failed: %d\n", errno))
+		goto out;
+
+	err = mount("", "/", "", MS_REC | MS_PRIVATE, NULL);
+	if (CHECK(err, "mount /", "failed: %d\n", errno))
+		goto out;
+
+	err = umount(TDIR);
+	if (CHECK(err, "umount " TDIR, "failed: %d\n", errno))
+		goto out;
+
+	err = mount("none", TDIR, "tmpfs", 0, NULL);
+	if (CHECK(err, "mount", "mount root failed: %d\n", errno))
+		goto out;
+
+	err = mkdir(TDIR "/fs1", 0777);
+	if (CHECK(err, "mkdir "TDIR"/fs1", "failed: %d\n", errno))
+		goto out;
+	err = mkdir(TDIR "/fs2", 0777);
+	if (CHECK(err, "mkdir "TDIR"/fs2", "failed: %d\n", errno))
+		goto out;
+
+	err = mount("bpf", TDIR "/fs1", "bpf", 0, NULL);
+	if (CHECK(err, "mount bpffs "TDIR"/fs1", "failed: %d\n", errno))
+		goto out;
+	err = mount("bpf", TDIR "/fs2", "bpf", 0, NULL);
+	if (CHECK(err, "mount bpffs " TDIR "/fs2", "failed: %d\n", errno))
+		goto out;
+
+	err = read_iter(TDIR "/fs1/maps.debug");
+	if (CHECK(err, "reading " TDIR "/fs1/maps.debug", "failed\n"))
+		goto out;
+	err = read_iter(TDIR "/fs2/progs.debug");
+	if (CHECK(err, "reading " TDIR "/fs2/progs.debug", "failed\n"))
+		goto out;
+out:
+	umount(TDIR "/fs1");
+	umount(TDIR "/fs2");
+	rmdir(TDIR "/fs1");
+	rmdir(TDIR "/fs2");
+	umount(TDIR);
+	exit(err);
+}
+
+void test_test_bpffs(void)
+{
+	int err, duration = 0, status = 0;
+	pid_t pid;
+
+	pid = fork();
+	if (CHECK(pid == -1, "clone", "clone failed %d", errno))
+		return;
+	if (pid == 0)
+		fn();
+	err = waitpid(pid, &status, 0);
+	if (CHECK(err == -1 && errno != ECHILD, "waitpid", "failed %d", errno))
+		return;
+	if (CHECK(WEXITSTATUS(status), "bpffs test ", "failed %d", WEXITSTATUS(status)))
+		return;
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c
new file mode 100644
index 000000000000..91cd6f357246
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (C) 2020 Google LLC.
+ */
+
+#include <test_progs.h>
+#include <linux/limits.h>
+
+#include "local_storage.skel.h"
+#include "network_helpers.h"
+
+int create_and_unlink_file(void)
+{
+	char fname[PATH_MAX] = "/tmp/fileXXXXXX";
+	int fd;
+
+	fd = mkstemp(fname);
+	if (fd < 0)
+		return fd;
+
+	close(fd);
+	unlink(fname);
+	return 0;
+}
+
+void test_test_local_storage(void)
+{
+	struct local_storage *skel = NULL;
+	int err, duration = 0, serv_sk = -1;
+
+	skel = local_storage__open_and_load();
+	if (CHECK(!skel, "skel_load", "lsm skeleton failed\n"))
+		goto close_prog;
+
+	err = local_storage__attach(skel);
+	if (CHECK(err, "attach", "lsm attach failed: %d\n", err))
+		goto close_prog;
+
+	skel->bss->monitored_pid = getpid();
+
+	err = create_and_unlink_file();
+	if (CHECK(err < 0, "exec_cmd", "err %d errno %d\n", err, errno))
+		goto close_prog;
+
+	CHECK(skel->data->inode_storage_result != 0, "inode_storage_result",
+	      "inode_local_storage not set\n");
+
+	serv_sk = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0);
+	if (CHECK(serv_sk < 0, "start_server", "failed to start server\n"))
+		goto close_prog;
+
+	CHECK(skel->data->sk_storage_result != 0, "sk_storage_result",
+	      "sk_local_storage not set\n");
+
+	close(serv_sk);
+
+close_prog:
+	local_storage__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_lsm.c b/tools/testing/selftests/bpf/prog_tests/test_lsm.c
index b17eb2045c1d..6ab29226c99b 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_lsm.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_lsm.c
@@ -10,6 +10,7 @@
 #include <unistd.h>
 #include <malloc.h>
 #include <stdlib.h>
+#include <unistd.h>
 
 #include "lsm.skel.h"
 
@@ -55,6 +56,7 @@ void test_test_lsm(void)
 {
 	struct lsm *skel = NULL;
 	int err, duration = 0;
+	int buf = 1234;
 
 	skel = lsm__open_and_load();
 	if (CHECK(!skel, "skel_load", "lsm skeleton failed\n"))
@@ -81,6 +83,13 @@ void test_test_lsm(void)
 	CHECK(skel->bss->mprotect_count != 1, "mprotect_count",
 	      "mprotect_count = %d\n", skel->bss->mprotect_count);
 
+	syscall(__NR_setdomainname, &buf, -2L);
+	syscall(__NR_setdomainname, 0, -3L);
+	syscall(__NR_setdomainname, ~0L, -4L);
+
+	CHECK(skel->bss->copy_test != 3, "copy_test",
+	      "copy_test = %d\n", skel->bss->copy_test);
+
 close_prog:
 	lsm__destroy(skel);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/test_overhead.c b/tools/testing/selftests/bpf/prog_tests/test_overhead.c
index 2702df2b2343..9966685866fd 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_overhead.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_overhead.c
@@ -61,10 +61,9 @@ void test_test_overhead(void)
 	const char *raw_tp_name = "raw_tp/task_rename";
 	const char *fentry_name = "fentry/__set_task_comm";
 	const char *fexit_name = "fexit/__set_task_comm";
-	const char *fmodret_name = "fmod_ret/__set_task_comm";
 	const char *kprobe_func = "__set_task_comm";
 	struct bpf_program *kprobe_prog, *kretprobe_prog, *raw_tp_prog;
-	struct bpf_program *fentry_prog, *fexit_prog, *fmodret_prog;
+	struct bpf_program *fentry_prog, *fexit_prog;
 	struct bpf_object *obj;
 	struct bpf_link *link;
 	int err, duration = 0;
@@ -97,11 +96,6 @@ void test_test_overhead(void)
 	if (CHECK(!fexit_prog, "find_probe",
 		  "prog '%s' not found\n", fexit_name))
 		goto cleanup;
-	fmodret_prog = bpf_object__find_program_by_title(obj, fmodret_name);
-	if (CHECK(!fmodret_prog, "find_probe",
-		  "prog '%s' not found\n", fmodret_name))
-		goto cleanup;
-
 	err = bpf_object__load(obj);
 	if (CHECK(err, "obj_load", "err %d\n", err))
 		goto cleanup;
@@ -148,12 +142,6 @@ void test_test_overhead(void)
 	test_run("fexit");
 	bpf_link__destroy(link);
 
-	/* attach fmod_ret */
-	link = bpf_program__attach_trace(fmodret_prog);
-	if (CHECK(IS_ERR(link), "attach fmod_ret", "err %ld\n", PTR_ERR(link)))
-		goto cleanup;
-	test_run("fmod_ret");
-	bpf_link__destroy(link);
 cleanup:
 	prctl(PR_SET_NAME, comm, 0L, 0L, 0L);
 	bpf_object__close(obj);
diff --git a/tools/testing/selftests/bpf/prog_tests/test_profiler.c b/tools/testing/selftests/bpf/prog_tests/test_profiler.c
new file mode 100644
index 000000000000..4ca275101ee0
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_profiler.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <test_progs.h>
+#include "progs/profiler.h"
+#include "profiler1.skel.h"
+#include "profiler2.skel.h"
+#include "profiler3.skel.h"
+
+static int sanity_run(struct bpf_program *prog)
+{
+	struct bpf_prog_test_run_attr test_attr = {};
+	__u64 args[] = {1, 2, 3};
+	__u32 duration = 0;
+	int err, prog_fd;
+
+	prog_fd = bpf_program__fd(prog);
+	test_attr.prog_fd = prog_fd;
+	test_attr.ctx_in = args;
+	test_attr.ctx_size_in = sizeof(args);
+	err = bpf_prog_test_run_xattr(&test_attr);
+	if (CHECK(err || test_attr.retval, "test_run",
+		  "err %d errno %d retval %d duration %d\n",
+		  err, errno, test_attr.retval, duration))
+		return -1;
+	return 0;
+}
+
+void test_test_profiler(void)
+{
+	struct profiler1 *profiler1_skel = NULL;
+	struct profiler2 *profiler2_skel = NULL;
+	struct profiler3 *profiler3_skel = NULL;
+	__u32 duration = 0;
+	int err;
+
+	profiler1_skel = profiler1__open_and_load();
+	if (CHECK(!profiler1_skel, "profiler1_skel_load", "profiler1 skeleton failed\n"))
+		goto cleanup;
+
+	err = profiler1__attach(profiler1_skel);
+	if (CHECK(err, "profiler1_attach", "profiler1 attach failed: %d\n", err))
+		goto cleanup;
+
+	if (sanity_run(profiler1_skel->progs.raw_tracepoint__sched_process_exec))
+		goto cleanup;
+
+	profiler2_skel = profiler2__open_and_load();
+	if (CHECK(!profiler2_skel, "profiler2_skel_load", "profiler2 skeleton failed\n"))
+		goto cleanup;
+
+	err = profiler2__attach(profiler2_skel);
+	if (CHECK(err, "profiler2_attach", "profiler2 attach failed: %d\n", err))
+		goto cleanup;
+
+	if (sanity_run(profiler2_skel->progs.raw_tracepoint__sched_process_exec))
+		goto cleanup;
+
+	profiler3_skel = profiler3__open_and_load();
+	if (CHECK(!profiler3_skel, "profiler3_skel_load", "profiler3 skeleton failed\n"))
+		goto cleanup;
+
+	err = profiler3__attach(profiler3_skel);
+	if (CHECK(err, "profiler3_attach", "profiler3 attach failed: %d\n", err))
+		goto cleanup;
+
+	if (sanity_run(profiler3_skel->progs.raw_tracepoint__sched_process_exec))
+		goto cleanup;
+cleanup:
+	profiler1__destroy(profiler1_skel);
+	profiler2__destroy(profiler2_skel);
+	profiler3__destroy(profiler3_skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/trace_ext.c b/tools/testing/selftests/bpf/prog_tests/trace_ext.c
new file mode 100644
index 000000000000..924441d4362d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/trace_ext.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <test_progs.h>
+#include <network_helpers.h>
+#include <sys/stat.h>
+#include <linux/sched.h>
+#include <sys/syscall.h>
+
+#include "test_pkt_md_access.skel.h"
+#include "test_trace_ext.skel.h"
+#include "test_trace_ext_tracing.skel.h"
+
+static __u32 duration;
+
+void test_trace_ext(void)
+{
+	struct test_pkt_md_access *skel_pkt = NULL;
+	struct test_trace_ext_tracing *skel_trace = NULL;
+	struct test_trace_ext_tracing__bss *bss_trace;
+	struct test_trace_ext *skel_ext = NULL;
+	struct test_trace_ext__bss *bss_ext;
+	int err, pkt_fd, ext_fd;
+	struct bpf_program *prog;
+	char buf[100];
+	__u32 retval;
+	__u64 len;
+
+	/* open/load/attach test_pkt_md_access */
+	skel_pkt = test_pkt_md_access__open_and_load();
+	if (CHECK(!skel_pkt, "setup", "classifier/test_pkt_md_access open failed\n"))
+		goto cleanup;
+
+	err = test_pkt_md_access__attach(skel_pkt);
+	if (CHECK(err, "setup", "classifier/test_pkt_md_access attach failed: %d\n", err))
+		goto cleanup;
+
+	prog = skel_pkt->progs.test_pkt_md_access;
+	pkt_fd = bpf_program__fd(prog);
+
+	/* open extension */
+	skel_ext = test_trace_ext__open();
+	if (CHECK(!skel_ext, "setup", "freplace/test_pkt_md_access open failed\n"))
+		goto cleanup;
+
+	/* set extension's attach target - test_pkt_md_access  */
+	prog = skel_ext->progs.test_pkt_md_access_new;
+	bpf_program__set_attach_target(prog, pkt_fd, "test_pkt_md_access");
+
+	/* load/attach extension */
+	err = test_trace_ext__load(skel_ext);
+	if (CHECK(err, "setup", "freplace/test_pkt_md_access load failed\n")) {
+		libbpf_strerror(err, buf, sizeof(buf));
+		fprintf(stderr, "%s\n", buf);
+		goto cleanup;
+	}
+
+	err = test_trace_ext__attach(skel_ext);
+	if (CHECK(err, "setup", "freplace/test_pkt_md_access attach failed: %d\n", err))
+		goto cleanup;
+
+	prog = skel_ext->progs.test_pkt_md_access_new;
+	ext_fd = bpf_program__fd(prog);
+
+	/* open tracing  */
+	skel_trace = test_trace_ext_tracing__open();
+	if (CHECK(!skel_trace, "setup", "tracing/test_pkt_md_access_new open failed\n"))
+		goto cleanup;
+
+	/* set tracing's attach target - fentry */
+	prog = skel_trace->progs.fentry;
+	bpf_program__set_attach_target(prog, ext_fd, "test_pkt_md_access_new");
+
+	/* set tracing's attach target - fexit */
+	prog = skel_trace->progs.fexit;
+	bpf_program__set_attach_target(prog, ext_fd, "test_pkt_md_access_new");
+
+	/* load/attach tracing */
+	err = test_trace_ext_tracing__load(skel_trace);
+	if (CHECK(err, "setup", "tracing/test_pkt_md_access_new load failed\n")) {
+		libbpf_strerror(err, buf, sizeof(buf));
+		fprintf(stderr, "%s\n", buf);
+		goto cleanup;
+	}
+
+	err = test_trace_ext_tracing__attach(skel_trace);
+	if (CHECK(err, "setup", "tracing/test_pkt_md_access_new attach failed: %d\n", err))
+		goto cleanup;
+
+	/* trigger the test */
+	err = bpf_prog_test_run(pkt_fd, 1, &pkt_v4, sizeof(pkt_v4),
+				NULL, NULL, &retval, &duration);
+	CHECK(err || retval, "run", "err %d errno %d retval %d\n", err, errno, retval);
+
+	bss_ext = skel_ext->bss;
+	bss_trace = skel_trace->bss;
+
+	len = bss_ext->ext_called;
+
+	CHECK(bss_ext->ext_called == 0,
+		"check", "failed to trigger freplace/test_pkt_md_access\n");
+	CHECK(bss_trace->fentry_called != len,
+		"check", "failed to trigger fentry/test_pkt_md_access_new\n");
+	CHECK(bss_trace->fexit_called != len,
+		"check", "failed to trigger fexit/test_pkt_md_access_new\n");
+
+cleanup:
+	test_trace_ext_tracing__destroy(skel_trace);
+	test_trace_ext__destroy(skel_ext);
+	test_pkt_md_access__destroy(skel_pkt);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c b/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c
index f284f72158ef..0281095de266 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c
@@ -1,11 +1,12 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
 #include <network_helpers.h>
+#include "test_xdp_noinline.skel.h"
 
 void test_xdp_noinline(void)
 {
-	const char *file = "./test_xdp_noinline.o";
 	unsigned int nr_cpus = bpf_num_possible_cpus();
+	struct test_xdp_noinline *skel;
 	struct vip key = {.protocol = 6};
 	struct vip_meta {
 		__u32 flags;
@@ -24,59 +25,43 @@ void test_xdp_noinline(void)
 		__u8 flags;
 	} real_def = {.dst = MAGIC_VAL};
 	__u32 ch_key = 11, real_num = 3;
-	__u32 duration, retval, size;
-	int err, i, prog_fd, map_fd;
+	__u32 duration = 0, retval, size;
+	int err, i;
 	__u64 bytes = 0, pkts = 0;
-	struct bpf_object *obj;
 	char buf[128];
 	u32 *magic = (u32 *)buf;
 
-	err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
-	if (CHECK_FAIL(err))
+	skel = test_xdp_noinline__open_and_load();
+	if (CHECK(!skel, "skel_open_and_load", "failed\n"))
 		return;
 
-	map_fd = bpf_find_map(__func__, obj, "vip_map");
-	if (map_fd < 0)
-		goto out;
-	bpf_map_update_elem(map_fd, &key, &value, 0);
+	bpf_map_update_elem(bpf_map__fd(skel->maps.vip_map), &key, &value, 0);
+	bpf_map_update_elem(bpf_map__fd(skel->maps.ch_rings), &ch_key, &real_num, 0);
+	bpf_map_update_elem(bpf_map__fd(skel->maps.reals), &real_num, &real_def, 0);
 
-	map_fd = bpf_find_map(__func__, obj, "ch_rings");
-	if (map_fd < 0)
-		goto out;
-	bpf_map_update_elem(map_fd, &ch_key, &real_num, 0);
-
-	map_fd = bpf_find_map(__func__, obj, "reals");
-	if (map_fd < 0)
-		goto out;
-	bpf_map_update_elem(map_fd, &real_num, &real_def, 0);
-
-	err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v4, sizeof(pkt_v4),
+	err = bpf_prog_test_run(bpf_program__fd(skel->progs.balancer_ingress_v4),
+				NUM_ITER, &pkt_v4, sizeof(pkt_v4),
 				buf, &size, &retval, &duration);
 	CHECK(err || retval != 1 || size != 54 ||
 	      *magic != MAGIC_VAL, "ipv4",
 	      "err %d errno %d retval %d size %d magic %x\n",
 	      err, errno, retval, size, *magic);
 
-	err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v6, sizeof(pkt_v6),
+	err = bpf_prog_test_run(bpf_program__fd(skel->progs.balancer_ingress_v6),
+				NUM_ITER, &pkt_v6, sizeof(pkt_v6),
 				buf, &size, &retval, &duration);
 	CHECK(err || retval != 1 || size != 74 ||
 	      *magic != MAGIC_VAL, "ipv6",
 	      "err %d errno %d retval %d size %d magic %x\n",
 	      err, errno, retval, size, *magic);
 
-	map_fd = bpf_find_map(__func__, obj, "stats");
-	if (map_fd < 0)
-		goto out;
-	bpf_map_lookup_elem(map_fd, &stats_key, stats);
+	bpf_map_lookup_elem(bpf_map__fd(skel->maps.stats), &stats_key, stats);
 	for (i = 0; i < nr_cpus; i++) {
 		bytes += stats[i].bytes;
 		pkts += stats[i].pkts;
 	}
-	if (CHECK_FAIL(bytes != MAGIC_BYTES * NUM_ITER * 2 ||
-		       pkts != NUM_ITER * 2)) {
-		printf("test_xdp_noinline:FAIL:stats %lld %lld\n",
-		       bytes, pkts);
-	}
-out:
-	bpf_object__close(obj);
+	CHECK(bytes != MAGIC_BYTES * NUM_ITER * 2 || pkts != NUM_ITER * 2,
+	      "stats", "bytes %lld pkts %lld\n",
+	      (unsigned long long)bytes, (unsigned long long)pkts);
+	test_xdp_noinline__destroy(skel);
 }
diff --git a/tools/testing/selftests/bpf/progs/bpf_cubic.c b/tools/testing/selftests/bpf/progs/bpf_cubic.c
index ef574087f1e1..6939bfd8690f 100644
--- a/tools/testing/selftests/bpf/progs/bpf_cubic.c
+++ b/tools/testing/selftests/bpf/progs/bpf_cubic.c
@@ -15,6 +15,8 @@
  */
 
 #include <linux/bpf.h>
+#include <linux/stddef.h>
+#include <linux/tcp.h>
 #include "bpf_tcp_helpers.h"
 
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_dctcp.c b/tools/testing/selftests/bpf/progs/bpf_dctcp.c
index 3fb4260570b1..4dc1a967776a 100644
--- a/tools/testing/selftests/bpf/progs/bpf_dctcp.c
+++ b/tools/testing/selftests/bpf/progs/bpf_dctcp.c
@@ -9,6 +9,8 @@
 #include <stddef.h>
 #include <linux/bpf.h>
 #include <linux/types.h>
+#include <linux/stddef.h>
+#include <linux/tcp.h>
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
 #include "bpf_tcp_helpers.h"
diff --git a/tools/testing/selftests/bpf/progs/bpf_flow.c b/tools/testing/selftests/bpf/progs/bpf_flow.c
index de6de9221518..5a65f6b51377 100644
--- a/tools/testing/selftests/bpf/progs/bpf_flow.c
+++ b/tools/testing/selftests/bpf/progs/bpf_flow.c
@@ -118,18 +118,18 @@ static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto)
 
 	switch (proto) {
 	case bpf_htons(ETH_P_IP):
-		bpf_tail_call(skb, &jmp_table, IP);
+		bpf_tail_call_static(skb, &jmp_table, IP);
 		break;
 	case bpf_htons(ETH_P_IPV6):
-		bpf_tail_call(skb, &jmp_table, IPV6);
+		bpf_tail_call_static(skb, &jmp_table, IPV6);
 		break;
 	case bpf_htons(ETH_P_MPLS_MC):
 	case bpf_htons(ETH_P_MPLS_UC):
-		bpf_tail_call(skb, &jmp_table, MPLS);
+		bpf_tail_call_static(skb, &jmp_table, MPLS);
 		break;
 	case bpf_htons(ETH_P_8021Q):
 	case bpf_htons(ETH_P_8021AD):
-		bpf_tail_call(skb, &jmp_table, VLAN);
+		bpf_tail_call_static(skb, &jmp_table, VLAN);
 		break;
 	default:
 		/* Protocol not supported */
@@ -246,10 +246,10 @@ static __always_inline int parse_ipv6_proto(struct __sk_buff *skb, __u8 nexthdr)
 	switch (nexthdr) {
 	case IPPROTO_HOPOPTS:
 	case IPPROTO_DSTOPTS:
-		bpf_tail_call(skb, &jmp_table, IPV6OP);
+		bpf_tail_call_static(skb, &jmp_table, IPV6OP);
 		break;
 	case IPPROTO_FRAGMENT:
-		bpf_tail_call(skb, &jmp_table, IPV6FR);
+		bpf_tail_call_static(skb, &jmp_table, IPV6FR);
 		break;
 	default:
 		return parse_ip_proto(skb, nexthdr);
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter.h b/tools/testing/selftests/bpf/progs/bpf_iter.h
index c196280df90d..6a1255465fd6 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter.h
+++ b/tools/testing/selftests/bpf/progs/bpf_iter.h
@@ -13,6 +13,12 @@
 #define udp6_sock udp6_sock___not_used
 #define bpf_iter__bpf_map_elem bpf_iter__bpf_map_elem___not_used
 #define bpf_iter__bpf_sk_storage_map bpf_iter__bpf_sk_storage_map___not_used
+#define bpf_iter__sockmap bpf_iter__sockmap___not_used
+#define btf_ptr btf_ptr___not_used
+#define BTF_F_COMPACT BTF_F_COMPACT___not_used
+#define BTF_F_NONAME BTF_F_NONAME___not_used
+#define BTF_F_PTR_RAW BTF_F_PTR_RAW___not_used
+#define BTF_F_ZERO BTF_F_ZERO___not_used
 #include "vmlinux.h"
 #undef bpf_iter_meta
 #undef bpf_iter__bpf_map
@@ -26,6 +32,12 @@
 #undef udp6_sock
 #undef bpf_iter__bpf_map_elem
 #undef bpf_iter__bpf_sk_storage_map
+#undef bpf_iter__sockmap
+#undef btf_ptr
+#undef BTF_F_COMPACT
+#undef BTF_F_NONAME
+#undef BTF_F_PTR_RAW
+#undef BTF_F_ZERO
 
 struct bpf_iter_meta {
 	struct seq_file *seq;
@@ -96,3 +108,23 @@ struct bpf_iter__bpf_sk_storage_map {
 	struct sock *sk;
 	void *value;
 };
+
+struct bpf_iter__sockmap {
+	struct bpf_iter_meta *meta;
+	struct bpf_map *map;
+	void *key;
+	struct sock *sk;
+};
+
+struct btf_ptr {
+	void *ptr;
+	__u32 type_id;
+	__u32 flags;
+};
+
+enum {
+	BTF_F_COMPACT	=	(1ULL << 0),
+	BTF_F_NONAME	=	(1ULL << 1),
+	BTF_F_PTR_RAW	=	(1ULL << 2),
+	BTF_F_ZERO	=	(1ULL << 3),
+};
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c
index 07ddbfdbcab7..6dfce3fd68bc 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c
@@ -47,7 +47,10 @@ int dump_bpf_hash_map(struct bpf_iter__bpf_map_elem *ctx)
 	__u32 seq_num = ctx->meta->seq_num;
 	struct bpf_map *map = ctx->map;
 	struct key_t *key = ctx->key;
+	struct key_t tmp_key;
 	__u64 *val = ctx->value;
+	__u64 tmp_val = 0;
+	int ret;
 
 	if (in_test_mode) {
 		/* test mode is used by selftests to
@@ -61,6 +64,18 @@ int dump_bpf_hash_map(struct bpf_iter__bpf_map_elem *ctx)
 		if (key == (void *)0 || val == (void *)0)
 			return 0;
 
+		/* update the value and then delete the <key, value> pair.
+		 * it should not impact the existing 'val' which is still
+		 * accessible under rcu.
+		 */
+		__builtin_memcpy(&tmp_key, key, sizeof(struct key_t));
+		ret = bpf_map_update_elem(&hashmap1, &tmp_key, &tmp_val, 0);
+		if (ret)
+			return 0;
+		ret = bpf_map_delete_elem(&hashmap1, &tmp_key);
+		if (ret)
+			return 0;
+
 		key_sum_a += key->a;
 		key_sum_b += key->b;
 		key_sum_c += key->c;
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_sockmap.c b/tools/testing/selftests/bpf/progs/bpf_iter_sockmap.c
new file mode 100644
index 000000000000..f3af0e30cead
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_sockmap.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Cloudflare */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <errno.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+	__uint(type, BPF_MAP_TYPE_SOCKMAP);
+	__uint(max_entries, 64);
+	__type(key, __u32);
+	__type(value, __u64);
+} sockmap SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_SOCKHASH);
+	__uint(max_entries, 64);
+	__type(key, __u32);
+	__type(value, __u64);
+} sockhash SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_SOCKHASH);
+	__uint(max_entries, 64);
+	__type(key, __u32);
+	__type(value, __u64);
+} dst SEC(".maps");
+
+__u32 elems = 0;
+__u32 socks = 0;
+
+SEC("iter/sockmap")
+int copy(struct bpf_iter__sockmap *ctx)
+{
+	struct sock *sk = ctx->sk;
+	__u32 tmp, *key = ctx->key;
+	int ret;
+
+	if (!key)
+		return 0;
+
+	elems++;
+
+	/* We need a temporary buffer on the stack, since the verifier doesn't
+	 * let us use the pointer from the context as an argument to the helper.
+	 */
+	tmp = *key;
+
+	if (sk) {
+		socks++;
+		return bpf_map_update_elem(&dst, &tmp, sk, 0) != 0;
+	}
+
+	ret = bpf_map_delete_elem(&dst, &tmp);
+	return ret && ret != -ENOENT;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c
new file mode 100644
index 000000000000..a1ddc36f13ec
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020, Oracle and/or its affiliates. */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+#include <errno.h>
+
+char _license[] SEC("license") = "GPL";
+
+long tasks = 0;
+long seq_err = 0;
+bool skip = false;
+
+SEC("iter/task")
+int dump_task_struct(struct bpf_iter__task *ctx)
+{
+	struct seq_file *seq = ctx->meta->seq;
+	struct task_struct *task = ctx->task;
+	static struct btf_ptr ptr = { };
+	long ret;
+
+#if __has_builtin(__builtin_btf_type_id)
+	ptr.type_id = bpf_core_type_id_kernel(struct task_struct);
+	ptr.ptr = task;
+
+	if (ctx->meta->seq_num == 0)
+		BPF_SEQ_PRINTF(seq, "Raw BTF task\n");
+
+	ret = bpf_seq_printf_btf(seq, &ptr, sizeof(ptr), 0);
+	switch (ret) {
+	case 0:
+		tasks++;
+		break;
+	case -ERANGE:
+		/* NULL task or task->fs, don't count it as an error. */
+		break;
+	case -E2BIG:
+		return 1;
+	default:
+		seq_err = ret;
+		break;
+	}
+#else
+	skip = true;
+#endif
+
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
index 8b787baa2654..b2f7c7c5f952 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
@@ -6,6 +6,9 @@
 
 char _license[] SEC("license") = "GPL";
 
+int count = 0;
+int tgid = 0;
+
 SEC("iter/task_file")
 int dump_task_file(struct bpf_iter__task_file *ctx)
 {
@@ -17,8 +20,13 @@ int dump_task_file(struct bpf_iter__task_file *ctx)
 	if (task == (void *)0 || file == (void *)0)
 		return 0;
 
-	if (ctx->meta->seq_num == 0)
+	if (ctx->meta->seq_num == 0) {
+		count = 0;
 		BPF_SEQ_PRINTF(seq, "    tgid      gid       fd      file\n");
+	}
+
+	if (tgid == task->tgid && task->tgid != task->pid)
+		count++;
 
 	BPF_SEQ_PRINTF(seq, "%8d %8d %8d %lx\n", task->tgid, task->pid, fd,
 		       (long)file->f_op);
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval.c
new file mode 100644
index 000000000000..48e62f3f074f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_enumval x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___diff.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___diff.c
new file mode 100644
index 000000000000..53e5e5a76888
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___diff.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_enumval___diff x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___err_missing.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___err_missing.c
new file mode 100644
index 000000000000..d024fb2ac06e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___err_missing.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_enumval___err_missing x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___val3_missing.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___val3_missing.c
new file mode 100644
index 000000000000..9de6595d250c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___val3_missing.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_enumval___val3_missing x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_size___err_ambiguous.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_size___err_ambiguous.c
new file mode 100644
index 000000000000..f3e9904df9c2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_size___err_ambiguous.c
@@ -0,0 +1,4 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_size___err_ambiguous1 x,
+       struct core_reloc_size___err_ambiguous2 y) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based.c
new file mode 100644
index 000000000000..fc3f69e58c71
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_type_based x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___all_missing.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___all_missing.c
new file mode 100644
index 000000000000..51511648b4ec
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___all_missing.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_type_based___all_missing x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff_sz.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff_sz.c
new file mode 100644
index 000000000000..67db3dceb279
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff_sz.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_type_based___diff_sz x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___fn_wrong_args.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___fn_wrong_args.c
new file mode 100644
index 000000000000..b357fc65431d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___fn_wrong_args.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_type_based___fn_wrong_args x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___incompat.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___incompat.c
new file mode 100644
index 000000000000..8ddf20d33d9e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___incompat.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_type_based___incompat x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_id.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_id.c
new file mode 100644
index 000000000000..abbe5bddcefd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_id.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_type_id x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_id___missing_targets.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_id___missing_targets.c
new file mode 100644
index 000000000000..24e7caf4f013
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_id___missing_targets.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_type_id___missing_targets x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf_ptr.h b/tools/testing/selftests/bpf/progs/btf_ptr.h
new file mode 100644
index 000000000000..c3c9797c67db
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf_ptr.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2020, Oracle and/or its affiliates. */
+/* "undefine" structs in vmlinux.h, because we "override" them below */
+#define btf_ptr btf_ptr___not_used
+#define BTF_F_COMPACT BTF_F_COMPACT___not_used
+#define BTF_F_NONAME BTF_F_NONAME___not_used
+#define BTF_F_PTR_RAW BTF_F_PTR_RAW___not_used
+#define BTF_F_ZERO BTF_F_ZERO___not_used
+#include "vmlinux.h"
+#undef btf_ptr
+#undef BTF_F_COMPACT
+#undef BTF_F_NONAME
+#undef BTF_F_PTR_RAW
+#undef BTF_F_ZERO
+
+struct btf_ptr {
+	void *ptr;
+	__u32 type_id;
+	__u32 flags;
+};
+
+enum {
+	BTF_F_COMPACT	=	(1ULL << 0),
+	BTF_F_NONAME	=	(1ULL << 1),
+	BTF_F_PTR_RAW	=	(1ULL << 2),
+	BTF_F_ZERO	=	(1ULL << 3),
+};
diff --git a/tools/testing/selftests/bpf/progs/connect4_prog.c b/tools/testing/selftests/bpf/progs/connect4_prog.c
index b1b2773c0b9d..a943d394fd3a 100644
--- a/tools/testing/selftests/bpf/progs/connect4_prog.c
+++ b/tools/testing/selftests/bpf/progs/connect4_prog.c
@@ -23,6 +23,10 @@
 #define TCP_CA_NAME_MAX 16
 #endif
 
+#ifndef TCP_NOTSENT_LOWAT
+#define TCP_NOTSENT_LOWAT 25
+#endif
+
 #ifndef IFNAMSIZ
 #define IFNAMSIZ 16
 #endif
@@ -128,6 +132,18 @@ static __inline int set_keepalive(struct bpf_sock_addr *ctx)
 	return 0;
 }
 
+static __inline int set_notsent_lowat(struct bpf_sock_addr *ctx)
+{
+	int lowat = 65535;
+
+	if (ctx->type == SOCK_STREAM) {
+		if (bpf_setsockopt(ctx, SOL_TCP, TCP_NOTSENT_LOWAT, &lowat, sizeof(lowat)))
+			return 1;
+	}
+
+	return 0;
+}
+
 SEC("cgroup/connect4")
 int connect_v4_prog(struct bpf_sock_addr *ctx)
 {
@@ -148,6 +164,9 @@ int connect_v4_prog(struct bpf_sock_addr *ctx)
 	if (set_keepalive(ctx))
 		return 0;
 
+	if (set_notsent_lowat(ctx))
+		return 0;
+
 	if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM)
 		return 0;
 	else if (ctx->type == SOCK_STREAM)
diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h
index 69139ed66216..e6e616cb7bc9 100644
--- a/tools/testing/selftests/bpf/progs/core_reloc_types.h
+++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h
@@ -652,7 +652,7 @@ struct core_reloc_misc_extensible {
 };
 
 /*
- * EXISTENCE
+ * FIELD EXISTENCE
  */
 struct core_reloc_existence_output {
 	int a_exists;
@@ -809,3 +809,353 @@ struct core_reloc_size___diff_sz {
 	void *ptr_field;
 	enum { OTHER_VALUE = 0xFFFFFFFFFFFFFFFF } enum_field;
 };
+
+/* Error case of two candidates with the fields (int_field) at the same
+ * offset, but with differing final relocation values: size 4 vs size 1
+ */
+struct core_reloc_size___err_ambiguous1 {
+	/* int at offset 0 */
+	int int_field;
+
+	struct { int x; } struct_field;
+	union { int x; } union_field;
+	int arr_field[4];
+	void *ptr_field;
+	enum { VALUE___1 = 123 } enum_field;
+};
+
+struct core_reloc_size___err_ambiguous2 {
+	/* char at offset 0 */
+	char int_field;
+
+	struct { int x; } struct_field;
+	union { int x; } union_field;
+	int arr_field[4];
+	void *ptr_field;
+	enum { VALUE___2 = 123 } enum_field;
+};
+
+/*
+ * TYPE EXISTENCE & SIZE
+ */
+struct core_reloc_type_based_output {
+	bool struct_exists;
+	bool union_exists;
+	bool enum_exists;
+	bool typedef_named_struct_exists;
+	bool typedef_anon_struct_exists;
+	bool typedef_struct_ptr_exists;
+	bool typedef_int_exists;
+	bool typedef_enum_exists;
+	bool typedef_void_ptr_exists;
+	bool typedef_func_proto_exists;
+	bool typedef_arr_exists;
+
+	int struct_sz;
+	int union_sz;
+	int enum_sz;
+	int typedef_named_struct_sz;
+	int typedef_anon_struct_sz;
+	int typedef_struct_ptr_sz;
+	int typedef_int_sz;
+	int typedef_enum_sz;
+	int typedef_void_ptr_sz;
+	int typedef_func_proto_sz;
+	int typedef_arr_sz;
+};
+
+struct a_struct {
+	int x;
+};
+
+union a_union {
+	int y;
+	int z;
+};
+
+typedef struct a_struct named_struct_typedef;
+
+typedef struct { int x, y, z; } anon_struct_typedef;
+
+typedef struct {
+	int a, b, c;
+} *struct_ptr_typedef;
+
+enum an_enum {
+	AN_ENUM_VAL1 = 1,
+	AN_ENUM_VAL2 = 2,
+	AN_ENUM_VAL3 = 3,
+};
+
+typedef int int_typedef;
+
+typedef enum { TYPEDEF_ENUM_VAL1, TYPEDEF_ENUM_VAL2 } enum_typedef;
+
+typedef void *void_ptr_typedef;
+
+typedef int (*func_proto_typedef)(long);
+
+typedef char arr_typedef[20];
+
+struct core_reloc_type_based {
+	struct a_struct f1;
+	union a_union f2;
+	enum an_enum f3;
+	named_struct_typedef f4;
+	anon_struct_typedef f5;
+	struct_ptr_typedef f6;
+	int_typedef f7;
+	enum_typedef f8;
+	void_ptr_typedef f9;
+	func_proto_typedef f10;
+	arr_typedef f11;
+};
+
+/* no types in target */
+struct core_reloc_type_based___all_missing {
+};
+
+/* different type sizes, extra modifiers, anon vs named enums, etc */
+struct a_struct___diff_sz {
+	long x;
+	int y;
+	char z;
+};
+
+union a_union___diff_sz {
+	char yy;
+	char zz;
+};
+
+typedef struct a_struct___diff_sz named_struct_typedef___diff_sz;
+
+typedef struct { long xx, yy, zzz; } anon_struct_typedef___diff_sz;
+
+typedef struct {
+	char aa[1], bb[2], cc[3];
+} *struct_ptr_typedef___diff_sz;
+
+enum an_enum___diff_sz {
+	AN_ENUM_VAL1___diff_sz = 0x123412341234,
+	AN_ENUM_VAL2___diff_sz = 2,
+};
+
+typedef unsigned long int_typedef___diff_sz;
+
+typedef enum an_enum___diff_sz enum_typedef___diff_sz;
+
+typedef const void * const void_ptr_typedef___diff_sz;
+
+typedef int_typedef___diff_sz (*func_proto_typedef___diff_sz)(char);
+
+typedef int arr_typedef___diff_sz[2];
+
+struct core_reloc_type_based___diff_sz {
+	struct a_struct___diff_sz f1;
+	union a_union___diff_sz f2;
+	enum an_enum___diff_sz f3;
+	named_struct_typedef___diff_sz f4;
+	anon_struct_typedef___diff_sz f5;
+	struct_ptr_typedef___diff_sz f6;
+	int_typedef___diff_sz f7;
+	enum_typedef___diff_sz f8;
+	void_ptr_typedef___diff_sz f9;
+	func_proto_typedef___diff_sz f10;
+	arr_typedef___diff_sz f11;
+};
+
+/* incompatibilities between target and local types */
+union a_struct___incompat { /* union instead of struct */
+	int x;
+};
+
+struct a_union___incompat { /* struct instead of union */
+	int y;
+	int z;
+};
+
+/* typedef to union, not to struct */
+typedef union a_struct___incompat named_struct_typedef___incompat;
+
+/* typedef to void pointer, instead of struct */
+typedef void *anon_struct_typedef___incompat;
+
+/* extra pointer indirection */
+typedef struct {
+	int a, b, c;
+} **struct_ptr_typedef___incompat;
+
+/* typedef of a struct with int, instead of int */
+typedef struct { int x; } int_typedef___incompat;
+
+/* typedef to func_proto, instead of enum */
+typedef int (*enum_typedef___incompat)(void);
+
+/* pointer to char instead of void */
+typedef char *void_ptr_typedef___incompat;
+
+/* void return type instead of int */
+typedef void (*func_proto_typedef___incompat)(long);
+
+/* multi-dimensional array instead of a single-dimensional */
+typedef int arr_typedef___incompat[20][2];
+
+struct core_reloc_type_based___incompat {
+	union a_struct___incompat f1;
+	struct a_union___incompat f2;
+	/* the only valid one is enum, to check that something still succeeds */
+	enum an_enum f3;
+	named_struct_typedef___incompat f4;
+	anon_struct_typedef___incompat f5;
+	struct_ptr_typedef___incompat f6;
+	int_typedef___incompat f7;
+	enum_typedef___incompat f8;
+	void_ptr_typedef___incompat f9;
+	func_proto_typedef___incompat f10;
+	arr_typedef___incompat f11;
+};
+
+/* func_proto with incompatible signature */
+typedef void (*func_proto_typedef___fn_wrong_ret1)(long);
+typedef int * (*func_proto_typedef___fn_wrong_ret2)(long);
+typedef struct { int x; } int_struct_typedef;
+typedef int_struct_typedef (*func_proto_typedef___fn_wrong_ret3)(long);
+typedef int (*func_proto_typedef___fn_wrong_arg)(void *);
+typedef int (*func_proto_typedef___fn_wrong_arg_cnt1)(long, long);
+typedef int (*func_proto_typedef___fn_wrong_arg_cnt2)(void);
+
+struct core_reloc_type_based___fn_wrong_args {
+	/* one valid type to make sure relos still work */
+	struct a_struct f1;
+	func_proto_typedef___fn_wrong_ret1 f2;
+	func_proto_typedef___fn_wrong_ret2 f3;
+	func_proto_typedef___fn_wrong_ret3 f4;
+	func_proto_typedef___fn_wrong_arg f5;
+	func_proto_typedef___fn_wrong_arg_cnt1 f6;
+	func_proto_typedef___fn_wrong_arg_cnt2 f7;
+};
+
+/*
+ * TYPE ID MAPPING (LOCAL AND TARGET)
+ */
+struct core_reloc_type_id_output {
+	int local_anon_struct;
+	int local_anon_union;
+	int local_anon_enum;
+	int local_anon_func_proto_ptr;
+	int local_anon_void_ptr;
+	int local_anon_arr;
+
+	int local_struct;
+	int local_union;
+	int local_enum;
+	int local_int;
+	int local_struct_typedef;
+	int local_func_proto_typedef;
+	int local_arr_typedef;
+
+	int targ_struct;
+	int targ_union;
+	int targ_enum;
+	int targ_int;
+	int targ_struct_typedef;
+	int targ_func_proto_typedef;
+	int targ_arr_typedef;
+};
+
+struct core_reloc_type_id {
+	struct a_struct f1;
+	union a_union f2;
+	enum an_enum f3;
+	named_struct_typedef f4;
+	func_proto_typedef f5;
+	arr_typedef f6;
+};
+
+struct core_reloc_type_id___missing_targets {
+	/* nothing */
+};
+
+/*
+ * ENUMERATOR VALUE EXISTENCE AND VALUE RELOCATION
+ */
+struct core_reloc_enumval_output {
+	bool named_val1_exists;
+	bool named_val2_exists;
+	bool named_val3_exists;
+	bool anon_val1_exists;
+	bool anon_val2_exists;
+	bool anon_val3_exists;
+
+	int named_val1;
+	int named_val2;
+	int anon_val1;
+	int anon_val2;
+};
+
+enum named_enum {
+	NAMED_ENUM_VAL1 = 1,
+	NAMED_ENUM_VAL2 = 2,
+	NAMED_ENUM_VAL3 = 3,
+};
+
+typedef enum {
+	ANON_ENUM_VAL1 = 0x10,
+	ANON_ENUM_VAL2 = 0x20,
+	ANON_ENUM_VAL3 = 0x30,
+} anon_enum;
+
+struct core_reloc_enumval {
+	enum named_enum f1;
+	anon_enum f2;
+};
+
+/* differing enumerator values */
+enum named_enum___diff {
+	NAMED_ENUM_VAL1___diff = 101,
+	NAMED_ENUM_VAL2___diff = 202,
+	NAMED_ENUM_VAL3___diff = 303,
+};
+
+typedef enum {
+	ANON_ENUM_VAL1___diff = 0x11,
+	ANON_ENUM_VAL2___diff = 0x22,
+	ANON_ENUM_VAL3___diff = 0x33,
+} anon_enum___diff;
+
+struct core_reloc_enumval___diff {
+	enum named_enum___diff f1;
+	anon_enum___diff f2;
+};
+
+/* missing (optional) third enum value */
+enum named_enum___val3_missing {
+	NAMED_ENUM_VAL1___val3_missing = 111,
+	NAMED_ENUM_VAL2___val3_missing = 222,
+};
+
+typedef enum {
+	ANON_ENUM_VAL1___val3_missing = 0x111,
+	ANON_ENUM_VAL2___val3_missing = 0x222,
+} anon_enum___val3_missing;
+
+struct core_reloc_enumval___val3_missing {
+	enum named_enum___val3_missing f1;
+	anon_enum___val3_missing f2;
+};
+
+/* missing (mandatory) second enum value, should fail */
+enum named_enum___err_missing {
+	NAMED_ENUM_VAL1___err_missing = 1,
+	NAMED_ENUM_VAL3___err_missing = 3,
+};
+
+typedef enum {
+	ANON_ENUM_VAL1___err_missing = 0x111,
+	ANON_ENUM_VAL3___err_missing = 0x222,
+} anon_enum___err_missing;
+
+struct core_reloc_enumval___err_missing {
+	enum named_enum___err_missing f1;
+	anon_enum___err_missing f2;
+};
diff --git a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c
index 98e1efe14549..49a84a3a2306 100644
--- a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c
@@ -1,8 +1,10 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2019 Facebook */
 #include <linux/stddef.h>
+#include <linux/if_ether.h>
 #include <linux/ipv6.h>
 #include <linux/bpf.h>
+#include <linux/tcp.h>
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_endian.h>
 #include <bpf/bpf_tracing.h>
@@ -151,4 +153,29 @@ int new_get_constant(long val)
 	test_get_constant = 1;
 	return test_get_constant; /* original get_constant() returns val - 122 */
 }
+
+__u64 test_pkt_write_access_subprog = 0;
+SEC("freplace/test_pkt_write_access_subprog")
+int new_test_pkt_write_access_subprog(struct __sk_buff *skb, __u32 off)
+{
+
+	void *data = (void *)(long)skb->data;
+	void *data_end = (void *)(long)skb->data_end;
+	struct tcphdr *tcp;
+
+	if (off > sizeof(struct ethhdr) + sizeof(struct ipv6hdr))
+		return -1;
+
+	tcp = data + off;
+	if (tcp + 1 > data_end)
+		return -1;
+
+	/* make modifications to the packet data */
+	tcp->check++;
+	tcp->syn = 0;
+
+	test_pkt_write_access_subprog = 1;
+	return 0;
+}
+
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/fmod_ret_freplace.c b/tools/testing/selftests/bpf/progs/fmod_ret_freplace.c
new file mode 100644
index 000000000000..c8943ccee6c0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/fmod_ret_freplace.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+volatile __u64 test_fmod_ret = 0;
+SEC("fmod_ret/security_new_get_constant")
+int BPF_PROG(fmod_ret_test, long val, int ret)
+{
+	test_fmod_ret = 1;
+	return 120;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/freplace_attach_probe.c b/tools/testing/selftests/bpf/progs/freplace_attach_probe.c
new file mode 100644
index 000000000000..bb2a77c5b62b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/freplace_attach_probe.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/ptrace.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#define VAR_NUM 2
+
+struct hmap_elem {
+	struct bpf_spin_lock lock;
+	int var[VAR_NUM];
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, struct hmap_elem);
+} hash_map SEC(".maps");
+
+SEC("freplace/handle_kprobe")
+int new_handle_kprobe(struct pt_regs *ctx)
+{
+	struct hmap_elem zero = {}, *val;
+	int key = 0;
+
+	val = bpf_map_lookup_elem(&hash_map, &key);
+	if (!val)
+		return 1;
+	/* spin_lock in hash map */
+	bpf_spin_lock(&val->lock);
+	val->var[0] = 99;
+	bpf_spin_unlock(&val->lock);
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/freplace_cls_redirect.c b/tools/testing/selftests/bpf/progs/freplace_cls_redirect.c
new file mode 100644
index 000000000000..68a5a9db928a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/freplace_cls_redirect.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+#include <linux/pkt_cls.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_helpers.h>
+
+struct bpf_map_def SEC("maps") sock_map = {
+	.type = BPF_MAP_TYPE_SOCKMAP,
+	.key_size = sizeof(int),
+	.value_size = sizeof(int),
+	.max_entries = 2,
+};
+
+SEC("freplace/cls_redirect")
+int freplace_cls_redirect_test(struct __sk_buff *skb)
+{
+	int ret = 0;
+	const int zero = 0;
+	struct bpf_sock *sk;
+
+	sk = bpf_map_lookup_elem(&sock_map, &zero);
+	if (!sk)
+		return TC_ACT_SHOT;
+
+	ret = bpf_map_update_elem(&sock_map, &zero, sk, 0);
+	bpf_sk_release(sk);
+
+	return ret == 0 ? TC_ACT_OK : TC_ACT_SHOT;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c b/tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c
new file mode 100644
index 000000000000..544e5ac90461
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/stddef.h>
+#include <linux/ipv6.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <sys/socket.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+SEC("freplace/connect_v4_prog")
+int new_connect_v4_prog(struct bpf_sock_addr *ctx)
+{
+	// return value thats in invalid range
+	return 255;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/freplace_get_constant.c b/tools/testing/selftests/bpf/progs/freplace_get_constant.c
new file mode 100644
index 000000000000..705e4b64dfc2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/freplace_get_constant.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+volatile __u64 test_get_constant = 0;
+SEC("freplace/get_constant")
+int security_new_get_constant(long val)
+{
+	if (val != 123)
+		return 0;
+	test_get_constant = 1;
+	return test_get_constant; /* original get_constant() returns val - 122 */
+}
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/local_storage.c b/tools/testing/selftests/bpf/progs/local_storage.c
new file mode 100644
index 000000000000..0758ba229ae0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/local_storage.c
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2020 Google LLC.
+ */
+
+#include <errno.h>
+#include <linux/bpf.h>
+#include <stdbool.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+#define DUMMY_STORAGE_VALUE 0xdeadbeef
+
+int monitored_pid = 0;
+int inode_storage_result = -1;
+int sk_storage_result = -1;
+
+struct dummy_storage {
+	__u32 value;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_INODE_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, struct dummy_storage);
+} inode_storage_map SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC | BPF_F_CLONE);
+	__type(key, int);
+	__type(value, struct dummy_storage);
+} sk_storage_map SEC(".maps");
+
+/* TODO Use vmlinux.h once BTF pruning for embedded types is fixed.
+ */
+struct sock {} __attribute__((preserve_access_index));
+struct sockaddr {} __attribute__((preserve_access_index));
+struct socket {
+	struct sock *sk;
+} __attribute__((preserve_access_index));
+
+struct inode {} __attribute__((preserve_access_index));
+struct dentry {
+	struct inode *d_inode;
+} __attribute__((preserve_access_index));
+struct file {
+	struct inode *f_inode;
+} __attribute__((preserve_access_index));
+
+
+SEC("lsm/inode_unlink")
+int BPF_PROG(unlink_hook, struct inode *dir, struct dentry *victim)
+{
+	__u32 pid = bpf_get_current_pid_tgid() >> 32;
+	struct dummy_storage *storage;
+
+	if (pid != monitored_pid)
+		return 0;
+
+	storage = bpf_inode_storage_get(&inode_storage_map, victim->d_inode, 0,
+				     BPF_SK_STORAGE_GET_F_CREATE);
+	if (!storage)
+		return 0;
+
+	if (storage->value == DUMMY_STORAGE_VALUE)
+		inode_storage_result = -1;
+
+	inode_storage_result =
+		bpf_inode_storage_delete(&inode_storage_map, victim->d_inode);
+
+	return 0;
+}
+
+SEC("lsm/socket_bind")
+int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address,
+	     int addrlen)
+{
+	__u32 pid = bpf_get_current_pid_tgid() >> 32;
+	struct dummy_storage *storage;
+
+	if (pid != monitored_pid)
+		return 0;
+
+	storage = bpf_sk_storage_get(&sk_storage_map, sock->sk, 0,
+				     BPF_SK_STORAGE_GET_F_CREATE);
+	if (!storage)
+		return 0;
+
+	if (storage->value == DUMMY_STORAGE_VALUE)
+		sk_storage_result = -1;
+
+	sk_storage_result = bpf_sk_storage_delete(&sk_storage_map, sock->sk);
+	return 0;
+}
+
+SEC("lsm/socket_post_create")
+int BPF_PROG(socket_post_create, struct socket *sock, int family, int type,
+	     int protocol, int kern)
+{
+	__u32 pid = bpf_get_current_pid_tgid() >> 32;
+	struct dummy_storage *storage;
+
+	if (pid != monitored_pid)
+		return 0;
+
+	storage = bpf_sk_storage_get(&sk_storage_map, sock->sk, 0,
+				     BPF_SK_STORAGE_GET_F_CREATE);
+	if (!storage)
+		return 0;
+
+	storage->value = DUMMY_STORAGE_VALUE;
+
+	return 0;
+}
+
+SEC("lsm/file_open")
+int BPF_PROG(file_open, struct file *file)
+{
+	__u32 pid = bpf_get_current_pid_tgid() >> 32;
+	struct dummy_storage *storage;
+
+	if (pid != monitored_pid)
+		return 0;
+
+	if (!file->f_inode)
+		return 0;
+
+	storage = bpf_inode_storage_get(&inode_storage_map, file->f_inode, 0,
+				     BPF_LOCAL_STORAGE_GET_F_CREATE);
+	if (!storage)
+		return 0;
+
+	storage->value = DUMMY_STORAGE_VALUE;
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/lsm.c b/tools/testing/selftests/bpf/progs/lsm.c
index b4598d4bc4f7..ff4d343b94b5 100644
--- a/tools/testing/selftests/bpf/progs/lsm.c
+++ b/tools/testing/selftests/bpf/progs/lsm.c
@@ -9,6 +9,27 @@
 #include <bpf/bpf_tracing.h>
 #include  <errno.h>
 
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u64);
+} array SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u64);
+} hash SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_LRU_HASH);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u64);
+} lru_hash SEC(".maps");
+
 char _license[] SEC("license") = "GPL";
 
 int monitored_pid = 0;
@@ -36,13 +57,54 @@ int BPF_PROG(test_int_hook, struct vm_area_struct *vma,
 	return ret;
 }
 
-SEC("lsm/bprm_committed_creds")
+SEC("lsm.s/bprm_committed_creds")
 int BPF_PROG(test_void_hook, struct linux_binprm *bprm)
 {
 	__u32 pid = bpf_get_current_pid_tgid() >> 32;
+	char args[64];
+	__u32 key = 0;
+	__u64 *value;
 
 	if (monitored_pid == pid)
 		bprm_count++;
 
+	bpf_copy_from_user(args, sizeof(args), (void *)bprm->vma->vm_mm->arg_start);
+	bpf_copy_from_user(args, sizeof(args), (void *)bprm->mm->arg_start);
+
+	value = bpf_map_lookup_elem(&array, &key);
+	if (value)
+		*value = 0;
+	value = bpf_map_lookup_elem(&hash, &key);
+	if (value)
+		*value = 0;
+	value = bpf_map_lookup_elem(&lru_hash, &key);
+	if (value)
+		*value = 0;
+
+	return 0;
+}
+SEC("lsm/task_free") /* lsm/ is ok, lsm.s/ fails */
+int BPF_PROG(test_task_free, struct task_struct *task)
+{
+	return 0;
+}
+
+int copy_test = 0;
+
+SEC("fentry.s/__x64_sys_setdomainname")
+int BPF_PROG(test_sys_setdomainname, struct pt_regs *regs)
+{
+	void *ptr = (void *)PT_REGS_PARM1(regs);
+	int len = PT_REGS_PARM2(regs);
+	int buf = 0;
+	long ret;
+
+	ret = bpf_copy_from_user(&buf, sizeof(buf), ptr);
+	if (len == -2 && ret == 0 && buf == 1234)
+		copy_test++;
+	if (len == -3 && ret == -EFAULT)
+		copy_test++;
+	if (len == -4 && ret == -EFAULT)
+		copy_test++;
 	return 0;
 }
diff --git a/tools/testing/selftests/bpf/progs/map_ptr_kern.c b/tools/testing/selftests/bpf/progs/map_ptr_kern.c
index 473665cac67e..c325405751e2 100644
--- a/tools/testing/selftests/bpf/progs/map_ptr_kern.c
+++ b/tools/testing/selftests/bpf/progs/map_ptr_kern.c
@@ -82,6 +82,14 @@ static inline int check_default(struct bpf_map *indirect,
 	return 1;
 }
 
+static __noinline int
+check_default_noinline(struct bpf_map *indirect, struct bpf_map *direct)
+{
+	VERIFY(check(indirect, direct, sizeof(__u32), sizeof(__u32),
+		     MAX_ENTRIES));
+	return 1;
+}
+
 typedef struct {
 	int counter;
 } atomic_t;
@@ -107,7 +115,7 @@ static inline int check_hash(void)
 	struct bpf_map *map = (struct bpf_map *)&m_hash;
 	int i;
 
-	VERIFY(check_default(&hash->map, map));
+	VERIFY(check_default_noinline(&hash->map, map));
 
 	VERIFY(hash->n_buckets == MAX_ENTRIES);
 	VERIFY(hash->elem_size == 64);
@@ -589,7 +597,7 @@ static inline int check_stack(void)
 	return 1;
 }
 
-struct bpf_sk_storage_map {
+struct bpf_local_storage_map {
 	struct bpf_map map;
 } __attribute__((preserve_access_index));
 
@@ -602,8 +610,8 @@ struct {
 
 static inline int check_sk_storage(void)
 {
-	struct bpf_sk_storage_map *sk_storage =
-		(struct bpf_sk_storage_map *)&m_sk_storage;
+	struct bpf_local_storage_map *sk_storage =
+		(struct bpf_local_storage_map *)&m_sk_storage;
 	struct bpf_map *map = (struct bpf_map *)&m_sk_storage;
 
 	VERIFY(check(&sk_storage->map, map, sizeof(__u32), sizeof(__u32), 0));
diff --git a/tools/testing/selftests/bpf/progs/metadata_unused.c b/tools/testing/selftests/bpf/progs/metadata_unused.c
new file mode 100644
index 000000000000..672a0d19f8d0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/metadata_unused.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+volatile const char bpf_metadata_a[] SEC(".rodata") = "foo";
+volatile const int bpf_metadata_b SEC(".rodata") = 1;
+
+SEC("cgroup_skb/egress")
+int prog(struct xdp_md *ctx)
+{
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/metadata_used.c b/tools/testing/selftests/bpf/progs/metadata_used.c
new file mode 100644
index 000000000000..b7198e65383d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/metadata_used.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+volatile const char bpf_metadata_a[] SEC(".rodata") = "bar";
+volatile const int bpf_metadata_b SEC(".rodata") = 2;
+
+SEC("cgroup_skb/egress")
+int prog(struct xdp_md *ctx)
+{
+	return bpf_metadata_b ? 1 : 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/netif_receive_skb.c b/tools/testing/selftests/bpf/progs/netif_receive_skb.c
new file mode 100644
index 000000000000..6b670039ea67
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/netif_receive_skb.c
@@ -0,0 +1,249 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020, Oracle and/or its affiliates. */
+
+#include "btf_ptr.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+#include <errno.h>
+
+long ret = 0;
+int num_subtests = 0;
+int ran_subtests = 0;
+bool skip = false;
+
+#define STRSIZE			2048
+#define EXPECTED_STRSIZE	256
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(x)	(sizeof(x) / sizeof((x)[0]))
+#endif
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, char[STRSIZE]);
+} strdata SEC(".maps");
+
+static int __strncmp(const void *m1, const void *m2, size_t len)
+{
+	const unsigned char *s1 = m1;
+	const unsigned char *s2 = m2;
+	int i, delta = 0;
+
+	for (i = 0; i < len; i++) {
+		delta = s1[i] - s2[i];
+		if (delta || s1[i] == 0 || s2[i] == 0)
+			break;
+	}
+	return delta;
+}
+
+#if __has_builtin(__builtin_btf_type_id)
+#define	TEST_BTF(_str, _type, _flags, _expected, ...)			\
+	do {								\
+		static const char _expectedval[EXPECTED_STRSIZE] =	\
+							_expected;	\
+		static const char _ptrtype[64] = #_type;		\
+		__u64 _hflags = _flags | BTF_F_COMPACT;			\
+		static _type _ptrdata = __VA_ARGS__;			\
+		static struct btf_ptr _ptr = { };			\
+		int _cmp;						\
+									\
+		++num_subtests;						\
+		if (ret < 0)						\
+			break;						\
+		++ran_subtests;						\
+		_ptr.ptr = &_ptrdata;					\
+		_ptr.type_id = bpf_core_type_id_kernel(_type);		\
+		if (_ptr.type_id <= 0) {				\
+			ret = -EINVAL;					\
+			break;						\
+		}							\
+		ret = bpf_snprintf_btf(_str, STRSIZE,			\
+				       &_ptr, sizeof(_ptr), _hflags);	\
+		if (ret)						\
+			break;						\
+		_cmp = __strncmp(_str, _expectedval, EXPECTED_STRSIZE);	\
+		if (_cmp != 0) {					\
+			bpf_printk("(%d) got %s", _cmp, _str);		\
+			bpf_printk("(%d) expected %s", _cmp,		\
+				   _expectedval);			\
+			ret = -EBADMSG;					\
+			break;						\
+		}							\
+	} while (0)
+#endif
+
+/* Use where expected data string matches its stringified declaration */
+#define TEST_BTF_C(_str, _type, _flags, ...)				\
+	TEST_BTF(_str, _type, _flags, "(" #_type ")" #__VA_ARGS__,	\
+		 __VA_ARGS__)
+
+/* TRACE_EVENT(netif_receive_skb,
+ *	TP_PROTO(struct sk_buff *skb),
+ */
+SEC("tp_btf/netif_receive_skb")
+int BPF_PROG(trace_netif_receive_skb, struct sk_buff *skb)
+{
+	static __u64 flags[] = { 0, BTF_F_COMPACT, BTF_F_ZERO, BTF_F_PTR_RAW,
+				 BTF_F_NONAME, BTF_F_COMPACT | BTF_F_ZERO |
+				 BTF_F_PTR_RAW | BTF_F_NONAME };
+	static struct btf_ptr p = { };
+	__u32 key = 0;
+	int i, __ret;
+	char *str;
+
+#if __has_builtin(__builtin_btf_type_id)
+	str = bpf_map_lookup_elem(&strdata, &key);
+	if (!str)
+		return 0;
+
+	/* Ensure we can write skb string representation */
+	p.type_id = bpf_core_type_id_kernel(struct sk_buff);
+	p.ptr = skb;
+	for (i = 0; i < ARRAY_SIZE(flags); i++) {
+		++num_subtests;
+		ret = bpf_snprintf_btf(str, STRSIZE, &p, sizeof(p), 0);
+		if (ret < 0)
+			bpf_printk("returned %d when writing skb", ret);
+		++ran_subtests;
+	}
+
+	/* Check invalid ptr value */
+	p.ptr = 0;
+	__ret = bpf_snprintf_btf(str, STRSIZE, &p, sizeof(p), 0);
+	if (__ret >= 0) {
+		bpf_printk("printing NULL should generate error, got (%d)",
+			   __ret);
+		ret = -ERANGE;
+	}
+
+	/* Verify type display for various types. */
+
+	/* simple int */
+	TEST_BTF_C(str, int, 0, 1234);
+	TEST_BTF(str, int, BTF_F_NONAME, "1234", 1234);
+	/* zero value should be printed at toplevel */
+	TEST_BTF(str, int, 0, "(int)0", 0);
+	TEST_BTF(str, int, BTF_F_NONAME, "0", 0);
+	TEST_BTF(str, int, BTF_F_ZERO, "(int)0", 0);
+	TEST_BTF(str, int, BTF_F_NONAME | BTF_F_ZERO, "0", 0);
+	TEST_BTF_C(str, int, 0, -4567);
+	TEST_BTF(str, int, BTF_F_NONAME, "-4567", -4567);
+
+	/* simple char */
+	TEST_BTF_C(str, char, 0, 100);
+	TEST_BTF(str, char, BTF_F_NONAME, "100", 100);
+	/* zero value should be printed at toplevel */
+	TEST_BTF(str, char, 0, "(char)0", 0);
+	TEST_BTF(str, char, BTF_F_NONAME, "0", 0);
+	TEST_BTF(str, char, BTF_F_ZERO, "(char)0", 0);
+	TEST_BTF(str, char, BTF_F_NONAME | BTF_F_ZERO, "0", 0);
+
+	/* simple typedef */
+	TEST_BTF_C(str, uint64_t, 0, 100);
+	TEST_BTF(str, u64, BTF_F_NONAME, "1", 1);
+	/* zero value should be printed at toplevel */
+	TEST_BTF(str, u64, 0, "(u64)0", 0);
+	TEST_BTF(str, u64, BTF_F_NONAME, "0", 0);
+	TEST_BTF(str, u64, BTF_F_ZERO, "(u64)0", 0);
+	TEST_BTF(str, u64, BTF_F_NONAME|BTF_F_ZERO, "0", 0);
+
+	/* typedef struct */
+	TEST_BTF_C(str, atomic_t, 0, {.counter = (int)1,});
+	TEST_BTF(str, atomic_t, BTF_F_NONAME, "{1,}", {.counter = 1,});
+	/* typedef with 0 value should be printed at toplevel */
+	TEST_BTF(str, atomic_t, 0, "(atomic_t){}", {.counter = 0,});
+	TEST_BTF(str, atomic_t, BTF_F_NONAME, "{}", {.counter = 0,});
+	TEST_BTF(str, atomic_t, BTF_F_ZERO, "(atomic_t){.counter = (int)0,}",
+		 {.counter = 0,});
+	TEST_BTF(str, atomic_t, BTF_F_NONAME|BTF_F_ZERO,
+		 "{0,}", {.counter = 0,});
+
+	/* enum where enum value does (and does not) exist */
+	TEST_BTF_C(str, enum bpf_cmd, 0, BPF_MAP_CREATE);
+	TEST_BTF(str, enum bpf_cmd, 0, "(enum bpf_cmd)BPF_MAP_CREATE", 0);
+	TEST_BTF(str, enum bpf_cmd, BTF_F_NONAME, "BPF_MAP_CREATE",
+		 BPF_MAP_CREATE);
+	TEST_BTF(str, enum bpf_cmd, BTF_F_NONAME|BTF_F_ZERO,
+		 "BPF_MAP_CREATE", 0);
+
+	TEST_BTF(str, enum bpf_cmd, BTF_F_ZERO, "(enum bpf_cmd)BPF_MAP_CREATE",
+		 BPF_MAP_CREATE);
+	TEST_BTF(str, enum bpf_cmd, BTF_F_NONAME|BTF_F_ZERO,
+		 "BPF_MAP_CREATE", BPF_MAP_CREATE);
+	TEST_BTF_C(str, enum bpf_cmd, 0, 2000);
+	TEST_BTF(str, enum bpf_cmd, BTF_F_NONAME, "2000", 2000);
+
+	/* simple struct */
+	TEST_BTF_C(str, struct btf_enum, 0,
+		   {.name_off = (__u32)3,.val = (__s32)-1,});
+	TEST_BTF(str, struct btf_enum, BTF_F_NONAME, "{3,-1,}",
+		 { .name_off = 3, .val = -1,});
+	TEST_BTF(str, struct btf_enum, BTF_F_NONAME, "{-1,}",
+		 { .name_off = 0, .val = -1,});
+	TEST_BTF(str, struct btf_enum, BTF_F_NONAME|BTF_F_ZERO, "{0,-1,}",
+		 { .name_off = 0, .val = -1,});
+	/* empty struct should be printed */
+	TEST_BTF(str, struct btf_enum, 0, "(struct btf_enum){}",
+		 { .name_off = 0, .val = 0,});
+	TEST_BTF(str, struct btf_enum, BTF_F_NONAME, "{}",
+		 { .name_off = 0, .val = 0,});
+	TEST_BTF(str, struct btf_enum, BTF_F_ZERO,
+		 "(struct btf_enum){.name_off = (__u32)0,.val = (__s32)0,}",
+		 { .name_off = 0, .val = 0,});
+
+	/* struct with pointers */
+	TEST_BTF(str, struct list_head, BTF_F_PTR_RAW,
+		 "(struct list_head){.next = (struct list_head *)0x0000000000000001,}",
+		 { .next = (struct list_head *)1 });
+	/* NULL pointer should not be displayed */
+	TEST_BTF(str, struct list_head, BTF_F_PTR_RAW,
+		 "(struct list_head){}",
+		 { .next = (struct list_head *)0 });
+
+	/* struct with char array */
+	TEST_BTF(str, struct bpf_prog_info, 0,
+		 "(struct bpf_prog_info){.name = (char[])['f','o','o',],}",
+		 { .name = "foo",});
+	TEST_BTF(str, struct bpf_prog_info, BTF_F_NONAME,
+		 "{['f','o','o',],}",
+		 {.name = "foo",});
+	/* leading null char means do not display string */
+	TEST_BTF(str, struct bpf_prog_info, 0,
+		 "(struct bpf_prog_info){}",
+		 {.name = {'\0', 'f', 'o', 'o'}});
+	/* handle non-printable characters */
+	TEST_BTF(str, struct bpf_prog_info, 0,
+		 "(struct bpf_prog_info){.name = (char[])[1,2,3,],}",
+		 { .name = {1, 2, 3, 0}});
+
+	/* struct with non-char array */
+	TEST_BTF(str, struct __sk_buff, 0,
+		 "(struct __sk_buff){.cb = (__u32[])[1,2,3,4,5,],}",
+		 { .cb = {1, 2, 3, 4, 5,},});
+	TEST_BTF(str, struct __sk_buff, BTF_F_NONAME,
+		 "{[1,2,3,4,5,],}",
+		 { .cb = { 1, 2, 3, 4, 5},});
+	/* For non-char, arrays, show non-zero values only */
+	TEST_BTF(str, struct __sk_buff, 0,
+		 "(struct __sk_buff){.cb = (__u32[])[1,],}",
+		 { .cb = { 0, 0, 1, 0, 0},});
+
+	/* struct with bitfields */
+	TEST_BTF_C(str, struct bpf_insn, 0,
+		   {.code = (__u8)1,.dst_reg = (__u8)0x2,.src_reg = (__u8)0x3,.off = (__s16)4,.imm = (__s32)5,});
+	TEST_BTF(str, struct bpf_insn, BTF_F_NONAME, "{1,0x2,0x3,4,5,}",
+		 {.code = 1, .dst_reg = 0x2, .src_reg = 0x3, .off = 4,
+		  .imm = 5,});
+#else
+	skip = true;
+#endif
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/profiler.h b/tools/testing/selftests/bpf/progs/profiler.h
new file mode 100644
index 000000000000..3bac4fdd4bdf
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/profiler.h
@@ -0,0 +1,177 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#pragma once
+
+#define TASK_COMM_LEN 16
+#define MAX_ANCESTORS 4
+#define MAX_PATH 256
+#define KILL_TARGET_LEN 64
+#define CTL_MAXNAME 10
+#define MAX_ARGS_LEN 4096
+#define MAX_FILENAME_LEN 512
+#define MAX_ENVIRON_LEN 8192
+#define MAX_PATH_DEPTH 32
+#define MAX_FILEPATH_LENGTH (MAX_PATH_DEPTH * MAX_PATH)
+#define MAX_CGROUPS_PATH_DEPTH 8
+
+#define MAX_METADATA_PAYLOAD_LEN TASK_COMM_LEN
+
+#define MAX_CGROUP_PAYLOAD_LEN \
+	(MAX_PATH * 2 + (MAX_PATH * MAX_CGROUPS_PATH_DEPTH))
+
+#define MAX_CAP_PAYLOAD_LEN (MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN)
+
+#define MAX_SYSCTL_PAYLOAD_LEN \
+	(MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN + CTL_MAXNAME + MAX_PATH)
+
+#define MAX_KILL_PAYLOAD_LEN \
+	(MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN + TASK_COMM_LEN + \
+	 KILL_TARGET_LEN)
+
+#define MAX_EXEC_PAYLOAD_LEN \
+	(MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN + MAX_FILENAME_LEN + \
+	 MAX_ARGS_LEN + MAX_ENVIRON_LEN)
+
+#define MAX_FILEMOD_PAYLOAD_LEN \
+	(MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN + MAX_FILEPATH_LENGTH + \
+	 MAX_FILEPATH_LENGTH)
+
+enum data_type {
+	INVALID_EVENT,
+	EXEC_EVENT,
+	FORK_EVENT,
+	KILL_EVENT,
+	SYSCTL_EVENT,
+	FILEMOD_EVENT,
+	MAX_DATA_TYPE_EVENT
+};
+
+enum filemod_type {
+	FMOD_OPEN,
+	FMOD_LINK,
+	FMOD_SYMLINK,
+};
+
+struct ancestors_data_t {
+	pid_t ancestor_pids[MAX_ANCESTORS];
+	uint32_t ancestor_exec_ids[MAX_ANCESTORS];
+	uint64_t ancestor_start_times[MAX_ANCESTORS];
+	uint32_t num_ancestors;
+};
+
+struct var_metadata_t {
+	enum data_type type;
+	pid_t pid;
+	uint32_t exec_id;
+	uid_t uid;
+	gid_t gid;
+	uint64_t start_time;
+	uint32_t cpu_id;
+	uint64_t bpf_stats_num_perf_events;
+	uint64_t bpf_stats_start_ktime_ns;
+	uint8_t comm_length;
+};
+
+struct cgroup_data_t {
+	ino_t cgroup_root_inode;
+	ino_t cgroup_proc_inode;
+	uint64_t cgroup_root_mtime;
+	uint64_t cgroup_proc_mtime;
+	uint16_t cgroup_root_length;
+	uint16_t cgroup_proc_length;
+	uint16_t cgroup_full_length;
+	int cgroup_full_path_root_pos;
+};
+
+struct var_sysctl_data_t {
+	struct var_metadata_t meta;
+	struct cgroup_data_t cgroup_data;
+	struct ancestors_data_t ancestors_info;
+	uint8_t sysctl_val_length;
+	uint16_t sysctl_path_length;
+	char payload[MAX_SYSCTL_PAYLOAD_LEN];
+};
+
+struct var_kill_data_t {
+	struct var_metadata_t meta;
+	struct cgroup_data_t cgroup_data;
+	struct ancestors_data_t ancestors_info;
+	pid_t kill_target_pid;
+	int kill_sig;
+	uint32_t kill_count;
+	uint64_t last_kill_time;
+	uint8_t kill_target_name_length;
+	uint8_t kill_target_cgroup_proc_length;
+	char payload[MAX_KILL_PAYLOAD_LEN];
+	size_t payload_length;
+};
+
+struct var_exec_data_t {
+	struct var_metadata_t meta;
+	struct cgroup_data_t cgroup_data;
+	pid_t parent_pid;
+	uint32_t parent_exec_id;
+	uid_t parent_uid;
+	uint64_t parent_start_time;
+	uint16_t bin_path_length;
+	uint16_t cmdline_length;
+	uint16_t environment_length;
+	char payload[MAX_EXEC_PAYLOAD_LEN];
+};
+
+struct var_fork_data_t {
+	struct var_metadata_t meta;
+	pid_t parent_pid;
+	uint32_t parent_exec_id;
+	uint64_t parent_start_time;
+	char payload[MAX_METADATA_PAYLOAD_LEN];
+};
+
+struct var_filemod_data_t {
+	struct var_metadata_t meta;
+	struct cgroup_data_t cgroup_data;
+	enum filemod_type fmod_type;
+	unsigned int dst_flags;
+	uint32_t src_device_id;
+	uint32_t dst_device_id;
+	ino_t src_inode;
+	ino_t dst_inode;
+	uint16_t src_filepath_length;
+	uint16_t dst_filepath_length;
+	char payload[MAX_FILEMOD_PAYLOAD_LEN];
+};
+
+struct profiler_config_struct {
+	bool fetch_cgroups_from_bpf;
+	ino_t cgroup_fs_inode;
+	ino_t cgroup_login_session_inode;
+	uint64_t kill_signals_mask;
+	ino_t inode_filter;
+	uint32_t stale_info_secs;
+	bool use_variable_buffers;
+	bool read_environ_from_exec;
+	bool enable_cgroup_v1_resolver;
+};
+
+struct bpf_func_stats_data {
+	uint64_t time_elapsed_ns;
+	uint64_t num_executions;
+	uint64_t num_perf_events;
+};
+
+struct bpf_func_stats_ctx {
+	uint64_t start_time_ns;
+	struct bpf_func_stats_data* bpf_func_stats_data_val;
+};
+
+enum bpf_function_id {
+	profiler_bpf_proc_sys_write,
+	profiler_bpf_sched_process_exec,
+	profiler_bpf_sched_process_exit,
+	profiler_bpf_sys_enter_kill,
+	profiler_bpf_do_filp_open_ret,
+	profiler_bpf_sched_process_fork,
+	profiler_bpf_vfs_link,
+	profiler_bpf_vfs_symlink,
+	profiler_bpf_max_function_id
+};
diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h
new file mode 100644
index 000000000000..00578311a423
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/profiler.inc.h
@@ -0,0 +1,969 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <vmlinux.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#include "profiler.h"
+
+#ifndef NULL
+#define NULL 0
+#endif
+
+#define O_WRONLY 00000001
+#define O_RDWR 00000002
+#define O_DIRECTORY 00200000
+#define __O_TMPFILE 020000000
+#define O_TMPFILE (__O_TMPFILE | O_DIRECTORY)
+#define MAX_ERRNO 4095
+#define S_IFMT 00170000
+#define S_IFSOCK 0140000
+#define S_IFLNK 0120000
+#define S_IFREG 0100000
+#define S_IFBLK 0060000
+#define S_IFDIR 0040000
+#define S_IFCHR 0020000
+#define S_IFIFO 0010000
+#define S_ISUID 0004000
+#define S_ISGID 0002000
+#define S_ISVTX 0001000
+#define S_ISLNK(m) (((m)&S_IFMT) == S_IFLNK)
+#define S_ISDIR(m) (((m)&S_IFMT) == S_IFDIR)
+#define S_ISCHR(m) (((m)&S_IFMT) == S_IFCHR)
+#define S_ISBLK(m) (((m)&S_IFMT) == S_IFBLK)
+#define S_ISFIFO(m) (((m)&S_IFMT) == S_IFIFO)
+#define S_ISSOCK(m) (((m)&S_IFMT) == S_IFSOCK)
+#define IS_ERR_VALUE(x) (unsigned long)(void*)(x) >= (unsigned long)-MAX_ERRNO
+
+#define KILL_DATA_ARRAY_SIZE 8
+
+struct var_kill_data_arr_t {
+	struct var_kill_data_t array[KILL_DATA_ARRAY_SIZE];
+};
+
+union any_profiler_data_t {
+	struct var_exec_data_t var_exec;
+	struct var_kill_data_t var_kill;
+	struct var_sysctl_data_t var_sysctl;
+	struct var_filemod_data_t var_filemod;
+	struct var_fork_data_t var_fork;
+	struct var_kill_data_arr_t var_kill_data_arr;
+};
+
+volatile struct profiler_config_struct bpf_config = {};
+
+#define FETCH_CGROUPS_FROM_BPF (bpf_config.fetch_cgroups_from_bpf)
+#define CGROUP_FS_INODE (bpf_config.cgroup_fs_inode)
+#define CGROUP_LOGIN_SESSION_INODE \
+	(bpf_config.cgroup_login_session_inode)
+#define KILL_SIGNALS (bpf_config.kill_signals_mask)
+#define STALE_INFO (bpf_config.stale_info_secs)
+#define INODE_FILTER (bpf_config.inode_filter)
+#define READ_ENVIRON_FROM_EXEC (bpf_config.read_environ_from_exec)
+#define ENABLE_CGROUP_V1_RESOLVER (bpf_config.enable_cgroup_v1_resolver)
+
+struct kernfs_iattrs___52 {
+	struct iattr ia_iattr;
+};
+
+struct kernfs_node___52 {
+	union /* kernfs_node_id */ {
+		struct {
+			u32 ino;
+			u32 generation;
+		};
+		u64 id;
+	} id;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, u32);
+	__type(value, union any_profiler_data_t);
+} data_heap SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(int));
+} events SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, KILL_DATA_ARRAY_SIZE);
+	__type(key, u32);
+	__type(value, struct var_kill_data_arr_t);
+} var_tpid_to_data SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(max_entries, profiler_bpf_max_function_id);
+	__type(key, u32);
+	__type(value, struct bpf_func_stats_data);
+} bpf_func_stats SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__type(key, u32);
+	__type(value, bool);
+	__uint(max_entries, 16);
+} allowed_devices SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__type(key, u64);
+	__type(value, bool);
+	__uint(max_entries, 1024);
+} allowed_file_inodes SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__type(key, u64);
+	__type(value, bool);
+	__uint(max_entries, 1024);
+} allowed_directory_inodes SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__type(key, u32);
+	__type(value, bool);
+	__uint(max_entries, 16);
+} disallowed_exec_inodes SEC(".maps");
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0]))
+#endif
+
+static INLINE bool IS_ERR(const void* ptr)
+{
+	return IS_ERR_VALUE((unsigned long)ptr);
+}
+
+static INLINE u32 get_userspace_pid()
+{
+	return bpf_get_current_pid_tgid() >> 32;
+}
+
+static INLINE bool is_init_process(u32 tgid)
+{
+	return tgid == 1 || tgid == 0;
+}
+
+static INLINE unsigned long
+probe_read_lim(void* dst, void* src, unsigned long len, unsigned long max)
+{
+	len = len < max ? len : max;
+	if (len > 1) {
+		if (bpf_probe_read(dst, len, src))
+			return 0;
+	} else if (len == 1) {
+		if (bpf_probe_read(dst, 1, src))
+			return 0;
+	}
+	return len;
+}
+
+static INLINE int get_var_spid_index(struct var_kill_data_arr_t* arr_struct,
+				     int spid)
+{
+#ifdef UNROLL
+#pragma unroll
+#endif
+	for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++)
+		if (arr_struct->array[i].meta.pid == spid)
+			return i;
+	return -1;
+}
+
+static INLINE void populate_ancestors(struct task_struct* task,
+				      struct ancestors_data_t* ancestors_data)
+{
+	struct task_struct* parent = task;
+	u32 num_ancestors, ppid;
+
+	ancestors_data->num_ancestors = 0;
+#ifdef UNROLL
+#pragma unroll
+#endif
+	for (num_ancestors = 0; num_ancestors < MAX_ANCESTORS; num_ancestors++) {
+		parent = BPF_CORE_READ(parent, real_parent);
+		if (parent == NULL)
+			break;
+		ppid = BPF_CORE_READ(parent, tgid);
+		if (is_init_process(ppid))
+			break;
+		ancestors_data->ancestor_pids[num_ancestors] = ppid;
+		ancestors_data->ancestor_exec_ids[num_ancestors] =
+			BPF_CORE_READ(parent, self_exec_id);
+		ancestors_data->ancestor_start_times[num_ancestors] =
+			BPF_CORE_READ(parent, start_time);
+		ancestors_data->num_ancestors = num_ancestors;
+	}
+}
+
+static INLINE void* read_full_cgroup_path(struct kernfs_node* cgroup_node,
+					  struct kernfs_node* cgroup_root_node,
+					  void* payload,
+					  int* root_pos)
+{
+	void* payload_start = payload;
+	size_t filepart_length;
+
+#ifdef UNROLL
+#pragma unroll
+#endif
+	for (int i = 0; i < MAX_CGROUPS_PATH_DEPTH; i++) {
+		filepart_length =
+			bpf_probe_read_str(payload, MAX_PATH, BPF_CORE_READ(cgroup_node, name));
+		if (!cgroup_node)
+			return payload;
+		if (cgroup_node == cgroup_root_node)
+			*root_pos = payload - payload_start;
+		if (filepart_length <= MAX_PATH) {
+			barrier_var(filepart_length);
+			payload += filepart_length;
+		}
+		cgroup_node = BPF_CORE_READ(cgroup_node, parent);
+	}
+	return payload;
+}
+
+static ino_t get_inode_from_kernfs(struct kernfs_node* node)
+{
+	struct kernfs_node___52* node52 = (void*)node;
+
+	if (bpf_core_field_exists(node52->id.ino)) {
+		barrier_var(node52);
+		return BPF_CORE_READ(node52, id.ino);
+	} else {
+		barrier_var(node);
+		return (u64)BPF_CORE_READ(node, id);
+	}
+}
+
+int pids_cgrp_id = 1;
+
+static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data,
+					 struct task_struct* task,
+					 void* payload)
+{
+	struct kernfs_node* root_kernfs =
+		BPF_CORE_READ(task, nsproxy, cgroup_ns, root_cset, dfl_cgrp, kn);
+	struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn);
+
+	if (ENABLE_CGROUP_V1_RESOLVER) {
+#ifdef UNROLL
+#pragma unroll
+#endif
+		for (int i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+			struct cgroup_subsys_state* subsys =
+				BPF_CORE_READ(task, cgroups, subsys[i]);
+			if (subsys != NULL) {
+				int subsys_id = BPF_CORE_READ(subsys, ss, id);
+				if (subsys_id == pids_cgrp_id) {
+					proc_kernfs = BPF_CORE_READ(subsys, cgroup, kn);
+					root_kernfs = BPF_CORE_READ(subsys, ss, root, kf_root, kn);
+					break;
+				}
+			}
+		}
+	}
+
+	cgroup_data->cgroup_root_inode = get_inode_from_kernfs(root_kernfs);
+	cgroup_data->cgroup_proc_inode = get_inode_from_kernfs(proc_kernfs);
+
+	if (bpf_core_field_exists(root_kernfs->iattr->ia_mtime)) {
+		cgroup_data->cgroup_root_mtime =
+			BPF_CORE_READ(root_kernfs, iattr, ia_mtime.tv_nsec);
+		cgroup_data->cgroup_proc_mtime =
+			BPF_CORE_READ(proc_kernfs, iattr, ia_mtime.tv_nsec);
+	} else {
+		struct kernfs_iattrs___52* root_iattr =
+			(struct kernfs_iattrs___52*)BPF_CORE_READ(root_kernfs, iattr);
+		cgroup_data->cgroup_root_mtime =
+			BPF_CORE_READ(root_iattr, ia_iattr.ia_mtime.tv_nsec);
+
+		struct kernfs_iattrs___52* proc_iattr =
+			(struct kernfs_iattrs___52*)BPF_CORE_READ(proc_kernfs, iattr);
+		cgroup_data->cgroup_proc_mtime =
+			BPF_CORE_READ(proc_iattr, ia_iattr.ia_mtime.tv_nsec);
+	}
+
+	cgroup_data->cgroup_root_length = 0;
+	cgroup_data->cgroup_proc_length = 0;
+	cgroup_data->cgroup_full_length = 0;
+
+	size_t cgroup_root_length =
+		bpf_probe_read_str(payload, MAX_PATH, BPF_CORE_READ(root_kernfs, name));
+	barrier_var(cgroup_root_length);
+	if (cgroup_root_length <= MAX_PATH) {
+		barrier_var(cgroup_root_length);
+		cgroup_data->cgroup_root_length = cgroup_root_length;
+		payload += cgroup_root_length;
+	}
+
+	size_t cgroup_proc_length =
+		bpf_probe_read_str(payload, MAX_PATH, BPF_CORE_READ(proc_kernfs, name));
+	barrier_var(cgroup_proc_length);
+	if (cgroup_proc_length <= MAX_PATH) {
+		barrier_var(cgroup_proc_length);
+		cgroup_data->cgroup_proc_length = cgroup_proc_length;
+		payload += cgroup_proc_length;
+	}
+
+	if (FETCH_CGROUPS_FROM_BPF) {
+		cgroup_data->cgroup_full_path_root_pos = -1;
+		void* payload_end_pos = read_full_cgroup_path(proc_kernfs, root_kernfs, payload,
+							      &cgroup_data->cgroup_full_path_root_pos);
+		cgroup_data->cgroup_full_length = payload_end_pos - payload;
+		payload = payload_end_pos;
+	}
+
+	return (void*)payload;
+}
+
+static INLINE void* populate_var_metadata(struct var_metadata_t* metadata,
+					  struct task_struct* task,
+					  u32 pid, void* payload)
+{
+	u64 uid_gid = bpf_get_current_uid_gid();
+
+	metadata->uid = (u32)uid_gid;
+	metadata->gid = uid_gid >> 32;
+	metadata->pid = pid;
+	metadata->exec_id = BPF_CORE_READ(task, self_exec_id);
+	metadata->start_time = BPF_CORE_READ(task, start_time);
+	metadata->comm_length = 0;
+
+	size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm);
+	barrier_var(comm_length);
+	if (comm_length <= TASK_COMM_LEN) {
+		barrier_var(comm_length);
+		metadata->comm_length = comm_length;
+		payload += comm_length;
+	}
+
+	return (void*)payload;
+}
+
+static INLINE struct var_kill_data_t*
+get_var_kill_data(struct pt_regs* ctx, int spid, int tpid, int sig)
+{
+	int zero = 0;
+	struct var_kill_data_t* kill_data = bpf_map_lookup_elem(&data_heap, &zero);
+
+	if (kill_data == NULL)
+		return NULL;
+	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
+
+	void* payload = populate_var_metadata(&kill_data->meta, task, spid, kill_data->payload);
+	payload = populate_cgroup_info(&kill_data->cgroup_data, task, payload);
+	size_t payload_length = payload - (void*)kill_data->payload;
+	kill_data->payload_length = payload_length;
+	populate_ancestors(task, &kill_data->ancestors_info);
+	kill_data->meta.type = KILL_EVENT;
+	kill_data->kill_target_pid = tpid;
+	kill_data->kill_sig = sig;
+	kill_data->kill_count = 1;
+	kill_data->last_kill_time = bpf_ktime_get_ns();
+	return kill_data;
+}
+
+static INLINE int trace_var_sys_kill(void* ctx, int tpid, int sig)
+{
+	if ((KILL_SIGNALS & (1ULL << sig)) == 0)
+		return 0;
+
+	u32 spid = get_userspace_pid();
+	struct var_kill_data_arr_t* arr_struct = bpf_map_lookup_elem(&var_tpid_to_data, &tpid);
+
+	if (arr_struct == NULL) {
+		struct var_kill_data_t* kill_data = get_var_kill_data(ctx, spid, tpid, sig);
+		int zero = 0;
+
+		if (kill_data == NULL)
+			return 0;
+		arr_struct = bpf_map_lookup_elem(&data_heap, &zero);
+		if (arr_struct == NULL)
+			return 0;
+		bpf_probe_read(&arr_struct->array[0], sizeof(arr_struct->array[0]), kill_data);
+	} else {
+		int index = get_var_spid_index(arr_struct, spid);
+
+		if (index == -1) {
+			struct var_kill_data_t* kill_data =
+				get_var_kill_data(ctx, spid, tpid, sig);
+			if (kill_data == NULL)
+				return 0;
+#ifdef UNROLL
+#pragma unroll
+#endif
+			for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++)
+				if (arr_struct->array[i].meta.pid == 0) {
+					bpf_probe_read(&arr_struct->array[i],
+						       sizeof(arr_struct->array[i]), kill_data);
+					bpf_map_update_elem(&var_tpid_to_data, &tpid,
+							    arr_struct, 0);
+
+					return 0;
+				}
+			return 0;
+		}
+
+		struct var_kill_data_t* kill_data = &arr_struct->array[index];
+
+		u64 delta_sec =
+			(bpf_ktime_get_ns() - kill_data->last_kill_time) / 1000000000;
+
+		if (delta_sec < STALE_INFO) {
+			kill_data->kill_count++;
+			kill_data->last_kill_time = bpf_ktime_get_ns();
+			bpf_probe_read(&arr_struct->array[index],
+				       sizeof(arr_struct->array[index]),
+				       kill_data);
+		} else {
+			struct var_kill_data_t* kill_data =
+				get_var_kill_data(ctx, spid, tpid, sig);
+			if (kill_data == NULL)
+				return 0;
+			bpf_probe_read(&arr_struct->array[index],
+				       sizeof(arr_struct->array[index]),
+				       kill_data);
+		}
+	}
+	bpf_map_update_elem(&var_tpid_to_data, &tpid, arr_struct, 0);
+	return 0;
+}
+
+static INLINE void bpf_stats_enter(struct bpf_func_stats_ctx* bpf_stat_ctx,
+				   enum bpf_function_id func_id)
+{
+	int func_id_key = func_id;
+
+	bpf_stat_ctx->start_time_ns = bpf_ktime_get_ns();
+	bpf_stat_ctx->bpf_func_stats_data_val =
+		bpf_map_lookup_elem(&bpf_func_stats, &func_id_key);
+	if (bpf_stat_ctx->bpf_func_stats_data_val)
+		bpf_stat_ctx->bpf_func_stats_data_val->num_executions++;
+}
+
+static INLINE void bpf_stats_exit(struct bpf_func_stats_ctx* bpf_stat_ctx)
+{
+	if (bpf_stat_ctx->bpf_func_stats_data_val)
+		bpf_stat_ctx->bpf_func_stats_data_val->time_elapsed_ns +=
+			bpf_ktime_get_ns() - bpf_stat_ctx->start_time_ns;
+}
+
+static INLINE void
+bpf_stats_pre_submit_var_perf_event(struct bpf_func_stats_ctx* bpf_stat_ctx,
+				    struct var_metadata_t* meta)
+{
+	if (bpf_stat_ctx->bpf_func_stats_data_val) {
+		bpf_stat_ctx->bpf_func_stats_data_val->num_perf_events++;
+		meta->bpf_stats_num_perf_events =
+			bpf_stat_ctx->bpf_func_stats_data_val->num_perf_events;
+	}
+	meta->bpf_stats_start_ktime_ns = bpf_stat_ctx->start_time_ns;
+	meta->cpu_id = bpf_get_smp_processor_id();
+}
+
+static INLINE size_t
+read_absolute_file_path_from_dentry(struct dentry* filp_dentry, void* payload)
+{
+	size_t length = 0;
+	size_t filepart_length;
+	struct dentry* parent_dentry;
+
+#ifdef UNROLL
+#pragma unroll
+#endif
+	for (int i = 0; i < MAX_PATH_DEPTH; i++) {
+		filepart_length = bpf_probe_read_str(payload, MAX_PATH,
+						     BPF_CORE_READ(filp_dentry, d_name.name));
+		barrier_var(filepart_length);
+		if (filepart_length > MAX_PATH)
+			break;
+		barrier_var(filepart_length);
+		payload += filepart_length;
+		length += filepart_length;
+
+		parent_dentry = BPF_CORE_READ(filp_dentry, d_parent);
+		if (filp_dentry == parent_dentry)
+			break;
+		filp_dentry = parent_dentry;
+	}
+
+	return length;
+}
+
+static INLINE bool
+is_ancestor_in_allowed_inodes(struct dentry* filp_dentry)
+{
+	struct dentry* parent_dentry;
+#ifdef UNROLL
+#pragma unroll
+#endif
+	for (int i = 0; i < MAX_PATH_DEPTH; i++) {
+		u64 dir_ino = BPF_CORE_READ(filp_dentry, d_inode, i_ino);
+		bool* allowed_dir = bpf_map_lookup_elem(&allowed_directory_inodes, &dir_ino);
+
+		if (allowed_dir != NULL)
+			return true;
+		parent_dentry = BPF_CORE_READ(filp_dentry, d_parent);
+		if (filp_dentry == parent_dentry)
+			break;
+		filp_dentry = parent_dentry;
+	}
+	return false;
+}
+
+static INLINE bool is_dentry_allowed_for_filemod(struct dentry* file_dentry,
+						 u32* device_id,
+						 u64* file_ino)
+{
+	u32 dev_id = BPF_CORE_READ(file_dentry, d_sb, s_dev);
+	*device_id = dev_id;
+	bool* allowed_device = bpf_map_lookup_elem(&allowed_devices, &dev_id);
+
+	if (allowed_device == NULL)
+		return false;
+
+	u64 ino = BPF_CORE_READ(file_dentry, d_inode, i_ino);
+	*file_ino = ino;
+	bool* allowed_file = bpf_map_lookup_elem(&allowed_file_inodes, &ino);
+
+	if (allowed_file == NULL)
+		if (!is_ancestor_in_allowed_inodes(BPF_CORE_READ(file_dentry, d_parent)))
+			return false;
+	return true;
+}
+
+SEC("kprobe/proc_sys_write")
+ssize_t BPF_KPROBE(kprobe__proc_sys_write,
+		   struct file* filp, const char* buf,
+		   size_t count, loff_t* ppos)
+{
+	struct bpf_func_stats_ctx stats_ctx;
+	bpf_stats_enter(&stats_ctx, profiler_bpf_proc_sys_write);
+
+	u32 pid = get_userspace_pid();
+	int zero = 0;
+	struct var_sysctl_data_t* sysctl_data =
+		bpf_map_lookup_elem(&data_heap, &zero);
+	if (!sysctl_data)
+		goto out;
+
+	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
+	sysctl_data->meta.type = SYSCTL_EVENT;
+	void* payload = populate_var_metadata(&sysctl_data->meta, task, pid, sysctl_data->payload);
+	payload = populate_cgroup_info(&sysctl_data->cgroup_data, task, payload);
+
+	populate_ancestors(task, &sysctl_data->ancestors_info);
+
+	sysctl_data->sysctl_val_length = 0;
+	sysctl_data->sysctl_path_length = 0;
+
+	size_t sysctl_val_length = bpf_probe_read_str(payload, CTL_MAXNAME, buf);
+	barrier_var(sysctl_val_length);
+	if (sysctl_val_length <= CTL_MAXNAME) {
+		barrier_var(sysctl_val_length);
+		sysctl_data->sysctl_val_length = sysctl_val_length;
+		payload += sysctl_val_length;
+	}
+
+	size_t sysctl_path_length = bpf_probe_read_str(payload, MAX_PATH,
+						       BPF_CORE_READ(filp, f_path.dentry, d_name.name));
+	barrier_var(sysctl_path_length);
+	if (sysctl_path_length <= MAX_PATH) {
+		barrier_var(sysctl_path_length);
+		sysctl_data->sysctl_path_length = sysctl_path_length;
+		payload += sysctl_path_length;
+	}
+
+	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &sysctl_data->meta);
+	unsigned long data_len = payload - (void*)sysctl_data;
+	data_len = data_len > sizeof(struct var_sysctl_data_t)
+		? sizeof(struct var_sysctl_data_t)
+		: data_len;
+	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, sysctl_data, data_len);
+out:
+	bpf_stats_exit(&stats_ctx);
+	return 0;
+}
+
+SEC("tracepoint/syscalls/sys_enter_kill")
+int tracepoint__syscalls__sys_enter_kill(struct trace_event_raw_sys_enter* ctx)
+{
+	struct bpf_func_stats_ctx stats_ctx;
+
+	bpf_stats_enter(&stats_ctx, profiler_bpf_sys_enter_kill);
+	int pid = ctx->args[0];
+	int sig = ctx->args[1];
+	int ret = trace_var_sys_kill(ctx, pid, sig);
+	bpf_stats_exit(&stats_ctx);
+	return ret;
+};
+
+SEC("raw_tracepoint/sched_process_exit")
+int raw_tracepoint__sched_process_exit(void* ctx)
+{
+	int zero = 0;
+	struct bpf_func_stats_ctx stats_ctx;
+	bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_exit);
+
+	u32 tpid = get_userspace_pid();
+
+	struct var_kill_data_arr_t* arr_struct = bpf_map_lookup_elem(&var_tpid_to_data, &tpid);
+	struct var_kill_data_t* kill_data = bpf_map_lookup_elem(&data_heap, &zero);
+
+	if (arr_struct == NULL || kill_data == NULL)
+		goto out;
+
+	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
+	struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn);
+
+#ifdef UNROLL
+#pragma unroll
+#endif
+	for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++) {
+		struct var_kill_data_t* past_kill_data = &arr_struct->array[i];
+
+		if (past_kill_data != NULL && past_kill_data->kill_target_pid == tpid) {
+			bpf_probe_read(kill_data, sizeof(*past_kill_data), past_kill_data);
+			void* payload = kill_data->payload;
+			size_t offset = kill_data->payload_length;
+			if (offset >= MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN)
+				return 0;
+			payload += offset;
+
+			kill_data->kill_target_name_length = 0;
+			kill_data->kill_target_cgroup_proc_length = 0;
+
+			size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm);
+			barrier_var(comm_length);
+			if (comm_length <= TASK_COMM_LEN) {
+				barrier_var(comm_length);
+				kill_data->kill_target_name_length = comm_length;
+				payload += comm_length;
+			}
+
+			size_t cgroup_proc_length = bpf_probe_read_str(payload, KILL_TARGET_LEN,
+								       BPF_CORE_READ(proc_kernfs, name));
+			barrier_var(cgroup_proc_length);
+			if (cgroup_proc_length <= KILL_TARGET_LEN) {
+				barrier_var(cgroup_proc_length);
+				kill_data->kill_target_cgroup_proc_length = cgroup_proc_length;
+				payload += cgroup_proc_length;
+			}
+
+			bpf_stats_pre_submit_var_perf_event(&stats_ctx, &kill_data->meta);
+			unsigned long data_len = (void*)payload - (void*)kill_data;
+			data_len = data_len > sizeof(struct var_kill_data_t)
+				? sizeof(struct var_kill_data_t)
+				: data_len;
+			bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, kill_data, data_len);
+		}
+	}
+	bpf_map_delete_elem(&var_tpid_to_data, &tpid);
+out:
+	bpf_stats_exit(&stats_ctx);
+	return 0;
+}
+
+SEC("raw_tracepoint/sched_process_exec")
+int raw_tracepoint__sched_process_exec(struct bpf_raw_tracepoint_args* ctx)
+{
+	struct bpf_func_stats_ctx stats_ctx;
+	bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_exec);
+
+	struct linux_binprm* bprm = (struct linux_binprm*)ctx->args[2];
+	u64 inode = BPF_CORE_READ(bprm, file, f_inode, i_ino);
+
+	bool* should_filter_binprm = bpf_map_lookup_elem(&disallowed_exec_inodes, &inode);
+	if (should_filter_binprm != NULL)
+		goto out;
+
+	int zero = 0;
+	struct var_exec_data_t* proc_exec_data = bpf_map_lookup_elem(&data_heap, &zero);
+	if (!proc_exec_data)
+		goto out;
+
+	if (INODE_FILTER && inode != INODE_FILTER)
+		return 0;
+
+	u32 pid = get_userspace_pid();
+	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
+
+	proc_exec_data->meta.type = EXEC_EVENT;
+	proc_exec_data->bin_path_length = 0;
+	proc_exec_data->cmdline_length = 0;
+	proc_exec_data->environment_length = 0;
+	void* payload = populate_var_metadata(&proc_exec_data->meta, task, pid,
+					      proc_exec_data->payload);
+	payload = populate_cgroup_info(&proc_exec_data->cgroup_data, task, payload);
+
+	struct task_struct* parent_task = BPF_CORE_READ(task, real_parent);
+	proc_exec_data->parent_pid = BPF_CORE_READ(parent_task, tgid);
+	proc_exec_data->parent_uid = BPF_CORE_READ(parent_task, real_cred, uid.val);
+	proc_exec_data->parent_exec_id = BPF_CORE_READ(parent_task, self_exec_id);
+	proc_exec_data->parent_start_time = BPF_CORE_READ(parent_task, start_time);
+
+	const char* filename = BPF_CORE_READ(bprm, filename);
+	size_t bin_path_length = bpf_probe_read_str(payload, MAX_FILENAME_LEN, filename);
+	barrier_var(bin_path_length);
+	if (bin_path_length <= MAX_FILENAME_LEN) {
+		barrier_var(bin_path_length);
+		proc_exec_data->bin_path_length = bin_path_length;
+		payload += bin_path_length;
+	}
+
+	void* arg_start = (void*)BPF_CORE_READ(task, mm, arg_start);
+	void* arg_end = (void*)BPF_CORE_READ(task, mm, arg_end);
+	unsigned int cmdline_length = probe_read_lim(payload, arg_start,
+						     arg_end - arg_start, MAX_ARGS_LEN);
+
+	if (cmdline_length <= MAX_ARGS_LEN) {
+		barrier_var(cmdline_length);
+		proc_exec_data->cmdline_length = cmdline_length;
+		payload += cmdline_length;
+	}
+
+	if (READ_ENVIRON_FROM_EXEC) {
+		void* env_start = (void*)BPF_CORE_READ(task, mm, env_start);
+		void* env_end = (void*)BPF_CORE_READ(task, mm, env_end);
+		unsigned long env_len = probe_read_lim(payload, env_start,
+						       env_end - env_start, MAX_ENVIRON_LEN);
+		if (cmdline_length <= MAX_ENVIRON_LEN) {
+			proc_exec_data->environment_length = env_len;
+			payload += env_len;
+		}
+	}
+
+	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &proc_exec_data->meta);
+	unsigned long data_len = payload - (void*)proc_exec_data;
+	data_len = data_len > sizeof(struct var_exec_data_t)
+		? sizeof(struct var_exec_data_t)
+		: data_len;
+	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, proc_exec_data, data_len);
+out:
+	bpf_stats_exit(&stats_ctx);
+	return 0;
+}
+
+SEC("kretprobe/do_filp_open")
+int kprobe_ret__do_filp_open(struct pt_regs* ctx)
+{
+	struct bpf_func_stats_ctx stats_ctx;
+	bpf_stats_enter(&stats_ctx, profiler_bpf_do_filp_open_ret);
+
+	struct file* filp = (struct file*)PT_REGS_RC_CORE(ctx);
+
+	if (filp == NULL || IS_ERR(filp))
+		goto out;
+	unsigned int flags = BPF_CORE_READ(filp, f_flags);
+	if ((flags & (O_RDWR | O_WRONLY)) == 0)
+		goto out;
+	if ((flags & O_TMPFILE) > 0)
+		goto out;
+	struct inode* file_inode = BPF_CORE_READ(filp, f_inode);
+	umode_t mode = BPF_CORE_READ(file_inode, i_mode);
+	if (S_ISDIR(mode) || S_ISCHR(mode) || S_ISBLK(mode) || S_ISFIFO(mode) ||
+	    S_ISSOCK(mode))
+		goto out;
+
+	struct dentry* filp_dentry = BPF_CORE_READ(filp, f_path.dentry);
+	u32 device_id = 0;
+	u64 file_ino = 0;
+	if (!is_dentry_allowed_for_filemod(filp_dentry, &device_id, &file_ino))
+		goto out;
+
+	int zero = 0;
+	struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
+	if (!filemod_data)
+		goto out;
+
+	u32 pid = get_userspace_pid();
+	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
+
+	filemod_data->meta.type = FILEMOD_EVENT;
+	filemod_data->fmod_type = FMOD_OPEN;
+	filemod_data->dst_flags = flags;
+	filemod_data->src_inode = 0;
+	filemod_data->dst_inode = file_ino;
+	filemod_data->src_device_id = 0;
+	filemod_data->dst_device_id = device_id;
+	filemod_data->src_filepath_length = 0;
+	filemod_data->dst_filepath_length = 0;
+
+	void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
+					      filemod_data->payload);
+	payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);
+
+	size_t len = read_absolute_file_path_from_dentry(filp_dentry, payload);
+	barrier_var(len);
+	if (len <= MAX_FILEPATH_LENGTH) {
+		barrier_var(len);
+		payload += len;
+		filemod_data->dst_filepath_length = len;
+	}
+	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
+	unsigned long data_len = payload - (void*)filemod_data;
+	data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
+	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
+out:
+	bpf_stats_exit(&stats_ctx);
+	return 0;
+}
+
+SEC("kprobe/vfs_link")
+int BPF_KPROBE(kprobe__vfs_link,
+	       struct dentry* old_dentry, struct inode* dir,
+	       struct dentry* new_dentry, struct inode** delegated_inode)
+{
+	struct bpf_func_stats_ctx stats_ctx;
+	bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_link);
+
+	u32 src_device_id = 0;
+	u64 src_file_ino = 0;
+	u32 dst_device_id = 0;
+	u64 dst_file_ino = 0;
+	if (!is_dentry_allowed_for_filemod(old_dentry, &src_device_id, &src_file_ino) &&
+	    !is_dentry_allowed_for_filemod(new_dentry, &dst_device_id, &dst_file_ino))
+		goto out;
+
+	int zero = 0;
+	struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
+	if (!filemod_data)
+		goto out;
+
+	u32 pid = get_userspace_pid();
+	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
+
+	filemod_data->meta.type = FILEMOD_EVENT;
+	filemod_data->fmod_type = FMOD_LINK;
+	filemod_data->dst_flags = 0;
+	filemod_data->src_inode = src_file_ino;
+	filemod_data->dst_inode = dst_file_ino;
+	filemod_data->src_device_id = src_device_id;
+	filemod_data->dst_device_id = dst_device_id;
+	filemod_data->src_filepath_length = 0;
+	filemod_data->dst_filepath_length = 0;
+
+	void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
+					      filemod_data->payload);
+	payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);
+
+	size_t len = read_absolute_file_path_from_dentry(old_dentry, payload);
+	barrier_var(len);
+	if (len <= MAX_FILEPATH_LENGTH) {
+		barrier_var(len);
+		payload += len;
+		filemod_data->src_filepath_length = len;
+	}
+
+	len = read_absolute_file_path_from_dentry(new_dentry, payload);
+	barrier_var(len);
+	if (len <= MAX_FILEPATH_LENGTH) {
+		barrier_var(len);
+		payload += len;
+		filemod_data->dst_filepath_length = len;
+	}
+
+	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
+	unsigned long data_len = payload - (void*)filemod_data;
+	data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
+	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
+out:
+	bpf_stats_exit(&stats_ctx);
+	return 0;
+}
+
+SEC("kprobe/vfs_symlink")
+int BPF_KPROBE(kprobe__vfs_symlink, struct inode* dir, struct dentry* dentry,
+	       const char* oldname)
+{
+	struct bpf_func_stats_ctx stats_ctx;
+	bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_symlink);
+
+	u32 dst_device_id = 0;
+	u64 dst_file_ino = 0;
+	if (!is_dentry_allowed_for_filemod(dentry, &dst_device_id, &dst_file_ino))
+		goto out;
+
+	int zero = 0;
+	struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
+	if (!filemod_data)
+		goto out;
+
+	u32 pid = get_userspace_pid();
+	struct task_struct* task = (struct task_struct*)bpf_get_current_task();
+
+	filemod_data->meta.type = FILEMOD_EVENT;
+	filemod_data->fmod_type = FMOD_SYMLINK;
+	filemod_data->dst_flags = 0;
+	filemod_data->src_inode = 0;
+	filemod_data->dst_inode = dst_file_ino;
+	filemod_data->src_device_id = 0;
+	filemod_data->dst_device_id = dst_device_id;
+	filemod_data->src_filepath_length = 0;
+	filemod_data->dst_filepath_length = 0;
+
+	void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
+					      filemod_data->payload);
+	payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);
+
+	size_t len = bpf_probe_read_str(payload, MAX_FILEPATH_LENGTH, oldname);
+	barrier_var(len);
+	if (len <= MAX_FILEPATH_LENGTH) {
+		barrier_var(len);
+		payload += len;
+		filemod_data->src_filepath_length = len;
+	}
+	len = read_absolute_file_path_from_dentry(dentry, payload);
+	barrier_var(len);
+	if (len <= MAX_FILEPATH_LENGTH) {
+		barrier_var(len);
+		payload += len;
+		filemod_data->dst_filepath_length = len;
+	}
+	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
+	unsigned long data_len = payload - (void*)filemod_data;
+	data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
+	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
+out:
+	bpf_stats_exit(&stats_ctx);
+	return 0;
+}
+
+SEC("raw_tracepoint/sched_process_fork")
+int raw_tracepoint__sched_process_fork(struct bpf_raw_tracepoint_args* ctx)
+{
+	struct bpf_func_stats_ctx stats_ctx;
+	bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_fork);
+
+	int zero = 0;
+	struct var_fork_data_t* fork_data = bpf_map_lookup_elem(&data_heap, &zero);
+	if (!fork_data)
+		goto out;
+
+	struct task_struct* parent = (struct task_struct*)ctx->args[0];
+	struct task_struct* child = (struct task_struct*)ctx->args[1];
+	fork_data->meta.type = FORK_EVENT;
+
+	void* payload = populate_var_metadata(&fork_data->meta, child,
+					      BPF_CORE_READ(child, pid), fork_data->payload);
+	fork_data->parent_pid = BPF_CORE_READ(parent, pid);
+	fork_data->parent_exec_id = BPF_CORE_READ(parent, self_exec_id);
+	fork_data->parent_start_time = BPF_CORE_READ(parent, start_time);
+	bpf_stats_pre_submit_var_perf_event(&stats_ctx, &fork_data->meta);
+
+	unsigned long data_len = payload - (void*)fork_data;
+	data_len = data_len > sizeof(*fork_data) ? sizeof(*fork_data) : data_len;
+	bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, fork_data, data_len);
+out:
+	bpf_stats_exit(&stats_ctx);
+	return 0;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/profiler1.c b/tools/testing/selftests/bpf/progs/profiler1.c
new file mode 100644
index 000000000000..4df9088bfc00
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/profiler1.c
@@ -0,0 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#define barrier_var(var) asm volatile("" : "=r"(var) : "0"(var))
+#define UNROLL
+#define INLINE __always_inline
+#include "profiler.inc.h"
diff --git a/tools/testing/selftests/bpf/progs/profiler2.c b/tools/testing/selftests/bpf/progs/profiler2.c
new file mode 100644
index 000000000000..0f32a3cbf556
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/profiler2.c
@@ -0,0 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#define barrier_var(var) /**/
+/* undef #define UNROLL */
+#define INLINE /**/
+#include "profiler.inc.h"
diff --git a/tools/testing/selftests/bpf/progs/profiler3.c b/tools/testing/selftests/bpf/progs/profiler3.c
new file mode 100644
index 000000000000..6249fc31ccb0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/profiler3.c
@@ -0,0 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#define barrier_var(var) /**/
+#define UNROLL
+#define INLINE __noinline
+#include "profiler.inc.h"
diff --git a/tools/testing/selftests/bpf/progs/pyperf.h b/tools/testing/selftests/bpf/progs/pyperf.h
index cc615b82b56e..2fb7adafb6b6 100644
--- a/tools/testing/selftests/bpf/progs/pyperf.h
+++ b/tools/testing/selftests/bpf/progs/pyperf.h
@@ -67,7 +67,12 @@ typedef struct {
 	void* co_name; // PyCodeObject.co_name
 } FrameData;
 
-static __always_inline void *get_thread_state(void *tls_base, PidData *pidData)
+#ifdef SUBPROGS
+__noinline
+#else
+__always_inline
+#endif
+static void *get_thread_state(void *tls_base, PidData *pidData)
 {
 	void* thread_state;
 	int key;
@@ -155,7 +160,9 @@ struct {
 } stackmap SEC(".maps");
 
 #ifdef GLOBAL_FUNC
-__attribute__((noinline))
+__noinline
+#elif defined(SUBPROGS)
+static __noinline
 #else
 static __always_inline
 #endif
diff --git a/tools/testing/selftests/bpf/progs/pyperf_subprogs.c b/tools/testing/selftests/bpf/progs/pyperf_subprogs.c
new file mode 100644
index 000000000000..60e27a7f0cca
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/pyperf_subprogs.c
@@ -0,0 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#define STACK_MAX_LEN 50
+#define SUBPROGS
+#include "pyperf.h"
diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h
index ad61b722a9de..7de534f38c3f 100644
--- a/tools/testing/selftests/bpf/progs/strobemeta.h
+++ b/tools/testing/selftests/bpf/progs/strobemeta.h
@@ -266,8 +266,12 @@ struct tls_index {
 	uint64_t offset;
 };
 
-static __always_inline void *calc_location(struct strobe_value_loc *loc,
-					   void *tls_base)
+#ifdef SUBPROGS
+__noinline
+#else
+__always_inline
+#endif
+static void *calc_location(struct strobe_value_loc *loc, void *tls_base)
 {
 	/*
 	 * tls_mode value is:
@@ -327,10 +331,15 @@ static __always_inline void *calc_location(struct strobe_value_loc *loc,
 		: NULL;
 }
 
-static __always_inline void read_int_var(struct strobemeta_cfg *cfg,
-					 size_t idx, void *tls_base,
-					 struct strobe_value_generic *value,
-					 struct strobemeta_payload *data)
+#ifdef SUBPROGS
+__noinline
+#else
+__always_inline
+#endif
+static void read_int_var(struct strobemeta_cfg *cfg,
+			 size_t idx, void *tls_base,
+			 struct strobe_value_generic *value,
+			 struct strobemeta_payload *data)
 {
 	void *location = calc_location(&cfg->int_locs[idx], tls_base);
 	if (!location)
@@ -440,8 +449,13 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
  * read_strobe_meta returns NULL, if no metadata was read; otherwise returns
  * pointer to *right after* payload ends
  */
-static __always_inline void *read_strobe_meta(struct task_struct *task,
-					      struct strobemeta_payload *data)
+#ifdef SUBPROGS
+__noinline
+#else
+__always_inline
+#endif
+static void *read_strobe_meta(struct task_struct *task,
+			      struct strobemeta_payload *data)
 {
 	pid_t pid = bpf_get_current_pid_tgid() >> 32;
 	struct strobe_value_generic value = {0};
diff --git a/tools/testing/selftests/bpf/progs/strobemeta_subprogs.c b/tools/testing/selftests/bpf/progs/strobemeta_subprogs.c
new file mode 100644
index 000000000000..b6c01f8fc559
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/strobemeta_subprogs.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+// Copyright (c) 2019 Facebook
+
+#define STROBE_MAX_INTS 2
+#define STROBE_MAX_STRS 25
+#define STROBE_MAX_MAPS 13
+#define STROBE_MAX_MAP_ENTRIES 20
+#define NO_UNROLL
+#define SUBPROGS
+#include "strobemeta.h"
diff --git a/tools/testing/selftests/bpf/progs/tailcall1.c b/tools/testing/selftests/bpf/progs/tailcall1.c
index 1f407e65ae52..7115bcefbe8a 100644
--- a/tools/testing/selftests/bpf/progs/tailcall1.c
+++ b/tools/testing/selftests/bpf/progs/tailcall1.c
@@ -26,20 +26,20 @@ int entry(struct __sk_buff *skb)
 	/* Multiple locations to make sure we patch
 	 * all of them.
 	 */
-	bpf_tail_call(skb, &jmp_table, 0);
-	bpf_tail_call(skb, &jmp_table, 0);
-	bpf_tail_call(skb, &jmp_table, 0);
-	bpf_tail_call(skb, &jmp_table, 0);
-
-	bpf_tail_call(skb, &jmp_table, 1);
-	bpf_tail_call(skb, &jmp_table, 1);
-	bpf_tail_call(skb, &jmp_table, 1);
-	bpf_tail_call(skb, &jmp_table, 1);
-
-	bpf_tail_call(skb, &jmp_table, 2);
-	bpf_tail_call(skb, &jmp_table, 2);
-	bpf_tail_call(skb, &jmp_table, 2);
-	bpf_tail_call(skb, &jmp_table, 2);
+	bpf_tail_call_static(skb, &jmp_table, 0);
+	bpf_tail_call_static(skb, &jmp_table, 0);
+	bpf_tail_call_static(skb, &jmp_table, 0);
+	bpf_tail_call_static(skb, &jmp_table, 0);
+
+	bpf_tail_call_static(skb, &jmp_table, 1);
+	bpf_tail_call_static(skb, &jmp_table, 1);
+	bpf_tail_call_static(skb, &jmp_table, 1);
+	bpf_tail_call_static(skb, &jmp_table, 1);
+
+	bpf_tail_call_static(skb, &jmp_table, 2);
+	bpf_tail_call_static(skb, &jmp_table, 2);
+	bpf_tail_call_static(skb, &jmp_table, 2);
+	bpf_tail_call_static(skb, &jmp_table, 2);
 
 	return 3;
 }
diff --git a/tools/testing/selftests/bpf/progs/tailcall2.c b/tools/testing/selftests/bpf/progs/tailcall2.c
index a093e739cf0e..0431e4fe7efd 100644
--- a/tools/testing/selftests/bpf/progs/tailcall2.c
+++ b/tools/testing/selftests/bpf/progs/tailcall2.c
@@ -13,14 +13,14 @@ struct {
 SEC("classifier/0")
 int bpf_func_0(struct __sk_buff *skb)
 {
-	bpf_tail_call(skb, &jmp_table, 1);
+	bpf_tail_call_static(skb, &jmp_table, 1);
 	return 0;
 }
 
 SEC("classifier/1")
 int bpf_func_1(struct __sk_buff *skb)
 {
-	bpf_tail_call(skb, &jmp_table, 2);
+	bpf_tail_call_static(skb, &jmp_table, 2);
 	return 1;
 }
 
@@ -33,25 +33,25 @@ int bpf_func_2(struct __sk_buff *skb)
 SEC("classifier/3")
 int bpf_func_3(struct __sk_buff *skb)
 {
-	bpf_tail_call(skb, &jmp_table, 4);
+	bpf_tail_call_static(skb, &jmp_table, 4);
 	return 3;
 }
 
 SEC("classifier/4")
 int bpf_func_4(struct __sk_buff *skb)
 {
-	bpf_tail_call(skb, &jmp_table, 3);
+	bpf_tail_call_static(skb, &jmp_table, 3);
 	return 4;
 }
 
 SEC("classifier")
 int entry(struct __sk_buff *skb)
 {
-	bpf_tail_call(skb, &jmp_table, 0);
+	bpf_tail_call_static(skb, &jmp_table, 0);
 	/* Check multi-prog update. */
-	bpf_tail_call(skb, &jmp_table, 2);
+	bpf_tail_call_static(skb, &jmp_table, 2);
 	/* Check tail call limit. */
-	bpf_tail_call(skb, &jmp_table, 3);
+	bpf_tail_call_static(skb, &jmp_table, 3);
 	return 3;
 }
 
diff --git a/tools/testing/selftests/bpf/progs/tailcall3.c b/tools/testing/selftests/bpf/progs/tailcall3.c
index cabda877cf0a..739dc2a51e74 100644
--- a/tools/testing/selftests/bpf/progs/tailcall3.c
+++ b/tools/testing/selftests/bpf/progs/tailcall3.c
@@ -16,14 +16,14 @@ SEC("classifier/0")
 int bpf_func_0(struct __sk_buff *skb)
 {
 	count++;
-	bpf_tail_call(skb, &jmp_table, 0);
+	bpf_tail_call_static(skb, &jmp_table, 0);
 	return 1;
 }
 
 SEC("classifier")
 int entry(struct __sk_buff *skb)
 {
-	bpf_tail_call(skb, &jmp_table, 0);
+	bpf_tail_call_static(skb, &jmp_table, 0);
 	return 0;
 }
 
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf1.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf1.c
new file mode 100644
index 000000000000..0103f3dd9f02
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf1.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+	__uint(max_entries, 2);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+#define TAIL_FUNC(x) 				\
+	SEC("classifier/" #x)			\
+	int bpf_func_##x(struct __sk_buff *skb)	\
+	{					\
+		return x;			\
+	}
+TAIL_FUNC(0)
+TAIL_FUNC(1)
+
+static __noinline
+int subprog_tail(struct __sk_buff *skb)
+{
+	bpf_tail_call_static(skb, &jmp_table, 0);
+
+	return skb->len * 2;
+}
+
+SEC("classifier")
+int entry(struct __sk_buff *skb)
+{
+	bpf_tail_call_static(skb, &jmp_table, 1);
+
+	return subprog_tail(skb);
+}
+
+char __license[] SEC("license") = "GPL";
+int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c
new file mode 100644
index 000000000000..7b1c04183824
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_legacy.h"
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+	__uint(max_entries, 1);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+static __noinline
+int subprog_tail(struct __sk_buff *skb)
+{
+	if (load_byte(skb, 0))
+		bpf_tail_call_static(skb, &jmp_table, 1);
+	else
+		bpf_tail_call_static(skb, &jmp_table, 0);
+	return 1;
+}
+
+static volatile int count;
+
+SEC("classifier/0")
+int bpf_func_0(struct __sk_buff *skb)
+{
+	count++;
+	return subprog_tail(skb);
+}
+
+SEC("classifier")
+int entry(struct __sk_buff *skb)
+{
+	bpf_tail_call_static(skb, &jmp_table, 0);
+
+	return 0;
+}
+
+char __license[] SEC("license") = "GPL";
+int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c
new file mode 100644
index 000000000000..0d5482bea6c9
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_legacy.h"
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+	__uint(max_entries, 2);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+__noinline
+int subprog_tail2(struct __sk_buff *skb)
+{
+	volatile char arr[64] = {};
+
+	if (load_word(skb, 0) || load_half(skb, 0))
+		bpf_tail_call_static(skb, &jmp_table, 10);
+	else
+		bpf_tail_call_static(skb, &jmp_table, 1);
+
+	return skb->len;
+}
+
+static __noinline
+int subprog_tail(struct __sk_buff *skb)
+{
+	volatile char arr[64] = {};
+
+	bpf_tail_call_static(skb, &jmp_table, 0);
+
+	return skb->len * 2;
+}
+
+SEC("classifier/0")
+int bpf_func_0(struct __sk_buff *skb)
+{
+	volatile char arr[128] = {};
+
+	return subprog_tail2(skb);
+}
+
+SEC("classifier/1")
+int bpf_func_1(struct __sk_buff *skb)
+{
+	volatile char arr[128] = {};
+
+	return skb->len * 3;
+}
+
+SEC("classifier")
+int entry(struct __sk_buff *skb)
+{
+	volatile char arr[128] = {};
+
+	return subprog_tail(skb);
+}
+
+char __license[] SEC("license") = "GPL";
+int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c
new file mode 100644
index 000000000000..9a1b166b7fbe
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+	__uint(max_entries, 3);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+static volatile int count;
+
+__noinline
+int subprog_tail_2(struct __sk_buff *skb)
+{
+	bpf_tail_call_static(skb, &jmp_table, 2);
+	return skb->len * 3;
+}
+
+__noinline
+int subprog_tail_1(struct __sk_buff *skb)
+{
+	bpf_tail_call_static(skb, &jmp_table, 1);
+	return skb->len * 2;
+}
+
+__noinline
+int subprog_tail(struct __sk_buff *skb)
+{
+	bpf_tail_call_static(skb, &jmp_table, 0);
+	return skb->len;
+}
+
+SEC("classifier/1")
+int bpf_func_1(struct __sk_buff *skb)
+{
+	return subprog_tail_2(skb);
+}
+
+SEC("classifier/2")
+int bpf_func_2(struct __sk_buff *skb)
+{
+	count++;
+	return subprog_tail_2(skb);
+}
+
+SEC("classifier/0")
+int bpf_func_0(struct __sk_buff *skb)
+{
+	return subprog_tail_1(skb);
+}
+
+SEC("classifier")
+int entry(struct __sk_buff *skb)
+{
+	return subprog_tail(skb);
+}
+
+char __license[] SEC("license") = "GPL";
+int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c b/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c
index e5093796be97..c1e0c8c7c55f 100644
--- a/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c
+++ b/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c
@@ -11,6 +11,13 @@ struct inner_map {
 } inner_map1 SEC(".maps"),
   inner_map2 SEC(".maps");
 
+struct inner_map_sz2 {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 2);
+	__type(key, int);
+	__type(value, int);
+} inner_map_sz2 SEC(".maps");
+
 struct outer_arr {
 	__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
 	__uint(max_entries, 3);
@@ -34,6 +41,43 @@ struct outer_arr {
 	.values = { (void *)&inner_map1, 0, (void *)&inner_map2 },
 };
 
+struct inner_map_sz3 {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(map_flags, BPF_F_INNER_MAP);
+	__uint(max_entries, 3);
+	__type(key, int);
+	__type(value, int);
+} inner_map3 SEC(".maps"),
+  inner_map4 SEC(".maps");
+
+struct inner_map_sz4 {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(map_flags, BPF_F_INNER_MAP);
+	__uint(max_entries, 5);
+	__type(key, int);
+	__type(value, int);
+} inner_map5 SEC(".maps");
+
+struct outer_arr_dyn {
+	__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+	__uint(max_entries, 3);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(int));
+	__array(values, struct {
+		__uint(type, BPF_MAP_TYPE_ARRAY);
+		__uint(map_flags, BPF_F_INNER_MAP);
+		__uint(max_entries, 1);
+		__type(key, int);
+		__type(value, int);
+	});
+} outer_arr_dyn SEC(".maps") = {
+	.values = {
+		[0] = (void *)&inner_map3,
+		[1] = (void *)&inner_map4,
+		[2] = (void *)&inner_map5,
+	},
+};
+
 struct outer_hash {
 	__uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
 	__uint(max_entries, 5);
@@ -50,6 +94,30 @@ struct outer_hash {
 	},
 };
 
+struct sockarr_sz1 {
+	__uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY);
+	__uint(max_entries, 1);
+	__type(key, int);
+	__type(value, int);
+} sockarr_sz1 SEC(".maps");
+
+struct sockarr_sz2 {
+	__uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY);
+	__uint(max_entries, 2);
+	__type(key, int);
+	__type(value, int);
+} sockarr_sz2 SEC(".maps");
+
+struct outer_sockarr_sz1 {
+	__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+	__uint(max_entries, 1);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(int));
+	__array(values, struct sockarr_sz1);
+} outer_sockarr SEC(".maps") = {
+	.values = { (void *)&sockarr_sz1 },
+};
+
 int input = 0;
 
 SEC("raw_tp/sys_enter")
@@ -70,6 +138,12 @@ int handle__sys_enter(void *ctx)
 	val = input + 1;
 	bpf_map_update_elem(inner_map, &key, &val, 0);
 
+	inner_map = bpf_map_lookup_elem(&outer_arr_dyn, &key);
+	if (!inner_map)
+		return 1;
+	val = input + 2;
+	bpf_map_update_elem(inner_map, &key, &val, 0);
+
 	return 0;
 }
 
diff --git a/tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c b/tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c
new file mode 100644
index 000000000000..9a6b85dd52d2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c
@@ -0,0 +1,174 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <string.h>
+#include <errno.h>
+#include <netinet/in.h>
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/if_ether.h>
+#include <linux/pkt_cls.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#include "bpf_tcp_helpers.h"
+
+struct sockaddr_in6 srv_sa6 = {};
+__u16 listen_tp_sport = 0;
+__u16 req_sk_sport = 0;
+__u32 recv_cookie = 0;
+__u32 gen_cookie = 0;
+__u32 linum = 0;
+
+#define LOG() ({ if (!linum) linum = __LINE__; })
+
+static void test_syncookie_helper(struct ipv6hdr *ip6h, struct tcphdr *th,
+				  struct tcp_sock *tp,
+				  struct __sk_buff *skb)
+{
+	if (th->syn) {
+		__s64 mss_cookie;
+		void *data_end;
+
+		data_end = (void *)(long)(skb->data_end);
+
+		if (th->doff * 4 != 40) {
+			LOG();
+			return;
+		}
+
+		if ((void *)th + 40 > data_end) {
+			LOG();
+			return;
+		}
+
+		mss_cookie = bpf_tcp_gen_syncookie(tp, ip6h, sizeof(*ip6h),
+						   th, 40);
+		if (mss_cookie < 0) {
+			if (mss_cookie != -ENOENT)
+				LOG();
+		} else {
+			gen_cookie = (__u32)mss_cookie;
+		}
+	} else if (gen_cookie) {
+		/* It was in cookie mode */
+		int ret = bpf_tcp_check_syncookie(tp, ip6h, sizeof(*ip6h),
+						  th, sizeof(*th));
+
+		if (ret < 0) {
+			if (ret != -ENOENT)
+				LOG();
+		} else {
+			recv_cookie = bpf_ntohl(th->ack_seq) - 1;
+		}
+	}
+}
+
+static int handle_ip6_tcp(struct ipv6hdr *ip6h, struct __sk_buff *skb)
+{
+	struct bpf_sock_tuple *tuple;
+	struct bpf_sock *bpf_skc;
+	unsigned int tuple_len;
+	struct tcphdr *th;
+	void *data_end;
+
+	data_end = (void *)(long)(skb->data_end);
+
+	th = (struct tcphdr *)(ip6h + 1);
+	if (th + 1 > data_end)
+		return TC_ACT_OK;
+
+	/* Is it the testing traffic? */
+	if (th->dest != srv_sa6.sin6_port)
+		return TC_ACT_OK;
+
+	tuple_len = sizeof(tuple->ipv6);
+	tuple = (struct bpf_sock_tuple *)&ip6h->saddr;
+	if ((void *)tuple + tuple_len > data_end) {
+		LOG();
+		return TC_ACT_OK;
+	}
+
+	bpf_skc = bpf_skc_lookup_tcp(skb, tuple, tuple_len,
+				     BPF_F_CURRENT_NETNS, 0);
+	if (!bpf_skc) {
+		LOG();
+		return TC_ACT_OK;
+	}
+
+	if (bpf_skc->state == BPF_TCP_NEW_SYN_RECV) {
+		struct request_sock *req_sk;
+
+		req_sk = (struct request_sock *)bpf_skc_to_tcp_request_sock(bpf_skc);
+		if (!req_sk) {
+			LOG();
+			goto release;
+		}
+
+		if (bpf_sk_assign(skb, req_sk, 0)) {
+			LOG();
+			goto release;
+		}
+
+		req_sk_sport = req_sk->__req_common.skc_num;
+
+		bpf_sk_release(req_sk);
+		return TC_ACT_OK;
+	} else if (bpf_skc->state == BPF_TCP_LISTEN) {
+		struct tcp_sock *tp;
+
+		tp = bpf_skc_to_tcp_sock(bpf_skc);
+		if (!tp) {
+			LOG();
+			goto release;
+		}
+
+		if (bpf_sk_assign(skb, tp, 0)) {
+			LOG();
+			goto release;
+		}
+
+		listen_tp_sport = tp->inet_conn.icsk_inet.sk.__sk_common.skc_num;
+
+		test_syncookie_helper(ip6h, th, tp, skb);
+		bpf_sk_release(tp);
+		return TC_ACT_OK;
+	}
+
+	if (bpf_sk_assign(skb, bpf_skc, 0))
+		LOG();
+
+release:
+	bpf_sk_release(bpf_skc);
+	return TC_ACT_OK;
+}
+
+SEC("classifier/ingress")
+int cls_ingress(struct __sk_buff *skb)
+{
+	struct ipv6hdr *ip6h;
+	struct ethhdr *eth;
+	void *data_end;
+
+	data_end = (void *)(long)(skb->data_end);
+
+	eth = (struct ethhdr *)(long)(skb->data);
+	if (eth + 1 > data_end)
+		return TC_ACT_OK;
+
+	if (eth->h_proto != bpf_htons(ETH_P_IPV6))
+		return TC_ACT_OK;
+
+	ip6h = (struct ipv6hdr *)(eth + 1);
+	if (ip6h + 1 > data_end)
+		return TC_ACT_OK;
+
+	if (ip6h->nexthdr == IPPROTO_TCP)
+		return handle_ip6_tcp(ip6h, skb);
+
+	return TC_ACT_OK;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect.c b/tools/testing/selftests/bpf/progs/test_cls_redirect.c
index f0b72e86bee5..c9f8464996ea 100644
--- a/tools/testing/selftests/bpf/progs/test_cls_redirect.c
+++ b/tools/testing/selftests/bpf/progs/test_cls_redirect.c
@@ -22,6 +22,12 @@
 
 #include "test_cls_redirect.h"
 
+#ifdef SUBPROGS
+#define INLINING __noinline
+#else
+#define INLINING __always_inline
+#endif
+
 #define offsetofend(TYPE, MEMBER) \
 	(offsetof(TYPE, MEMBER) + sizeof((((TYPE *)0)->MEMBER)))
 
@@ -125,7 +131,7 @@ typedef struct buf {
 	uint8_t *const tail;
 } buf_t;
 
-static size_t buf_off(const buf_t *buf)
+static __always_inline size_t buf_off(const buf_t *buf)
 {
 	/* Clang seems to optimize constructs like
 	 *    a - b + c
@@ -145,7 +151,7 @@ static size_t buf_off(const buf_t *buf)
 	return off;
 }
 
-static bool buf_copy(buf_t *buf, void *dst, size_t len)
+static __always_inline bool buf_copy(buf_t *buf, void *dst, size_t len)
 {
 	if (bpf_skb_load_bytes(buf->skb, buf_off(buf), dst, len)) {
 		return false;
@@ -155,7 +161,7 @@ static bool buf_copy(buf_t *buf, void *dst, size_t len)
 	return true;
 }
 
-static bool buf_skip(buf_t *buf, const size_t len)
+static __always_inline bool buf_skip(buf_t *buf, const size_t len)
 {
 	/* Check whether off + len is valid in the non-linear part. */
 	if (buf_off(buf) + len > buf->skb->len) {
@@ -173,7 +179,7 @@ static bool buf_skip(buf_t *buf, const size_t len)
  * If scratch is not NULL, the function will attempt to load non-linear
  * data via bpf_skb_load_bytes. On success, scratch is returned.
  */
-static void *buf_assign(buf_t *buf, const size_t len, void *scratch)
+static __always_inline void *buf_assign(buf_t *buf, const size_t len, void *scratch)
 {
 	if (buf->head + len > buf->tail) {
 		if (scratch == NULL) {
@@ -188,7 +194,7 @@ static void *buf_assign(buf_t *buf, const size_t len, void *scratch)
 	return ptr;
 }
 
-static bool pkt_skip_ipv4_options(buf_t *buf, const struct iphdr *ipv4)
+static INLINING bool pkt_skip_ipv4_options(buf_t *buf, const struct iphdr *ipv4)
 {
 	if (ipv4->ihl <= 5) {
 		return true;
@@ -197,13 +203,13 @@ static bool pkt_skip_ipv4_options(buf_t *buf, const struct iphdr *ipv4)
 	return buf_skip(buf, (ipv4->ihl - 5) * 4);
 }
 
-static bool ipv4_is_fragment(const struct iphdr *ip)
+static INLINING bool ipv4_is_fragment(const struct iphdr *ip)
 {
 	uint16_t frag_off = ip->frag_off & bpf_htons(IP_OFFSET_MASK);
 	return (ip->frag_off & bpf_htons(IP_MF)) != 0 || frag_off > 0;
 }
 
-static struct iphdr *pkt_parse_ipv4(buf_t *pkt, struct iphdr *scratch)
+static __always_inline struct iphdr *pkt_parse_ipv4(buf_t *pkt, struct iphdr *scratch)
 {
 	struct iphdr *ipv4 = buf_assign(pkt, sizeof(*ipv4), scratch);
 	if (ipv4 == NULL) {
@@ -222,7 +228,7 @@ static struct iphdr *pkt_parse_ipv4(buf_t *pkt, struct iphdr *scratch)
 }
 
 /* Parse the L4 ports from a packet, assuming a layout like TCP or UDP. */
-static bool pkt_parse_icmp_l4_ports(buf_t *pkt, flow_ports_t *ports)
+static INLINING bool pkt_parse_icmp_l4_ports(buf_t *pkt, flow_ports_t *ports)
 {
 	if (!buf_copy(pkt, ports, sizeof(*ports))) {
 		return false;
@@ -237,7 +243,7 @@ static bool pkt_parse_icmp_l4_ports(buf_t *pkt, flow_ports_t *ports)
 	return true;
 }
 
-static uint16_t pkt_checksum_fold(uint32_t csum)
+static INLINING uint16_t pkt_checksum_fold(uint32_t csum)
 {
 	/* The highest reasonable value for an IPv4 header
 	 * checksum requires two folds, so we just do that always.
@@ -247,7 +253,7 @@ static uint16_t pkt_checksum_fold(uint32_t csum)
 	return (uint16_t)~csum;
 }
 
-static void pkt_ipv4_checksum(struct iphdr *iph)
+static INLINING void pkt_ipv4_checksum(struct iphdr *iph)
 {
 	iph->check = 0;
 
@@ -268,10 +274,11 @@ static void pkt_ipv4_checksum(struct iphdr *iph)
 	iph->check = pkt_checksum_fold(acc);
 }
 
-static bool pkt_skip_ipv6_extension_headers(buf_t *pkt,
-					    const struct ipv6hdr *ipv6,
-					    uint8_t *upper_proto,
-					    bool *is_fragment)
+static INLINING
+bool pkt_skip_ipv6_extension_headers(buf_t *pkt,
+				     const struct ipv6hdr *ipv6,
+				     uint8_t *upper_proto,
+				     bool *is_fragment)
 {
 	/* We understand five extension headers.
 	 * https://tools.ietf.org/html/rfc8200#section-4.1 states that all
@@ -336,7 +343,7 @@ static bool pkt_skip_ipv6_extension_headers(buf_t *pkt,
  * scratch is allocated on the stack. However, this usage should be safe since
  * it's the callers stack after all.
  */
-static inline __attribute__((__always_inline__)) struct ipv6hdr *
+static __always_inline struct ipv6hdr *
 pkt_parse_ipv6(buf_t *pkt, struct ipv6hdr *scratch, uint8_t *proto,
 	       bool *is_fragment)
 {
@@ -354,20 +361,20 @@ pkt_parse_ipv6(buf_t *pkt, struct ipv6hdr *scratch, uint8_t *proto,
 
 /* Global metrics, per CPU
  */
-struct bpf_map_def metrics_map SEC("maps") = {
-	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
-	.key_size = sizeof(unsigned int),
-	.value_size = sizeof(metrics_t),
-	.max_entries = 1,
-};
-
-static metrics_t *get_global_metrics(void)
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, unsigned int);
+	__type(value, metrics_t);
+} metrics_map SEC(".maps");
+
+static INLINING metrics_t *get_global_metrics(void)
 {
 	uint64_t key = 0;
 	return bpf_map_lookup_elem(&metrics_map, &key);
 }
 
-static ret_t accept_locally(struct __sk_buff *skb, encap_headers_t *encap)
+static INLINING ret_t accept_locally(struct __sk_buff *skb, encap_headers_t *encap)
 {
 	const int payload_off =
 		sizeof(*encap) +
@@ -388,8 +395,8 @@ static ret_t accept_locally(struct __sk_buff *skb, encap_headers_t *encap)
 	return bpf_redirect(skb->ifindex, BPF_F_INGRESS);
 }
 
-static ret_t forward_with_gre(struct __sk_buff *skb, encap_headers_t *encap,
-			      struct in_addr *next_hop, metrics_t *metrics)
+static INLINING ret_t forward_with_gre(struct __sk_buff *skb, encap_headers_t *encap,
+				       struct in_addr *next_hop, metrics_t *metrics)
 {
 	metrics->forwarded_packets_total_gre++;
 
@@ -509,8 +516,8 @@ static ret_t forward_with_gre(struct __sk_buff *skb, encap_headers_t *encap,
 	return bpf_redirect(skb->ifindex, 0);
 }
 
-static ret_t forward_to_next_hop(struct __sk_buff *skb, encap_headers_t *encap,
-				 struct in_addr *next_hop, metrics_t *metrics)
+static INLINING ret_t forward_to_next_hop(struct __sk_buff *skb, encap_headers_t *encap,
+					  struct in_addr *next_hop, metrics_t *metrics)
 {
 	/* swap L2 addresses */
 	/* This assumes that packets are received from a router.
@@ -546,7 +553,7 @@ static ret_t forward_to_next_hop(struct __sk_buff *skb, encap_headers_t *encap,
 	return bpf_redirect(skb->ifindex, 0);
 }
 
-static ret_t skip_next_hops(buf_t *pkt, int n)
+static INLINING ret_t skip_next_hops(buf_t *pkt, int n)
 {
 	switch (n) {
 	case 1:
@@ -566,8 +573,8 @@ static ret_t skip_next_hops(buf_t *pkt, int n)
  * pkt is positioned just after the variable length GLB header
  * iff the call is successful.
  */
-static ret_t get_next_hop(buf_t *pkt, encap_headers_t *encap,
-			  struct in_addr *next_hop)
+static INLINING ret_t get_next_hop(buf_t *pkt, encap_headers_t *encap,
+				   struct in_addr *next_hop)
 {
 	if (encap->unigue.next_hop > encap->unigue.hop_count) {
 		return TC_ACT_SHOT;
@@ -601,8 +608,8 @@ static ret_t get_next_hop(buf_t *pkt, encap_headers_t *encap,
  * return value, and calling code works while still being "generic" to
  * IPv4 and IPv6.
  */
-static uint64_t fill_tuple(struct bpf_sock_tuple *tuple, void *iph,
-			   uint64_t iphlen, uint16_t sport, uint16_t dport)
+static INLINING uint64_t fill_tuple(struct bpf_sock_tuple *tuple, void *iph,
+				    uint64_t iphlen, uint16_t sport, uint16_t dport)
 {
 	switch (iphlen) {
 	case sizeof(struct iphdr): {
@@ -630,9 +637,9 @@ static uint64_t fill_tuple(struct bpf_sock_tuple *tuple, void *iph,
 	}
 }
 
-static verdict_t classify_tcp(struct __sk_buff *skb,
-			      struct bpf_sock_tuple *tuple, uint64_t tuplen,
-			      void *iph, struct tcphdr *tcp)
+static INLINING verdict_t classify_tcp(struct __sk_buff *skb,
+				       struct bpf_sock_tuple *tuple, uint64_t tuplen,
+				       void *iph, struct tcphdr *tcp)
 {
 	struct bpf_sock *sk =
 		bpf_skc_lookup_tcp(skb, tuple, tuplen, BPF_F_CURRENT_NETNS, 0);
@@ -663,8 +670,8 @@ static verdict_t classify_tcp(struct __sk_buff *skb,
 	return UNKNOWN;
 }
 
-static verdict_t classify_udp(struct __sk_buff *skb,
-			      struct bpf_sock_tuple *tuple, uint64_t tuplen)
+static INLINING verdict_t classify_udp(struct __sk_buff *skb,
+				       struct bpf_sock_tuple *tuple, uint64_t tuplen)
 {
 	struct bpf_sock *sk =
 		bpf_sk_lookup_udp(skb, tuple, tuplen, BPF_F_CURRENT_NETNS, 0);
@@ -681,9 +688,9 @@ static verdict_t classify_udp(struct __sk_buff *skb,
 	return UNKNOWN;
 }
 
-static verdict_t classify_icmp(struct __sk_buff *skb, uint8_t proto,
-			       struct bpf_sock_tuple *tuple, uint64_t tuplen,
-			       metrics_t *metrics)
+static INLINING verdict_t classify_icmp(struct __sk_buff *skb, uint8_t proto,
+					struct bpf_sock_tuple *tuple, uint64_t tuplen,
+					metrics_t *metrics)
 {
 	switch (proto) {
 	case IPPROTO_TCP:
@@ -698,7 +705,7 @@ static verdict_t classify_icmp(struct __sk_buff *skb, uint8_t proto,
 	}
 }
 
-static verdict_t process_icmpv4(buf_t *pkt, metrics_t *metrics)
+static INLINING verdict_t process_icmpv4(buf_t *pkt, metrics_t *metrics)
 {
 	struct icmphdr icmp;
 	if (!buf_copy(pkt, &icmp, sizeof(icmp))) {
@@ -745,7 +752,7 @@ static verdict_t process_icmpv4(buf_t *pkt, metrics_t *metrics)
 			     sizeof(tuple.ipv4), metrics);
 }
 
-static verdict_t process_icmpv6(buf_t *pkt, metrics_t *metrics)
+static INLINING verdict_t process_icmpv6(buf_t *pkt, metrics_t *metrics)
 {
 	struct icmp6hdr icmp6;
 	if (!buf_copy(pkt, &icmp6, sizeof(icmp6))) {
@@ -797,8 +804,8 @@ static verdict_t process_icmpv6(buf_t *pkt, metrics_t *metrics)
 			     metrics);
 }
 
-static verdict_t process_tcp(buf_t *pkt, void *iph, uint64_t iphlen,
-			     metrics_t *metrics)
+static INLINING verdict_t process_tcp(buf_t *pkt, void *iph, uint64_t iphlen,
+				      metrics_t *metrics)
 {
 	metrics->l4_protocol_packets_total_tcp++;
 
@@ -819,8 +826,8 @@ static verdict_t process_tcp(buf_t *pkt, void *iph, uint64_t iphlen,
 	return classify_tcp(pkt->skb, &tuple, tuplen, iph, tcp);
 }
 
-static verdict_t process_udp(buf_t *pkt, void *iph, uint64_t iphlen,
-			     metrics_t *metrics)
+static INLINING verdict_t process_udp(buf_t *pkt, void *iph, uint64_t iphlen,
+				      metrics_t *metrics)
 {
 	metrics->l4_protocol_packets_total_udp++;
 
@@ -837,7 +844,7 @@ static verdict_t process_udp(buf_t *pkt, void *iph, uint64_t iphlen,
 	return classify_udp(pkt->skb, &tuple, tuplen);
 }
 
-static verdict_t process_ipv4(buf_t *pkt, metrics_t *metrics)
+static INLINING verdict_t process_ipv4(buf_t *pkt, metrics_t *metrics)
 {
 	metrics->l3_protocol_packets_total_ipv4++;
 
@@ -874,7 +881,7 @@ static verdict_t process_ipv4(buf_t *pkt, metrics_t *metrics)
 	}
 }
 
-static verdict_t process_ipv6(buf_t *pkt, metrics_t *metrics)
+static INLINING verdict_t process_ipv6(buf_t *pkt, metrics_t *metrics)
 {
 	metrics->l3_protocol_packets_total_ipv6++;
 
diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect_subprogs.c b/tools/testing/selftests/bpf/progs/test_cls_redirect_subprogs.c
new file mode 100644
index 000000000000..eed26b70e3a2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_cls_redirect_subprogs.c
@@ -0,0 +1,2 @@
+#define SUBPROGS
+#include "test_cls_redirect.c"
diff --git a/tools/testing/selftests/bpf/progs/test_core_autosize.c b/tools/testing/selftests/bpf/progs/test_core_autosize.c
new file mode 100644
index 000000000000..44f5aa2e8956
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_core_autosize.c
@@ -0,0 +1,172 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+char _license[] SEC("license") = "GPL";
+
+/* fields of exactly the same size */
+struct test_struct___samesize {
+	void *ptr;
+	unsigned long long val1;
+	unsigned int val2;
+	unsigned short val3;
+	unsigned char val4;
+} __attribute((preserve_access_index));
+
+/* unsigned fields that have to be downsized by libbpf */
+struct test_struct___downsize {
+	void *ptr;
+	unsigned long val1;
+	unsigned long val2;
+	unsigned long val3;
+	unsigned long val4;
+	/* total sz: 40 */
+} __attribute__((preserve_access_index));
+
+/* fields with signed integers of wrong size, should be rejected */
+struct test_struct___signed {
+	void *ptr;
+	long val1;
+	long val2;
+	long val3;
+	long val4;
+} __attribute((preserve_access_index));
+
+/* real layout and sizes according to test's (32-bit) BTF */
+struct test_struct___real {
+	unsigned int ptr; /* can't use `void *`, it is always 8 byte in BPF target */
+	unsigned int val2;
+	unsigned long long val1;
+	unsigned short val3;
+	unsigned char val4;
+	unsigned char _pad;
+	/* total sz: 20 */
+};
+
+struct test_struct___real input = {
+	.ptr = 0x01020304,
+	.val1 = 0x1020304050607080,
+	.val2 = 0x0a0b0c0d,
+	.val3 = 0xfeed,
+	.val4 = 0xb9,
+	._pad = 0xff, /* make sure no accidental zeros are present */
+};
+
+unsigned long long ptr_samesized = 0;
+unsigned long long val1_samesized = 0;
+unsigned long long val2_samesized = 0;
+unsigned long long val3_samesized = 0;
+unsigned long long val4_samesized = 0;
+struct test_struct___real output_samesized = {};
+
+unsigned long long ptr_downsized = 0;
+unsigned long long val1_downsized = 0;
+unsigned long long val2_downsized = 0;
+unsigned long long val3_downsized = 0;
+unsigned long long val4_downsized = 0;
+struct test_struct___real output_downsized = {};
+
+unsigned long long ptr_probed = 0;
+unsigned long long val1_probed = 0;
+unsigned long long val2_probed = 0;
+unsigned long long val3_probed = 0;
+unsigned long long val4_probed = 0;
+
+unsigned long long ptr_signed = 0;
+unsigned long long val1_signed = 0;
+unsigned long long val2_signed = 0;
+unsigned long long val3_signed = 0;
+unsigned long long val4_signed = 0;
+struct test_struct___real output_signed = {};
+
+SEC("raw_tp/sys_exit")
+int handle_samesize(void *ctx)
+{
+	struct test_struct___samesize *in = (void *)&input;
+	struct test_struct___samesize *out = (void *)&output_samesized;
+
+	ptr_samesized = (unsigned long long)in->ptr;
+	val1_samesized = in->val1;
+	val2_samesized = in->val2;
+	val3_samesized = in->val3;
+	val4_samesized = in->val4;
+
+	out->ptr = in->ptr;
+	out->val1 = in->val1;
+	out->val2 = in->val2;
+	out->val3 = in->val3;
+	out->val4 = in->val4;
+
+	return 0;
+}
+
+SEC("raw_tp/sys_exit")
+int handle_downsize(void *ctx)
+{
+	struct test_struct___downsize *in = (void *)&input;
+	struct test_struct___downsize *out = (void *)&output_downsized;
+
+	ptr_downsized = (unsigned long long)in->ptr;
+	val1_downsized = in->val1;
+	val2_downsized = in->val2;
+	val3_downsized = in->val3;
+	val4_downsized = in->val4;
+
+	out->ptr = in->ptr;
+	out->val1 = in->val1;
+	out->val2 = in->val2;
+	out->val3 = in->val3;
+	out->val4 = in->val4;
+
+	return 0;
+}
+
+SEC("raw_tp/sys_enter")
+int handle_probed(void *ctx)
+{
+	struct test_struct___downsize *in = (void *)&input;
+	__u64 tmp;
+
+	tmp = 0;
+	bpf_core_read(&tmp, bpf_core_field_size(in->ptr), &in->ptr);
+	ptr_probed = tmp;
+
+	tmp = 0;
+	bpf_core_read(&tmp, bpf_core_field_size(in->val1), &in->val1);
+	val1_probed = tmp;
+
+	tmp = 0;
+	bpf_core_read(&tmp, bpf_core_field_size(in->val2), &in->val2);
+	val2_probed = tmp;
+
+	tmp = 0;
+	bpf_core_read(&tmp, bpf_core_field_size(in->val3), &in->val3);
+	val3_probed = tmp;
+
+	tmp = 0;
+	bpf_core_read(&tmp, bpf_core_field_size(in->val4), &in->val4);
+	val4_probed = tmp;
+
+	return 0;
+}
+
+SEC("raw_tp/sys_enter")
+int handle_signed(void *ctx)
+{
+	struct test_struct___signed *in = (void *)&input;
+	struct test_struct___signed *out = (void *)&output_signed;
+
+	val2_signed = in->val2;
+	val3_signed = in->val3;
+	val4_signed = in->val4;
+
+	out->val2= in->val2;
+	out->val3= in->val3;
+	out->val4= in->val4;
+
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_enumval.c b/tools/testing/selftests/bpf/progs/test_core_reloc_enumval.c
new file mode 100644
index 000000000000..e7ef3dada2bf
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_enumval.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+	char in[256];
+	char out[256];
+	bool skip;
+} data = {};
+
+enum named_enum {
+	NAMED_ENUM_VAL1 = 1,
+	NAMED_ENUM_VAL2 = 2,
+	NAMED_ENUM_VAL3 = 3,
+};
+
+typedef enum {
+	ANON_ENUM_VAL1 = 0x10,
+	ANON_ENUM_VAL2 = 0x20,
+	ANON_ENUM_VAL3 = 0x30,
+} anon_enum;
+
+struct core_reloc_enumval_output {
+	bool named_val1_exists;
+	bool named_val2_exists;
+	bool named_val3_exists;
+	bool anon_val1_exists;
+	bool anon_val2_exists;
+	bool anon_val3_exists;
+
+	int named_val1;
+	int named_val2;
+	int anon_val1;
+	int anon_val2;
+};
+
+SEC("raw_tracepoint/sys_enter")
+int test_core_enumval(void *ctx)
+{
+#if __has_builtin(__builtin_preserve_enum_value)
+	struct core_reloc_enumval_output *out = (void *)&data.out;
+	enum named_enum named = 0;
+	anon_enum anon = 0;
+
+	out->named_val1_exists = bpf_core_enum_value_exists(named, NAMED_ENUM_VAL1);
+	out->named_val2_exists = bpf_core_enum_value_exists(enum named_enum, NAMED_ENUM_VAL2);
+	out->named_val3_exists = bpf_core_enum_value_exists(enum named_enum, NAMED_ENUM_VAL3);
+
+	out->anon_val1_exists = bpf_core_enum_value_exists(anon, ANON_ENUM_VAL1);
+	out->anon_val2_exists = bpf_core_enum_value_exists(anon_enum, ANON_ENUM_VAL2);
+	out->anon_val3_exists = bpf_core_enum_value_exists(anon_enum, ANON_ENUM_VAL3);
+
+	out->named_val1 = bpf_core_enum_value(named, NAMED_ENUM_VAL1);
+	out->named_val2 = bpf_core_enum_value(named, NAMED_ENUM_VAL2);
+	/* NAMED_ENUM_VAL3 value is optional */
+
+	out->anon_val1 = bpf_core_enum_value(anon, ANON_ENUM_VAL1);
+	out->anon_val2 = bpf_core_enum_value(anon, ANON_ENUM_VAL2);
+	/* ANON_ENUM_VAL3 value is optional */
+#else
+	data.skip = true;
+#endif
+
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c b/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c
index aba928fd60d3..145028b52ad8 100644
--- a/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c
@@ -3,6 +3,7 @@
 
 #include <linux/bpf.h>
 #include <stdint.h>
+#include <stdbool.h>
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_core_read.h>
 
@@ -11,6 +12,7 @@ char _license[] SEC("license") = "GPL";
 struct {
 	char in[256];
 	char out[256];
+	bool skip;
 	uint64_t my_pid_tgid;
 } data = {};
 
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c b/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c
new file mode 100644
index 000000000000..fb60f8195c53
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+	char in[256];
+	char out[256];
+	bool skip;
+} data = {};
+
+struct a_struct {
+	int x;
+};
+
+union a_union {
+	int y;
+	int z;
+};
+
+typedef struct a_struct named_struct_typedef;
+
+typedef struct { int x, y, z; } anon_struct_typedef;
+
+typedef struct {
+	int a, b, c;
+} *struct_ptr_typedef;
+
+enum an_enum {
+	AN_ENUM_VAL1 = 1,
+	AN_ENUM_VAL2 = 2,
+	AN_ENUM_VAL3 = 3,
+};
+
+typedef int int_typedef;
+
+typedef enum { TYPEDEF_ENUM_VAL1, TYPEDEF_ENUM_VAL2 } enum_typedef;
+
+typedef void *void_ptr_typedef;
+
+typedef int (*func_proto_typedef)(long);
+
+typedef char arr_typedef[20];
+
+struct core_reloc_type_based_output {
+	bool struct_exists;
+	bool union_exists;
+	bool enum_exists;
+	bool typedef_named_struct_exists;
+	bool typedef_anon_struct_exists;
+	bool typedef_struct_ptr_exists;
+	bool typedef_int_exists;
+	bool typedef_enum_exists;
+	bool typedef_void_ptr_exists;
+	bool typedef_func_proto_exists;
+	bool typedef_arr_exists;
+
+	int struct_sz;
+	int union_sz;
+	int enum_sz;
+	int typedef_named_struct_sz;
+	int typedef_anon_struct_sz;
+	int typedef_struct_ptr_sz;
+	int typedef_int_sz;
+	int typedef_enum_sz;
+	int typedef_void_ptr_sz;
+	int typedef_func_proto_sz;
+	int typedef_arr_sz;
+};
+
+SEC("raw_tracepoint/sys_enter")
+int test_core_type_based(void *ctx)
+{
+#if __has_builtin(__builtin_preserve_type_info)
+	struct core_reloc_type_based_output *out = (void *)&data.out;
+
+	out->struct_exists = bpf_core_type_exists(struct a_struct);
+	out->union_exists = bpf_core_type_exists(union a_union);
+	out->enum_exists = bpf_core_type_exists(enum an_enum);
+	out->typedef_named_struct_exists = bpf_core_type_exists(named_struct_typedef);
+	out->typedef_anon_struct_exists = bpf_core_type_exists(anon_struct_typedef);
+	out->typedef_struct_ptr_exists = bpf_core_type_exists(struct_ptr_typedef);
+	out->typedef_int_exists = bpf_core_type_exists(int_typedef);
+	out->typedef_enum_exists = bpf_core_type_exists(enum_typedef);
+	out->typedef_void_ptr_exists = bpf_core_type_exists(void_ptr_typedef);
+	out->typedef_func_proto_exists = bpf_core_type_exists(func_proto_typedef);
+	out->typedef_arr_exists = bpf_core_type_exists(arr_typedef);
+
+	out->struct_sz = bpf_core_type_size(struct a_struct);
+	out->union_sz = bpf_core_type_size(union a_union);
+	out->enum_sz = bpf_core_type_size(enum an_enum);
+	out->typedef_named_struct_sz = bpf_core_type_size(named_struct_typedef);
+	out->typedef_anon_struct_sz = bpf_core_type_size(anon_struct_typedef);
+	out->typedef_struct_ptr_sz = bpf_core_type_size(struct_ptr_typedef);
+	out->typedef_int_sz = bpf_core_type_size(int_typedef);
+	out->typedef_enum_sz = bpf_core_type_size(enum_typedef);
+	out->typedef_void_ptr_sz = bpf_core_type_size(void_ptr_typedef);
+	out->typedef_func_proto_sz = bpf_core_type_size(func_proto_typedef);
+	out->typedef_arr_sz = bpf_core_type_size(arr_typedef);
+#else
+	data.skip = true;
+#endif
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c b/tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c
new file mode 100644
index 000000000000..22aba3f6e344
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c
@@ -0,0 +1,115 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+	char in[256];
+	char out[256];
+	bool skip;
+} data = {};
+
+/* some types are shared with test_core_reloc_type_based.c */
+struct a_struct {
+	int x;
+};
+
+union a_union {
+	int y;
+	int z;
+};
+
+enum an_enum {
+	AN_ENUM_VAL1 = 1,
+	AN_ENUM_VAL2 = 2,
+	AN_ENUM_VAL3 = 3,
+};
+
+typedef struct a_struct named_struct_typedef;
+
+typedef int (*func_proto_typedef)(long);
+
+typedef char arr_typedef[20];
+
+struct core_reloc_type_id_output {
+	int local_anon_struct;
+	int local_anon_union;
+	int local_anon_enum;
+	int local_anon_func_proto_ptr;
+	int local_anon_void_ptr;
+	int local_anon_arr;
+
+	int local_struct;
+	int local_union;
+	int local_enum;
+	int local_int;
+	int local_struct_typedef;
+	int local_func_proto_typedef;
+	int local_arr_typedef;
+
+	int targ_struct;
+	int targ_union;
+	int targ_enum;
+	int targ_int;
+	int targ_struct_typedef;
+	int targ_func_proto_typedef;
+	int targ_arr_typedef;
+};
+
+/* preserve types even if Clang doesn't support built-in */
+struct a_struct t1 = {};
+union a_union t2 = {};
+enum an_enum t3 = 0;
+named_struct_typedef t4 = {};
+func_proto_typedef t5 = 0;
+arr_typedef t6 = {};
+
+SEC("raw_tracepoint/sys_enter")
+int test_core_type_id(void *ctx)
+{
+	/* We use __builtin_btf_type_id() in this tests, but up until the time
+	 * __builtin_preserve_type_info() was added it contained a bug that
+	 * would make this test fail. The bug was fixed ([0]) with addition of
+	 * __builtin_preserve_type_info(), though, so that's what we are using
+	 * to detect whether this test has to be executed, however strange
+	 * that might look like.
+	 *
+	 *   [0] https://reviews.llvm.org/D85174
+	 */
+#if __has_builtin(__builtin_preserve_type_info)
+	struct core_reloc_type_id_output *out = (void *)&data.out;
+
+	out->local_anon_struct = bpf_core_type_id_local(struct { int marker_field; });
+	out->local_anon_union = bpf_core_type_id_local(union { int marker_field; });
+	out->local_anon_enum = bpf_core_type_id_local(enum { MARKER_ENUM_VAL = 123 });
+	out->local_anon_func_proto_ptr = bpf_core_type_id_local(_Bool(*)(int));
+	out->local_anon_void_ptr = bpf_core_type_id_local(void *);
+	out->local_anon_arr = bpf_core_type_id_local(_Bool[47]);
+
+	out->local_struct = bpf_core_type_id_local(struct a_struct);
+	out->local_union = bpf_core_type_id_local(union a_union);
+	out->local_enum = bpf_core_type_id_local(enum an_enum);
+	out->local_int = bpf_core_type_id_local(int);
+	out->local_struct_typedef = bpf_core_type_id_local(named_struct_typedef);
+	out->local_func_proto_typedef = bpf_core_type_id_local(func_proto_typedef);
+	out->local_arr_typedef = bpf_core_type_id_local(arr_typedef);
+
+	out->targ_struct = bpf_core_type_id_kernel(struct a_struct);
+	out->targ_union = bpf_core_type_id_kernel(union a_union);
+	out->targ_enum = bpf_core_type_id_kernel(enum an_enum);
+	out->targ_int = bpf_core_type_id_kernel(int);
+	out->targ_struct_typedef = bpf_core_type_id_kernel(named_struct_typedef);
+	out->targ_func_proto_typedef = bpf_core_type_id_kernel(func_proto_typedef);
+	out->targ_arr_typedef = bpf_core_type_id_kernel(arr_typedef);
+#else
+	data.skip = true;
+#endif
+
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_d_path.c b/tools/testing/selftests/bpf/progs/test_d_path.c
new file mode 100644
index 000000000000..84e1f883f97b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_d_path.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#define MAX_PATH_LEN		128
+#define MAX_FILES		7
+
+pid_t my_pid = 0;
+__u32 cnt_stat = 0;
+__u32 cnt_close = 0;
+char paths_stat[MAX_FILES][MAX_PATH_LEN] = {};
+char paths_close[MAX_FILES][MAX_PATH_LEN] = {};
+int rets_stat[MAX_FILES] = {};
+int rets_close[MAX_FILES] = {};
+
+int called_stat = 0;
+int called_close = 0;
+
+SEC("fentry/security_inode_getattr")
+int BPF_PROG(prog_stat, struct path *path, struct kstat *stat,
+	     __u32 request_mask, unsigned int query_flags)
+{
+	pid_t pid = bpf_get_current_pid_tgid() >> 32;
+	__u32 cnt = cnt_stat;
+	int ret;
+
+	called_stat = 1;
+
+	if (pid != my_pid)
+		return 0;
+
+	if (cnt >= MAX_FILES)
+		return 0;
+	ret = bpf_d_path(path, paths_stat[cnt], MAX_PATH_LEN);
+
+	rets_stat[cnt] = ret;
+	cnt_stat++;
+	return 0;
+}
+
+SEC("fentry/filp_close")
+int BPF_PROG(prog_close, struct file *file, void *id)
+{
+	pid_t pid = bpf_get_current_pid_tgid() >> 32;
+	__u32 cnt = cnt_close;
+	int ret;
+
+	called_close = 1;
+
+	if (pid != my_pid)
+		return 0;
+
+	if (cnt >= MAX_FILES)
+		return 0;
+	ret = bpf_d_path(&file->f_path,
+			 paths_close[cnt], MAX_PATH_LEN);
+
+	rets_close[cnt] = ret;
+	cnt_close++;
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_ksyms_btf.c b/tools/testing/selftests/bpf/progs/test_ksyms_btf.c
new file mode 100644
index 000000000000..bb8ea9270f29
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ksyms_btf.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Google */
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+
+__u64 out__runqueues_addr = -1;
+__u64 out__bpf_prog_active_addr = -1;
+
+__u32 out__rq_cpu = -1; /* percpu struct fields */
+int out__bpf_prog_active = -1; /* percpu int */
+
+__u32 out__this_rq_cpu = -1;
+int out__this_bpf_prog_active = -1;
+
+__u32 out__cpu_0_rq_cpu = -1; /* cpu_rq(0)->cpu */
+
+extern const struct rq runqueues __ksym; /* struct type global var. */
+extern const int bpf_prog_active __ksym; /* int type global var. */
+
+SEC("raw_tp/sys_enter")
+int handler(const void *ctx)
+{
+	struct rq *rq;
+	int *active;
+	__u32 cpu;
+
+	out__runqueues_addr = (__u64)&runqueues;
+	out__bpf_prog_active_addr = (__u64)&bpf_prog_active;
+
+	cpu = bpf_get_smp_processor_id();
+
+	/* test bpf_per_cpu_ptr() */
+	rq = (struct rq *)bpf_per_cpu_ptr(&runqueues, cpu);
+	if (rq)
+		out__rq_cpu = rq->cpu;
+	active = (int *)bpf_per_cpu_ptr(&bpf_prog_active, cpu);
+	if (active)
+		out__bpf_prog_active = *active;
+
+	rq = (struct rq *)bpf_per_cpu_ptr(&runqueues, 0);
+	if (rq) /* should always be valid, but we can't spare the check. */
+		out__cpu_0_rq_cpu = rq->cpu;
+
+	/* test bpf_this_cpu_ptr */
+	rq = (struct rq *)bpf_this_cpu_ptr(&runqueues);
+	out__this_rq_cpu = rq->cpu;
+	active = (int *)bpf_this_cpu_ptr(&bpf_prog_active);
+	out__this_bpf_prog_active = *active;
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c b/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c
index 28351936a438..b9e2753f4f91 100644
--- a/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c
+++ b/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c
@@ -17,9 +17,7 @@
 #include "test_iptunnel_common.h"
 #include <bpf/bpf_endian.h>
 
-int _version SEC("version") = 1;
-
-static __u32 rol32(__u32 word, unsigned int shift)
+static __always_inline __u32 rol32(__u32 word, unsigned int shift)
 {
 	return (word << shift) | (word >> ((-shift) & 31));
 }
@@ -52,7 +50,7 @@ static __u32 rol32(__u32 word, unsigned int shift)
 
 typedef unsigned int u32;
 
-static u32 jhash(const void *key, u32 length, u32 initval)
+static __noinline u32 jhash(const void *key, u32 length, u32 initval)
 {
 	u32 a, b, c;
 	const unsigned char *k = key;
@@ -88,7 +86,7 @@ static u32 jhash(const void *key, u32 length, u32 initval)
 	return c;
 }
 
-static u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
+static __noinline u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
 {
 	a += initval;
 	b += initval;
@@ -97,7 +95,7 @@ static u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
 	return c;
 }
 
-static u32 jhash_2words(u32 a, u32 b, u32 initval)
+static __noinline u32 jhash_2words(u32 a, u32 b, u32 initval)
 {
 	return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2));
 }
@@ -200,8 +198,7 @@ struct {
 	__type(value, struct ctl_value);
 } ctl_array SEC(".maps");
 
-static __u32 get_packet_hash(struct packet_description *pckt,
-			     bool ipv6)
+static __noinline __u32 get_packet_hash(struct packet_description *pckt, bool ipv6)
 {
 	if (ipv6)
 		return jhash_2words(jhash(pckt->srcv6, 16, MAX_VIPS),
@@ -210,10 +207,10 @@ static __u32 get_packet_hash(struct packet_description *pckt,
 		return jhash_2words(pckt->src, pckt->ports, CH_RINGS_SIZE);
 }
 
-static bool get_packet_dst(struct real_definition **real,
-			   struct packet_description *pckt,
-			   struct vip_meta *vip_info,
-			   bool is_ipv6)
+static __noinline bool get_packet_dst(struct real_definition **real,
+				      struct packet_description *pckt,
+				      struct vip_meta *vip_info,
+				      bool is_ipv6)
 {
 	__u32 hash = get_packet_hash(pckt, is_ipv6);
 	__u32 key = RING_SIZE * vip_info->vip_num + hash % RING_SIZE;
@@ -233,8 +230,8 @@ static bool get_packet_dst(struct real_definition **real,
 	return true;
 }
 
-static int parse_icmpv6(void *data, void *data_end, __u64 off,
-			struct packet_description *pckt)
+static __noinline int parse_icmpv6(void *data, void *data_end, __u64 off,
+				   struct packet_description *pckt)
 {
 	struct icmp6hdr *icmp_hdr;
 	struct ipv6hdr *ip6h;
@@ -255,8 +252,8 @@ static int parse_icmpv6(void *data, void *data_end, __u64 off,
 	return TC_ACT_UNSPEC;
 }
 
-static int parse_icmp(void *data, void *data_end, __u64 off,
-		      struct packet_description *pckt)
+static __noinline int parse_icmp(void *data, void *data_end, __u64 off,
+				 struct packet_description *pckt)
 {
 	struct icmphdr *icmp_hdr;
 	struct iphdr *iph;
@@ -280,8 +277,8 @@ static int parse_icmp(void *data, void *data_end, __u64 off,
 	return TC_ACT_UNSPEC;
 }
 
-static bool parse_udp(void *data, __u64 off, void *data_end,
-		      struct packet_description *pckt)
+static __noinline bool parse_udp(void *data, __u64 off, void *data_end,
+				 struct packet_description *pckt)
 {
 	struct udphdr *udp;
 	udp = data + off;
@@ -299,8 +296,8 @@ static bool parse_udp(void *data, __u64 off, void *data_end,
 	return true;
 }
 
-static bool parse_tcp(void *data, __u64 off, void *data_end,
-		      struct packet_description *pckt)
+static __noinline bool parse_tcp(void *data, __u64 off, void *data_end,
+				 struct packet_description *pckt)
 {
 	struct tcphdr *tcp;
 
@@ -321,8 +318,8 @@ static bool parse_tcp(void *data, __u64 off, void *data_end,
 	return true;
 }
 
-static int process_packet(void *data, __u64 off, void *data_end,
-			  bool is_ipv6, struct __sk_buff *skb)
+static __noinline int process_packet(void *data, __u64 off, void *data_end,
+				     bool is_ipv6, struct __sk_buff *skb)
 {
 	void *pkt_start = (void *)(long)skb->data;
 	struct packet_description pckt = {};
diff --git a/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c b/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c
new file mode 100644
index 000000000000..6077a025092c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c
@@ -0,0 +1,325 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <stddef.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/socket.h>
+#include <linux/bpf.h>
+#include <linux/types.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#define BPF_PROG_TEST_TCP_HDR_OPTIONS
+#include "test_tcp_hdr_options.h"
+
+__u16 last_addr16_n = __bpf_htons(1);
+__u16 active_lport_n = 0;
+__u16 active_lport_h = 0;
+__u16 passive_lport_n = 0;
+__u16 passive_lport_h = 0;
+
+/* options received at passive side */
+unsigned int nr_pure_ack = 0;
+unsigned int nr_data = 0;
+unsigned int nr_syn = 0;
+unsigned int nr_fin = 0;
+
+/* Check the header received from the active side */
+static int __check_active_hdr_in(struct bpf_sock_ops *skops, bool check_syn)
+{
+	union {
+		struct tcphdr th;
+		struct ipv6hdr ip6;
+		struct tcp_exprm_opt exprm_opt;
+		struct tcp_opt reg_opt;
+		__u8 data[100]; /* IPv6 (40) + Max TCP hdr (60) */
+	} hdr = {};
+	__u64 load_flags = check_syn ? BPF_LOAD_HDR_OPT_TCP_SYN : 0;
+	struct tcphdr *pth;
+	int ret;
+
+	hdr.reg_opt.kind = 0xB9;
+
+	/* The option is 4 bytes long instead of 2 bytes */
+	ret = bpf_load_hdr_opt(skops, &hdr.reg_opt, 2, load_flags);
+	if (ret != -ENOSPC)
+		RET_CG_ERR(ret);
+
+	/* Test searching magic with regular kind */
+	hdr.reg_opt.len = 4;
+	ret = bpf_load_hdr_opt(skops, &hdr.reg_opt, sizeof(hdr.reg_opt),
+			       load_flags);
+	if (ret != -EINVAL)
+		RET_CG_ERR(ret);
+
+	hdr.reg_opt.len = 0;
+	ret = bpf_load_hdr_opt(skops, &hdr.reg_opt, sizeof(hdr.reg_opt),
+			       load_flags);
+	if (ret != 4 || hdr.reg_opt.len != 4 || hdr.reg_opt.kind != 0xB9 ||
+	    hdr.reg_opt.data[0] != 0xfa || hdr.reg_opt.data[1] != 0xce)
+		RET_CG_ERR(ret);
+
+	/* Test searching experimental option with invalid kind length */
+	hdr.exprm_opt.kind = TCPOPT_EXP;
+	hdr.exprm_opt.len = 5;
+	hdr.exprm_opt.magic = 0;
+	ret = bpf_load_hdr_opt(skops, &hdr.exprm_opt, sizeof(hdr.exprm_opt),
+			       load_flags);
+	if (ret != -EINVAL)
+		RET_CG_ERR(ret);
+
+	/* Test searching experimental option with 0 magic value */
+	hdr.exprm_opt.len = 4;
+	ret = bpf_load_hdr_opt(skops, &hdr.exprm_opt, sizeof(hdr.exprm_opt),
+			       load_flags);
+	if (ret != -ENOMSG)
+		RET_CG_ERR(ret);
+
+	hdr.exprm_opt.magic = __bpf_htons(0xeB9F);
+	ret = bpf_load_hdr_opt(skops, &hdr.exprm_opt, sizeof(hdr.exprm_opt),
+			       load_flags);
+	if (ret != 4 || hdr.exprm_opt.len != 4 ||
+	    hdr.exprm_opt.kind != TCPOPT_EXP ||
+	    hdr.exprm_opt.magic != __bpf_htons(0xeB9F))
+		RET_CG_ERR(ret);
+
+	if (!check_syn)
+		return CG_OK;
+
+	/* Test loading from skops->syn_skb if sk_state == TCP_NEW_SYN_RECV
+	 *
+	 * Test loading from tp->saved_syn for other sk_state.
+	 */
+	ret = bpf_getsockopt(skops, SOL_TCP, TCP_BPF_SYN_IP, &hdr.ip6,
+			     sizeof(hdr.ip6));
+	if (ret != -ENOSPC)
+		RET_CG_ERR(ret);
+
+	if (hdr.ip6.saddr.s6_addr16[7] != last_addr16_n ||
+	    hdr.ip6.daddr.s6_addr16[7] != last_addr16_n)
+		RET_CG_ERR(0);
+
+	ret = bpf_getsockopt(skops, SOL_TCP, TCP_BPF_SYN_IP, &hdr, sizeof(hdr));
+	if (ret < 0)
+		RET_CG_ERR(ret);
+
+	pth = (struct tcphdr *)(&hdr.ip6 + 1);
+	if (pth->dest != passive_lport_n || pth->source != active_lport_n)
+		RET_CG_ERR(0);
+
+	ret = bpf_getsockopt(skops, SOL_TCP, TCP_BPF_SYN, &hdr, sizeof(hdr));
+	if (ret < 0)
+		RET_CG_ERR(ret);
+
+	if (hdr.th.dest != passive_lport_n || hdr.th.source != active_lport_n)
+		RET_CG_ERR(0);
+
+	return CG_OK;
+}
+
+static int check_active_syn_in(struct bpf_sock_ops *skops)
+{
+	return __check_active_hdr_in(skops, true);
+}
+
+static int check_active_hdr_in(struct bpf_sock_ops *skops)
+{
+	struct tcphdr *th;
+
+	if (__check_active_hdr_in(skops, false) == CG_ERR)
+		return CG_ERR;
+
+	th = skops->skb_data;
+	if (th + 1 > skops->skb_data_end)
+		RET_CG_ERR(0);
+
+	if (tcp_hdrlen(th) < skops->skb_len)
+		nr_data++;
+
+	if (th->fin)
+		nr_fin++;
+
+	if (th->ack && !th->fin && tcp_hdrlen(th) == skops->skb_len)
+		nr_pure_ack++;
+
+	return CG_OK;
+}
+
+static int active_opt_len(struct bpf_sock_ops *skops)
+{
+	int err;
+
+	/* Reserve more than enough to allow the -EEXIST test in
+	 * the write_active_opt().
+	 */
+	err = bpf_reserve_hdr_opt(skops, 12, 0);
+	if (err)
+		RET_CG_ERR(err);
+
+	return CG_OK;
+}
+
+static int write_active_opt(struct bpf_sock_ops *skops)
+{
+	struct tcp_exprm_opt exprm_opt = {};
+	struct tcp_opt win_scale_opt = {};
+	struct tcp_opt reg_opt = {};
+	struct tcphdr *th;
+	int err, ret;
+
+	exprm_opt.kind = TCPOPT_EXP;
+	exprm_opt.len = 4;
+	exprm_opt.magic = __bpf_htons(0xeB9F);
+
+	reg_opt.kind = 0xB9;
+	reg_opt.len = 4;
+	reg_opt.data[0] = 0xfa;
+	reg_opt.data[1] = 0xce;
+
+	win_scale_opt.kind = TCPOPT_WINDOW;
+
+	err = bpf_store_hdr_opt(skops, &exprm_opt, sizeof(exprm_opt), 0);
+	if (err)
+		RET_CG_ERR(err);
+
+	/* Store the same exprm option */
+	err = bpf_store_hdr_opt(skops, &exprm_opt, sizeof(exprm_opt), 0);
+	if (err != -EEXIST)
+		RET_CG_ERR(err);
+
+	err = bpf_store_hdr_opt(skops, &reg_opt, sizeof(reg_opt), 0);
+	if (err)
+		RET_CG_ERR(err);
+	err = bpf_store_hdr_opt(skops, &reg_opt, sizeof(reg_opt), 0);
+	if (err != -EEXIST)
+		RET_CG_ERR(err);
+
+	/* Check the option has been written and can be searched */
+	ret = bpf_load_hdr_opt(skops, &exprm_opt, sizeof(exprm_opt), 0);
+	if (ret != 4 || exprm_opt.len != 4 || exprm_opt.kind != TCPOPT_EXP ||
+	    exprm_opt.magic != __bpf_htons(0xeB9F))
+		RET_CG_ERR(ret);
+
+	reg_opt.len = 0;
+	ret = bpf_load_hdr_opt(skops, &reg_opt, sizeof(reg_opt), 0);
+	if (ret != 4 || reg_opt.len != 4 || reg_opt.kind != 0xB9 ||
+	    reg_opt.data[0] != 0xfa || reg_opt.data[1] != 0xce)
+		RET_CG_ERR(ret);
+
+	th = skops->skb_data;
+	if (th + 1 > skops->skb_data_end)
+		RET_CG_ERR(0);
+
+	if (th->syn) {
+		active_lport_h = skops->local_port;
+		active_lport_n = th->source;
+
+		/* Search the win scale option written by kernel
+		 * in the SYN packet.
+		 */
+		ret = bpf_load_hdr_opt(skops, &win_scale_opt,
+				       sizeof(win_scale_opt), 0);
+		if (ret != 3 || win_scale_opt.len != 3 ||
+		    win_scale_opt.kind != TCPOPT_WINDOW)
+			RET_CG_ERR(ret);
+
+		/* Write the win scale option that kernel
+		 * has already written.
+		 */
+		err = bpf_store_hdr_opt(skops, &win_scale_opt,
+					sizeof(win_scale_opt), 0);
+		if (err != -EEXIST)
+			RET_CG_ERR(err);
+	}
+
+	return CG_OK;
+}
+
+static int handle_hdr_opt_len(struct bpf_sock_ops *skops)
+{
+	__u8 tcp_flags = skops_tcp_flags(skops);
+
+	if ((tcp_flags & TCPHDR_SYNACK) == TCPHDR_SYNACK)
+		/* Check the SYN from bpf_sock_ops_kern->syn_skb */
+		return check_active_syn_in(skops);
+
+	/* Passive side should have cleared the write hdr cb by now */
+	if (skops->local_port == passive_lport_h)
+		RET_CG_ERR(0);
+
+	return active_opt_len(skops);
+}
+
+static int handle_write_hdr_opt(struct bpf_sock_ops *skops)
+{
+	if (skops->local_port == passive_lport_h)
+		RET_CG_ERR(0);
+
+	return write_active_opt(skops);
+}
+
+static int handle_parse_hdr(struct bpf_sock_ops *skops)
+{
+	/* Passive side is not writing any non-standard/unknown
+	 * option, so the active side should never be called.
+	 */
+	if (skops->local_port == active_lport_h)
+		RET_CG_ERR(0);
+
+	return check_active_hdr_in(skops);
+}
+
+static int handle_passive_estab(struct bpf_sock_ops *skops)
+{
+	int err;
+
+	/* No more write hdr cb */
+	bpf_sock_ops_cb_flags_set(skops,
+				  skops->bpf_sock_ops_cb_flags &
+				  ~BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG);
+
+	/* Recheck the SYN but check the tp->saved_syn this time */
+	err = check_active_syn_in(skops);
+	if (err == CG_ERR)
+		return err;
+
+	nr_syn++;
+
+	/* The ack has header option written by the active side also */
+	return check_active_hdr_in(skops);
+}
+
+SEC("sockops/misc_estab")
+int misc_estab(struct bpf_sock_ops *skops)
+{
+	int true_val = 1;
+
+	switch (skops->op) {
+	case BPF_SOCK_OPS_TCP_LISTEN_CB:
+		passive_lport_h = skops->local_port;
+		passive_lport_n = __bpf_htons(passive_lport_h);
+		bpf_setsockopt(skops, SOL_TCP, TCP_SAVE_SYN,
+			       &true_val, sizeof(true_val));
+		set_hdr_cb_flags(skops, 0);
+		break;
+	case BPF_SOCK_OPS_TCP_CONNECT_CB:
+		set_hdr_cb_flags(skops, 0);
+		break;
+	case BPF_SOCK_OPS_PARSE_HDR_OPT_CB:
+		return handle_parse_hdr(skops);
+	case BPF_SOCK_OPS_HDR_OPT_LEN_CB:
+		return handle_hdr_opt_len(skops);
+	case BPF_SOCK_OPS_WRITE_HDR_OPT_CB:
+		return handle_write_hdr_opt(skops);
+	case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
+		return handle_passive_estab(skops);
+	}
+
+	return CG_OK;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_overhead.c b/tools/testing/selftests/bpf/progs/test_overhead.c
index 42403d088abc..abb7344b531f 100644
--- a/tools/testing/selftests/bpf/progs/test_overhead.c
+++ b/tools/testing/selftests/bpf/progs/test_overhead.c
@@ -39,10 +39,4 @@ int BPF_PROG(prog5, struct task_struct *tsk, const char *buf, bool exec)
 	return 0;
 }
 
-SEC("fmod_ret/__set_task_comm")
-int BPF_PROG(prog6, struct task_struct *tsk, const char *buf, bool exec)
-{
-	return !tsk;
-}
-
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_pe_preserve_elems.c b/tools/testing/selftests/bpf/progs/test_pe_preserve_elems.c
new file mode 100644
index 000000000000..fb22de7c365d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_pe_preserve_elems.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+	__uint(max_entries, 1);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(int));
+} array_1 SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+	__uint(max_entries, 1);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(int));
+	__uint(map_flags, BPF_F_PRESERVE_ELEMS);
+} array_2 SEC(".maps");
+
+SEC("raw_tp/sched_switch")
+int BPF_PROG(read_array_1)
+{
+	struct bpf_perf_event_value val;
+
+	return bpf_perf_event_read_value(&array_1, 0, &val, sizeof(val));
+}
+
+SEC("raw_tp/task_rename")
+int BPF_PROG(read_array_2)
+{
+	struct bpf_perf_event_value val;
+
+	return bpf_perf_event_read_value(&array_2, 0, &val, sizeof(val));
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_pkt_access.c b/tools/testing/selftests/bpf/progs/test_pkt_access.c
index e72eba4a93d2..852051064507 100644
--- a/tools/testing/selftests/bpf/progs/test_pkt_access.c
+++ b/tools/testing/selftests/bpf/progs/test_pkt_access.c
@@ -79,6 +79,24 @@ int get_skb_ifindex(int val, struct __sk_buff *skb, int var)
 	return skb->ifindex * val * var;
 }
 
+__attribute__ ((noinline))
+int test_pkt_write_access_subprog(struct __sk_buff *skb, __u32 off)
+{
+	void *data = (void *)(long)skb->data;
+	void *data_end = (void *)(long)skb->data_end;
+	struct tcphdr *tcp = NULL;
+
+	if (off > sizeof(struct ethhdr) + sizeof(struct ipv6hdr))
+		return -1;
+
+	tcp = data + off;
+	if (tcp + 1 > data_end)
+		return -1;
+	/* make modification to the packet data */
+	tcp->check++;
+	return 0;
+}
+
 SEC("classifier/test_pkt_access")
 int test_pkt_access(struct __sk_buff *skb)
 {
@@ -117,6 +135,8 @@ int test_pkt_access(struct __sk_buff *skb)
 	if (test_pkt_access_subprog3(3, skb) != skb->len * 3 * skb->ifindex)
 		return TC_ACT_SHOT;
 	if (tcp) {
+		if (test_pkt_write_access_subprog(skb, (void *)tcp - data))
+			return TC_ACT_SHOT;
 		if (((void *)(tcp) + 20) > data_end || proto != 6)
 			return TC_ACT_SHOT;
 		barrier(); /* to force ordering of checks */
diff --git a/tools/testing/selftests/bpf/progs/test_raw_tp_test_run.c b/tools/testing/selftests/bpf/progs/test_raw_tp_test_run.c
new file mode 100644
index 000000000000..4c63cc87b9d0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_raw_tp_test_run.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+__u32 count = 0;
+__u32 on_cpu = 0xffffffff;
+
+SEC("raw_tp/task_rename")
+int BPF_PROG(rename, struct task_struct *task, char *comm)
+{
+
+	count++;
+	if ((__u64) task == 0x1234ULL && (__u64) comm == 0x5678ULL) {
+		on_cpu = bpf_get_smp_processor_id();
+		return (long)task + (long)comm;
+	}
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup.c b/tools/testing/selftests/bpf/progs/test_sk_lookup.c
index bbf8296f4d66..1032b292af5b 100644
--- a/tools/testing/selftests/bpf/progs/test_sk_lookup.c
+++ b/tools/testing/selftests/bpf/progs/test_sk_lookup.c
@@ -19,6 +19,17 @@
 #define IP6(aaaa, bbbb, cccc, dddd)			\
 	{ bpf_htonl(aaaa), bpf_htonl(bbbb), bpf_htonl(cccc), bpf_htonl(dddd) }
 
+/* Macros for least-significant byte and word accesses. */
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define LSE_INDEX(index, size) (index)
+#else
+#define LSE_INDEX(index, size) ((size) - (index) - 1)
+#endif
+#define LSB(value, index)				\
+	(((__u8 *)&(value))[LSE_INDEX((index), sizeof(value))])
+#define LSW(value, index)				\
+	(((__u16 *)&(value))[LSE_INDEX((index), sizeof(value) / 2)])
+
 #define MAX_SOCKS 32
 
 struct {
@@ -369,171 +380,146 @@ int ctx_narrow_access(struct bpf_sk_lookup *ctx)
 {
 	struct bpf_sock *sk;
 	int err, family;
-	__u16 *half;
-	__u8 *byte;
 	bool v4;
 
 	v4 = (ctx->family == AF_INET);
 
 	/* Narrow loads from family field */
-	byte = (__u8 *)&ctx->family;
-	half = (__u16 *)&ctx->family;
-	if (byte[0] != (v4 ? AF_INET : AF_INET6) ||
-	    byte[1] != 0 || byte[2] != 0 || byte[3] != 0)
+	if (LSB(ctx->family, 0) != (v4 ? AF_INET : AF_INET6) ||
+	    LSB(ctx->family, 1) != 0 || LSB(ctx->family, 2) != 0 || LSB(ctx->family, 3) != 0)
 		return SK_DROP;
-	if (half[0] != (v4 ? AF_INET : AF_INET6))
+	if (LSW(ctx->family, 0) != (v4 ? AF_INET : AF_INET6))
 		return SK_DROP;
 
-	byte = (__u8 *)&ctx->protocol;
-	if (byte[0] != IPPROTO_TCP ||
-	    byte[1] != 0 || byte[2] != 0 || byte[3] != 0)
+	/* Narrow loads from protocol field */
+	if (LSB(ctx->protocol, 0) != IPPROTO_TCP ||
+	    LSB(ctx->protocol, 1) != 0 || LSB(ctx->protocol, 2) != 0 || LSB(ctx->protocol, 3) != 0)
 		return SK_DROP;
-	half = (__u16 *)&ctx->protocol;
-	if (half[0] != IPPROTO_TCP)
+	if (LSW(ctx->protocol, 0) != IPPROTO_TCP)
 		return SK_DROP;
 
 	/* Narrow loads from remote_port field. Expect non-0 value. */
-	byte = (__u8 *)&ctx->remote_port;
-	if (byte[0] == 0 && byte[1] == 0 && byte[2] == 0 && byte[3] == 0)
+	if (LSB(ctx->remote_port, 0) == 0 && LSB(ctx->remote_port, 1) == 0 &&
+	    LSB(ctx->remote_port, 2) == 0 && LSB(ctx->remote_port, 3) == 0)
 		return SK_DROP;
-	half = (__u16 *)&ctx->remote_port;
-	if (half[0] == 0)
+	if (LSW(ctx->remote_port, 0) == 0)
 		return SK_DROP;
 
 	/* Narrow loads from local_port field. Expect DST_PORT. */
-	byte = (__u8 *)&ctx->local_port;
-	if (byte[0] != ((DST_PORT >> 0) & 0xff) ||
-	    byte[1] != ((DST_PORT >> 8) & 0xff) ||
-	    byte[2] != 0 || byte[3] != 0)
+	if (LSB(ctx->local_port, 0) != ((DST_PORT >> 0) & 0xff) ||
+	    LSB(ctx->local_port, 1) != ((DST_PORT >> 8) & 0xff) ||
+	    LSB(ctx->local_port, 2) != 0 || LSB(ctx->local_port, 3) != 0)
 		return SK_DROP;
-	half = (__u16 *)&ctx->local_port;
-	if (half[0] != DST_PORT)
+	if (LSW(ctx->local_port, 0) != DST_PORT)
 		return SK_DROP;
 
 	/* Narrow loads from IPv4 fields */
 	if (v4) {
 		/* Expect non-0.0.0.0 in remote_ip4 */
-		byte = (__u8 *)&ctx->remote_ip4;
-		if (byte[0] == 0 && byte[1] == 0 &&
-		    byte[2] == 0 && byte[3] == 0)
+		if (LSB(ctx->remote_ip4, 0) == 0 && LSB(ctx->remote_ip4, 1) == 0 &&
+		    LSB(ctx->remote_ip4, 2) == 0 && LSB(ctx->remote_ip4, 3) == 0)
 			return SK_DROP;
-		half = (__u16 *)&ctx->remote_ip4;
-		if (half[0] == 0 && half[1] == 0)
+		if (LSW(ctx->remote_ip4, 0) == 0 && LSW(ctx->remote_ip4, 1) == 0)
 			return SK_DROP;
 
 		/* Expect DST_IP4 in local_ip4 */
-		byte = (__u8 *)&ctx->local_ip4;
-		if (byte[0] != ((DST_IP4 >>  0) & 0xff) ||
-		    byte[1] != ((DST_IP4 >>  8) & 0xff) ||
-		    byte[2] != ((DST_IP4 >> 16) & 0xff) ||
-		    byte[3] != ((DST_IP4 >> 24) & 0xff))
+		if (LSB(ctx->local_ip4, 0) != ((DST_IP4 >> 0) & 0xff) ||
+		    LSB(ctx->local_ip4, 1) != ((DST_IP4 >> 8) & 0xff) ||
+		    LSB(ctx->local_ip4, 2) != ((DST_IP4 >> 16) & 0xff) ||
+		    LSB(ctx->local_ip4, 3) != ((DST_IP4 >> 24) & 0xff))
 			return SK_DROP;
-		half = (__u16 *)&ctx->local_ip4;
-		if (half[0] != ((DST_IP4 >>  0) & 0xffff) ||
-		    half[1] != ((DST_IP4 >> 16) & 0xffff))
+		if (LSW(ctx->local_ip4, 0) != ((DST_IP4 >> 0) & 0xffff) ||
+		    LSW(ctx->local_ip4, 1) != ((DST_IP4 >> 16) & 0xffff))
 			return SK_DROP;
 	} else {
 		/* Expect 0.0.0.0 IPs when family != AF_INET */
-		byte = (__u8 *)&ctx->remote_ip4;
-		if (byte[0] != 0 || byte[1] != 0 &&
-		    byte[2] != 0 || byte[3] != 0)
+		if (LSB(ctx->remote_ip4, 0) != 0 || LSB(ctx->remote_ip4, 1) != 0 ||
+		    LSB(ctx->remote_ip4, 2) != 0 || LSB(ctx->remote_ip4, 3) != 0)
 			return SK_DROP;
-		half = (__u16 *)&ctx->remote_ip4;
-		if (half[0] != 0 || half[1] != 0)
+		if (LSW(ctx->remote_ip4, 0) != 0 || LSW(ctx->remote_ip4, 1) != 0)
 			return SK_DROP;
 
-		byte = (__u8 *)&ctx->local_ip4;
-		if (byte[0] != 0 || byte[1] != 0 &&
-		    byte[2] != 0 || byte[3] != 0)
+		if (LSB(ctx->local_ip4, 0) != 0 || LSB(ctx->local_ip4, 1) != 0 ||
+		    LSB(ctx->local_ip4, 2) != 0 || LSB(ctx->local_ip4, 3) != 0)
 			return SK_DROP;
-		half = (__u16 *)&ctx->local_ip4;
-		if (half[0] != 0 || half[1] != 0)
+		if (LSW(ctx->local_ip4, 0) != 0 || LSW(ctx->local_ip4, 1) != 0)
 			return SK_DROP;
 	}
 
 	/* Narrow loads from IPv6 fields */
 	if (!v4) {
-		/* Expenct non-:: IP in remote_ip6 */
-		byte = (__u8 *)&ctx->remote_ip6;
-		if (byte[0] == 0 && byte[1] == 0 &&
-		    byte[2] == 0 && byte[3] == 0 &&
-		    byte[4] == 0 && byte[5] == 0 &&
-		    byte[6] == 0 && byte[7] == 0 &&
-		    byte[8] == 0 && byte[9] == 0 &&
-		    byte[10] == 0 && byte[11] == 0 &&
-		    byte[12] == 0 && byte[13] == 0 &&
-		    byte[14] == 0 && byte[15] == 0)
+		/* Expect non-:: IP in remote_ip6 */
+		if (LSB(ctx->remote_ip6[0], 0) == 0 && LSB(ctx->remote_ip6[0], 1) == 0 &&
+		    LSB(ctx->remote_ip6[0], 2) == 0 && LSB(ctx->remote_ip6[0], 3) == 0 &&
+		    LSB(ctx->remote_ip6[1], 0) == 0 && LSB(ctx->remote_ip6[1], 1) == 0 &&
+		    LSB(ctx->remote_ip6[1], 2) == 0 && LSB(ctx->remote_ip6[1], 3) == 0 &&
+		    LSB(ctx->remote_ip6[2], 0) == 0 && LSB(ctx->remote_ip6[2], 1) == 0 &&
+		    LSB(ctx->remote_ip6[2], 2) == 0 && LSB(ctx->remote_ip6[2], 3) == 0 &&
+		    LSB(ctx->remote_ip6[3], 0) == 0 && LSB(ctx->remote_ip6[3], 1) == 0 &&
+		    LSB(ctx->remote_ip6[3], 2) == 0 && LSB(ctx->remote_ip6[3], 3) == 0)
 			return SK_DROP;
-		half = (__u16 *)&ctx->remote_ip6;
-		if (half[0] == 0 && half[1] == 0 &&
-		    half[2] == 0 && half[3] == 0 &&
-		    half[4] == 0 && half[5] == 0 &&
-		    half[6] == 0 && half[7] == 0)
+		if (LSW(ctx->remote_ip6[0], 0) == 0 && LSW(ctx->remote_ip6[0], 1) == 0 &&
+		    LSW(ctx->remote_ip6[1], 0) == 0 && LSW(ctx->remote_ip6[1], 1) == 0 &&
+		    LSW(ctx->remote_ip6[2], 0) == 0 && LSW(ctx->remote_ip6[2], 1) == 0 &&
+		    LSW(ctx->remote_ip6[3], 0) == 0 && LSW(ctx->remote_ip6[3], 1) == 0)
 			return SK_DROP;
-
 		/* Expect DST_IP6 in local_ip6 */
-		byte = (__u8 *)&ctx->local_ip6;
-		if (byte[0] != ((DST_IP6[0] >>  0) & 0xff) ||
-		    byte[1] != ((DST_IP6[0] >>  8) & 0xff) ||
-		    byte[2] != ((DST_IP6[0] >> 16) & 0xff) ||
-		    byte[3] != ((DST_IP6[0] >> 24) & 0xff) ||
-		    byte[4] != ((DST_IP6[1] >>  0) & 0xff) ||
-		    byte[5] != ((DST_IP6[1] >>  8) & 0xff) ||
-		    byte[6] != ((DST_IP6[1] >> 16) & 0xff) ||
-		    byte[7] != ((DST_IP6[1] >> 24) & 0xff) ||
-		    byte[8] != ((DST_IP6[2] >>  0) & 0xff) ||
-		    byte[9] != ((DST_IP6[2] >>  8) & 0xff) ||
-		    byte[10] != ((DST_IP6[2] >> 16) & 0xff) ||
-		    byte[11] != ((DST_IP6[2] >> 24) & 0xff) ||
-		    byte[12] != ((DST_IP6[3] >>  0) & 0xff) ||
-		    byte[13] != ((DST_IP6[3] >>  8) & 0xff) ||
-		    byte[14] != ((DST_IP6[3] >> 16) & 0xff) ||
-		    byte[15] != ((DST_IP6[3] >> 24) & 0xff))
+		if (LSB(ctx->local_ip6[0], 0) != ((DST_IP6[0] >> 0) & 0xff) ||
+		    LSB(ctx->local_ip6[0], 1) != ((DST_IP6[0] >> 8) & 0xff) ||
+		    LSB(ctx->local_ip6[0], 2) != ((DST_IP6[0] >> 16) & 0xff) ||
+		    LSB(ctx->local_ip6[0], 3) != ((DST_IP6[0] >> 24) & 0xff) ||
+		    LSB(ctx->local_ip6[1], 0) != ((DST_IP6[1] >> 0) & 0xff) ||
+		    LSB(ctx->local_ip6[1], 1) != ((DST_IP6[1] >> 8) & 0xff) ||
+		    LSB(ctx->local_ip6[1], 2) != ((DST_IP6[1] >> 16) & 0xff) ||
+		    LSB(ctx->local_ip6[1], 3) != ((DST_IP6[1] >> 24) & 0xff) ||
+		    LSB(ctx->local_ip6[2], 0) != ((DST_IP6[2] >> 0) & 0xff) ||
+		    LSB(ctx->local_ip6[2], 1) != ((DST_IP6[2] >> 8) & 0xff) ||
+		    LSB(ctx->local_ip6[2], 2) != ((DST_IP6[2] >> 16) & 0xff) ||
+		    LSB(ctx->local_ip6[2], 3) != ((DST_IP6[2] >> 24) & 0xff) ||
+		    LSB(ctx->local_ip6[3], 0) != ((DST_IP6[3] >> 0) & 0xff) ||
+		    LSB(ctx->local_ip6[3], 1) != ((DST_IP6[3] >> 8) & 0xff) ||
+		    LSB(ctx->local_ip6[3], 2) != ((DST_IP6[3] >> 16) & 0xff) ||
+		    LSB(ctx->local_ip6[3], 3) != ((DST_IP6[3] >> 24) & 0xff))
 			return SK_DROP;
-		half = (__u16 *)&ctx->local_ip6;
-		if (half[0] != ((DST_IP6[0] >>  0) & 0xffff) ||
-		    half[1] != ((DST_IP6[0] >> 16) & 0xffff) ||
-		    half[2] != ((DST_IP6[1] >>  0) & 0xffff) ||
-		    half[3] != ((DST_IP6[1] >> 16) & 0xffff) ||
-		    half[4] != ((DST_IP6[2] >>  0) & 0xffff) ||
-		    half[5] != ((DST_IP6[2] >> 16) & 0xffff) ||
-		    half[6] != ((DST_IP6[3] >>  0) & 0xffff) ||
-		    half[7] != ((DST_IP6[3] >> 16) & 0xffff))
+		if (LSW(ctx->local_ip6[0], 0) != ((DST_IP6[0] >> 0) & 0xffff) ||
+		    LSW(ctx->local_ip6[0], 1) != ((DST_IP6[0] >> 16) & 0xffff) ||
+		    LSW(ctx->local_ip6[1], 0) != ((DST_IP6[1] >> 0) & 0xffff) ||
+		    LSW(ctx->local_ip6[1], 1) != ((DST_IP6[1] >> 16) & 0xffff) ||
+		    LSW(ctx->local_ip6[2], 0) != ((DST_IP6[2] >> 0) & 0xffff) ||
+		    LSW(ctx->local_ip6[2], 1) != ((DST_IP6[2] >> 16) & 0xffff) ||
+		    LSW(ctx->local_ip6[3], 0) != ((DST_IP6[3] >> 0) & 0xffff) ||
+		    LSW(ctx->local_ip6[3], 1) != ((DST_IP6[3] >> 16) & 0xffff))
 			return SK_DROP;
 	} else {
 		/* Expect :: IPs when family != AF_INET6 */
-		byte = (__u8 *)&ctx->remote_ip6;
-		if (byte[0] != 0 || byte[1] != 0 ||
-		    byte[2] != 0 || byte[3] != 0 ||
-		    byte[4] != 0 || byte[5] != 0 ||
-		    byte[6] != 0 || byte[7] != 0 ||
-		    byte[8] != 0 || byte[9] != 0 ||
-		    byte[10] != 0 || byte[11] != 0 ||
-		    byte[12] != 0 || byte[13] != 0 ||
-		    byte[14] != 0 || byte[15] != 0)
+		if (LSB(ctx->remote_ip6[0], 0) != 0 || LSB(ctx->remote_ip6[0], 1) != 0 ||
+		    LSB(ctx->remote_ip6[0], 2) != 0 || LSB(ctx->remote_ip6[0], 3) != 0 ||
+		    LSB(ctx->remote_ip6[1], 0) != 0 || LSB(ctx->remote_ip6[1], 1) != 0 ||
+		    LSB(ctx->remote_ip6[1], 2) != 0 || LSB(ctx->remote_ip6[1], 3) != 0 ||
+		    LSB(ctx->remote_ip6[2], 0) != 0 || LSB(ctx->remote_ip6[2], 1) != 0 ||
+		    LSB(ctx->remote_ip6[2], 2) != 0 || LSB(ctx->remote_ip6[2], 3) != 0 ||
+		    LSB(ctx->remote_ip6[3], 0) != 0 || LSB(ctx->remote_ip6[3], 1) != 0 ||
+		    LSB(ctx->remote_ip6[3], 2) != 0 || LSB(ctx->remote_ip6[3], 3) != 0)
 			return SK_DROP;
-		half = (__u16 *)&ctx->remote_ip6;
-		if (half[0] != 0 || half[1] != 0 ||
-		    half[2] != 0 || half[3] != 0 ||
-		    half[4] != 0 || half[5] != 0 ||
-		    half[6] != 0 || half[7] != 0)
+		if (LSW(ctx->remote_ip6[0], 0) != 0 || LSW(ctx->remote_ip6[0], 1) != 0 ||
+		    LSW(ctx->remote_ip6[1], 0) != 0 || LSW(ctx->remote_ip6[1], 1) != 0 ||
+		    LSW(ctx->remote_ip6[2], 0) != 0 || LSW(ctx->remote_ip6[2], 1) != 0 ||
+		    LSW(ctx->remote_ip6[3], 0) != 0 || LSW(ctx->remote_ip6[3], 1) != 0)
 			return SK_DROP;
 
-		byte = (__u8 *)&ctx->local_ip6;
-		if (byte[0] != 0 || byte[1] != 0 ||
-		    byte[2] != 0 || byte[3] != 0 ||
-		    byte[4] != 0 || byte[5] != 0 ||
-		    byte[6] != 0 || byte[7] != 0 ||
-		    byte[8] != 0 || byte[9] != 0 ||
-		    byte[10] != 0 || byte[11] != 0 ||
-		    byte[12] != 0 || byte[13] != 0 ||
-		    byte[14] != 0 || byte[15] != 0)
+		if (LSB(ctx->local_ip6[0], 0) != 0 || LSB(ctx->local_ip6[0], 1) != 0 ||
+		    LSB(ctx->local_ip6[0], 2) != 0 || LSB(ctx->local_ip6[0], 3) != 0 ||
+		    LSB(ctx->local_ip6[1], 0) != 0 || LSB(ctx->local_ip6[1], 1) != 0 ||
+		    LSB(ctx->local_ip6[1], 2) != 0 || LSB(ctx->local_ip6[1], 3) != 0 ||
+		    LSB(ctx->local_ip6[2], 0) != 0 || LSB(ctx->local_ip6[2], 1) != 0 ||
+		    LSB(ctx->local_ip6[2], 2) != 0 || LSB(ctx->local_ip6[2], 3) != 0 ||
+		    LSB(ctx->local_ip6[3], 0) != 0 || LSB(ctx->local_ip6[3], 1) != 0 ||
+		    LSB(ctx->local_ip6[3], 2) != 0 || LSB(ctx->local_ip6[3], 3) != 0)
 			return SK_DROP;
-		half = (__u16 *)&ctx->local_ip6;
-		if (half[0] != 0 || half[1] != 0 ||
-		    half[2] != 0 || half[3] != 0 ||
-		    half[4] != 0 || half[5] != 0 ||
-		    half[6] != 0 || half[7] != 0)
+		if (LSW(ctx->remote_ip6[0], 0) != 0 || LSW(ctx->remote_ip6[0], 1) != 0 ||
+		    LSW(ctx->remote_ip6[1], 0) != 0 || LSW(ctx->remote_ip6[1], 1) != 0 ||
+		    LSW(ctx->remote_ip6[2], 0) != 0 || LSW(ctx->remote_ip6[2], 1) != 0 ||
+		    LSW(ctx->remote_ip6[3], 0) != 0 || LSW(ctx->remote_ip6[3], 1) != 0)
 			return SK_DROP;
 	}
 
diff --git a/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c b/tools/testing/selftests/bpf/progs/test_sock_fields.c
index 9bcaa37f476a..81b57b9aaaea 100644
--- a/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_sock_fields.c
@@ -7,19 +7,7 @@
 
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_endian.h>
-
-enum bpf_addr_array_idx {
-	ADDR_SRV_IDX,
-	ADDR_CLI_IDX,
-	__NR_BPF_ADDR_ARRAY_IDX,
-};
-
-enum bpf_result_array_idx {
-	EGRESS_SRV_IDX,
-	EGRESS_CLI_IDX,
-	INGRESS_LISTEN_IDX,
-	__NR_BPF_RESULT_ARRAY_IDX,
-};
+#include "bpf_tcp_helpers.h"
 
 enum bpf_linum_array_idx {
 	EGRESS_LINUM_IDX,
@@ -29,27 +17,6 @@ enum bpf_linum_array_idx {
 
 struct {
 	__uint(type, BPF_MAP_TYPE_ARRAY);
-	__uint(max_entries, __NR_BPF_ADDR_ARRAY_IDX);
-	__type(key, __u32);
-	__type(value, struct sockaddr_in6);
-} addr_map SEC(".maps");
-
-struct {
-	__uint(type, BPF_MAP_TYPE_ARRAY);
-	__uint(max_entries, __NR_BPF_RESULT_ARRAY_IDX);
-	__type(key, __u32);
-	__type(value, struct bpf_sock);
-} sock_result_map SEC(".maps");
-
-struct {
-	__uint(type, BPF_MAP_TYPE_ARRAY);
-	__uint(max_entries, __NR_BPF_RESULT_ARRAY_IDX);
-	__type(key, __u32);
-	__type(value, struct bpf_tcp_sock);
-} tcp_sock_result_map SEC(".maps");
-
-struct {
-	__uint(type, BPF_MAP_TYPE_ARRAY);
 	__uint(max_entries, __NR_BPF_LINUM_ARRAY_IDX);
 	__type(key, __u32);
 	__type(value, __u32);
@@ -74,6 +41,17 @@ struct {
 	__type(value, struct bpf_spinlock_cnt);
 } sk_pkt_out_cnt10 SEC(".maps");
 
+struct bpf_tcp_sock listen_tp = {};
+struct sockaddr_in6 srv_sa6 = {};
+struct bpf_tcp_sock cli_tp = {};
+struct bpf_tcp_sock srv_tp = {};
+struct bpf_sock listen_sk = {};
+struct bpf_sock srv_sk = {};
+struct bpf_sock cli_sk = {};
+__u64 parent_cg_id = 0;
+__u64 child_cg_id = 0;
+__u64 lsndtime = 0;
+
 static bool is_loopback6(__u32 *a6)
 {
 	return !a6[0] && !a6[1] && !a6[2] && a6[3] == bpf_htonl(1);
@@ -130,62 +108,86 @@ static void tpcpy(struct bpf_tcp_sock *dst,
 	dst->bytes_acked = src->bytes_acked;
 }
 
-#define RETURN {						\
+/* Always return CG_OK so that no pkt will be filtered out */
+#define CG_OK 1
+
+#define RET_LOG() ({						\
 	linum = __LINE__;					\
-	bpf_map_update_elem(&linum_map, &linum_idx, &linum, 0);	\
-	return 1;						\
-}
+	bpf_map_update_elem(&linum_map, &linum_idx, &linum, BPF_NOEXIST);	\
+	return CG_OK;						\
+})
 
 SEC("cgroup_skb/egress")
 int egress_read_sock_fields(struct __sk_buff *skb)
 {
 	struct bpf_spinlock_cnt cli_cnt_init = { .lock = 0, .cnt = 0xeB9F };
-	__u32 srv_idx = ADDR_SRV_IDX, cli_idx = ADDR_CLI_IDX, result_idx;
 	struct bpf_spinlock_cnt *pkt_out_cnt, *pkt_out_cnt10;
-	struct sockaddr_in6 *srv_sa6, *cli_sa6;
 	struct bpf_tcp_sock *tp, *tp_ret;
 	struct bpf_sock *sk, *sk_ret;
 	__u32 linum, linum_idx;
+	struct tcp_sock *ktp;
 
 	linum_idx = EGRESS_LINUM_IDX;
 
 	sk = skb->sk;
-	if (!sk || sk->state == 10)
-		RETURN;
+	if (!sk)
+		RET_LOG();
 
+	/* Not the testing egress traffic or
+	 * TCP_LISTEN (10) socket will be copied at the ingress side.
+	 */
+	if (sk->family != AF_INET6 || !is_loopback6(sk->src_ip6) ||
+	    sk->state == 10)
+		return CG_OK;
+
+	if (sk->src_port == bpf_ntohs(srv_sa6.sin6_port)) {
+		/* Server socket */
+		sk_ret = &srv_sk;
+		tp_ret = &srv_tp;
+	} else if (sk->dst_port == srv_sa6.sin6_port) {
+		/* Client socket */
+		sk_ret = &cli_sk;
+		tp_ret = &cli_tp;
+	} else {
+		/* Not the testing egress traffic */
+		return CG_OK;
+	}
+
+	/* It must be a fullsock for cgroup_skb/egress prog */
 	sk = bpf_sk_fullsock(sk);
-	if (!sk || sk->family != AF_INET6 || sk->protocol != IPPROTO_TCP ||
-	    !is_loopback6(sk->src_ip6))
-		RETURN;
+	if (!sk)
+		RET_LOG();
+
+	/* Not the testing egress traffic */
+	if (sk->protocol != IPPROTO_TCP)
+		return CG_OK;
 
 	tp = bpf_tcp_sock(sk);
 	if (!tp)
-		RETURN;
+		RET_LOG();
 
-	srv_sa6 = bpf_map_lookup_elem(&addr_map, &srv_idx);
-	cli_sa6 = bpf_map_lookup_elem(&addr_map, &cli_idx);
-	if (!srv_sa6 || !cli_sa6)
-		RETURN;
+	skcpy(sk_ret, sk);
+	tpcpy(tp_ret, tp);
 
-	if (sk->src_port == bpf_ntohs(srv_sa6->sin6_port))
-		result_idx = EGRESS_SRV_IDX;
-	else if (sk->src_port == bpf_ntohs(cli_sa6->sin6_port))
-		result_idx = EGRESS_CLI_IDX;
-	else
-		RETURN;
+	if (sk_ret == &srv_sk) {
+		ktp = bpf_skc_to_tcp_sock(sk);
 
-	sk_ret = bpf_map_lookup_elem(&sock_result_map, &result_idx);
-	tp_ret = bpf_map_lookup_elem(&tcp_sock_result_map, &result_idx);
-	if (!sk_ret || !tp_ret)
-		RETURN;
+		if (!ktp)
+			RET_LOG();
 
-	skcpy(sk_ret, sk);
-	tpcpy(tp_ret, tp);
+		lsndtime = ktp->lsndtime;
+
+		child_cg_id = bpf_sk_cgroup_id(ktp);
+		if (!child_cg_id)
+			RET_LOG();
+
+		parent_cg_id = bpf_sk_ancestor_cgroup_id(ktp, 2);
+		if (!parent_cg_id)
+			RET_LOG();
 
-	if (result_idx == EGRESS_SRV_IDX) {
 		/* The userspace has created it for srv sk */
-		pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, sk, 0, 0);
-		pkt_out_cnt10 = bpf_sk_storage_get(&sk_pkt_out_cnt10, sk,
+		pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, ktp, 0, 0);
+		pkt_out_cnt10 = bpf_sk_storage_get(&sk_pkt_out_cnt10, ktp,
 						   0, 0);
 	} else {
 		pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, sk,
@@ -197,7 +199,7 @@ int egress_read_sock_fields(struct __sk_buff *skb)
 	}
 
 	if (!pkt_out_cnt || !pkt_out_cnt10)
-		RETURN;
+		RET_LOG();
 
 	/* Even both cnt and cnt10 have lock defined in their BTF,
 	 * intentionally one cnt takes lock while one does not
@@ -208,48 +210,44 @@ int egress_read_sock_fields(struct __sk_buff *skb)
 	pkt_out_cnt10->cnt += 10;
 	bpf_spin_unlock(&pkt_out_cnt10->lock);
 
-	RETURN;
+	return CG_OK;
 }
 
 SEC("cgroup_skb/ingress")
 int ingress_read_sock_fields(struct __sk_buff *skb)
 {
-	__u32 srv_idx = ADDR_SRV_IDX, result_idx = INGRESS_LISTEN_IDX;
-	struct bpf_tcp_sock *tp, *tp_ret;
-	struct bpf_sock *sk, *sk_ret;
-	struct sockaddr_in6 *srv_sa6;
+	struct bpf_tcp_sock *tp;
 	__u32 linum, linum_idx;
+	struct bpf_sock *sk;
 
 	linum_idx = INGRESS_LINUM_IDX;
 
 	sk = skb->sk;
-	if (!sk || sk->family != AF_INET6 || !is_loopback6(sk->src_ip6))
-		RETURN;
+	if (!sk)
+		RET_LOG();
 
-	srv_sa6 = bpf_map_lookup_elem(&addr_map, &srv_idx);
-	if (!srv_sa6 || sk->src_port != bpf_ntohs(srv_sa6->sin6_port))
-		RETURN;
+	/* Not the testing ingress traffic to the server */
+	if (sk->family != AF_INET6 || !is_loopback6(sk->src_ip6) ||
+	    sk->src_port != bpf_ntohs(srv_sa6.sin6_port))
+		return CG_OK;
 
-	if (sk->state != 10 && sk->state != 12)
-		RETURN;
+	/* Only interested in TCP_LISTEN */
+	if (sk->state != 10)
+		return CG_OK;
 
-	sk = bpf_get_listener_sock(sk);
+	/* It must be a fullsock for cgroup_skb/ingress prog */
+	sk = bpf_sk_fullsock(sk);
 	if (!sk)
-		RETURN;
+		RET_LOG();
 
 	tp = bpf_tcp_sock(sk);
 	if (!tp)
-		RETURN;
-
-	sk_ret = bpf_map_lookup_elem(&sock_result_map, &result_idx);
-	tp_ret = bpf_map_lookup_elem(&tcp_sock_result_map, &result_idx);
-	if (!sk_ret || !tp_ret)
-		RETURN;
+		RET_LOG();
 
-	skcpy(sk_ret, sk);
-	tpcpy(tp_ret, tp);
+	skcpy(&listen_sk, sk);
+	tpcpy(&listen_tp, tp);
 
-	RETURN;
+	return CG_OK;
 }
 
 char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_invalid_update.c b/tools/testing/selftests/bpf/progs/test_sockmap_invalid_update.c
new file mode 100644
index 000000000000..02a59e220cbc
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_invalid_update.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Cloudflare
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+struct {
+	__uint(type, BPF_MAP_TYPE_SOCKMAP);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u64);
+} map SEC(".maps");
+
+SEC("sockops")
+int bpf_sockmap(struct bpf_sock_ops *skops)
+{
+	__u32 key = 0;
+
+	if (skops->sk)
+		bpf_map_update_elem(&map, &key, skops->sk, 0);
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
index 3dca4c2e2418..1858435de7aa 100644
--- a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
@@ -131,39 +131,55 @@ int bpf_prog2(struct __sk_buff *skb)
 
 }
 
-SEC("sk_skb3")
-int bpf_prog3(struct __sk_buff *skb)
+static inline void bpf_write_pass(struct __sk_buff *skb, int offset)
 {
-	const int one = 1;
-	int err, *f, ret = SK_PASS;
+	int err = bpf_skb_pull_data(skb, 6 + offset);
 	void *data_end;
 	char *c;
 
-	err = bpf_skb_pull_data(skb, 19);
 	if (err)
-		goto tls_out;
+		return;
 
 	c = (char *)(long)skb->data;
 	data_end = (void *)(long)skb->data_end;
 
-	if (c + 18 < data_end)
-		memcpy(&c[13], "PASS", 4);
+	if (c + 5 + offset < data_end)
+		memcpy(c + offset, "PASS", 4);
+}
+
+SEC("sk_skb3")
+int bpf_prog3(struct __sk_buff *skb)
+{
+	int err, *f, ret = SK_PASS;
+	const int one = 1;
+
 	f = bpf_map_lookup_elem(&sock_skb_opts, &one);
 	if (f && *f) {
 		__u64 flags = 0;
 
 		ret = 0;
 		flags = *f;
+
+		err = bpf_skb_adjust_room(skb, -13, 0, 0);
+		if (err)
+			return SK_DROP;
+		err = bpf_skb_adjust_room(skb, 4, 0, 0);
+		if (err)
+			return SK_DROP;
+		bpf_write_pass(skb, 0);
 #ifdef SOCKMAP
 		return bpf_sk_redirect_map(skb, &tls_sock_map, ret, flags);
 #else
 		return bpf_sk_redirect_hash(skb, &tls_sock_map, &ret, flags);
 #endif
 	}
-
 	f = bpf_map_lookup_elem(&sock_skb_opts, &one);
 	if (f && *f)
 		ret = SK_DROP;
+	err = bpf_skb_adjust_room(skb, 4, 0, 0);
+	if (err)
+		return SK_DROP;
+	bpf_write_pass(skb, 13);
 tls_out:
 	return ret;
 }
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_update.c b/tools/testing/selftests/bpf/progs/test_sockmap_update.c
new file mode 100644
index 000000000000..9d0c9f28cab2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_update.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Cloudflare
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+struct {
+	__uint(type, BPF_MAP_TYPE_SOCKMAP);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u64);
+} src SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_SOCKMAP);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u64);
+} dst_sock_map SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_SOCKHASH);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u64);
+} dst_sock_hash SEC(".maps");
+
+SEC("classifier/copy_sock_map")
+int copy_sock_map(void *ctx)
+{
+	struct bpf_sock *sk;
+	bool failed = false;
+	__u32 key = 0;
+
+	sk = bpf_map_lookup_elem(&src, &key);
+	if (!sk)
+		return SK_DROP;
+
+	if (bpf_map_update_elem(&dst_sock_map, &key, sk, 0))
+		failed = true;
+
+	if (bpf_map_update_elem(&dst_sock_hash, &key, sk, 0))
+		failed = true;
+
+	bpf_sk_release(sk);
+	return failed ? SK_DROP : SK_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_subprogs.c b/tools/testing/selftests/bpf/progs/test_subprogs.c
new file mode 100644
index 000000000000..d3c5673c0218
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_subprogs.c
@@ -0,0 +1,103 @@
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+const char LICENSE[] SEC("license") = "GPL";
+
+__noinline int sub1(int x)
+{
+	return x + 1;
+}
+
+static __noinline int sub5(int v);
+
+__noinline int sub2(int y)
+{
+	return sub5(y + 2);
+}
+
+static __noinline int sub3(int z)
+{
+	return z + 3 + sub1(4);
+}
+
+static __noinline int sub4(int w)
+{
+	return w + sub3(5) + sub1(6);
+}
+
+/* sub5() is an identitify function, just to test weirder functions layout and
+ * call patterns
+ */
+static __noinline int sub5(int v)
+{
+	return sub1(v) - 1; /* compensates sub1()'s + 1 */
+}
+
+/* unfortunately verifier rejects `struct task_struct *t` as an unkown pointer
+ * type, so we need to accept pointer as integer and then cast it inside the
+ * function
+ */
+__noinline int get_task_tgid(uintptr_t t)
+{
+	/* this ensures that CO-RE relocs work in multi-subprogs .text */
+	return BPF_CORE_READ((struct task_struct *)(void *)t, tgid);
+}
+
+int res1 = 0;
+int res2 = 0;
+int res3 = 0;
+int res4 = 0;
+
+SEC("raw_tp/sys_enter")
+int prog1(void *ctx)
+{
+	/* perform some CO-RE relocations to ensure they work with multi-prog
+	 * sections correctly
+	 */
+	struct task_struct *t = (void *)bpf_get_current_task();
+
+	if (!BPF_CORE_READ(t, pid) || !get_task_tgid((uintptr_t)t))
+		return 1;
+
+	res1 = sub1(1) + sub3(2); /* (1 + 1) + (2 + 3 + (4 + 1)) = 12 */
+	return 0;
+}
+
+SEC("raw_tp/sys_exit")
+int prog2(void *ctx)
+{
+	struct task_struct *t = (void *)bpf_get_current_task();
+
+	if (!BPF_CORE_READ(t, pid) || !get_task_tgid((uintptr_t)t))
+		return 1;
+
+	res2 = sub2(3) + sub3(4); /* (3 + 2) + (4 + 3 + (4 + 1)) = 17 */
+	return 0;
+}
+
+/* prog3 has the same section name as prog1 */
+SEC("raw_tp/sys_enter")
+int prog3(void *ctx)
+{
+	struct task_struct *t = (void *)bpf_get_current_task();
+
+	if (!BPF_CORE_READ(t, pid) || !get_task_tgid((uintptr_t)t))
+		return 1;
+
+	res3 = sub3(5) + 6; /* (5 + 3 + (4 + 1)) + 6 = 19 */
+	return 0;
+}
+
+/* prog4 has the same section name as prog2 */
+SEC("raw_tp/sys_exit")
+int prog4(void *ctx)
+{
+	struct task_struct *t = (void *)bpf_get_current_task();
+
+	if (!BPF_CORE_READ(t, pid) || !get_task_tgid((uintptr_t)t))
+		return 1;
+
+	res4 = sub4(7) + sub1(8); /* (7 + (5 + 3 + (4 + 1)) + (6 + 1)) + (8 + 1) = 36 */
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c b/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c
index 458b0d69133e..553a282d816a 100644
--- a/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c
+++ b/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c
@@ -18,11 +18,11 @@
 #define MAX_ULONG_STR_LEN 7
 #define MAX_VALUE_STR_LEN (TCP_MEM_LOOPS * MAX_ULONG_STR_LEN)
 
+const char tcp_mem_name[] = "net/ipv4/tcp_mem/very_very_very_very_long_pointless_string";
 static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx)
 {
-	volatile char tcp_mem_name[] = "net/ipv4/tcp_mem/very_very_very_very_long_pointless_string";
 	unsigned char i;
-	char name[64];
+	char name[sizeof(tcp_mem_name)];
 	int ret;
 
 	memset(name, 0, sizeof(name));
diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c b/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c
index b2e6f9b0894d..2b64bc563a12 100644
--- a/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c
+++ b/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c
@@ -18,11 +18,11 @@
 #define MAX_ULONG_STR_LEN 7
 #define MAX_VALUE_STR_LEN (TCP_MEM_LOOPS * MAX_ULONG_STR_LEN)
 
+const char tcp_mem_name[] = "net/ipv4/tcp_mem/very_very_very_very_long_pointless_string_to_stress_byte_loop";
 static __attribute__((noinline)) int is_tcp_mem(struct bpf_sysctl *ctx)
 {
-	volatile char tcp_mem_name[] = "net/ipv4/tcp_mem/very_very_very_very_long_pointless_string_to_stress_byte_loop";
 	unsigned char i;
-	char name[64];
+	char name[sizeof(tcp_mem_name)];
 	int ret;
 
 	memset(name, 0, sizeof(name));
diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
index 50525235380e..5489823c83fc 100644
--- a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
+++ b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
@@ -19,11 +19,11 @@
 #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
 #endif
 
+const char tcp_mem_name[] = "net/ipv4/tcp_mem";
 static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx)
 {
-	char tcp_mem_name[] = "net/ipv4/tcp_mem";
 	unsigned char i;
-	char name[64];
+	char name[sizeof(tcp_mem_name)];
 	int ret;
 
 	memset(name, 0, sizeof(name));
diff --git a/tools/testing/selftests/bpf/progs/test_tc_neigh.c b/tools/testing/selftests/bpf/progs/test_tc_neigh.c
new file mode 100644
index 000000000000..fe182616b112
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_tc_neigh.c
@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdint.h>
+#include <stdbool.h>
+
+#include <linux/bpf.h>
+#include <linux/stddef.h>
+#include <linux/pkt_cls.h>
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#ifndef ctx_ptr
+# define ctx_ptr(field)		(void *)(long)(field)
+#endif
+
+#define ip4_src			0xac100164 /* 172.16.1.100 */
+#define ip4_dst			0xac100264 /* 172.16.2.100 */
+
+#define ip6_src			{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
+				  0x00, 0x01, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe }
+#define ip6_dst			{ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
+				  0x00, 0x02, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe }
+
+#ifndef v6_equal
+# define v6_equal(a, b)		(a.s6_addr32[0] == b.s6_addr32[0] && \
+				 a.s6_addr32[1] == b.s6_addr32[1] && \
+				 a.s6_addr32[2] == b.s6_addr32[2] && \
+				 a.s6_addr32[3] == b.s6_addr32[3])
+#endif
+
+enum {
+	dev_src,
+	dev_dst,
+};
+
+struct bpf_map_def SEC("maps") ifindex_map = {
+	.type		= BPF_MAP_TYPE_ARRAY,
+	.key_size	= sizeof(int),
+	.value_size	= sizeof(int),
+	.max_entries	= 2,
+};
+
+static __always_inline bool is_remote_ep_v4(struct __sk_buff *skb,
+					    __be32 addr)
+{
+	void *data_end = ctx_ptr(skb->data_end);
+	void *data = ctx_ptr(skb->data);
+	struct iphdr *ip4h;
+
+	if (data + sizeof(struct ethhdr) > data_end)
+		return false;
+
+	ip4h = (struct iphdr *)(data + sizeof(struct ethhdr));
+	if ((void *)(ip4h + 1) > data_end)
+		return false;
+
+	return ip4h->daddr == addr;
+}
+
+static __always_inline bool is_remote_ep_v6(struct __sk_buff *skb,
+					    struct in6_addr addr)
+{
+	void *data_end = ctx_ptr(skb->data_end);
+	void *data = ctx_ptr(skb->data);
+	struct ipv6hdr *ip6h;
+
+	if (data + sizeof(struct ethhdr) > data_end)
+		return false;
+
+	ip6h = (struct ipv6hdr *)(data + sizeof(struct ethhdr));
+	if ((void *)(ip6h + 1) > data_end)
+		return false;
+
+	return v6_equal(ip6h->daddr, addr);
+}
+
+static __always_inline int get_dev_ifindex(int which)
+{
+	int *ifindex = bpf_map_lookup_elem(&ifindex_map, &which);
+
+	return ifindex ? *ifindex : 0;
+}
+
+SEC("chk_egress") int tc_chk(struct __sk_buff *skb)
+{
+	void *data_end = ctx_ptr(skb->data_end);
+	void *data = ctx_ptr(skb->data);
+	__u32 *raw = data;
+
+	if (data + sizeof(struct ethhdr) > data_end)
+		return TC_ACT_SHOT;
+
+	return !raw[0] && !raw[1] && !raw[2] ? TC_ACT_SHOT : TC_ACT_OK;
+}
+
+SEC("dst_ingress") int tc_dst(struct __sk_buff *skb)
+{
+	__u8 zero[ETH_ALEN * 2];
+	bool redirect = false;
+
+	switch (skb->protocol) {
+	case __bpf_constant_htons(ETH_P_IP):
+		redirect = is_remote_ep_v4(skb, __bpf_constant_htonl(ip4_src));
+		break;
+	case __bpf_constant_htons(ETH_P_IPV6):
+		redirect = is_remote_ep_v6(skb, (struct in6_addr)ip6_src);
+		break;
+	}
+
+	if (!redirect)
+		return TC_ACT_OK;
+
+	__builtin_memset(&zero, 0, sizeof(zero));
+	if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0)
+		return TC_ACT_SHOT;
+
+	return bpf_redirect_neigh(get_dev_ifindex(dev_src), 0);
+}
+
+SEC("src_ingress") int tc_src(struct __sk_buff *skb)
+{
+	__u8 zero[ETH_ALEN * 2];
+	bool redirect = false;
+
+	switch (skb->protocol) {
+	case __bpf_constant_htons(ETH_P_IP):
+		redirect = is_remote_ep_v4(skb, __bpf_constant_htonl(ip4_dst));
+		break;
+	case __bpf_constant_htons(ETH_P_IPV6):
+		redirect = is_remote_ep_v6(skb, (struct in6_addr)ip6_dst);
+		break;
+	}
+
+	if (!redirect)
+		return TC_ACT_OK;
+
+	__builtin_memset(&zero, 0, sizeof(zero));
+	if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0)
+		return TC_ACT_SHOT;
+
+	return bpf_redirect_neigh(get_dev_ifindex(dev_dst), 0);
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_tc_peer.c b/tools/testing/selftests/bpf/progs/test_tc_peer.c
new file mode 100644
index 000000000000..fc84a7685aa2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_tc_peer.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdint.h>
+#include <stdbool.h>
+
+#include <linux/bpf.h>
+#include <linux/stddef.h>
+#include <linux/pkt_cls.h>
+
+#include <bpf/bpf_helpers.h>
+
+enum {
+	dev_src,
+	dev_dst,
+};
+
+struct bpf_map_def SEC("maps") ifindex_map = {
+	.type		= BPF_MAP_TYPE_ARRAY,
+	.key_size	= sizeof(int),
+	.value_size	= sizeof(int),
+	.max_entries	= 2,
+};
+
+static __always_inline int get_dev_ifindex(int which)
+{
+	int *ifindex = bpf_map_lookup_elem(&ifindex_map, &which);
+
+	return ifindex ? *ifindex : 0;
+}
+
+SEC("chk_egress") int tc_chk(struct __sk_buff *skb)
+{
+	return TC_ACT_SHOT;
+}
+
+SEC("dst_ingress") int tc_dst(struct __sk_buff *skb)
+{
+	return bpf_redirect_peer(get_dev_ifindex(dev_src), 0);
+}
+
+SEC("src_ingress") int tc_src(struct __sk_buff *skb)
+{
+	return bpf_redirect_peer(get_dev_ifindex(dev_dst), 0);
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c b/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c
new file mode 100644
index 000000000000..678bd0fad29e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c
@@ -0,0 +1,626 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <stddef.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <linux/tcp.h>
+#include <linux/socket.h>
+#include <linux/bpf.h>
+#include <linux/types.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#define BPF_PROG_TEST_TCP_HDR_OPTIONS
+#include "test_tcp_hdr_options.h"
+
+#ifndef sizeof_field
+#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER))
+#endif
+
+__u8 test_kind = TCPOPT_EXP;
+__u16 test_magic = 0xeB9F;
+__u32 inherit_cb_flags = 0;
+
+struct bpf_test_option passive_synack_out = {};
+struct bpf_test_option passive_fin_out	= {};
+
+struct bpf_test_option passive_estab_in = {};
+struct bpf_test_option passive_fin_in	= {};
+
+struct bpf_test_option active_syn_out	= {};
+struct bpf_test_option active_fin_out	= {};
+
+struct bpf_test_option active_estab_in	= {};
+struct bpf_test_option active_fin_in	= {};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, struct hdr_stg);
+} hdr_stg_map SEC(".maps");
+
+static bool skops_want_cookie(const struct bpf_sock_ops *skops)
+{
+	return skops->args[0] == BPF_WRITE_HDR_TCP_SYNACK_COOKIE;
+}
+
+static bool skops_current_mss(const struct bpf_sock_ops *skops)
+{
+	return skops->args[0] == BPF_WRITE_HDR_TCP_CURRENT_MSS;
+}
+
+static __u8 option_total_len(__u8 flags)
+{
+	__u8 i, len = 1; /* +1 for flags */
+
+	if (!flags)
+		return 0;
+
+	/* RESEND bit does not use a byte */
+	for (i = OPTION_RESEND + 1; i < __NR_OPTION_FLAGS; i++)
+		len += !!TEST_OPTION_FLAGS(flags, i);
+
+	if (test_kind == TCPOPT_EXP)
+		return len + TCP_BPF_EXPOPT_BASE_LEN;
+	else
+		return len + 2; /* +1 kind, +1 kind-len */
+}
+
+static void write_test_option(const struct bpf_test_option *test_opt,
+			      __u8 *data)
+{
+	__u8 offset = 0;
+
+	data[offset++] = test_opt->flags;
+	if (TEST_OPTION_FLAGS(test_opt->flags, OPTION_MAX_DELACK_MS))
+		data[offset++] = test_opt->max_delack_ms;
+
+	if (TEST_OPTION_FLAGS(test_opt->flags, OPTION_RAND))
+		data[offset++] = test_opt->rand;
+}
+
+static int store_option(struct bpf_sock_ops *skops,
+			const struct bpf_test_option *test_opt)
+{
+	union {
+		struct tcp_exprm_opt exprm;
+		struct tcp_opt regular;
+	} write_opt;
+	int err;
+
+	if (test_kind == TCPOPT_EXP) {
+		write_opt.exprm.kind = TCPOPT_EXP;
+		write_opt.exprm.len = option_total_len(test_opt->flags);
+		write_opt.exprm.magic = __bpf_htons(test_magic);
+		write_opt.exprm.data32 = 0;
+		write_test_option(test_opt, write_opt.exprm.data);
+		err = bpf_store_hdr_opt(skops, &write_opt.exprm,
+					sizeof(write_opt.exprm), 0);
+	} else {
+		write_opt.regular.kind = test_kind;
+		write_opt.regular.len = option_total_len(test_opt->flags);
+		write_opt.regular.data32 = 0;
+		write_test_option(test_opt, write_opt.regular.data);
+		err = bpf_store_hdr_opt(skops, &write_opt.regular,
+					sizeof(write_opt.regular), 0);
+	}
+
+	if (err)
+		RET_CG_ERR(err);
+
+	return CG_OK;
+}
+
+static int parse_test_option(struct bpf_test_option *opt, const __u8 *start)
+{
+	opt->flags = *start++;
+
+	if (TEST_OPTION_FLAGS(opt->flags, OPTION_MAX_DELACK_MS))
+		opt->max_delack_ms = *start++;
+
+	if (TEST_OPTION_FLAGS(opt->flags, OPTION_RAND))
+		opt->rand = *start++;
+
+	return 0;
+}
+
+static int load_option(struct bpf_sock_ops *skops,
+		       struct bpf_test_option *test_opt, bool from_syn)
+{
+	union {
+		struct tcp_exprm_opt exprm;
+		struct tcp_opt regular;
+	} search_opt;
+	int ret, load_flags = from_syn ? BPF_LOAD_HDR_OPT_TCP_SYN : 0;
+
+	if (test_kind == TCPOPT_EXP) {
+		search_opt.exprm.kind = TCPOPT_EXP;
+		search_opt.exprm.len = 4;
+		search_opt.exprm.magic = __bpf_htons(test_magic);
+		search_opt.exprm.data32 = 0;
+		ret = bpf_load_hdr_opt(skops, &search_opt.exprm,
+				       sizeof(search_opt.exprm), load_flags);
+		if (ret < 0)
+			return ret;
+		return parse_test_option(test_opt, search_opt.exprm.data);
+	} else {
+		search_opt.regular.kind = test_kind;
+		search_opt.regular.len = 0;
+		search_opt.regular.data32 = 0;
+		ret = bpf_load_hdr_opt(skops, &search_opt.regular,
+				       sizeof(search_opt.regular), load_flags);
+		if (ret < 0)
+			return ret;
+		return parse_test_option(test_opt, search_opt.regular.data);
+	}
+}
+
+static int synack_opt_len(struct bpf_sock_ops *skops)
+{
+	struct bpf_test_option test_opt = {};
+	__u8 optlen;
+	int err;
+
+	if (!passive_synack_out.flags)
+		return CG_OK;
+
+	err = load_option(skops, &test_opt, true);
+
+	/* bpf_test_option is not found */
+	if (err == -ENOMSG)
+		return CG_OK;
+
+	if (err)
+		RET_CG_ERR(err);
+
+	optlen = option_total_len(passive_synack_out.flags);
+	if (optlen) {
+		err = bpf_reserve_hdr_opt(skops, optlen, 0);
+		if (err)
+			RET_CG_ERR(err);
+	}
+
+	return CG_OK;
+}
+
+static int write_synack_opt(struct bpf_sock_ops *skops)
+{
+	struct bpf_test_option opt;
+
+	if (!passive_synack_out.flags)
+		/* We should not even be called since no header
+		 * space has been reserved.
+		 */
+		RET_CG_ERR(0);
+
+	opt = passive_synack_out;
+	if (skops_want_cookie(skops))
+		SET_OPTION_FLAGS(opt.flags, OPTION_RESEND);
+
+	return store_option(skops, &opt);
+}
+
+static int syn_opt_len(struct bpf_sock_ops *skops)
+{
+	__u8 optlen;
+	int err;
+
+	if (!active_syn_out.flags)
+		return CG_OK;
+
+	optlen = option_total_len(active_syn_out.flags);
+	if (optlen) {
+		err = bpf_reserve_hdr_opt(skops, optlen, 0);
+		if (err)
+			RET_CG_ERR(err);
+	}
+
+	return CG_OK;
+}
+
+static int write_syn_opt(struct bpf_sock_ops *skops)
+{
+	if (!active_syn_out.flags)
+		RET_CG_ERR(0);
+
+	return store_option(skops, &active_syn_out);
+}
+
+static int fin_opt_len(struct bpf_sock_ops *skops)
+{
+	struct bpf_test_option *opt;
+	struct hdr_stg *hdr_stg;
+	__u8 optlen;
+	int err;
+
+	if (!skops->sk)
+		RET_CG_ERR(0);
+
+	hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0);
+	if (!hdr_stg)
+		RET_CG_ERR(0);
+
+	if (hdr_stg->active)
+		opt = &active_fin_out;
+	else
+		opt = &passive_fin_out;
+
+	optlen = option_total_len(opt->flags);
+	if (optlen) {
+		err = bpf_reserve_hdr_opt(skops, optlen, 0);
+		if (err)
+			RET_CG_ERR(err);
+	}
+
+	return CG_OK;
+}
+
+static int write_fin_opt(struct bpf_sock_ops *skops)
+{
+	struct bpf_test_option *opt;
+	struct hdr_stg *hdr_stg;
+
+	if (!skops->sk)
+		RET_CG_ERR(0);
+
+	hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0);
+	if (!hdr_stg)
+		RET_CG_ERR(0);
+
+	if (hdr_stg->active)
+		opt = &active_fin_out;
+	else
+		opt = &passive_fin_out;
+
+	if (!opt->flags)
+		RET_CG_ERR(0);
+
+	return store_option(skops, opt);
+}
+
+static int resend_in_ack(struct bpf_sock_ops *skops)
+{
+	struct hdr_stg *hdr_stg;
+
+	if (!skops->sk)
+		return -1;
+
+	hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0);
+	if (!hdr_stg)
+		return -1;
+
+	return !!hdr_stg->resend_syn;
+}
+
+static int nodata_opt_len(struct bpf_sock_ops *skops)
+{
+	int resend;
+
+	resend = resend_in_ack(skops);
+	if (resend < 0)
+		RET_CG_ERR(0);
+
+	if (resend)
+		return syn_opt_len(skops);
+
+	return CG_OK;
+}
+
+static int write_nodata_opt(struct bpf_sock_ops *skops)
+{
+	int resend;
+
+	resend = resend_in_ack(skops);
+	if (resend < 0)
+		RET_CG_ERR(0);
+
+	if (resend)
+		return write_syn_opt(skops);
+
+	return CG_OK;
+}
+
+static int data_opt_len(struct bpf_sock_ops *skops)
+{
+	/* Same as the nodata version.  Mostly to show
+	 * an example usage on skops->skb_len.
+	 */
+	return nodata_opt_len(skops);
+}
+
+static int write_data_opt(struct bpf_sock_ops *skops)
+{
+	return write_nodata_opt(skops);
+}
+
+static int current_mss_opt_len(struct bpf_sock_ops *skops)
+{
+	/* Reserve maximum that may be needed */
+	int err;
+
+	err = bpf_reserve_hdr_opt(skops, option_total_len(OPTION_MASK), 0);
+	if (err)
+		RET_CG_ERR(err);
+
+	return CG_OK;
+}
+
+static int handle_hdr_opt_len(struct bpf_sock_ops *skops)
+{
+	__u8 tcp_flags = skops_tcp_flags(skops);
+
+	if ((tcp_flags & TCPHDR_SYNACK) == TCPHDR_SYNACK)
+		return synack_opt_len(skops);
+
+	if (tcp_flags & TCPHDR_SYN)
+		return syn_opt_len(skops);
+
+	if (tcp_flags & TCPHDR_FIN)
+		return fin_opt_len(skops);
+
+	if (skops_current_mss(skops))
+		/* The kernel is calculating the MSS */
+		return current_mss_opt_len(skops);
+
+	if (skops->skb_len)
+		return data_opt_len(skops);
+
+	return nodata_opt_len(skops);
+}
+
+static int handle_write_hdr_opt(struct bpf_sock_ops *skops)
+{
+	__u8 tcp_flags = skops_tcp_flags(skops);
+	struct tcphdr *th;
+
+	if ((tcp_flags & TCPHDR_SYNACK) == TCPHDR_SYNACK)
+		return write_synack_opt(skops);
+
+	if (tcp_flags & TCPHDR_SYN)
+		return write_syn_opt(skops);
+
+	if (tcp_flags & TCPHDR_FIN)
+		return write_fin_opt(skops);
+
+	th = skops->skb_data;
+	if (th + 1 > skops->skb_data_end)
+		RET_CG_ERR(0);
+
+	if (skops->skb_len > tcp_hdrlen(th))
+		return write_data_opt(skops);
+
+	return write_nodata_opt(skops);
+}
+
+static int set_delack_max(struct bpf_sock_ops *skops, __u8 max_delack_ms)
+{
+	__u32 max_delack_us = max_delack_ms * 1000;
+
+	return bpf_setsockopt(skops, SOL_TCP, TCP_BPF_DELACK_MAX,
+			      &max_delack_us, sizeof(max_delack_us));
+}
+
+static int set_rto_min(struct bpf_sock_ops *skops, __u8 peer_max_delack_ms)
+{
+	__u32 min_rto_us = peer_max_delack_ms * 1000;
+
+	return bpf_setsockopt(skops, SOL_TCP, TCP_BPF_RTO_MIN, &min_rto_us,
+			      sizeof(min_rto_us));
+}
+
+static int handle_active_estab(struct bpf_sock_ops *skops)
+{
+	struct hdr_stg init_stg = {
+		.active = true,
+	};
+	int err;
+
+	err = load_option(skops, &active_estab_in, false);
+	if (err && err != -ENOMSG)
+		RET_CG_ERR(err);
+
+	init_stg.resend_syn = TEST_OPTION_FLAGS(active_estab_in.flags,
+						OPTION_RESEND);
+	if (!skops->sk || !bpf_sk_storage_get(&hdr_stg_map, skops->sk,
+					      &init_stg,
+					      BPF_SK_STORAGE_GET_F_CREATE))
+		RET_CG_ERR(0);
+
+	if (init_stg.resend_syn)
+		/* Don't clear the write_hdr cb now because
+		 * the ACK may get lost and retransmit may
+		 * be needed.
+		 *
+		 * PARSE_ALL_HDR cb flag is set to learn if this
+		 * resend_syn option has received by the peer.
+		 *
+		 * The header option will be resent until a valid
+		 * packet is received at handle_parse_hdr()
+		 * and all hdr cb flags will be cleared in
+		 * handle_parse_hdr().
+		 */
+		set_parse_all_hdr_cb_flags(skops);
+	else if (!active_fin_out.flags)
+		/* No options will be written from now */
+		clear_hdr_cb_flags(skops);
+
+	if (active_syn_out.max_delack_ms) {
+		err = set_delack_max(skops, active_syn_out.max_delack_ms);
+		if (err)
+			RET_CG_ERR(err);
+	}
+
+	if (active_estab_in.max_delack_ms) {
+		err = set_rto_min(skops, active_estab_in.max_delack_ms);
+		if (err)
+			RET_CG_ERR(err);
+	}
+
+	return CG_OK;
+}
+
+static int handle_passive_estab(struct bpf_sock_ops *skops)
+{
+	struct hdr_stg init_stg = {};
+	struct tcphdr *th;
+	int err;
+
+	inherit_cb_flags = skops->bpf_sock_ops_cb_flags;
+
+	err = load_option(skops, &passive_estab_in, true);
+	if (err == -ENOENT) {
+		/* saved_syn is not found. It was in syncookie mode.
+		 * We have asked the active side to resend the options
+		 * in ACK, so try to find the bpf_test_option from ACK now.
+		 */
+		err = load_option(skops, &passive_estab_in, false);
+		init_stg.syncookie = true;
+	}
+
+	/* ENOMSG: The bpf_test_option is not found which is fine.
+	 * Bail out now for all other errors.
+	 */
+	if (err && err != -ENOMSG)
+		RET_CG_ERR(err);
+
+	th = skops->skb_data;
+	if (th + 1 > skops->skb_data_end)
+		RET_CG_ERR(0);
+
+	if (th->syn) {
+		/* Fastopen */
+
+		/* Cannot clear cb_flags to stop write_hdr cb.
+		 * synack is not sent yet for fast open.
+		 * Even it was, the synack may need to be retransmitted.
+		 *
+		 * PARSE_ALL_HDR cb flag is set to learn
+		 * if synack has reached the peer.
+		 * All cb_flags will be cleared in handle_parse_hdr().
+		 */
+		set_parse_all_hdr_cb_flags(skops);
+		init_stg.fastopen = true;
+	} else if (!passive_fin_out.flags) {
+		/* No options will be written from now */
+		clear_hdr_cb_flags(skops);
+	}
+
+	if (!skops->sk ||
+	    !bpf_sk_storage_get(&hdr_stg_map, skops->sk, &init_stg,
+				BPF_SK_STORAGE_GET_F_CREATE))
+		RET_CG_ERR(0);
+
+	if (passive_synack_out.max_delack_ms) {
+		err = set_delack_max(skops, passive_synack_out.max_delack_ms);
+		if (err)
+			RET_CG_ERR(err);
+	}
+
+	if (passive_estab_in.max_delack_ms) {
+		err = set_rto_min(skops, passive_estab_in.max_delack_ms);
+		if (err)
+			RET_CG_ERR(err);
+	}
+
+	return CG_OK;
+}
+
+static int handle_parse_hdr(struct bpf_sock_ops *skops)
+{
+	struct hdr_stg *hdr_stg;
+	struct tcphdr *th;
+
+	if (!skops->sk)
+		RET_CG_ERR(0);
+
+	th = skops->skb_data;
+	if (th + 1 > skops->skb_data_end)
+		RET_CG_ERR(0);
+
+	hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0);
+	if (!hdr_stg)
+		RET_CG_ERR(0);
+
+	if (hdr_stg->resend_syn || hdr_stg->fastopen)
+		/* The PARSE_ALL_HDR cb flag was turned on
+		 * to ensure that the previously written
+		 * options have reached the peer.
+		 * Those previously written option includes:
+		 *     - Active side: resend_syn in ACK during syncookie
+		 *      or
+		 *     - Passive side: SYNACK during fastopen
+		 *
+		 * A valid packet has been received here after
+		 * the 3WHS, so the PARSE_ALL_HDR cb flag
+		 * can be cleared now.
+		 */
+		clear_parse_all_hdr_cb_flags(skops);
+
+	if (hdr_stg->resend_syn && !active_fin_out.flags)
+		/* Active side resent the syn option in ACK
+		 * because the server was in syncookie mode.
+		 * A valid packet has been received, so
+		 * clear header cb flags if there is no
+		 * more option to send.
+		 */
+		clear_hdr_cb_flags(skops);
+
+	if (hdr_stg->fastopen && !passive_fin_out.flags)
+		/* Passive side was in fastopen.
+		 * A valid packet has been received, so
+		 * the SYNACK has reached the peer.
+		 * Clear header cb flags if there is no more
+		 * option to send.
+		 */
+		clear_hdr_cb_flags(skops);
+
+	if (th->fin) {
+		struct bpf_test_option *fin_opt;
+		int err;
+
+		if (hdr_stg->active)
+			fin_opt = &active_fin_in;
+		else
+			fin_opt = &passive_fin_in;
+
+		err = load_option(skops, fin_opt, false);
+		if (err && err != -ENOMSG)
+			RET_CG_ERR(err);
+	}
+
+	return CG_OK;
+}
+
+SEC("sockops/estab")
+int estab(struct bpf_sock_ops *skops)
+{
+	int true_val = 1;
+
+	switch (skops->op) {
+	case BPF_SOCK_OPS_TCP_LISTEN_CB:
+		bpf_setsockopt(skops, SOL_TCP, TCP_SAVE_SYN,
+			       &true_val, sizeof(true_val));
+		set_hdr_cb_flags(skops, BPF_SOCK_OPS_STATE_CB_FLAG);
+		break;
+	case BPF_SOCK_OPS_TCP_CONNECT_CB:
+		set_hdr_cb_flags(skops, 0);
+		break;
+	case BPF_SOCK_OPS_PARSE_HDR_OPT_CB:
+		return handle_parse_hdr(skops);
+	case BPF_SOCK_OPS_HDR_OPT_LEN_CB:
+		return handle_hdr_opt_len(skops);
+	case BPF_SOCK_OPS_WRITE_HDR_OPT_CB:
+		return handle_write_hdr_opt(skops);
+	case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
+		return handle_passive_estab(skops);
+	case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
+		return handle_active_estab(skops);
+	}
+
+	return CG_OK;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_trace_ext.c b/tools/testing/selftests/bpf/progs/test_trace_ext.c
new file mode 100644
index 000000000000..d19a634d0e78
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_trace_ext.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#include <linux/bpf.h>
+#include <stdbool.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_tracing.h>
+
+__u64 ext_called = 0;
+
+SEC("freplace/test_pkt_md_access")
+int test_pkt_md_access_new(struct __sk_buff *skb)
+{
+	ext_called = skb->len;
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_trace_ext_tracing.c b/tools/testing/selftests/bpf/progs/test_trace_ext_tracing.c
new file mode 100644
index 000000000000..52f3baf98f20
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_trace_ext_tracing.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+__u64 fentry_called = 0;
+
+SEC("fentry/test_pkt_md_access_new")
+int BPF_PROG(fentry, struct sk_buff *skb)
+{
+	fentry_called = skb->len;
+	return 0;
+}
+
+__u64 fexit_called = 0;
+
+SEC("fexit/test_pkt_md_access_new")
+int BPF_PROG(fexit, struct sk_buff *skb)
+{
+	fexit_called = skb->len;
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_vmlinux.c b/tools/testing/selftests/bpf/progs/test_vmlinux.c
index 29fa09d6a6c6..e9dfa0313d1b 100644
--- a/tools/testing/selftests/bpf/progs/test_vmlinux.c
+++ b/tools/testing/selftests/bpf/progs/test_vmlinux.c
@@ -19,12 +19,14 @@ SEC("tp/syscalls/sys_enter_nanosleep")
 int handle__tp(struct trace_event_raw_sys_enter *args)
 {
 	struct __kernel_timespec *ts;
+	long tv_nsec;
 
 	if (args->id != __NR_nanosleep)
 		return 0;
 
 	ts = (void *)args->args[0];
-	if (BPF_CORE_READ(ts, tv_nsec) != MY_TV_NSEC)
+	if (bpf_probe_read_user(&tv_nsec, sizeof(ts->tv_nsec), &ts->tv_nsec) ||
+	    tv_nsec != MY_TV_NSEC)
 		return 0;
 
 	tp_called = true;
@@ -35,12 +37,14 @@ SEC("raw_tp/sys_enter")
 int BPF_PROG(handle__raw_tp, struct pt_regs *regs, long id)
 {
 	struct __kernel_timespec *ts;
+	long tv_nsec;
 
 	if (id != __NR_nanosleep)
 		return 0;
 
 	ts = (void *)PT_REGS_PARM1_CORE(regs);
-	if (BPF_CORE_READ(ts, tv_nsec) != MY_TV_NSEC)
+	if (bpf_probe_read_user(&tv_nsec, sizeof(ts->tv_nsec), &ts->tv_nsec) ||
+	    tv_nsec != MY_TV_NSEC)
 		return 0;
 
 	raw_tp_called = true;
@@ -51,12 +55,14 @@ SEC("tp_btf/sys_enter")
 int BPF_PROG(handle__tp_btf, struct pt_regs *regs, long id)
 {
 	struct __kernel_timespec *ts;
+	long tv_nsec;
 
 	if (id != __NR_nanosleep)
 		return 0;
 
 	ts = (void *)PT_REGS_PARM1_CORE(regs);
-	if (BPF_CORE_READ(ts, tv_nsec) != MY_TV_NSEC)
+	if (bpf_probe_read_user(&tv_nsec, sizeof(ts->tv_nsec), &ts->tv_nsec) ||
+	    tv_nsec != MY_TV_NSEC)
 		return 0;
 
 	tp_btf_called = true;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
index 8beecec166d9..3a67921f62b5 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
@@ -16,7 +16,7 @@
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_endian.h>
 
-static __u32 rol32(__u32 word, unsigned int shift)
+static __always_inline __u32 rol32(__u32 word, unsigned int shift)
 {
 	return (word << shift) | (word >> ((-shift) & 31));
 }
@@ -49,7 +49,7 @@ static __u32 rol32(__u32 word, unsigned int shift)
 
 typedef unsigned int u32;
 
-static __attribute__ ((noinline))
+static __noinline
 u32 jhash(const void *key, u32 length, u32 initval)
 {
 	u32 a, b, c;
@@ -86,7 +86,7 @@ u32 jhash(const void *key, u32 length, u32 initval)
 	return c;
 }
 
-__attribute__ ((noinline))
+__noinline
 u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
 {
 	a += initval;
@@ -96,7 +96,7 @@ u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
 	return c;
 }
 
-__attribute__ ((noinline))
+__noinline
 u32 jhash_2words(u32 a, u32 b, u32 initval)
 {
 	return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2));
@@ -213,7 +213,7 @@ struct eth_hdr {
 	unsigned short eth_proto;
 };
 
-static inline __u64 calc_offset(bool is_ipv6, bool is_icmp)
+static __noinline __u64 calc_offset(bool is_ipv6, bool is_icmp)
 {
 	__u64 off = sizeof(struct eth_hdr);
 	if (is_ipv6) {
@@ -797,8 +797,8 @@ out:
 	return XDP_DROP;
 }
 
-__attribute__ ((section("xdp-test"), used))
-int balancer_ingress(struct xdp_md *ctx)
+SEC("xdp-test-v4")
+int balancer_ingress_v4(struct xdp_md *ctx)
 {
 	void *data = (void *)(long)ctx->data;
 	void *data_end = (void *)(long)ctx->data_end;
@@ -812,11 +812,27 @@ int balancer_ingress(struct xdp_md *ctx)
 	eth_proto = bpf_ntohs(eth->eth_proto);
 	if (eth_proto == ETH_P_IP)
 		return process_packet(data, nh_off, data_end, 0, ctx);
-	else if (eth_proto == ETH_P_IPV6)
+	else
+		return XDP_DROP;
+}
+
+SEC("xdp-test-v6")
+int balancer_ingress_v6(struct xdp_md *ctx)
+{
+	void *data = (void *)(long)ctx->data;
+	void *data_end = (void *)(long)ctx->data_end;
+	struct eth_hdr *eth = data;
+	__u32 eth_proto;
+	__u32 nh_off;
+
+	nh_off = sizeof(struct eth_hdr);
+	if (data + nh_off > data_end)
+		return XDP_DROP;
+	eth_proto = bpf_ntohs(eth->eth_proto);
+	if (eth_proto == ETH_P_IPV6)
 		return process_packet(data, nh_off, data_end, 1, ctx);
 	else
 		return XDP_DROP;
 }
 
-char _license[] __attribute__ ((section("license"), used)) = "GPL";
-int _version __attribute__ ((section("version"), used)) = 1;
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c
index 8b36b6640e7e..9a4d09590b3d 100644
--- a/tools/testing/selftests/bpf/progs/trigger_bench.c
+++ b/tools/testing/selftests/bpf/progs/trigger_bench.c
@@ -39,6 +39,13 @@ int bench_trigger_fentry(void *ctx)
 	return 0;
 }
 
+SEC("fentry.s/__x64_sys_getpgid")
+int bench_trigger_fentry_sleep(void *ctx)
+{
+	__sync_add_and_fetch(&hits, 1);
+	return 0;
+}
+
 SEC("fmod_ret/__x64_sys_getpgid")
 int bench_trigger_fmodret(void *ctx)
 {
diff --git a/tools/testing/selftests/bpf/test_bpftool_build.sh b/tools/testing/selftests/bpf/test_bpftool_build.sh
index ac349a5cea7e..2db3c60e1e61 100755
--- a/tools/testing/selftests/bpf/test_bpftool_build.sh
+++ b/tools/testing/selftests/bpf/test_bpftool_build.sh
@@ -85,6 +85,23 @@ make_with_tmpdir() {
 	echo
 }
 
+make_doc_and_clean() {
+	echo -e "\$PWD:    $PWD"
+	echo -e "command: make -s $* doc >/dev/null"
+	RST2MAN_OPTS="--exit-status=1" make $J -s $* doc
+	if [ $? -ne 0 ] ; then
+		ERROR=1
+		printf "FAILURE: Errors or warnings when building documentation\n"
+	fi
+	(
+		if [ $# -ge 1 ] ; then
+			cd ${@: -1}
+		fi
+		make -s doc-clean
+	)
+	echo
+}
+
 echo "Trying to build bpftool"
 echo -e "... through kbuild\n"
 
@@ -145,3 +162,7 @@ make_and_clean
 make_with_tmpdir OUTPUT
 
 make_with_tmpdir O
+
+echo -e "Checking documentation build\n"
+# From tools/bpf/bpftool
+make_doc_and_clean
diff --git a/tools/testing/selftests/bpf/test_bpftool_metadata.sh b/tools/testing/selftests/bpf/test_bpftool_metadata.sh
new file mode 100755
index 000000000000..1bf81b49457a
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_bpftool_metadata.sh
@@ -0,0 +1,82 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+TESTNAME=bpftool_metadata
+BPF_FS=$(awk '$3 == "bpf" {print $2; exit}' /proc/mounts)
+BPF_DIR=$BPF_FS/test_$TESTNAME
+
+_cleanup()
+{
+	set +e
+	rm -rf $BPF_DIR 2> /dev/null
+}
+
+cleanup_skip()
+{
+	echo "selftests: $TESTNAME [SKIP]"
+	_cleanup
+
+	exit $ksft_skip
+}
+
+cleanup()
+{
+	if [ "$?" = 0 ]; then
+		echo "selftests: $TESTNAME [PASS]"
+	else
+		echo "selftests: $TESTNAME [FAILED]"
+	fi
+	_cleanup
+}
+
+if [ $(id -u) -ne 0 ]; then
+	echo "selftests: $TESTNAME [SKIP] Need root privileges"
+	exit $ksft_skip
+fi
+
+if [ -z "$BPF_FS" ]; then
+	echo "selftests: $TESTNAME [SKIP] Could not run test without bpffs mounted"
+	exit $ksft_skip
+fi
+
+if ! bpftool version > /dev/null 2>&1; then
+	echo "selftests: $TESTNAME [SKIP] Could not run test without bpftool"
+	exit $ksft_skip
+fi
+
+set -e
+
+trap cleanup_skip EXIT
+
+mkdir $BPF_DIR
+
+trap cleanup EXIT
+
+bpftool prog load metadata_unused.o $BPF_DIR/unused
+
+METADATA_PLAIN="$(bpftool prog)"
+echo "$METADATA_PLAIN" | grep 'a = "foo"' > /dev/null
+echo "$METADATA_PLAIN" | grep 'b = 1' > /dev/null
+
+bpftool prog --json | grep '"metadata":{"a":"foo","b":1}' > /dev/null
+
+bpftool map | grep 'metadata.rodata' > /dev/null
+
+rm $BPF_DIR/unused
+
+bpftool prog load metadata_used.o $BPF_DIR/used
+
+METADATA_PLAIN="$(bpftool prog)"
+echo "$METADATA_PLAIN" | grep 'a = "bar"' > /dev/null
+echo "$METADATA_PLAIN" | grep 'b = 2' > /dev/null
+
+bpftool prog --json | grep '"metadata":{"a":"bar","b":2}' > /dev/null
+
+bpftool map | grep 'metadata.rodata' > /dev/null
+
+rm $BPF_DIR/used
+
+exit 0
diff --git a/tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c b/tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c
index ed253f252cd0..ec53b1ef90d2 100644
--- a/tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c
+++ b/tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c
@@ -156,4 +156,5 @@ cleanup:
 			bpf_object__close(obj);
 		}
 	}
+	return 0;
 }
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 754cf611723e..0d92ebcb335d 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -1274,6 +1274,8 @@ static void __run_parallel(unsigned int tasks,
 	pid_t pid[tasks];
 	int i;
 
+	fflush(stdout);
+
 	for (i = 0; i < tasks; i++) {
 		pid[i] = fork();
 		if (pid[i] == 0) {
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index b1e4dadacd9b..22943b58d752 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -618,7 +618,9 @@ int cd_flavor_subdir(const char *exec_name)
 	if (!flavor)
 		return 0;
 	flavor++;
-	fprintf(stdout, "Switching to flavor '%s' subdirectory...\n", flavor);
+	if (env.verbosity > VERBOSE_NONE)
+		fprintf(stdout,	"Switching to flavor '%s' subdirectory...\n", flavor);
+
 	return chdir(flavor);
 }
 
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index dbb820dde138..238f5f61189e 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -130,6 +130,69 @@ extern int test__join_cgroup(const char *path);
 #define CHECK_ATTR(condition, tag, format...) \
 	_CHECK(condition, tag, tattr.duration, format)
 
+#define ASSERT_EQ(actual, expected, name) ({				\
+	static int duration = 0;					\
+	typeof(actual) ___act = (actual);				\
+	typeof(expected) ___exp = (expected);				\
+	bool ___ok = ___act == ___exp;					\
+	CHECK(!___ok, (name),						\
+	      "unexpected %s: actual %lld != expected %lld\n",		\
+	      (name), (long long)(___act), (long long)(___exp));	\
+	___ok;								\
+})
+
+#define ASSERT_STREQ(actual, expected, name) ({				\
+	static int duration = 0;					\
+	const char *___act = actual;					\
+	const char *___exp = expected;					\
+	bool ___ok = strcmp(___act, ___exp) == 0;			\
+	CHECK(!___ok, (name),						\
+	      "unexpected %s: actual '%s' != expected '%s'\n",		\
+	      (name), ___act, ___exp);					\
+	___ok;								\
+})
+
+#define ASSERT_OK(res, name) ({						\
+	static int duration = 0;					\
+	long long ___res = (res);					\
+	bool ___ok = ___res == 0;					\
+	CHECK(!___ok, (name), "unexpected error: %lld\n", ___res);	\
+	___ok;								\
+})
+
+#define ASSERT_ERR(res, name) ({					\
+	static int duration = 0;					\
+	long long ___res = (res);					\
+	bool ___ok = ___res < 0;					\
+	CHECK(!___ok, (name), "unexpected success: %lld\n", ___res);	\
+	___ok;								\
+})
+
+#define ASSERT_NULL(ptr, name) ({					\
+	static int duration = 0;					\
+	const void *___res = (ptr);					\
+	bool ___ok = !___res;						\
+	CHECK(!___ok, (name), "unexpected pointer: %p\n", ___res);	\
+	___ok;								\
+})
+
+#define ASSERT_OK_PTR(ptr, name) ({					\
+	static int duration = 0;					\
+	const void *___res = (ptr);					\
+	bool ___ok = !IS_ERR_OR_NULL(___res);				\
+	CHECK(!___ok, (name),						\
+	      "unexpected error: %ld\n", PTR_ERR(___res));		\
+	___ok;								\
+})
+
+#define ASSERT_ERR_PTR(ptr, name) ({					\
+	static int duration = 0;					\
+	const void *___res = (ptr);					\
+	bool ___ok = IS_ERR(___res)					\
+	CHECK(!___ok, (name), "unexpected pointer: %p\n", ___res);	\
+	___ok;								\
+})
+
 static inline __u64 ptr_to_u64(const void *ptr)
 {
 	return (__u64) (unsigned long) ptr;
diff --git a/tools/testing/selftests/bpf/test_sock_fields.c b/tools/testing/selftests/bpf/test_sock_fields.c
deleted file mode 100644
index 6c9f269c396d..000000000000
--- a/tools/testing/selftests/bpf/test_sock_fields.c
+++ /dev/null
@@ -1,482 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* Copyright (c) 2019 Facebook */
-
-#include <sys/socket.h>
-#include <sys/epoll.h>
-#include <netinet/in.h>
-#include <arpa/inet.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include <string.h>
-#include <errno.h>
-
-#include <bpf/bpf.h>
-#include <bpf/libbpf.h>
-
-#include "cgroup_helpers.h"
-#include "bpf_rlimit.h"
-
-enum bpf_addr_array_idx {
-	ADDR_SRV_IDX,
-	ADDR_CLI_IDX,
-	__NR_BPF_ADDR_ARRAY_IDX,
-};
-
-enum bpf_result_array_idx {
-	EGRESS_SRV_IDX,
-	EGRESS_CLI_IDX,
-	INGRESS_LISTEN_IDX,
-	__NR_BPF_RESULT_ARRAY_IDX,
-};
-
-enum bpf_linum_array_idx {
-	EGRESS_LINUM_IDX,
-	INGRESS_LINUM_IDX,
-	__NR_BPF_LINUM_ARRAY_IDX,
-};
-
-struct bpf_spinlock_cnt {
-	struct bpf_spin_lock lock;
-	__u32 cnt;
-};
-
-#define CHECK(condition, tag, format...) ({				\
-	int __ret = !!(condition);					\
-	if (__ret) {							\
-		printf("%s(%d):FAIL:%s ", __func__, __LINE__, tag);	\
-		printf(format);						\
-		printf("\n");						\
-		exit(-1);						\
-	}								\
-})
-
-#define TEST_CGROUP "/test-bpf-sock-fields"
-#define DATA "Hello BPF!"
-#define DATA_LEN sizeof(DATA)
-
-static struct sockaddr_in6 srv_sa6, cli_sa6;
-static int sk_pkt_out_cnt10_fd;
-static int sk_pkt_out_cnt_fd;
-static int linum_map_fd;
-static int addr_map_fd;
-static int tp_map_fd;
-static int sk_map_fd;
-
-static __u32 addr_srv_idx = ADDR_SRV_IDX;
-static __u32 addr_cli_idx = ADDR_CLI_IDX;
-
-static __u32 egress_srv_idx = EGRESS_SRV_IDX;
-static __u32 egress_cli_idx = EGRESS_CLI_IDX;
-static __u32 ingress_listen_idx = INGRESS_LISTEN_IDX;
-
-static __u32 egress_linum_idx = EGRESS_LINUM_IDX;
-static __u32 ingress_linum_idx = INGRESS_LINUM_IDX;
-
-static void init_loopback6(struct sockaddr_in6 *sa6)
-{
-	memset(sa6, 0, sizeof(*sa6));
-	sa6->sin6_family = AF_INET6;
-	sa6->sin6_addr = in6addr_loopback;
-}
-
-static void print_sk(const struct bpf_sock *sk)
-{
-	char src_ip4[24], dst_ip4[24];
-	char src_ip6[64], dst_ip6[64];
-
-	inet_ntop(AF_INET, &sk->src_ip4, src_ip4, sizeof(src_ip4));
-	inet_ntop(AF_INET6, &sk->src_ip6, src_ip6, sizeof(src_ip6));
-	inet_ntop(AF_INET, &sk->dst_ip4, dst_ip4, sizeof(dst_ip4));
-	inet_ntop(AF_INET6, &sk->dst_ip6, dst_ip6, sizeof(dst_ip6));
-
-	printf("state:%u bound_dev_if:%u family:%u type:%u protocol:%u mark:%u priority:%u "
-	       "src_ip4:%x(%s) src_ip6:%x:%x:%x:%x(%s) src_port:%u "
-	       "dst_ip4:%x(%s) dst_ip6:%x:%x:%x:%x(%s) dst_port:%u\n",
-	       sk->state, sk->bound_dev_if, sk->family, sk->type, sk->protocol,
-	       sk->mark, sk->priority,
-	       sk->src_ip4, src_ip4,
-	       sk->src_ip6[0], sk->src_ip6[1], sk->src_ip6[2], sk->src_ip6[3],
-	       src_ip6, sk->src_port,
-	       sk->dst_ip4, dst_ip4,
-	       sk->dst_ip6[0], sk->dst_ip6[1], sk->dst_ip6[2], sk->dst_ip6[3],
-	       dst_ip6, ntohs(sk->dst_port));
-}
-
-static void print_tp(const struct bpf_tcp_sock *tp)
-{
-	printf("snd_cwnd:%u srtt_us:%u rtt_min:%u snd_ssthresh:%u rcv_nxt:%u "
-	       "snd_nxt:%u snd:una:%u mss_cache:%u ecn_flags:%u "
-	       "rate_delivered:%u rate_interval_us:%u packets_out:%u "
-	       "retrans_out:%u total_retrans:%u segs_in:%u data_segs_in:%u "
-	       "segs_out:%u data_segs_out:%u lost_out:%u sacked_out:%u "
-	       "bytes_received:%llu bytes_acked:%llu\n",
-	       tp->snd_cwnd, tp->srtt_us, tp->rtt_min, tp->snd_ssthresh,
-	       tp->rcv_nxt, tp->snd_nxt, tp->snd_una, tp->mss_cache,
-	       tp->ecn_flags, tp->rate_delivered, tp->rate_interval_us,
-	       tp->packets_out, tp->retrans_out, tp->total_retrans,
-	       tp->segs_in, tp->data_segs_in, tp->segs_out,
-	       tp->data_segs_out, tp->lost_out, tp->sacked_out,
-	       tp->bytes_received, tp->bytes_acked);
-}
-
-static void check_result(void)
-{
-	struct bpf_tcp_sock srv_tp, cli_tp, listen_tp;
-	struct bpf_sock srv_sk, cli_sk, listen_sk;
-	__u32 ingress_linum, egress_linum;
-	int err;
-
-	err = bpf_map_lookup_elem(linum_map_fd, &egress_linum_idx,
-				  &egress_linum);
-	CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)",
-	      "err:%d errno:%d", err, errno);
-
-	err = bpf_map_lookup_elem(linum_map_fd, &ingress_linum_idx,
-				  &ingress_linum);
-	CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)",
-	      "err:%d errno:%d", err, errno);
-
-	err = bpf_map_lookup_elem(sk_map_fd, &egress_srv_idx, &srv_sk);
-	CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &egress_srv_idx)",
-	      "err:%d errno:%d", err, errno);
-	err = bpf_map_lookup_elem(tp_map_fd, &egress_srv_idx, &srv_tp);
-	CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &egress_srv_idx)",
-	      "err:%d errno:%d", err, errno);
-
-	err = bpf_map_lookup_elem(sk_map_fd, &egress_cli_idx, &cli_sk);
-	CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &egress_cli_idx)",
-	      "err:%d errno:%d", err, errno);
-	err = bpf_map_lookup_elem(tp_map_fd, &egress_cli_idx, &cli_tp);
-	CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &egress_cli_idx)",
-	      "err:%d errno:%d", err, errno);
-
-	err = bpf_map_lookup_elem(sk_map_fd, &ingress_listen_idx, &listen_sk);
-	CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &ingress_listen_idx)",
-	      "err:%d errno:%d", err, errno);
-	err = bpf_map_lookup_elem(tp_map_fd, &ingress_listen_idx, &listen_tp);
-	CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &ingress_listen_idx)",
-	      "err:%d errno:%d", err, errno);
-
-	printf("listen_sk: ");
-	print_sk(&listen_sk);
-	printf("\n");
-
-	printf("srv_sk: ");
-	print_sk(&srv_sk);
-	printf("\n");
-
-	printf("cli_sk: ");
-	print_sk(&cli_sk);
-	printf("\n");
-
-	printf("listen_tp: ");
-	print_tp(&listen_tp);
-	printf("\n");
-
-	printf("srv_tp: ");
-	print_tp(&srv_tp);
-	printf("\n");
-
-	printf("cli_tp: ");
-	print_tp(&cli_tp);
-	printf("\n");
-
-	CHECK(listen_sk.state != 10 ||
-	      listen_sk.family != AF_INET6 ||
-	      listen_sk.protocol != IPPROTO_TCP ||
-	      memcmp(listen_sk.src_ip6, &in6addr_loopback,
-		     sizeof(listen_sk.src_ip6)) ||
-	      listen_sk.dst_ip6[0] || listen_sk.dst_ip6[1] ||
-	      listen_sk.dst_ip6[2] || listen_sk.dst_ip6[3] ||
-	      listen_sk.src_port != ntohs(srv_sa6.sin6_port) ||
-	      listen_sk.dst_port,
-	      "Unexpected listen_sk",
-	      "Check listen_sk output. ingress_linum:%u",
-	      ingress_linum);
-
-	CHECK(srv_sk.state == 10 ||
-	      !srv_sk.state ||
-	      srv_sk.family != AF_INET6 ||
-	      srv_sk.protocol != IPPROTO_TCP ||
-	      memcmp(srv_sk.src_ip6, &in6addr_loopback,
-		     sizeof(srv_sk.src_ip6)) ||
-	      memcmp(srv_sk.dst_ip6, &in6addr_loopback,
-		     sizeof(srv_sk.dst_ip6)) ||
-	      srv_sk.src_port != ntohs(srv_sa6.sin6_port) ||
-	      srv_sk.dst_port != cli_sa6.sin6_port,
-	      "Unexpected srv_sk", "Check srv_sk output. egress_linum:%u",
-	      egress_linum);
-
-	CHECK(cli_sk.state == 10 ||
-	      !cli_sk.state ||
-	      cli_sk.family != AF_INET6 ||
-	      cli_sk.protocol != IPPROTO_TCP ||
-	      memcmp(cli_sk.src_ip6, &in6addr_loopback,
-		     sizeof(cli_sk.src_ip6)) ||
-	      memcmp(cli_sk.dst_ip6, &in6addr_loopback,
-		     sizeof(cli_sk.dst_ip6)) ||
-	      cli_sk.src_port != ntohs(cli_sa6.sin6_port) ||
-	      cli_sk.dst_port != srv_sa6.sin6_port,
-	      "Unexpected cli_sk", "Check cli_sk output. egress_linum:%u",
-	      egress_linum);
-
-	CHECK(listen_tp.data_segs_out ||
-	      listen_tp.data_segs_in ||
-	      listen_tp.total_retrans ||
-	      listen_tp.bytes_acked,
-	      "Unexpected listen_tp", "Check listen_tp output. ingress_linum:%u",
-	      ingress_linum);
-
-	CHECK(srv_tp.data_segs_out != 2 ||
-	      srv_tp.data_segs_in ||
-	      srv_tp.snd_cwnd != 10 ||
-	      srv_tp.total_retrans ||
-	      srv_tp.bytes_acked != 2 * DATA_LEN,
-	      "Unexpected srv_tp", "Check srv_tp output. egress_linum:%u",
-	      egress_linum);
-
-	CHECK(cli_tp.data_segs_out ||
-	      cli_tp.data_segs_in != 2 ||
-	      cli_tp.snd_cwnd != 10 ||
-	      cli_tp.total_retrans ||
-	      cli_tp.bytes_received != 2 * DATA_LEN,
-	      "Unexpected cli_tp", "Check cli_tp output. egress_linum:%u",
-	      egress_linum);
-}
-
-static void check_sk_pkt_out_cnt(int accept_fd, int cli_fd)
-{
-	struct bpf_spinlock_cnt pkt_out_cnt = {}, pkt_out_cnt10 = {};
-	int err;
-
-	pkt_out_cnt.cnt = ~0;
-	pkt_out_cnt10.cnt = ~0;
-	err = bpf_map_lookup_elem(sk_pkt_out_cnt_fd, &accept_fd, &pkt_out_cnt);
-	if (!err)
-		err = bpf_map_lookup_elem(sk_pkt_out_cnt10_fd, &accept_fd,
-					  &pkt_out_cnt10);
-
-	/* The bpf prog only counts for fullsock and
-	 * passive conneciton did not become fullsock until 3WHS
-	 * had been finished.
-	 * The bpf prog only counted two data packet out but we
-	 * specially init accept_fd's pkt_out_cnt by 2 in
-	 * init_sk_storage().  Hence, 4 here.
-	 */
-	CHECK(err || pkt_out_cnt.cnt != 4 || pkt_out_cnt10.cnt != 40,
-	      "bpf_map_lookup_elem(sk_pkt_out_cnt, &accept_fd)",
-	      "err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u",
-	      err, errno, pkt_out_cnt.cnt, pkt_out_cnt10.cnt);
-
-	pkt_out_cnt.cnt = ~0;
-	pkt_out_cnt10.cnt = ~0;
-	err = bpf_map_lookup_elem(sk_pkt_out_cnt_fd, &cli_fd, &pkt_out_cnt);
-	if (!err)
-		err = bpf_map_lookup_elem(sk_pkt_out_cnt10_fd, &cli_fd,
-					  &pkt_out_cnt10);
-	/* Active connection is fullsock from the beginning.
-	 * 1 SYN and 1 ACK during 3WHS
-	 * 2 Acks on data packet.
-	 *
-	 * The bpf_prog initialized it to 0xeB9F.
-	 */
-	CHECK(err || pkt_out_cnt.cnt != 0xeB9F + 4 ||
-	      pkt_out_cnt10.cnt != 0xeB9F + 40,
-	      "bpf_map_lookup_elem(sk_pkt_out_cnt, &cli_fd)",
-	      "err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u",
-	      err, errno, pkt_out_cnt.cnt, pkt_out_cnt10.cnt);
-}
-
-static void init_sk_storage(int sk_fd, __u32 pkt_out_cnt)
-{
-	struct bpf_spinlock_cnt scnt = {};
-	int err;
-
-	scnt.cnt = pkt_out_cnt;
-	err = bpf_map_update_elem(sk_pkt_out_cnt_fd, &sk_fd, &scnt,
-				  BPF_NOEXIST);
-	CHECK(err, "bpf_map_update_elem(sk_pkt_out_cnt_fd)",
-	      "err:%d errno:%d", err, errno);
-
-	scnt.cnt *= 10;
-	err = bpf_map_update_elem(sk_pkt_out_cnt10_fd, &sk_fd, &scnt,
-				  BPF_NOEXIST);
-	CHECK(err, "bpf_map_update_elem(sk_pkt_out_cnt10_fd)",
-	      "err:%d errno:%d", err, errno);
-}
-
-static void test(void)
-{
-	int listen_fd, cli_fd, accept_fd, epfd, err;
-	struct epoll_event ev;
-	socklen_t addrlen;
-	int i;
-
-	addrlen = sizeof(struct sockaddr_in6);
-	ev.events = EPOLLIN;
-
-	epfd = epoll_create(1);
-	CHECK(epfd == -1, "epoll_create()", "epfd:%d errno:%d", epfd, errno);
-
-	/* Prepare listen_fd */
-	listen_fd = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, 0);
-	CHECK(listen_fd == -1, "socket()", "listen_fd:%d errno:%d",
-	      listen_fd, errno);
-
-	init_loopback6(&srv_sa6);
-	err = bind(listen_fd, (struct sockaddr *)&srv_sa6, sizeof(srv_sa6));
-	CHECK(err, "bind(listen_fd)", "err:%d errno:%d", err, errno);
-
-	err = getsockname(listen_fd, (struct sockaddr *)&srv_sa6, &addrlen);
-	CHECK(err, "getsockname(listen_fd)", "err:%d errno:%d", err, errno);
-
-	err = listen(listen_fd, 1);
-	CHECK(err, "listen(listen_fd)", "err:%d errno:%d", err, errno);
-
-	/* Prepare cli_fd */
-	cli_fd = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, 0);
-	CHECK(cli_fd == -1, "socket()", "cli_fd:%d errno:%d", cli_fd, errno);
-
-	init_loopback6(&cli_sa6);
-	err = bind(cli_fd, (struct sockaddr *)&cli_sa6, sizeof(cli_sa6));
-	CHECK(err, "bind(cli_fd)", "err:%d errno:%d", err, errno);
-
-	err = getsockname(cli_fd, (struct sockaddr *)&cli_sa6, &addrlen);
-	CHECK(err, "getsockname(cli_fd)", "err:%d errno:%d",
-	      err, errno);
-
-	/* Update addr_map with srv_sa6 and cli_sa6 */
-	err = bpf_map_update_elem(addr_map_fd, &addr_srv_idx, &srv_sa6, 0);
-	CHECK(err, "map_update", "err:%d errno:%d", err, errno);
-
-	err = bpf_map_update_elem(addr_map_fd, &addr_cli_idx, &cli_sa6, 0);
-	CHECK(err, "map_update", "err:%d errno:%d", err, errno);
-
-	/* Connect from cli_sa6 to srv_sa6 */
-	err = connect(cli_fd, (struct sockaddr *)&srv_sa6, addrlen);
-	printf("srv_sa6.sin6_port:%u cli_sa6.sin6_port:%u\n\n",
-	       ntohs(srv_sa6.sin6_port), ntohs(cli_sa6.sin6_port));
-	CHECK(err && errno != EINPROGRESS,
-	      "connect(cli_fd)", "err:%d errno:%d", err, errno);
-
-	ev.data.fd = listen_fd;
-	err = epoll_ctl(epfd, EPOLL_CTL_ADD, listen_fd, &ev);
-	CHECK(err, "epoll_ctl(EPOLL_CTL_ADD, listen_fd)", "err:%d errno:%d",
-	      err, errno);
-
-	/* Accept the connection */
-	/* Have some timeout in accept(listen_fd). Just in case. */
-	err = epoll_wait(epfd, &ev, 1, 1000);
-	CHECK(err != 1 || ev.data.fd != listen_fd,
-	      "epoll_wait(listen_fd)",
-	      "err:%d errno:%d ev.data.fd:%d listen_fd:%d",
-	      err, errno, ev.data.fd, listen_fd);
-
-	accept_fd = accept(listen_fd, NULL, NULL);
-	CHECK(accept_fd == -1, "accept(listen_fd)", "accept_fd:%d errno:%d",
-	      accept_fd, errno);
-	close(listen_fd);
-
-	ev.data.fd = cli_fd;
-	err = epoll_ctl(epfd, EPOLL_CTL_ADD, cli_fd, &ev);
-	CHECK(err, "epoll_ctl(EPOLL_CTL_ADD, cli_fd)", "err:%d errno:%d",
-	      err, errno);
-
-	init_sk_storage(accept_fd, 2);
-
-	for (i = 0; i < 2; i++) {
-		/* Send some data from accept_fd to cli_fd */
-		err = send(accept_fd, DATA, DATA_LEN, 0);
-		CHECK(err != DATA_LEN, "send(accept_fd)", "err:%d errno:%d",
-		      err, errno);
-
-		/* Have some timeout in recv(cli_fd). Just in case. */
-		err = epoll_wait(epfd, &ev, 1, 1000);
-		CHECK(err != 1 || ev.data.fd != cli_fd,
-		      "epoll_wait(cli_fd)", "err:%d errno:%d ev.data.fd:%d cli_fd:%d",
-		      err, errno, ev.data.fd, cli_fd);
-
-		err = recv(cli_fd, NULL, 0, MSG_TRUNC);
-		CHECK(err, "recv(cli_fd)", "err:%d errno:%d", err, errno);
-	}
-
-	check_sk_pkt_out_cnt(accept_fd, cli_fd);
-
-	close(epfd);
-	close(accept_fd);
-	close(cli_fd);
-
-	check_result();
-}
-
-int main(int argc, char **argv)
-{
-	struct bpf_prog_load_attr attr = {
-		.file = "test_sock_fields_kern.o",
-		.prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-		.prog_flags = BPF_F_TEST_RND_HI32,
-	};
-	int cgroup_fd, egress_fd, ingress_fd, err;
-	struct bpf_program *ingress_prog;
-	struct bpf_object *obj;
-	struct bpf_map *map;
-
-	/* Create a cgroup, get fd, and join it */
-	cgroup_fd = cgroup_setup_and_join(TEST_CGROUP);
-	CHECK(cgroup_fd < 0, "cgroup_setup_and_join()",
-	      "cgroup_fd:%d errno:%d", cgroup_fd, errno);
-	atexit(cleanup_cgroup_environment);
-
-	err = bpf_prog_load_xattr(&attr, &obj, &egress_fd);
-	CHECK(err, "bpf_prog_load_xattr()", "err:%d", err);
-
-	ingress_prog = bpf_object__find_program_by_title(obj,
-							 "cgroup_skb/ingress");
-	CHECK(!ingress_prog,
-	      "bpf_object__find_program_by_title(cgroup_skb/ingress)",
-	      "not found");
-	ingress_fd = bpf_program__fd(ingress_prog);
-
-	err = bpf_prog_attach(egress_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0);
-	CHECK(err == -1, "bpf_prog_attach(CPF_CGROUP_INET_EGRESS)",
-	      "err:%d errno%d", err, errno);
-
-	err = bpf_prog_attach(ingress_fd, cgroup_fd,
-			      BPF_CGROUP_INET_INGRESS, 0);
-	CHECK(err == -1, "bpf_prog_attach(CPF_CGROUP_INET_INGRESS)",
-	      "err:%d errno%d", err, errno);
-	close(cgroup_fd);
-
-	map = bpf_object__find_map_by_name(obj, "addr_map");
-	CHECK(!map, "cannot find addr_map", "(null)");
-	addr_map_fd = bpf_map__fd(map);
-
-	map = bpf_object__find_map_by_name(obj, "sock_result_map");
-	CHECK(!map, "cannot find sock_result_map", "(null)");
-	sk_map_fd = bpf_map__fd(map);
-
-	map = bpf_object__find_map_by_name(obj, "tcp_sock_result_map");
-	CHECK(!map, "cannot find tcp_sock_result_map", "(null)");
-	tp_map_fd = bpf_map__fd(map);
-
-	map = bpf_object__find_map_by_name(obj, "linum_map");
-	CHECK(!map, "cannot find linum_map", "(null)");
-	linum_map_fd = bpf_map__fd(map);
-
-	map = bpf_object__find_map_by_name(obj, "sk_pkt_out_cnt");
-	CHECK(!map, "cannot find sk_pkt_out_cnt", "(null)");
-	sk_pkt_out_cnt_fd = bpf_map__fd(map);
-
-	map = bpf_object__find_map_by_name(obj, "sk_pkt_out_cnt10");
-	CHECK(!map, "cannot find sk_pkt_out_cnt10", "(null)");
-	sk_pkt_out_cnt10_fd = bpf_map__fd(map);
-
-	test();
-
-	bpf_object__close(obj);
-	cleanup_cgroup_environment();
-
-	printf("PASS\n");
-
-	return 0;
-}
diff --git a/tools/testing/selftests/bpf/test_socket_cookie.c b/tools/testing/selftests/bpf/test_socket_cookie.c
index 154a8fd2a48d..ca7ca87e91aa 100644
--- a/tools/testing/selftests/bpf/test_socket_cookie.c
+++ b/tools/testing/selftests/bpf/test_socket_cookie.c
@@ -151,7 +151,7 @@ static int run_test(int cgfd)
 	}
 
 	bpf_object__for_each_program(prog, pobj) {
-		prog_name = bpf_program__title(prog, /*needs_copy*/ false);
+		prog_name = bpf_program__section_name(prog);
 
 		if (libbpf_attach_type_by_name(prog_name, &attach_type))
 			goto err;
diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index 9b6fb00dc7a0..0fa1e421c3d7 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -86,6 +86,7 @@ int txmsg_ktls_skb_redir;
 int ktls;
 int peek_flag;
 int skb_use_parser;
+int txmsg_omit_skb_parser;
 
 static const struct option long_options[] = {
 	{"help",	no_argument,		NULL, 'h' },
@@ -111,6 +112,7 @@ static const struct option long_options[] = {
 	{"txmsg_redir_skb", no_argument,	&txmsg_redir_skb, 1 },
 	{"ktls", no_argument,			&ktls, 1 },
 	{"peek", no_argument,			&peek_flag, 1 },
+	{"txmsg_omit_skb_parser", no_argument,      &txmsg_omit_skb_parser, 1},
 	{"whitelist", required_argument,	NULL, 'n' },
 	{"blacklist", required_argument,	NULL, 'b' },
 	{0, 0, NULL, 0 }
@@ -175,6 +177,7 @@ static void test_reset(void)
 	txmsg_apply = txmsg_cork = 0;
 	txmsg_ingress = txmsg_redir_skb = 0;
 	txmsg_ktls_skb = txmsg_ktls_skb_drop = txmsg_ktls_skb_redir = 0;
+	txmsg_omit_skb_parser = 0;
 	skb_use_parser = 0;
 }
 
@@ -518,28 +521,13 @@ static int msg_verify_data(struct msghdr *msg, int size, int chunk_sz)
 		if (i == 0 && txmsg_ktls_skb) {
 			if (msg->msg_iov[i].iov_len < 4)
 				return -EIO;
-			if (txmsg_ktls_skb_redir) {
-				if (memcmp(&d[13], "PASS", 4) != 0) {
-					fprintf(stderr,
-						"detected redirect ktls_skb data error with skb ingress update @iov[%i]:%i \"%02x %02x %02x %02x\" != \"PASS\"\n", i, 0, d[13], d[14], d[15], d[16]);
-					return -EIO;
-				}
-				d[13] = 0;
-				d[14] = 1;
-				d[15] = 2;
-				d[16] = 3;
-				j = 13;
-			} else if (txmsg_ktls_skb) {
-				if (memcmp(d, "PASS", 4) != 0) {
-					fprintf(stderr,
-						"detected ktls_skb data error with skb ingress update @iov[%i]:%i \"%02x %02x %02x %02x\" != \"PASS\"\n", i, 0, d[0], d[1], d[2], d[3]);
-					return -EIO;
-				}
-				d[0] = 0;
-				d[1] = 1;
-				d[2] = 2;
-				d[3] = 3;
+			if (memcmp(d, "PASS", 4) != 0) {
+				fprintf(stderr,
+					"detected skb data error with skb ingress update @iov[%i]:%i \"%02x %02x %02x %02x\" != \"PASS\"\n",
+					i, 0, d[0], d[1], d[2], d[3]);
+				return -EIO;
 			}
+			j = 4; /* advance index past PASS header */
 		}
 
 		for (; j < msg->msg_iov[i].iov_len && size; j++) {
@@ -927,13 +915,15 @@ static int run_options(struct sockmap_options *options, int cg_fd,  int test)
 		goto run;
 
 	/* Attach programs to sockmap */
-	err = bpf_prog_attach(prog_fd[0], map_fd[0],
-				BPF_SK_SKB_STREAM_PARSER, 0);
-	if (err) {
-		fprintf(stderr,
-			"ERROR: bpf_prog_attach (sockmap %i->%i): %d (%s)\n",
-			prog_fd[0], map_fd[0], err, strerror(errno));
-		return err;
+	if (!txmsg_omit_skb_parser) {
+		err = bpf_prog_attach(prog_fd[0], map_fd[0],
+				      BPF_SK_SKB_STREAM_PARSER, 0);
+		if (err) {
+			fprintf(stderr,
+				"ERROR: bpf_prog_attach (sockmap %i->%i): %d (%s)\n",
+				prog_fd[0], map_fd[0], err, strerror(errno));
+			return err;
+		}
 	}
 
 	err = bpf_prog_attach(prog_fd[1], map_fd[0],
@@ -946,13 +936,15 @@ static int run_options(struct sockmap_options *options, int cg_fd,  int test)
 
 	/* Attach programs to TLS sockmap */
 	if (txmsg_ktls_skb) {
-		err = bpf_prog_attach(prog_fd[0], map_fd[8],
-					BPF_SK_SKB_STREAM_PARSER, 0);
-		if (err) {
-			fprintf(stderr,
-				"ERROR: bpf_prog_attach (TLS sockmap %i->%i): %d (%s)\n",
-				prog_fd[0], map_fd[8], err, strerror(errno));
-			return err;
+		if (!txmsg_omit_skb_parser) {
+			err = bpf_prog_attach(prog_fd[0], map_fd[8],
+					      BPF_SK_SKB_STREAM_PARSER, 0);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_prog_attach (TLS sockmap %i->%i): %d (%s)\n",
+					prog_fd[0], map_fd[8], err, strerror(errno));
+				return err;
+			}
 		}
 
 		err = bpf_prog_attach(prog_fd[2], map_fd[8],
@@ -1480,12 +1472,29 @@ static void test_txmsg_skb(int cgrp, struct sockmap_options *opt)
 	txmsg_ktls_skb_drop = 0;
 	txmsg_ktls_skb_redir = 1;
 	test_exec(cgrp, opt);
+	txmsg_ktls_skb_redir = 0;
+
+	/* Tests that omit skb_parser */
+	txmsg_omit_skb_parser = 1;
+	ktls = 0;
+	txmsg_ktls_skb = 0;
+	test_exec(cgrp, opt);
+
+	txmsg_ktls_skb_drop = 1;
+	test_exec(cgrp, opt);
+	txmsg_ktls_skb_drop = 0;
+
+	txmsg_ktls_skb_redir = 1;
+	test_exec(cgrp, opt);
+
+	ktls = 1;
+	test_exec(cgrp, opt);
+	txmsg_omit_skb_parser = 0;
 
 	opt->data_test = data;
 	ktls = k;
 }
 
-
 /* Test cork with hung data. This tests poor usage patterns where
  * cork can leave data on the ring if user program is buggy and
  * doesn't flush them somehow. They do take some time however
diff --git a/tools/testing/selftests/bpf/test_tc_redirect.sh b/tools/testing/selftests/bpf/test_tc_redirect.sh
new file mode 100755
index 000000000000..6d7482562140
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tc_redirect.sh
@@ -0,0 +1,204 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test sets up 3 netns (src <-> fwd <-> dst). There is no direct veth link
+# between src and dst. The netns fwd has veth links to each src and dst. The
+# client is in src and server in dst. The test installs a TC BPF program to each
+# host facing veth in fwd which calls into i) bpf_redirect_neigh() to perform the
+# neigh addr population and redirect or ii) bpf_redirect_peer() for namespace
+# switch from ingress side; it also installs a checker prog on the egress side
+# to drop unexpected traffic.
+
+if [[ $EUID -ne 0 ]]; then
+	echo "This script must be run as root"
+	echo "FAIL"
+	exit 1
+fi
+
+# check that needed tools are present
+command -v nc >/dev/null 2>&1 || \
+	{ echo >&2 "nc is not available"; exit 1; }
+command -v dd >/dev/null 2>&1 || \
+	{ echo >&2 "dd is not available"; exit 1; }
+command -v timeout >/dev/null 2>&1 || \
+	{ echo >&2 "timeout is not available"; exit 1; }
+command -v ping >/dev/null 2>&1 || \
+	{ echo >&2 "ping is not available"; exit 1; }
+command -v ping6 >/dev/null 2>&1 || \
+	{ echo >&2 "ping6 is not available"; exit 1; }
+command -v perl >/dev/null 2>&1 || \
+	{ echo >&2 "perl is not available"; exit 1; }
+command -v jq >/dev/null 2>&1 || \
+	{ echo >&2 "jq is not available"; exit 1; }
+command -v bpftool >/dev/null 2>&1 || \
+	{ echo >&2 "bpftool is not available"; exit 1; }
+
+readonly GREEN='\033[0;92m'
+readonly RED='\033[0;31m'
+readonly NC='\033[0m' # No Color
+
+readonly PING_ARG="-c 3 -w 10 -q"
+
+readonly TIMEOUT=10
+
+readonly NS_SRC="ns-src-$(mktemp -u XXXXXX)"
+readonly NS_FWD="ns-fwd-$(mktemp -u XXXXXX)"
+readonly NS_DST="ns-dst-$(mktemp -u XXXXXX)"
+
+readonly IP4_SRC="172.16.1.100"
+readonly IP4_DST="172.16.2.100"
+
+readonly IP6_SRC="::1:dead:beef:cafe"
+readonly IP6_DST="::2:dead:beef:cafe"
+
+readonly IP4_SLL="169.254.0.1"
+readonly IP4_DLL="169.254.0.2"
+readonly IP4_NET="169.254.0.0"
+
+netns_cleanup()
+{
+	ip netns del ${NS_SRC}
+	ip netns del ${NS_FWD}
+	ip netns del ${NS_DST}
+}
+
+netns_setup()
+{
+	ip netns add "${NS_SRC}"
+	ip netns add "${NS_FWD}"
+	ip netns add "${NS_DST}"
+
+	ip link add veth_src type veth peer name veth_src_fwd
+	ip link add veth_dst type veth peer name veth_dst_fwd
+
+	ip link set veth_src netns ${NS_SRC}
+	ip link set veth_src_fwd netns ${NS_FWD}
+
+	ip link set veth_dst netns ${NS_DST}
+	ip link set veth_dst_fwd netns ${NS_FWD}
+
+	ip -netns ${NS_SRC} addr add ${IP4_SRC}/32 dev veth_src
+	ip -netns ${NS_DST} addr add ${IP4_DST}/32 dev veth_dst
+
+	# The fwd netns automatically get a v6 LL address / routes, but also
+	# needs v4 one in order to start ARP probing. IP4_NET route is added
+	# to the endpoints so that the ARP processing will reply.
+
+	ip -netns ${NS_FWD} addr add ${IP4_SLL}/32 dev veth_src_fwd
+	ip -netns ${NS_FWD} addr add ${IP4_DLL}/32 dev veth_dst_fwd
+
+	ip -netns ${NS_SRC} addr add ${IP6_SRC}/128 dev veth_src nodad
+	ip -netns ${NS_DST} addr add ${IP6_DST}/128 dev veth_dst nodad
+
+	ip -netns ${NS_SRC} link set dev veth_src up
+	ip -netns ${NS_FWD} link set dev veth_src_fwd up
+
+	ip -netns ${NS_DST} link set dev veth_dst up
+	ip -netns ${NS_FWD} link set dev veth_dst_fwd up
+
+	ip -netns ${NS_SRC} route add ${IP4_DST}/32 dev veth_src scope global
+	ip -netns ${NS_SRC} route add ${IP4_NET}/16 dev veth_src scope global
+	ip -netns ${NS_FWD} route add ${IP4_SRC}/32 dev veth_src_fwd scope global
+
+	ip -netns ${NS_SRC} route add ${IP6_DST}/128 dev veth_src scope global
+	ip -netns ${NS_FWD} route add ${IP6_SRC}/128 dev veth_src_fwd scope global
+
+	ip -netns ${NS_DST} route add ${IP4_SRC}/32 dev veth_dst scope global
+	ip -netns ${NS_DST} route add ${IP4_NET}/16 dev veth_dst scope global
+	ip -netns ${NS_FWD} route add ${IP4_DST}/32 dev veth_dst_fwd scope global
+
+	ip -netns ${NS_DST} route add ${IP6_SRC}/128 dev veth_dst scope global
+	ip -netns ${NS_FWD} route add ${IP6_DST}/128 dev veth_dst_fwd scope global
+
+	fmac_src=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_src_fwd/address)
+	fmac_dst=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_dst_fwd/address)
+
+	ip -netns ${NS_SRC} neigh add ${IP4_DST} dev veth_src lladdr $fmac_src
+	ip -netns ${NS_DST} neigh add ${IP4_SRC} dev veth_dst lladdr $fmac_dst
+
+	ip -netns ${NS_SRC} neigh add ${IP6_DST} dev veth_src lladdr $fmac_src
+	ip -netns ${NS_DST} neigh add ${IP6_SRC} dev veth_dst lladdr $fmac_dst
+}
+
+netns_test_connectivity()
+{
+	set +e
+
+	ip netns exec ${NS_DST} bash -c "nc -4 -l -p 9004 &"
+	ip netns exec ${NS_DST} bash -c "nc -6 -l -p 9006 &"
+
+	TEST="TCPv4 connectivity test"
+	ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero bs=1000 count=100 > /dev/tcp/${IP4_DST}/9004"
+	if [ $? -ne 0 ]; then
+		echo -e "${TEST}: ${RED}FAIL${NC}"
+		exit 1
+	fi
+	echo -e "${TEST}: ${GREEN}PASS${NC}"
+
+	TEST="TCPv6 connectivity test"
+	ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero bs=1000 count=100 > /dev/tcp/${IP6_DST}/9006"
+	if [ $? -ne 0 ]; then
+		echo -e "${TEST}: ${RED}FAIL${NC}"
+		exit 1
+	fi
+	echo -e "${TEST}: ${GREEN}PASS${NC}"
+
+	TEST="ICMPv4 connectivity test"
+	ip netns exec ${NS_SRC} ping  $PING_ARG ${IP4_DST}
+	if [ $? -ne 0 ]; then
+		echo -e "${TEST}: ${RED}FAIL${NC}"
+		exit 1
+	fi
+	echo -e "${TEST}: ${GREEN}PASS${NC}"
+
+	TEST="ICMPv6 connectivity test"
+	ip netns exec ${NS_SRC} ping6 $PING_ARG ${IP6_DST}
+	if [ $? -ne 0 ]; then
+		echo -e "${TEST}: ${RED}FAIL${NC}"
+		exit 1
+	fi
+	echo -e "${TEST}: ${GREEN}PASS${NC}"
+
+	set -e
+}
+
+hex_mem_str()
+{
+	perl -e 'print join(" ", unpack("(H2)8", pack("L", @ARGV)))' $1
+}
+
+netns_setup_bpf()
+{
+	local obj=$1
+
+	ip netns exec ${NS_FWD} tc qdisc add dev veth_src_fwd clsact
+	ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd ingress bpf da obj $obj sec src_ingress
+	ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd egress  bpf da obj $obj sec chk_egress
+
+	ip netns exec ${NS_FWD} tc qdisc add dev veth_dst_fwd clsact
+	ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd ingress bpf da obj $obj sec dst_ingress
+	ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd egress  bpf da obj $obj sec chk_egress
+
+	veth_src=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_src_fwd/ifindex)
+	veth_dst=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_dst_fwd/ifindex)
+
+	progs=$(ip netns exec ${NS_FWD} bpftool net --json | jq -r '.[] | .tc | map(.id) | .[]')
+	for prog in $progs; do
+		map=$(bpftool prog show id $prog --json | jq -r '.map_ids | .? | .[]')
+		if [ ! -z "$map" ]; then
+			bpftool map update id $map key hex $(hex_mem_str 0) value hex $(hex_mem_str $veth_src)
+			bpftool map update id $map key hex $(hex_mem_str 1) value hex $(hex_mem_str $veth_dst)
+		fi
+	done
+}
+
+trap netns_cleanup EXIT
+set -e
+
+netns_setup
+netns_setup_bpf test_tc_neigh.o
+netns_test_connectivity
+netns_cleanup
+netns_setup
+netns_setup_bpf test_tc_peer.o
+netns_test_connectivity
diff --git a/tools/testing/selftests/bpf/test_tcp_hdr_options.h b/tools/testing/selftests/bpf/test_tcp_hdr_options.h
new file mode 100644
index 000000000000..6118e3ab61fc
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tcp_hdr_options.h
@@ -0,0 +1,152 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2020 Facebook */
+
+#ifndef _TEST_TCP_HDR_OPTIONS_H
+#define _TEST_TCP_HDR_OPTIONS_H
+
+struct bpf_test_option {
+	__u8 flags;
+	__u8 max_delack_ms;
+	__u8 rand;
+} __attribute__((packed));
+
+enum {
+	OPTION_RESEND,
+	OPTION_MAX_DELACK_MS,
+	OPTION_RAND,
+	__NR_OPTION_FLAGS,
+};
+
+#define OPTION_F_RESEND		(1 << OPTION_RESEND)
+#define OPTION_F_MAX_DELACK_MS	(1 << OPTION_MAX_DELACK_MS)
+#define OPTION_F_RAND		(1 << OPTION_RAND)
+#define OPTION_MASK		((1 << __NR_OPTION_FLAGS) - 1)
+
+#define TEST_OPTION_FLAGS(flags, option) (1 & ((flags) >> (option)))
+#define SET_OPTION_FLAGS(flags, option)	((flags) |= (1 << (option)))
+
+/* Store in bpf_sk_storage */
+struct hdr_stg {
+	bool active;
+	bool resend_syn; /* active side only */
+	bool syncookie;  /* passive side only */
+	bool fastopen;	/* passive side only */
+};
+
+struct linum_err {
+	unsigned int linum;
+	int err;
+};
+
+#define TCPHDR_FIN 0x01
+#define TCPHDR_SYN 0x02
+#define TCPHDR_RST 0x04
+#define TCPHDR_PSH 0x08
+#define TCPHDR_ACK 0x10
+#define TCPHDR_URG 0x20
+#define TCPHDR_ECE 0x40
+#define TCPHDR_CWR 0x80
+#define TCPHDR_SYNACK (TCPHDR_SYN | TCPHDR_ACK)
+
+#define TCPOPT_EOL		0
+#define TCPOPT_NOP		1
+#define TCPOPT_WINDOW		3
+#define TCPOPT_EXP		254
+
+#define TCP_BPF_EXPOPT_BASE_LEN 4
+#define MAX_TCP_HDR_LEN		60
+#define MAX_TCP_OPTION_SPACE	40
+
+#ifdef BPF_PROG_TEST_TCP_HDR_OPTIONS
+
+#define CG_OK	1
+#define CG_ERR	0
+
+#ifndef SOL_TCP
+#define SOL_TCP 6
+#endif
+
+struct tcp_exprm_opt {
+	__u8 kind;
+	__u8 len;
+	__u16 magic;
+	union {
+		__u8 data[4];
+		__u32 data32;
+	};
+} __attribute__((packed));
+
+struct tcp_opt {
+	__u8 kind;
+	__u8 len;
+	union {
+		__u8 data[4];
+		__u32 data32;
+	};
+} __attribute__((packed));
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__uint(max_entries, 2);
+	__type(key, int);
+	__type(value, struct linum_err);
+} lport_linum_map SEC(".maps");
+
+static inline unsigned int tcp_hdrlen(const struct tcphdr *th)
+{
+	return th->doff << 2;
+}
+
+static inline __u8 skops_tcp_flags(const struct bpf_sock_ops *skops)
+{
+	return skops->skb_tcp_flags;
+}
+
+static inline void clear_hdr_cb_flags(struct bpf_sock_ops *skops)
+{
+	bpf_sock_ops_cb_flags_set(skops,
+				  skops->bpf_sock_ops_cb_flags &
+				  ~(BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG |
+				    BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG));
+}
+
+static inline void set_hdr_cb_flags(struct bpf_sock_ops *skops, __u32 extra)
+{
+	bpf_sock_ops_cb_flags_set(skops,
+				  skops->bpf_sock_ops_cb_flags |
+				  BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG |
+				  BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG |
+				  extra);
+}
+static inline void
+clear_parse_all_hdr_cb_flags(struct bpf_sock_ops *skops)
+{
+	bpf_sock_ops_cb_flags_set(skops,
+				  skops->bpf_sock_ops_cb_flags &
+				  ~BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG);
+}
+
+static inline void
+set_parse_all_hdr_cb_flags(struct bpf_sock_ops *skops)
+{
+	bpf_sock_ops_cb_flags_set(skops,
+				  skops->bpf_sock_ops_cb_flags |
+				  BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG);
+}
+
+#define RET_CG_ERR(__err) ({			\
+	struct linum_err __linum_err;		\
+	int __lport;				\
+						\
+	__linum_err.linum = __LINE__;		\
+	__linum_err.err = __err;		\
+	__lport = skops->local_port;		\
+	bpf_map_update_elem(&lport_linum_map, &__lport, &__linum_err, BPF_NOEXIST); \
+	clear_hdr_cb_flags(skops);					\
+	clear_parse_all_hdr_cb_flags(skops);				\
+	return CG_ERR;							\
+})
+
+#endif /* BPF_PROG_TEST_TCP_HDR_OPTIONS */
+
+#endif /* _TEST_TCP_HDR_OPTIONS_H */
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 78a6bae56ea6..9be395d9dc64 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -114,6 +114,7 @@ struct bpf_test {
 		bpf_testdata_struct_t retvals[MAX_TEST_RUNS];
 	};
 	enum bpf_attach_type expected_attach_type;
+	const char *kfunc;
 };
 
 /* Note we want this to be 64 bit aligned so that the end of our array is
@@ -984,8 +985,24 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 		attr.log_level = 4;
 	attr.prog_flags = pflags;
 
+	if (prog_type == BPF_PROG_TYPE_TRACING && test->kfunc) {
+		attr.attach_btf_id = libbpf_find_vmlinux_btf_id(test->kfunc,
+						attr.expected_attach_type);
+		if (attr.attach_btf_id < 0) {
+			printf("FAIL\nFailed to find BTF ID for '%s'!\n",
+				test->kfunc);
+			(*errors)++;
+			return;
+		}
+	}
+
 	fd_prog = bpf_load_program_xattr(&attr, bpf_vlog, sizeof(bpf_vlog));
-	if (fd_prog < 0 && !bpf_probe_prog_type(prog_type, 0)) {
+
+	/* BPF_PROG_TYPE_TRACING requires more setup and
+	 * bpf_probe_prog_type won't give correct answer
+	 */
+	if (fd_prog < 0 && prog_type != BPF_PROG_TYPE_TRACING &&
+	    !bpf_probe_prog_type(prog_type, 0)) {
 		printf("SKIP (unsupported program type %d)\n", prog_type);
 		skips++;
 		goto close_fds;
diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
index 4d0e913bbb22..1bbd1d9830c8 100644
--- a/tools/testing/selftests/bpf/trace_helpers.c
+++ b/tools/testing/selftests/bpf/trace_helpers.c
@@ -90,6 +90,33 @@ long ksym_get_addr(const char *name)
 	return 0;
 }
 
+/* open kallsyms and read symbol addresses on the fly. Without caching all symbols,
+ * this is faster than load + find.
+ */
+int kallsyms_find(const char *sym, unsigned long long *addr)
+{
+	char type, name[500];
+	unsigned long long value;
+	int err = 0;
+	FILE *f;
+
+	f = fopen("/proc/kallsyms", "r");
+	if (!f)
+		return -EINVAL;
+
+	while (fscanf(f, "%llx %c %499s%*[^\n]\n", &value, &type, name) > 0) {
+		if (strcmp(name, sym) == 0) {
+			*addr = value;
+			goto out;
+		}
+	}
+	err = -ENOENT;
+
+out:
+	fclose(f);
+	return err;
+}
+
 void read_trace_pipe(void)
 {
 	int trace_fd;
diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h
index 25ef597dd03f..f62fdef9e589 100644
--- a/tools/testing/selftests/bpf/trace_helpers.h
+++ b/tools/testing/selftests/bpf/trace_helpers.h
@@ -12,6 +12,10 @@ struct ksym {
 int load_kallsyms(void);
 struct ksym *ksym_search(long key);
 long ksym_get_addr(const char *name);
+
+/* open kallsyms and find addresses on the fly, faster than load + search. */
+int kallsyms_find(const char *sym, unsigned long long *addr);
+
 void read_trace_pipe(void);
 
 #endif
diff --git a/tools/testing/selftests/bpf/verifier/and.c b/tools/testing/selftests/bpf/verifier/and.c
index d781bc86e100..ca8fdb1b3f01 100644
--- a/tools/testing/selftests/bpf/verifier/and.c
+++ b/tools/testing/selftests/bpf/verifier/and.c
@@ -48,3 +48,19 @@
 	.result = REJECT,
 	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
+{
+	"check known subreg with unknown reg",
+	.insns = {
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
+	BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 32),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
+	BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFFFF1234),
+	/* Upper bits are unknown but AND above masks out 1 zero'ing lower bits */
+	BPF_JMP32_IMM(BPF_JLT, BPF_REG_0, 1, 1),
+	BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 512),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.result = ACCEPT,
+	.retval = 0
+},
diff --git a/tools/testing/selftests/bpf/verifier/basic.c b/tools/testing/selftests/bpf/verifier/basic.c
index b8d18642653a..de84f0d57082 100644
--- a/tools/testing/selftests/bpf/verifier/basic.c
+++ b/tools/testing/selftests/bpf/verifier/basic.c
@@ -2,7 +2,7 @@
 	"empty prog",
 	.insns = {
 	},
-	.errstr = "unknown opcode 00",
+	.errstr = "last insn is not an exit or jmp",
 	.result = REJECT,
 },
 {
diff --git a/tools/testing/selftests/bpf/verifier/bounds.c b/tools/testing/selftests/bpf/verifier/bounds.c
index 4d6645f2874c..dac40de3f868 100644
--- a/tools/testing/selftests/bpf/verifier/bounds.c
+++ b/tools/testing/selftests/bpf/verifier/bounds.c
@@ -557,3 +557,149 @@
 	.result = ACCEPT,
 	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
+{
+	"bounds check for reg = 0, reg xor 1",
+	.insns = {
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_IMM(BPF_REG_1, 0),
+	BPF_ALU64_IMM(BPF_XOR, BPF_REG_1, 1),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_hash_8b = { 3 },
+	.result = ACCEPT,
+},
+{
+	"bounds check for reg32 = 0, reg32 xor 1",
+	.insns = {
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_MOV32_IMM(BPF_REG_1, 0),
+	BPF_ALU32_IMM(BPF_XOR, BPF_REG_1, 1),
+	BPF_JMP32_IMM(BPF_JNE, BPF_REG_1, 0, 1),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_hash_8b = { 3 },
+	.result = ACCEPT,
+},
+{
+	"bounds check for reg = 2, reg xor 3",
+	.insns = {
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_IMM(BPF_REG_1, 2),
+	BPF_ALU64_IMM(BPF_XOR, BPF_REG_1, 3),
+	BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 0, 1),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_hash_8b = { 3 },
+	.result = ACCEPT,
+},
+{
+	"bounds check for reg = any, reg xor 3",
+	.insns = {
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+	BPF_ALU64_IMM(BPF_XOR, BPF_REG_1, 3),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_hash_8b = { 3 },
+	.result = REJECT,
+	.errstr = "invalid access to map value",
+	.errstr_unpriv = "invalid access to map value",
+},
+{
+	"bounds check for reg32 = any, reg32 xor 3",
+	.insns = {
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+	BPF_ALU32_IMM(BPF_XOR, BPF_REG_1, 3),
+	BPF_JMP32_IMM(BPF_JNE, BPF_REG_1, 0, 1),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_hash_8b = { 3 },
+	.result = REJECT,
+	.errstr = "invalid access to map value",
+	.errstr_unpriv = "invalid access to map value",
+},
+{
+	"bounds check for reg > 0, reg xor 3",
+	.insns = {
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+	BPF_JMP_IMM(BPF_JLE, BPF_REG_1, 0, 3),
+	BPF_ALU64_IMM(BPF_XOR, BPF_REG_1, 3),
+	BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 1),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_hash_8b = { 3 },
+	.result = ACCEPT,
+},
+{
+	"bounds check for reg32 > 0, reg32 xor 3",
+	.insns = {
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+	BPF_JMP32_IMM(BPF_JLE, BPF_REG_1, 0, 3),
+	BPF_ALU32_IMM(BPF_XOR, BPF_REG_1, 3),
+	BPF_JMP32_IMM(BPF_JGE, BPF_REG_1, 0, 1),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_hash_8b = { 3 },
+	.result = ACCEPT,
+},
diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
index 94258c6b5235..c4f5d909e58a 100644
--- a/tools/testing/selftests/bpf/verifier/calls.c
+++ b/tools/testing/selftests/bpf/verifier/calls.c
@@ -647,13 +647,14 @@
 	.result = REJECT,
 },
 {
-	"calls: ld_abs with changing ctx data in callee",
+	"calls: subprog call with ld_abs in main prog",
 	.insns = {
 	BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
 	BPF_LD_ABS(BPF_B, 0),
 	BPF_LD_ABS(BPF_H, 0),
 	BPF_LD_ABS(BPF_W, 0),
 	BPF_MOV64_REG(BPF_REG_7, BPF_REG_6),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
 	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5),
 	BPF_MOV64_REG(BPF_REG_6, BPF_REG_7),
 	BPF_LD_ABS(BPF_B, 0),
@@ -666,8 +667,7 @@
 	BPF_EXIT_INSN(),
 	},
 	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
-	.errstr = "BPF_LD_[ABS|IND] instructions cannot be mixed",
-	.result = REJECT,
+	.result = ACCEPT,
 },
 {
 	"calls: two calls with bad fallthrough",
diff --git a/tools/testing/selftests/bpf/verifier/d_path.c b/tools/testing/selftests/bpf/verifier/d_path.c
new file mode 100644
index 000000000000..b988396379a7
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/d_path.c
@@ -0,0 +1,37 @@
+{
+	"d_path accept",
+	.insns = {
+	BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+	BPF_MOV64_IMM(BPF_REG_6, 0),
+	BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 0),
+	BPF_LD_IMM64(BPF_REG_3, 8),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_d_path),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.result = ACCEPT,
+	.prog_type = BPF_PROG_TYPE_TRACING,
+	.expected_attach_type = BPF_TRACE_FENTRY,
+	.kfunc = "dentry_open",
+},
+{
+	"d_path reject",
+	.insns = {
+	BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+	BPF_MOV64_IMM(BPF_REG_6, 0),
+	BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 0),
+	BPF_LD_IMM64(BPF_REG_3, 8),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_d_path),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.errstr = "helper call is not allowed in probe",
+	.result = REJECT,
+	.prog_type = BPF_PROG_TYPE_TRACING,
+	.expected_attach_type = BPF_TRACE_FENTRY,
+	.kfunc = "d_path",
+},
diff --git a/tools/testing/selftests/bpf/verifier/direct_packet_access.c b/tools/testing/selftests/bpf/verifier/direct_packet_access.c
index 2c5fbe7bcd27..ae72536603fe 100644
--- a/tools/testing/selftests/bpf/verifier/direct_packet_access.c
+++ b/tools/testing/selftests/bpf/verifier/direct_packet_access.c
@@ -529,7 +529,7 @@
 	},
 	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	.result = REJECT,
-	.errstr = "invalid access to packet, off=0 size=8, R5(id=1,off=0,r=0)",
+	.errstr = "invalid access to packet, off=0 size=8, R5(id=2,off=0,r=0)",
 	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
 {
diff --git a/tools/testing/selftests/bpf/verifier/ld_imm64.c b/tools/testing/selftests/bpf/verifier/ld_imm64.c
index 3856dba733e9..f9297900cea6 100644
--- a/tools/testing/selftests/bpf/verifier/ld_imm64.c
+++ b/tools/testing/selftests/bpf/verifier/ld_imm64.c
@@ -51,14 +51,6 @@
 	.result = REJECT,
 },
 {
-	"test5 ld_imm64",
-	.insns = {
-	BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 0),
-	},
-	.errstr = "invalid bpf_ld_imm64 insn",
-	.result = REJECT,
-},
-{
 	"test6 ld_imm64",
 	.insns = {
 	BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 0),
diff --git a/tools/testing/selftests/bpf/verifier/map_ptr.c b/tools/testing/selftests/bpf/verifier/map_ptr.c
index b52209db8250..637f9293bda8 100644
--- a/tools/testing/selftests/bpf/verifier/map_ptr.c
+++ b/tools/testing/selftests/bpf/verifier/map_ptr.c
@@ -60,3 +60,35 @@
 	.result = ACCEPT,
 	.retval = 1,
 },
+{
+	"bpf_map_ptr: r = 0, map_ptr = map_ptr + r",
+	.insns = {
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_hash_16b = { 4 },
+	.result = ACCEPT,
+},
+{
+	"bpf_map_ptr: r = 0, r = r + map_ptr",
+	.insns = {
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+	BPF_MOV64_IMM(BPF_REG_1, 0),
+	BPF_LD_MAP_FD(BPF_REG_0, 0),
+	BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_hash_16b = { 4 },
+	.result = ACCEPT,
+},
diff --git a/tools/testing/selftests/bpf/verifier/ref_tracking.c b/tools/testing/selftests/bpf/verifier/ref_tracking.c
index 056e0273bf12..006b5bd99c08 100644
--- a/tools/testing/selftests/bpf/verifier/ref_tracking.c
+++ b/tools/testing/selftests/bpf/verifier/ref_tracking.c
@@ -854,3 +854,50 @@
 	.errstr = "Unreleased reference",
 	.result = REJECT,
 },
+{
+	"reference tracking: bpf_sk_release(btf_tcp_sock)",
+	.insns = {
+	BPF_SK_LOOKUP(sk_lookup_tcp),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+	BPF_EMIT_CALL(BPF_FUNC_skc_to_tcp_sock),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_EMIT_CALL(BPF_FUNC_sk_release),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+	BPF_EMIT_CALL(BPF_FUNC_sk_release),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.result = ACCEPT,
+	.result_unpriv = REJECT,
+	.errstr_unpriv = "unknown func",
+},
+{
+	"reference tracking: use ptr from bpf_skc_to_tcp_sock() after release",
+	.insns = {
+	BPF_SK_LOOKUP(sk_lookup_tcp),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+	BPF_EMIT_CALL(BPF_FUNC_skc_to_tcp_sock),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_EMIT_CALL(BPF_FUNC_sk_release),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_EMIT_CALL(BPF_FUNC_sk_release),
+	BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_7, 0),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.result = REJECT,
+	.errstr = "invalid mem access",
+	.result_unpriv = REJECT,
+	.errstr_unpriv = "unknown func",
+},
diff --git a/tools/testing/selftests/bpf/verifier/regalloc.c b/tools/testing/selftests/bpf/verifier/regalloc.c
new file mode 100644
index 000000000000..4ad7e05de706
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/regalloc.c
@@ -0,0 +1,269 @@
+{
+	"regalloc basic",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
+	BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+	BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+	BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 20, 4),
+	BPF_JMP_IMM(BPF_JSLT, BPF_REG_2, 0, 3),
+	BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
+	BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_2),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_hash_48b = { 4 },
+	.result = ACCEPT,
+	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
+{
+	"regalloc negative",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
+	BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+	BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+	BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 24, 4),
+	BPF_JMP_IMM(BPF_JSLT, BPF_REG_2, 0, 3),
+	BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
+	BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_2),
+	BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_7, 0),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_hash_48b = { 4 },
+	.result = REJECT,
+	.errstr = "invalid access to map value, value_size=48 off=48 size=1",
+	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
+{
+	"regalloc src_reg mark",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+	BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+	BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+	BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 20, 5),
+	BPF_MOV64_IMM(BPF_REG_3, 0),
+	BPF_JMP_REG(BPF_JSGE, BPF_REG_3, BPF_REG_2, 3),
+	BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
+	BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_2),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_hash_48b = { 4 },
+	.result = ACCEPT,
+	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
+{
+	"regalloc src_reg negative",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
+	BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+	BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+	BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 22, 5),
+	BPF_MOV64_IMM(BPF_REG_3, 0),
+	BPF_JMP_REG(BPF_JSGE, BPF_REG_3, BPF_REG_2, 3),
+	BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
+	BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_2),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_hash_48b = { 4 },
+	.result = REJECT,
+	.errstr = "invalid access to map value, value_size=48 off=44 size=8",
+	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
+{
+	"regalloc and spill",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
+	BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+	BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+	BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 20, 7),
+	/* r0 has upper bound that should propagate into r2 */
+	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -8), /* spill r2 */
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_MOV64_IMM(BPF_REG_2, 0), /* clear r0 and r2 */
+	BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_10, -8), /* fill r3 */
+	BPF_JMP_REG(BPF_JSGE, BPF_REG_0, BPF_REG_3, 2),
+	/* r3 has lower and upper bounds */
+	BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_3),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_hash_48b = { 4 },
+	.result = ACCEPT,
+	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
+{
+	"regalloc and spill negative",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
+	BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+	BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+	BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 48, 7),
+	/* r0 has upper bound that should propagate into r2 */
+	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -8), /* spill r2 */
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_MOV64_IMM(BPF_REG_2, 0), /* clear r0 and r2 */
+	BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_10, -8), /* fill r3 */
+	BPF_JMP_REG(BPF_JSGE, BPF_REG_0, BPF_REG_3, 2),
+	/* r3 has lower and upper bounds */
+	BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_3),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_hash_48b = { 4 },
+	.result = REJECT,
+	.errstr = "invalid access to map value, value_size=48 off=48 size=8",
+	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
+{
+	"regalloc three regs",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10),
+	BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+	BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+	BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
+	BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 12, 5),
+	BPF_JMP_IMM(BPF_JSLT, BPF_REG_2, 0, 4),
+	BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0),
+	BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_2),
+	BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_4),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_hash_48b = { 4 },
+	.result = ACCEPT,
+	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
+{
+	"regalloc after call",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10),
+	BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+	BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
+	BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
+	BPF_MOV64_REG(BPF_REG_9, BPF_REG_0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6),
+	BPF_JMP_IMM(BPF_JSGT, BPF_REG_8, 20, 4),
+	BPF_JMP_IMM(BPF_JSLT, BPF_REG_9, 0, 3),
+	BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_8),
+	BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_9),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_hash_48b = { 4 },
+	.result = ACCEPT,
+	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
+{
+	"regalloc in callee",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+	BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+	BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+	BPF_MOV64_REG(BPF_REG_3, BPF_REG_7),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 20, 5),
+	BPF_JMP_IMM(BPF_JSLT, BPF_REG_2, 0, 4),
+	BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_1),
+	BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_2),
+	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_hash_48b = { 4 },
+	.result = ACCEPT,
+	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
+{
+	"regalloc, spill, JEQ",
+	.insns = {
+	BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), /* spill r0 */
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 0),
+	/* The verifier will walk the rest twice with r0 == 0 and r0 == map_value */
+	BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 20, 0),
+	/* The verifier will walk the rest two more times with r0 == 20 and r0 == unknown */
+	BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_10, -8), /* fill r3 with map_value */
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0, 1), /* skip ldx if map_value == NULL */
+	/* Buggy verifier will think that r3 == 20 here */
+	BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), /* read from map_value */
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_hash_48b = { 4 },
+	.result = ACCEPT,
+	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
+},
diff --git a/tools/testing/selftests/clone3/clone3.c b/tools/testing/selftests/clone3/clone3.c
index b7e6dec36173..42be3b925830 100644
--- a/tools/testing/selftests/clone3/clone3.c
+++ b/tools/testing/selftests/clone3/clone3.c
@@ -20,13 +20,6 @@
 #include "../kselftest.h"
 #include "clone3_selftests.h"
 
-/*
- * Different sizes of struct clone_args
- */
-#ifndef CLONE3_ARGS_SIZE_V0
-#define CLONE3_ARGS_SIZE_V0 64
-#endif
-
 enum test_mode {
 	CLONE3_ARGS_NO_TEST,
 	CLONE3_ARGS_ALL_0,
@@ -38,13 +31,13 @@ enum test_mode {
 
 static int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode)
 {
-	struct clone_args args = {
+	struct __clone_args args = {
 		.flags = flags,
 		.exit_signal = SIGCHLD,
 	};
 
 	struct clone_args_extended {
-		struct clone_args args;
+		struct __clone_args args;
 		__aligned_u64 excess_space[2];
 	} args_ext;
 
@@ -52,11 +45,11 @@ static int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode)
 	int status;
 
 	memset(&args_ext, 0, sizeof(args_ext));
-	if (size > sizeof(struct clone_args))
+	if (size > sizeof(struct __clone_args))
 		args_ext.excess_space[1] = 1;
 
 	if (size == 0)
-		size = sizeof(struct clone_args);
+		size = sizeof(struct __clone_args);
 
 	switch (test_mode) {
 	case CLONE3_ARGS_ALL_0:
@@ -77,9 +70,9 @@ static int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode)
 		break;
 	}
 
-	memcpy(&args_ext.args, &args, sizeof(struct clone_args));
+	memcpy(&args_ext.args, &args, sizeof(struct __clone_args));
 
-	pid = sys_clone3((struct clone_args *)&args_ext, size);
+	pid = sys_clone3((struct __clone_args *)&args_ext, size);
 	if (pid < 0) {
 		ksft_print_msg("%s - Failed to create new process\n",
 				strerror(errno));
@@ -144,14 +137,14 @@ int main(int argc, char *argv[])
 	else
 		ksft_test_result_skip("Skipping clone3() with CLONE_NEWPID\n");
 
-	/* Do a clone3() with CLONE3_ARGS_SIZE_V0. */
-	test_clone3(0, CLONE3_ARGS_SIZE_V0, 0, CLONE3_ARGS_NO_TEST);
+	/* Do a clone3() with CLONE_ARGS_SIZE_VER0. */
+	test_clone3(0, CLONE_ARGS_SIZE_VER0, 0, CLONE3_ARGS_NO_TEST);
 
-	/* Do a clone3() with CLONE3_ARGS_SIZE_V0 - 8 */
-	test_clone3(0, CLONE3_ARGS_SIZE_V0 - 8, -EINVAL, CLONE3_ARGS_NO_TEST);
+	/* Do a clone3() with CLONE_ARGS_SIZE_VER0 - 8 */
+	test_clone3(0, CLONE_ARGS_SIZE_VER0 - 8, -EINVAL, CLONE3_ARGS_NO_TEST);
 
 	/* Do a clone3() with sizeof(struct clone_args) + 8 */
-	test_clone3(0, sizeof(struct clone_args) + 8, 0, CLONE3_ARGS_NO_TEST);
+	test_clone3(0, sizeof(struct __clone_args) + 8, 0, CLONE3_ARGS_NO_TEST);
 
 	/* Do a clone3() with exit_signal having highest 32 bits non-zero */
 	test_clone3(0, 0, -EINVAL, CLONE3_ARGS_INVAL_EXIT_SIGNAL_BIG);
@@ -165,31 +158,31 @@ int main(int argc, char *argv[])
 	/* Do a clone3() with NSIG < exit_signal < CSIG */
 	test_clone3(0, 0, -EINVAL, CLONE3_ARGS_INVAL_EXIT_SIGNAL_NSIG);
 
-	test_clone3(0, sizeof(struct clone_args) + 8, 0, CLONE3_ARGS_ALL_0);
+	test_clone3(0, sizeof(struct __clone_args) + 8, 0, CLONE3_ARGS_ALL_0);
 
-	test_clone3(0, sizeof(struct clone_args) + 16, -E2BIG,
+	test_clone3(0, sizeof(struct __clone_args) + 16, -E2BIG,
 			CLONE3_ARGS_ALL_0);
 
-	test_clone3(0, sizeof(struct clone_args) * 2, -E2BIG,
+	test_clone3(0, sizeof(struct __clone_args) * 2, -E2BIG,
 			CLONE3_ARGS_ALL_0);
 
 	/* Do a clone3() with > page size */
 	test_clone3(0, getpagesize() + 8, -E2BIG, CLONE3_ARGS_NO_TEST);
 
-	/* Do a clone3() with CLONE3_ARGS_SIZE_V0 in a new PID NS. */
+	/* Do a clone3() with CLONE_ARGS_SIZE_VER0 in a new PID NS. */
 	if (uid == 0)
-		test_clone3(CLONE_NEWPID, CLONE3_ARGS_SIZE_V0, 0,
+		test_clone3(CLONE_NEWPID, CLONE_ARGS_SIZE_VER0, 0,
 				CLONE3_ARGS_NO_TEST);
 	else
 		ksft_test_result_skip("Skipping clone3() with CLONE_NEWPID\n");
 
-	/* Do a clone3() with CLONE3_ARGS_SIZE_V0 - 8 in a new PID NS */
-	test_clone3(CLONE_NEWPID, CLONE3_ARGS_SIZE_V0 - 8, -EINVAL,
+	/* Do a clone3() with CLONE_ARGS_SIZE_VER0 - 8 in a new PID NS */
+	test_clone3(CLONE_NEWPID, CLONE_ARGS_SIZE_VER0 - 8, -EINVAL,
 			CLONE3_ARGS_NO_TEST);
 
 	/* Do a clone3() with sizeof(struct clone_args) + 8 in a new PID NS */
 	if (uid == 0)
-		test_clone3(CLONE_NEWPID, sizeof(struct clone_args) + 8, 0,
+		test_clone3(CLONE_NEWPID, sizeof(struct __clone_args) + 8, 0,
 				CLONE3_ARGS_NO_TEST);
 	else
 		ksft_test_result_skip("Skipping clone3() with CLONE_NEWPID\n");
diff --git a/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c
index 9562425aa0a9..55bd387ce7ec 100644
--- a/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c
+++ b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c
@@ -44,13 +44,13 @@ static int call_clone3_set_tid(struct __test_metadata *_metadata,
 	int status;
 	pid_t pid = -1;
 
-	struct clone_args args = {
+	struct __clone_args args = {
 		.exit_signal = SIGCHLD,
 		.set_tid = ptr_to_u64(set_tid),
 		.set_tid_size = set_tid_size,
 	};
 
-	pid = sys_clone3(&args, sizeof(struct clone_args));
+	pid = sys_clone3(&args, sizeof(args));
 	if (pid < 0) {
 		TH_LOG("%s - Failed to create new process", strerror(errno));
 		return -errno;
diff --git a/tools/testing/selftests/clone3/clone3_clear_sighand.c b/tools/testing/selftests/clone3/clone3_clear_sighand.c
index db5fc9c5edcf..47a8c0fc3676 100644
--- a/tools/testing/selftests/clone3/clone3_clear_sighand.c
+++ b/tools/testing/selftests/clone3/clone3_clear_sighand.c
@@ -47,7 +47,7 @@ static void test_clone3_clear_sighand(void)
 {
 	int ret;
 	pid_t pid;
-	struct clone_args args = {};
+	struct __clone_args args = {};
 	struct sigaction act;
 
 	/*
diff --git a/tools/testing/selftests/clone3/clone3_selftests.h b/tools/testing/selftests/clone3/clone3_selftests.h
index 91c1a78ddb39..e81ffaaee02b 100644
--- a/tools/testing/selftests/clone3/clone3_selftests.h
+++ b/tools/testing/selftests/clone3/clone3_selftests.h
@@ -19,13 +19,11 @@
 #define CLONE_INTO_CGROUP 0x200000000ULL /* Clone into a specific cgroup given the right permissions. */
 #endif
 
-#ifndef CLONE_ARGS_SIZE_VER0
-#define CLONE_ARGS_SIZE_VER0 64
-#endif
-
 #ifndef __NR_clone3
 #define __NR_clone3 -1
-struct clone_args {
+#endif
+
+struct __clone_args {
 	__aligned_u64 flags;
 	__aligned_u64 pidfd;
 	__aligned_u64 child_tid;
@@ -34,15 +32,21 @@ struct clone_args {
 	__aligned_u64 stack;
 	__aligned_u64 stack_size;
 	__aligned_u64 tls;
-#define CLONE_ARGS_SIZE_VER1 80
+#ifndef CLONE_ARGS_SIZE_VER0
+#define CLONE_ARGS_SIZE_VER0 64	/* sizeof first published struct */
+#endif
 	__aligned_u64 set_tid;
 	__aligned_u64 set_tid_size;
-#define CLONE_ARGS_SIZE_VER2 88
+#ifndef CLONE_ARGS_SIZE_VER1
+#define CLONE_ARGS_SIZE_VER1 80	/* sizeof second published struct */
+#endif
 	__aligned_u64 cgroup;
+#ifndef CLONE_ARGS_SIZE_VER2
+#define CLONE_ARGS_SIZE_VER2 88	/* sizeof third published struct */
+#endif
 };
-#endif /* __NR_clone3 */
 
-static pid_t sys_clone3(struct clone_args *args, size_t size)
+static pid_t sys_clone3(struct __clone_args *args, size_t size)
 {
 	fflush(stdout);
 	fflush(stderr);
@@ -52,7 +56,7 @@ static pid_t sys_clone3(struct clone_args *args, size_t size)
 static inline void test_clone3_supported(void)
 {
 	pid_t pid;
-	struct clone_args args = {};
+	struct __clone_args args = {};
 
 	if (__NR_clone3 < 0)
 		ksft_exit_skip("clone3() syscall is not supported\n");
diff --git a/tools/testing/selftests/clone3/clone3_set_tid.c b/tools/testing/selftests/clone3/clone3_set_tid.c
index 5831c1082d6d..0229e9ebb995 100644
--- a/tools/testing/selftests/clone3/clone3_set_tid.c
+++ b/tools/testing/selftests/clone3/clone3_set_tid.c
@@ -46,14 +46,14 @@ static int call_clone3_set_tid(pid_t *set_tid,
 	int status;
 	pid_t pid = -1;
 
-	struct clone_args args = {
+	struct __clone_args args = {
 		.flags = flags,
 		.exit_signal = SIGCHLD,
 		.set_tid = ptr_to_u64(set_tid),
 		.set_tid_size = set_tid_size,
 	};
 
-	pid = sys_clone3(&args, sizeof(struct clone_args));
+	pid = sys_clone3(&args, sizeof(args));
 	if (pid < 0) {
 		ksft_print_msg("%s - Failed to create new process\n",
 			       strerror(errno));
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh
index 47edf099a17e..508a702f0021 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh
@@ -207,7 +207,7 @@ __rate_test()
 
 	RET=0
 
-	devlink trap policer set $DEVLINK_DEV policer $id rate 1000 burst 16
+	devlink trap policer set $DEVLINK_DEV policer $id rate 1000 burst 512
 	devlink trap group set $DEVLINK_DEV group l3_drops policer $id
 
 	# Send packets at highest possible rate and make sure they are dropped
@@ -220,8 +220,8 @@ __rate_test()
 
 	rate=$(trap_rate_get)
 	pct=$((100 * (rate - 1000) / 1000))
-	((-5 <= pct && pct <= 5))
-	check_err $? "Expected rate 1000 pps, got $rate pps, which is $pct% off. Required accuracy is +-5%"
+	((-10 <= pct && pct <= 10))
+	check_err $? "Expected rate 1000 pps, got $rate pps, which is $pct% off. Required accuracy is +-10%"
 	log_info "Expected rate 1000 pps, measured rate $rate pps"
 
 	drop_rate=$(policer_drop_rate_get $id)
@@ -288,35 +288,12 @@ __burst_test()
 
 	RET=0
 
-	devlink trap policer set $DEVLINK_DEV policer $id rate 1000 burst 32
+	devlink trap policer set $DEVLINK_DEV policer $id rate 1000 burst 512
 	devlink trap group set $DEVLINK_DEV group l3_drops policer $id
 
-	# Send a burst of 64 packets and make sure that about 32 are received
-	# and the rest are dropped by the policer
-	log_info "=== Tx burst size: 64, Policer burst size: 32 pps ==="
-
-	t0_rx=$(devlink_trap_rx_packets_get blackhole_route)
-	t0_drop=$(devlink_trap_policer_rx_dropped_get $id)
-
-	start_traffic $h1 192.0.2.1 198.51.100.100 $rp1_mac -c 64
-
-	t1_rx=$(devlink_trap_rx_packets_get blackhole_route)
-	t1_drop=$(devlink_trap_policer_rx_dropped_get $id)
-
-	rx=$((t1_rx - t0_rx))
-	pct=$((100 * (rx - 32) / 32))
-	((-20 <= pct && pct <= 20))
-	check_err $? "Expected burst size of 32 packets, got $rx packets, which is $pct% off. Required accuracy is +-20%"
-	log_info "Expected burst size of 32 packets, measured burst size of $rx packets"
-
-	drop=$((t1_drop - t0_drop))
-	(( drop > 0 ))
-	check_err $? "Expected non-zero policer drops, got 0"
-	log_info "Measured policer drops of $drop packets"
-
 	# Send a burst of 16 packets and make sure that 16 are received
 	# and that none are dropped by the policer
-	log_info "=== Tx burst size: 16, Policer burst size: 32 pps ==="
+	log_info "=== Tx burst size: 16, Policer burst size: 512 ==="
 
 	t0_rx=$(devlink_trap_rx_packets_get blackhole_route)
 	t0_drop=$(devlink_trap_policer_rx_dropped_get $id)
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh
index 6d1790b5de7a..e9f8718af979 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh
@@ -147,17 +147,26 @@ switch_create()
 
 	# Make sure that ingress quotas are smaller than egress so that there is
 	# room for both streams of traffic to be admitted to shared buffer.
+	devlink_pool_size_thtype_save 0
 	devlink_pool_size_thtype_set 0 dynamic 10000000
+	devlink_pool_size_thtype_save 4
 	devlink_pool_size_thtype_set 4 dynamic 10000000
 
+	devlink_port_pool_th_save $swp1 0
 	devlink_port_pool_th_set $swp1 0 6
+	devlink_tc_bind_pool_th_save $swp1 1 ingress
 	devlink_tc_bind_pool_th_set $swp1 1 ingress 0 6
 
+	devlink_port_pool_th_save $swp2 0
 	devlink_port_pool_th_set $swp2 0 6
+	devlink_tc_bind_pool_th_save $swp2 2 ingress
 	devlink_tc_bind_pool_th_set $swp2 2 ingress 0 6
 
+	devlink_tc_bind_pool_th_save $swp3 1 egress
 	devlink_tc_bind_pool_th_set $swp3 1 egress 4 7
+	devlink_tc_bind_pool_th_save $swp3 2 egress
 	devlink_tc_bind_pool_th_set $swp3 2 egress 4 7
+	devlink_port_pool_th_save $swp3 4
 	devlink_port_pool_th_set $swp3 4 7
 }
 
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh
new file mode 100755
index 000000000000..27de3d9ed08e
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh
@@ -0,0 +1,379 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+	test_defaults
+	test_dcb_ets
+	test_mtu
+	test_pfc
+	test_int_buf
+	test_tc_priomap
+	test_tc_mtu
+	test_tc_sizes
+	test_tc_int_buf
+"
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+NUM_NETIFS=0
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+source qos_lib.sh
+
+swp=$NETIF_NO_CABLE
+
+cleanup()
+{
+	pre_cleanup
+}
+
+get_prio_pg()
+{
+	__mlnx_qos -i $swp | sed -n '/^PFC/,/^[^[:space:]]/p' |
+		grep buffer | sed 's/ \+/ /g' | cut -d' ' -f 2-
+}
+
+get_prio_pfc()
+{
+	__mlnx_qos -i $swp | sed -n '/^PFC/,/^[^[:space:]]/p' |
+		grep enabled | sed 's/ \+/ /g' | cut -d' ' -f 2-
+}
+
+get_prio_tc()
+{
+	__mlnx_qos -i $swp | sed -n '/^tc/,$p' |
+		awk '/^tc/ { TC = $2 }
+		     /priority:/ { PRIO[$2]=TC }
+		     END {
+			for (i in PRIO)
+			    printf("%d ", PRIO[i])
+		     }'
+}
+
+get_buf_size()
+{
+	local idx=$1; shift
+
+	__mlnx_qos -i $swp | grep Receive | sed 's/.*: //' | cut -d, -f $((idx + 1))
+}
+
+get_tot_size()
+{
+	__mlnx_qos -i $swp | grep Receive | sed 's/.*total_size=//'
+}
+
+check_prio_pg()
+{
+	local expect=$1; shift
+
+	local current=$(get_prio_pg)
+	test "$current" = "$expect"
+	check_err $? "prio2buffer is '$current', expected '$expect'"
+}
+
+check_prio_pfc()
+{
+	local expect=$1; shift
+
+	local current=$(get_prio_pfc)
+	test "$current" = "$expect"
+	check_err $? "prio PFC is '$current', expected '$expect'"
+}
+
+check_prio_tc()
+{
+	local expect=$1; shift
+
+	local current=$(get_prio_tc)
+	test "$current" = "$expect"
+	check_err $? "prio_tc is '$current', expected '$expect'"
+}
+
+__check_buf_size()
+{
+	local idx=$1; shift
+	local expr=$1; shift
+	local what=$1; shift
+
+	local current=$(get_buf_size $idx)
+	((current $expr))
+	check_err $? "${what}buffer $idx size is '$current', expected '$expr'"
+	echo $current
+}
+
+check_buf_size()
+{
+	__check_buf_size "$@" > /dev/null
+}
+
+test_defaults()
+{
+	RET=0
+
+	check_prio_pg "0 0 0 0 0 0 0 0 "
+	check_prio_tc "0 0 0 0 0 0 0 0 "
+	check_prio_pfc "0 0 0 0 0 0 0 0 "
+
+	log_test "Default headroom configuration"
+}
+
+test_dcb_ets()
+{
+	RET=0
+
+	__mlnx_qos -i $swp --prio_tc=0,2,4,6,1,3,5,7 > /dev/null
+
+	check_prio_pg "0 2 4 6 1 3 5 7 "
+	check_prio_tc "0 2 4 6 1 3 5 7 "
+	check_prio_pfc "0 0 0 0 0 0 0 0 "
+
+	__mlnx_qos -i $swp --prio_tc=0,0,0,0,0,0,0,0 > /dev/null
+
+	check_prio_pg "0 0 0 0 0 0 0 0 "
+	check_prio_tc "0 0 0 0 0 0 0 0 "
+
+	__mlnx_qos -i $swp --prio2buffer=1,3,5,7,0,2,4,6 &> /dev/null
+	check_fail $? "prio2buffer accepted in DCB mode"
+
+	log_test "Configuring headroom through ETS"
+}
+
+test_mtu()
+{
+	local what=$1; shift
+	local buf0size_2
+	local buf0size
+
+	RET=0
+	buf0size=$(__check_buf_size 0 "> 0")
+
+	mtu_set $swp 3000
+	buf0size_2=$(__check_buf_size 0 "> $buf0size" "MTU 3000: ")
+	mtu_restore $swp
+
+	mtu_set $swp 6000
+	check_buf_size 0 "> $buf0size_2" "MTU 6000: "
+	mtu_restore $swp
+
+	check_buf_size 0 "== $buf0size"
+
+	log_test "${what}MTU impacts buffer size"
+}
+
+test_tc_mtu()
+{
+	# In TC mode, MTU still impacts the threshold below which a buffer is
+	# not permitted to go.
+
+	tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M
+	test_mtu "TC: "
+	tc qdisc delete dev $swp root
+}
+
+test_pfc()
+{
+	RET=0
+
+	__mlnx_qos -i $swp --prio_tc=0,0,0,0,0,1,2,3 > /dev/null
+
+	local buf0size=$(get_buf_size 0)
+	local buf1size=$(get_buf_size 1)
+	local buf2size=$(get_buf_size 2)
+	local buf3size=$(get_buf_size 3)
+	check_buf_size 0 "> 0"
+	check_buf_size 1 "> 0"
+	check_buf_size 2 "> 0"
+	check_buf_size 3 "> 0"
+	check_buf_size 4 "== 0"
+	check_buf_size 5 "== 0"
+	check_buf_size 6 "== 0"
+	check_buf_size 7 "== 0"
+
+	log_test "Buffer size sans PFC"
+
+	RET=0
+
+	__mlnx_qos -i $swp --pfc=0,0,0,0,0,1,1,1 --cable_len=0 > /dev/null
+
+	check_prio_pg "0 0 0 0 0 1 2 3 "
+	check_prio_pfc "0 0 0 0 0 1 1 1 "
+	check_buf_size 0 "== $buf0size"
+	check_buf_size 1 "> $buf1size"
+	check_buf_size 2 "> $buf2size"
+	check_buf_size 3 "> $buf3size"
+
+	local buf1size=$(get_buf_size 1)
+	check_buf_size 2 "== $buf1size"
+	check_buf_size 3 "== $buf1size"
+
+	log_test "PFC: Cable length 0"
+
+	RET=0
+
+	__mlnx_qos -i $swp --pfc=0,0,0,0,0,1,1,1 --cable_len=1000 > /dev/null
+
+	check_buf_size 0 "== $buf0size"
+	check_buf_size 1 "> $buf1size"
+	check_buf_size 2 "> $buf1size"
+	check_buf_size 3 "> $buf1size"
+
+	log_test "PFC: Cable length 1000"
+
+	RET=0
+
+	__mlnx_qos -i $swp --pfc=0,0,0,0,0,0,0,0 --cable_len=0 > /dev/null
+	__mlnx_qos -i $swp --prio_tc=0,0,0,0,0,0,0,0 > /dev/null
+
+	check_prio_pg "0 0 0 0 0 0 0 0 "
+	check_prio_tc "0 0 0 0 0 0 0 0 "
+	check_buf_size 0 "> 0"
+	check_buf_size 1 "== 0"
+	check_buf_size 2 "== 0"
+	check_buf_size 3 "== 0"
+	check_buf_size 4 "== 0"
+	check_buf_size 5 "== 0"
+	check_buf_size 6 "== 0"
+	check_buf_size 7 "== 0"
+
+	log_test "PFC: Restore defaults"
+}
+
+test_tc_priomap()
+{
+	RET=0
+
+	__mlnx_qos -i $swp --prio_tc=0,1,2,3,4,5,6,7 > /dev/null
+	check_prio_pg "0 1 2 3 4 5 6 7 "
+
+	tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M
+	check_prio_pg "0 0 0 0 0 0 0 0 "
+
+	__mlnx_qos -i $swp --prio2buffer=1,3,5,7,0,2,4,6 > /dev/null
+	check_prio_pg "1 3 5 7 0 2 4 6 "
+
+	tc qdisc delete dev $swp root
+	check_prio_pg "0 1 2 3 4 5 6 7 "
+
+	# Clean up.
+	tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M
+	__mlnx_qos -i $swp --prio2buffer=0,0,0,0,0,0,0,0 > /dev/null
+	tc qdisc delete dev $swp root
+	__mlnx_qos -i $swp --prio_tc=0,0,0,0,0,0,0,0 > /dev/null
+
+	log_test "TC: priomap"
+}
+
+test_tc_sizes()
+{
+	local cell_size=$(devlink_cell_size_get)
+	local size=$((cell_size * 1000))
+
+	RET=0
+
+	__mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 &> /dev/null
+	check_fail $? "buffer_size should fail before qdisc is added"
+
+	tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M
+
+	__mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null
+	check_err $? "buffer_size should pass after qdisc is added"
+	check_buf_size 0 "== $size" "set size: "
+
+	mtu_set $swp 6000
+	check_buf_size 0 "== $size" "set MTU: "
+	mtu_restore $swp
+
+	__mlnx_qos -i $swp --buffer_size=0,0,0,0,0,0,0,0 > /dev/null
+
+	# After replacing the qdisc for the same kind, buffer_size still has to
+	# work.
+	tc qdisc replace dev $swp root handle 1: bfifo limit 1M
+
+	__mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null
+	check_buf_size 0 "== $size" "post replace, set size: "
+
+	__mlnx_qos -i $swp --buffer_size=0,0,0,0,0,0,0,0 > /dev/null
+
+	# Likewise after replacing for a different kind.
+	tc qdisc replace dev $swp root handle 2: prio bands 8
+
+	__mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null
+	check_buf_size 0 "== $size" "post replace different kind, set size: "
+
+	tc qdisc delete dev $swp root
+
+	__mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 &> /dev/null
+	check_fail $? "buffer_size should fail after qdisc is deleted"
+
+	log_test "TC: buffer size"
+}
+
+test_int_buf()
+{
+	local what=$1; shift
+
+	RET=0
+
+	local buf0size=$(get_buf_size 0)
+	local tot_size=$(get_tot_size)
+
+	# Size of internal buffer and buffer 9.
+	local dsize=$((tot_size - buf0size))
+
+	tc qdisc add dev $swp clsact
+	tc filter add dev $swp egress matchall skip_sw action mirred egress mirror dev $swp
+
+	local buf0size_2=$(get_buf_size 0)
+	local tot_size_2=$(get_tot_size)
+	local dsize_2=$((tot_size_2 - buf0size_2))
+
+	# Egress SPAN should have added to the "invisible" buffer configuration.
+	((dsize_2 > dsize))
+	check_err $? "Invisible buffers account for '$dsize_2', expected '> $dsize'"
+
+	mtu_set $swp 3000
+
+	local buf0size_3=$(get_buf_size 0)
+	local tot_size_3=$(get_tot_size)
+	local dsize_3=$((tot_size_3 - buf0size_3))
+
+	# MTU change might change buffer 0, which will show at total, but the
+	# hidden buffers should stay the same size.
+	((dsize_3 == dsize_2))
+	check_err $? "MTU change: Invisible buffers account for '$dsize_3', expected '== $dsize_2'"
+
+	mtu_restore $swp
+	tc qdisc del dev $swp clsact
+
+	# After SPAN removal, hidden buffers should be back to the original sizes.
+	local buf0size_4=$(get_buf_size 0)
+	local tot_size_4=$(get_tot_size)
+	local dsize_4=$((tot_size_4 - buf0size_4))
+	((dsize_4 == dsize))
+	check_err $? "SPAN removed: Invisible buffers account for '$dsize_4', expected '== $dsize'"
+
+	log_test "${what}internal buffer size"
+}
+
+test_tc_int_buf()
+{
+	local cell_size=$(devlink_cell_size_get)
+	local size=$((cell_size * 1000))
+
+	tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M
+	test_int_buf "TC: "
+
+	__mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null
+	test_int_buf "TC+buffsize: "
+
+	__mlnx_qos -i $swp --buffer_size=0,0,0,0,0,0,0,0 > /dev/null
+	tc qdisc delete dev $swp root
+}
+
+trap cleanup EXIT
+
+bail_on_lldpad
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh
index faa51012cdac..0bf76f13c030 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh
@@ -82,3 +82,17 @@ bail_on_lldpad()
 		fi
 	fi
 }
+
+__mlnx_qos()
+{
+	local err
+
+	mlnx_qos "$@" 2>/dev/null
+	err=$?
+
+	if ((err)); then
+		echo "Error ($err) in mlnx_qos $@" >/dev/stderr
+	fi
+
+	return $err
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
index b025daea062d..8f164c80e215 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
@@ -145,12 +145,17 @@ switch_create()
 
 	# Make sure that ingress quotas are smaller than egress so that there is
 	# room for both streams of traffic to be admitted to shared buffer.
+	devlink_port_pool_th_save $swp1 0
 	devlink_port_pool_th_set $swp1 0 5
+	devlink_tc_bind_pool_th_save $swp1 0 ingress
 	devlink_tc_bind_pool_th_set $swp1 0 ingress 0 5
 
+	devlink_port_pool_th_save $swp2 0
 	devlink_port_pool_th_set $swp2 0 5
+	devlink_tc_bind_pool_th_save $swp2 1 ingress
 	devlink_tc_bind_pool_th_set $swp2 1 ingress 0 5
 
+	devlink_port_pool_th_save $swp3 4
 	devlink_port_pool_th_set $swp3 4 12
 }
 
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh
new file mode 100755
index 000000000000..4d900bc1f76c
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh
@@ -0,0 +1,403 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test injects a 10-MB burst of traffic with VLAN tag and 802.1p priority
+# of 1. This stream is consistently prioritized as priority 1, is put to PG
+# buffer 1, and scheduled at TC 1.
+#
+# - the stream first ingresses through $swp1, where it is forwarded to $swp3
+#
+# - then it ingresses through $swp4. Here it is put to a lossless buffer and put
+#   to a small pool ("PFC pool"). The traffic is forwarded to $swp2, which is
+#   shaped, and thus the PFC pool eventually fills, therefore the headroom
+#   fills, and $swp3 is paused.
+#
+# - since $swp3 now can't send traffic, the traffic ingressing $swp1 is kept at
+#   a pool ("overflow pool"). The overflow pool needs to be large enough to
+#   contain the whole burst.
+#
+# - eventually the PFC pool gets some traffic out, headroom therefore gets some
+#   traffic to the pool, and $swp3 is unpaused again. This way the traffic is
+#   gradually forwarded from the overflow pool, through the PFC pool, out of
+#   $swp2, and eventually to $h2.
+#
+# - if PFC works, all lossless flow packets that ingress through $swp1 should
+#   also be seen ingressing $h2. If it doesn't, there will be drops due to
+#   discrepancy between the speeds of $swp1 and $h2.
+#
+# - it should all play out relatively quickly, so that SLL and HLL will not
+#   cause drops.
+#
+# +-----------------------+
+# | H1                    |
+# |   + $h1.111           |
+# |   | 192.0.2.33/28     |
+# |   |                   |
+# |   + $h1               |
+# +---|-------------------+  +--------------------+
+#     |                      |                    |
+# +---|----------------------|--------------------|---------------------------+
+# |   + $swp1          $swp3 +                    + $swp4                     |
+# |   | iPOOL1        iPOOL0 |                    | iPOOL2                    |
+# |   | ePOOL4        ePOOL5 |                    | ePOOL4                    |
+# |   |                1Gbps |                    | 1Gbps                     |
+# |   |        PFC:enabled=1 |                    | PFC:enabled=1             |
+# | +-|----------------------|-+                +-|------------------------+  |
+# | | + $swp1.111  $swp3.111 + |                | + $swp4.111              |  |
+# | |                          |                |                          |  |
+# | | BR1                      |                | BR2                      |  |
+# | |                          |                |                          |  |
+# | |                          |                |         + $swp2.111      |  |
+# | +--------------------------+                +---------|----------------+  |
+# |                                                       |                   |
+# | iPOOL0: 500KB dynamic                                 |                   |
+# | iPOOL1: 10MB static                                   |                   |
+# | iPOOL2: 1MB static                                    + $swp2             |
+# | ePOOL4: 500KB dynamic                                 | iPOOL0            |
+# | ePOOL5: 10MB static                                   | ePOOL6            |
+# | ePOOL6: "infinite" static                             | 200Mbps shaper    |
+# +-------------------------------------------------------|-------------------+
+#                                                         |
+#                                                     +---|-------------------+
+#                                                     |   + $h2            H2 |
+#                                                     |   |                   |
+#                                                     |   + $h2.111           |
+#                                                     |     192.0.2.34/28     |
+#                                                     +-----------------------+
+#
+# iPOOL0+ePOOL4 is a helper pool for control traffic etc.
+# iPOOL1+ePOOL5 are overflow pools.
+# iPOOL2+ePOOL6 are PFC pools.
+
+ALL_TESTS="
+	ping_ipv4
+	test_qos_pfc
+"
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+NUM_NETIFS=6
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+source qos_lib.sh
+
+_1KB=1000
+_100KB=$((100 * _1KB))
+_500KB=$((500 * _1KB))
+_1MB=$((1000 * _1KB))
+_10MB=$((10 * _1MB))
+
+h1_create()
+{
+	simple_if_init $h1
+	mtu_set $h1 10000
+
+	vlan_create $h1 111 v$h1 192.0.2.33/28
+}
+
+h1_destroy()
+{
+	vlan_destroy $h1 111
+
+	mtu_restore $h1
+	simple_if_fini $h1
+}
+
+h2_create()
+{
+	simple_if_init $h2
+	mtu_set $h2 10000
+
+	vlan_create $h2 111 v$h2 192.0.2.34/28
+}
+
+h2_destroy()
+{
+	vlan_destroy $h2 111
+
+	mtu_restore $h2
+	simple_if_fini $h2
+}
+
+switch_create()
+{
+	# pools
+	# -----
+
+	devlink_pool_size_thtype_save 0
+	devlink_pool_size_thtype_save 4
+	devlink_pool_size_thtype_save 1
+	devlink_pool_size_thtype_save 5
+	devlink_pool_size_thtype_save 2
+	devlink_pool_size_thtype_save 6
+
+	devlink_port_pool_th_save $swp1 1
+	devlink_port_pool_th_save $swp2 6
+	devlink_port_pool_th_save $swp3 5
+	devlink_port_pool_th_save $swp4 2
+
+	devlink_tc_bind_pool_th_save $swp1 1 ingress
+	devlink_tc_bind_pool_th_save $swp2 1 egress
+	devlink_tc_bind_pool_th_save $swp3 1 egress
+	devlink_tc_bind_pool_th_save $swp4 1 ingress
+
+	# Control traffic pools. Just reduce the size. Keep them dynamic so that
+	# we don't need to change all the uninteresting quotas.
+	devlink_pool_size_thtype_set 0 dynamic $_500KB
+	devlink_pool_size_thtype_set 4 dynamic $_500KB
+
+	# Overflow pools.
+	devlink_pool_size_thtype_set 1 static $_10MB
+	devlink_pool_size_thtype_set 5 static $_10MB
+
+	# PFC pools. As per the writ, the size of egress PFC pool should be
+	# infinice, but actually it just needs to be large enough to not matter
+	# in practice, so reuse the 10MB limit.
+	devlink_pool_size_thtype_set 2 static $_1MB
+	devlink_pool_size_thtype_set 6 static $_10MB
+
+	# $swp1
+	# -----
+
+	ip link set dev $swp1 up
+	mtu_set $swp1 10000
+	vlan_create $swp1 111
+	ip link set dev $swp1.111 type vlan ingress-qos-map 0:0 1:1
+
+	devlink_port_pool_th_set $swp1 1 $_10MB
+	devlink_tc_bind_pool_th_set $swp1 1 ingress 1 $_10MB
+
+	# Configure qdisc so that we can configure PG and therefore pool
+	# assignment.
+	tc qdisc replace dev $swp1 root handle 1: \
+	   ets bands 8 strict 8 priomap 7 6
+	__mlnx_qos -i $swp1 --prio2buffer=0,1,0,0,0,0,0,0 >/dev/null
+
+	# $swp2
+	# -----
+
+	ip link set dev $swp2 up
+	mtu_set $swp2 10000
+	vlan_create $swp2 111
+	ip link set dev $swp2.111 type vlan egress-qos-map 0:0 1:1
+
+	devlink_port_pool_th_set $swp2 6 $_10MB
+	devlink_tc_bind_pool_th_set $swp2 1 egress 6 $_10MB
+
+	# prio 0->TC0 (band 7), 1->TC1 (band 6). TC1 is shaped.
+	tc qdisc replace dev $swp2 root handle 1: \
+	   ets bands 8 strict 8 priomap 7 6
+	tc qdisc replace dev $swp2 parent 1:7 handle 17: \
+	   tbf rate 200Mbit burst 131072 limit 1M
+
+	# $swp3
+	# -----
+
+	ip link set dev $swp3 up
+	mtu_set $swp3 10000
+	vlan_create $swp3 111
+	ip link set dev $swp3.111 type vlan egress-qos-map 0:0 1:1
+
+	devlink_port_pool_th_set $swp3 5 $_10MB
+	devlink_tc_bind_pool_th_set $swp3 1 egress 5 $_10MB
+
+	# prio 0->TC0 (band 7), 1->TC1 (band 6)
+	tc qdisc replace dev $swp3 root handle 1: \
+	   ets bands 8 strict 8 priomap 7 6
+
+	# Need to enable PFC so that PAUSE takes effect. Therefore need to put
+	# the lossless prio into a buffer of its own. Don't bother with buffer
+	# sizes though, there is not going to be any pressure in the "backward"
+	# direction.
+	__mlnx_qos -i $swp3 --prio2buffer=0,1,0,0,0,0,0,0 >/dev/null
+	__mlnx_qos -i $swp3 --pfc=0,1,0,0,0,0,0,0 >/dev/null
+
+	# $swp4
+	# -----
+
+	ip link set dev $swp4 up
+	mtu_set $swp4 10000
+	vlan_create $swp4 111
+	ip link set dev $swp4.111 type vlan ingress-qos-map 0:0 1:1
+
+	devlink_port_pool_th_set $swp4 2 $_1MB
+	devlink_tc_bind_pool_th_set $swp4 1 ingress 2 $_1MB
+
+	# Configure qdisc so that we can hand-tune headroom.
+	tc qdisc replace dev $swp4 root handle 1: \
+	   ets bands 8 strict 8 priomap 7 6
+	__mlnx_qos -i $swp4 --prio2buffer=0,1,0,0,0,0,0,0 >/dev/null
+	__mlnx_qos -i $swp4 --pfc=0,1,0,0,0,0,0,0 >/dev/null
+	# PG0 will get autoconfigured to Xoff, give PG1 arbitrarily 100K, which
+	# is (-2*MTU) about 80K of delay provision.
+	__mlnx_qos -i $swp3 --buffer_size=0,$_100KB,0,0,0,0,0,0 >/dev/null
+
+	# bridges
+	# -------
+
+	ip link add name br1 type bridge vlan_filtering 0
+	ip link set dev $swp1.111 master br1
+	ip link set dev $swp3.111 master br1
+	ip link set dev br1 up
+
+	ip link add name br2 type bridge vlan_filtering 0
+	ip link set dev $swp2.111 master br2
+	ip link set dev $swp4.111 master br2
+	ip link set dev br2 up
+}
+
+switch_destroy()
+{
+	# Do this first so that we can reset the limits to values that are only
+	# valid for the original static / dynamic setting.
+	devlink_pool_size_thtype_restore 6
+	devlink_pool_size_thtype_restore 5
+	devlink_pool_size_thtype_restore 4
+	devlink_pool_size_thtype_restore 2
+	devlink_pool_size_thtype_restore 1
+	devlink_pool_size_thtype_restore 0
+
+	# bridges
+	# -------
+
+	ip link set dev br2 down
+	ip link set dev $swp4.111 nomaster
+	ip link set dev $swp2.111 nomaster
+	ip link del dev br2
+
+	ip link set dev br1 down
+	ip link set dev $swp3.111 nomaster
+	ip link set dev $swp1.111 nomaster
+	ip link del dev br1
+
+	# $swp4
+	# -----
+
+	__mlnx_qos -i $swp4 --buffer_size=0,0,0,0,0,0,0,0 >/dev/null
+	__mlnx_qos -i $swp4 --pfc=0,0,0,0,0,0,0,0 >/dev/null
+	__mlnx_qos -i $swp4 --prio2buffer=0,0,0,0,0,0,0,0 >/dev/null
+	tc qdisc del dev $swp4 root
+
+	devlink_tc_bind_pool_th_restore $swp4 1 ingress
+	devlink_port_pool_th_restore $swp4 2
+
+	vlan_destroy $swp4 111
+	mtu_restore $swp4
+	ip link set dev $swp4 down
+
+	# $swp3
+	# -----
+
+	__mlnx_qos -i $swp3 --pfc=0,0,0,0,0,0,0,0 >/dev/null
+	__mlnx_qos -i $swp3 --prio2buffer=0,0,0,0,0,0,0,0 >/dev/null
+	tc qdisc del dev $swp3 root
+
+	devlink_tc_bind_pool_th_restore $swp3 1 egress
+	devlink_port_pool_th_restore $swp3 5
+
+	vlan_destroy $swp3 111
+	mtu_restore $swp3
+	ip link set dev $swp3 down
+
+	# $swp2
+	# -----
+
+	tc qdisc del dev $swp2 parent 1:7
+	tc qdisc del dev $swp2 root
+
+	devlink_tc_bind_pool_th_restore $swp2 1 egress
+	devlink_port_pool_th_restore $swp2 6
+
+	vlan_destroy $swp2 111
+	mtu_restore $swp2
+	ip link set dev $swp2 down
+
+	# $swp1
+	# -----
+
+	__mlnx_qos -i $swp1 --prio2buffer=0,0,0,0,0,0,0,0 >/dev/null
+	tc qdisc del dev $swp1 root
+
+	devlink_tc_bind_pool_th_restore $swp1 1 ingress
+	devlink_port_pool_th_restore $swp1 1
+
+	vlan_destroy $swp1 111
+	mtu_restore $swp1
+	ip link set dev $swp1 down
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	swp4=${NETIFS[p6]}
+
+	h2mac=$(mac_get $h2)
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+ping_ipv4()
+{
+	ping_test $h1 192.0.2.34
+}
+
+test_qos_pfc()
+{
+	RET=0
+
+	# 10M pool, each packet is 8K of payload + headers
+	local pkts=$((_10MB / 8050))
+	local size=$((pkts * 8050))
+	local in0=$(ethtool_stats_get $swp1 rx_octets_prio_1)
+	local out0=$(ethtool_stats_get $swp2 tx_octets_prio_1)
+
+	$MZ $h1 -p 8000 -Q 1:111 -A 192.0.2.33 -B 192.0.2.34 \
+		-a own -b $h2mac -c $pkts -t udp -q
+	sleep 2
+
+	local in1=$(ethtool_stats_get $swp1 rx_octets_prio_1)
+	local out1=$(ethtool_stats_get $swp2 tx_octets_prio_1)
+
+	local din=$((in1 - in0))
+	local dout=$((out1 - out0))
+
+	local pct_in=$((din * 100 / size))
+
+	((pct_in > 95 && pct_in < 105))
+	check_err $? "Relative ingress out of expected bounds, $pct_in% should be 100%"
+
+	((dout == din))
+	check_err $? "$((din - dout)) bytes out of $din ingressed got lost"
+
+	log_test "PFC"
+}
+
+trap cleanup EXIT
+
+bail_on_lldpad
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh
index 94c37124a840..af64bc9ea8ab 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh
@@ -27,11 +27,17 @@ switch_create()
 	# amount of traffic that is admitted to the shared buffers. This makes
 	# sure that there is always enough traffic of all types to select from
 	# for the DWRR process.
+	devlink_port_pool_th_save $swp1 0
 	devlink_port_pool_th_set $swp1 0 12
+	devlink_tc_bind_pool_th_save $swp1 0 ingress
 	devlink_tc_bind_pool_th_set $swp1 0 ingress 0 12
+	devlink_port_pool_th_save $swp2 4
 	devlink_port_pool_th_set $swp2 4 12
+	devlink_tc_bind_pool_th_save $swp2 7 egress
 	devlink_tc_bind_pool_th_set $swp2 7 egress 4 5
+	devlink_tc_bind_pool_th_save $swp2 6 egress
 	devlink_tc_bind_pool_th_set $swp2 6 egress 4 5
+	devlink_tc_bind_pool_th_save $swp2 5 egress
 	devlink_tc_bind_pool_th_set $swp2 5 egress 4 5
 
 	# Note: sch_ets_core.sh uses VLAN ingress-qos-map to assign packet
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh
index 517297a14ecf..b0cb1aaffdda 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh
@@ -208,6 +208,7 @@ switch_create()
 	ip link set dev br2_11 up
 
 	local size=$(devlink_pool_size_thtype 0 | cut -d' ' -f 1)
+	devlink_port_pool_th_save $swp3 8
 	devlink_port_pool_th_set $swp3 8 $size
 }
 
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh
index 4b96561c462f..3e3e06ea5703 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh
@@ -24,6 +24,13 @@ tc_police_switch_destroy()
 	simple_if_fini $swp1
 }
 
+tc_police_addr()
+{
+       local num=$1; shift
+
+       printf "2001:db8:1::%x" $num
+}
+
 tc_police_rules_create()
 {
 	local count=$1; shift
@@ -34,8 +41,9 @@ tc_police_rules_create()
 	for ((i = 0; i < count; ++i)); do
 		cat >> $TC_POLICE_BATCH_FILE <<-EOF
 			filter add dev $swp1 ingress \
-				prot ip \
-				flower skip_sw \
+				prot ipv6 \
+				pref 1000 \
+				flower skip_sw dst_ip $(tc_police_addr $i) \
 				action police rate 10mbit burst 100k \
 				conform-exceed drop/ok
 		EOF
diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
index de4b32fc4223..40909c254365 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
@@ -23,6 +23,27 @@ fw_flash_test()
 	devlink dev flash $DL_HANDLE file dummy
 	check_err $? "Failed to flash with status updates on"
 
+	devlink dev flash $DL_HANDLE file dummy component fw.mgmt
+	check_err $? "Failed to flash with component attribute"
+
+	devlink dev flash $DL_HANDLE file dummy overwrite settings
+	check_fail $? "Flash with overwrite settings should be rejected"
+
+	echo "1"> $DEBUGFS_DIR/fw_update_overwrite_mask
+	check_err $? "Failed to change allowed overwrite mask"
+
+	devlink dev flash $DL_HANDLE file dummy overwrite settings
+	check_err $? "Failed to flash with settings overwrite enabled"
+
+	devlink dev flash $DL_HANDLE file dummy overwrite identifiers
+	check_fail $? "Flash with overwrite settings should be identifiers"
+
+	echo "3"> $DEBUGFS_DIR/fw_update_overwrite_mask
+	check_err $? "Failed to change allowed overwrite mask"
+
+	devlink dev flash $DL_HANDLE file dummy overwrite identifiers overwrite settings
+	check_err $? "Failed to flash with settings and identifiers overwrite enabled"
+
 	echo "n"> $DEBUGFS_DIR/fw_update_status
 	check_err $? "Failed to disable status updates"
 
diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-pause.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-pause.sh
new file mode 100755
index 000000000000..25c896b9e2eb
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-pause.sh
@@ -0,0 +1,108 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+
+NSIM_ID=$((RANDOM % 1024))
+NSIM_DEV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_ID
+NSIM_DEV_DFS=/sys/kernel/debug/netdevsim/netdevsim$NSIM_ID/ports/0
+NSIM_NETDEV=
+num_passes=0
+num_errors=0
+
+function cleanup_nsim {
+    if [ -e $NSIM_DEV_SYS ]; then
+	echo $NSIM_ID > /sys/bus/netdevsim/del_device
+    fi
+}
+
+function cleanup {
+    cleanup_nsim
+}
+
+trap cleanup EXIT
+
+function get_netdev_name {
+    local -n old=$1
+
+    new=$(ls /sys/class/net)
+
+    for netdev in $new; do
+	for check in $old; do
+            [ $netdev == $check ] && break
+	done
+
+	if [ $netdev != $check ]; then
+	    echo $netdev
+	    break
+	fi
+    done
+}
+
+function check {
+    local code=$1
+    local str=$2
+    local exp_str=$3
+
+    if [ $code -ne 0 ]; then
+	((num_errors++))
+	return
+    fi
+
+    if [ "$str" != "$exp_str"  ]; then
+	echo -e "Expected: '$exp_str', got '$str'"
+	((num_errors++))
+	return
+    fi
+
+    ((num_passes++))
+}
+
+# Bail if ethtool is too old
+if ! ethtool -h | grep include-stat 2>&1 >/dev/null; then
+    echo "SKIP: No --include-statistics support in ethtool"
+    exit 4
+fi
+
+# Make a netdevsim
+old_netdevs=$(ls /sys/class/net)
+
+modprobe netdevsim
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+
+NSIM_NETDEV=`get_netdev_name old_netdevs`
+
+set -o pipefail
+
+echo n > $NSIM_DEV_DFS/ethtool/pause/report_stats_tx
+echo n > $NSIM_DEV_DFS/ethtool/pause/report_stats_rx
+
+s=$(ethtool --json -a $NSIM_NETDEV | jq '.[].statistics')
+check $? "$s" "null"
+
+s=$(ethtool -I --json -a $NSIM_NETDEV | jq '.[].statistics')
+check $? "$s" "{}"
+
+echo y > $NSIM_DEV_DFS/ethtool/pause/report_stats_tx
+
+s=$(ethtool -I --json -a $NSIM_NETDEV | jq '.[].statistics | length')
+check $? "$s" "1"
+
+s=$(ethtool -I --json -a $NSIM_NETDEV | jq '.[].statistics.tx_pause_frames')
+check $? "$s" "2"
+
+echo y > $NSIM_DEV_DFS/ethtool/pause/report_stats_rx
+
+s=$(ethtool -I --json -a $NSIM_NETDEV | jq '.[].statistics | length')
+check $? "$s" "2"
+
+s=$(ethtool -I --json -a $NSIM_NETDEV | jq '.[].statistics.rx_pause_frames')
+check $? "$s" "1"
+s=$(ethtool -I --json -a $NSIM_NETDEV | jq '.[].statistics.tx_pause_frames')
+check $? "$s" "2"
+
+if [ $num_errors -eq 0 ]; then
+    echo "PASSED all $((num_passes)) checks"
+    exit 0
+else
+    echo "FAILED $num_errors/$((num_errors+num_passes)) checks"
+    exit 1
+fi
diff --git a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh
index ba1d53b9f815..1b08e042cf94 100644..100755
--- a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh
@@ -7,6 +7,7 @@ NSIM_DEV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_ID
 NSIM_DEV_DFS=/sys/kernel/debug/netdevsim/netdevsim$NSIM_ID
 NSIM_NETDEV=
 HAS_ETHTOOL=
+STATIC_ENTRIES=
 EXIT_STATUS=0
 num_cases=0
 num_errors=0
@@ -193,6 +194,21 @@ function check_tables {
 	sleep 0.02
 	((retries--))
     done
+
+    if [ -n "$HAS_ETHTOOL" -a -n "${STATIC_ENTRIES[0]}" ]; then
+	fail=0
+	for i in "${!STATIC_ENTRIES[@]}"; do
+	    pp_expected=`pre_ethtool ${STATIC_ENTRIES[i]}`
+	    cnt=$(ethtool --show-tunnels $NSIM_NETDEV | grep -c "$pp_expected")
+	    if [ $cnt -ne 1 ]; then
+		err_cnt "ethtool static entry: $pfx - $msg"
+		echo "       check_table: ethtool does not contain '$pp_expected'"
+		ethtool --show-tunnels $NSIM_NETDEV
+		fail=1
+	    fi
+	done
+	[ $fail == 0 ] && pass_cnt
+    fi
 }
 
 function print_table {
@@ -775,6 +791,157 @@ for port in 0 1; do
     exp1=( 0 0 0 0 )
 done
 
+cleanup_nsim
+
+# shared port tables
+pfx="table sharing"
+
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+echo 0 > $NSIM_DEV_SYS/del_port
+
+echo 0 > $NSIM_DEV_DFS/udp_ports_open_only
+echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
+echo 1 > $NSIM_DEV_DFS/udp_ports_shared
+
+old_netdevs=$(ls /sys/class/net)
+echo 1 > $NSIM_DEV_SYS/new_port
+NSIM_NETDEV=`get_netdev_name old_netdevs`
+old_netdevs=$(ls /sys/class/net)
+echo 2 > $NSIM_DEV_SYS/new_port
+NSIM_NETDEV2=`get_netdev_name old_netdevs`
+
+msg="VxLAN v4 devices"
+exp0=( `mke 4789 1` 0 0 0 )
+exp1=( 0 0 0 0 )
+new_vxlan vxlan0 4789 $NSIM_NETDEV
+new_vxlan vxlan1 4789 $NSIM_NETDEV2
+
+msg="VxLAN v4 devices go down"
+exp0=( 0 0 0 0 )
+ifconfig vxlan1 down
+ifconfig vxlan0 down
+check_tables
+
+for ifc in vxlan0 vxlan1; do
+    ifconfig $ifc up
+done
+
+msg="VxLAN v6 device"
+exp0=( `mke 4789 1` `mke 4790 1` 0 0 )
+new_vxlan vxlanC 4790 $NSIM_NETDEV 6
+
+msg="Geneve device"
+exp1=( `mke 6081 2` 0 0 0 )
+new_geneve gnv0 6081
+
+msg="NIC device goes down"
+ifconfig $NSIM_NETDEV down
+check_tables
+
+msg="NIC device goes up again"
+ifconfig $NSIM_NETDEV up
+check_tables
+
+for i in `seq 2`; do
+    msg="turn feature off - 1, rep $i"
+    ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload off
+    check_tables
+
+    msg="turn feature off - 2, rep $i"
+    exp0=( 0 0 0 0 )
+    exp1=( 0 0 0 0 )
+    ethtool -K $NSIM_NETDEV2 rx-udp_tunnel-port-offload off
+    check_tables
+
+    msg="turn feature on - 1, rep $i"
+    exp0=( `mke 4789 1` `mke 4790 1` 0 0 )
+    exp1=( `mke 6081 2` 0 0 0 )
+    ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload on
+    check_tables
+
+    msg="turn feature on - 2, rep $i"
+    ethtool -K $NSIM_NETDEV2 rx-udp_tunnel-port-offload on
+    check_tables
+done
+
+msg="tunnels destroyed 1"
+cleanup_tuns
+exp0=( 0 0 0 0 )
+exp1=( 0 0 0 0 )
+check_tables
+
+overflow_table0 "overflow NIC table"
+
+msg="re-add a port"
+
+echo 2 > $NSIM_DEV_SYS/del_port
+echo 2 > $NSIM_DEV_SYS/new_port
+check_tables
+
+msg="replace VxLAN in overflow table"
+exp0=( `mke 10000 1` `mke 10004 1` `mke 10002 1` `mke 10003 1` )
+del_dev vxlan1
+
+msg="vacate VxLAN in overflow table"
+exp0=( `mke 10000 1` `mke 10004 1` 0 `mke 10003 1` )
+del_dev vxlan2
+
+echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset
+check_tables
+
+msg="tunnels destroyed 2"
+cleanup_tuns
+exp0=( 0 0 0 0 )
+exp1=( 0 0 0 0 )
+check_tables
+
+echo 1 > $NSIM_DEV_SYS/del_port
+echo 2 > $NSIM_DEV_SYS/del_port
+
+cleanup_nsim
+
+# Static IANA port
+pfx="static IANA vxlan"
+
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+echo 0 > $NSIM_DEV_SYS/del_port
+
+echo 1 > $NSIM_DEV_DFS/udp_ports_static_iana_vxlan
+STATIC_ENTRIES=( `mke 4789 1` )
+
+port=1
+old_netdevs=$(ls /sys/class/net)
+echo $port > $NSIM_DEV_SYS/new_port
+NSIM_NETDEV=`get_netdev_name old_netdevs`
+
+msg="check empty"
+exp0=( 0 0 0 0 )
+exp1=( 0 0 0 0 )
+check_tables
+
+msg="add on static port"
+new_vxlan vxlan0 4789 $NSIM_NETDEV
+new_vxlan vxlan1 4789 $NSIM_NETDEV
+
+msg="add on different port"
+exp0=( `mke 4790 1` 0 0 0 )
+new_vxlan vxlan2 4790 $NSIM_NETDEV
+
+cleanup_tuns
+
+msg="tunnels destroyed"
+exp0=( 0 0 0 0 )
+exp1=( 0 0 0 0 )
+check_tables
+
+msg="different type"
+new_geneve gnv0	4789
+
+cleanup_tuns
+cleanup_nsim
+
+# END
+
 modprobe -r netdevsim
 
 if [ $num_errors -eq 0 ]; then
diff --git a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
new file mode 100755
index 000000000000..beee0d5646a6
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
@@ -0,0 +1,316 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright 2020 NXP Semiconductors
+
+WAIT_TIME=1
+NUM_NETIFS=4
+lib_dir=$(dirname $0)/../../../net/forwarding
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+
+require_command tcpdump
+
+#
+#   +---------------------------------------------+
+#   |       DUT ports         Generator ports     |
+#   | +--------+ +--------+ +--------+ +--------+ |
+#   | |        | |        | |        | |        | |
+#   | |  eth0  | |  eth1  | |  eth2  | |  eth3  | |
+#   | |        | |        | |        | |        | |
+#   +-+--------+-+--------+-+--------+-+--------+-+
+#          |         |           |          |
+#          |         |           |          |
+#          |         +-----------+          |
+#          |                                |
+#          +--------------------------------+
+
+eth0=${NETIFS[p1]}
+eth1=${NETIFS[p2]}
+eth2=${NETIFS[p3]}
+eth3=${NETIFS[p4]}
+
+eth0_mac="de:ad:be:ef:00:00"
+eth1_mac="de:ad:be:ef:00:01"
+eth2_mac="de:ad:be:ef:00:02"
+eth3_mac="de:ad:be:ef:00:03"
+
+# Helpers to map a VCAP IS1 and VCAP IS2 lookup and policy to a chain number
+# used by the kernel driver. The numbers are:
+# VCAP IS1 lookup 0:            10000
+# VCAP IS1 lookup 1:            11000
+# VCAP IS1 lookup 2:            12000
+# VCAP IS2 lookup 0 policy 0:   20000
+# VCAP IS2 lookup 0 policy 1:   20001
+# VCAP IS2 lookup 0 policy 255: 20255
+# VCAP IS2 lookup 1 policy 0:   21000
+# VCAP IS2 lookup 1 policy 1:   21001
+# VCAP IS2 lookup 1 policy 255: 21255
+IS1()
+{
+	local lookup=$1
+
+	echo $((10000 + 1000 * lookup))
+}
+
+IS2()
+{
+	local lookup=$1
+	local pag=$2
+
+	echo $((20000 + 1000 * lookup + pag))
+}
+
+ES0()
+{
+	echo 0
+}
+
+# The Ocelot switches have a fixed ingress pipeline composed of:
+#
+# +----------------------------------------------+      +-----------------------------------------+
+# |                   VCAP IS1                   |      |                  VCAP IS2               |
+# |                                              |      |                                         |
+# | +----------+    +----------+    +----------+ |      |            +----------+    +----------+ |
+# | | Lookup 0 |    | Lookup 1 |    | Lookup 2 | | --+------> PAG 0: | Lookup 0 | -> | Lookup 1 | |
+# | +----------+ -> +----------+ -> +----------+ |   |  |            +----------+    +----------+ |
+# | |key&action|    |key&action|    |key&action| |   |  |            |key&action|    |key&action| |
+# | |key&action|    |key&action|    |key&action| |   |  |            |    ..    |    |    ..    | |
+# | |    ..    |    |    ..    |    |    ..    | |   |  |            +----------+    +----------+ |
+# | +----------+    +----------+    +----------+ |   |  |                                         |
+# |                                 selects PAG  |   |  |            +----------+    +----------+ |
+# +----------------------------------------------+   +------> PAG 1: | Lookup 0 | -> | Lookup 1 | |
+#                                                    |  |            +----------+    +----------+ |
+#                                                    |  |            |key&action|    |key&action| |
+#                                                    |  |            |    ..    |    |    ..    | |
+#                                                    |  |            +----------+    +----------+ |
+#                                                    |  |      ...                                |
+#                                                    |  |                                         |
+#                                                    |  |            +----------+    +----------+ |
+#                                                    +----> PAG 254: | Lookup 0 | -> | Lookup 1 | |
+#                                                    |  |            +----------+    +----------+ |
+#                                                    |  |            |key&action|    |key&action| |
+#                                                    |  |            |    ..    |    |    ..    | |
+#                                                    |  |            +----------+    +----------+ |
+#                                                    |  |                                         |
+#                                                    |  |            +----------+    +----------+ |
+#                                                    +----> PAG 255: | Lookup 0 | -> | Lookup 1 | |
+#                                                       |            +----------+    +----------+ |
+#                                                       |            |key&action|    |key&action| |
+#                                                       |            |    ..    |    |    ..    | |
+#                                                       |            +----------+    +----------+ |
+#                                                       +-----------------------------------------+
+#
+# Both the VCAP IS1 (Ingress Stage 1) and IS2 (Ingress Stage 2) are indexed
+# (looked up) multiple times: IS1 3 times, and IS2 2 times. Each filter
+# (key and action pair) can be configured to only match during the first, or
+# second, etc, lookup.
+#
+# During one TCAM lookup, the filter processing stops at the first entry that
+# matches, then the pipeline jumps to the next lookup.
+# The driver maps each individual lookup of each individual ingress TCAM to a
+# separate chain number. For correct rule offloading, it is mandatory that each
+# filter installed in one TCAM is terminated by a non-optional GOTO action to
+# the next lookup from the fixed pipeline.
+#
+# A chain can only be used if there is a GOTO action correctly set up from the
+# prior lookup in the processing pipeline. Setting up all chains is not
+# mandatory.
+
+# NOTE: VCAP IS1 currently uses only S1_NORMAL half keys and VCAP IS2
+# dynamically chooses between MAC_ETYPE, ARP, IP4_TCP_UDP, IP4_OTHER, which are
+# all half keys as well.
+
+create_tcam_skeleton()
+{
+	local eth=$1
+
+	tc qdisc add dev $eth clsact
+
+	# VCAP IS1 is the Ingress Classification TCAM and can offload the
+	# following actions:
+	# - skbedit priority
+	# - vlan pop
+	# - vlan modify
+	# - goto (only in lookup 2, the last IS1 lookup)
+	tc filter add dev $eth ingress chain 0 pref 49152 flower \
+		skip_sw action goto chain $(IS1 0)
+	tc filter add dev $eth ingress chain $(IS1 0) pref 49152 \
+		flower skip_sw action goto chain $(IS1 1)
+	tc filter add dev $eth ingress chain $(IS1 1) pref 49152 \
+		flower skip_sw action goto chain $(IS1 2)
+	tc filter add dev $eth ingress chain $(IS1 2) pref 49152 \
+		flower skip_sw action goto chain $(IS2 0 0)
+
+	# VCAP IS2 is the Security Enforcement ingress TCAM and can offload the
+	# following actions:
+	# - trap
+	# - drop
+	# - police
+	# The two VCAP IS2 lookups can be segmented into up to 256 groups of
+	# rules, called Policies. A Policy is selected through the Policy
+	# Association Group (PAG) action of VCAP IS1 (which is the
+	# GOTO offload).
+	tc filter add dev $eth ingress chain $(IS2 0 0) pref 49152 \
+		flower skip_sw action goto chain $(IS2 1 0)
+}
+
+setup_prepare()
+{
+	create_tcam_skeleton $eth0
+
+	ip link add br0 type bridge
+	ip link set $eth0 master br0
+	ip link set $eth1 master br0
+	ip link set br0 up
+
+	ip link add link $eth3 name $eth3.100 type vlan id 100
+	ip link set $eth3.100 up
+
+	ip link add link $eth3 name $eth3.200 type vlan id 200
+	ip link set $eth3.200 up
+
+	tc filter add dev $eth0 ingress chain $(IS1 1) pref 1 \
+		protocol 802.1Q flower skip_sw vlan_id 100 \
+		action vlan pop \
+		action goto chain $(IS1 2)
+
+	tc filter add dev $eth0 egress chain $(ES0) pref 1 \
+		flower skip_sw indev $eth1 \
+		action vlan push protocol 802.1Q id 100
+
+	tc filter add dev $eth0 ingress chain $(IS1 0) pref 2 \
+		protocol ipv4 flower skip_sw src_ip 10.1.1.2 \
+		action skbedit priority 7 \
+		action goto chain $(IS1 1)
+
+	tc filter add dev $eth0 ingress chain $(IS2 0 0) pref 1 \
+		protocol ipv4 flower skip_sw ip_proto udp dst_port 5201 \
+		action police rate 50mbit burst 64k \
+		action goto chain $(IS2 1 0)
+}
+
+cleanup()
+{
+	ip link del $eth3.200
+	ip link del $eth3.100
+	tc qdisc del dev $eth0 clsact
+	ip link del br0
+}
+
+test_vlan_pop()
+{
+	printf "Testing VLAN pop..			"
+
+	tcpdump_start $eth2
+
+	# Work around Mausezahn VLAN builder bug
+	# (https://github.com/netsniff-ng/netsniff-ng/issues/225) by using
+	# an 8021q upper
+	$MZ $eth3.100 -q -c 1 -p 64 -a $eth3_mac -b $eth2_mac -t ip
+
+	sleep 1
+
+	tcpdump_stop
+
+	if tcpdump_show | grep -q "$eth3_mac > $eth2_mac, ethertype IPv4"; then
+		echo "OK"
+	else
+		echo "FAIL"
+	fi
+
+	tcpdump_cleanup
+}
+
+test_vlan_push()
+{
+	printf "Testing VLAN push..			"
+
+	tcpdump_start $eth3.100
+
+	$MZ $eth2 -q -c 1 -p 64 -a $eth2_mac -b $eth3_mac -t ip
+
+	sleep 1
+
+	tcpdump_stop
+
+	if tcpdump_show | grep -q "$eth2_mac > $eth3_mac"; then
+		echo "OK"
+	else
+		echo "FAIL"
+	fi
+
+	tcpdump_cleanup
+}
+
+test_vlan_modify()
+{
+	printf "Testing VLAN modification..		"
+
+	ip link set br0 type bridge vlan_filtering 1
+	bridge vlan add dev $eth0 vid 200
+	bridge vlan add dev $eth0 vid 300
+	bridge vlan add dev $eth1 vid 300
+
+	tc filter add dev $eth0 ingress chain $(IS1 2) pref 3 \
+		protocol 802.1Q flower skip_sw vlan_id 200 \
+		action vlan modify id 300 \
+		action goto chain $(IS2 0 0)
+
+	tcpdump_start $eth2
+
+	$MZ $eth3.200 -q -c 1 -p 64 -a $eth3_mac -b $eth2_mac -t ip
+
+	sleep 1
+
+	tcpdump_stop
+
+	if tcpdump_show | grep -q "$eth3_mac > $eth2_mac, .* vlan 300"; then
+		echo "OK"
+	else
+		echo "FAIL"
+	fi
+
+	tcpdump_cleanup
+
+	tc filter del dev $eth0 ingress chain $(IS1 2) pref 3
+
+	bridge vlan del dev $eth0 vid 200
+	bridge vlan del dev $eth0 vid 300
+	bridge vlan del dev $eth1 vid 300
+	ip link set br0 type bridge vlan_filtering 0
+}
+
+test_skbedit_priority()
+{
+	local num_pkts=100
+
+	printf "Testing frame prioritization..		"
+
+	before=$(ethtool_stats_get $eth0 'rx_green_prio_7')
+
+	$MZ $eth3 -q -c $num_pkts -p 64 -a $eth3_mac -b $eth2_mac -t ip -A 10.1.1.2
+
+	after=$(ethtool_stats_get $eth0 'rx_green_prio_7')
+
+	if [ $((after - before)) = $num_pkts ]; then
+		echo "OK"
+	else
+		echo "FAIL"
+	fi
+}
+
+trap cleanup EXIT
+
+ALL_TESTS="
+	test_vlan_pop
+	test_vlan_push
+	test_vlan_modify
+	test_skbedit_priority
+"
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/exec/.gitignore b/tools/testing/selftests/exec/.gitignore
index 344a99c6da1b..9e2f00343f15 100644
--- a/tools/testing/selftests/exec/.gitignore
+++ b/tools/testing/selftests/exec/.gitignore
@@ -7,6 +7,7 @@ execveat.moved
 execveat.path.ephemeral
 execveat.ephemeral
 execveat.denatured
+/load_address_*
 /recursion-depth
 xxxxxxxx*
 pipe
diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile
index 0a13b110c1e6..cf69b2fcce59 100644
--- a/tools/testing/selftests/exec/Makefile
+++ b/tools/testing/selftests/exec/Makefile
@@ -4,7 +4,7 @@ CFLAGS += -Wno-nonnull
 CFLAGS += -D_GNU_SOURCE
 
 TEST_PROGS := binfmt_script non-regular
-TEST_GEN_PROGS := execveat
+TEST_GEN_PROGS := execveat load_address_4096 load_address_2097152 load_address_16777216
 TEST_GEN_FILES := execveat.symlink execveat.denatured script subdir pipe
 # Makefile is a run-time dependency, since it's accessed by the execveat test
 TEST_FILES := Makefile
@@ -27,4 +27,9 @@ $(OUTPUT)/execveat.symlink: $(OUTPUT)/execveat
 $(OUTPUT)/execveat.denatured: $(OUTPUT)/execveat
 	cp $< $@
 	chmod -x $@
-
+$(OUTPUT)/load_address_4096: load_address.c
+	$(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000 -pie $< -o $@
+$(OUTPUT)/load_address_2097152: load_address.c
+	$(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x200000 -pie $< -o $@
+$(OUTPUT)/load_address_16777216: load_address.c
+	$(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000000 -pie $< -o $@
diff --git a/tools/testing/selftests/exec/load_address.c b/tools/testing/selftests/exec/load_address.c
new file mode 100644
index 000000000000..d487c2f6a615
--- /dev/null
+++ b/tools/testing/selftests/exec/load_address.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <link.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+struct Statistics {
+	unsigned long long load_address;
+	unsigned long long alignment;
+};
+
+int ExtractStatistics(struct dl_phdr_info *info, size_t size, void *data)
+{
+	struct Statistics *stats = (struct Statistics *) data;
+	int i;
+
+	if (info->dlpi_name != NULL && info->dlpi_name[0] != '\0') {
+		// Ignore headers from other than the executable.
+		return 2;
+	}
+
+	stats->load_address = (unsigned long long) info->dlpi_addr;
+	stats->alignment = 0;
+
+	for (i = 0; i < info->dlpi_phnum; i++) {
+		if (info->dlpi_phdr[i].p_type != PT_LOAD)
+			continue;
+
+		if (info->dlpi_phdr[i].p_align > stats->alignment)
+			stats->alignment = info->dlpi_phdr[i].p_align;
+	}
+
+	return 1;  // Terminate dl_iterate_phdr.
+}
+
+int main(int argc, char **argv)
+{
+	struct Statistics extracted;
+	unsigned long long misalign;
+	int ret;
+
+	ret = dl_iterate_phdr(ExtractStatistics, &extracted);
+	if (ret != 1) {
+		fprintf(stderr, "FAILED\n");
+		return 1;
+	}
+
+	if (extracted.alignment == 0) {
+		fprintf(stderr, "No alignment found\n");
+		return 1;
+	} else if (extracted.alignment & (extracted.alignment - 1)) {
+		fprintf(stderr, "Alignment is not a power of 2\n");
+		return 1;
+	}
+
+	misalign = extracted.load_address & (extracted.alignment - 1);
+	if (misalign) {
+		printf("alignment = %llu, load_address = %llu\n",
+			extracted.alignment, extracted.load_address);
+		fprintf(stderr, "FAILED\n");
+		return 1;
+	}
+
+	fprintf(stderr, "PASS\n");
+	return 0;
+}
diff --git a/tools/testing/selftests/firmware/.gitignore b/tools/testing/selftests/firmware/.gitignore
new file mode 100644
index 000000000000..62abc92a94c4
--- /dev/null
+++ b/tools/testing/selftests/firmware/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+fw_namespace
diff --git a/tools/testing/selftests/firmware/fw_filesystem.sh b/tools/testing/selftests/firmware/fw_filesystem.sh
index fcc281373b4d..c2a2a100114b 100755
--- a/tools/testing/selftests/firmware/fw_filesystem.sh
+++ b/tools/testing/selftests/firmware/fw_filesystem.sh
@@ -149,6 +149,26 @@ config_unset_into_buf()
 	echo 0 >  $DIR/config_into_buf
 }
 
+config_set_buf_size()
+{
+	echo $1 >  $DIR/config_buf_size
+}
+
+config_set_file_offset()
+{
+	echo $1 >  $DIR/config_file_offset
+}
+
+config_set_partial()
+{
+	echo 1 >  $DIR/config_partial
+}
+
+config_unset_partial()
+{
+	echo 0 >  $DIR/config_partial
+}
+
 config_set_sync_direct()
 {
 	echo 1 >  $DIR/config_sync_direct
@@ -207,6 +227,35 @@ read_firmwares()
 	done
 }
 
+read_partial_firmwares()
+{
+	if [ "$(cat $DIR/config_into_buf)" == "1" ]; then
+		fwfile="${FW_INTO_BUF}"
+	else
+		fwfile="${FW}"
+	fi
+
+	if [ "$1" = "xzonly" ]; then
+		fwfile="${fwfile}-orig"
+	fi
+
+	# Strip fwfile down to match partial offset and length
+	partial_data="$(cat $fwfile)"
+	partial_data="${partial_data:$2:$3}"
+
+	for i in $(seq 0 3); do
+		config_set_read_fw_idx $i
+
+		read_firmware="$(cat $DIR/read_firmware)"
+
+		# Verify the contents are what we expect.
+		if [ $read_firmware != $partial_data ]; then
+			echo "request #$i: partial firmware was not loaded" >&2
+			exit 1
+		fi
+	done
+}
+
 read_firmwares_expect_nofile()
 {
 	for i in $(seq 0 3); do
@@ -242,6 +291,21 @@ test_batched_request_firmware_into_buf_nofile()
 	echo "OK"
 }
 
+test_request_partial_firmware_into_buf_nofile()
+{
+	echo -n "Test request_partial_firmware_into_buf() off=$1 size=$2 nofile: "
+	config_reset
+	config_set_name nope-test-firmware.bin
+	config_set_into_buf
+	config_set_partial
+	config_set_buf_size $2
+	config_set_file_offset $1
+	config_trigger_sync
+	read_firmwares_expect_nofile
+	release_all_firmware
+	echo "OK"
+}
+
 test_batched_request_firmware_direct_nofile()
 {
 	echo -n "Batched request_firmware_direct() nofile try #$1: "
@@ -356,6 +420,21 @@ test_request_firmware_nowait_custom()
 	echo "OK"
 }
 
+test_request_partial_firmware_into_buf()
+{
+	echo -n "Test request_partial_firmware_into_buf() off=$1 size=$2: "
+	config_reset
+	config_set_name $TEST_FIRMWARE_INTO_BUF_FILENAME
+	config_set_into_buf
+	config_set_partial
+	config_set_buf_size $2
+	config_set_file_offset $1
+	config_trigger_sync
+	read_partial_firmwares normal $1 $2
+	release_all_firmware
+	echo "OK"
+}
+
 # Only continue if batched request triggers are present on the
 # test-firmware driver
 test_config_present
@@ -383,6 +462,12 @@ for i in $(seq 1 5); do
 	test_request_firmware_nowait_custom $i normal
 done
 
+# Partial loads cannot use fallback, so do not repeat tests.
+test_request_partial_firmware_into_buf 0 10
+test_request_partial_firmware_into_buf 0 5
+test_request_partial_firmware_into_buf 1 6
+test_request_partial_firmware_into_buf 2 10
+
 # Test for file not found, errors are expected, the failure would be
 # a hung task, which would require a hard reset.
 echo
@@ -407,6 +492,12 @@ for i in $(seq 1 5); do
 	test_request_firmware_nowait_custom_nofile $i
 done
 
+# Partial loads cannot use fallback, so do not repeat tests.
+test_request_partial_firmware_into_buf_nofile 0 10
+test_request_partial_firmware_into_buf_nofile 0 5
+test_request_partial_firmware_into_buf_nofile 1 6
+test_request_partial_firmware_into_buf_nofile 2 10
+
 test "$HAS_FW_LOADER_COMPRESS" != "yes" && exit 0
 
 # test with both files present
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc
index 68550f97d3c3..3bcd4c3624ee 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc
@@ -6,7 +6,7 @@
 echo 0 > events/enable
 echo > dynamic_events
 
-PLACE=_do_fork
+PLACE=kernel_clone
 
 echo "p:myevent1 $PLACE" >> dynamic_events
 echo "r:myevent2 $PLACE" >> dynamic_events
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc b/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc
index c969be9eb7de..438961971b7e 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc
@@ -6,7 +6,7 @@
 echo 0 > events/enable
 echo > dynamic_events
 
-PLACE=_do_fork
+PLACE=kernel_clone
 
 setup_events() {
 echo "p:myevent1 $PLACE" >> dynamic_events
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc b/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc
index 16d543eaac88..a8603bd23e0d 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc
@@ -6,7 +6,7 @@
 echo 0 > events/enable
 echo > dynamic_events
 
-PLACE=_do_fork
+PLACE=kernel_clone
 
 setup_events() {
 echo "p:myevent1 $PLACE" >> dynamic_events
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc
index 0f41e441c203..98305d76bd04 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc
@@ -4,9 +4,9 @@
 # requires: set_ftrace_filter
 # flags: instance
 
-echo _do_fork:stacktrace >> set_ftrace_filter
+echo kernel_clone:stacktrace >> set_ftrace_filter
 
-grep -q "_do_fork:stacktrace:unlimited" set_ftrace_filter
+grep -q "kernel_clone:stacktrace:unlimited" set_ftrace_filter
 
 (echo "forked"; sleep 1)
 
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc b/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc
index eba858c21815..9737cd0578a7 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc
@@ -3,7 +3,7 @@
 # description: Kprobe dynamic event - adding and removing
 # requires: kprobe_events
 
-echo p:myevent _do_fork > kprobe_events
+echo p:myevent kernel_clone > kprobe_events
 grep myevent kprobe_events
 test -d events/kprobes/myevent
 echo > kprobe_events
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc b/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc
index d10bf4f05bc8..f9a40af76888 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc
@@ -3,7 +3,7 @@
 # description: Kprobe dynamic event - busy event check
 # requires: kprobe_events
 
-echo p:myevent _do_fork > kprobe_events
+echo p:myevent kernel_clone > kprobe_events
 test -d events/kprobes/myevent
 echo 1 > events/kprobes/myevent/enable
 echo > kprobe_events && exit_fail # this must fail
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc
index 61f2ac441aec..eb543d3cfe5f 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc
@@ -3,13 +3,13 @@
 # description: Kprobe dynamic event with arguments
 # requires: kprobe_events
 
-echo 'p:testprobe _do_fork $stack $stack0 +0($stack)' > kprobe_events
+echo 'p:testprobe kernel_clone $stack $stack0 +0($stack)' > kprobe_events
 grep testprobe kprobe_events | grep -q 'arg1=\$stack arg2=\$stack0 arg3=+0(\$stack)'
 test -d events/kprobes/testprobe
 
 echo 1 > events/kprobes/testprobe/enable
 ( echo "forked")
-grep testprobe trace | grep '_do_fork' | \
+grep testprobe trace | grep 'kernel_clone' | \
   grep -q 'arg1=0x[[:xdigit:]]* arg2=0x[[:xdigit:]]* arg3=0x[[:xdigit:]]*$'
 
 echo 0 > events/kprobes/testprobe/enable
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc
index 05aaeed6987f..4e5b63be51c9 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc
@@ -5,7 +5,7 @@
 
 grep -A1 "fetcharg:" README | grep -q "\$comm" || exit_unsupported # this is too old
 
-echo 'p:testprobe _do_fork comm=$comm ' > kprobe_events
+echo 'p:testprobe kernel_clone comm=$comm ' > kprobe_events
 grep testprobe kprobe_events | grep -q 'comm=$comm'
 test -d events/kprobes/testprobe
 
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
index b5fa05443b39..a1d70588ab21 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
@@ -30,13 +30,13 @@ esac
 : "Test get argument (1)"
 echo "p:testprobe tracefs_create_dir arg1=+0(${ARG1}):string" > kprobe_events
 echo 1 > events/kprobes/testprobe/enable
-echo "p:test _do_fork" >> kprobe_events
+echo "p:test kernel_clone" >> kprobe_events
 grep -qe "testprobe.* arg1=\"test\"" trace
 
 echo 0 > events/kprobes/testprobe/enable
 : "Test get argument (2)"
 echo "p:testprobe tracefs_create_dir arg1=+0(${ARG1}):string arg2=+0(${ARG1}):string" > kprobe_events
 echo 1 > events/kprobes/testprobe/enable
-echo "p:test _do_fork" >> kprobe_events
+echo "p:test kernel_clone" >> kprobe_events
 grep -qe "testprobe.* arg1=\"test\" arg2=\"test\"" trace
 
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc
index b8c75a3d003c..bd25dd0ba0d0 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc
@@ -14,12 +14,12 @@ elif ! grep "$SYMBOL\$" /proc/kallsyms; then
 fi
 
 : "Test get basic types symbol argument"
-echo "p:testprobe_u _do_fork arg1=@linux_proc_banner:u64 arg2=@linux_proc_banner:u32 arg3=@linux_proc_banner:u16 arg4=@linux_proc_banner:u8" > kprobe_events
-echo "p:testprobe_s _do_fork arg1=@linux_proc_banner:s64 arg2=@linux_proc_banner:s32 arg3=@linux_proc_banner:s16 arg4=@linux_proc_banner:s8" >> kprobe_events
+echo "p:testprobe_u kernel_clone arg1=@linux_proc_banner:u64 arg2=@linux_proc_banner:u32 arg3=@linux_proc_banner:u16 arg4=@linux_proc_banner:u8" > kprobe_events
+echo "p:testprobe_s kernel_clone arg1=@linux_proc_banner:s64 arg2=@linux_proc_banner:s32 arg3=@linux_proc_banner:s16 arg4=@linux_proc_banner:s8" >> kprobe_events
 if grep -q "x8/16/32/64" README; then
-  echo "p:testprobe_x _do_fork arg1=@linux_proc_banner:x64 arg2=@linux_proc_banner:x32 arg3=@linux_proc_banner:x16 arg4=@linux_proc_banner:x8" >> kprobe_events
+  echo "p:testprobe_x kernel_clone arg1=@linux_proc_banner:x64 arg2=@linux_proc_banner:x32 arg3=@linux_proc_banner:x16 arg4=@linux_proc_banner:x8" >> kprobe_events
 fi
-echo "p:testprobe_bf _do_fork arg1=@linux_proc_banner:b8@4/32" >> kprobe_events
+echo "p:testprobe_bf kernel_clone arg1=@linux_proc_banner:b8@4/32" >> kprobe_events
 echo 1 > events/kprobes/enable
 (echo "forked")
 echo 0 > events/kprobes/enable
@@ -27,7 +27,7 @@ grep "testprobe_[usx]:.* arg1=.* arg2=.* arg3=.* arg4=.*" trace
 grep "testprobe_bf:.* arg1=.*" trace
 
 : "Test get string symbol argument"
-echo "p:testprobe_str _do_fork arg1=@linux_proc_banner:string" > kprobe_events
+echo "p:testprobe_str kernel_clone arg1=@linux_proc_banner:string" > kprobe_events
 echo 1 > events/kprobes/enable
 (echo "forked")
 echo 0 > events/kprobes/enable
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc
index 0610e0b5587c..91fcce1c241c 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc
@@ -4,7 +4,7 @@
 # requires: kprobe_events "x8/16/32/64":README
 
 gen_event() { # Bitsize
-  echo "p:testprobe _do_fork \$stack0:s$1 \$stack0:u$1 \$stack0:x$1 \$stack0:b4@4/$1"
+  echo "p:testprobe kernel_clone \$stack0:s$1 \$stack0:u$1 \$stack0:x$1 \$stack0:b4@4/$1"
 }
 
 check_types() { # s-type u-type x-type bf-type width
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc
index 81d8b58c03bc..0d179094191f 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc
@@ -5,29 +5,29 @@
 
 # prepare
 echo nop > current_tracer
-echo _do_fork > set_ftrace_filter
-echo 'p:testprobe _do_fork' > kprobe_events
+echo kernel_clone > set_ftrace_filter
+echo 'p:testprobe kernel_clone' > kprobe_events
 
 # kprobe on / ftrace off
 echo 1 > events/kprobes/testprobe/enable
 echo > trace
 ( echo "forked")
 grep testprobe trace
-! grep '_do_fork <-' trace
+! grep 'kernel_clone <-' trace
 
 # kprobe on / ftrace on
 echo function > current_tracer
 echo > trace
 ( echo "forked")
 grep testprobe trace
-grep '_do_fork <-' trace
+grep 'kernel_clone <-' trace
 
 # kprobe off / ftrace on
 echo 0 > events/kprobes/testprobe/enable
 echo > trace
 ( echo "forked")
 ! grep testprobe trace
-grep '_do_fork <-' trace
+grep 'kernel_clone <-' trace
 
 # kprobe on / ftrace on
 echo 1 > events/kprobes/testprobe/enable
@@ -35,11 +35,11 @@ echo function > current_tracer
 echo > trace
 ( echo "forked")
 grep testprobe trace
-grep '_do_fork <-' trace
+grep 'kernel_clone <-' trace
 
 # kprobe on / ftrace off
 echo nop > current_tracer
 echo > trace
 ( echo "forked")
 grep testprobe trace
-! grep '_do_fork <-' trace
+! grep 'kernel_clone <-' trace
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc
index 366b7e1b6718..45d90b6c763d 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc
@@ -4,7 +4,7 @@
 # requires: kprobe_events "Create/append/":README
 
 # Choose 2 symbols for target
-SYM1=_do_fork
+SYM1=kernel_clone
 SYM2=do_exit
 EVENT_NAME=kprobes/testevent
 
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
index b4d834675e59..1b5550ef8a9b 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
@@ -86,15 +86,21 @@ esac
 
 # multiprobe errors
 if grep -q "Create/append/" README && grep -q "imm-value" README; then
-echo 'p:kprobes/testevent _do_fork' > kprobe_events
+echo 'p:kprobes/testevent kernel_clone' > kprobe_events
 check_error '^r:kprobes/testevent do_exit'	# DIFF_PROBE_TYPE
 
 # Explicitly use printf "%s" to not interpret \1
-printf "%s" 'p:kprobes/testevent _do_fork abcd=\1' > kprobe_events
-check_error 'p:kprobes/testevent _do_fork ^bcd=\1'	# DIFF_ARG_TYPE
-check_error 'p:kprobes/testevent _do_fork ^abcd=\1:u8'	# DIFF_ARG_TYPE
-check_error 'p:kprobes/testevent _do_fork ^abcd=\"foo"'	# DIFF_ARG_TYPE
-check_error '^p:kprobes/testevent _do_fork abcd=\1'	# SAME_PROBE
+printf "%s" 'p:kprobes/testevent kernel_clone abcd=\1' > kprobe_events
+check_error 'p:kprobes/testevent kernel_clone ^bcd=\1'	# DIFF_ARG_TYPE
+check_error 'p:kprobes/testevent kernel_clone ^abcd=\1:u8'	# DIFF_ARG_TYPE
+check_error 'p:kprobes/testevent kernel_clone ^abcd=\"foo"'	# DIFF_ARG_TYPE
+check_error '^p:kprobes/testevent kernel_clone abcd=\1'	# SAME_PROBE
+fi
+
+# %return suffix errors
+if grep -q "place (kretprobe): .*%return.*" README; then
+check_error 'p vfs_read^%hoge'		# BAD_ADDR_SUFFIX
+check_error 'p ^vfs_read+10%return'	# BAD_RETPROBE
 fi
 
 exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc
index 523fde6d1aa5..7ae492c204a4 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc
@@ -4,14 +4,14 @@
 # requires: kprobe_events
 
 # Add new kretprobe event
-echo 'r:testprobe2 _do_fork $retval' > kprobe_events
+echo 'r:testprobe2 kernel_clone $retval' > kprobe_events
 grep testprobe2 kprobe_events | grep -q 'arg1=\$retval'
 test -d events/kprobes/testprobe2
 
 echo 1 > events/kprobes/testprobe2/enable
 ( echo "forked")
 
-cat trace | grep testprobe2 | grep -q '<- _do_fork'
+cat trace | grep testprobe2 | grep -q '<- kernel_clone'
 
 echo 0 > events/kprobes/testprobe2/enable
 echo '-:testprobe2' >> kprobe_events
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_return_suffix.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_return_suffix.tc
new file mode 100644
index 000000000000..f07bd15cc033
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_return_suffix.tc
@@ -0,0 +1,21 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kretprobe %%return suffix test
+# requires: kprobe_events '<symbol>[+<offset>]%return':README
+
+# Test for kretprobe by "r"
+echo 'r:myprobeaccept vfs_read' > kprobe_events
+RESULT1=`cat kprobe_events`
+
+# Test for kretprobe by "%return"
+echo 'p:myprobeaccept vfs_read%return' > kprobe_events
+RESULT2=`cat kprobe_events`
+
+if [ "$RESULT1" != "$RESULT2" ]; then
+	echo "Error: %return suffix didn't make a return probe."
+	echo "r-command: $RESULT1"
+	echo "%return: $RESULT2"
+	exit_fail
+fi
+
+echo > kprobe_events
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc b/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc
index ff6c44adc8a0..c4093fc1a773 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc
@@ -4,7 +4,7 @@
 # requires: kprobe_events
 
 ! grep -q 'myevent' kprobe_profile
-echo p:myevent _do_fork > kprobe_events
+echo p:myevent kernel_clone > kprobe_events
 grep -q 'myevent[[:space:]]*0[[:space:]]*0$' kprobe_profile
 echo 1 > events/kprobes/myevent/enable
 ( echo "forked" )
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc
index 7b5b60c3c5a2..f5e3f9e4a01f 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc
@@ -17,4 +17,10 @@ check_error 'p /bin/sh:10(10)^a'	# BAD_REFCNT_SUFFIX
 check_error 'p /bin/sh:10 ^@+ab'	# BAD_FILE_OFFS
 check_error 'p /bin/sh:10 ^@symbol'	# SYM_ON_UPROBE
 
+# %return suffix error
+if grep -q "place (uprobe): .*%return.*" README; then
+check_error 'p /bin/sh:10^%hoge'	# BAD_ADDR_SUFFIX
+check_error 'p /bin/sh:10(10)^%return'	# BAD_REFCNT_SUFFIX
+fi
+
 exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc
index 7449a4b8f1f9..9098f1e7433f 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc
@@ -25,12 +25,12 @@ echo 'wakeup_latency u64 lat pid_t pid' >> synthetic_events
 echo 'hist:keys=pid:ts1=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_wakeup/trigger
 echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts1:onmatch(sched.sched_wakeup).wakeup_latency($wakeup_lat,next_pid) if next_comm=="ping"' > events/sched/sched_switch/trigger
 
-echo 'waking+wakeup_latency u64 lat; pid_t pid' >> synthetic_events
-echo 'hist:keys=pid,lat:sort=pid,lat:ww_lat=$waking_lat+$wakeup_lat:onmatch(synthetic.wakeup_latency).waking+wakeup_latency($ww_lat,pid)' >> events/synthetic/wakeup_latency/trigger
-echo 'hist:keys=pid,lat:sort=pid,lat' >> events/synthetic/waking+wakeup_latency/trigger
+echo 'waking_plus_wakeup_latency u64 lat; pid_t pid' >> synthetic_events
+echo 'hist:keys=pid,lat:sort=pid,lat:ww_lat=$waking_lat+$wakeup_lat:onmatch(synthetic.wakeup_latency).waking_plus_wakeup_latency($ww_lat,pid)' >> events/synthetic/wakeup_latency/trigger
+echo 'hist:keys=pid,lat:sort=pid,lat' >> events/synthetic/waking_plus_wakeup_latency/trigger
 
 ping $LOCALHOST -c 3
-if ! grep -q "pid:" events/synthetic/waking+wakeup_latency/hist; then
+if ! grep -q "pid:" events/synthetic/waking_plus_wakeup_latency/hist; then
     fail "Failed to create combined histogram"
 fi
 
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-dynstring.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-dynstring.tc
new file mode 100644
index 000000000000..3d65c856eca3
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-dynstring.tc
@@ -0,0 +1,31 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event trigger - test inter-event histogram trigger trace action with dynamic string param
+# requires: set_event synthetic_events events/sched/sched_process_exec/hist "char name[]' >> synthetic_events":README
+
+fail() { #msg
+    echo $1
+    exit_fail
+}
+
+echo "Test create synthetic event"
+
+echo 'ping_test_latency u64 lat; char filename[]' > synthetic_events
+if [ ! -d events/synthetic/ping_test_latency ]; then
+    fail "Failed to create ping_test_latency synthetic event"
+fi
+
+echo "Test create histogram for synthetic event using trace action and dynamic strings"
+echo "Test histogram dynamic string variables,simple expression support and trace action"
+
+echo 'hist:key=pid:filenamevar=filename:ts0=common_timestamp.usecs' > events/sched/sched_process_exec/trigger
+echo 'hist:key=pid:lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_process_exec).ping_test_latency($lat,$filenamevar) if comm == "ping"' > events/sched/sched_process_exit/trigger
+echo 'hist:keys=filename,lat:sort=filename,lat' > events/synthetic/ping_test_latency/trigger
+
+ping $LOCALHOST -c 5
+
+if ! grep -q "ping" events/synthetic/ping_test_latency/hist; then
+    fail "Failed to create dynamic string trace action inter-event histogram"
+fi
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc
new file mode 100644
index 000000000000..ada594fe16cb
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc
@@ -0,0 +1,19 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event trigger - test synthetic_events syntax parser errors
+# requires: synthetic_events error_log
+
+check_error() { # command-with-error-pos-by-^
+    ftrace_errlog_check 'synthetic_events' "$1" 'synthetic_events'
+}
+
+check_error 'myevent ^chr arg'			# INVALID_TYPE
+check_error 'myevent ^char str[];; int v'	# INVALID_TYPE
+check_error 'myevent char ^str]; int v'		# INVALID_NAME
+check_error 'myevent char ^str;[]'		# INVALID_NAME
+check_error 'myevent ^char str[; int v'		# INVALID_TYPE
+check_error '^mye;vent char str[]'		# BAD_NAME
+check_error 'myevent char str[]; ^int'		# INVALID_FIELD
+check_error '^myevent'				# INCOMPLETE_CMD
+
+exit 0
diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h
index 4f78e4805633..f19804df244c 100644
--- a/tools/testing/selftests/kselftest_harness.h
+++ b/tools/testing/selftests/kselftest_harness.h
@@ -971,6 +971,11 @@ void __run_test(struct __fixture_metadata *f,
 
 	ksft_print_msg(" RUN           %s%s%s.%s ...\n",
 	       f->name, variant->name[0] ? "." : "", variant->name, t->name);
+
+	/* Make sure output buffers are flushed before fork */
+	fflush(stdout);
+	fflush(stderr);
+
 	t->pid = fork();
 	if (t->pid < 0) {
 		ksft_print_msg("ERROR SPAWNING TEST CHILD\n");
diff --git a/tools/testing/selftests/kvm/x86_64/debug_regs.c b/tools/testing/selftests/kvm/x86_64/debug_regs.c
index b8d14f9db5f9..2fc6b3af81a1 100644
--- a/tools/testing/selftests/kvm/x86_64/debug_regs.c
+++ b/tools/testing/selftests/kvm/x86_64/debug_regs.c
@@ -73,7 +73,7 @@ int main(void)
 	int i;
 	/* Instruction lengths starting at ss_start */
 	int ss_size[4] = {
-		3,		/* xor */
+		2,		/* xor */
 		2,		/* cpuid */
 		5,		/* mov */
 		2,		/* rdmsr */
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index 7a17ea815736..30848ca36555 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -47,9 +47,9 @@ ARCH		?= $(SUBARCH)
 khdr:
 ifndef KSFT_KHDR_INSTALL_DONE
 ifeq (1,$(DEFAULT_INSTALL_HDR_PATH))
-	make --no-builtin-rules ARCH=$(ARCH) -C $(top_srcdir) headers_install
+	$(MAKE) --no-builtin-rules ARCH=$(ARCH) -C $(top_srcdir) headers_install
 else
-	make --no-builtin-rules INSTALL_HDR_PATH=$$OUTPUT/usr \
+	$(MAKE) --no-builtin-rules INSTALL_HDR_PATH=$$OUTPUT/usr \
 		ARCH=$(ARCH) -C $(top_srcdir) headers_install
 endif
 endif
@@ -107,9 +107,8 @@ endif
 emit_tests:
 	for TEST in $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS); do \
 		BASENAME_TEST=`basename $$TEST`;	\
-		echo "	\\";				\
-		echo -n "	\"$$BASENAME_TEST\"";	\
-	done;						\
+		echo "$(COLLECTION):$$BASENAME_TEST";	\
+	done
 
 # define if isn't already. It is undefined in make O= case.
 ifeq ($(RM),)
diff --git a/tools/testing/selftests/livepatch/functions.sh b/tools/testing/selftests/livepatch/functions.sh
index 1aba83c87ad3..846c7ed71556 100644
--- a/tools/testing/selftests/livepatch/functions.sh
+++ b/tools/testing/selftests/livepatch/functions.sh
@@ -278,7 +278,7 @@ function check_result {
 	# help differentiate repeated testing runs.  Remove them with a
 	# post-comparison sed filter.
 
-	result=$(dmesg | comm -13 "$SAVED_DMESG" - | \
+	result=$(dmesg | comm --nocheck-order -13 "$SAVED_DMESG" - | \
 		 grep -e 'livepatch:' -e 'test_klp' | \
 		 grep -v '\(tainting\|taints\) kernel' | \
 		 sed 's/^\[[ 0-9.]*\] //')
diff --git a/tools/testing/selftests/lkdtm/run.sh b/tools/testing/selftests/lkdtm/run.sh
index 8383eb89d88a..bb7a1775307b 100755
--- a/tools/testing/selftests/lkdtm/run.sh
+++ b/tools/testing/selftests/lkdtm/run.sh
@@ -82,7 +82,7 @@ dmesg > "$DMESG"
 ($SHELL -c 'cat <(echo '"$test"') >'"$TRIGGER" 2>/dev/null) || true
 
 # Record and dump the results
-dmesg | diff --changed-group-format='%>' --unchanged-group-format='' "$DMESG" - > "$LOG" || true
+dmesg | comm --nocheck-order -13 "$DMESG" - > "$LOG" || true
 
 cat "$LOG"
 # Check for expected output
diff --git a/tools/testing/selftests/lkdtm/tests.txt b/tools/testing/selftests/lkdtm/tests.txt
index 9d266e79c6a2..74a8d329a72c 100644
--- a/tools/testing/selftests/lkdtm/tests.txt
+++ b/tools/testing/selftests/lkdtm/tests.txt
@@ -9,7 +9,6 @@ EXCEPTION
 #CORRUPT_STACK_STRONG Crashes entire system on success
 CORRUPT_LIST_ADD list_add corruption
 CORRUPT_LIST_DEL list_del corruption
-CORRUPT_USER_DS Invalid address limit on user-mode return
 STACK_GUARD_PAGE_LEADING
 STACK_GUARD_PAGE_TRAILING
 UNSET_SMEP CR4 bits went missing
@@ -67,6 +66,5 @@ USERCOPY_STACK_FRAME_TO
 USERCOPY_STACK_FRAME_FROM
 USERCOPY_STACK_BEYOND
 USERCOPY_KERNEL
-USERCOPY_KERNEL_DS
 STACKLEAK_ERASING OK: the rest of the thread stack is properly erased
 CFI_FORWARD_PROTO
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 742c499328b2..61ae899cfc17 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -1,4 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0-only
+ipsec
 msg_zerocopy
 socket
 psock_fanout
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 9491bbaa0831..ef352477cac6 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -19,6 +19,8 @@ TEST_PROGS += txtimestamp.sh
 TEST_PROGS += vrf-xfrm-tests.sh
 TEST_PROGS += rxtimestamp.sh
 TEST_PROGS += devlink_port_split.py
+TEST_PROGS += drop_monitor_tests.sh
+TEST_PROGS += vrf_route_leaking.sh
 TEST_PROGS_EXTENDED := in_netns.sh
 TEST_GEN_FILES =  socket nettest
 TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
@@ -29,6 +31,7 @@ TEST_GEN_FILES += tcp_fastopen_backup_key
 TEST_GEN_FILES += fin_ack_lat
 TEST_GEN_FILES += reuseaddr_ports_exhausted
 TEST_GEN_FILES += hwtstamp_config rxtimestamp timestamping txtimestamp
+TEST_GEN_FILES += ipsec
 TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
 TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls
 
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 3b42c06b5985..43649242adc0 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -24,10 +24,12 @@ CONFIG_IP_NF_NAT=m
 CONFIG_NF_TABLES=m
 CONFIG_NF_TABLES_IPV6=y
 CONFIG_NF_TABLES_IPV4=y
-CONFIG_NFT_CHAIN_NAT_IPV6=m
-CONFIG_NFT_CHAIN_NAT_IPV4=m
+CONFIG_NFT_NAT=m
 CONFIG_NET_SCH_FQ=m
 CONFIG_NET_SCH_ETF=m
 CONFIG_NET_SCH_NETEM=y
 CONFIG_TEST_BLACKHOLE_DEV=m
 CONFIG_KALLSYMS=y
+CONFIG_TRACEPOINTS=y
+CONFIG_NET_DROP_MONITOR=m
+CONFIG_NETDEVSIM=m
diff --git a/tools/testing/selftests/net/drop_monitor_tests.sh b/tools/testing/selftests/net/drop_monitor_tests.sh
new file mode 100755
index 000000000000..b7650e30d18b
--- /dev/null
+++ b/tools/testing/selftests/net/drop_monitor_tests.sh
@@ -0,0 +1,215 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test is for checking drop monitor functionality.
+
+ret=0
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+# all tests in this script. Can be overridden with -t option
+TESTS="
+	sw_drops
+	hw_drops
+"
+
+IP="ip -netns ns1"
+TC="tc -netns ns1"
+DEVLINK="devlink -N ns1"
+NS_EXEC="ip netns exec ns1"
+NETDEVSIM_PATH=/sys/bus/netdevsim/
+DEV_ADDR=1337
+DEV=netdevsim${DEV_ADDR}
+DEVLINK_DEV=netdevsim/${DEV}
+
+log_test()
+{
+	local rc=$1
+	local expected=$2
+	local msg="$3"
+
+	if [ ${rc} -eq ${expected} ]; then
+		printf "    TEST: %-60s  [ OK ]\n" "${msg}"
+		nsuccess=$((nsuccess+1))
+	else
+		ret=1
+		nfail=$((nfail+1))
+		printf "    TEST: %-60s  [FAIL]\n" "${msg}"
+	fi
+}
+
+setup()
+{
+	modprobe netdevsim &> /dev/null
+
+	set -e
+	ip netns add ns1
+	$IP link add dummy10 up type dummy
+
+	$NS_EXEC echo "$DEV_ADDR 1" > ${NETDEVSIM_PATH}/new_device
+	udevadm settle
+	local netdev=$($NS_EXEC ls ${NETDEVSIM_PATH}/devices/${DEV}/net/)
+	$IP link set dev $netdev up
+
+	set +e
+}
+
+cleanup()
+{
+	$NS_EXEC echo "$DEV_ADDR" > ${NETDEVSIM_PATH}/del_device
+	ip netns del ns1
+}
+
+sw_drops_test()
+{
+	echo
+	echo "Software drops test"
+
+	setup
+
+	local dir=$(mktemp -d)
+
+	$TC qdisc add dev dummy10 clsact
+	$TC filter add dev dummy10 egress pref 1 handle 101 proto ip \
+		flower dst_ip 192.0.2.10 action drop
+
+	$NS_EXEC mausezahn dummy10 -a 00:11:22:33:44:55 -b 00:aa:bb:cc:dd:ee \
+		-A 192.0.2.1 -B 192.0.2.10 -t udp sp=12345,dp=54321 -c 0 -q \
+		-d 100msec &
+	timeout 5 dwdump -o sw -w ${dir}/packets.pcap
+	(( $(tshark -r ${dir}/packets.pcap \
+		-Y 'ip.dst == 192.0.2.10' 2> /dev/null | wc -l) != 0))
+	log_test $? 0 "Capturing active software drops"
+
+	rm ${dir}/packets.pcap
+
+	{ kill %% && wait %%; } 2>/dev/null
+	timeout 5 dwdump -o sw -w ${dir}/packets.pcap
+	(( $(tshark -r ${dir}/packets.pcap \
+		-Y 'ip.dst == 192.0.2.10' 2> /dev/null | wc -l) == 0))
+	log_test $? 0 "Capturing inactive software drops"
+
+	rm -r $dir
+
+	cleanup
+}
+
+hw_drops_test()
+{
+	echo
+	echo "Hardware drops test"
+
+	setup
+
+	local dir=$(mktemp -d)
+
+	$DEVLINK trap set $DEVLINK_DEV trap blackhole_route action trap
+	timeout 5 dwdump -o hw -w ${dir}/packets.pcap
+	(( $(tshark -r ${dir}/packets.pcap \
+		-Y 'net_dm.hw_trap_name== blackhole_route' 2> /dev/null \
+		| wc -l) != 0))
+	log_test $? 0 "Capturing active hardware drops"
+
+	rm ${dir}/packets.pcap
+
+	$DEVLINK trap set $DEVLINK_DEV trap blackhole_route action drop
+	timeout 5 dwdump -o hw -w ${dir}/packets.pcap
+	(( $(tshark -r ${dir}/packets.pcap \
+		-Y 'net_dm.hw_trap_name== blackhole_route' 2> /dev/null \
+		| wc -l) == 0))
+	log_test $? 0 "Capturing inactive hardware drops"
+
+	rm -r $dir
+
+	cleanup
+}
+
+################################################################################
+# usage
+
+usage()
+{
+	cat <<EOF
+usage: ${0##*/} OPTS
+
+        -t <test>   Test(s) to run (default: all)
+                    (options: $TESTS)
+EOF
+}
+
+################################################################################
+# main
+
+while getopts ":t:h" opt; do
+	case $opt in
+		t) TESTS=$OPTARG;;
+		h) usage; exit 0;;
+		*) usage; exit 1;;
+	esac
+done
+
+if [ "$(id -u)" -ne 0 ];then
+	echo "SKIP: Need root privileges"
+	exit $ksft_skip;
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+	echo "SKIP: Could not run test without ip tool"
+	exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v devlink)" ]; then
+	echo "SKIP: Could not run test without devlink tool"
+	exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v tshark)" ]; then
+	echo "SKIP: Could not run test without tshark tool"
+	exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v dwdump)" ]; then
+	echo "SKIP: Could not run test without dwdump tool"
+	exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v udevadm)" ]; then
+	echo "SKIP: Could not run test without udevadm tool"
+	exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v timeout)" ]; then
+	echo "SKIP: Could not run test without timeout tool"
+	exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v mausezahn)" ]; then
+	echo "SKIP: Could not run test without mausezahn tool"
+	exit $ksft_skip
+fi
+
+tshark -G fields 2> /dev/null | grep -q net_dm
+if [ $? -ne 0 ]; then
+	echo "SKIP: tshark too old, missing net_dm dissector"
+	exit $ksft_skip
+fi
+
+# start clean
+cleanup &> /dev/null
+
+for t in $TESTS
+do
+	case $t in
+	sw_drops|sw)			sw_drops_test;;
+	hw_drops|hw)			hw_drops_test;;
+
+	help) echo "Test names: $TESTS"; exit 0;;
+	esac
+done
+
+if [ "$TESTS" != "none" ]; then
+	printf "\nTests passed: %3d\n" ${nsuccess}
+	printf "Tests failed: %3d\n"   ${nfail}
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index 22dc2f3d428b..eb693a3b7b4a 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -411,9 +411,16 @@ ipv6_fdb_grp_fcnal()
 	run_cmd "$IP -6 ro add 2001:db8:101::1/128 nhid 103"
 	log_test $? 2 "Route add with fdb nexthop group"
 
+	run_cmd "$IP nexthop del id 61"
+	run_cmd "$BRIDGE fdb get to 02:02:00:00:00:13 dev vx10 self"
+	log_test $? 0 "Fdb entry after deleting a single nexthop"
+
 	run_cmd "$IP nexthop del id 102"
 	log_test $? 0 "Fdb nexthop delete"
 
+	run_cmd "$BRIDGE fdb get to 02:02:00:00:00:13 dev vx10 self"
+	log_test $? 254 "Fdb entry after deleting a nexthop group"
+
 	$IP link del dev vx10
 }
 
@@ -484,9 +491,16 @@ ipv4_fdb_grp_fcnal()
 	run_cmd "$IP ro add 172.16.0.0/22 nhid 103"
 	log_test $? 2 "Route add with fdb nexthop group"
 
+	run_cmd "$IP nexthop del id 12"
+	run_cmd "$BRIDGE fdb get to 02:02:00:00:00:13 dev vx10 self"
+	log_test $? 0 "Fdb entry after deleting a single nexthop"
+
 	run_cmd "$IP nexthop del id 102"
 	log_test $? 0 "Fdb nexthop delete"
 
+	run_cmd "$BRIDGE fdb get to 02:02:00:00:00:13 dev vx10 self"
+	log_test $? 254 "Fdb entry after deleting a nexthop group"
+
 	$IP link del dev vx10
 }
 
@@ -739,6 +753,36 @@ ipv6_fcnal_runtime()
 	run_cmd "$IP nexthop replace id 81 via 172.16.1.1 dev veth1"
 	log_test $? 2 "Nexthop replace of group entry - v6 route, v4 nexthop"
 
+	run_cmd "$IP nexthop add id 86 via 2001:db8:92::2 dev veth3"
+	run_cmd "$IP nexthop add id 87 via 172.16.1.1 dev veth1"
+	run_cmd "$IP nexthop add id 88 via 172.16.1.1 dev veth1"
+	run_cmd "$IP nexthop add id 124 group 86/87/88"
+	run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 124"
+	log_test $? 2 "IPv6 route can not have a group with v4 and v6 gateways"
+
+	run_cmd "$IP nexthop del id 88"
+	run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 124"
+	log_test $? 2 "IPv6 route can not have a group with v4 and v6 gateways"
+
+	run_cmd "$IP nexthop del id 87"
+	run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 124"
+	log_test $? 0 "IPv6 route using a group after removing v4 gateways"
+
+	run_cmd "$IP ro delete 2001:db8:101::1/128"
+	run_cmd "$IP nexthop add id 87 via 172.16.1.1 dev veth1"
+	run_cmd "$IP nexthop add id 88 via 172.16.1.1 dev veth1"
+	run_cmd "$IP nexthop replace id 124 group 86/87/88"
+	run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 124"
+	log_test $? 2 "IPv6 route can not have a group with v4 and v6 gateways"
+
+	run_cmd "$IP nexthop replace id 88 via 2001:db8:92::2 dev veth3"
+	run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 124"
+	log_test $? 2 "IPv6 route can not have a group with v4 and v6 gateways"
+
+	run_cmd "$IP nexthop replace id 87 via 2001:db8:92::2 dev veth3"
+	run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 124"
+	log_test $? 0 "IPv6 route using a group after replacing v4 gateways"
+
 	$IP nexthop flush >/dev/null 2>&1
 
 	#
diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh
index 75fe24bcb9cd..9c12c4fd3afc 100644
--- a/tools/testing/selftests/net/forwarding/devlink_lib.sh
+++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh
@@ -5,7 +5,7 @@
 # Defines
 
 if [[ ! -v DEVLINK_DEV ]]; then
-	DEVLINK_DEV=$(devlink port show "${NETIFS[p1]}" -j \
+	DEVLINK_DEV=$(devlink port show "${NETIFS[p1]:-$NETIF_NO_CABLE}" -j \
 			     | jq -r '.port | keys[]' | cut -d/ -f-2)
 	if [ -z "$DEVLINK_DEV" ]; then
 		echo "SKIP: ${NETIFS[p1]} has no devlink device registered for it"
@@ -117,6 +117,12 @@ devlink_reload()
 
 declare -A DEVLINK_ORIG
 
+# Changing pool type from static to dynamic causes reinterpretation of threshold
+# values. They therefore need to be saved before pool type is changed, then the
+# pool type can be changed, and then the new values need to be set up. Therefore
+# instead of saving the current state implicitly in the _set call, provide
+# functions for all three primitives: save, set, and restore.
+
 devlink_port_pool_threshold()
 {
 	local port=$1; shift
@@ -126,14 +132,21 @@ devlink_port_pool_threshold()
 		| jq '.port_pool."'"$port"'"[].threshold'
 }
 
-devlink_port_pool_th_set()
+devlink_port_pool_th_save()
 {
 	local port=$1; shift
 	local pool=$1; shift
-	local th=$1; shift
 	local key="port_pool($port,$pool).threshold"
 
 	DEVLINK_ORIG[$key]=$(devlink_port_pool_threshold $port $pool)
+}
+
+devlink_port_pool_th_set()
+{
+	local port=$1; shift
+	local pool=$1; shift
+	local th=$1; shift
+
 	devlink sb port pool set $port pool $pool th $th
 }
 
@@ -142,8 +155,13 @@ devlink_port_pool_th_restore()
 	local port=$1; shift
 	local pool=$1; shift
 	local key="port_pool($port,$pool).threshold"
+	local -a orig=(${DEVLINK_ORIG[$key]})
 
-	devlink sb port pool set $port pool $pool th ${DEVLINK_ORIG[$key]}
+	if [[ -z $orig ]]; then
+		echo "WARNING: Mismatched devlink_port_pool_th_restore"
+	else
+		devlink sb port pool set $port pool $pool th $orig
+	fi
 }
 
 devlink_pool_size_thtype()
@@ -154,14 +172,20 @@ devlink_pool_size_thtype()
 	    | jq -r '.pool[][] | (.size, .thtype)'
 }
 
+devlink_pool_size_thtype_save()
+{
+	local pool=$1; shift
+	local key="pool($pool).size_thtype"
+
+	DEVLINK_ORIG[$key]=$(devlink_pool_size_thtype $pool)
+}
+
 devlink_pool_size_thtype_set()
 {
 	local pool=$1; shift
 	local thtype=$1; shift
 	local size=$1; shift
-	local key="pool($pool).size_thtype"
 
-	DEVLINK_ORIG[$key]=$(devlink_pool_size_thtype $pool)
 	devlink sb pool set "$DEVLINK_DEV" pool $pool size $size thtype $thtype
 }
 
@@ -171,8 +195,12 @@ devlink_pool_size_thtype_restore()
 	local key="pool($pool).size_thtype"
 	local -a orig=(${DEVLINK_ORIG[$key]})
 
-	devlink sb pool set "$DEVLINK_DEV" pool $pool \
-		size ${orig[0]} thtype ${orig[1]}
+	if [[ -z ${orig[0]} ]]; then
+		echo "WARNING: Mismatched devlink_pool_size_thtype_restore"
+	else
+		devlink sb pool set "$DEVLINK_DEV" pool $pool \
+			size ${orig[0]} thtype ${orig[1]}
+	fi
 }
 
 devlink_tc_bind_pool_th()
@@ -185,6 +213,16 @@ devlink_tc_bind_pool_th()
 	    | jq -r '.tc_bind[][] | (.pool, .threshold)'
 }
 
+devlink_tc_bind_pool_th_save()
+{
+	local port=$1; shift
+	local tc=$1; shift
+	local dir=$1; shift
+	local key="tc_bind($port,$dir,$tc).pool_th"
+
+	DEVLINK_ORIG[$key]=$(devlink_tc_bind_pool_th $port $tc $dir)
+}
+
 devlink_tc_bind_pool_th_set()
 {
 	local port=$1; shift
@@ -192,9 +230,7 @@ devlink_tc_bind_pool_th_set()
 	local dir=$1; shift
 	local pool=$1; shift
 	local th=$1; shift
-	local key="tc_bind($port,$dir,$tc).pool_th"
 
-	DEVLINK_ORIG[$key]=$(devlink_tc_bind_pool_th $port $tc $dir)
 	devlink sb tc bind set $port tc $tc type $dir pool $pool th $th
 }
 
@@ -206,8 +242,12 @@ devlink_tc_bind_pool_th_restore()
 	local key="tc_bind($port,$dir,$tc).pool_th"
 	local -a orig=(${DEVLINK_ORIG[$key]})
 
-	devlink sb tc bind set $port tc $tc type $dir \
-		pool ${orig[0]} th ${orig[1]}
+	if [[ -z ${orig[0]} ]]; then
+		echo "WARNING: Mismatched devlink_tc_bind_pool_th_restore"
+	else
+		devlink sb tc bind set $port tc $tc type $dir \
+			pool ${orig[0]} th ${orig[1]}
+	fi
 }
 
 devlink_traps_num_get()
@@ -509,3 +549,9 @@ devlink_cpu_port_get()
 
 	echo "$DEVLINK_DEV/$cpu_dl_port_num"
 }
+
+devlink_cell_size_get()
+{
+	devlink sb pool show "$DEVLINK_DEV" pool 0 -j \
+	    | jq '.pool[][].cell_size'
+}
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 977fc2b326a2..927f9ba49e08 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -1227,3 +1227,46 @@ stop_traffic()
 	# Suppress noise from killing mausezahn.
 	{ kill %% && wait %%; } 2>/dev/null
 }
+
+tcpdump_start()
+{
+	local if_name=$1; shift
+	local ns=$1; shift
+
+	capfile=$(mktemp)
+	capout=$(mktemp)
+
+	if [ -z $ns ]; then
+		ns_cmd=""
+	else
+		ns_cmd="ip netns exec ${ns}"
+	fi
+
+	if [ -z $SUDO_USER ] ; then
+		capuser=""
+	else
+		capuser="-Z $SUDO_USER"
+	fi
+
+	$ns_cmd tcpdump -e -n -Q in -i $if_name \
+		-s 65535 -B 32768 $capuser -w $capfile > "$capout" 2>&1 &
+	cappid=$!
+
+	sleep 1
+}
+
+tcpdump_stop()
+{
+	$ns_cmd kill $cappid
+	sleep 1
+}
+
+tcpdump_cleanup()
+{
+	rm $capfile $capout
+}
+
+tcpdump_show()
+{
+	tcpdump -e -n -r $capfile 2>&1
+}
diff --git a/tools/testing/selftests/net/forwarding/mirror_lib.sh b/tools/testing/selftests/net/forwarding/mirror_lib.sh
index c33bfd7ba214..13db1cb50e57 100644
--- a/tools/testing/selftests/net/forwarding/mirror_lib.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_lib.sh
@@ -31,7 +31,7 @@ mirror_test()
 
 	local t0=$(tc_rule_stats_get $dev $pref)
 	$MZ $vrf_name ${sip:+-A $sip} -B $dip -a own -b bc -q \
-	    -c 10 -d 100ms -t icmp type=8
+	    -c 10 -d 100msec -t icmp type=8
 	sleep 0.5
 	local t1=$(tc_rule_stats_get $dev $pref)
 	local delta=$((t1 - t0))
diff --git a/tools/testing/selftests/net/ipsec.c b/tools/testing/selftests/net/ipsec.c
new file mode 100644
index 000000000000..17ced7d6ce25
--- /dev/null
+++ b/tools/testing/selftests/net/ipsec.c
@@ -0,0 +1,2195 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ipsec.c - Check xfrm on veth inside a net-ns.
+ * Copyright (c) 2018 Dmitry Safonov
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <asm/types.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <linux/limits.h>
+#include <linux/netlink.h>
+#include <linux/random.h>
+#include <linux/rtnetlink.h>
+#include <linux/veth.h>
+#include <linux/xfrm.h>
+#include <netinet/in.h>
+#include <net/if.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "../kselftest.h"
+
+#define printk(fmt, ...)						\
+	ksft_print_msg("%d[%u] " fmt "\n", getpid(), __LINE__, ##__VA_ARGS__)
+
+#define pr_err(fmt, ...)	printk(fmt ": %m", ##__VA_ARGS__)
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
+
+#define IPV4_STR_SZ	16	/* xxx.xxx.xxx.xxx is longest + \0 */
+#define MAX_PAYLOAD	2048
+#define XFRM_ALGO_KEY_BUF_SIZE	512
+#define MAX_PROCESSES	(1 << 14) /* /16 mask divided by /30 subnets */
+#define INADDR_A	((in_addr_t) 0x0a000000) /* 10.0.0.0 */
+#define INADDR_B	((in_addr_t) 0xc0a80000) /* 192.168.0.0 */
+
+/* /30 mask for one veth connection */
+#define PREFIX_LEN	30
+#define child_ip(nr)	(4*nr + 1)
+#define grchild_ip(nr)	(4*nr + 2)
+
+#define VETH_FMT	"ktst-%d"
+#define VETH_LEN	12
+
+static int nsfd_parent	= -1;
+static int nsfd_childa	= -1;
+static int nsfd_childb	= -1;
+static long page_size;
+
+/*
+ * ksft_cnt is static in kselftest, so isn't shared with children.
+ * We have to send a test result back to parent and count there.
+ * results_fd is a pipe with test feedback from children.
+ */
+static int results_fd[2];
+
+const unsigned int ping_delay_nsec	= 50 * 1000 * 1000;
+const unsigned int ping_timeout		= 300;
+const unsigned int ping_count		= 100;
+const unsigned int ping_success		= 80;
+
+static void randomize_buffer(void *buf, size_t buflen)
+{
+	int *p = (int *)buf;
+	size_t words = buflen / sizeof(int);
+	size_t leftover = buflen % sizeof(int);
+
+	if (!buflen)
+		return;
+
+	while (words--)
+		*p++ = rand();
+
+	if (leftover) {
+		int tmp = rand();
+
+		memcpy(buf + buflen - leftover, &tmp, leftover);
+	}
+
+	return;
+}
+
+static int unshare_open(void)
+{
+	const char *netns_path = "/proc/self/ns/net";
+	int fd;
+
+	if (unshare(CLONE_NEWNET) != 0) {
+		pr_err("unshare()");
+		return -1;
+	}
+
+	fd = open(netns_path, O_RDONLY);
+	if (fd <= 0) {
+		pr_err("open(%s)", netns_path);
+		return -1;
+	}
+
+	return fd;
+}
+
+static int switch_ns(int fd)
+{
+	if (setns(fd, CLONE_NEWNET)) {
+		pr_err("setns()");
+		return -1;
+	}
+	return 0;
+}
+
+/*
+ * Running the test inside a new parent net namespace to bother less
+ * about cleanup on error-path.
+ */
+static int init_namespaces(void)
+{
+	nsfd_parent = unshare_open();
+	if (nsfd_parent <= 0)
+		return -1;
+
+	nsfd_childa = unshare_open();
+	if (nsfd_childa <= 0)
+		return -1;
+
+	if (switch_ns(nsfd_parent))
+		return -1;
+
+	nsfd_childb = unshare_open();
+	if (nsfd_childb <= 0)
+		return -1;
+
+	if (switch_ns(nsfd_parent))
+		return -1;
+	return 0;
+}
+
+static int netlink_sock(int *sock, uint32_t *seq_nr, int proto)
+{
+	if (*sock > 0) {
+		seq_nr++;
+		return 0;
+	}
+
+	*sock = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, proto);
+	if (*sock <= 0) {
+		pr_err("socket(AF_NETLINK)");
+		return -1;
+	}
+
+	randomize_buffer(seq_nr, sizeof(*seq_nr));
+
+	return 0;
+}
+
+static inline struct rtattr *rtattr_hdr(struct nlmsghdr *nh)
+{
+	return (struct rtattr *)((char *)(nh) + RTA_ALIGN((nh)->nlmsg_len));
+}
+
+static int rtattr_pack(struct nlmsghdr *nh, size_t req_sz,
+		unsigned short rta_type, const void *payload, size_t size)
+{
+	/* NLMSG_ALIGNTO == RTA_ALIGNTO, nlmsg_len already aligned */
+	struct rtattr *attr = rtattr_hdr(nh);
+	size_t nl_size = RTA_ALIGN(nh->nlmsg_len) + RTA_LENGTH(size);
+
+	if (req_sz < nl_size) {
+		printk("req buf is too small: %zu < %zu", req_sz, nl_size);
+		return -1;
+	}
+	nh->nlmsg_len = nl_size;
+
+	attr->rta_len = RTA_LENGTH(size);
+	attr->rta_type = rta_type;
+	memcpy(RTA_DATA(attr), payload, size);
+
+	return 0;
+}
+
+static struct rtattr *_rtattr_begin(struct nlmsghdr *nh, size_t req_sz,
+		unsigned short rta_type, const void *payload, size_t size)
+{
+	struct rtattr *ret = rtattr_hdr(nh);
+
+	if (rtattr_pack(nh, req_sz, rta_type, payload, size))
+		return 0;
+
+	return ret;
+}
+
+static inline struct rtattr *rtattr_begin(struct nlmsghdr *nh, size_t req_sz,
+		unsigned short rta_type)
+{
+	return _rtattr_begin(nh, req_sz, rta_type, 0, 0);
+}
+
+static inline void rtattr_end(struct nlmsghdr *nh, struct rtattr *attr)
+{
+	char *nlmsg_end = (char *)nh + nh->nlmsg_len;
+
+	attr->rta_len = nlmsg_end - (char *)attr;
+}
+
+static int veth_pack_peerb(struct nlmsghdr *nh, size_t req_sz,
+		const char *peer, int ns)
+{
+	struct ifinfomsg pi;
+	struct rtattr *peer_attr;
+
+	memset(&pi, 0, sizeof(pi));
+	pi.ifi_family	= AF_UNSPEC;
+	pi.ifi_change	= 0xFFFFFFFF;
+
+	peer_attr = _rtattr_begin(nh, req_sz, VETH_INFO_PEER, &pi, sizeof(pi));
+	if (!peer_attr)
+		return -1;
+
+	if (rtattr_pack(nh, req_sz, IFLA_IFNAME, peer, strlen(peer)))
+		return -1;
+
+	if (rtattr_pack(nh, req_sz, IFLA_NET_NS_FD, &ns, sizeof(ns)))
+		return -1;
+
+	rtattr_end(nh, peer_attr);
+
+	return 0;
+}
+
+static int netlink_check_answer(int sock)
+{
+	struct nlmsgerror {
+		struct nlmsghdr hdr;
+		int error;
+		struct nlmsghdr orig_msg;
+	} answer;
+
+	if (recv(sock, &answer, sizeof(answer), 0) < 0) {
+		pr_err("recv()");
+		return -1;
+	} else if (answer.hdr.nlmsg_type != NLMSG_ERROR) {
+		printk("expected NLMSG_ERROR, got %d", (int)answer.hdr.nlmsg_type);
+		return -1;
+	} else if (answer.error) {
+		printk("NLMSG_ERROR: %d: %s",
+			answer.error, strerror(-answer.error));
+		return answer.error;
+	}
+
+	return 0;
+}
+
+static int veth_add(int sock, uint32_t seq, const char *peera, int ns_a,
+		const char *peerb, int ns_b)
+{
+	uint16_t flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE;
+	struct {
+		struct nlmsghdr		nh;
+		struct ifinfomsg	info;
+		char			attrbuf[MAX_PAYLOAD];
+	} req;
+	const char veth_type[] = "veth";
+	struct rtattr *link_info, *info_data;
+
+	memset(&req, 0, sizeof(req));
+	req.nh.nlmsg_len	= NLMSG_LENGTH(sizeof(req.info));
+	req.nh.nlmsg_type	= RTM_NEWLINK;
+	req.nh.nlmsg_flags	= flags;
+	req.nh.nlmsg_seq	= seq;
+	req.info.ifi_family	= AF_UNSPEC;
+	req.info.ifi_change	= 0xFFFFFFFF;
+
+	if (rtattr_pack(&req.nh, sizeof(req), IFLA_IFNAME, peera, strlen(peera)))
+		return -1;
+
+	if (rtattr_pack(&req.nh, sizeof(req), IFLA_NET_NS_FD, &ns_a, sizeof(ns_a)))
+		return -1;
+
+	link_info = rtattr_begin(&req.nh, sizeof(req), IFLA_LINKINFO);
+	if (!link_info)
+		return -1;
+
+	if (rtattr_pack(&req.nh, sizeof(req), IFLA_INFO_KIND, veth_type, sizeof(veth_type)))
+		return -1;
+
+	info_data = rtattr_begin(&req.nh, sizeof(req), IFLA_INFO_DATA);
+	if (!info_data)
+		return -1;
+
+	if (veth_pack_peerb(&req.nh, sizeof(req), peerb, ns_b))
+		return -1;
+
+	rtattr_end(&req.nh, info_data);
+	rtattr_end(&req.nh, link_info);
+
+	if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
+		pr_err("send()");
+		return -1;
+	}
+	return netlink_check_answer(sock);
+}
+
+static int ip4_addr_set(int sock, uint32_t seq, const char *intf,
+		struct in_addr addr, uint8_t prefix)
+{
+	uint16_t flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE;
+	struct {
+		struct nlmsghdr		nh;
+		struct ifaddrmsg	info;
+		char			attrbuf[MAX_PAYLOAD];
+	} req;
+
+	memset(&req, 0, sizeof(req));
+	req.nh.nlmsg_len	= NLMSG_LENGTH(sizeof(req.info));
+	req.nh.nlmsg_type	= RTM_NEWADDR;
+	req.nh.nlmsg_flags	= flags;
+	req.nh.nlmsg_seq	= seq;
+	req.info.ifa_family	= AF_INET;
+	req.info.ifa_prefixlen	= prefix;
+	req.info.ifa_index	= if_nametoindex(intf);
+
+#ifdef DEBUG
+	{
+		char addr_str[IPV4_STR_SZ] = {};
+
+		strncpy(addr_str, inet_ntoa(addr), IPV4_STR_SZ - 1);
+
+		printk("ip addr set %s", addr_str);
+	}
+#endif
+
+	if (rtattr_pack(&req.nh, sizeof(req), IFA_LOCAL, &addr, sizeof(addr)))
+		return -1;
+
+	if (rtattr_pack(&req.nh, sizeof(req), IFA_ADDRESS, &addr, sizeof(addr)))
+		return -1;
+
+	if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
+		pr_err("send()");
+		return -1;
+	}
+	return netlink_check_answer(sock);
+}
+
+static int link_set_up(int sock, uint32_t seq, const char *intf)
+{
+	struct {
+		struct nlmsghdr		nh;
+		struct ifinfomsg	info;
+		char			attrbuf[MAX_PAYLOAD];
+	} req;
+
+	memset(&req, 0, sizeof(req));
+	req.nh.nlmsg_len	= NLMSG_LENGTH(sizeof(req.info));
+	req.nh.nlmsg_type	= RTM_NEWLINK;
+	req.nh.nlmsg_flags	= NLM_F_REQUEST | NLM_F_ACK;
+	req.nh.nlmsg_seq	= seq;
+	req.info.ifi_family	= AF_UNSPEC;
+	req.info.ifi_change	= 0xFFFFFFFF;
+	req.info.ifi_index	= if_nametoindex(intf);
+	req.info.ifi_flags	= IFF_UP;
+	req.info.ifi_change	= IFF_UP;
+
+	if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
+		pr_err("send()");
+		return -1;
+	}
+	return netlink_check_answer(sock);
+}
+
+static int ip4_route_set(int sock, uint32_t seq, const char *intf,
+		struct in_addr src, struct in_addr dst)
+{
+	struct {
+		struct nlmsghdr	nh;
+		struct rtmsg	rt;
+		char		attrbuf[MAX_PAYLOAD];
+	} req;
+	unsigned int index = if_nametoindex(intf);
+
+	memset(&req, 0, sizeof(req));
+	req.nh.nlmsg_len	= NLMSG_LENGTH(sizeof(req.rt));
+	req.nh.nlmsg_type	= RTM_NEWROUTE;
+	req.nh.nlmsg_flags	= NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE;
+	req.nh.nlmsg_seq	= seq;
+	req.rt.rtm_family	= AF_INET;
+	req.rt.rtm_dst_len	= 32;
+	req.rt.rtm_table	= RT_TABLE_MAIN;
+	req.rt.rtm_protocol	= RTPROT_BOOT;
+	req.rt.rtm_scope	= RT_SCOPE_LINK;
+	req.rt.rtm_type		= RTN_UNICAST;
+
+	if (rtattr_pack(&req.nh, sizeof(req), RTA_DST, &dst, sizeof(dst)))
+		return -1;
+
+	if (rtattr_pack(&req.nh, sizeof(req), RTA_PREFSRC, &src, sizeof(src)))
+		return -1;
+
+	if (rtattr_pack(&req.nh, sizeof(req), RTA_OIF, &index, sizeof(index)))
+		return -1;
+
+	if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
+		pr_err("send()");
+		return -1;
+	}
+
+	return netlink_check_answer(sock);
+}
+
+static int tunnel_set_route(int route_sock, uint32_t *route_seq, char *veth,
+		struct in_addr tunsrc, struct in_addr tundst)
+{
+	if (ip4_addr_set(route_sock, (*route_seq)++, "lo",
+			tunsrc, PREFIX_LEN)) {
+		printk("Failed to set ipv4 addr");
+		return -1;
+	}
+
+	if (ip4_route_set(route_sock, (*route_seq)++, veth, tunsrc, tundst)) {
+		printk("Failed to set ipv4 route");
+		return -1;
+	}
+
+	return 0;
+}
+
+static int init_child(int nsfd, char *veth, unsigned int src, unsigned int dst)
+{
+	struct in_addr intsrc = inet_makeaddr(INADDR_B, src);
+	struct in_addr tunsrc = inet_makeaddr(INADDR_A, src);
+	struct in_addr tundst = inet_makeaddr(INADDR_A, dst);
+	int route_sock = -1, ret = -1;
+	uint32_t route_seq;
+
+	if (switch_ns(nsfd))
+		return -1;
+
+	if (netlink_sock(&route_sock, &route_seq, NETLINK_ROUTE)) {
+		printk("Failed to open netlink route socket in child");
+		return -1;
+	}
+
+	if (ip4_addr_set(route_sock, route_seq++, veth, intsrc, PREFIX_LEN)) {
+		printk("Failed to set ipv4 addr");
+		goto err;
+	}
+
+	if (link_set_up(route_sock, route_seq++, veth)) {
+		printk("Failed to bring up %s", veth);
+		goto err;
+	}
+
+	if (tunnel_set_route(route_sock, &route_seq, veth, tunsrc, tundst)) {
+		printk("Failed to add tunnel route on %s", veth);
+		goto err;
+	}
+	ret = 0;
+
+err:
+	close(route_sock);
+	return ret;
+}
+
+#define ALGO_LEN	64
+enum desc_type {
+	CREATE_TUNNEL	= 0,
+	ALLOCATE_SPI,
+	MONITOR_ACQUIRE,
+	EXPIRE_STATE,
+	EXPIRE_POLICY,
+};
+const char *desc_name[] = {
+	"create tunnel",
+	"alloc spi",
+	"monitor acquire",
+	"expire state",
+	"expire policy"
+};
+struct xfrm_desc {
+	enum desc_type	type;
+	uint8_t		proto;
+	char		a_algo[ALGO_LEN];
+	char		e_algo[ALGO_LEN];
+	char		c_algo[ALGO_LEN];
+	char		ae_algo[ALGO_LEN];
+	unsigned int	icv_len;
+	/* unsigned key_len; */
+};
+
+enum msg_type {
+	MSG_ACK		= 0,
+	MSG_EXIT,
+	MSG_PING,
+	MSG_XFRM_PREPARE,
+	MSG_XFRM_ADD,
+	MSG_XFRM_DEL,
+	MSG_XFRM_CLEANUP,
+};
+
+struct test_desc {
+	enum msg_type type;
+	union {
+		struct {
+			in_addr_t reply_ip;
+			unsigned int port;
+		} ping;
+		struct xfrm_desc xfrm_desc;
+	} body;
+};
+
+struct test_result {
+	struct xfrm_desc desc;
+	unsigned int res;
+};
+
+static void write_test_result(unsigned int res, struct xfrm_desc *d)
+{
+	struct test_result tr = {};
+	ssize_t ret;
+
+	tr.desc = *d;
+	tr.res = res;
+
+	ret = write(results_fd[1], &tr, sizeof(tr));
+	if (ret != sizeof(tr))
+		pr_err("Failed to write the result in pipe %zd", ret);
+}
+
+static void write_msg(int fd, struct test_desc *msg, bool exit_of_fail)
+{
+	ssize_t bytes = write(fd, msg, sizeof(*msg));
+
+	/* Make sure that write/read is atomic to a pipe */
+	BUILD_BUG_ON(sizeof(struct test_desc) > PIPE_BUF);
+
+	if (bytes < 0) {
+		pr_err("write()");
+		if (exit_of_fail)
+			exit(KSFT_FAIL);
+	}
+	if (bytes != sizeof(*msg)) {
+		pr_err("sent part of the message %zd/%zu", bytes, sizeof(*msg));
+		if (exit_of_fail)
+			exit(KSFT_FAIL);
+	}
+}
+
+static void read_msg(int fd, struct test_desc *msg, bool exit_of_fail)
+{
+	ssize_t bytes = read(fd, msg, sizeof(*msg));
+
+	if (bytes < 0) {
+		pr_err("read()");
+		if (exit_of_fail)
+			exit(KSFT_FAIL);
+	}
+	if (bytes != sizeof(*msg)) {
+		pr_err("got incomplete message %zd/%zu", bytes, sizeof(*msg));
+		if (exit_of_fail)
+			exit(KSFT_FAIL);
+	}
+}
+
+static int udp_ping_init(struct in_addr listen_ip, unsigned int u_timeout,
+		unsigned int *server_port, int sock[2])
+{
+	struct sockaddr_in server;
+	struct timeval t = { .tv_sec = 0, .tv_usec = u_timeout };
+	socklen_t s_len = sizeof(server);
+
+	sock[0] = socket(AF_INET, SOCK_DGRAM, 0);
+	if (sock[0] < 0) {
+		pr_err("socket()");
+		return -1;
+	}
+
+	server.sin_family	= AF_INET;
+	server.sin_port		= 0;
+	memcpy(&server.sin_addr.s_addr, &listen_ip, sizeof(struct in_addr));
+
+	if (bind(sock[0], (struct sockaddr *)&server, s_len)) {
+		pr_err("bind()");
+		goto err_close_server;
+	}
+
+	if (getsockname(sock[0], (struct sockaddr *)&server, &s_len)) {
+		pr_err("getsockname()");
+		goto err_close_server;
+	}
+
+	*server_port = ntohs(server.sin_port);
+
+	if (setsockopt(sock[0], SOL_SOCKET, SO_RCVTIMEO, (const char *)&t, sizeof t)) {
+		pr_err("setsockopt()");
+		goto err_close_server;
+	}
+
+	sock[1] = socket(AF_INET, SOCK_DGRAM, 0);
+	if (sock[1] < 0) {
+		pr_err("socket()");
+		goto err_close_server;
+	}
+
+	return 0;
+
+err_close_server:
+	close(sock[0]);
+	return -1;
+}
+
+static int udp_ping_send(int sock[2], in_addr_t dest_ip, unsigned int port,
+		char *buf, size_t buf_len)
+{
+	struct sockaddr_in server;
+	const struct sockaddr *dest_addr = (struct sockaddr *)&server;
+	char *sock_buf[buf_len];
+	ssize_t r_bytes, s_bytes;
+
+	server.sin_family	= AF_INET;
+	server.sin_port		= htons(port);
+	server.sin_addr.s_addr	= dest_ip;
+
+	s_bytes = sendto(sock[1], buf, buf_len, 0, dest_addr, sizeof(server));
+	if (s_bytes < 0) {
+		pr_err("sendto()");
+		return -1;
+	} else if (s_bytes != buf_len) {
+		printk("send part of the message: %zd/%zu", s_bytes, sizeof(server));
+		return -1;
+	}
+
+	r_bytes = recv(sock[0], sock_buf, buf_len, 0);
+	if (r_bytes < 0) {
+		if (errno != EAGAIN)
+			pr_err("recv()");
+		return -1;
+	} else if (r_bytes == 0) { /* EOF */
+		printk("EOF on reply to ping");
+		return -1;
+	} else if (r_bytes != buf_len || memcmp(buf, sock_buf, buf_len)) {
+		printk("ping reply packet is corrupted %zd/%zu", r_bytes, buf_len);
+		return -1;
+	}
+
+	return 0;
+}
+
+static int udp_ping_reply(int sock[2], in_addr_t dest_ip, unsigned int port,
+		char *buf, size_t buf_len)
+{
+	struct sockaddr_in server;
+	const struct sockaddr *dest_addr = (struct sockaddr *)&server;
+	char *sock_buf[buf_len];
+	ssize_t r_bytes, s_bytes;
+
+	server.sin_family	= AF_INET;
+	server.sin_port		= htons(port);
+	server.sin_addr.s_addr	= dest_ip;
+
+	r_bytes = recv(sock[0], sock_buf, buf_len, 0);
+	if (r_bytes < 0) {
+		if (errno != EAGAIN)
+			pr_err("recv()");
+		return -1;
+	}
+	if (r_bytes == 0) { /* EOF */
+		printk("EOF on reply to ping");
+		return -1;
+	}
+	if (r_bytes != buf_len || memcmp(buf, sock_buf, buf_len)) {
+		printk("ping reply packet is corrupted %zd/%zu", r_bytes, buf_len);
+		return -1;
+	}
+
+	s_bytes = sendto(sock[1], buf, buf_len, 0, dest_addr, sizeof(server));
+	if (s_bytes < 0) {
+		pr_err("sendto()");
+		return -1;
+	} else if (s_bytes != buf_len) {
+		printk("send part of the message: %zd/%zu", s_bytes, sizeof(server));
+		return -1;
+	}
+
+	return 0;
+}
+
+typedef int (*ping_f)(int sock[2], in_addr_t dest_ip, unsigned int port,
+		char *buf, size_t buf_len);
+static int do_ping(int cmd_fd, char *buf, size_t buf_len, struct in_addr from,
+		bool init_side, int d_port, in_addr_t to, ping_f func)
+{
+	struct test_desc msg;
+	unsigned int s_port, i, ping_succeeded = 0;
+	int ping_sock[2];
+	char to_str[IPV4_STR_SZ] = {}, from_str[IPV4_STR_SZ] = {};
+
+	if (udp_ping_init(from, ping_timeout, &s_port, ping_sock)) {
+		printk("Failed to init ping");
+		return -1;
+	}
+
+	memset(&msg, 0, sizeof(msg));
+	msg.type		= MSG_PING;
+	msg.body.ping.port	= s_port;
+	memcpy(&msg.body.ping.reply_ip, &from, sizeof(from));
+
+	write_msg(cmd_fd, &msg, 0);
+	if (init_side) {
+		/* The other end sends ip to ping */
+		read_msg(cmd_fd, &msg, 0);
+		if (msg.type != MSG_PING)
+			return -1;
+		to = msg.body.ping.reply_ip;
+		d_port = msg.body.ping.port;
+	}
+
+	for (i = 0; i < ping_count ; i++) {
+		struct timespec sleep_time = {
+			.tv_sec = 0,
+			.tv_nsec = ping_delay_nsec,
+		};
+
+		ping_succeeded += !func(ping_sock, to, d_port, buf, page_size);
+		nanosleep(&sleep_time, 0);
+	}
+
+	close(ping_sock[0]);
+	close(ping_sock[1]);
+
+	strncpy(to_str, inet_ntoa(*(struct in_addr *)&to), IPV4_STR_SZ - 1);
+	strncpy(from_str, inet_ntoa(from), IPV4_STR_SZ - 1);
+
+	if (ping_succeeded < ping_success) {
+		printk("ping (%s) %s->%s failed %u/%u times",
+			init_side ? "send" : "reply", from_str, to_str,
+			ping_count - ping_succeeded, ping_count);
+		return -1;
+	}
+
+#ifdef DEBUG
+	printk("ping (%s) %s->%s succeeded %u/%u times",
+		init_side ? "send" : "reply", from_str, to_str,
+		ping_succeeded, ping_count);
+#endif
+
+	return 0;
+}
+
+static int xfrm_fill_key(char *name, char *buf,
+		size_t buf_len, unsigned int *key_len)
+{
+	/* TODO: use set/map instead */
+	if (strncmp(name, "digest_null", ALGO_LEN) == 0)
+		*key_len = 0;
+	else if (strncmp(name, "ecb(cipher_null)", ALGO_LEN) == 0)
+		*key_len = 0;
+	else if (strncmp(name, "cbc(des)", ALGO_LEN) == 0)
+		*key_len = 64;
+	else if (strncmp(name, "hmac(md5)", ALGO_LEN) == 0)
+		*key_len = 128;
+	else if (strncmp(name, "cmac(aes)", ALGO_LEN) == 0)
+		*key_len = 128;
+	else if (strncmp(name, "xcbc(aes)", ALGO_LEN) == 0)
+		*key_len = 128;
+	else if (strncmp(name, "cbc(cast5)", ALGO_LEN) == 0)
+		*key_len = 128;
+	else if (strncmp(name, "cbc(serpent)", ALGO_LEN) == 0)
+		*key_len = 128;
+	else if (strncmp(name, "hmac(sha1)", ALGO_LEN) == 0)
+		*key_len = 160;
+	else if (strncmp(name, "hmac(rmd160)", ALGO_LEN) == 0)
+		*key_len = 160;
+	else if (strncmp(name, "cbc(des3_ede)", ALGO_LEN) == 0)
+		*key_len = 192;
+	else if (strncmp(name, "hmac(sha256)", ALGO_LEN) == 0)
+		*key_len = 256;
+	else if (strncmp(name, "cbc(aes)", ALGO_LEN) == 0)
+		*key_len = 256;
+	else if (strncmp(name, "cbc(camellia)", ALGO_LEN) == 0)
+		*key_len = 256;
+	else if (strncmp(name, "cbc(twofish)", ALGO_LEN) == 0)
+		*key_len = 256;
+	else if (strncmp(name, "rfc3686(ctr(aes))", ALGO_LEN) == 0)
+		*key_len = 288;
+	else if (strncmp(name, "hmac(sha384)", ALGO_LEN) == 0)
+		*key_len = 384;
+	else if (strncmp(name, "cbc(blowfish)", ALGO_LEN) == 0)
+		*key_len = 448;
+	else if (strncmp(name, "hmac(sha512)", ALGO_LEN) == 0)
+		*key_len = 512;
+	else if (strncmp(name, "rfc4106(gcm(aes))-128", ALGO_LEN) == 0)
+		*key_len = 160;
+	else if (strncmp(name, "rfc4543(gcm(aes))-128", ALGO_LEN) == 0)
+		*key_len = 160;
+	else if (strncmp(name, "rfc4309(ccm(aes))-128", ALGO_LEN) == 0)
+		*key_len = 152;
+	else if (strncmp(name, "rfc4106(gcm(aes))-192", ALGO_LEN) == 0)
+		*key_len = 224;
+	else if (strncmp(name, "rfc4543(gcm(aes))-192", ALGO_LEN) == 0)
+		*key_len = 224;
+	else if (strncmp(name, "rfc4309(ccm(aes))-192", ALGO_LEN) == 0)
+		*key_len = 216;
+	else if (strncmp(name, "rfc4106(gcm(aes))-256", ALGO_LEN) == 0)
+		*key_len = 288;
+	else if (strncmp(name, "rfc4543(gcm(aes))-256", ALGO_LEN) == 0)
+		*key_len = 288;
+	else if (strncmp(name, "rfc4309(ccm(aes))-256", ALGO_LEN) == 0)
+		*key_len = 280;
+	else if (strncmp(name, "rfc7539(chacha20,poly1305)-128", ALGO_LEN) == 0)
+		*key_len = 0;
+
+	if (*key_len > buf_len) {
+		printk("Can't pack a key - too big for buffer");
+		return -1;
+	}
+
+	randomize_buffer(buf, *key_len);
+
+	return 0;
+}
+
+static int xfrm_state_pack_algo(struct nlmsghdr *nh, size_t req_sz,
+		struct xfrm_desc *desc)
+{
+	struct {
+		union {
+			struct xfrm_algo	alg;
+			struct xfrm_algo_aead	aead;
+			struct xfrm_algo_auth	auth;
+		} u;
+		char buf[XFRM_ALGO_KEY_BUF_SIZE];
+	} alg = {};
+	size_t alen, elen, clen, aelen;
+	unsigned short type;
+
+	alen = strlen(desc->a_algo);
+	elen = strlen(desc->e_algo);
+	clen = strlen(desc->c_algo);
+	aelen = strlen(desc->ae_algo);
+
+	/* Verify desc */
+	switch (desc->proto) {
+	case IPPROTO_AH:
+		if (!alen || elen || clen || aelen) {
+			printk("BUG: buggy ah desc");
+			return -1;
+		}
+		strncpy(alg.u.alg.alg_name, desc->a_algo, ALGO_LEN - 1);
+		if (xfrm_fill_key(desc->a_algo, alg.u.alg.alg_key,
+				sizeof(alg.buf), &alg.u.alg.alg_key_len))
+			return -1;
+		type = XFRMA_ALG_AUTH;
+		break;
+	case IPPROTO_COMP:
+		if (!clen || elen || alen || aelen) {
+			printk("BUG: buggy comp desc");
+			return -1;
+		}
+		strncpy(alg.u.alg.alg_name, desc->c_algo, ALGO_LEN - 1);
+		if (xfrm_fill_key(desc->c_algo, alg.u.alg.alg_key,
+				sizeof(alg.buf), &alg.u.alg.alg_key_len))
+			return -1;
+		type = XFRMA_ALG_COMP;
+		break;
+	case IPPROTO_ESP:
+		if (!((alen && elen) ^ aelen) || clen) {
+			printk("BUG: buggy esp desc");
+			return -1;
+		}
+		if (aelen) {
+			alg.u.aead.alg_icv_len = desc->icv_len;
+			strncpy(alg.u.aead.alg_name, desc->ae_algo, ALGO_LEN - 1);
+			if (xfrm_fill_key(desc->ae_algo, alg.u.aead.alg_key,
+						sizeof(alg.buf), &alg.u.aead.alg_key_len))
+				return -1;
+			type = XFRMA_ALG_AEAD;
+		} else {
+
+			strncpy(alg.u.alg.alg_name, desc->e_algo, ALGO_LEN - 1);
+			type = XFRMA_ALG_CRYPT;
+			if (xfrm_fill_key(desc->e_algo, alg.u.alg.alg_key,
+						sizeof(alg.buf), &alg.u.alg.alg_key_len))
+				return -1;
+			if (rtattr_pack(nh, req_sz, type, &alg, sizeof(alg)))
+				return -1;
+
+			strncpy(alg.u.alg.alg_name, desc->a_algo, ALGO_LEN);
+			type = XFRMA_ALG_AUTH;
+			if (xfrm_fill_key(desc->a_algo, alg.u.alg.alg_key,
+						sizeof(alg.buf), &alg.u.alg.alg_key_len))
+				return -1;
+		}
+		break;
+	default:
+		printk("BUG: unknown proto in desc");
+		return -1;
+	}
+
+	if (rtattr_pack(nh, req_sz, type, &alg, sizeof(alg)))
+		return -1;
+
+	return 0;
+}
+
+static inline uint32_t gen_spi(struct in_addr src)
+{
+	return htonl(inet_lnaof(src));
+}
+
+static int xfrm_state_add(int xfrm_sock, uint32_t seq, uint32_t spi,
+		struct in_addr src, struct in_addr dst,
+		struct xfrm_desc *desc)
+{
+	struct {
+		struct nlmsghdr		nh;
+		struct xfrm_usersa_info	info;
+		char			attrbuf[MAX_PAYLOAD];
+	} req;
+
+	memset(&req, 0, sizeof(req));
+	req.nh.nlmsg_len	= NLMSG_LENGTH(sizeof(req.info));
+	req.nh.nlmsg_type	= XFRM_MSG_NEWSA;
+	req.nh.nlmsg_flags	= NLM_F_REQUEST | NLM_F_ACK;
+	req.nh.nlmsg_seq	= seq;
+
+	/* Fill selector. */
+	memcpy(&req.info.sel.daddr, &dst, sizeof(dst));
+	memcpy(&req.info.sel.saddr, &src, sizeof(src));
+	req.info.sel.family		= AF_INET;
+	req.info.sel.prefixlen_d	= PREFIX_LEN;
+	req.info.sel.prefixlen_s	= PREFIX_LEN;
+
+	/* Fill id */
+	memcpy(&req.info.id.daddr, &dst, sizeof(dst));
+	/* Note: zero-spi cannot be deleted */
+	req.info.id.spi = spi;
+	req.info.id.proto	= desc->proto;
+
+	memcpy(&req.info.saddr, &src, sizeof(src));
+
+	/* Fill lifteme_cfg */
+	req.info.lft.soft_byte_limit	= XFRM_INF;
+	req.info.lft.hard_byte_limit	= XFRM_INF;
+	req.info.lft.soft_packet_limit	= XFRM_INF;
+	req.info.lft.hard_packet_limit	= XFRM_INF;
+
+	req.info.family		= AF_INET;
+	req.info.mode		= XFRM_MODE_TUNNEL;
+
+	if (xfrm_state_pack_algo(&req.nh, sizeof(req), desc))
+		return -1;
+
+	if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
+		pr_err("send()");
+		return -1;
+	}
+
+	return netlink_check_answer(xfrm_sock);
+}
+
+static bool xfrm_usersa_found(struct xfrm_usersa_info *info, uint32_t spi,
+		struct in_addr src, struct in_addr dst,
+		struct xfrm_desc *desc)
+{
+	if (memcmp(&info->sel.daddr, &dst, sizeof(dst)))
+		return false;
+
+	if (memcmp(&info->sel.saddr, &src, sizeof(src)))
+		return false;
+
+	if (info->sel.family != AF_INET					||
+			info->sel.prefixlen_d != PREFIX_LEN		||
+			info->sel.prefixlen_s != PREFIX_LEN)
+		return false;
+
+	if (info->id.spi != spi || info->id.proto != desc->proto)
+		return false;
+
+	if (memcmp(&info->id.daddr, &dst, sizeof(dst)))
+		return false;
+
+	if (memcmp(&info->saddr, &src, sizeof(src)))
+		return false;
+
+	if (info->lft.soft_byte_limit != XFRM_INF			||
+			info->lft.hard_byte_limit != XFRM_INF		||
+			info->lft.soft_packet_limit != XFRM_INF		||
+			info->lft.hard_packet_limit != XFRM_INF)
+		return false;
+
+	if (info->family != AF_INET || info->mode != XFRM_MODE_TUNNEL)
+		return false;
+
+	/* XXX: check xfrm algo, see xfrm_state_pack_algo(). */
+
+	return true;
+}
+
+static int xfrm_state_check(int xfrm_sock, uint32_t seq, uint32_t spi,
+		struct in_addr src, struct in_addr dst,
+		struct xfrm_desc *desc)
+{
+	struct {
+		struct nlmsghdr		nh;
+		char			attrbuf[MAX_PAYLOAD];
+	} req;
+	struct {
+		struct nlmsghdr		nh;
+		union {
+			struct xfrm_usersa_info	info;
+			int error;
+		};
+		char			attrbuf[MAX_PAYLOAD];
+	} answer;
+	struct xfrm_address_filter filter = {};
+	bool found = false;
+
+
+	memset(&req, 0, sizeof(req));
+	req.nh.nlmsg_len	= NLMSG_LENGTH(0);
+	req.nh.nlmsg_type	= XFRM_MSG_GETSA;
+	req.nh.nlmsg_flags	= NLM_F_REQUEST | NLM_F_DUMP;
+	req.nh.nlmsg_seq	= seq;
+
+	/*
+	 * Add dump filter by source address as there may be other tunnels
+	 * in this netns (if tests run in parallel).
+	 */
+	filter.family = AF_INET;
+	filter.splen = 0x1f;	/* 0xffffffff mask see addr_match() */
+	memcpy(&filter.saddr, &src, sizeof(src));
+	if (rtattr_pack(&req.nh, sizeof(req), XFRMA_ADDRESS_FILTER,
+				&filter, sizeof(filter)))
+		return -1;
+
+	if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
+		pr_err("send()");
+		return -1;
+	}
+
+	while (1) {
+		if (recv(xfrm_sock, &answer, sizeof(answer), 0) < 0) {
+			pr_err("recv()");
+			return -1;
+		}
+		if (answer.nh.nlmsg_type == NLMSG_ERROR) {
+			printk("NLMSG_ERROR: %d: %s",
+				answer.error, strerror(-answer.error));
+			return -1;
+		} else if (answer.nh.nlmsg_type == NLMSG_DONE) {
+			if (found)
+				return 0;
+			printk("didn't find allocated xfrm state in dump");
+			return -1;
+		} else if (answer.nh.nlmsg_type == XFRM_MSG_NEWSA) {
+			if (xfrm_usersa_found(&answer.info, spi, src, dst, desc))
+				found = true;
+		}
+	}
+}
+
+static int xfrm_set(int xfrm_sock, uint32_t *seq,
+		struct in_addr src, struct in_addr dst,
+		struct in_addr tunsrc, struct in_addr tundst,
+		struct xfrm_desc *desc)
+{
+	int err;
+
+	err = xfrm_state_add(xfrm_sock, (*seq)++, gen_spi(src), src, dst, desc);
+	if (err) {
+		printk("Failed to add xfrm state");
+		return -1;
+	}
+
+	err = xfrm_state_add(xfrm_sock, (*seq)++, gen_spi(src), dst, src, desc);
+	if (err) {
+		printk("Failed to add xfrm state");
+		return -1;
+	}
+
+	/* Check dumps for XFRM_MSG_GETSA */
+	err = xfrm_state_check(xfrm_sock, (*seq)++, gen_spi(src), src, dst, desc);
+	err |= xfrm_state_check(xfrm_sock, (*seq)++, gen_spi(src), dst, src, desc);
+	if (err) {
+		printk("Failed to check xfrm state");
+		return -1;
+	}
+
+	return 0;
+}
+
+static int xfrm_policy_add(int xfrm_sock, uint32_t seq, uint32_t spi,
+		struct in_addr src, struct in_addr dst, uint8_t dir,
+		struct in_addr tunsrc, struct in_addr tundst, uint8_t proto)
+{
+	struct {
+		struct nlmsghdr			nh;
+		struct xfrm_userpolicy_info	info;
+		char				attrbuf[MAX_PAYLOAD];
+	} req;
+	struct xfrm_user_tmpl tmpl;
+
+	memset(&req, 0, sizeof(req));
+	memset(&tmpl, 0, sizeof(tmpl));
+	req.nh.nlmsg_len	= NLMSG_LENGTH(sizeof(req.info));
+	req.nh.nlmsg_type	= XFRM_MSG_NEWPOLICY;
+	req.nh.nlmsg_flags	= NLM_F_REQUEST | NLM_F_ACK;
+	req.nh.nlmsg_seq	= seq;
+
+	/* Fill selector. */
+	memcpy(&req.info.sel.daddr, &dst, sizeof(tundst));
+	memcpy(&req.info.sel.saddr, &src, sizeof(tunsrc));
+	req.info.sel.family		= AF_INET;
+	req.info.sel.prefixlen_d	= PREFIX_LEN;
+	req.info.sel.prefixlen_s	= PREFIX_LEN;
+
+	/* Fill lifteme_cfg */
+	req.info.lft.soft_byte_limit	= XFRM_INF;
+	req.info.lft.hard_byte_limit	= XFRM_INF;
+	req.info.lft.soft_packet_limit	= XFRM_INF;
+	req.info.lft.hard_packet_limit	= XFRM_INF;
+
+	req.info.dir = dir;
+
+	/* Fill tmpl */
+	memcpy(&tmpl.id.daddr, &dst, sizeof(dst));
+	/* Note: zero-spi cannot be deleted */
+	tmpl.id.spi = spi;
+	tmpl.id.proto	= proto;
+	tmpl.family	= AF_INET;
+	memcpy(&tmpl.saddr, &src, sizeof(src));
+	tmpl.mode	= XFRM_MODE_TUNNEL;
+	tmpl.aalgos = (~(uint32_t)0);
+	tmpl.ealgos = (~(uint32_t)0);
+	tmpl.calgos = (~(uint32_t)0);
+
+	if (rtattr_pack(&req.nh, sizeof(req), XFRMA_TMPL, &tmpl, sizeof(tmpl)))
+		return -1;
+
+	if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
+		pr_err("send()");
+		return -1;
+	}
+
+	return netlink_check_answer(xfrm_sock);
+}
+
+static int xfrm_prepare(int xfrm_sock, uint32_t *seq,
+		struct in_addr src, struct in_addr dst,
+		struct in_addr tunsrc, struct in_addr tundst, uint8_t proto)
+{
+	if (xfrm_policy_add(xfrm_sock, (*seq)++, gen_spi(src), src, dst,
+				XFRM_POLICY_OUT, tunsrc, tundst, proto)) {
+		printk("Failed to add xfrm policy");
+		return -1;
+	}
+
+	if (xfrm_policy_add(xfrm_sock, (*seq)++, gen_spi(src), dst, src,
+				XFRM_POLICY_IN, tunsrc, tundst, proto)) {
+		printk("Failed to add xfrm policy");
+		return -1;
+	}
+
+	return 0;
+}
+
+static int xfrm_policy_del(int xfrm_sock, uint32_t seq,
+		struct in_addr src, struct in_addr dst, uint8_t dir,
+		struct in_addr tunsrc, struct in_addr tundst)
+{
+	struct {
+		struct nlmsghdr			nh;
+		struct xfrm_userpolicy_id	id;
+		char				attrbuf[MAX_PAYLOAD];
+	} req;
+
+	memset(&req, 0, sizeof(req));
+	req.nh.nlmsg_len	= NLMSG_LENGTH(sizeof(req.id));
+	req.nh.nlmsg_type	= XFRM_MSG_DELPOLICY;
+	req.nh.nlmsg_flags	= NLM_F_REQUEST | NLM_F_ACK;
+	req.nh.nlmsg_seq	= seq;
+
+	/* Fill id */
+	memcpy(&req.id.sel.daddr, &dst, sizeof(tundst));
+	memcpy(&req.id.sel.saddr, &src, sizeof(tunsrc));
+	req.id.sel.family		= AF_INET;
+	req.id.sel.prefixlen_d		= PREFIX_LEN;
+	req.id.sel.prefixlen_s		= PREFIX_LEN;
+	req.id.dir = dir;
+
+	if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
+		pr_err("send()");
+		return -1;
+	}
+
+	return netlink_check_answer(xfrm_sock);
+}
+
+static int xfrm_cleanup(int xfrm_sock, uint32_t *seq,
+		struct in_addr src, struct in_addr dst,
+		struct in_addr tunsrc, struct in_addr tundst)
+{
+	if (xfrm_policy_del(xfrm_sock, (*seq)++, src, dst,
+				XFRM_POLICY_OUT, tunsrc, tundst)) {
+		printk("Failed to add xfrm policy");
+		return -1;
+	}
+
+	if (xfrm_policy_del(xfrm_sock, (*seq)++, dst, src,
+				XFRM_POLICY_IN, tunsrc, tundst)) {
+		printk("Failed to add xfrm policy");
+		return -1;
+	}
+
+	return 0;
+}
+
+static int xfrm_state_del(int xfrm_sock, uint32_t seq, uint32_t spi,
+		struct in_addr src, struct in_addr dst, uint8_t proto)
+{
+	struct {
+		struct nlmsghdr		nh;
+		struct xfrm_usersa_id	id;
+		char			attrbuf[MAX_PAYLOAD];
+	} req;
+	xfrm_address_t saddr = {};
+
+	memset(&req, 0, sizeof(req));
+	req.nh.nlmsg_len	= NLMSG_LENGTH(sizeof(req.id));
+	req.nh.nlmsg_type	= XFRM_MSG_DELSA;
+	req.nh.nlmsg_flags	= NLM_F_REQUEST | NLM_F_ACK;
+	req.nh.nlmsg_seq	= seq;
+
+	memcpy(&req.id.daddr, &dst, sizeof(dst));
+	req.id.family		= AF_INET;
+	req.id.proto		= proto;
+	/* Note: zero-spi cannot be deleted */
+	req.id.spi = spi;
+
+	memcpy(&saddr, &src, sizeof(src));
+	if (rtattr_pack(&req.nh, sizeof(req), XFRMA_SRCADDR, &saddr, sizeof(saddr)))
+		return -1;
+
+	if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
+		pr_err("send()");
+		return -1;
+	}
+
+	return netlink_check_answer(xfrm_sock);
+}
+
+static int xfrm_delete(int xfrm_sock, uint32_t *seq,
+		struct in_addr src, struct in_addr dst,
+		struct in_addr tunsrc, struct in_addr tundst, uint8_t proto)
+{
+	if (xfrm_state_del(xfrm_sock, (*seq)++, gen_spi(src), src, dst, proto)) {
+		printk("Failed to remove xfrm state");
+		return -1;
+	}
+
+	if (xfrm_state_del(xfrm_sock, (*seq)++, gen_spi(src), dst, src, proto)) {
+		printk("Failed to remove xfrm state");
+		return -1;
+	}
+
+	return 0;
+}
+
+static int xfrm_state_allocspi(int xfrm_sock, uint32_t *seq,
+		uint32_t spi, uint8_t proto)
+{
+	struct {
+		struct nlmsghdr			nh;
+		struct xfrm_userspi_info	spi;
+	} req;
+	struct {
+		struct nlmsghdr			nh;
+		union {
+			struct xfrm_usersa_info	info;
+			int error;
+		};
+	} answer;
+
+	memset(&req, 0, sizeof(req));
+	req.nh.nlmsg_len	= NLMSG_LENGTH(sizeof(req.spi));
+	req.nh.nlmsg_type	= XFRM_MSG_ALLOCSPI;
+	req.nh.nlmsg_flags	= NLM_F_REQUEST;
+	req.nh.nlmsg_seq	= (*seq)++;
+
+	req.spi.info.family	= AF_INET;
+	req.spi.min		= spi;
+	req.spi.max		= spi;
+	req.spi.info.id.proto	= proto;
+
+	if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
+		pr_err("send()");
+		return KSFT_FAIL;
+	}
+
+	if (recv(xfrm_sock, &answer, sizeof(answer), 0) < 0) {
+		pr_err("recv()");
+		return KSFT_FAIL;
+	} else if (answer.nh.nlmsg_type == XFRM_MSG_NEWSA) {
+		uint32_t new_spi = htonl(answer.info.id.spi);
+
+		if (new_spi != spi) {
+			printk("allocated spi is different from requested: %#x != %#x",
+					new_spi, spi);
+			return KSFT_FAIL;
+		}
+		return KSFT_PASS;
+	} else if (answer.nh.nlmsg_type != NLMSG_ERROR) {
+		printk("expected NLMSG_ERROR, got %d", (int)answer.nh.nlmsg_type);
+		return KSFT_FAIL;
+	}
+
+	printk("NLMSG_ERROR: %d: %s", answer.error, strerror(-answer.error));
+	return (answer.error) ? KSFT_FAIL : KSFT_PASS;
+}
+
+static int netlink_sock_bind(int *sock, uint32_t *seq, int proto, uint32_t groups)
+{
+	struct sockaddr_nl snl = {};
+	socklen_t addr_len;
+	int ret = -1;
+
+	snl.nl_family = AF_NETLINK;
+	snl.nl_groups = groups;
+
+	if (netlink_sock(sock, seq, proto)) {
+		printk("Failed to open xfrm netlink socket");
+		return -1;
+	}
+
+	if (bind(*sock, (struct sockaddr *)&snl, sizeof(snl)) < 0) {
+		pr_err("bind()");
+		goto out_close;
+	}
+
+	addr_len = sizeof(snl);
+	if (getsockname(*sock, (struct sockaddr *)&snl, &addr_len) < 0) {
+		pr_err("getsockname()");
+		goto out_close;
+	}
+	if (addr_len != sizeof(snl)) {
+		printk("Wrong address length %d", addr_len);
+		goto out_close;
+	}
+	if (snl.nl_family != AF_NETLINK) {
+		printk("Wrong address family %d", snl.nl_family);
+		goto out_close;
+	}
+	return 0;
+
+out_close:
+	close(*sock);
+	return ret;
+}
+
+static int xfrm_monitor_acquire(int xfrm_sock, uint32_t *seq, unsigned int nr)
+{
+	struct {
+		struct nlmsghdr nh;
+		union {
+			struct xfrm_user_acquire acq;
+			int error;
+		};
+		char attrbuf[MAX_PAYLOAD];
+	} req;
+	struct xfrm_user_tmpl xfrm_tmpl = {};
+	int xfrm_listen = -1, ret = KSFT_FAIL;
+	uint32_t seq_listen;
+
+	if (netlink_sock_bind(&xfrm_listen, &seq_listen, NETLINK_XFRM, XFRMNLGRP_ACQUIRE))
+		return KSFT_FAIL;
+
+	memset(&req, 0, sizeof(req));
+	req.nh.nlmsg_len	= NLMSG_LENGTH(sizeof(req.acq));
+	req.nh.nlmsg_type	= XFRM_MSG_ACQUIRE;
+	req.nh.nlmsg_flags	= NLM_F_REQUEST | NLM_F_ACK;
+	req.nh.nlmsg_seq	= (*seq)++;
+
+	req.acq.policy.sel.family	= AF_INET;
+	req.acq.aalgos	= 0xfeed;
+	req.acq.ealgos	= 0xbaad;
+	req.acq.calgos	= 0xbabe;
+
+	xfrm_tmpl.family = AF_INET;
+	xfrm_tmpl.id.proto = IPPROTO_ESP;
+	if (rtattr_pack(&req.nh, sizeof(req), XFRMA_TMPL, &xfrm_tmpl, sizeof(xfrm_tmpl)))
+		goto out_close;
+
+	if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
+		pr_err("send()");
+		goto out_close;
+	}
+
+	if (recv(xfrm_sock, &req, sizeof(req), 0) < 0) {
+		pr_err("recv()");
+		goto out_close;
+	} else if (req.nh.nlmsg_type != NLMSG_ERROR) {
+		printk("expected NLMSG_ERROR, got %d", (int)req.nh.nlmsg_type);
+		goto out_close;
+	}
+
+	if (req.error) {
+		printk("NLMSG_ERROR: %d: %s", req.error, strerror(-req.error));
+		ret = req.error;
+		goto out_close;
+	}
+
+	if (recv(xfrm_listen, &req, sizeof(req), 0) < 0) {
+		pr_err("recv()");
+		goto out_close;
+	}
+
+	if (req.acq.aalgos != 0xfeed || req.acq.ealgos != 0xbaad
+			|| req.acq.calgos != 0xbabe) {
+		printk("xfrm_user_acquire has changed  %x %x %x",
+				req.acq.aalgos, req.acq.ealgos, req.acq.calgos);
+		goto out_close;
+	}
+
+	ret = KSFT_PASS;
+out_close:
+	close(xfrm_listen);
+	return ret;
+}
+
+static int xfrm_expire_state(int xfrm_sock, uint32_t *seq,
+		unsigned int nr, struct xfrm_desc *desc)
+{
+	struct {
+		struct nlmsghdr nh;
+		union {
+			struct xfrm_user_expire expire;
+			int error;
+		};
+	} req;
+	struct in_addr src, dst;
+	int xfrm_listen = -1, ret = KSFT_FAIL;
+	uint32_t seq_listen;
+
+	src = inet_makeaddr(INADDR_B, child_ip(nr));
+	dst = inet_makeaddr(INADDR_B, grchild_ip(nr));
+
+	if (xfrm_state_add(xfrm_sock, (*seq)++, gen_spi(src), src, dst, desc)) {
+		printk("Failed to add xfrm state");
+		return KSFT_FAIL;
+	}
+
+	if (netlink_sock_bind(&xfrm_listen, &seq_listen, NETLINK_XFRM, XFRMNLGRP_EXPIRE))
+		return KSFT_FAIL;
+
+	memset(&req, 0, sizeof(req));
+	req.nh.nlmsg_len	= NLMSG_LENGTH(sizeof(req.expire));
+	req.nh.nlmsg_type	= XFRM_MSG_EXPIRE;
+	req.nh.nlmsg_flags	= NLM_F_REQUEST | NLM_F_ACK;
+	req.nh.nlmsg_seq	= (*seq)++;
+
+	memcpy(&req.expire.state.id.daddr, &dst, sizeof(dst));
+	req.expire.state.id.spi		= gen_spi(src);
+	req.expire.state.id.proto	= desc->proto;
+	req.expire.state.family		= AF_INET;
+	req.expire.hard			= 0xff;
+
+	if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
+		pr_err("send()");
+		goto out_close;
+	}
+
+	if (recv(xfrm_sock, &req, sizeof(req), 0) < 0) {
+		pr_err("recv()");
+		goto out_close;
+	} else if (req.nh.nlmsg_type != NLMSG_ERROR) {
+		printk("expected NLMSG_ERROR, got %d", (int)req.nh.nlmsg_type);
+		goto out_close;
+	}
+
+	if (req.error) {
+		printk("NLMSG_ERROR: %d: %s", req.error, strerror(-req.error));
+		ret = req.error;
+		goto out_close;
+	}
+
+	if (recv(xfrm_listen, &req, sizeof(req), 0) < 0) {
+		pr_err("recv()");
+		goto out_close;
+	}
+
+	if (req.expire.hard != 0x1) {
+		printk("expire.hard is not set: %x", req.expire.hard);
+		goto out_close;
+	}
+
+	ret = KSFT_PASS;
+out_close:
+	close(xfrm_listen);
+	return ret;
+}
+
+static int xfrm_expire_policy(int xfrm_sock, uint32_t *seq,
+		unsigned int nr, struct xfrm_desc *desc)
+{
+	struct {
+		struct nlmsghdr nh;
+		union {
+			struct xfrm_user_polexpire expire;
+			int error;
+		};
+	} req;
+	struct in_addr src, dst, tunsrc, tundst;
+	int xfrm_listen = -1, ret = KSFT_FAIL;
+	uint32_t seq_listen;
+
+	src = inet_makeaddr(INADDR_B, child_ip(nr));
+	dst = inet_makeaddr(INADDR_B, grchild_ip(nr));
+	tunsrc = inet_makeaddr(INADDR_A, child_ip(nr));
+	tundst = inet_makeaddr(INADDR_A, grchild_ip(nr));
+
+	if (xfrm_policy_add(xfrm_sock, (*seq)++, gen_spi(src), src, dst,
+				XFRM_POLICY_OUT, tunsrc, tundst, desc->proto)) {
+		printk("Failed to add xfrm policy");
+		return KSFT_FAIL;
+	}
+
+	if (netlink_sock_bind(&xfrm_listen, &seq_listen, NETLINK_XFRM, XFRMNLGRP_EXPIRE))
+		return KSFT_FAIL;
+
+	memset(&req, 0, sizeof(req));
+	req.nh.nlmsg_len	= NLMSG_LENGTH(sizeof(req.expire));
+	req.nh.nlmsg_type	= XFRM_MSG_POLEXPIRE;
+	req.nh.nlmsg_flags	= NLM_F_REQUEST | NLM_F_ACK;
+	req.nh.nlmsg_seq	= (*seq)++;
+
+	/* Fill selector. */
+	memcpy(&req.expire.pol.sel.daddr, &dst, sizeof(tundst));
+	memcpy(&req.expire.pol.sel.saddr, &src, sizeof(tunsrc));
+	req.expire.pol.sel.family	= AF_INET;
+	req.expire.pol.sel.prefixlen_d	= PREFIX_LEN;
+	req.expire.pol.sel.prefixlen_s	= PREFIX_LEN;
+	req.expire.pol.dir		= XFRM_POLICY_OUT;
+	req.expire.hard			= 0xff;
+
+	if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
+		pr_err("send()");
+		goto out_close;
+	}
+
+	if (recv(xfrm_sock, &req, sizeof(req), 0) < 0) {
+		pr_err("recv()");
+		goto out_close;
+	} else if (req.nh.nlmsg_type != NLMSG_ERROR) {
+		printk("expected NLMSG_ERROR, got %d", (int)req.nh.nlmsg_type);
+		goto out_close;
+	}
+
+	if (req.error) {
+		printk("NLMSG_ERROR: %d: %s", req.error, strerror(-req.error));
+		ret = req.error;
+		goto out_close;
+	}
+
+	if (recv(xfrm_listen, &req, sizeof(req), 0) < 0) {
+		pr_err("recv()");
+		goto out_close;
+	}
+
+	if (req.expire.hard != 0x1) {
+		printk("expire.hard is not set: %x", req.expire.hard);
+		goto out_close;
+	}
+
+	ret = KSFT_PASS;
+out_close:
+	close(xfrm_listen);
+	return ret;
+}
+
+static int child_serv(int xfrm_sock, uint32_t *seq,
+		unsigned int nr, int cmd_fd, void *buf, struct xfrm_desc *desc)
+{
+	struct in_addr src, dst, tunsrc, tundst;
+	struct test_desc msg;
+	int ret = KSFT_FAIL;
+
+	src = inet_makeaddr(INADDR_B, child_ip(nr));
+	dst = inet_makeaddr(INADDR_B, grchild_ip(nr));
+	tunsrc = inet_makeaddr(INADDR_A, child_ip(nr));
+	tundst = inet_makeaddr(INADDR_A, grchild_ip(nr));
+
+	/* UDP pinging without xfrm */
+	if (do_ping(cmd_fd, buf, page_size, src, true, 0, 0, udp_ping_send)) {
+		printk("ping failed before setting xfrm");
+		return KSFT_FAIL;
+	}
+
+	memset(&msg, 0, sizeof(msg));
+	msg.type = MSG_XFRM_PREPARE;
+	memcpy(&msg.body.xfrm_desc, desc, sizeof(*desc));
+	write_msg(cmd_fd, &msg, 1);
+
+	if (xfrm_prepare(xfrm_sock, seq, src, dst, tunsrc, tundst, desc->proto)) {
+		printk("failed to prepare xfrm");
+		goto cleanup;
+	}
+
+	memset(&msg, 0, sizeof(msg));
+	msg.type = MSG_XFRM_ADD;
+	memcpy(&msg.body.xfrm_desc, desc, sizeof(*desc));
+	write_msg(cmd_fd, &msg, 1);
+	if (xfrm_set(xfrm_sock, seq, src, dst, tunsrc, tundst, desc)) {
+		printk("failed to set xfrm");
+		goto delete;
+	}
+
+	/* UDP pinging with xfrm tunnel */
+	if (do_ping(cmd_fd, buf, page_size, tunsrc,
+				true, 0, 0, udp_ping_send)) {
+		printk("ping failed for xfrm");
+		goto delete;
+	}
+
+	ret = KSFT_PASS;
+delete:
+	/* xfrm delete */
+	memset(&msg, 0, sizeof(msg));
+	msg.type = MSG_XFRM_DEL;
+	memcpy(&msg.body.xfrm_desc, desc, sizeof(*desc));
+	write_msg(cmd_fd, &msg, 1);
+
+	if (xfrm_delete(xfrm_sock, seq, src, dst, tunsrc, tundst, desc->proto)) {
+		printk("failed ping to remove xfrm");
+		ret = KSFT_FAIL;
+	}
+
+cleanup:
+	memset(&msg, 0, sizeof(msg));
+	msg.type = MSG_XFRM_CLEANUP;
+	memcpy(&msg.body.xfrm_desc, desc, sizeof(*desc));
+	write_msg(cmd_fd, &msg, 1);
+	if (xfrm_cleanup(xfrm_sock, seq, src, dst, tunsrc, tundst)) {
+		printk("failed ping to cleanup xfrm");
+		ret = KSFT_FAIL;
+	}
+	return ret;
+}
+
+static int child_f(unsigned int nr, int test_desc_fd, int cmd_fd, void *buf)
+{
+	struct xfrm_desc desc;
+	struct test_desc msg;
+	int xfrm_sock = -1;
+	uint32_t seq;
+
+	if (switch_ns(nsfd_childa))
+		exit(KSFT_FAIL);
+
+	if (netlink_sock(&xfrm_sock, &seq, NETLINK_XFRM)) {
+		printk("Failed to open xfrm netlink socket");
+		exit(KSFT_FAIL);
+	}
+
+	/* Check that seq sock is ready, just for sure. */
+	memset(&msg, 0, sizeof(msg));
+	msg.type = MSG_ACK;
+	write_msg(cmd_fd, &msg, 1);
+	read_msg(cmd_fd, &msg, 1);
+	if (msg.type != MSG_ACK) {
+		printk("Ack failed");
+		exit(KSFT_FAIL);
+	}
+
+	for (;;) {
+		ssize_t received = read(test_desc_fd, &desc, sizeof(desc));
+		int ret;
+
+		if (received == 0) /* EOF */
+			break;
+
+		if (received != sizeof(desc)) {
+			pr_err("read() returned %zd", received);
+			exit(KSFT_FAIL);
+		}
+
+		switch (desc.type) {
+		case CREATE_TUNNEL:
+			ret = child_serv(xfrm_sock, &seq, nr,
+					 cmd_fd, buf, &desc);
+			break;
+		case ALLOCATE_SPI:
+			ret = xfrm_state_allocspi(xfrm_sock, &seq,
+						  -1, desc.proto);
+			break;
+		case MONITOR_ACQUIRE:
+			ret = xfrm_monitor_acquire(xfrm_sock, &seq, nr);
+			break;
+		case EXPIRE_STATE:
+			ret = xfrm_expire_state(xfrm_sock, &seq, nr, &desc);
+			break;
+		case EXPIRE_POLICY:
+			ret = xfrm_expire_policy(xfrm_sock, &seq, nr, &desc);
+			break;
+		default:
+			printk("Unknown desc type %d", desc.type);
+			exit(KSFT_FAIL);
+		}
+		write_test_result(ret, &desc);
+	}
+
+	close(xfrm_sock);
+
+	msg.type = MSG_EXIT;
+	write_msg(cmd_fd, &msg, 1);
+	exit(KSFT_PASS);
+}
+
+static void grand_child_serv(unsigned int nr, int cmd_fd, void *buf,
+		struct test_desc *msg, int xfrm_sock, uint32_t *seq)
+{
+	struct in_addr src, dst, tunsrc, tundst;
+	bool tun_reply;
+	struct xfrm_desc *desc = &msg->body.xfrm_desc;
+
+	src = inet_makeaddr(INADDR_B, grchild_ip(nr));
+	dst = inet_makeaddr(INADDR_B, child_ip(nr));
+	tunsrc = inet_makeaddr(INADDR_A, grchild_ip(nr));
+	tundst = inet_makeaddr(INADDR_A, child_ip(nr));
+
+	switch (msg->type) {
+	case MSG_EXIT:
+		exit(KSFT_PASS);
+	case MSG_ACK:
+		write_msg(cmd_fd, msg, 1);
+		break;
+	case MSG_PING:
+		tun_reply = memcmp(&dst, &msg->body.ping.reply_ip, sizeof(in_addr_t));
+		/* UDP pinging without xfrm */
+		if (do_ping(cmd_fd, buf, page_size, tun_reply ? tunsrc : src,
+				false, msg->body.ping.port,
+				msg->body.ping.reply_ip, udp_ping_reply)) {
+			printk("ping failed before setting xfrm");
+		}
+		break;
+	case MSG_XFRM_PREPARE:
+		if (xfrm_prepare(xfrm_sock, seq, src, dst, tunsrc, tundst,
+					desc->proto)) {
+			xfrm_cleanup(xfrm_sock, seq, src, dst, tunsrc, tundst);
+			printk("failed to prepare xfrm");
+		}
+		break;
+	case MSG_XFRM_ADD:
+		if (xfrm_set(xfrm_sock, seq, src, dst, tunsrc, tundst, desc)) {
+			xfrm_cleanup(xfrm_sock, seq, src, dst, tunsrc, tundst);
+			printk("failed to set xfrm");
+		}
+		break;
+	case MSG_XFRM_DEL:
+		if (xfrm_delete(xfrm_sock, seq, src, dst, tunsrc, tundst,
+					desc->proto)) {
+			xfrm_cleanup(xfrm_sock, seq, src, dst, tunsrc, tundst);
+			printk("failed to remove xfrm");
+		}
+		break;
+	case MSG_XFRM_CLEANUP:
+		if (xfrm_cleanup(xfrm_sock, seq, src, dst, tunsrc, tundst)) {
+			printk("failed to cleanup xfrm");
+		}
+		break;
+	default:
+		printk("got unknown msg type %d", msg->type);
+	};
+}
+
+static int grand_child_f(unsigned int nr, int cmd_fd, void *buf)
+{
+	struct test_desc msg;
+	int xfrm_sock = -1;
+	uint32_t seq;
+
+	if (switch_ns(nsfd_childb))
+		exit(KSFT_FAIL);
+
+	if (netlink_sock(&xfrm_sock, &seq, NETLINK_XFRM)) {
+		printk("Failed to open xfrm netlink socket");
+		exit(KSFT_FAIL);
+	}
+
+	do {
+		read_msg(cmd_fd, &msg, 1);
+		grand_child_serv(nr, cmd_fd, buf, &msg, xfrm_sock, &seq);
+	} while (1);
+
+	close(xfrm_sock);
+	exit(KSFT_FAIL);
+}
+
+static int start_child(unsigned int nr, char *veth, int test_desc_fd[2])
+{
+	int cmd_sock[2];
+	void *data_map;
+	pid_t child;
+
+	if (init_child(nsfd_childa, veth, child_ip(nr), grchild_ip(nr)))
+		return -1;
+
+	if (init_child(nsfd_childb, veth, grchild_ip(nr), child_ip(nr)))
+		return -1;
+
+	child = fork();
+	if (child < 0) {
+		pr_err("fork()");
+		return -1;
+	} else if (child) {
+		/* in parent - selftest */
+		return switch_ns(nsfd_parent);
+	}
+
+	if (close(test_desc_fd[1])) {
+		pr_err("close()");
+		return -1;
+	}
+
+	/* child */
+	data_map = mmap(0, page_size, PROT_READ | PROT_WRITE,
+			MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+	if (data_map == MAP_FAILED) {
+		pr_err("mmap()");
+		return -1;
+	}
+
+	randomize_buffer(data_map, page_size);
+
+	if (socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, cmd_sock)) {
+		pr_err("socketpair()");
+		return -1;
+	}
+
+	child = fork();
+	if (child < 0) {
+		pr_err("fork()");
+		return -1;
+	} else if (child) {
+		if (close(cmd_sock[0])) {
+			pr_err("close()");
+			return -1;
+		}
+		return child_f(nr, test_desc_fd[0], cmd_sock[1], data_map);
+	}
+	if (close(cmd_sock[1])) {
+		pr_err("close()");
+		return -1;
+	}
+	return grand_child_f(nr, cmd_sock[0], data_map);
+}
+
+static void exit_usage(char **argv)
+{
+	printk("Usage: %s [nr_process]", argv[0]);
+	exit(KSFT_FAIL);
+}
+
+static int __write_desc(int test_desc_fd, struct xfrm_desc *desc)
+{
+	ssize_t ret;
+
+	ret = write(test_desc_fd, desc, sizeof(*desc));
+
+	if (ret == sizeof(*desc))
+		return 0;
+
+	pr_err("Writing test's desc failed %ld", ret);
+
+	return -1;
+}
+
+static int write_desc(int proto, int test_desc_fd,
+		char *a, char *e, char *c, char *ae)
+{
+	struct xfrm_desc desc = {};
+
+	desc.type = CREATE_TUNNEL;
+	desc.proto = proto;
+
+	if (a)
+		strncpy(desc.a_algo, a, ALGO_LEN - 1);
+	if (e)
+		strncpy(desc.e_algo, e, ALGO_LEN - 1);
+	if (c)
+		strncpy(desc.c_algo, c, ALGO_LEN - 1);
+	if (ae)
+		strncpy(desc.ae_algo, ae, ALGO_LEN - 1);
+
+	return __write_desc(test_desc_fd, &desc);
+}
+
+int proto_list[] = { IPPROTO_AH, IPPROTO_COMP, IPPROTO_ESP };
+char *ah_list[] = {
+	"digest_null", "hmac(md5)", "hmac(sha1)", "hmac(sha256)",
+	"hmac(sha384)", "hmac(sha512)", "hmac(rmd160)",
+	"xcbc(aes)", "cmac(aes)"
+};
+char *comp_list[] = {
+	"deflate",
+#if 0
+	/* No compression backend realization */
+	"lzs", "lzjh"
+#endif
+};
+char *e_list[] = {
+	"ecb(cipher_null)", "cbc(des)", "cbc(des3_ede)", "cbc(cast5)",
+	"cbc(blowfish)", "cbc(aes)", "cbc(serpent)", "cbc(camellia)",
+	"cbc(twofish)", "rfc3686(ctr(aes))"
+};
+char *ae_list[] = {
+#if 0
+	/* not implemented */
+	"rfc4106(gcm(aes))", "rfc4309(ccm(aes))", "rfc4543(gcm(aes))",
+	"rfc7539esp(chacha20,poly1305)"
+#endif
+};
+
+const unsigned int proto_plan = ARRAY_SIZE(ah_list) + ARRAY_SIZE(comp_list) \
+				+ (ARRAY_SIZE(ah_list) * ARRAY_SIZE(e_list)) \
+				+ ARRAY_SIZE(ae_list);
+
+static int write_proto_plan(int fd, int proto)
+{
+	unsigned int i;
+
+	switch (proto) {
+	case IPPROTO_AH:
+		for (i = 0; i < ARRAY_SIZE(ah_list); i++) {
+			if (write_desc(proto, fd, ah_list[i], 0, 0, 0))
+				return -1;
+		}
+		break;
+	case IPPROTO_COMP:
+		for (i = 0; i < ARRAY_SIZE(comp_list); i++) {
+			if (write_desc(proto, fd, 0, 0, comp_list[i], 0))
+				return -1;
+		}
+		break;
+	case IPPROTO_ESP:
+		for (i = 0; i < ARRAY_SIZE(ah_list); i++) {
+			int j;
+
+			for (j = 0; j < ARRAY_SIZE(e_list); j++) {
+				if (write_desc(proto, fd, ah_list[i],
+							e_list[j], 0, 0))
+					return -1;
+			}
+		}
+		for (i = 0; i < ARRAY_SIZE(ae_list); i++) {
+			if (write_desc(proto, fd, 0, 0, 0, ae_list[i]))
+				return -1;
+		}
+		break;
+	default:
+		printk("BUG: Specified unknown proto %d", proto);
+		return -1;
+	}
+
+	return 0;
+}
+
+/*
+ * Some structures in xfrm uapi header differ in size between
+ * 64-bit and 32-bit ABI:
+ *
+ *             32-bit UABI               |            64-bit UABI
+ *  -------------------------------------|-------------------------------------
+ *   sizeof(xfrm_usersa_info)     = 220  |  sizeof(xfrm_usersa_info)     = 224
+ *   sizeof(xfrm_userpolicy_info) = 164  |  sizeof(xfrm_userpolicy_info) = 168
+ *   sizeof(xfrm_userspi_info)    = 228  |  sizeof(xfrm_userspi_info)    = 232
+ *   sizeof(xfrm_user_acquire)    = 276  |  sizeof(xfrm_user_acquire)    = 280
+ *   sizeof(xfrm_user_expire)     = 224  |  sizeof(xfrm_user_expire)     = 232
+ *   sizeof(xfrm_user_polexpire)  = 168  |  sizeof(xfrm_user_polexpire)  = 176
+ *
+ * Check the affected by the UABI difference structures.
+ */
+const unsigned int compat_plan = 4;
+static int write_compat_struct_tests(int test_desc_fd)
+{
+	struct xfrm_desc desc = {};
+
+	desc.type = ALLOCATE_SPI;
+	desc.proto = IPPROTO_AH;
+	strncpy(desc.a_algo, ah_list[0], ALGO_LEN - 1);
+
+	if (__write_desc(test_desc_fd, &desc))
+		return -1;
+
+	desc.type = MONITOR_ACQUIRE;
+	if (__write_desc(test_desc_fd, &desc))
+		return -1;
+
+	desc.type = EXPIRE_STATE;
+	if (__write_desc(test_desc_fd, &desc))
+		return -1;
+
+	desc.type = EXPIRE_POLICY;
+	if (__write_desc(test_desc_fd, &desc))
+		return -1;
+
+	return 0;
+}
+
+static int write_test_plan(int test_desc_fd)
+{
+	unsigned int i;
+	pid_t child;
+
+	child = fork();
+	if (child < 0) {
+		pr_err("fork()");
+		return -1;
+	}
+	if (child) {
+		if (close(test_desc_fd))
+			printk("close(): %m");
+		return 0;
+	}
+
+	if (write_compat_struct_tests(test_desc_fd))
+		exit(KSFT_FAIL);
+
+	for (i = 0; i < ARRAY_SIZE(proto_list); i++) {
+		if (write_proto_plan(test_desc_fd, proto_list[i]))
+			exit(KSFT_FAIL);
+	}
+
+	exit(KSFT_PASS);
+}
+
+static int children_cleanup(void)
+{
+	unsigned ret = KSFT_PASS;
+
+	while (1) {
+		int status;
+		pid_t p = wait(&status);
+
+		if ((p < 0) && errno == ECHILD)
+			break;
+
+		if (p < 0) {
+			pr_err("wait()");
+			return KSFT_FAIL;
+		}
+
+		if (!WIFEXITED(status)) {
+			ret = KSFT_FAIL;
+			continue;
+		}
+
+		if (WEXITSTATUS(status) == KSFT_FAIL)
+			ret = KSFT_FAIL;
+	}
+
+	return ret;
+}
+
+typedef void (*print_res)(const char *, ...);
+
+static int check_results(void)
+{
+	struct test_result tr = {};
+	struct xfrm_desc *d = &tr.desc;
+	int ret = KSFT_PASS;
+
+	while (1) {
+		ssize_t received = read(results_fd[0], &tr, sizeof(tr));
+		print_res result;
+
+		if (received == 0) /* EOF */
+			break;
+
+		if (received != sizeof(tr)) {
+			pr_err("read() returned %zd", received);
+			return KSFT_FAIL;
+		}
+
+		switch (tr.res) {
+		case KSFT_PASS:
+			result = ksft_test_result_pass;
+			break;
+		case KSFT_FAIL:
+		default:
+			result = ksft_test_result_fail;
+			ret = KSFT_FAIL;
+		}
+
+		result(" %s: [%u, '%s', '%s', '%s', '%s', %u]\n",
+		       desc_name[d->type], (unsigned int)d->proto, d->a_algo,
+		       d->e_algo, d->c_algo, d->ae_algo, d->icv_len);
+	}
+
+	return ret;
+}
+
+int main(int argc, char **argv)
+{
+	unsigned int nr_process = 1;
+	int route_sock = -1, ret = KSFT_SKIP;
+	int test_desc_fd[2];
+	uint32_t route_seq;
+	unsigned int i;
+
+	if (argc > 2)
+		exit_usage(argv);
+
+	if (argc > 1) {
+		char *endptr;
+
+		errno = 0;
+		nr_process = strtol(argv[1], &endptr, 10);
+		if ((errno == ERANGE && (nr_process == LONG_MAX || nr_process == LONG_MIN))
+				|| (errno != 0 && nr_process == 0)
+				|| (endptr == argv[1]) || (*endptr != '\0')) {
+			printk("Failed to parse [nr_process]");
+			exit_usage(argv);
+		}
+
+		if (nr_process > MAX_PROCESSES || !nr_process) {
+			printk("nr_process should be between [1; %u]",
+					MAX_PROCESSES);
+			exit_usage(argv);
+		}
+	}
+
+	srand(time(NULL));
+	page_size = sysconf(_SC_PAGESIZE);
+	if (page_size < 1)
+		ksft_exit_skip("sysconf(): %m\n");
+
+	if (pipe2(test_desc_fd, O_DIRECT) < 0)
+		ksft_exit_skip("pipe(): %m\n");
+
+	if (pipe2(results_fd, O_DIRECT) < 0)
+		ksft_exit_skip("pipe(): %m\n");
+
+	if (init_namespaces())
+		ksft_exit_skip("Failed to create namespaces\n");
+
+	if (netlink_sock(&route_sock, &route_seq, NETLINK_ROUTE))
+		ksft_exit_skip("Failed to open netlink route socket\n");
+
+	for (i = 0; i < nr_process; i++) {
+		char veth[VETH_LEN];
+
+		snprintf(veth, VETH_LEN, VETH_FMT, i);
+
+		if (veth_add(route_sock, route_seq++, veth, nsfd_childa, veth, nsfd_childb)) {
+			close(route_sock);
+			ksft_exit_fail_msg("Failed to create veth device");
+		}
+
+		if (start_child(i, veth, test_desc_fd)) {
+			close(route_sock);
+			ksft_exit_fail_msg("Child %u failed to start", i);
+		}
+	}
+
+	if (close(route_sock) || close(test_desc_fd[0]) || close(results_fd[1]))
+		ksft_exit_fail_msg("close(): %m");
+
+	ksft_set_plan(proto_plan + compat_plan);
+
+	if (write_test_plan(test_desc_fd[1]))
+		ksft_exit_fail_msg("Failed to write test plan to pipe");
+
+	ret = check_results();
+
+	if (children_cleanup() == KSFT_FAIL)
+		exit(KSFT_FAIL);
+
+	exit(ret);
+}
diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile
index aa254aefc2c3..00bb158b4a5d 100644
--- a/tools/testing/selftests/net/mptcp/Makefile
+++ b/tools/testing/selftests/net/mptcp/Makefile
@@ -5,7 +5,8 @@ KSFT_KHDR_INSTALL := 1
 
 CFLAGS =  -Wall -Wl,--no-as-needed -O2 -g  -I$(top_srcdir)/usr/include
 
-TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh
+TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh \
+	      simult_flows.sh
 
 TEST_GEN_FILES = mptcp_connect pm_nl_ctl
 
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index 090620c3e10c..77bb62feb872 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -54,6 +54,7 @@ static int pf = AF_INET;
 static int cfg_sndbuf;
 static int cfg_rcvbuf;
 static bool cfg_join;
+static bool cfg_remove;
 static int cfg_wait;
 
 static void die_usage(void)
@@ -65,8 +66,8 @@ static void die_usage(void)
 	fprintf(stderr, "\t-S num -- set SO_SNDBUF to num\n");
 	fprintf(stderr, "\t-R num -- set SO_RCVBUF to num\n");
 	fprintf(stderr, "\t-p num -- use port num\n");
-	fprintf(stderr, "\t-m [MPTCP|TCP] -- use tcp or mptcp sockets\n");
-	fprintf(stderr, "\t-s [mmap|poll] -- use poll (default) or mmap\n");
+	fprintf(stderr, "\t-s [MPTCP|TCP] -- use mptcp(default) or tcp sockets\n");
+	fprintf(stderr, "\t-m [poll|mmap|sendfile] -- use poll(default)/mmap+write/sendfile\n");
 	fprintf(stderr, "\t-u -- check mptcp ulp\n");
 	fprintf(stderr, "\t-w num -- wait num sec before closing the socket\n");
 	exit(1);
@@ -271,6 +272,9 @@ static size_t do_rnd_write(const int fd, char *buf, const size_t len)
 	if (cfg_join && first && do_w > 100)
 		do_w = 100;
 
+	if (cfg_remove && do_w > 50)
+		do_w = 50;
+
 	bw = write(fd, buf, do_w);
 	if (bw < 0)
 		perror("write");
@@ -281,6 +285,9 @@ static size_t do_rnd_write(const int fd, char *buf, const size_t len)
 		first = false;
 	}
 
+	if (cfg_remove)
+		usleep(200000);
+
 	return bw;
 }
 
@@ -428,7 +435,7 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd)
 	}
 
 	/* leave some time for late join/announce */
-	if (cfg_join)
+	if (cfg_join || cfg_remove)
 		usleep(cfg_wait);
 
 	close(peerfd);
@@ -686,7 +693,7 @@ static void maybe_close(int fd)
 {
 	unsigned int r = rand();
 
-	if (!cfg_join && (r & 1))
+	if (!(cfg_join || cfg_remove) && (r & 1))
 		close(fd);
 }
 
@@ -822,13 +829,18 @@ static void parse_opts(int argc, char **argv)
 {
 	int c;
 
-	while ((c = getopt(argc, argv, "6jlp:s:hut:m:S:R:w:")) != -1) {
+	while ((c = getopt(argc, argv, "6jrlp:s:hut:m:S:R:w:")) != -1) {
 		switch (c) {
 		case 'j':
 			cfg_join = true;
 			cfg_mode = CFG_MODE_POLL;
 			cfg_wait = 400000;
 			break;
+		case 'r':
+			cfg_remove = true;
+			cfg_mode = CFG_MODE_POLL;
+			cfg_wait = 400000;
+			break;
 		case 'l':
 			listen_mode = true;
 			break;
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index 57d75b7f6220..2cfd87d94db8 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -14,9 +14,8 @@ capture=false
 timeout=30
 ipv6=true
 ethtool_random_on=true
-tc_delay="$((RANDOM%400))"
+tc_delay="$((RANDOM%50))"
 tc_loss=$((RANDOM%101))
-tc_reorder=""
 testmode=""
 sndbuf=0
 rcvbuf=0
@@ -444,9 +443,9 @@ do_transfer()
 	duration=$(printf "(duration %05sms)" $duration)
 	if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
 		echo "$duration [ FAIL ] client exit code $retc, server $rets" 1>&2
-		echo "\nnetns ${listener_ns} socket stat for $port:" 1>&2
+		echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2
 		ip netns exec ${listener_ns} ss -nita 1>&2 -o "sport = :$port"
-		echo "\nnetns ${connector_ns} socket stat for $port:" 1>&2
+		echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2
 		ip netns exec ${connector_ns} ss -nita 1>&2 -o "dport = :$port"
 
 		cat "$capout"
@@ -628,30 +627,32 @@ for sender in "$ns1" "$ns2" "$ns3" "$ns4";do
 	do_ping "$ns4" $sender dead:beef:3::1
 done
 
-[ -n "$tc_loss" ] && tc -net "$ns2" qdisc add dev ns2eth3 root netem loss random $tc_loss
+[ -n "$tc_loss" ] && tc -net "$ns2" qdisc add dev ns2eth3 root netem loss random $tc_loss delay ${tc_delay}ms
 echo -n "INFO: Using loss of $tc_loss "
 test "$tc_delay" -gt 0 && echo -n "delay $tc_delay ms "
 
+reorder_delay=$(($tc_delay / 4))
+
 if [ -z "${tc_reorder}" ]; then
 	reorder1=$((RANDOM%10))
 	reorder1=$((100 - reorder1))
 	reorder2=$((RANDOM%100))
 
-	if [ $tc_delay -gt 0 ] && [ $reorder1 -lt 100 ] && [ $reorder2 -gt 0 ]; then
+	if [ $reorder_delay -gt 0 ] && [ $reorder1 -lt 100 ] && [ $reorder2 -gt 0 ]; then
 		tc_reorder="reorder ${reorder1}% ${reorder2}%"
-		echo -n "$tc_reorder "
+		echo -n "$tc_reorder with delay ${reorder_delay}ms "
 	fi
 elif [ "$tc_reorder" = "0" ];then
 	tc_reorder=""
-elif [ "$tc_delay" -gt 0 ];then
+elif [ "$reorder_delay" -gt 0 ];then
 	# reordering requires some delay
 	tc_reorder="reorder $tc_reorder"
-	echo -n "$tc_reorder "
+	echo -n "$tc_reorder with delay ${reorder_delay}ms "
 fi
 
 echo "on ns3eth4"
 
-tc -net "$ns3" qdisc add dev ns3eth4 root netem delay ${tc_delay}ms $tc_reorder
+tc -net "$ns3" qdisc add dev ns3eth4 root netem delay ${reorder_delay}ms $tc_reorder
 
 for sender in $ns1 $ns2 $ns3 $ns4;do
 	run_tests_lo "$ns1" "$sender" 10.0.1.1 1
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index f39c1129ce5f..08f53d86dedc 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -8,6 +8,7 @@ cin=""
 cout=""
 ksft_skip=4
 timeout=30
+mptcp_connect=""
 capture=0
 
 TEST_COUNT=0
@@ -132,6 +133,8 @@ do_transfer()
 	cl_proto="$3"
 	srv_proto="$4"
 	connect_addr="$5"
+	rm_nr_ns1="$6"
+	rm_nr_ns2="$7"
 
 	port=$((10000+$TEST_COUNT))
 	TEST_COUNT=$((TEST_COUNT+1))
@@ -156,14 +159,44 @@ do_transfer()
 		sleep 1
 	fi
 
-	ip netns exec ${listener_ns} ./mptcp_connect -j -t $timeout -l -p $port -s ${srv_proto} 0.0.0.0 < "$sin" > "$sout" &
+	if [[ $rm_nr_ns1 -eq 0 && $rm_nr_ns2 -eq 0 ]]; then
+		mptcp_connect="./mptcp_connect -j"
+	else
+		mptcp_connect="./mptcp_connect -r"
+	fi
+
+	ip netns exec ${listener_ns} $mptcp_connect -t $timeout -l -p $port -s ${srv_proto} 0.0.0.0 < "$sin" > "$sout" &
 	spid=$!
 
 	sleep 1
 
-	ip netns exec ${connector_ns} ./mptcp_connect -j -t $timeout -p $port -s ${cl_proto} $connect_addr < "$cin" > "$cout" &
+	ip netns exec ${connector_ns} $mptcp_connect -t $timeout -p $port -s ${cl_proto} $connect_addr < "$cin" > "$cout" &
 	cpid=$!
 
+	if [ $rm_nr_ns1 -gt 0 ]; then
+		counter=1
+		sleep 1
+
+		while [ $counter -le $rm_nr_ns1 ]
+		do
+			ip netns exec ${listener_ns} ./pm_nl_ctl del $counter
+			sleep 1
+			let counter+=1
+		done
+	fi
+
+	if [ $rm_nr_ns2 -gt 0 ]; then
+		counter=1
+		sleep 1
+
+		while [ $counter -le $rm_nr_ns2 ]
+		do
+			ip netns exec ${connector_ns} ./pm_nl_ctl del $counter
+			sleep 1
+			let counter+=1
+		done
+	fi
+
 	wait $cpid
 	retc=$?
 	wait $spid
@@ -176,9 +209,9 @@ do_transfer()
 
 	if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
 		echo " client exit code $retc, server $rets" 1>&2
-		echo "\nnetns ${listener_ns} socket stat for $port:" 1>&2
+		echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2
 		ip netns exec ${listener_ns} ss -nita 1>&2 -o "sport = :$port"
-		echo "\nnetns ${connector_ns} socket stat for $port:" 1>&2
+		echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2
 		ip netns exec ${connector_ns} ss -nita 1>&2 -o "dport = :$port"
 
 		cat "$capout"
@@ -219,7 +252,24 @@ run_tests()
 	connect_addr="$3"
 	lret=0
 
-	do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr}
+	do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} 0 0
+	lret=$?
+	if [ $lret -ne 0 ]; then
+		ret=$lret
+		return
+	fi
+}
+
+run_remove_tests()
+{
+	listener_ns="$1"
+	connector_ns="$2"
+	connect_addr="$3"
+	rm_nr_ns1="$4"
+	rm_nr_ns2="$5"
+	lret=0
+
+	do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} ${rm_nr_ns1} ${rm_nr_ns2}
 	lret=$?
 	if [ $lret -ne 0 ]; then
 		ret=$lret
@@ -276,6 +326,80 @@ chk_join_nr()
 	fi
 }
 
+chk_add_nr()
+{
+	local add_nr=$1
+	local echo_nr=$2
+	local count
+	local dump_stats
+
+	printf "%-39s %s" " " "add"
+	count=`ip netns exec $ns2 nstat -as | grep MPTcpExtAddAddr | awk '{print $2}'`
+	[ -z "$count" ] && count=0
+	if [ "$count" != "$add_nr" ]; then
+		echo "[fail] got $count ADD_ADDR[s] expected $add_nr"
+		ret=1
+		dump_stats=1
+	else
+		echo -n "[ ok ]"
+	fi
+
+	echo -n " - echo  "
+	count=`ip netns exec $ns1 nstat -as | grep MPTcpExtEchoAdd | awk '{print $2}'`
+	[ -z "$count" ] && count=0
+	if [ "$count" != "$echo_nr" ]; then
+		echo "[fail] got $count ADD_ADDR echo[s] expected $echo_nr"
+		ret=1
+		dump_stats=1
+	else
+		echo "[ ok ]"
+	fi
+
+	if [ "${dump_stats}" = 1 ]; then
+		echo Server ns stats
+		ip netns exec $ns1 nstat -as | grep MPTcp
+		echo Client ns stats
+		ip netns exec $ns2 nstat -as | grep MPTcp
+	fi
+}
+
+chk_rm_nr()
+{
+	local rm_addr_nr=$1
+	local rm_subflow_nr=$2
+	local count
+	local dump_stats
+
+	printf "%-39s %s" " " "rm "
+	count=`ip netns exec $ns1 nstat -as | grep MPTcpExtRmAddr | awk '{print $2}'`
+	[ -z "$count" ] && count=0
+	if [ "$count" != "$rm_addr_nr" ]; then
+		echo "[fail] got $count RM_ADDR[s] expected $rm_addr_nr"
+		ret=1
+		dump_stats=1
+	else
+		echo -n "[ ok ]"
+	fi
+
+	echo -n " - sf    "
+	count=`ip netns exec $ns2 nstat -as | grep MPTcpExtRmSubflow | awk '{print $2}'`
+	[ -z "$count" ] && count=0
+	if [ "$count" != "$rm_subflow_nr" ]; then
+		echo "[fail] got $count RM_SUBFLOW[s] expected $rm_subflow_nr"
+		ret=1
+		dump_stats=1
+	else
+		echo "[ ok ]"
+	fi
+
+	if [ "${dump_stats}" = 1 ]; then
+		echo Server ns stats
+		ip netns exec $ns1 nstat -as | grep MPTcp
+		echo Client ns stats
+		ip netns exec $ns2 nstat -as | grep MPTcp
+	fi
+}
+
 sin=$(mktemp)
 sout=$(mktemp)
 cin=$(mktemp)
@@ -332,6 +456,7 @@ reset
 ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
 run_tests $ns1 $ns2 10.0.1.1
 chk_join_nr "unused signal address" 0 0 0
+chk_add_nr 1 1
 
 # accept and use add_addr
 reset
@@ -340,6 +465,7 @@ ip netns exec $ns2 ./pm_nl_ctl limits 1 1
 ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
 run_tests $ns1 $ns2 10.0.1.1
 chk_join_nr "signal address" 1 1 1
+chk_add_nr 1 1
 
 # accept and use add_addr with an additional subflow
 # note: signal address in server ns and local addresses in client ns must
@@ -352,6 +478,7 @@ ip netns exec $ns2 ./pm_nl_ctl limits 1 2
 ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
 run_tests $ns1 $ns2 10.0.1.1
 chk_join_nr "subflow and signal" 2 2 2
+chk_add_nr 1 1
 
 # accept and use add_addr with additional subflows
 reset
@@ -362,6 +489,59 @@ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
 ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow
 run_tests $ns1 $ns2 10.0.1.1
 chk_join_nr "multiple subflows and signal" 3 3 3
+chk_add_nr 1 1
+
+# single subflow, remove
+reset
+ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ip netns exec $ns2 ./pm_nl_ctl limits 0 1
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+run_remove_tests $ns1 $ns2 10.0.1.1 0 1
+chk_join_nr "remove single subflow" 1 1 1
+chk_rm_nr 1 1
+
+# multiple subflows, remove
+reset
+ip netns exec $ns1 ./pm_nl_ctl limits 0 2
+ip netns exec $ns2 ./pm_nl_ctl limits 0 2
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+run_remove_tests $ns1 $ns2 10.0.1.1 0 2
+chk_join_nr "remove multiple subflows" 2 2 2
+chk_rm_nr 2 2
+
+# single address, remove
+reset
+ip netns exec $ns1 ./pm_nl_ctl limits 0 1
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ip netns exec $ns2 ./pm_nl_ctl limits 1 1
+run_remove_tests $ns1 $ns2 10.0.1.1 1 0
+chk_join_nr "remove single address" 1 1 1
+chk_add_nr 1 1
+chk_rm_nr 0 0
+
+# subflow and signal, remove
+reset
+ip netns exec $ns1 ./pm_nl_ctl limits 0 2
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ip netns exec $ns2 ./pm_nl_ctl limits 1 2
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+run_remove_tests $ns1 $ns2 10.0.1.1 1 1
+chk_join_nr "remove subflow and signal" 2 2 2
+chk_add_nr 1 1
+chk_rm_nr 1 1
+
+# subflows and signal, remove
+reset
+ip netns exec $ns1 ./pm_nl_ctl limits 0 3
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
+ip netns exec $ns2 ./pm_nl_ctl limits 1 3
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
+ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow
+run_remove_tests $ns1 $ns2 10.0.1.1 1 2
+chk_join_nr "remove subflows and signal" 3 3 3
+chk_add_nr 1 1
+chk_rm_nr 2 2
 
 # single subflow, syncookies
 reset_with_cookies
@@ -396,6 +576,7 @@ ip netns exec $ns2 ./pm_nl_ctl limits 1 1
 ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
 run_tests $ns1 $ns2 10.0.1.1
 chk_join_nr "signal address with syn cookies" 1 1 1
+chk_add_nr 1 1
 
 # test cookie with subflow and signal
 reset_with_cookies
@@ -405,6 +586,7 @@ ip netns exec $ns2 ./pm_nl_ctl limits 1 2
 ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
 run_tests $ns1 $ns2 10.0.1.1
 chk_join_nr "subflow and signal w cookies" 2 2 2
+chk_add_nr 1 1
 
 # accept and use add_addr with additional subflows
 reset_with_cookies
@@ -415,5 +597,6 @@ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
 ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow
 run_tests $ns1 $ns2 10.0.1.1
 chk_join_nr "subflows and signal w. cookies" 3 3 3
+chk_add_nr 1 1
 
 exit $ret
diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh
new file mode 100755
index 000000000000..2f649b431456
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
@@ -0,0 +1,293 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
+ns1="ns1-$rndh"
+ns2="ns2-$rndh"
+ns3="ns3-$rndh"
+capture=false
+ksft_skip=4
+timeout=30
+test_cnt=1
+ret=0
+bail=0
+
+usage() {
+	echo "Usage: $0 [ -b ] [ -c ] [ -d ]"
+	echo -e "\t-b: bail out after first error, otherwise runs al testcases"
+	echo -e "\t-c: capture packets for each test using tcpdump (default: no capture)"
+	echo -e "\t-d: debug this script"
+}
+
+cleanup()
+{
+	rm -f "$cin" "$cout"
+	rm -f "$sin" "$sout"
+	rm -f "$capout"
+
+	local netns
+	for netns in "$ns1" "$ns2" "$ns3";do
+		ip netns del $netns
+	done
+}
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without ip tool"
+	exit $ksft_skip
+fi
+
+#  "$ns1"              ns2                    ns3
+#     ns1eth1    ns2eth1   ns2eth3      ns3eth1
+#            netem
+#     ns1eth2    ns2eth2
+#            netem
+
+setup()
+{
+	large=$(mktemp)
+	small=$(mktemp)
+	sout=$(mktemp)
+	cout=$(mktemp)
+	capout=$(mktemp)
+	size=$((2048 * 4096))
+	dd if=/dev/zero of=$small bs=4096 count=20 >/dev/null 2>&1
+	dd if=/dev/zero of=$large bs=4096 count=$((size / 4096)) >/dev/null 2>&1
+
+	trap cleanup EXIT
+
+	for i in "$ns1" "$ns2" "$ns3";do
+		ip netns add $i || exit $ksft_skip
+		ip -net $i link set lo up
+	done
+
+	ip link add ns1eth1 netns "$ns1" type veth peer name ns2eth1 netns "$ns2"
+	ip link add ns1eth2 netns "$ns1" type veth peer name ns2eth2 netns "$ns2"
+	ip link add ns2eth3 netns "$ns2" type veth peer name ns3eth1 netns "$ns3"
+
+	ip -net "$ns1" addr add 10.0.1.1/24 dev ns1eth1
+	ip -net "$ns1" addr add dead:beef:1::1/64 dev ns1eth1 nodad
+	ip -net "$ns1" link set ns1eth1 up mtu 1500
+	ip -net "$ns1" route add default via 10.0.1.2
+	ip -net "$ns1" route add default via dead:beef:1::2
+
+	ip -net "$ns1" addr add 10.0.2.1/24 dev ns1eth2
+	ip -net "$ns1" addr add dead:beef:2::1/64 dev ns1eth2 nodad
+	ip -net "$ns1" link set ns1eth2 up mtu 1500
+	ip -net "$ns1" route add default via 10.0.2.2 metric 101
+	ip -net "$ns1" route add default via dead:beef:2::2 metric 101
+
+	ip netns exec "$ns1" ./pm_nl_ctl limits 1 1
+	ip netns exec "$ns1" ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags subflow
+	ip netns exec "$ns1" sysctl -q net.ipv4.conf.all.rp_filter=0
+
+	ip -net "$ns2" addr add 10.0.1.2/24 dev ns2eth1
+	ip -net "$ns2" addr add dead:beef:1::2/64 dev ns2eth1 nodad
+	ip -net "$ns2" link set ns2eth1 up mtu 1500
+
+	ip -net "$ns2" addr add 10.0.2.2/24 dev ns2eth2
+	ip -net "$ns2" addr add dead:beef:2::2/64 dev ns2eth2 nodad
+	ip -net "$ns2" link set ns2eth2 up mtu 1500
+
+	ip -net "$ns2" addr add 10.0.3.2/24 dev ns2eth3
+	ip -net "$ns2" addr add dead:beef:3::2/64 dev ns2eth3 nodad
+	ip -net "$ns2" link set ns2eth3 up mtu 1500
+	ip netns exec "$ns2" sysctl -q net.ipv4.ip_forward=1
+	ip netns exec "$ns2" sysctl -q net.ipv6.conf.all.forwarding=1
+
+	ip -net "$ns3" addr add 10.0.3.3/24 dev ns3eth1
+	ip -net "$ns3" addr add dead:beef:3::3/64 dev ns3eth1 nodad
+	ip -net "$ns3" link set ns3eth1 up mtu 1500
+	ip -net "$ns3" route add default via 10.0.3.2
+	ip -net "$ns3" route add default via dead:beef:3::2
+
+	ip netns exec "$ns3" ./pm_nl_ctl limits 1 1
+}
+
+# $1: ns, $2: port
+wait_local_port_listen()
+{
+	local listener_ns="${1}"
+	local port="${2}"
+
+	local port_hex i
+
+	port_hex="$(printf "%04X" "${port}")"
+	for i in $(seq 10); do
+		ip netns exec "${listener_ns}" cat /proc/net/tcp* | \
+			awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) {rc=0; exit}} END {exit rc}" &&
+			break
+		sleep 0.1
+	done
+}
+
+do_transfer()
+{
+	local cin=$1
+	local sin=$2
+	local max_time=$3
+	local port
+	port=$((10000+$test_cnt))
+	test_cnt=$((test_cnt+1))
+
+	:> "$cout"
+	:> "$sout"
+	:> "$capout"
+
+	local addr_port
+	addr_port=$(printf "%s:%d" ${connect_addr} ${port})
+
+	if $capture; then
+		local capuser
+		if [ -z $SUDO_USER ] ; then
+			capuser=""
+		else
+			capuser="-Z $SUDO_USER"
+		fi
+
+		local capfile="${rndh}-${port}"
+		local capopt="-i any -s 65535 -B 32768 ${capuser}"
+
+		ip netns exec ${ns3}  tcpdump ${capopt} -w "${capfile}-listener.pcap"  >> "${capout}" 2>&1 &
+		local cappid_listener=$!
+
+		ip netns exec ${ns1} tcpdump ${capopt} -w "${capfile}-connector.pcap" >> "${capout}" 2>&1 &
+		local cappid_connector=$!
+
+		sleep 1
+	fi
+
+	ip netns exec ${ns3} ./mptcp_connect -jt $timeout -l -p $port 0.0.0.0 < "$sin" > "$sout" &
+	local spid=$!
+
+	wait_local_port_listen "${ns3}" "${port}"
+
+	local start
+	start=$(date +%s%3N)
+	ip netns exec ${ns1} ./mptcp_connect -jt $timeout -p $port 10.0.3.3 < "$cin" > "$cout" &
+	local cpid=$!
+
+	wait $cpid
+	local retc=$?
+	wait $spid
+	local rets=$?
+
+	local stop
+	stop=$(date +%s%3N)
+
+	if $capture; then
+		sleep 1
+		kill ${cappid_listener}
+		kill ${cappid_connector}
+	fi
+
+	local duration
+	duration=$((stop-start))
+
+	cmp $sin $cout > /dev/null 2>&1
+	local cmps=$?
+	cmp $cin $sout > /dev/null 2>&1
+	local cmpc=$?
+
+	printf "%16s" "$duration max $max_time "
+	if [ $retc -eq 0 ] && [ $rets -eq 0 ] && \
+	   [ $cmpc -eq 0 ] && [ $cmps -eq 0 ] && \
+	   [ $duration -lt $max_time ]; then
+		echo "[ OK ]"
+		cat "$capout"
+		return 0
+	fi
+
+	echo " [ fail ]"
+	echo "client exit code $retc, server $rets" 1>&2
+	echo -e "\nnetns ${ns3} socket stat for $port:" 1>&2
+	ip netns exec ${ns3} ss -nita 1>&2 -o "sport = :$port"
+	echo -e "\nnetns ${ns1} socket stat for $port:" 1>&2
+	ip netns exec ${ns1} ss -nita 1>&2 -o "dport = :$port"
+	ls -l $sin $cout
+	ls -l $cin $sout
+
+	cat "$capout"
+	return 1
+}
+
+run_test()
+{
+	local rate1=$1
+	local rate2=$2
+	local delay1=$3
+	local delay2=$4
+	local lret
+	local dev
+	shift 4
+	local msg=$*
+
+	[ $delay1 -gt 0 ] && delay1="delay $delay1" || delay1=""
+	[ $delay2 -gt 0 ] && delay2="delay $delay2" || delay2=""
+
+	for dev in ns1eth1 ns1eth2; do
+		tc -n $ns1 qdisc del dev $dev root >/dev/null 2>&1
+	done
+	for dev in ns2eth1 ns2eth2; do
+		tc -n $ns2 qdisc del dev $dev root >/dev/null 2>&1
+	done
+	tc -n $ns1 qdisc add dev ns1eth1 root netem rate ${rate1}mbit $delay1
+	tc -n $ns1 qdisc add dev ns1eth2 root netem rate ${rate2}mbit $delay2
+	tc -n $ns2 qdisc add dev ns2eth1 root netem rate ${rate1}mbit $delay1
+	tc -n $ns2 qdisc add dev ns2eth2 root netem rate ${rate2}mbit $delay2
+
+	# time is measure in ms
+	local time=$((size * 8 * 1000 / (( $rate1 + $rate2) * 1024 *1024) ))
+
+	# mptcp_connect will do some sleeps to allow the mp_join handshake
+	# completion
+	time=$((time + 1350))
+
+	printf "%-50s" "$msg"
+	do_transfer $small $large $((time * 11 / 10))
+	lret=$?
+	if [ $lret -ne 0 ]; then
+		ret=$lret
+		[ $bail -eq 0 ] || exit $ret
+	fi
+
+	printf "%-50s" "$msg - reverse direction"
+	do_transfer $large $small $((time * 11 / 10))
+	lret=$?
+	if [ $lret -ne 0 ]; then
+		ret=$lret
+		[ $bail -eq 0 ] || exit $ret
+	fi
+}
+
+while getopts "bcdh" option;do
+	case "$option" in
+	"h")
+		usage $0
+		exit 0
+		;;
+	"b")
+		bail=1
+		;;
+	"c")
+		capture=true
+		;;
+	"d")
+		set -x
+		;;
+	"?")
+		usage $0
+		exit 1
+		;;
+	esac
+done
+
+setup
+run_test 10 10 0 0 "balanced bwidth"
+run_test 10 10 1 50 "balanced bwidth with unbalanced delay"
+
+# we still need some additional infrastructure to pass the following test-cases
+# run_test 30 10 0 0 "unbalanced bwidth"
+# run_test 30 10 1 50 "unbalanced bwidth with unbalanced delay"
+# run_test 30 10 50 1 "unbalanced bwidth with opposed, unbalanced delay"
+exit $ret
diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c
index 93208caacbe6..f75c53ce0a2d 100644
--- a/tools/testing/selftests/net/nettest.c
+++ b/tools/testing/selftests/net/nettest.c
@@ -1667,6 +1667,8 @@ int main(int argc, char *argv[])
 		case 'R':
 			args.type = SOCK_RAW;
 			args.port = 0;
+			if (!args.protocol)
+				args.protocol = IPPROTO_RAW;
 			break;
 		case 'P':
 			pe = getprotobyname(optarg);
diff --git a/tools/testing/selftests/net/psock_snd.sh b/tools/testing/selftests/net/psock_snd.sh
index 6331d91b86a6..170be65e0816 100755
--- a/tools/testing/selftests/net/psock_snd.sh
+++ b/tools/testing/selftests/net/psock_snd.sh
@@ -45,7 +45,7 @@ echo "raw vnet hdr"
 echo "raw csum_off"
 ./in_netns.sh ./psock_snd -v -c
 
-echo "raw csum_off with bad offset (fails)"
+echo "raw csum_off with bad offset (expected to fail)"
 (! ./in_netns.sh ./psock_snd -v -c -C)
 
 
@@ -57,7 +57,7 @@ echo "raw min size"
 echo "raw mtu size"
 ./in_netns.sh ./psock_snd -l "${mss}"
 
-echo "raw mtu size + 1 (fails)"
+echo "raw mtu size + 1 (expected to fail)"
 (! ./in_netns.sh ./psock_snd -l "${mss_exceeds}")
 
 # fails due to ARPHRD_ETHER check in packet_extra_vlan_len_allowed
@@ -65,19 +65,19 @@ echo "raw mtu size + 1 (fails)"
 # echo "raw vlan mtu size"
 # ./in_netns.sh ./psock_snd -V -l "${mss}"
 
-echo "raw vlan mtu size + 1 (fails)"
+echo "raw vlan mtu size + 1 (expected to fail)"
 (! ./in_netns.sh ./psock_snd -V -l "${mss_exceeds}")
 
 echo "dgram mtu size"
 ./in_netns.sh ./psock_snd -d -l "${mss}"
 
-echo "dgram mtu size + 1 (fails)"
+echo "dgram mtu size + 1 (expected to fail)"
 (! ./in_netns.sh ./psock_snd -d -l "${mss_exceeds}")
 
-echo "raw truncate hlen (fails: does not arrive)"
+echo "raw truncate hlen (expected to fail: does not arrive)"
 (! ./in_netns.sh ./psock_snd -t "$((${vnet_hlen} + ${eth_hlen}))")
 
-echo "raw truncate hlen - 1 (fails: EINVAL)"
+echo "raw truncate hlen - 1 (expected to fail: EINVAL)"
 (! ./in_netns.sh ./psock_snd -t "$((${vnet_hlen} + ${eth_hlen} - 1))")
 
 
@@ -86,13 +86,13 @@ echo "raw truncate hlen - 1 (fails: EINVAL)"
 echo "raw gso min size"
 ./in_netns.sh ./psock_snd -v -c -g -l "${mss_exceeds}"
 
-echo "raw gso min size - 1 (fails)"
+echo "raw gso min size - 1 (expected to fail)"
 (! ./in_netns.sh ./psock_snd -v -c -g -l "${mss}")
 
 echo "raw gso max size"
 ./in_netns.sh ./psock_snd -v -c -g -l "${max_mss}"
 
-echo "raw gso max size + 1 (fails)"
+echo "raw gso max size + 1 (expected to fail)"
 (! ./in_netns.sh ./psock_snd -v -c -g -l "${max_mss_exceeds}")
 
 echo "OK. All tests passed"
diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
index 7c38a909f8b8..8a2fe6d64bf2 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -1175,6 +1175,51 @@ kci_test_neigh_get()
 	echo "PASS: neigh get"
 }
 
+kci_test_bridge_parent_id()
+{
+	local ret=0
+	sysfsnet=/sys/bus/netdevsim/devices/netdevsim
+	probed=false
+
+	if [ ! -w /sys/bus/netdevsim/new_device ] ; then
+		modprobe -q netdevsim
+		check_err $?
+		if [ $ret -ne 0 ]; then
+			echo "SKIP: bridge_parent_id can't load netdevsim"
+			return $ksft_skip
+		fi
+		probed=true
+	fi
+
+	echo "10 1" > /sys/bus/netdevsim/new_device
+	while [ ! -d ${sysfsnet}10 ] ; do :; done
+	echo "20 1" > /sys/bus/netdevsim/new_device
+	while [ ! -d ${sysfsnet}20 ] ; do :; done
+	udevadm settle
+	dev10=`ls ${sysfsnet}10/net/`
+	dev20=`ls ${sysfsnet}20/net/`
+
+	ip link add name test-bond0 type bond mode 802.3ad
+	ip link set dev $dev10 master test-bond0
+	ip link set dev $dev20 master test-bond0
+	ip link add name test-br0 type bridge
+	ip link set dev test-bond0 master test-br0
+	check_err $?
+
+	# clean up any leftovers
+	ip link del dev test-br0
+	ip link del dev test-bond0
+	echo 20 > /sys/bus/netdevsim/del_device
+	echo 10 > /sys/bus/netdevsim/del_device
+	$probed && rmmod netdevsim
+
+	if [ $ret -ne 0 ]; then
+		echo "FAIL: bridge_parent_id"
+		return 1
+	fi
+	echo "PASS: bridge_parent_id"
+}
+
 kci_test_rtnl()
 {
 	local ret=0
@@ -1224,6 +1269,8 @@ kci_test_rtnl()
 	check_err $?
 	kci_test_neigh_get
 	check_err $?
+	kci_test_bridge_parent_id
+	check_err $?
 
 	kci_del_dummy
 	return $ret
diff --git a/tools/testing/selftests/net/tcp_mmap.c b/tools/testing/selftests/net/tcp_mmap.c
index a61b7b3da549..00f837c9bc6c 100644
--- a/tools/testing/selftests/net/tcp_mmap.c
+++ b/tools/testing/selftests/net/tcp_mmap.c
@@ -123,6 +123,28 @@ void hash_zone(void *zone, unsigned int length)
 #define ALIGN_UP(x, align_to)	(((x) + ((align_to)-1)) & ~((align_to)-1))
 #define ALIGN_PTR_UP(p, ptr_align_to)	((typeof(p))ALIGN_UP((unsigned long)(p), ptr_align_to))
 
+
+static void *mmap_large_buffer(size_t need, size_t *allocated)
+{
+	void *buffer;
+	size_t sz;
+
+	/* Attempt to use huge pages if possible. */
+	sz = ALIGN_UP(need, map_align);
+	buffer = mmap(NULL, sz, PROT_READ | PROT_WRITE,
+		      MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
+
+	if (buffer == (void *)-1) {
+		sz = need;
+		buffer = mmap(NULL, sz, PROT_READ | PROT_WRITE,
+			      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+		if (buffer != (void *)-1)
+			fprintf(stderr, "MAP_HUGETLB attempt failed, look at /sys/kernel/mm/hugepages for optimal performance\n");
+	}
+	*allocated = sz;
+	return buffer;
+}
+
 void *child_thread(void *arg)
 {
 	unsigned long total_mmap = 0, total = 0;
@@ -135,6 +157,7 @@ void *child_thread(void *arg)
 	void *addr = NULL;
 	double throughput;
 	struct rusage ru;
+	size_t buffer_sz;
 	int lu, fd;
 
 	fd = (int)(unsigned long)arg;
@@ -142,9 +165,9 @@ void *child_thread(void *arg)
 	gettimeofday(&t0, NULL);
 
 	fcntl(fd, F_SETFL, O_NDELAY);
-	buffer = malloc(chunk_size);
-	if (!buffer) {
-		perror("malloc");
+	buffer = mmap_large_buffer(chunk_size, &buffer_sz);
+	if (buffer == (void *)-1) {
+		perror("mmap");
 		goto error;
 	}
 	if (zflg) {
@@ -179,6 +202,10 @@ void *child_thread(void *arg)
 				total_mmap += zc.length;
 				if (xflg)
 					hash_zone(addr, zc.length);
+				/* It is more efficient to unmap the pages right now,
+				 * instead of doing this in next TCP_ZEROCOPY_RECEIVE.
+				 */
+				madvise(addr, zc.length, MADV_DONTNEED);
 				total += zc.length;
 			}
 			if (zc.recv_skip_hint) {
@@ -230,7 +257,7 @@ end:
 				ru.ru_nvcsw);
 	}
 error:
-	free(buffer);
+	munmap(buffer, buffer_sz);
 	close(fd);
 	if (zflg)
 		munmap(raddr, chunk_size + map_align);
@@ -347,6 +374,7 @@ int main(int argc, char *argv[])
 	uint64_t total = 0;
 	char *host = NULL;
 	int fd, c, on = 1;
+	size_t buffer_sz;
 	char *buffer;
 	int sflg = 0;
 	int mss = 0;
@@ -437,8 +465,8 @@ int main(int argc, char *argv[])
 		}
 		do_accept(fdlisten);
 	}
-	buffer = mmap(NULL, chunk_size, PROT_READ | PROT_WRITE,
-			      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+	buffer = mmap_large_buffer(chunk_size, &buffer_sz);
 	if (buffer == (char *)-1) {
 		perror("mmap");
 		exit(1);
@@ -484,6 +512,6 @@ int main(int argc, char *argv[])
 		total += wr;
 	}
 	close(fd);
-	munmap(buffer, chunk_size);
+	munmap(buffer, buffer_sz);
 	return 0;
 }
diff --git a/tools/testing/selftests/net/vrf_route_leaking.sh b/tools/testing/selftests/net/vrf_route_leaking.sh
new file mode 100755
index 000000000000..23cf924754a5
--- /dev/null
+++ b/tools/testing/selftests/net/vrf_route_leaking.sh
@@ -0,0 +1,626 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2019 David Ahern <dsahern@gmail.com>. All rights reserved.
+# Copyright (c) 2020 Michael Jeanson <mjeanson@efficios.com>. All rights reserved.
+#
+# Requires CONFIG_NET_VRF, CONFIG_VETH, CONFIG_BRIDGE and CONFIG_NET_NS.
+#
+#
+# Symmetric routing topology
+#
+#                     blue         red
+# +----+              .253 +----+ .253              +----+
+# | h1 |-------------------| r1 |-------------------| h2 |
+# +----+ .1                +----+                .2 +----+
+#         172.16.1/24                  172.16.2/24
+#    2001:db8:16:1/64                  2001:db8:16:2/64
+#
+#
+# Route from h1 to h2 and back goes through r1, incoming vrf blue has a route
+# to the outgoing vrf red for the n2 network and red has a route back to n1.
+# The red VRF interface has a MTU of 1400.
+#
+# The first test sends a ping with a ttl of 1 from h1 to h2 and parses the
+# output of the command to check that a ttl expired error is received.
+#
+# The second test runs traceroute from h1 to h2 and parses the output to check
+# for a hop on r1.
+#
+# The third test sends a ping with a packet size of 1450 from h1 to h2 and
+# parses the output of the command to check that a fragmentation error is
+# received.
+#
+#
+# Asymmetric routing topology
+#
+# This topology represents a customer setup where the issue with icmp errors
+# and VRF route leaking was initialy reported. The MTU test isn't done here
+# because of the lack of a return route in the red VRF.
+#
+#                     blue         red
+#                     .253 +----+ .253
+#                     +----| r1 |----+
+#                     |    +----+    |
+# +----+              |              |              +----+
+# | h1 |--------------+              +--------------| h2 |
+# +----+ .1           |              |           .2 +----+
+#         172.16.1/24 |    +----+    | 172.16.2/24
+#    2001:db8:16:1/64 +----| r2 |----+ 2001:db8:16:2/64
+#                     .254 +----+ .254
+#
+#
+# Route from h1 to h2 goes through r1, incoming vrf blue has a route to the
+# outgoing vrf red for the n2 network but red doesn't have a route back to n1.
+# Route from h2 to h1 goes through r2.
+#
+# The objective is to check that the incoming vrf routing table is selected
+# to send an ICMP error back to the source when the ttl of a packet reaches 1
+# while it is forwarded between different vrfs.
+
+VERBOSE=0
+PAUSE_ON_FAIL=no
+DEFAULT_TTYPE=sym
+
+H1_N1=172.16.1.0/24
+H1_N1_6=2001:db8:16:1::/64
+
+H1_N1_IP=172.16.1.1
+R1_N1_IP=172.16.1.253
+R2_N1_IP=172.16.1.254
+
+H1_N1_IP6=2001:db8:16:1::1
+R1_N1_IP6=2001:db8:16:1::253
+R2_N1_IP6=2001:db8:16:1::254
+
+H2_N2=172.16.2.0/24
+H2_N2_6=2001:db8:16:2::/64
+
+H2_N2_IP=172.16.2.2
+R1_N2_IP=172.16.2.253
+R2_N2_IP=172.16.2.254
+
+H2_N2_IP6=2001:db8:16:2::2
+R1_N2_IP6=2001:db8:16:2::253
+R2_N2_IP6=2001:db8:16:2::254
+
+################################################################################
+# helpers
+
+log_section()
+{
+	echo
+	echo "###########################################################################"
+	echo "$*"
+	echo "###########################################################################"
+	echo
+}
+
+log_test()
+{
+	local rc=$1
+	local expected=$2
+	local msg="$3"
+
+	if [ "${rc}" -eq "${expected}" ]; then
+		printf "TEST: %-60s  [ OK ]\n" "${msg}"
+		nsuccess=$((nsuccess+1))
+	else
+		ret=1
+		nfail=$((nfail+1))
+		printf "TEST: %-60s  [FAIL]\n" "${msg}"
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+			echo
+			echo "hit enter to continue, 'q' to quit"
+			read -r a
+			[ "$a" = "q" ] && exit 1
+		fi
+	fi
+}
+
+run_cmd()
+{
+	local cmd="$*"
+	local out
+	local rc
+
+	if [ "$VERBOSE" = "1" ]; then
+		echo "COMMAND: $cmd"
+	fi
+
+	# shellcheck disable=SC2086
+	out=$(eval $cmd 2>&1)
+	rc=$?
+	if [ "$VERBOSE" = "1" ] && [ -n "$out" ]; then
+		echo "$out"
+	fi
+
+	[ "$VERBOSE" = "1" ] && echo
+
+	return $rc
+}
+
+run_cmd_grep()
+{
+	local grep_pattern="$1"
+	shift
+	local cmd="$*"
+	local out
+	local rc
+
+	if [ "$VERBOSE" = "1" ]; then
+		echo "COMMAND: $cmd"
+	fi
+
+	# shellcheck disable=SC2086
+	out=$(eval $cmd 2>&1)
+	if [ "$VERBOSE" = "1" ] && [ -n "$out" ]; then
+		echo "$out"
+	fi
+
+	echo "$out" | grep -q "$grep_pattern"
+	rc=$?
+
+	[ "$VERBOSE" = "1" ] && echo
+
+	return $rc
+}
+
+################################################################################
+# setup and teardown
+
+cleanup()
+{
+	local ns
+
+	for ns in h1 h2 r1 r2; do
+		ip netns del $ns 2>/dev/null
+	done
+}
+
+setup_vrf()
+{
+	local ns=$1
+
+	ip -netns "${ns}" rule del pref 0
+	ip -netns "${ns}" rule add pref 32765 from all lookup local
+	ip -netns "${ns}" -6 rule del pref 0
+	ip -netns "${ns}" -6 rule add pref 32765 from all lookup local
+}
+
+create_vrf()
+{
+	local ns=$1
+	local vrf=$2
+	local table=$3
+
+	ip -netns "${ns}" link add "${vrf}" type vrf table "${table}"
+	ip -netns "${ns}" link set "${vrf}" up
+	ip -netns "${ns}" route add vrf "${vrf}" unreachable default metric 8192
+	ip -netns "${ns}" -6 route add vrf "${vrf}" unreachable default metric 8192
+
+	ip -netns "${ns}" addr add 127.0.0.1/8 dev "${vrf}"
+	ip -netns "${ns}" -6 addr add ::1 dev "${vrf}" nodad
+}
+
+setup_sym()
+{
+	local ns
+
+	# make sure we are starting with a clean slate
+	cleanup
+
+	#
+	# create nodes as namespaces
+	#
+	for ns in h1 h2 r1; do
+		ip netns add $ns
+		ip -netns $ns link set lo up
+
+		case "${ns}" in
+		h[12]) ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0
+		       ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1
+			;;
+		r1)    ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1
+		       ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1
+		esac
+	done
+
+	#
+	# create interconnects
+	#
+	ip -netns h1 link add eth0 type veth peer name r1h1
+	ip -netns h1 link set r1h1 netns r1 name eth0 up
+
+	ip -netns h2 link add eth0 type veth peer name r1h2
+	ip -netns h2 link set r1h2 netns r1 name eth1 up
+
+	#
+	# h1
+	#
+	ip -netns h1 addr add dev eth0 ${H1_N1_IP}/24
+	ip -netns h1 -6 addr add dev eth0 ${H1_N1_IP6}/64 nodad
+	ip -netns h1 link set eth0 up
+
+	# h1 to h2 via r1
+	ip -netns h1    route add ${H2_N2} via ${R1_N1_IP} dev eth0
+	ip -netns h1 -6 route add ${H2_N2_6} via "${R1_N1_IP6}" dev eth0
+
+	#
+	# h2
+	#
+	ip -netns h2 addr add dev eth0 ${H2_N2_IP}/24
+	ip -netns h2 -6 addr add dev eth0 ${H2_N2_IP6}/64 nodad
+	ip -netns h2 link set eth0 up
+
+	# h2 to h1 via r1
+	ip -netns h2 route add default via ${R1_N2_IP} dev eth0
+	ip -netns h2 -6 route add default via ${R1_N2_IP6} dev eth0
+
+	#
+	# r1
+	#
+	setup_vrf r1
+	create_vrf r1 blue 1101
+	create_vrf r1 red 1102
+	ip -netns r1 link set mtu 1400 dev eth1
+	ip -netns r1 link set eth0 vrf blue up
+	ip -netns r1 link set eth1 vrf red up
+	ip -netns r1 addr add dev eth0 ${R1_N1_IP}/24
+	ip -netns r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad
+	ip -netns r1 addr add dev eth1 ${R1_N2_IP}/24
+	ip -netns r1 -6 addr add dev eth1 ${R1_N2_IP6}/64 nodad
+
+	# Route leak from blue to red
+	ip -netns r1 route add vrf blue ${H2_N2} dev red
+	ip -netns r1 -6 route add vrf blue ${H2_N2_6} dev red
+
+	# Route leak from red to blue
+	ip -netns r1 route add vrf red ${H1_N1} dev blue
+	ip -netns r1 -6 route add vrf red ${H1_N1_6} dev blue
+
+
+	# Wait for ip config to settle
+	sleep 2
+}
+
+setup_asym()
+{
+	local ns
+
+	# make sure we are starting with a clean slate
+	cleanup
+
+	#
+	# create nodes as namespaces
+	#
+	for ns in h1 h2 r1 r2; do
+		ip netns add $ns
+		ip -netns $ns link set lo up
+
+		case "${ns}" in
+		h[12]) ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0
+		       ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1
+			;;
+		r[12]) ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1
+		       ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1
+		esac
+	done
+
+	#
+	# create interconnects
+	#
+	ip -netns h1 link add eth0 type veth peer name r1h1
+	ip -netns h1 link set r1h1 netns r1 name eth0 up
+
+	ip -netns h1 link add eth1 type veth peer name r2h1
+	ip -netns h1 link set r2h1 netns r2 name eth0 up
+
+	ip -netns h2 link add eth0 type veth peer name r1h2
+	ip -netns h2 link set r1h2 netns r1 name eth1 up
+
+	ip -netns h2 link add eth1 type veth peer name r2h2
+	ip -netns h2 link set r2h2 netns r2 name eth1 up
+
+	#
+	# h1
+	#
+	ip -netns h1 link add br0 type bridge
+	ip -netns h1 link set br0 up
+	ip -netns h1 addr add dev br0 ${H1_N1_IP}/24
+	ip -netns h1 -6 addr add dev br0 ${H1_N1_IP6}/64 nodad
+	ip -netns h1 link set eth0 master br0 up
+	ip -netns h1 link set eth1 master br0 up
+
+	# h1 to h2 via r1
+	ip -netns h1    route add ${H2_N2} via ${R1_N1_IP} dev br0
+	ip -netns h1 -6 route add ${H2_N2_6} via "${R1_N1_IP6}" dev br0
+
+	#
+	# h2
+	#
+	ip -netns h2 link add br0 type bridge
+	ip -netns h2 link set br0 up
+	ip -netns h2 addr add dev br0 ${H2_N2_IP}/24
+	ip -netns h2 -6 addr add dev br0 ${H2_N2_IP6}/64 nodad
+	ip -netns h2 link set eth0 master br0 up
+	ip -netns h2 link set eth1 master br0 up
+
+	# h2 to h1 via r2
+	ip -netns h2 route add default via ${R2_N2_IP} dev br0
+	ip -netns h2 -6 route add default via ${R2_N2_IP6} dev br0
+
+	#
+	# r1
+	#
+	setup_vrf r1
+	create_vrf r1 blue 1101
+	create_vrf r1 red 1102
+	ip -netns r1 link set mtu 1400 dev eth1
+	ip -netns r1 link set eth0 vrf blue up
+	ip -netns r1 link set eth1 vrf red up
+	ip -netns r1 addr add dev eth0 ${R1_N1_IP}/24
+	ip -netns r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad
+	ip -netns r1 addr add dev eth1 ${R1_N2_IP}/24
+	ip -netns r1 -6 addr add dev eth1 ${R1_N2_IP6}/64 nodad
+
+	# Route leak from blue to red
+	ip -netns r1 route add vrf blue ${H2_N2} dev red
+	ip -netns r1 -6 route add vrf blue ${H2_N2_6} dev red
+
+	# No route leak from red to blue
+
+	#
+	# r2
+	#
+	ip -netns r2 addr add dev eth0 ${R2_N1_IP}/24
+	ip -netns r2 -6 addr add dev eth0 ${R2_N1_IP6}/64 nodad
+	ip -netns r2 addr add dev eth1 ${R2_N2_IP}/24
+	ip -netns r2 -6 addr add dev eth1 ${R2_N2_IP6}/64 nodad
+
+	# Wait for ip config to settle
+	sleep 2
+}
+
+check_connectivity()
+{
+	ip netns exec h1 ping -c1 -w1 ${H2_N2_IP} >/dev/null 2>&1
+	log_test $? 0 "Basic IPv4 connectivity"
+	return $?
+}
+
+check_connectivity6()
+{
+	ip netns exec h1 "${ping6}" -c1 -w1 ${H2_N2_IP6} >/dev/null 2>&1
+	log_test $? 0 "Basic IPv6 connectivity"
+	return $?
+}
+
+check_traceroute()
+{
+	if [ ! -x "$(command -v traceroute)" ]; then
+		echo "SKIP: Could not run IPV4 test without traceroute"
+		return 1
+	fi
+}
+
+check_traceroute6()
+{
+	if [ ! -x "$(command -v traceroute6)" ]; then
+		echo "SKIP: Could not run IPV6 test without traceroute6"
+		return 1
+	fi
+}
+
+ipv4_traceroute()
+{
+	local ttype="$1"
+
+	[ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE"
+
+	log_section "IPv4 ($ttype route): VRF ICMP error route lookup traceroute"
+
+	check_traceroute || return
+
+	setup_"$ttype"
+
+	check_connectivity || return
+
+	run_cmd_grep "${R1_N1_IP}" ip netns exec h1 traceroute ${H2_N2_IP}
+	log_test $? 0 "Traceroute reports a hop on r1"
+}
+
+ipv4_traceroute_asym()
+{
+	ipv4_traceroute asym
+}
+
+ipv6_traceroute()
+{
+	local ttype="$1"
+
+	[ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE"
+
+	log_section "IPv6 ($ttype route): VRF ICMP error route lookup traceroute"
+
+	check_traceroute6 || return
+
+	setup_"$ttype"
+
+	check_connectivity6 || return
+
+	run_cmd_grep "${R1_N1_IP6}" ip netns exec h1 traceroute6 ${H2_N2_IP6}
+	log_test $? 0 "Traceroute6 reports a hop on r1"
+}
+
+ipv6_traceroute_asym()
+{
+	ipv6_traceroute asym
+}
+
+ipv4_ping_ttl()
+{
+	local ttype="$1"
+
+	[ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE"
+
+	log_section "IPv4 ($ttype route): VRF ICMP ttl error route lookup ping"
+
+	setup_"$ttype"
+
+	check_connectivity || return
+
+	run_cmd_grep "Time to live exceeded" ip netns exec h1 ping -t1 -c1 -W2 ${H2_N2_IP}
+	log_test $? 0 "Ping received ICMP ttl exceeded"
+}
+
+ipv4_ping_ttl_asym()
+{
+	ipv4_ping_ttl asym
+}
+
+ipv4_ping_frag()
+{
+	local ttype="$1"
+
+	[ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE"
+
+	log_section "IPv4 ($ttype route): VRF ICMP fragmentation error route lookup ping"
+
+	setup_"$ttype"
+
+	check_connectivity || return
+
+	run_cmd_grep "Frag needed" ip netns exec h1 ping -s 1450 -Mdo -c1 -W2 ${H2_N2_IP}
+	log_test $? 0 "Ping received ICMP Frag needed"
+}
+
+ipv4_ping_frag_asym()
+{
+	ipv4_ping_frag asym
+}
+
+ipv6_ping_ttl()
+{
+	local ttype="$1"
+
+	[ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE"
+
+	log_section "IPv6 ($ttype route): VRF ICMP ttl error route lookup ping"
+
+	setup_"$ttype"
+
+	check_connectivity6 || return
+
+	run_cmd_grep "Time exceeded: Hop limit" ip netns exec h1 "${ping6}" -t1 -c1 -W2 ${H2_N2_IP6}
+	log_test $? 0 "Ping received ICMP Hop limit"
+}
+
+ipv6_ping_ttl_asym()
+{
+	ipv6_ping_ttl asym
+}
+
+ipv6_ping_frag()
+{
+	local ttype="$1"
+
+	[ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE"
+
+	log_section "IPv6 ($ttype route): VRF ICMP fragmentation error route lookup ping"
+
+	setup_"$ttype"
+
+	check_connectivity6 || return
+
+	run_cmd_grep "Packet too big" ip netns exec h1 "${ping6}" -s 1450 -Mdo -c1 -W2 ${H2_N2_IP6}
+	log_test $? 0 "Ping received ICMP Packet too big"
+}
+
+ipv6_ping_frag_asym()
+{
+	ipv6_ping_frag asym
+}
+
+################################################################################
+# usage
+
+usage()
+{
+        cat <<EOF
+usage: ${0##*/} OPTS
+
+	-4          Run IPv4 tests only
+	-6          Run IPv6 tests only
+        -t TEST     Run only TEST
+	-p          Pause on fail
+	-v          verbose mode (show commands and output)
+EOF
+}
+
+################################################################################
+# main
+
+# Some systems don't have a ping6 binary anymore
+command -v ping6 > /dev/null 2>&1 && ping6=$(command -v ping6) || ping6=$(command -v ping)
+
+TESTS_IPV4="ipv4_ping_ttl ipv4_traceroute ipv4_ping_frag ipv4_ping_ttl_asym ipv4_traceroute_asym"
+TESTS_IPV6="ipv6_ping_ttl ipv6_traceroute ipv6_ping_frag ipv6_ping_ttl_asym ipv6_traceroute_asym"
+
+ret=0
+nsuccess=0
+nfail=0
+
+while getopts :46t:pvh o
+do
+	case $o in
+		4) TESTS=ipv4;;
+		6) TESTS=ipv6;;
+		t) TESTS=$OPTARG;;
+		p) PAUSE_ON_FAIL=yes;;
+		v) VERBOSE=1;;
+		h) usage; exit 0;;
+		*) usage; exit 1;;
+	esac
+done
+
+#
+# show user test config
+#
+if [ -z "$TESTS" ]; then
+        TESTS="$TESTS_IPV4 $TESTS_IPV6"
+elif [ "$TESTS" = "ipv4" ]; then
+        TESTS="$TESTS_IPV4"
+elif [ "$TESTS" = "ipv6" ]; then
+        TESTS="$TESTS_IPV6"
+fi
+
+for t in $TESTS
+do
+	case $t in
+	ipv4_ping_ttl|ping)              ipv4_ping_ttl;;&
+	ipv4_ping_ttl_asym|ping)         ipv4_ping_ttl_asym;;&
+	ipv4_traceroute|traceroute)      ipv4_traceroute;;&
+	ipv4_traceroute_asym|traceroute) ipv4_traceroute_asym;;&
+	ipv4_ping_frag|ping)             ipv4_ping_frag;;&
+
+	ipv6_ping_ttl|ping)              ipv6_ping_ttl;;&
+	ipv6_ping_ttl_asym|ping)         ipv6_ping_ttl_asym;;&
+	ipv6_traceroute|traceroute)      ipv6_traceroute;;&
+	ipv6_traceroute_asym|traceroute) ipv6_traceroute_asym;;&
+	ipv6_ping_frag|ping)             ipv6_ping_frag;;&
+
+	# setup namespaces and config, but do not run any tests
+	setup_sym|setup)                 setup_sym; exit 0;;
+	setup_asym)                      setup_asym; exit 0;;
+
+	help)                       echo "Test names: $TESTS"; exit 0;;
+	esac
+done
+
+cleanup
+
+printf "\nTests passed: %3d\n" ${nsuccess}
+printf "Tests failed: %3d\n"   ${nfail}
+
+exit $ret
diff --git a/tools/testing/selftests/netfilter/.gitignore b/tools/testing/selftests/netfilter/.gitignore
new file mode 100644
index 000000000000..8448f74adfec
--- /dev/null
+++ b/tools/testing/selftests/netfilter/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+nf-queue
diff --git a/tools/testing/selftests/netfilter/nf-queue.c b/tools/testing/selftests/netfilter/nf-queue.c
index 29c73bce38fa..9e56b9d47037 100644
--- a/tools/testing/selftests/netfilter/nf-queue.c
+++ b/tools/testing/selftests/netfilter/nf-queue.c
@@ -17,9 +17,12 @@
 
 struct options {
 	bool count_packets;
+	bool gso_enabled;
 	int verbose;
 	unsigned int queue_num;
 	unsigned int timeout;
+	uint32_t verdict;
+	uint32_t delay_ms;
 };
 
 static unsigned int queue_stats[5];
@@ -27,7 +30,7 @@ static struct options opts;
 
 static void help(const char *p)
 {
-	printf("Usage: %s [-c|-v [-vv] ] [-t timeout] [-q queue_num]\n", p);
+	printf("Usage: %s [-c|-v [-vv] ] [-t timeout] [-q queue_num] [-Qdst_queue ] [ -d ms_delay ] [-G]\n", p);
 }
 
 static int parse_attr_cb(const struct nlattr *attr, void *data)
@@ -162,7 +165,7 @@ nfq_build_cfg_params(char *buf, uint8_t mode, int range, int queue_num)
 }
 
 static struct nlmsghdr *
-nfq_build_verdict(char *buf, int id, int queue_num, int verd)
+nfq_build_verdict(char *buf, int id, int queue_num, uint32_t verd)
 {
 	struct nfqnl_msg_verdict_hdr vh = {
 		.verdict = htonl(verd),
@@ -189,9 +192,6 @@ static void print_stats(void)
 	unsigned int last, total;
 	int i;
 
-	if (!opts.count_packets)
-		return;
-
 	total = 0;
 	last = queue_stats[0];
 
@@ -234,7 +234,8 @@ struct mnl_socket *open_queue(void)
 
 	nlh = nfq_build_cfg_params(buf, NFQNL_COPY_PACKET, 0xFFFF, queue_num);
 
-	flags = NFQA_CFG_F_GSO | NFQA_CFG_F_UID_GID;
+	flags = opts.gso_enabled ? NFQA_CFG_F_GSO : 0;
+	flags |= NFQA_CFG_F_UID_GID;
 	mnl_attr_put_u32(nlh, NFQA_CFG_FLAGS, htonl(flags));
 	mnl_attr_put_u32(nlh, NFQA_CFG_MASK, htonl(flags));
 
@@ -255,6 +256,17 @@ struct mnl_socket *open_queue(void)
 	return nl;
 }
 
+static void sleep_ms(uint32_t delay)
+{
+	struct timespec ts = { .tv_sec = delay / 1000 };
+
+	delay %= 1000;
+
+	ts.tv_nsec = delay * 1000llu * 1000llu;
+
+	nanosleep(&ts, NULL);
+}
+
 static int mainloop(void)
 {
 	unsigned int buflen = 64 * 1024 + MNL_SOCKET_BUFFER_SIZE;
@@ -278,7 +290,7 @@ static int mainloop(void)
 
 		ret = mnl_socket_recvfrom(nl, buf, buflen);
 		if (ret == -1) {
-			if (errno == ENOBUFS)
+			if (errno == ENOBUFS || errno == EINTR)
 				continue;
 
 			if (errno == EAGAIN) {
@@ -298,7 +310,10 @@ static int mainloop(void)
 		}
 
 		id = ret - MNL_CB_OK;
-		nlh = nfq_build_verdict(buf, id, opts.queue_num, NF_ACCEPT);
+		if (opts.delay_ms)
+			sleep_ms(opts.delay_ms);
+
+		nlh = nfq_build_verdict(buf, id, opts.queue_num, opts.verdict);
 		if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) {
 			perror("mnl_socket_sendto");
 			exit(EXIT_FAILURE);
@@ -314,7 +329,7 @@ static void parse_opts(int argc, char **argv)
 {
 	int c;
 
-	while ((c = getopt(argc, argv, "chvt:q:")) != -1) {
+	while ((c = getopt(argc, argv, "chvt:q:Q:d:G")) != -1) {
 		switch (c) {
 		case 'c':
 			opts.count_packets = true;
@@ -328,20 +343,48 @@ static void parse_opts(int argc, char **argv)
 			if (opts.queue_num > 0xffff)
 				opts.queue_num = 0;
 			break;
+		case 'Q':
+			opts.verdict = atoi(optarg);
+			if (opts.verdict > 0xffff) {
+				fprintf(stderr, "Expected destination queue number\n");
+				exit(1);
+			}
+
+			opts.verdict <<= 16;
+			opts.verdict |= NF_QUEUE;
+			break;
+		case 'd':
+			opts.delay_ms = atoi(optarg);
+			if (opts.delay_ms == 0) {
+				fprintf(stderr, "Expected nonzero delay (in milliseconds)\n");
+				exit(1);
+			}
+			break;
 		case 't':
 			opts.timeout = atoi(optarg);
 			break;
+		case 'G':
+			opts.gso_enabled = false;
+			break;
 		case 'v':
 			opts.verbose++;
 			break;
 		}
 	}
+
+	if (opts.verdict != NF_ACCEPT && (opts.verdict >> 16 == opts.queue_num)) {
+		fprintf(stderr, "Cannot use same destination and source queue\n");
+		exit(1);
+	}
 }
 
 int main(int argc, char *argv[])
 {
 	int ret;
 
+	opts.verdict = NF_ACCEPT;
+	opts.gso_enabled = true;
+
 	parse_opts(argc, argv);
 
 	ret = mainloop();
diff --git a/tools/testing/selftests/netfilter/nft_flowtable.sh b/tools/testing/selftests/netfilter/nft_flowtable.sh
index a47d1d832210..431296c0f91c 100755
--- a/tools/testing/selftests/netfilter/nft_flowtable.sh
+++ b/tools/testing/selftests/netfilter/nft_flowtable.sh
@@ -11,7 +11,7 @@
 # result in fragmentation and/or PMTU discovery.
 #
 # You can check with different Orgininator/Link/Responder MTU eg:
-# sh nft_flowtable.sh -o1000 -l500 -r100
+# nft_flowtable.sh -o8000 -l1500 -r2000
 #
 
 
@@ -27,8 +27,7 @@ ns2out=""
 log_netns=$(sysctl -n net.netfilter.nf_log_all_netns)
 
 checktool (){
-	$1 > /dev/null 2>&1
-	if [ $? -ne 0 ];then
+	if ! $1 > /dev/null 2>&1; then
 		echo "SKIP: Could not $2"
 		exit $ksft_skip
 	fi
@@ -87,19 +86,36 @@ omtu=9000
 lmtu=1500
 rmtu=2000
 
+usage(){
+	echo "nft_flowtable.sh [OPTIONS]"
+	echo
+	echo "MTU options"
+	echo "   -o originator"
+	echo "   -l link"
+	echo "   -r responder"
+	exit 1
+}
+
 while getopts "o:l:r:" o
 do
 	case $o in
 		o) omtu=$OPTARG;;
 		l) lmtu=$OPTARG;;
 		r) rmtu=$OPTARG;;
+		*) usage;;
 	esac
 done
 
-ip -net nsr1 link set veth0 mtu $omtu
+if ! ip -net nsr1 link set veth0 mtu $omtu; then
+	exit 1
+fi
+
 ip -net ns1 link set eth0 mtu $omtu
 
-ip -net nsr2 link set veth1 mtu $rmtu
+if ! ip -net nsr2 link set veth1 mtu $rmtu; then
+	exit 1
+fi
+
 ip -net ns2 link set eth0 mtu $rmtu
 
 # transfer-net between nsr1 and nsr2.
@@ -120,7 +136,10 @@ for i in 1 2; do
   ip -net ns$i route add default via 10.0.$i.1
   ip -net ns$i addr add dead:$i::99/64 dev eth0
   ip -net ns$i route add default via dead:$i::1
-  ip netns exec ns$i sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null
+  if ! ip netns exec ns$i sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null; then
+	echo "ERROR: Check Originator/Responder values (problem during address addition)"
+	exit 1
+  fi
 
   # don't set ip DF bit for first two tests
   ip netns exec ns$i sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null
@@ -178,15 +197,13 @@ if [ $? -ne 0 ]; then
 fi
 
 # test basic connectivity
-ip netns exec ns1 ping -c 1 -q 10.0.2.99 > /dev/null
-if [ $? -ne 0 ];then
+if ! ip netns exec ns1 ping -c 1 -q 10.0.2.99 > /dev/null; then
   echo "ERROR: ns1 cannot reach ns2" 1>&2
   bash
   exit 1
 fi
 
-ip netns exec ns2 ping -c 1 -q 10.0.1.99 > /dev/null
-if [ $? -ne 0 ];then
+if ! ip netns exec ns2 ping -c 1 -q 10.0.1.99 > /dev/null; then
   echo "ERROR: ns2 cannot reach ns1" 1>&2
   exit 1
 fi
@@ -203,7 +220,6 @@ ns2out=$(mktemp)
 make_file()
 {
 	name=$1
-	who=$2
 
 	SIZE=$((RANDOM % (1024 * 8)))
 	TSIZE=$((SIZE * 1024))
@@ -222,8 +238,7 @@ check_transfer()
 	out=$2
 	what=$3
 
-	cmp "$in" "$out" > /dev/null 2>&1
-	if [ $? -ne 0 ] ;then
+	if ! cmp "$in" "$out" > /dev/null 2>&1; then
 		echo "FAIL: file mismatch for $what" 1>&2
 		ls -l "$in"
 		ls -l "$out"
@@ -260,13 +275,11 @@ test_tcp_forwarding_ip()
 
 	wait
 
-	check_transfer "$ns1in" "$ns2out" "ns1 -> ns2"
-	if [ $? -ne 0 ];then
+	if ! check_transfer "$ns1in" "$ns2out" "ns1 -> ns2"; then
 		lret=1
 	fi
 
-	check_transfer "$ns2in" "$ns1out" "ns1 <- ns2"
-	if [ $? -ne 0 ];then
+	if ! check_transfer "$ns2in" "$ns1out" "ns1 <- ns2"; then
 		lret=1
 	fi
 
@@ -295,13 +308,12 @@ test_tcp_forwarding_nat()
 	return $lret
 }
 
-make_file "$ns1in" "ns1"
-make_file "$ns2in" "ns2"
+make_file "$ns1in"
+make_file "$ns2in"
 
 # First test:
 # No PMTU discovery, nsr1 is expected to fragment packets from ns1 to ns2 as needed.
-test_tcp_forwarding ns1 ns2
-if [ $? -eq 0 ] ;then
+if test_tcp_forwarding ns1 ns2; then
 	echo "PASS: flow offloaded for ns1/ns2"
 else
 	echo "FAIL: flow offload for ns1/ns2:" 1>&2
@@ -332,9 +344,7 @@ table ip nat {
 }
 EOF
 
-test_tcp_forwarding_nat ns1 ns2
-
-if [ $? -eq 0 ] ;then
+if test_tcp_forwarding_nat ns1 ns2; then
 	echo "PASS: flow offloaded for ns1/ns2 with NAT"
 else
 	echo "FAIL: flow offload for ns1/ns2 with NAT" 1>&2
@@ -346,8 +356,7 @@ fi
 # Same as second test, but with PMTU discovery enabled.
 handle=$(ip netns exec nsr1 nft -a list table inet filter | grep something-to-grep-for | cut -d \# -f 2)
 
-ip netns exec nsr1 nft delete rule inet filter forward $handle
-if [ $? -ne 0 ] ;then
+if ! ip netns exec nsr1 nft delete rule inet filter forward $handle; then
 	echo "FAIL: Could not delete large-packet accept rule"
 	exit 1
 fi
@@ -355,8 +364,7 @@ fi
 ip netns exec ns1 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
 ip netns exec ns2 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
 
-test_tcp_forwarding_nat ns1 ns2
-if [ $? -eq 0 ] ;then
+if test_tcp_forwarding_nat ns1 ns2; then
 	echo "PASS: flow offloaded for ns1/ns2 with NAT and pmtu discovery"
 else
 	echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2
@@ -402,8 +410,7 @@ ip -net ns2 route del 192.168.10.1 via 10.0.2.1
 ip -net ns2 route add default via 10.0.2.1
 ip -net ns2 route add default via dead:2::1
 
-test_tcp_forwarding ns1 ns2
-if [ $? -eq 0 ] ;then
+if test_tcp_forwarding ns1 ns2; then
 	echo "PASS: ipsec tunnel mode for ns1/ns2"
 else
 	echo "FAIL: ipsec tunnel mode for ns1/ns2"
diff --git a/tools/testing/selftests/netfilter/nft_meta.sh b/tools/testing/selftests/netfilter/nft_meta.sh
index d250b84dd5bc..087f0e6e71ce 100755
--- a/tools/testing/selftests/netfilter/nft_meta.sh
+++ b/tools/testing/selftests/netfilter/nft_meta.sh
@@ -7,8 +7,7 @@ ksft_skip=4
 sfx=$(mktemp -u "XXXXXXXX")
 ns0="ns0-$sfx"
 
-nft --version > /dev/null 2>&1
-if [ $? -ne 0 ];then
+if ! nft --version > /dev/null 2>&1; then
 	echo "SKIP: Could not run test without nft tool"
 	exit $ksft_skip
 fi
@@ -24,6 +23,8 @@ ip -net "$ns0" addr add 127.0.0.1 dev lo
 
 trap cleanup EXIT
 
+currentyear=$(date +%G)
+lastyear=$((currentyear-1))
 ip netns exec "$ns0" nft -f /dev/stdin <<EOF
 table inet filter {
 	counter iifcount {}
@@ -33,6 +34,9 @@ table inet filter {
 	counter infproto4count {}
 	counter il4protocounter {}
 	counter imarkcounter {}
+	counter icpu0counter {}
+	counter ilastyearcounter {}
+	counter icurrentyearcounter {}
 
 	counter oifcount {}
 	counter oifnamecount {}
@@ -54,6 +58,9 @@ table inet filter {
 		meta nfproto ipv4 counter name "infproto4count"
 		meta l4proto icmp counter name "il4protocounter"
 		meta mark 42 counter name "imarkcounter"
+		meta cpu 0 counter name "icpu0counter"
+		meta time "$lastyear-01-01" - "$lastyear-12-31" counter name ilastyearcounter
+		meta time "$currentyear-01-01" - "$currentyear-12-31" counter name icurrentyearcounter
 	}
 
 	chain output {
@@ -84,11 +91,10 @@ check_one_counter()
 	local want="packets $2"
 	local verbose="$3"
 
-	cnt=$(ip netns exec "$ns0" nft list counter inet filter $cname | grep -q "$want")
-	if [ $? -ne 0 ];then
+	if ! ip netns exec "$ns0" nft list counter inet filter $cname | grep -q "$want"; then
 		echo "FAIL: $cname, want \"$want\", got"
 		ret=1
-		ip netns exec "$ns0" nft list counter inet filter $counter
+		ip netns exec "$ns0" nft list counter inet filter $cname
 	fi
 }
 
@@ -100,8 +106,7 @@ check_lo_counters()
 
 	for counter in iifcount iifnamecount iifgroupcount iiftypecount infproto4count \
 		       oifcount oifnamecount oifgroupcount oiftypecount onfproto4count \
-		       il4protocounter \
-		       ol4protocounter \
+		       il4protocounter icurrentyearcounter ol4protocounter \
 	     ; do
 		check_one_counter "$counter" "$want" "$verbose"
 	done
@@ -116,9 +121,22 @@ check_one_counter oskuidcounter "1" true
 check_one_counter oskgidcounter "1" true
 check_one_counter imarkcounter "1" true
 check_one_counter omarkcounter "1" true
+check_one_counter ilastyearcounter "0" true
 
 if [ $ret -eq 0 ];then
 	echo "OK: nftables meta iif/oif counters at expected values"
+else
+	exit $ret
+fi
+
+#First CPU execution and counter
+taskset -p 01 $$ > /dev/null
+ip netns exec "$ns0" nft reset counters > /dev/null
+ip netns exec "$ns0" ping -q -c 1 127.0.0.1 > /dev/null
+check_one_counter icpu0counter "2" true
+
+if [ $ret -eq 0 ];then
+	echo "OK: nftables meta cpu counter at expected values"
 fi
 
 exit $ret
diff --git a/tools/testing/selftests/netfilter/nft_queue.sh b/tools/testing/selftests/netfilter/nft_queue.sh
index 6898448b4266..3d202b90b33d 100755
--- a/tools/testing/selftests/netfilter/nft_queue.sh
+++ b/tools/testing/selftests/netfilter/nft_queue.sh
@@ -12,6 +12,7 @@ sfx=$(mktemp -u "XXXXXXXX")
 ns1="ns1-$sfx"
 ns2="ns2-$sfx"
 nsrouter="nsrouter-$sfx"
+timeout=4
 
 cleanup()
 {
@@ -20,6 +21,7 @@ cleanup()
 	ip netns del ${nsrouter}
 	rm -f "$TMPFILE0"
 	rm -f "$TMPFILE1"
+	rm -f "$TMPFILE2" "$TMPFILE3"
 }
 
 nft --version > /dev/null 2>&1
@@ -42,6 +44,8 @@ fi
 
 TMPFILE0=$(mktemp)
 TMPFILE1=$(mktemp)
+TMPFILE2=$(mktemp)
+TMPFILE3=$(mktemp)
 trap cleanup EXIT
 
 ip netns add ${ns1}
@@ -83,7 +87,7 @@ load_ruleset() {
 	local name=$1
 	local prio=$2
 
-ip netns exec ${nsrouter} nft -f - <<EOF
+ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF
 table inet $name {
 	chain nfq {
 		ip protocol icmp queue bypass
@@ -118,7 +122,7 @@ EOF
 load_counter_ruleset() {
 	local prio=$1
 
-ip netns exec ${nsrouter} nft -f - <<EOF
+ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF
 table inet countrules {
 	chain pre {
 		type filter hook prerouting priority $prio; policy accept;
@@ -175,7 +179,7 @@ test_ping_router() {
 test_queue_blackhole() {
 	local proto=$1
 
-ip netns exec ${nsrouter} nft -f - <<EOF
+ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF
 table $proto blackh {
 	chain forward {
 	type filter hook forward priority 0; policy accept;
@@ -184,10 +188,10 @@ table $proto blackh {
 }
 EOF
 	if [ $proto = "ip" ] ;then
-		ip netns exec ${ns1} ping -c 1 -q 10.0.2.99 > /dev/null
+		ip netns exec ${ns1} ping -W 2 -c 1 -q 10.0.2.99 > /dev/null
 		lret=$?
 	elif [ $proto = "ip6" ]; then
-		ip netns exec ${ns1} ping -c 1 -q dead:2::99 > /dev/null
+		ip netns exec ${ns1} ping -W 2 -c 1 -q dead:2::99 > /dev/null
 		lret=$?
 	else
 		lret=111
@@ -214,8 +218,8 @@ test_queue()
 	local last=""
 
 	# spawn nf-queue listeners
-	ip netns exec ${nsrouter} ./nf-queue -c -q 0 -t 3 > "$TMPFILE0" &
-	ip netns exec ${nsrouter} ./nf-queue -c -q 1 -t 3 > "$TMPFILE1" &
+	ip netns exec ${nsrouter} ./nf-queue -c -q 0 -t $timeout > "$TMPFILE0" &
+	ip netns exec ${nsrouter} ./nf-queue -c -q 1 -t $timeout > "$TMPFILE1" &
 	sleep 1
 	test_ping
 	ret=$?
@@ -250,11 +254,11 @@ test_queue()
 
 test_tcp_forward()
 {
-	ip netns exec ${nsrouter} ./nf-queue -q 2 -t 10 &
+	ip netns exec ${nsrouter} ./nf-queue -q 2 -t $timeout &
 	local nfqpid=$!
 
 	tmpfile=$(mktemp) || exit 1
-	dd conv=sparse status=none if=/dev/zero bs=1M count=100 of=$tmpfile
+	dd conv=sparse status=none if=/dev/zero bs=1M count=200 of=$tmpfile
 	ip netns exec ${ns2} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null &
 	local rpid=$!
 
@@ -270,15 +274,13 @@ test_tcp_forward()
 
 test_tcp_localhost()
 {
-	tc -net "${nsrouter}" qdisc add dev lo root netem loss random 1%
-
 	tmpfile=$(mktemp) || exit 1
 
-	dd conv=sparse status=none if=/dev/zero bs=1M count=900 of=$tmpfile
+	dd conv=sparse status=none if=/dev/zero bs=1M count=200 of=$tmpfile
 	ip netns exec ${nsrouter} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null &
 	local rpid=$!
 
-	ip netns exec ${nsrouter} ./nf-queue -q 3 -t 30 &
+	ip netns exec ${nsrouter} ./nf-queue -q 3 -t $timeout &
 	local nfqpid=$!
 
 	sleep 1
@@ -287,6 +289,47 @@ test_tcp_localhost()
 
 	wait $rpid
 	[ $? -eq 0 ] && echo "PASS: tcp via loopback"
+	wait 2>/dev/null
+}
+
+test_tcp_localhost_requeue()
+{
+ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF
+flush ruleset
+table inet filter {
+	chain output {
+		type filter hook output priority 0; policy accept;
+		tcp dport 12345 limit rate 1/second burst 1 packets counter queue num 0
+	}
+	chain post {
+		type filter hook postrouting priority 0; policy accept;
+		tcp dport 12345 limit rate 1/second burst 1 packets counter queue num 0
+	}
+}
+EOF
+	tmpfile=$(mktemp) || exit 1
+	dd conv=sparse status=none if=/dev/zero bs=1M count=200 of=$tmpfile
+	ip netns exec ${nsrouter} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null &
+	local rpid=$!
+
+	ip netns exec ${nsrouter} ./nf-queue -c -q 1 -t $timeout > "$TMPFILE2" &
+
+	# nfqueue 1 will be called via output hook.  But this time,
+        # re-queue the packet to nfqueue program on queue 2.
+	ip netns exec ${nsrouter} ./nf-queue -G -d 150 -c -q 0 -Q 1 -t $timeout > "$TMPFILE3" &
+
+	sleep 1
+	ip netns exec ${nsrouter} nc -w 5 127.0.0.1 12345 <"$tmpfile" > /dev/null
+	rm -f "$tmpfile"
+
+	wait
+
+	if ! diff -u "$TMPFILE2" "$TMPFILE3" ; then
+		echo "FAIL: lost packets during requeue?!" 1>&2
+		return
+	fi
+
+	echo "PASS: tcp via loopback and re-queueing"
 }
 
 ip netns exec ${nsrouter} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
@@ -328,5 +371,6 @@ test_queue 20
 
 test_tcp_forward
 test_tcp_localhost
+test_tcp_localhost_requeue
 
 exit $ret
diff --git a/tools/testing/selftests/pidfd/pidfd.h b/tools/testing/selftests/pidfd/pidfd.h
index a2c80914e3dc..01f8d3c0cf2c 100644
--- a/tools/testing/selftests/pidfd/pidfd.h
+++ b/tools/testing/selftests/pidfd/pidfd.h
@@ -46,6 +46,10 @@
 #define __NR_pidfd_getfd -1
 #endif
 
+#ifndef PIDFD_NONBLOCK
+#define PIDFD_NONBLOCK O_NONBLOCK
+#endif
+
 /*
  * The kernel reserves 300 pids via RESERVED_PIDS in kernel/pid.c
  * That means, when it wraps around any pid < 300 will be skipped.
diff --git a/tools/testing/selftests/pidfd/pidfd_setns_test.c b/tools/testing/selftests/pidfd/pidfd_setns_test.c
index 7dca1aa4672d..1f085b922c6e 100644
--- a/tools/testing/selftests/pidfd/pidfd_setns_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_setns_test.c
@@ -75,7 +75,7 @@ static int sys_waitid(int which, pid_t pid, int options)
 
 pid_t create_child(int *pidfd, unsigned flags)
 {
-	struct clone_args args = {
+	struct __clone_args args = {
 		.flags		= CLONE_PIDFD | flags,
 		.exit_signal	= SIGCHLD,
 		.pidfd		= ptr_to_u64(pidfd),
diff --git a/tools/testing/selftests/pidfd/pidfd_wait.c b/tools/testing/selftests/pidfd/pidfd_wait.c
index 7079f8eef792..be2943f072f6 100644
--- a/tools/testing/selftests/pidfd/pidfd_wait.c
+++ b/tools/testing/selftests/pidfd/pidfd_wait.c
@@ -17,10 +17,15 @@
 #include <unistd.h>
 
 #include "pidfd.h"
-#include "../kselftest.h"
+#include "../kselftest_harness.h"
 
 #define ptr_to_u64(ptr) ((__u64)((uintptr_t)(ptr)))
 
+/* Attempt to de-conflict with the selftests tree. */
+#ifndef SKIP
+#define SKIP(s, ...)	XFAIL(s, ##__VA_ARGS__)
+#endif
+
 static pid_t sys_clone3(struct clone_args *args)
 {
 	return syscall(__NR_clone3, args, sizeof(struct clone_args));
@@ -32,9 +37,8 @@ static int sys_waitid(int which, pid_t pid, siginfo_t *info, int options,
 	return syscall(__NR_waitid, which, pid, info, options, ru);
 }
 
-static int test_pidfd_wait_simple(void)
+TEST(wait_simple)
 {
-	const char *test_name = "pidfd wait simple";
 	int pidfd = -1, status = 0;
 	pid_t parent_tid = -1;
 	struct clone_args args = {
@@ -50,76 +54,40 @@ static int test_pidfd_wait_simple(void)
 	};
 
 	pidfd = open("/proc/self", O_DIRECTORY | O_RDONLY | O_CLOEXEC);
-	if (pidfd < 0)
-		ksft_exit_fail_msg("%s test: failed to open /proc/self %s\n",
-				   test_name, strerror(errno));
+	ASSERT_GE(pidfd, 0);
 
 	pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL);
-	if (pid == 0)
-		ksft_exit_fail_msg(
-			"%s test: succeeded to wait on invalid pidfd %s\n",
-			test_name, strerror(errno));
-	close(pidfd);
+	ASSERT_NE(pid, 0);
+	EXPECT_EQ(close(pidfd), 0);
 	pidfd = -1;
 
 	pidfd = open("/dev/null", O_RDONLY | O_CLOEXEC);
-	if (pidfd == 0)
-		ksft_exit_fail_msg("%s test: failed to open /dev/null %s\n",
-				   test_name, strerror(errno));
+	ASSERT_GE(pidfd, 0);
 
 	pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL);
-	if (pid == 0)
-		ksft_exit_fail_msg(
-			"%s test: succeeded to wait on invalid pidfd %s\n",
-			test_name, strerror(errno));
-	close(pidfd);
+	ASSERT_NE(pid, 0);
+	EXPECT_EQ(close(pidfd), 0);
 	pidfd = -1;
 
 	pid = sys_clone3(&args);
-	if (pid < 0)
-		ksft_exit_fail_msg("%s test: failed to create new process %s\n",
-				   test_name, strerror(errno));
+	ASSERT_GE(pid, 0);
 
 	if (pid == 0)
 		exit(EXIT_SUCCESS);
 
 	pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL);
-	if (pid < 0)
-		ksft_exit_fail_msg(
-			"%s test: failed to wait on process with pid %d and pidfd %d: %s\n",
-			test_name, parent_tid, pidfd, strerror(errno));
-
-	if (!WIFEXITED(info.si_status) || WEXITSTATUS(info.si_status))
-		ksft_exit_fail_msg(
-			"%s test: unexpected status received after waiting on process with pid %d and pidfd %d: %s\n",
-			test_name, parent_tid, pidfd, strerror(errno));
-	close(pidfd);
-
-	if (info.si_signo != SIGCHLD)
-		ksft_exit_fail_msg(
-			"%s test: unexpected si_signo value %d received after waiting on process with pid %d and pidfd %d: %s\n",
-			test_name, info.si_signo, parent_tid, pidfd,
-			strerror(errno));
-
-	if (info.si_code != CLD_EXITED)
-		ksft_exit_fail_msg(
-			"%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s\n",
-			test_name, info.si_code, parent_tid, pidfd,
-			strerror(errno));
-
-	if (info.si_pid != parent_tid)
-		ksft_exit_fail_msg(
-			"%s test: unexpected si_pid value %d received after waiting on process with pid %d and pidfd %d: %s\n",
-			test_name, info.si_pid, parent_tid, pidfd,
-			strerror(errno));
-
-	ksft_test_result_pass("%s test: Passed\n", test_name);
-	return 0;
+	ASSERT_GE(pid, 0);
+	ASSERT_EQ(WIFEXITED(info.si_status), true);
+	ASSERT_EQ(WEXITSTATUS(info.si_status), 0);
+	EXPECT_EQ(close(pidfd), 0);
+
+	ASSERT_EQ(info.si_signo, SIGCHLD);
+	ASSERT_EQ(info.si_code, CLD_EXITED);
+	ASSERT_EQ(info.si_pid, parent_tid);
 }
 
-static int test_pidfd_wait_states(void)
+TEST(wait_states)
 {
-	const char *test_name = "pidfd wait states";
 	int pidfd = -1, status = 0;
 	pid_t parent_tid = -1;
 	struct clone_args args = {
@@ -135,9 +103,7 @@ static int test_pidfd_wait_states(void)
 	};
 
 	pid = sys_clone3(&args);
-	if (pid < 0)
-		ksft_exit_fail_msg("%s test: failed to create new process %s\n",
-				   test_name, strerror(errno));
+	ASSERT_GE(pid, 0);
 
 	if (pid == 0) {
 		kill(getpid(), SIGSTOP);
@@ -145,127 +111,115 @@ static int test_pidfd_wait_states(void)
 		exit(EXIT_SUCCESS);
 	}
 
-	ret = sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED, NULL);
-	if (ret < 0)
-		ksft_exit_fail_msg(
-			"%s test: failed to wait on WSTOPPED process with pid %d and pidfd %d: %s\n",
-			test_name, parent_tid, pidfd, strerror(errno));
-
-	if (info.si_signo != SIGCHLD)
-		ksft_exit_fail_msg(
-			"%s test: unexpected si_signo value %d received after waiting on process with pid %d and pidfd %d: %s\n",
-			test_name, info.si_signo, parent_tid, pidfd,
-			strerror(errno));
-
-	if (info.si_code != CLD_STOPPED)
-		ksft_exit_fail_msg(
-			"%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s\n",
-			test_name, info.si_code, parent_tid, pidfd,
-			strerror(errno));
-
-	if (info.si_pid != parent_tid)
-		ksft_exit_fail_msg(
-			"%s test: unexpected si_pid value %d received after waiting on process with pid %d and pidfd %d: %s\n",
-			test_name, info.si_pid, parent_tid, pidfd,
-			strerror(errno));
-
-	ret = sys_pidfd_send_signal(pidfd, SIGCONT, NULL, 0);
-	if (ret < 0)
-		ksft_exit_fail_msg(
-			"%s test: failed to send signal to process with pid %d and pidfd %d: %s\n",
-			test_name, parent_tid, pidfd, strerror(errno));
-
-	ret = sys_waitid(P_PIDFD, pidfd, &info, WCONTINUED, NULL);
-	if (ret < 0)
-		ksft_exit_fail_msg(
-			"%s test: failed to wait WCONTINUED on process with pid %d and pidfd %d: %s\n",
-			test_name, parent_tid, pidfd, strerror(errno));
-
-	if (info.si_signo != SIGCHLD)
-		ksft_exit_fail_msg(
-			"%s test: unexpected si_signo value %d received after waiting on process with pid %d and pidfd %d: %s\n",
-			test_name, info.si_signo, parent_tid, pidfd,
-			strerror(errno));
-
-	if (info.si_code != CLD_CONTINUED)
-		ksft_exit_fail_msg(
-			"%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s\n",
-			test_name, info.si_code, parent_tid, pidfd,
-			strerror(errno));
-
-	if (info.si_pid != parent_tid)
-		ksft_exit_fail_msg(
-			"%s test: unexpected si_pid value %d received after waiting on process with pid %d and pidfd %d: %s\n",
-			test_name, info.si_pid, parent_tid, pidfd,
-			strerror(errno));
-
-	ret = sys_waitid(P_PIDFD, pidfd, &info, WUNTRACED, NULL);
-	if (ret < 0)
-		ksft_exit_fail_msg(
-			"%s test: failed to wait on WUNTRACED process with pid %d and pidfd %d: %s\n",
-			test_name, parent_tid, pidfd, strerror(errno));
-
-	if (info.si_signo != SIGCHLD)
-		ksft_exit_fail_msg(
-			"%s test: unexpected si_signo value %d received after waiting on process with pid %d and pidfd %d: %s\n",
-			test_name, info.si_signo, parent_tid, pidfd,
-			strerror(errno));
-
-	if (info.si_code != CLD_STOPPED)
-		ksft_exit_fail_msg(
-			"%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s\n",
-			test_name, info.si_code, parent_tid, pidfd,
-			strerror(errno));
-
-	if (info.si_pid != parent_tid)
-		ksft_exit_fail_msg(
-			"%s test: unexpected si_pid value %d received after waiting on process with pid %d and pidfd %d: %s\n",
-			test_name, info.si_pid, parent_tid, pidfd,
-			strerror(errno));
-
-	ret = sys_pidfd_send_signal(pidfd, SIGKILL, NULL, 0);
-	if (ret < 0)
-		ksft_exit_fail_msg(
-			"%s test: failed to send SIGKILL to process with pid %d and pidfd %d: %s\n",
-			test_name, parent_tid, pidfd, strerror(errno));
+	ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED, NULL), 0);
+	ASSERT_EQ(info.si_signo, SIGCHLD);
+	ASSERT_EQ(info.si_code, CLD_STOPPED);
+	ASSERT_EQ(info.si_pid, parent_tid);
 
-	ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL);
-	if (ret < 0)
-		ksft_exit_fail_msg(
-			"%s test: failed to wait on WEXITED process with pid %d and pidfd %d: %s\n",
-			test_name, parent_tid, pidfd, strerror(errno));
-
-	if (info.si_signo != SIGCHLD)
-		ksft_exit_fail_msg(
-			"%s test: unexpected si_signo value %d received after waiting on process with pid %d and pidfd %d: %s\n",
-			test_name, info.si_signo, parent_tid, pidfd,
-			strerror(errno));
-
-	if (info.si_code != CLD_KILLED)
-		ksft_exit_fail_msg(
-			"%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s\n",
-			test_name, info.si_code, parent_tid, pidfd,
-			strerror(errno));
-
-	if (info.si_pid != parent_tid)
-		ksft_exit_fail_msg(
-			"%s test: unexpected si_pid value %d received after waiting on process with pid %d and pidfd %d: %s\n",
-			test_name, info.si_pid, parent_tid, pidfd,
-			strerror(errno));
-
-	close(pidfd);
-
-	ksft_test_result_pass("%s test: Passed\n", test_name);
-	return 0;
+	ASSERT_EQ(sys_pidfd_send_signal(pidfd, SIGCONT, NULL, 0), 0);
+
+	ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WCONTINUED, NULL), 0);
+	ASSERT_EQ(info.si_signo, SIGCHLD);
+	ASSERT_EQ(info.si_code, CLD_CONTINUED);
+	ASSERT_EQ(info.si_pid, parent_tid);
+
+	ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WUNTRACED, NULL), 0);
+	ASSERT_EQ(info.si_signo, SIGCHLD);
+	ASSERT_EQ(info.si_code, CLD_STOPPED);
+	ASSERT_EQ(info.si_pid, parent_tid);
+
+	ASSERT_EQ(sys_pidfd_send_signal(pidfd, SIGKILL, NULL, 0), 0);
+
+	ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL), 0);
+	ASSERT_EQ(info.si_signo, SIGCHLD);
+	ASSERT_EQ(info.si_code, CLD_KILLED);
+	ASSERT_EQ(info.si_pid, parent_tid);
+
+	EXPECT_EQ(close(pidfd), 0);
 }
 
-int main(int argc, char **argv)
+TEST(wait_nonblock)
 {
-	ksft_print_header();
-	ksft_set_plan(2);
+	int pidfd, status = 0;
+	unsigned int flags = 0;
+	pid_t parent_tid = -1;
+	struct clone_args args = {
+		.parent_tid = ptr_to_u64(&parent_tid),
+		.flags = CLONE_PARENT_SETTID,
+		.exit_signal = SIGCHLD,
+	};
+	int ret;
+	pid_t pid;
+	siginfo_t info = {
+		.si_signo = 0,
+	};
+
+	/*
+	 * Callers need to see ECHILD with non-blocking pidfds when no child
+	 * processes exists.
+	 */
+	pidfd = sys_pidfd_open(getpid(), PIDFD_NONBLOCK);
+	EXPECT_GE(pidfd, 0) {
+		/* pidfd_open() doesn't support PIDFD_NONBLOCK. */
+		ASSERT_EQ(errno, EINVAL);
+		SKIP(return, "Skipping PIDFD_NONBLOCK test");
+	}
+
+	ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL);
+	ASSERT_LT(ret, 0);
+	ASSERT_EQ(errno, ECHILD);
+	EXPECT_EQ(close(pidfd), 0);
+
+	pid = sys_clone3(&args);
+	ASSERT_GE(pid, 0);
+
+	if (pid == 0) {
+		kill(getpid(), SIGSTOP);
+		exit(EXIT_SUCCESS);
+	}
 
-	test_pidfd_wait_simple();
-	test_pidfd_wait_states();
+	pidfd = sys_pidfd_open(pid, PIDFD_NONBLOCK);
+	EXPECT_GE(pidfd, 0) {
+		/* pidfd_open() doesn't support PIDFD_NONBLOCK. */
+		ASSERT_EQ(errno, EINVAL);
+		SKIP(return, "Skipping PIDFD_NONBLOCK test");
+	}
+
+	flags = fcntl(pidfd, F_GETFL, 0);
+	ASSERT_GT(flags, 0);
+	ASSERT_GT((flags & O_NONBLOCK), 0);
+
+	/*
+	 * Callers need to see EAGAIN/EWOULDBLOCK with non-blocking pidfd when
+	 * child processes exist but none have exited.
+	 */
+	ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL);
+	ASSERT_LT(ret, 0);
+	ASSERT_EQ(errno, EAGAIN);
+
+	/*
+	 * Callers need to continue seeing 0 with non-blocking pidfd and
+	 * WNOHANG raised explicitly when child processes exist but none have
+	 * exited.
+	 */
+	ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED | WNOHANG, NULL);
+	ASSERT_EQ(ret, 0);
 
-	return ksft_exit_pass();
+	ASSERT_EQ(fcntl(pidfd, F_SETFL, (flags & ~O_NONBLOCK)), 0);
+
+	ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED, NULL), 0);
+	ASSERT_EQ(info.si_signo, SIGCHLD);
+	ASSERT_EQ(info.si_code, CLD_STOPPED);
+	ASSERT_EQ(info.si_pid, parent_tid);
+
+	ASSERT_EQ(sys_pidfd_send_signal(pidfd, SIGCONT, NULL, 0), 0);
+
+	ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL), 0);
+	ASSERT_EQ(info.si_signo, SIGCHLD);
+	ASSERT_EQ(info.si_code, CLD_EXITED);
+	ASSERT_EQ(info.si_pid, parent_tid);
+
+	EXPECT_EQ(close(pidfd), 0);
 }
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/powerpc/alignment/alignment_handler.c b/tools/testing/selftests/powerpc/alignment/alignment_handler.c
index 55ef15184057..2a0503bc7e49 100644
--- a/tools/testing/selftests/powerpc/alignment/alignment_handler.c
+++ b/tools/testing/selftests/powerpc/alignment/alignment_handler.c
@@ -55,8 +55,6 @@
 #include <setjmp.h>
 #include <signal.h>
 
-#include <asm/cputable.h>
-
 #include "utils.h"
 #include "instructions.h"
 
@@ -64,6 +62,7 @@ int bufsize;
 int debug;
 int testing;
 volatile int gotsig;
+bool prefixes_enabled;
 char *cipath = "/dev/fb0";
 long cioffset;
 
@@ -77,7 +76,12 @@ void sighandler(int sig, siginfo_t *info, void *ctx)
 	}
 	gotsig = sig;
 #ifdef __powerpc64__
-	ucp->uc_mcontext.gp_regs[PT_NIP] += 4;
+	if (prefixes_enabled) {
+		u32 inst = *(u32 *)ucp->uc_mcontext.gp_regs[PT_NIP];
+		ucp->uc_mcontext.gp_regs[PT_NIP] += ((inst >> 26 == 1) ? 8 : 4);
+	} else {
+		ucp->uc_mcontext.gp_regs[PT_NIP] += 4;
+	}
 #else
 	ucp->uc_mcontext.uc_regs->gregs[PT_NIP] += 4;
 #endif
@@ -648,6 +652,8 @@ int main(int argc, char *argv[])
 		exit(1);
 	}
 
+	prefixes_enabled = have_hwcap2(PPC_FEATURE2_ARCH_3_1);
+
 	rc |= test_harness(test_alignment_handler_vsx_206,
 			   "test_alignment_handler_vsx_206");
 	rc |= test_harness(test_alignment_handler_vsx_207,
diff --git a/tools/testing/selftests/powerpc/benchmarks/context_switch.c b/tools/testing/selftests/powerpc/benchmarks/context_switch.c
index d50cc05df495..96554e2794d1 100644
--- a/tools/testing/selftests/powerpc/benchmarks/context_switch.c
+++ b/tools/testing/selftests/powerpc/benchmarks/context_switch.c
@@ -481,6 +481,12 @@ int main(int argc, char *argv[])
 	else
 		printf("futex");
 
+	if (!have_hwcap(PPC_FEATURE_HAS_ALTIVEC))
+		touch_altivec = 0;
+
+	if (!have_hwcap(PPC_FEATURE_HAS_VSX))
+		touch_vector = 0;
+
 	printf(" on cpus %d/%d touching FP:%s altivec:%s vector:%s vdso:%s\n",
 	       cpu1, cpu2, touch_fp ?  "yes" : "no", touch_altivec ? "yes" : "no",
 	       touch_vector ? "yes" : "no", touch_vdso ? "yes" : "no");
diff --git a/tools/testing/selftests/powerpc/copyloops/.gitignore b/tools/testing/selftests/powerpc/copyloops/.gitignore
index ddaf140b8255..994b11af765c 100644
--- a/tools/testing/selftests/powerpc/copyloops/.gitignore
+++ b/tools/testing/selftests/powerpc/copyloops/.gitignore
@@ -12,4 +12,4 @@ memcpy_p7_t1
 copyuser_64_exc_t0
 copyuser_64_exc_t1
 copyuser_64_exc_t2
-memcpy_mcsafe_64
+copy_mc_64
diff --git a/tools/testing/selftests/powerpc/copyloops/Makefile b/tools/testing/selftests/powerpc/copyloops/Makefile
index 0917983a1c78..3095b1f1c02b 100644
--- a/tools/testing/selftests/powerpc/copyloops/Makefile
+++ b/tools/testing/selftests/powerpc/copyloops/Makefile
@@ -12,7 +12,7 @@ ASFLAGS = $(CFLAGS) -Wa,-mpower4
 TEST_GEN_PROGS := copyuser_64_t0 copyuser_64_t1 copyuser_64_t2 \
 		copyuser_p7_t0 copyuser_p7_t1 \
 		memcpy_64_t0 memcpy_64_t1 memcpy_64_t2 \
-		memcpy_p7_t0 memcpy_p7_t1 memcpy_mcsafe_64 \
+		memcpy_p7_t0 memcpy_p7_t1 copy_mc_64 \
 		copyuser_64_exc_t0 copyuser_64_exc_t1 copyuser_64_exc_t2
 
 EXTRA_SOURCES := validate.c ../harness.c stubs.S
@@ -45,9 +45,9 @@ $(OUTPUT)/memcpy_p7_t%:	memcpy_power7.S $(EXTRA_SOURCES)
 		-D SELFTEST_CASE=$(subst memcpy_p7_t,,$(notdir $@)) \
 		-o $@ $^
 
-$(OUTPUT)/memcpy_mcsafe_64: memcpy_mcsafe_64.S $(EXTRA_SOURCES)
+$(OUTPUT)/copy_mc_64: copy_mc_64.S $(EXTRA_SOURCES)
 	$(CC) $(CPPFLAGS) $(CFLAGS) \
-		-D COPY_LOOP=test_memcpy_mcsafe \
+		-D COPY_LOOP=test_copy_mc_generic \
 		-o $@ $^
 
 $(OUTPUT)/copyuser_64_exc_t%: copyuser_64.S exc_validate.c ../harness.c \
diff --git a/tools/testing/selftests/powerpc/copyloops/copy_mc_64.S b/tools/testing/selftests/powerpc/copyloops/copy_mc_64.S
new file mode 120000
index 000000000000..dcbe06d500fb
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/copy_mc_64.S
@@ -0,0 +1 @@
+../../../../../arch/powerpc/lib/copy_mc_64.S
+\ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/copyloops/memcpy_mcsafe_64.S b/tools/testing/selftests/powerpc/copyloops/memcpy_mcsafe_64.S
deleted file mode 120000
index f0feef3062f6..000000000000
--- a/tools/testing/selftests/powerpc/copyloops/memcpy_mcsafe_64.S
+++ /dev/null
@@ -1 +0,0 @@
-../../../../../arch/powerpc/lib/memcpy_mcsafe_64.S
-\ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/dscr/Makefile b/tools/testing/selftests/powerpc/dscr/Makefile
index cfa6eedcb66c..845db6273a1b 100644
--- a/tools/testing/selftests/powerpc/dscr/Makefile
+++ b/tools/testing/selftests/powerpc/dscr/Makefile
@@ -10,4 +10,4 @@ include ../../lib.mk
 
 $(OUTPUT)/dscr_default_test: LDLIBS += -lpthread
 
-$(TEST_GEN_PROGS): ../harness.c
+$(TEST_GEN_PROGS): ../harness.c ../utils.c
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_default_test.c b/tools/testing/selftests/powerpc/dscr/dscr_default_test.c
index 288a4e2ad156..e76611e608af 100644
--- a/tools/testing/selftests/powerpc/dscr/dscr_default_test.c
+++ b/tools/testing/selftests/powerpc/dscr/dscr_default_test.c
@@ -63,6 +63,8 @@ int dscr_default(void)
 	unsigned long i, *status[THREADS];
 	unsigned long orig_dscr_default;
 
+	SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR));
+
 	orig_dscr_default = get_default_dscr();
 
 	/* Initial DSCR default */
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c b/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c
index aefcd8d8759b..32fcf2b324b1 100644
--- a/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c
+++ b/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c
@@ -21,6 +21,8 @@ int dscr_explicit(void)
 {
 	unsigned long i, dscr = 0;
 
+	SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR));
+
 	srand(getpid());
 	set_dscr(dscr);
 
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c b/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c
index 7c1cb46397c6..c6a81b2d6b91 100644
--- a/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c
+++ b/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c
@@ -44,6 +44,8 @@ int dscr_inherit_exec(void)
 	unsigned long i, dscr = 0;
 	pid_t pid;
 
+	SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR));
+
 	for (i = 0; i < COUNT; i++) {
 		dscr++;
 		if (dscr > DSCR_MAX)
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c b/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c
index 04297a69ab59..f9dfd3d3c2d5 100644
--- a/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c
+++ b/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c
@@ -22,6 +22,8 @@ int dscr_inherit(void)
 	unsigned long i, dscr = 0;
 	pid_t pid;
 
+	SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR));
+
 	srand(getpid());
 	set_dscr(dscr);
 
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c
index 02f6b4efde14..fbbdffdb2e5d 100644
--- a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c
+++ b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c
@@ -77,6 +77,8 @@ int dscr_sysfs(void)
 	unsigned long orig_dscr_default;
 	int i, j;
 
+	SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR));
+
 	orig_dscr_default = get_default_dscr();
 	for (i = 0; i < COUNT; i++) {
 		for (j = 0; j < DSCR_MAX; j++) {
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c
index 37be2c25f277..191ed126f118 100644
--- a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c
+++ b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c
@@ -56,6 +56,8 @@ int dscr_sysfs_thread(void)
 	unsigned long orig_dscr_default;
 	int i, j;
 
+	SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR));
+
 	orig_dscr_default = get_default_dscr();
 	for (i = 0; i < COUNT; i++) {
 		for (j = 0; j < DSCR_MAX; j++) {
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_user_test.c b/tools/testing/selftests/powerpc/dscr/dscr_user_test.c
index eaf785d11eed..e09072446dd3 100644
--- a/tools/testing/selftests/powerpc/dscr/dscr_user_test.c
+++ b/tools/testing/selftests/powerpc/dscr/dscr_user_test.c
@@ -36,6 +36,8 @@ int dscr_user(void)
 {
 	int i;
 
+	SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR));
+
 	check_dscr("");
 
 	for (i = 0; i < COUNT; i++) {
diff --git a/tools/testing/selftests/powerpc/eeh/eeh-basic.sh b/tools/testing/selftests/powerpc/eeh/eeh-basic.sh
index 8a8d0f456946..0d783e1065c8 100755
--- a/tools/testing/selftests/powerpc/eeh/eeh-basic.sh
+++ b/tools/testing/selftests/powerpc/eeh/eeh-basic.sh
@@ -1,17 +1,19 @@
 #!/bin/sh
 # SPDX-License-Identifier: GPL-2.0-only
 
+KSELFTESTS_SKIP=4
+
 . ./eeh-functions.sh
 
 if ! eeh_supported ; then
 	echo "EEH not supported on this system, skipping"
-	exit 0;
+	exit $KSELFTESTS_SKIP;
 fi
 
 if [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_check" ] && \
    [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_break" ] ; then
 	echo "debugfs EEH testing files are missing. Is debugfs mounted?"
-	exit 1;
+	exit $KSELFTESTS_SKIP;
 fi
 
 pre_lspci=`mktemp`
@@ -84,4 +86,5 @@ echo "$failed devices failed to recover ($dev_count tested)"
 lspci | diff -u $pre_lspci -
 rm -f $pre_lspci
 
-exit $failed
+test "$failed" == 0
+exit $?
diff --git a/tools/testing/selftests/powerpc/include/utils.h b/tools/testing/selftests/powerpc/include/utils.h
index 71d2924f5b8b..052b5a775dc2 100644
--- a/tools/testing/selftests/powerpc/include/utils.h
+++ b/tools/testing/selftests/powerpc/include/utils.h
@@ -12,6 +12,7 @@
 #include <stdbool.h>
 #include <linux/auxvec.h>
 #include <linux/perf_event.h>
+#include <asm/cputable.h>
 #include "reg.h"
 
 /* Avoid headaches with PRI?64 - just use %ll? always */
@@ -35,7 +36,6 @@ int pick_online_cpu(void);
 int read_debugfs_file(char *debugfs_file, int *result);
 int write_debugfs_file(char *debugfs_file, int result);
 int read_sysfs_file(char *debugfs_file, char *result, size_t result_size);
-void set_dscr(unsigned long val);
 int perf_event_open_counter(unsigned int type,
 			    unsigned long config, int group_fd);
 int perf_event_enable(int fd);
diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore
index 91c775c23c66..aac4a59f9e28 100644
--- a/tools/testing/selftests/powerpc/mm/.gitignore
+++ b/tools/testing/selftests/powerpc/mm/.gitignore
@@ -2,6 +2,7 @@
 hugetlb_vs_thp_test
 subpage_prot
 tempfile
+prot_sao
 segv_errors
 wild_bctr
 large_vm_fork_separation
diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile
index 250ce172e0da..defe488d6bf1 100644
--- a/tools/testing/selftests/powerpc/mm/Makefile
+++ b/tools/testing/selftests/powerpc/mm/Makefile
@@ -2,7 +2,7 @@
 noarg:
 	$(MAKE) -C ../
 
-TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot segv_errors wild_bctr \
+TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr \
 		  large_vm_fork_separation bad_accesses pkey_exec_prot \
 		  pkey_siginfo stack_expansion_signal stack_expansion_ldst
 
@@ -14,6 +14,8 @@ include ../../lib.mk
 
 $(TEST_GEN_PROGS): ../harness.c ../utils.c
 
+$(OUTPUT)/prot_sao: ../utils.c
+
 $(OUTPUT)/wild_bctr: CFLAGS += -m64
 $(OUTPUT)/large_vm_fork_separation: CFLAGS += -m64
 $(OUTPUT)/bad_accesses: CFLAGS += -m64
diff --git a/tools/testing/selftests/powerpc/mm/bad_accesses.c b/tools/testing/selftests/powerpc/mm/bad_accesses.c
index a864ed7e2008..fd747b2ffcfc 100644
--- a/tools/testing/selftests/powerpc/mm/bad_accesses.c
+++ b/tools/testing/selftests/powerpc/mm/bad_accesses.c
@@ -139,5 +139,6 @@ static int test(void)
 
 int main(void)
 {
+	test_harness_set_timeout(300);
 	return test_harness(test, "bad_accesses");
 }
diff --git a/tools/testing/selftests/powerpc/mm/prot_sao.c b/tools/testing/selftests/powerpc/mm/prot_sao.c
new file mode 100644
index 000000000000..30b71b1d78d5
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/prot_sao.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2016, Michael Ellerman, IBM Corp.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+#include <asm/cputable.h>
+
+#include "utils.h"
+
+#define SIZE (64 * 1024)
+
+int test_prot_sao(void)
+{
+	char *p;
+
+	/*
+	 * SAO was introduced in 2.06 and removed in 3.1. It's disabled in
+	 * guests/LPARs by default, so also skip if we are running in a guest.
+	 */
+	SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06) ||
+		have_hwcap2(PPC_FEATURE2_ARCH_3_1) ||
+		access("/proc/device-tree/rtas/ibm,hypertas-functions", F_OK) == 0);
+
+	/*
+	 * Ensure we can ask for PROT_SAO.
+	 * We can't really verify that it does the right thing, but at least we
+	 * confirm the kernel will accept it.
+	 */
+	p = mmap(NULL, SIZE, PROT_READ | PROT_WRITE | PROT_SAO,
+		 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+	FAIL_IF(p == MAP_FAILED);
+
+	/* Write to the mapping, to at least cause a fault */
+	memset(p, 0xaa, SIZE);
+
+	return 0;
+}
+
+int main(void)
+{
+	return test_harness(test_prot_sao, "prot-sao");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c b/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c
index 2980abca31e0..2070a1e2b3a5 100644
--- a/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c
+++ b/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c
@@ -9,7 +9,6 @@
 #include <stdbool.h>
 #include <string.h>
 #include <sys/prctl.h>
-#include <asm/cputable.h>
 
 #include "event.h"
 #include "utils.h"
diff --git a/tools/testing/selftests/powerpc/pmu/l3_bank_test.c b/tools/testing/selftests/powerpc/pmu/l3_bank_test.c
index a96d512a18c4..a5dfa9bf3b9f 100644
--- a/tools/testing/selftests/powerpc/pmu/l3_bank_test.c
+++ b/tools/testing/selftests/powerpc/pmu/l3_bank_test.c
@@ -20,6 +20,9 @@ static int l3_bank_test(void)
 	char *p;
 	int i;
 
+	// The L3 bank logic is only used on Power8 or later
+	SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
+
 	p = malloc(MALLOC_SIZE);
 	FAIL_IF(!p);
 
diff --git a/tools/testing/selftests/powerpc/pmu/per_event_excludes.c b/tools/testing/selftests/powerpc/pmu/per_event_excludes.c
index 2d37942bf72b..ad32a09a6540 100644
--- a/tools/testing/selftests/powerpc/pmu/per_event_excludes.c
+++ b/tools/testing/selftests/powerpc/pmu/per_event_excludes.c
@@ -12,8 +12,6 @@
 #include <string.h>
 #include <sys/prctl.h>
 
-#include <asm/cputable.h>
-
 #include "event.h"
 #include "lib.h"
 #include "utils.h"
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
index fc477dfe86a2..2e0d86e0687e 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
@@ -20,6 +20,8 @@
 #include <signal.h>
 #include <sys/types.h>
 #include <sys/wait.h>
+#include <sys/syscall.h>
+#include <linux/limits.h>
 #include "ptrace.h"
 
 #define SPRN_PVR	0x11F
@@ -44,6 +46,7 @@ struct gstruct {
 };
 static volatile struct gstruct gstruct __attribute__((aligned(512)));
 
+static volatile char cwd[PATH_MAX] __attribute__((aligned(8)));
 
 static void get_dbginfo(pid_t child_pid, struct ppc_debug_info *dbginfo)
 {
@@ -138,6 +141,9 @@ static void test_workload(void)
 			write_var(len);
 	}
 
+	/* PTRACE_SET_DEBUGREG, Kernel Access Userspace test */
+	syscall(__NR_getcwd, &cwd, PATH_MAX);
+
 	/* PPC_PTRACE_SETHWDEBUG, MODE_EXACT, WO test */
 	write_var(1);
 
@@ -150,6 +156,9 @@ static void test_workload(void)
 	else
 		read_var(1);
 
+	/* PPC_PTRACE_SETHWDEBUG, MODE_EXACT, Kernel Access Userspace test */
+	syscall(__NR_getcwd, &cwd, PATH_MAX);
+
 	/* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW ALIGNED, WO test */
 	gstruct.a[rand() % A_LEN] = 'a';
 
@@ -293,6 +302,24 @@ static int test_set_debugreg(pid_t child_pid)
 	return 0;
 }
 
+static int test_set_debugreg_kernel_userspace(pid_t child_pid)
+{
+	unsigned long wp_addr = (unsigned long)cwd;
+	char *name = "PTRACE_SET_DEBUGREG";
+
+	/* PTRACE_SET_DEBUGREG, Kernel Access Userspace test */
+	wp_addr &= ~0x7UL;
+	wp_addr |= (1Ul << DABR_READ_SHIFT);
+	wp_addr |= (1UL << DABR_WRITE_SHIFT);
+	wp_addr |= (1UL << DABR_TRANSLATION_SHIFT);
+	ptrace_set_debugreg(child_pid, wp_addr);
+	ptrace(PTRACE_CONT, child_pid, NULL, 0);
+	check_success(child_pid, name, "Kernel Access Userspace", wp_addr, 8);
+
+	ptrace_set_debugreg(child_pid, 0);
+	return 0;
+}
+
 static void get_ppc_hw_breakpoint(struct ppc_hw_breakpoint *info, int type,
 				  unsigned long addr, int len)
 {
@@ -338,6 +365,22 @@ static void test_sethwdebug_exact(pid_t child_pid)
 	ptrace_delhwdebug(child_pid, wh);
 }
 
+static void test_sethwdebug_exact_kernel_userspace(pid_t child_pid)
+{
+	struct ppc_hw_breakpoint info;
+	unsigned long wp_addr = (unsigned long)&cwd;
+	char *name = "PPC_PTRACE_SETHWDEBUG, MODE_EXACT";
+	int len = 1; /* hardcoded in kernel */
+	int wh;
+
+	/* PPC_PTRACE_SETHWDEBUG, MODE_EXACT, Kernel Access Userspace test */
+	get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_WRITE, wp_addr, 0);
+	wh = ptrace_sethwdebug(child_pid, &info);
+	ptrace(PTRACE_CONT, child_pid, NULL, 0);
+	check_success(child_pid, name, "Kernel Access Userspace", wp_addr, len);
+	ptrace_delhwdebug(child_pid, wh);
+}
+
 static void test_sethwdebug_range_aligned(pid_t child_pid)
 {
 	struct ppc_hw_breakpoint info;
@@ -452,9 +495,10 @@ static void
 run_tests(pid_t child_pid, struct ppc_debug_info *dbginfo, bool dawr)
 {
 	test_set_debugreg(child_pid);
+	test_set_debugreg_kernel_userspace(child_pid);
+	test_sethwdebug_exact(child_pid);
+	test_sethwdebug_exact_kernel_userspace(child_pid);
 	if (dbginfo->features & PPC_DEBUG_FEATURE_DATA_BP_RANGE) {
-		test_sethwdebug_exact(child_pid);
-
 		test_sethwdebug_range_aligned(child_pid);
 		if (dawr || is_8xx) {
 			test_sethwdebug_range_unaligned(child_pid);
diff --git a/tools/testing/selftests/powerpc/security/rfi_flush.c b/tools/testing/selftests/powerpc/security/rfi_flush.c
index 0a7d0afb26b8..93a65bd1f231 100644
--- a/tools/testing/selftests/powerpc/security/rfi_flush.c
+++ b/tools/testing/selftests/powerpc/security/rfi_flush.c
@@ -10,6 +10,7 @@
 #include <stdint.h>
 #include <malloc.h>
 #include <unistd.h>
+#include <signal.h>
 #include <stdlib.h>
 #include <string.h>
 #include <stdio.h>
@@ -41,6 +42,40 @@ static void syscall_loop(char *p, unsigned long iterations,
 	}
 }
 
+static void sigill_handler(int signr, siginfo_t *info, void *unused)
+{
+	static int warned = 0;
+	ucontext_t *ctx = (ucontext_t *)unused;
+	unsigned long *pc = &UCONTEXT_NIA(ctx);
+
+	/* mtspr 3,RS to check for move to DSCR below */
+	if ((*((unsigned int *)*pc) & 0xfc1fffff) == 0x7c0303a6) {
+		if (!warned++)
+			printf("WARNING: Skipping over dscr setup. Consider running 'ppc64_cpu --dscr=1' manually.\n");
+		*pc += 4;
+	} else {
+		printf("SIGILL at %p\n", pc);
+		abort();
+	}
+}
+
+static void set_dscr(unsigned long val)
+{
+	static int init = 0;
+	struct sigaction sa;
+
+	if (!init) {
+		memset(&sa, 0, sizeof(sa));
+		sa.sa_sigaction = sigill_handler;
+		sa.sa_flags = SA_SIGINFO;
+		if (sigaction(SIGILL, &sa, NULL))
+			perror("sigill_handler");
+		init = 1;
+	}
+
+	asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR));
+}
+
 int rfi_flush_test(void)
 {
 	char *p;
@@ -54,6 +89,9 @@ int rfi_flush_test(void)
 
 	SKIP_IF(geteuid() != 0);
 
+	// The PMU event we use only works on Power7 or later
+	SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
+
 	if (read_debugfs_file("powerpc/rfi_flush", &rfi_flush_org)) {
 		perror("Unable to read powerpc/rfi_flush debugfs file");
 		SKIP_IF(1);
diff --git a/tools/testing/selftests/powerpc/security/spectre_v2.c b/tools/testing/selftests/powerpc/security/spectre_v2.c
index c8d82b784102..adc2b7294e5f 100644
--- a/tools/testing/selftests/powerpc/security/spectre_v2.c
+++ b/tools/testing/selftests/powerpc/security/spectre_v2.c
@@ -134,6 +134,9 @@ int spectre_v2_test(void)
 	s64 miss_percent;
 	bool is_p9;
 
+	// The PMU events we use only work on Power8 or later
+	SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
+
 	state = get_sysfs_state();
 	if (state == UNKNOWN) {
 		printf("Error: couldn't determine spectre_v2 mitigation state?\n");
diff --git a/tools/testing/selftests/powerpc/stringloops/memcmp.c b/tools/testing/selftests/powerpc/stringloops/memcmp.c
index 979df3d98368..cb2f18855c8d 100644
--- a/tools/testing/selftests/powerpc/stringloops/memcmp.c
+++ b/tools/testing/selftests/powerpc/stringloops/memcmp.c
@@ -4,7 +4,7 @@
 #include <string.h>
 #include <sys/mman.h>
 #include <time.h>
-#include <asm/cputable.h>
+
 #include "utils.h"
 
 #define SIZE 256
diff --git a/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S b/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S
index cc4930467235..7887f78cf072 100644
--- a/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S
+++ b/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S
@@ -3,9 +3,13 @@
 
 	.data
 	.balign 8
-message:
+success_message:
 	.ascii "success: switch_endian_test\n\0"
 
+	.balign 8
+failure_message:
+	.ascii "failure: switch_endian_test\n\0"
+
 	.section ".toc"
 	.balign 8
 pattern:
@@ -64,6 +68,9 @@ FUNC_START(_start)
 	li r0, __NR_switch_endian
 	sc
 
+	tdi   0, 0, 0x48	// b +8 if the endian was switched
+	b     .Lfail	  	// exit if endian didn't switch
+
 #include "check-reversed.S"
 
 	/* Flip back, r0 already has the switch syscall number */
@@ -71,12 +78,20 @@ FUNC_START(_start)
 
 #include "check.S"
 
+	ld	r4, success_message@got(%r2)
+	li	r5, 28	// strlen(success_message)
+	li	r14, 0	// exit status
+.Lout:
 	li	r0, __NR_write
 	li	r3, 1	/* stdout */
-	ld	r4, message@got(%r2)
-	li	r5, 28	/* strlen(message3) */
 	sc
 	li      r0, __NR_exit
-	li	r3, 0
+	mr	r3, r14
 	sc
 	b       .
+
+.Lfail:
+	ld	r4, failure_message@got(%r2)
+	li	r5, 28	// strlen(failure_message)
+	li	r14, 1
+	b	.Lout
diff --git a/tools/testing/selftests/powerpc/syscalls/Makefile b/tools/testing/selftests/powerpc/syscalls/Makefile
index 01b22775ca87..b63f8459c704 100644
--- a/tools/testing/selftests/powerpc/syscalls/Makefile
+++ b/tools/testing/selftests/powerpc/syscalls/Makefile
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0-only
-TEST_GEN_PROGS := ipc_unmuxed
+TEST_GEN_PROGS := ipc_unmuxed rtas_filter
 
 CFLAGS += -I../../../../../usr/include
 
diff --git a/tools/testing/selftests/powerpc/syscalls/rtas_filter.c b/tools/testing/selftests/powerpc/syscalls/rtas_filter.c
new file mode 100644
index 000000000000..03b487f18d00
--- /dev/null
+++ b/tools/testing/selftests/powerpc/syscalls/rtas_filter.c
@@ -0,0 +1,285 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright 2005-2020 IBM Corporation.
+ *
+ * Includes code from librtas (https://github.com/ibm-power-utilities/librtas/)
+ */
+
+#include <byteswap.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <errno.h>
+#include "utils.h"
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define cpu_to_be32(x)		bswap_32(x)
+#define be32_to_cpu(x)		bswap_32(x)
+#else
+#define cpu_to_be32(x)		(x)
+#define be32_to_cpu(x)		(x)
+#endif
+
+#define RTAS_IO_ASSERT	-1098	/* Unexpected I/O Error */
+#define RTAS_UNKNOWN_OP -1099	/* No Firmware Implementation of Function */
+#define BLOCK_SIZE 4096
+#define PAGE_SIZE 4096
+#define MAX_PAGES 64
+
+static const char *ofdt_rtas_path = "/proc/device-tree/rtas";
+
+typedef __be32 uint32_t;
+struct rtas_args {
+	__be32 token;
+	__be32 nargs;
+	__be32 nret;
+	__be32 args[16];
+	__be32 *rets;	  /* Pointer to return values in args[]. */
+};
+
+struct region {
+	uint64_t addr;
+	uint32_t size;
+	struct region *next;
+};
+
+int read_entire_file(int fd, char **buf, size_t *len)
+{
+	size_t buf_size = 0;
+	size_t off = 0;
+	int rc;
+
+	*buf = NULL;
+	do {
+		buf_size += BLOCK_SIZE;
+		if (*buf == NULL)
+			*buf = malloc(buf_size);
+		else
+			*buf = realloc(*buf, buf_size);
+
+		if (*buf == NULL)
+			return -ENOMEM;
+
+		rc = read(fd, *buf + off, BLOCK_SIZE);
+		if (rc < 0)
+			return -EIO;
+
+		off += rc;
+	} while (rc == BLOCK_SIZE);
+
+	if (len)
+		*len = off;
+
+	return 0;
+}
+
+static int open_prop_file(const char *prop_path, const char *prop_name, int *fd)
+{
+	char *path;
+	int len;
+
+	/* allocate enough for two string, a slash and trailing NULL */
+	len = strlen(prop_path) + strlen(prop_name) + 1 + 1;
+	path = malloc(len);
+	if (path == NULL)
+		return -ENOMEM;
+
+	snprintf(path, len, "%s/%s", prop_path, prop_name);
+
+	*fd = open(path, O_RDONLY);
+	free(path);
+	if (*fd < 0)
+		return -errno;
+
+	return 0;
+}
+
+static int get_property(const char *prop_path, const char *prop_name,
+			char **prop_val, size_t *prop_len)
+{
+	int rc, fd;
+
+	rc = open_prop_file(prop_path, prop_name, &fd);
+	if (rc)
+		return rc;
+
+	rc = read_entire_file(fd, prop_val, prop_len);
+	close(fd);
+
+	return rc;
+}
+
+int rtas_token(const char *call_name)
+{
+	char *prop_buf = NULL;
+	size_t len;
+	int rc;
+
+	rc = get_property(ofdt_rtas_path, call_name, &prop_buf, &len);
+	if (rc < 0) {
+		rc = RTAS_UNKNOWN_OP;
+		goto err;
+	}
+
+	rc = be32_to_cpu(*(int *)prop_buf);
+
+err:
+	free(prop_buf);
+	return rc;
+}
+
+static int read_kregion_bounds(struct region *kregion)
+{
+	char *buf;
+	int fd;
+	int rc;
+
+	fd = open("/proc/ppc64/rtas/rmo_buffer", O_RDONLY);
+	if (fd < 0) {
+		printf("Could not open rmo_buffer file\n");
+		return RTAS_IO_ASSERT;
+	}
+
+	rc = read_entire_file(fd, &buf, NULL);
+	close(fd);
+	if (rc) {
+		free(buf);
+		return rc;
+	}
+
+	sscanf(buf, "%" SCNx64 " %x", &kregion->addr, &kregion->size);
+	free(buf);
+
+	if (!(kregion->size && kregion->addr) ||
+	    (kregion->size > (PAGE_SIZE * MAX_PAGES))) {
+		printf("Unexpected kregion bounds\n");
+		return RTAS_IO_ASSERT;
+	}
+
+	return 0;
+}
+
+static int rtas_call(const char *name, int nargs,
+		     int nrets, ...)
+{
+	struct rtas_args args;
+	__be32 *rets[16];
+	int i, rc, token;
+	va_list ap;
+
+	va_start(ap, nrets);
+
+	token = rtas_token(name);
+	if (token == RTAS_UNKNOWN_OP) {
+		// We don't care if the call doesn't exist
+		printf("call '%s' not available, skipping...", name);
+		rc = RTAS_UNKNOWN_OP;
+		goto err;
+	}
+
+	args.token = cpu_to_be32(token);
+	args.nargs = cpu_to_be32(nargs);
+	args.nret = cpu_to_be32(nrets);
+
+	for (i = 0; i < nargs; i++)
+		args.args[i] = (__be32) va_arg(ap, unsigned long);
+
+	for (i = 0; i < nrets; i++)
+		rets[i] = (__be32 *) va_arg(ap, unsigned long);
+
+	rc = syscall(__NR_rtas, &args);
+	if (rc) {
+		rc = -errno;
+		goto err;
+	}
+
+	if (nrets) {
+		*(rets[0]) = be32_to_cpu(args.args[nargs]);
+
+		for (i = 1; i < nrets; i++) {
+			*(rets[i]) = args.args[nargs + i];
+		}
+	}
+
+err:
+	va_end(ap);
+	return rc;
+}
+
+static int test(void)
+{
+	struct region rmo_region;
+	uint32_t rmo_start;
+	uint32_t rmo_end;
+	__be32 rets[1];
+	int rc;
+
+	// Test a legitimate harmless call
+	// Expected: call succeeds
+	printf("Test a permitted call, no parameters... ");
+	rc = rtas_call("get-time-of-day", 0, 1, rets);
+	printf("rc: %d\n", rc);
+	FAIL_IF(rc != 0 && rc != RTAS_UNKNOWN_OP);
+
+	// Test a prohibited call
+	// Expected: call returns -EINVAL
+	printf("Test a prohibited call... ");
+	rc = rtas_call("nvram-fetch", 0, 1, rets);
+	printf("rc: %d\n", rc);
+	FAIL_IF(rc != -EINVAL && rc != RTAS_UNKNOWN_OP);
+
+	// Get RMO
+	rc = read_kregion_bounds(&rmo_region);
+	if (rc) {
+		printf("Couldn't read RMO region bounds, skipping remaining cases\n");
+		return 0;
+	}
+	rmo_start = rmo_region.addr;
+	rmo_end = rmo_start + rmo_region.size - 1;
+	printf("RMO range: %08x - %08x\n", rmo_start, rmo_end);
+
+	// Test a permitted call, user-supplied size, buffer inside RMO
+	// Expected: call succeeds
+	printf("Test a permitted call, user-supplied size, buffer inside RMO... ");
+	rc = rtas_call("ibm,get-system-parameter", 3, 1, 0, cpu_to_be32(rmo_start),
+		       cpu_to_be32(rmo_end - rmo_start + 1), rets);
+	printf("rc: %d\n", rc);
+	FAIL_IF(rc != 0 && rc != RTAS_UNKNOWN_OP);
+
+	// Test a permitted call, user-supplied size, buffer start outside RMO
+	// Expected: call returns -EINVAL
+	printf("Test a permitted call, user-supplied size, buffer start outside RMO... ");
+	rc = rtas_call("ibm,get-system-parameter", 3, 1, 0, cpu_to_be32(rmo_end + 1),
+		       cpu_to_be32(4000), rets);
+	printf("rc: %d\n", rc);
+	FAIL_IF(rc != -EINVAL && rc != RTAS_UNKNOWN_OP);
+
+	// Test a permitted call, user-supplied size, buffer end outside RMO
+	// Expected: call returns -EINVAL
+	printf("Test a permitted call, user-supplied size, buffer end outside RMO... ");
+	rc = rtas_call("ibm,get-system-parameter", 3, 1, 0, cpu_to_be32(rmo_start),
+		       cpu_to_be32(rmo_end - rmo_start + 2), rets);
+	printf("rc: %d\n", rc);
+	FAIL_IF(rc != -EINVAL && rc != RTAS_UNKNOWN_OP);
+
+	// Test a permitted call, fixed size, buffer end outside RMO
+	// Expected: call returns -EINVAL
+	printf("Test a permitted call, fixed size, buffer end outside RMO... ");
+	rc = rtas_call("ibm,configure-connector", 2, 1, cpu_to_be32(rmo_end - 4000), 0, rets);
+	printf("rc: %d\n", rc);
+	FAIL_IF(rc != -EINVAL && rc != RTAS_UNKNOWN_OP);
+
+	return 0;
+}
+
+int main(int argc, char *argv[])
+{
+	return test_harness(test, "rtas_filter");
+}
diff --git a/tools/testing/selftests/powerpc/tm/tm-poison.c b/tools/testing/selftests/powerpc/tm/tm-poison.c
index 977558497c16..29e5f26af7b9 100644
--- a/tools/testing/selftests/powerpc/tm/tm-poison.c
+++ b/tools/testing/selftests/powerpc/tm/tm-poison.c
@@ -26,7 +26,7 @@
 
 int tm_poison_test(void)
 {
-	int pid;
+	int cpu, pid;
 	cpu_set_t cpuset;
 	uint64_t poison = 0xdeadbeefc0dec0fe;
 	uint64_t unknown = 0;
@@ -35,10 +35,13 @@ int tm_poison_test(void)
 
 	SKIP_IF(!have_htm());
 
-	/* Attach both Child and Parent to CPU 0 */
+	cpu = pick_online_cpu();
+	FAIL_IF(cpu < 0);
+
+	// Attach both Child and Parent to the same CPU
 	CPU_ZERO(&cpuset);
-	CPU_SET(0, &cpuset);
-	sched_setaffinity(0, sizeof(cpuset), &cpuset);
+	CPU_SET(cpu, &cpuset);
+	FAIL_IF(sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0);
 
 	pid = fork();
 	if (!pid) {
diff --git a/tools/testing/selftests/powerpc/tm/tm-tmspr.c b/tools/testing/selftests/powerpc/tm/tm-tmspr.c
index 17becf3dcee4..794d574db784 100644
--- a/tools/testing/selftests/powerpc/tm/tm-tmspr.c
+++ b/tools/testing/selftests/powerpc/tm/tm-tmspr.c
@@ -33,19 +33,13 @@
 #include "utils.h"
 #include "tm.h"
 
-int	num_loops	= 10000;
+int	num_loops	= 1000000;
 int	passed = 1;
 
 void tfiar_tfhar(void *in)
 {
-	int i, cpu;
 	unsigned long tfhar, tfhar_rd, tfiar, tfiar_rd;
-	cpu_set_t cpuset;
-
-	CPU_ZERO(&cpuset);
-	cpu = (unsigned long)in >> 1;
-	CPU_SET(cpu, &cpuset);
-	sched_setaffinity(0, sizeof(cpuset), &cpuset);
+	int i;
 
 	/* TFIAR: Last bit has to be high so userspace can read register */
 	tfiar = ((unsigned long)in) + 1;
diff --git a/tools/testing/selftests/powerpc/tm/tm-trap.c b/tools/testing/selftests/powerpc/tm/tm-trap.c
index 601f0c1d450d..c75960af8018 100644
--- a/tools/testing/selftests/powerpc/tm/tm-trap.c
+++ b/tools/testing/selftests/powerpc/tm/tm-trap.c
@@ -247,8 +247,7 @@ void *pong(void *not_used)
 int tm_trap_test(void)
 {
 	uint16_t k = 1;
-
-	int rc;
+	int cpu, rc;
 
 	pthread_attr_t attr;
 	cpu_set_t cpuset;
@@ -267,9 +266,12 @@ int tm_trap_test(void)
 	usr1_sa.sa_sigaction = usr1_signal_handler;
 	sigaction(SIGUSR1, &usr1_sa, NULL);
 
-	/* Set only CPU 0 in the mask. Both threads will be bound to cpu 0. */
+	cpu = pick_online_cpu();
+	FAIL_IF(cpu < 0);
+
+	// Set only one CPU in the mask. Both threads will be bound to that CPU.
 	CPU_ZERO(&cpuset);
-	CPU_SET(0, &cpuset);
+	CPU_SET(cpu, &cpuset);
 
 	/* Init pthread attribute */
 	rc = pthread_attr_init(&attr);
diff --git a/tools/testing/selftests/powerpc/tm/tm-unavailable.c b/tools/testing/selftests/powerpc/tm/tm-unavailable.c
index 2ca2fccb0a3e..a1348a5f721a 100644
--- a/tools/testing/selftests/powerpc/tm/tm-unavailable.c
+++ b/tools/testing/selftests/powerpc/tm/tm-unavailable.c
@@ -338,16 +338,19 @@ void test_fp_vec(int fp, int vec, pthread_attr_t *attr)
 
 int tm_unavailable_test(void)
 {
-	int rc, exception; /* FP = 0, VEC = 1, VSX = 2 */
+	int cpu, rc, exception; /* FP = 0, VEC = 1, VSX = 2 */
 	pthread_t t1;
 	pthread_attr_t attr;
 	cpu_set_t cpuset;
 
 	SKIP_IF(!have_htm());
 
-	/* Set only CPU 0 in the mask. Both threads will be bound to CPU 0. */
+	cpu = pick_online_cpu();
+	FAIL_IF(cpu < 0);
+
+	// Set only one CPU in the mask. Both threads will be bound to that CPU.
 	CPU_ZERO(&cpuset);
-	CPU_SET(0, &cpuset);
+	CPU_SET(cpu, &cpuset);
 
 	/* Init pthread attribute. */
 	rc = pthread_attr_init(&attr);
diff --git a/tools/testing/selftests/powerpc/tm/tm.h b/tools/testing/selftests/powerpc/tm/tm.h
index c402464b038f..c5a1e5c163fc 100644
--- a/tools/testing/selftests/powerpc/tm/tm.h
+++ b/tools/testing/selftests/powerpc/tm/tm.h
@@ -6,9 +6,8 @@
 #ifndef _SELFTESTS_POWERPC_TM_TM_H
 #define _SELFTESTS_POWERPC_TM_TM_H
 
-#include <asm/tm.h>
-#include <asm/cputable.h>
 #include <stdbool.h>
+#include <asm/tm.h>
 
 #include "utils.h"
 
diff --git a/tools/testing/selftests/powerpc/utils.c b/tools/testing/selftests/powerpc/utils.c
index 18b6a773d5c7..1f36ee1a909a 100644
--- a/tools/testing/selftests/powerpc/utils.c
+++ b/tools/testing/selftests/powerpc/utils.c
@@ -10,7 +10,6 @@
 #include <fcntl.h>
 #include <link.h>
 #include <sched.h>
-#include <signal.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -273,40 +272,6 @@ int perf_event_reset(int fd)
 	return 0;
 }
 
-static void sigill_handler(int signr, siginfo_t *info, void *unused)
-{
-	static int warned = 0;
-	ucontext_t *ctx = (ucontext_t *)unused;
-	unsigned long *pc = &UCONTEXT_NIA(ctx);
-
-	/* mtspr 3,RS to check for move to DSCR below */
-	if ((*((unsigned int *)*pc) & 0xfc1fffff) == 0x7c0303a6) {
-		if (!warned++)
-			printf("WARNING: Skipping over dscr setup. Consider running 'ppc64_cpu --dscr=1' manually.\n");
-		*pc += 4;
-	} else {
-		printf("SIGILL at %p\n", pc);
-		abort();
-	}
-}
-
-void set_dscr(unsigned long val)
-{
-	static int init = 0;
-	struct sigaction sa;
-
-	if (!init) {
-		memset(&sa, 0, sizeof(sa));
-		sa.sa_sigaction = sigill_handler;
-		sa.sa_flags = SA_SIGINFO;
-		if (sigaction(SIGILL, &sa, NULL))
-			perror("sigill_handler");
-		init = 1;
-	}
-
-	asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR));
-}
-
 int using_hash_mmu(bool *using_hash)
 {
 	char line[128];
@@ -318,7 +283,9 @@ int using_hash_mmu(bool *using_hash)
 
 	rc = 0;
 	while (fgets(line, sizeof(line), f) != NULL) {
-		if (strcmp(line, "MMU		: Hash\n") == 0) {
+		if (!strcmp(line, "MMU		: Hash\n") ||
+		    !strcmp(line, "platform	: Cell\n") ||
+		    !strcmp(line, "platform	: PowerMac\n")) {
 			*using_hash = true;
 			goto out;
 		}
diff --git a/tools/testing/selftests/ptrace/.gitignore b/tools/testing/selftests/ptrace/.gitignore
index 7bebf9534a86..792318aaa30c 100644
--- a/tools/testing/selftests/ptrace/.gitignore
+++ b/tools/testing/selftests/ptrace/.gitignore
@@ -1,3 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0-only
 get_syscall_info
 peeksiginfo
+vmaccess
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale-ftrace.sh
index 7d3c2be66c64..d4bec538086d 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale-ftrace.sh
@@ -1,12 +1,12 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0+
 #
-# Analyze a given results directory for rcuperf performance measurements,
+# Analyze a given results directory for rcuscale performance measurements,
 # looking for ftrace data.  Exits with 0 if data was found, analyzed, and
-# printed.  Intended to be invoked from kvm-recheck-rcuperf.sh after
+# printed.  Intended to be invoked from kvm-recheck-rcuscale.sh after
 # argument checking.
 #
-# Usage: kvm-recheck-rcuperf-ftrace.sh resdir
+# Usage: kvm-recheck-rcuscale-ftrace.sh resdir
 #
 # Copyright (C) IBM Corporation, 2016
 #
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale.sh
index db0375a57f28..aa745152a525 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale.sh
@@ -1,9 +1,9 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0+
 #
-# Analyze a given results directory for rcuperf performance measurements.
+# Analyze a given results directory for rcuscale scalability measurements.
 #
-# Usage: kvm-recheck-rcuperf.sh resdir
+# Usage: kvm-recheck-rcuscale.sh resdir
 #
 # Copyright (C) IBM Corporation, 2016
 #
@@ -20,7 +20,7 @@ fi
 PATH=`pwd`/tools/testing/selftests/rcutorture/bin:$PATH; export PATH
 . functions.sh
 
-if kvm-recheck-rcuperf-ftrace.sh $i
+if kvm-recheck-rcuscale-ftrace.sh $i
 then
 	# ftrace data was successfully analyzed, call it good!
 	exit 0
@@ -30,12 +30,12 @@ configfile=`echo $i | sed -e 's/^.*\///'`
 
 sed -e 's/^\[[^]]*]//' < $i/console.log |
 awk '
-/-perf: .* gps: .* batches:/ {
+/-scale: .* gps: .* batches:/ {
 	ngps = $9;
 	nbatches = $11;
 }
 
-/-perf: .*writer-duration/ {
+/-scale: .*writer-duration/ {
 	gptimes[++n] = $5 / 1000.;
 	sum += $5 / 1000.;
 }
@@ -43,7 +43,7 @@ awk '
 END {
 	newNR = asort(gptimes);
 	if (newNR <= 0) {
-		print "No rcuperf records found???"
+		print "No rcuscale records found???"
 		exit;
 	}
 	pct50 = int(newNR * 50 / 100);
@@ -79,5 +79,5 @@ END {
 	print "99th percentile grace-period duration: " gptimes[pct99];
 	print "Maximum grace-period duration: " gptimes[newNR];
 	print "Grace periods: " ngps + 0 " Batches: " nbatches + 0 " Ratio: " ngps / nbatches;
-	print "Computed from rcuperf printk output.";
+	print "Computed from rcuscale printk output.";
 }'
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-scf.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-scf.sh
new file mode 100755
index 000000000000..671bfee4fcef
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-scf.sh
@@ -0,0 +1,38 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Analyze a given results directory for rcutorture progress.
+#
+# Usage: kvm-recheck-rcu.sh resdir
+#
+# Copyright (C) Facebook, 2020
+#
+# Authors: Paul E. McKenney <paulmck@kernel.org>
+
+i="$1"
+if test -d "$i" -a -r "$i"
+then
+	:
+else
+	echo Unreadable results directory: $i
+	exit 1
+fi
+. functions.sh
+
+configfile=`echo $i | sed -e 's/^.*\///'`
+nscfs="`grep 'scf_invoked_count ver:' $i/console.log 2> /dev/null | tail -1 | sed -e 's/^.* scf_invoked_count ver: //' -e 's/ .*$//' | tr -d '\015'`"
+if test -z "$nscfs"
+then
+	echo "$configfile ------- "
+else
+	dur="`sed -e 's/^.* scftorture.shutdown_secs=//' -e 's/ .*$//' < $i/qemu-cmd 2> /dev/null`"
+	if test -z "$dur"
+	then
+		rate=""
+	else
+		nscfss=`awk -v nscfs=$nscfs -v dur=$dur '
+			BEGIN { print nscfs / dur }' < /dev/null`
+		rate=" ($nscfss/s)"
+	fi
+	echo "${configfile} ------- ${nscfs} SCF handler invocations$rate"
+fi
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
index e07779a62634..6dc2b49b85ea 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
@@ -66,6 +66,7 @@ config_override_param () {
 echo > $T/KcList
 config_override_param "$config_dir/CFcommon" KcList "`cat $config_dir/CFcommon 2> /dev/null`"
 config_override_param "$config_template" KcList "`cat $config_template 2> /dev/null`"
+config_override_param "--gdb options" KcList "$TORTURE_KCONFIG_GDB_ARG"
 config_override_param "--kasan options" KcList "$TORTURE_KCONFIG_KASAN_ARG"
 config_override_param "--kcsan options" KcList "$TORTURE_KCONFIG_KCSAN_ARG"
 config_override_param "--kconfig argument" KcList "$TORTURE_KCONFIG_ARG"
@@ -152,7 +153,11 @@ qemu_append="`identify_qemu_append "$QEMU"`"
 boot_args="`configfrag_boot_params "$boot_args" "$config_template"`"
 # Generate kernel-version-specific boot parameters
 boot_args="`per_version_boot_params "$boot_args" $resdir/.config $seconds`"
-echo $QEMU $qemu_args -m $TORTURE_QEMU_MEM -kernel $KERNEL -append \"$qemu_append $boot_args\" > $resdir/qemu-cmd
+if test -n "$TORTURE_BOOT_GDB_ARG"
+then
+	boot_args="$boot_args $TORTURE_BOOT_GDB_ARG"
+fi
+echo $QEMU $qemu_args -m $TORTURE_QEMU_MEM -kernel $KERNEL -append \"$qemu_append $boot_args\" $TORTURE_QEMU_GDB_ARG > $resdir/qemu-cmd
 
 if test -n "$TORTURE_BUILDONLY"
 then
@@ -171,14 +176,26 @@ echo "NOTE: $QEMU either did not run or was interactive" > $resdir/console.log
 # Attempt to run qemu
 ( . $T/qemu-cmd; wait `cat  $resdir/qemu_pid`; echo $? > $resdir/qemu-retval ) &
 commandcompleted=0
-sleep 10 # Give qemu's pid a chance to reach the file
-if test -s "$resdir/qemu_pid"
+if test -z "$TORTURE_KCONFIG_GDB_ARG"
 then
-	qemu_pid=`cat "$resdir/qemu_pid"`
-	echo Monitoring qemu job at pid $qemu_pid
-else
-	qemu_pid=""
-	echo Monitoring qemu job at yet-as-unknown pid
+	sleep 10 # Give qemu's pid a chance to reach the file
+	if test -s "$resdir/qemu_pid"
+	then
+		qemu_pid=`cat "$resdir/qemu_pid"`
+		echo Monitoring qemu job at pid $qemu_pid
+	else
+		qemu_pid=""
+		echo Monitoring qemu job at yet-as-unknown pid
+	fi
+fi
+if test -n "$TORTURE_KCONFIG_GDB_ARG"
+then
+	echo Waiting for you to attach a debug session, for example: > /dev/tty
+	echo "    gdb $base_resdir/vmlinux" > /dev/tty
+	echo 'After symbols load and the "(gdb)" prompt appears:' > /dev/tty
+	echo "    target remote :1234" > /dev/tty
+	echo "    continue" > /dev/tty
+	kstarttime=`gawk 'BEGIN { print systime() }' < /dev/null`
 fi
 while :
 do
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index e655983b7429..6eb1d3f6524d 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -31,6 +31,9 @@ TORTURE_DEFCONFIG=defconfig
 TORTURE_BOOT_IMAGE=""
 TORTURE_INITRD="$KVM/initrd"; export TORTURE_INITRD
 TORTURE_KCONFIG_ARG=""
+TORTURE_KCONFIG_GDB_ARG=""
+TORTURE_BOOT_GDB_ARG=""
+TORTURE_QEMU_GDB_ARG=""
 TORTURE_KCONFIG_KASAN_ARG=""
 TORTURE_KCONFIG_KCSAN_ARG=""
 TORTURE_KMAKE_ARG=""
@@ -46,6 +49,7 @@ jitter="-1"
 
 usage () {
 	echo "Usage: $scriptname optional arguments:"
+	echo "       --allcpus"
 	echo "       --bootargs kernel-boot-arguments"
 	echo "       --bootimage relative-path-to-kernel-boot-image"
 	echo "       --buildonly"
@@ -55,17 +59,19 @@ usage () {
 	echo "       --defconfig string"
 	echo "       --dryrun sched|script"
 	echo "       --duration minutes"
+	echo "       --gdb"
+	echo "       --help"
 	echo "       --interactive"
 	echo "       --jitter N [ maxsleep (us) [ maxspin (us) ] ]"
 	echo "       --kconfig Kconfig-options"
 	echo "       --kmake-arg kernel-make-arguments"
 	echo "       --mac nn:nn:nn:nn:nn:nn"
-	echo "       --memory megabytes | nnnG"
+	echo "       --memory megabytes|nnnG"
 	echo "       --no-initrd"
 	echo "       --qemu-args qemu-arguments"
 	echo "       --qemu-cmd qemu-system-..."
 	echo "       --results absolute-pathname"
-	echo "       --torture rcu"
+	echo "       --torture lock|rcu|rcuscale|refscale|scf"
 	echo "       --trust-make"
 	exit 1
 }
@@ -126,6 +132,14 @@ do
 		dur=$(($2*60))
 		shift
 		;;
+	--gdb)
+		TORTURE_KCONFIG_GDB_ARG="CONFIG_DEBUG_INFO=y"; export TORTURE_KCONFIG_GDB_ARG
+		TORTURE_BOOT_GDB_ARG="nokaslr"; export TORTURE_BOOT_GDB_ARG
+		TORTURE_QEMU_GDB_ARG="-s -S"; export TORTURE_QEMU_GDB_ARG
+		;;
+	--help|-h)
+		usage
+		;;
 	--interactive)
 		TORTURE_QEMU_INTERACTIVE=1; export TORTURE_QEMU_INTERACTIVE
 		;;
@@ -184,13 +198,13 @@ do
 		shift
 		;;
 	--torture)
-		checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\|rcuperf\|refscale\)$' '^--'
+		checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\|rcuscale\|refscale\|scf\)$' '^--'
 		TORTURE_SUITE=$2
 		shift
-		if test "$TORTURE_SUITE" = rcuperf || test "$TORTURE_SUITE" = refscale
+		if test "$TORTURE_SUITE" = rcuscale || test "$TORTURE_SUITE" = refscale
 		then
 			# If you really want jitter for refscale or
-			# rcuperf, specify it after specifying the rcuperf
+			# rcuscale, specify it after specifying the rcuscale
 			# or the refscale.  (But why jitter in these cases?)
 			jitter=0
 		fi
@@ -248,6 +262,15 @@ do
 done
 touch $T/cfgcpu
 configs_derep="`echo $configs_derep | sed -e "s/\<CFLIST\>/$defaultconfigs/g"`"
+if test -n "$TORTURE_KCONFIG_GDB_ARG"
+then
+	if test "`echo $configs_derep | wc -w`" -gt 1
+	then
+		echo "The --config list is: $configs_derep."
+		echo "Only one --config permitted with --gdb, terminating."
+		exit 1
+	fi
+fi
 for CF1 in $configs_derep
 do
 	if test -f "$CONFIGFRAG/$CF1"
@@ -323,6 +346,9 @@ TORTURE_BUILDONLY="$TORTURE_BUILDONLY"; export TORTURE_BUILDONLY
 TORTURE_DEFCONFIG="$TORTURE_DEFCONFIG"; export TORTURE_DEFCONFIG
 TORTURE_INITRD="$TORTURE_INITRD"; export TORTURE_INITRD
 TORTURE_KCONFIG_ARG="$TORTURE_KCONFIG_ARG"; export TORTURE_KCONFIG_ARG
+TORTURE_KCONFIG_GDB_ARG="$TORTURE_KCONFIG_GDB_ARG"; export TORTURE_KCONFIG_GDB_ARG
+TORTURE_BOOT_GDB_ARG="$TORTURE_BOOT_GDB_ARG"; export TORTURE_BOOT_GDB_ARG
+TORTURE_QEMU_GDB_ARG="$TORTURE_QEMU_GDB_ARG"; export TORTURE_QEMU_GDB_ARG
 TORTURE_KCONFIG_KASAN_ARG="$TORTURE_KCONFIG_KASAN_ARG"; export TORTURE_KCONFIG_KASAN_ARG
 TORTURE_KCONFIG_KCSAN_ARG="$TORTURE_KCONFIG_KCSAN_ARG"; export TORTURE_KCONFIG_KCSAN_ARG
 TORTURE_KMAKE_ARG="$TORTURE_KMAKE_ARG"; export TORTURE_KMAKE_ARG
diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh
index 71a9f43a3918..e03338091a06 100755
--- a/tools/testing/selftests/rcutorture/bin/parse-console.sh
+++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh
@@ -33,8 +33,8 @@ then
 fi
 cat /dev/null > $file.diags
 
-# Check for proper termination, except for rcuperf and refscale.
-if test "$TORTURE_SUITE" != rcuperf && test "$TORTURE_SUITE" != refscale
+# Check for proper termination, except for rcuscale and refscale.
+if test "$TORTURE_SUITE" != rcuscale && test "$TORTURE_SUITE" != refscale
 then
 	# check for abject failure
 
@@ -67,6 +67,7 @@ then
 	grep --binary-files=text 'torture:.*ver:' $file |
 	egrep --binary-files=text -v '\(null\)|rtc: 000000000* ' |
 	sed -e 's/^(initramfs)[^]]*] //' -e 's/^\[[^]]*] //' |
+	sed -e 's/^.*ver: //' |
 	awk '
 	BEGIN	{
 		ver = 0;
@@ -74,13 +75,13 @@ then
 		}
 
 		{
-		if (!badseq && ($5 + 0 != $5 || $5 <= ver)) {
+		if (!badseq && ($1 + 0 != $1 || $1 <= ver)) {
 			badseqno1 = ver;
-			badseqno2 = $5;
+			badseqno2 = $1;
 			badseqnr = NR;
 			badseq = 1;
 		}
-		ver = $5
+		ver = $1
 		}
 
 	END	{
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE05 b/tools/testing/selftests/rcutorture/configs/rcu/TREE05
index 2dde0d9964e3..4f95f8544f3f 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE05
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE05
@@ -16,5 +16,6 @@ CONFIG_RCU_NOCB_CPU=y
 CONFIG_DEBUG_LOCK_ALLOC=y
 CONFIG_PROVE_LOCKING=y
 #CHECK#CONFIG_PROVE_RCU=y
+CONFIG_PROVE_RCU_LIST=y
 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
 CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/CFcommon b/tools/testing/selftests/rcutorture/configs/rcuperf/CFcommon
deleted file mode 100644
index a09816b8c0f3..000000000000
--- a/tools/testing/selftests/rcutorture/configs/rcuperf/CFcommon
+++ /dev/null
@@ -1,2 +0,0 @@
-CONFIG_RCU_PERF_TEST=y
-CONFIG_PRINTK_TIME=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/CFLIST b/tools/testing/selftests/rcutorture/configs/rcuscale/CFLIST
index c9f56cf20775..c9f56cf20775 100644
--- a/tools/testing/selftests/rcutorture/configs/rcuperf/CFLIST
+++ b/tools/testing/selftests/rcutorture/configs/rcuscale/CFLIST
diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon b/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon
new file mode 100644
index 000000000000..87caa0e932c7
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon
@@ -0,0 +1,2 @@
+CONFIG_RCU_SCALE_TEST=y
+CONFIG_PRINTK_TIME=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/TINY b/tools/testing/selftests/rcutorture/configs/rcuscale/TINY
index fb05ef5279b4..fb05ef5279b4 100644
--- a/tools/testing/selftests/rcutorture/configs/rcuperf/TINY
+++ b/tools/testing/selftests/rcutorture/configs/rcuscale/TINY
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE b/tools/testing/selftests/rcutorture/configs/rcuscale/TREE
index 721cfda76ab2..721cfda76ab2 100644
--- a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE
+++ b/tools/testing/selftests/rcutorture/configs/rcuscale/TREE
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE54 b/tools/testing/selftests/rcutorture/configs/rcuscale/TREE54
index 7629f5dd73b2..7629f5dd73b2 100644
--- a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE54
+++ b/tools/testing/selftests/rcutorture/configs/rcuscale/TREE54
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/rcuscale/ver_functions.sh
index 777d5b0c190f..0333e9b18522 100644
--- a/tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh
+++ b/tools/testing/selftests/rcutorture/configs/rcuscale/ver_functions.sh
@@ -11,6 +11,6 @@
 #
 # Adds per-version torture-module parameters to kernels supporting them.
 per_version_boot_params () {
-	echo $1 rcuperf.shutdown=1 \
-		rcuperf.verbose=1
+	echo $1 rcuscale.shutdown=1 \
+		rcuscale.verbose=1
 }
diff --git a/tools/testing/selftests/rcutorture/configs/scf/CFLIST b/tools/testing/selftests/rcutorture/configs/scf/CFLIST
new file mode 100644
index 000000000000..4d62eb4a39f9
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/scf/CFLIST
@@ -0,0 +1,2 @@
+NOPREEMPT
+PREEMPT
diff --git a/tools/testing/selftests/rcutorture/configs/scf/CFcommon b/tools/testing/selftests/rcutorture/configs/scf/CFcommon
new file mode 100644
index 000000000000..c11ab91f49f5
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/scf/CFcommon
@@ -0,0 +1,2 @@
+CONFIG_SCF_TORTURE_TEST=y
+CONFIG_PRINTK_TIME=y
diff --git a/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT b/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT
new file mode 100644
index 000000000000..b8429d6c6ebc
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT
@@ -0,0 +1,9 @@
+CONFIG_SMP=y
+CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=n
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=n
+CONFIG_NO_HZ_FULL=y
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_PROVE_LOCKING=n
diff --git a/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT.boot b/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT.boot
new file mode 100644
index 000000000000..d6a7fa097c2e
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT.boot
@@ -0,0 +1 @@
+nohz_full=1
diff --git a/tools/testing/selftests/rcutorture/configs/scf/PREEMPT b/tools/testing/selftests/rcutorture/configs/scf/PREEMPT
new file mode 100644
index 000000000000..ae4992b141b0
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/scf/PREEMPT
@@ -0,0 +1,9 @@
+CONFIG_SMP=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NO_HZ_FULL=n
+CONFIG_DEBUG_LOCK_ALLOC=y
+CONFIG_PROVE_LOCKING=y
diff --git a/tools/testing/selftests/rcutorture/configs/scf/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/scf/ver_functions.sh
new file mode 100644
index 000000000000..d3d9e35d3d55
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/scf/ver_functions.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Torture-suite-dependent shell functions for the rest of the scripts.
+#
+# Copyright (C) Facebook, 2020
+#
+# Authors: Paul E. McKenney <paulmck@kernel.org>
+
+# scftorture_param_onoff bootparam-string config-file
+#
+# Adds onoff scftorture module parameters to kernels having it.
+scftorture_param_onoff () {
+	if ! bootparam_hotplug_cpu "$1" && configfrag_hotplug_cpu "$2"
+	then
+		echo CPU-hotplug kernel, adding scftorture onoff. 1>&2
+		echo scftorture.onoff_interval=1000 scftorture.onoff_holdoff=30
+	fi
+}
+
+# per_version_boot_params bootparam-string config-file seconds
+#
+# Adds per-version torture-module parameters to kernels supporting them.
+per_version_boot_params () {
+	echo $1 `scftorture_param_onoff "$1" "$2"` \
+		scftorture.stat_interval=15 \
+		scftorture.shutdown_secs=$3 \
+		scftorture.verbose=1 \
+		scf
+}
diff --git a/tools/testing/selftests/rcutorture/doc/initrd.txt b/tools/testing/selftests/rcutorture/doc/initrd.txt
index 933b4fd12327..41a4255865d4 100644
--- a/tools/testing/selftests/rcutorture/doc/initrd.txt
+++ b/tools/testing/selftests/rcutorture/doc/initrd.txt
@@ -1,12 +1,11 @@
-The rcutorture scripting tools automatically create the needed initrd
-directory using dracut.  Failing that, this tool will create an initrd
-containing a single statically linked binary named "init" that loops
-over a very long sleep() call.  In both cases, this creation is done
-by tools/testing/selftests/rcutorture/bin/mkinitrd.sh.
+The rcutorture scripting tools automatically create an initrd containing
+a single statically linked binary named "init" that loops over a
+very long sleep() call.  In both cases, this creation is done by
+tools/testing/selftests/rcutorture/bin/mkinitrd.sh.
 
-However, if you are attempting to run rcutorture on a system that does
-not have dracut installed, and if you don't like the notion of static
-linking, you might wish to press an existing initrd into service:
+However, if you don't like the notion of statically linked bare-bones
+userspace environments, you might wish to press an existing initrd
+into service:
 
 ------------------------------------------------------------------------
 cd tools/testing/selftests/rcutorture
@@ -15,24 +14,3 @@ mkdir initrd
 cd initrd
 cpio -id < /tmp/initrd.img.zcat
 # Manually verify that initrd contains needed binaries and libraries.
-------------------------------------------------------------------------
-
-Interestingly enough, if you are running rcutorture, you don't really
-need userspace in many cases.  Running without userspace has the
-advantage of allowing you to test your kernel independently of the
-distro in place, the root-filesystem layout, and so on.  To make this
-happen, put the following script in the initrd's tree's "/init" file,
-with 0755 mode.
-
-------------------------------------------------------------------------
-#!/bin/sh
-
-while :
-do
-	sleep 10
-done
-------------------------------------------------------------------------
-
-This approach also allows most of the binaries and libraries in the
-initrd filesystem to be dispensed with, which can save significant
-space in rcutorture's "res" directory.
diff --git a/tools/testing/selftests/rcutorture/doc/rcu-test-image.txt b/tools/testing/selftests/rcutorture/doc/rcu-test-image.txt
index 449cf579d6f9..b2fc247976b1 100644
--- a/tools/testing/selftests/rcutorture/doc/rcu-test-image.txt
+++ b/tools/testing/selftests/rcutorture/doc/rcu-test-image.txt
@@ -1,8 +1,33 @@
-This document describes one way to create the rcu-test-image file
-that contains the filesystem used by the guest-OS kernel.  There are
-probably much better ways of doing this, and this filesystem could no
-doubt be smaller.  It is probably also possible to simply download
-an appropriate image from any number of places.
+Normally, a minimal initrd is created automatically by the rcutorture
+scripting.  But minimal really does mean "minimal", namely just a single
+root directory with a single statically linked executable named "init":
+
+$ size tools/testing/selftests/rcutorture/initrd/init
+   text    data     bss     dec     hex filename
+    328       0       8     336     150 tools/testing/selftests/rcutorture/initrd/init
+
+Suppose you need to run some scripts, perhaps to monitor or control
+some aspect of the rcutorture testing.  This will require a more fully
+filled-out userspace, perhaps containing libraries, executables for
+the shell and other utilities, and soforth.  In that case, place your
+desired filesystem here:
+
+	tools/testing/selftests/rcutorture/initrd
+
+For example, your tools/testing/selftests/rcutorture/initrd/init might
+be a script that does any needed mount operations and starts whatever
+scripts need starting to properly monitor or control your testing.
+The next rcutorture build will then incorporate this filesystem into
+the kernel image that is passed to qemu.
+
+Or maybe you need a real root filesystem for some reason, in which case
+please read on!
+
+The remainder of this document describes one way to create the
+rcu-test-image file that contains the filesystem used by the guest-OS
+kernel.  There are probably much better ways of doing this, and this
+filesystem could no doubt be smaller.  It is probably also possible to
+simply download an appropriate image from any number of places.
 
 That said, here are the commands:
 
@@ -36,7 +61,7 @@ References:
 	https://help.ubuntu.com/community/JeOSVMBuilder
 	http://wiki.libvirt.org/page/UbuntuKVMWalkthrough
 	http://www.moe.co.uk/2011/01/07/pci_add_option_rom-failed-to-find-romfile-pxe-rtl8139-bin/ -- "apt-get install kvm-pxe"
-	http://www.landley.net/writing/rootfs-howto.html
-	http://en.wikipedia.org/wiki/Initrd
-	http://en.wikipedia.org/wiki/Cpio
+	https://www.landley.net/writing/rootfs-howto.html
+	https://en.wikipedia.org/wiki/Initrd
+	https://en.wikipedia.org/wiki/Cpio
 	http://wiki.libvirt.org/page/UbuntuKVMWalkthrough
diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c
index e8a657a5f48a..384589095864 100644
--- a/tools/testing/selftests/rseq/param_test.c
+++ b/tools/testing/selftests/rseq/param_test.c
@@ -1,8 +1,10 @@
 // SPDX-License-Identifier: LGPL-2.1
 #define _GNU_SOURCE
 #include <assert.h>
+#include <linux/membarrier.h>
 #include <pthread.h>
 #include <sched.h>
+#include <stdatomic.h>
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -1131,6 +1133,220 @@ static int set_signal_handler(void)
 	return ret;
 }
 
+struct test_membarrier_thread_args {
+	int stop;
+	intptr_t percpu_list_ptr;
+};
+
+/* Worker threads modify data in their "active" percpu lists. */
+void *test_membarrier_worker_thread(void *arg)
+{
+	struct test_membarrier_thread_args *args =
+		(struct test_membarrier_thread_args *)arg;
+	const int iters = opt_reps;
+	int i;
+
+	if (rseq_register_current_thread()) {
+		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
+			errno, strerror(errno));
+		abort();
+	}
+
+	/* Wait for initialization. */
+	while (!atomic_load(&args->percpu_list_ptr)) {}
+
+	for (i = 0; i < iters; ++i) {
+		int ret;
+
+		do {
+			int cpu = rseq_cpu_start();
+
+			ret = rseq_offset_deref_addv(&args->percpu_list_ptr,
+				sizeof(struct percpu_list_entry) * cpu, 1, cpu);
+		} while (rseq_unlikely(ret));
+	}
+
+	if (rseq_unregister_current_thread()) {
+		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
+			errno, strerror(errno));
+		abort();
+	}
+	return NULL;
+}
+
+void test_membarrier_init_percpu_list(struct percpu_list *list)
+{
+	int i;
+
+	memset(list, 0, sizeof(*list));
+	for (i = 0; i < CPU_SETSIZE; i++) {
+		struct percpu_list_node *node;
+
+		node = malloc(sizeof(*node));
+		assert(node);
+		node->data = 0;
+		node->next = NULL;
+		list->c[i].head = node;
+	}
+}
+
+void test_membarrier_free_percpu_list(struct percpu_list *list)
+{
+	int i;
+
+	for (i = 0; i < CPU_SETSIZE; i++)
+		free(list->c[i].head);
+}
+
+static int sys_membarrier(int cmd, int flags, int cpu_id)
+{
+	return syscall(__NR_membarrier, cmd, flags, cpu_id);
+}
+
+/*
+ * The manager thread swaps per-cpu lists that worker threads see,
+ * and validates that there are no unexpected modifications.
+ */
+void *test_membarrier_manager_thread(void *arg)
+{
+	struct test_membarrier_thread_args *args =
+		(struct test_membarrier_thread_args *)arg;
+	struct percpu_list list_a, list_b;
+	intptr_t expect_a = 0, expect_b = 0;
+	int cpu_a = 0, cpu_b = 0;
+
+	if (rseq_register_current_thread()) {
+		fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
+			errno, strerror(errno));
+		abort();
+	}
+
+	/* Init lists. */
+	test_membarrier_init_percpu_list(&list_a);
+	test_membarrier_init_percpu_list(&list_b);
+
+	atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a);
+
+	while (!atomic_load(&args->stop)) {
+		/* list_a is "active". */
+		cpu_a = rand() % CPU_SETSIZE;
+		/*
+		 * As list_b is "inactive", we should never see changes
+		 * to list_b.
+		 */
+		if (expect_b != atomic_load(&list_b.c[cpu_b].head->data)) {
+			fprintf(stderr, "Membarrier test failed\n");
+			abort();
+		}
+
+		/* Make list_b "active". */
+		atomic_store(&args->percpu_list_ptr, (intptr_t)&list_b);
+		if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
+					MEMBARRIER_CMD_FLAG_CPU, cpu_a) &&
+				errno != ENXIO /* missing CPU */) {
+			perror("sys_membarrier");
+			abort();
+		}
+		/*
+		 * Cpu A should now only modify list_b, so the values
+		 * in list_a should be stable.
+		 */
+		expect_a = atomic_load(&list_a.c[cpu_a].head->data);
+
+		cpu_b = rand() % CPU_SETSIZE;
+		/*
+		 * As list_a is "inactive", we should never see changes
+		 * to list_a.
+		 */
+		if (expect_a != atomic_load(&list_a.c[cpu_a].head->data)) {
+			fprintf(stderr, "Membarrier test failed\n");
+			abort();
+		}
+
+		/* Make list_a "active". */
+		atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a);
+		if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
+					MEMBARRIER_CMD_FLAG_CPU, cpu_b) &&
+				errno != ENXIO /* missing CPU*/) {
+			perror("sys_membarrier");
+			abort();
+		}
+		/* Remember a value from list_b. */
+		expect_b = atomic_load(&list_b.c[cpu_b].head->data);
+	}
+
+	test_membarrier_free_percpu_list(&list_a);
+	test_membarrier_free_percpu_list(&list_b);
+
+	if (rseq_unregister_current_thread()) {
+		fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
+			errno, strerror(errno));
+		abort();
+	}
+	return NULL;
+}
+
+/* Test MEMBARRIER_CMD_PRIVATE_RESTART_RSEQ_ON_CPU membarrier command. */
+#ifdef RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV
+void test_membarrier(void)
+{
+	const int num_threads = opt_threads;
+	struct test_membarrier_thread_args thread_args;
+	pthread_t worker_threads[num_threads];
+	pthread_t manager_thread;
+	int i, ret;
+
+	if (sys_membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0)) {
+		perror("sys_membarrier");
+		abort();
+	}
+
+	thread_args.stop = 0;
+	thread_args.percpu_list_ptr = 0;
+	ret = pthread_create(&manager_thread, NULL,
+			test_membarrier_manager_thread, &thread_args);
+	if (ret) {
+		errno = ret;
+		perror("pthread_create");
+		abort();
+	}
+
+	for (i = 0; i < num_threads; i++) {
+		ret = pthread_create(&worker_threads[i], NULL,
+				test_membarrier_worker_thread, &thread_args);
+		if (ret) {
+			errno = ret;
+			perror("pthread_create");
+			abort();
+		}
+	}
+
+
+	for (i = 0; i < num_threads; i++) {
+		ret = pthread_join(worker_threads[i], NULL);
+		if (ret) {
+			errno = ret;
+			perror("pthread_join");
+			abort();
+		}
+	}
+
+	atomic_store(&thread_args.stop, 1);
+	ret = pthread_join(manager_thread, NULL);
+	if (ret) {
+		errno = ret;
+		perror("pthread_join");
+		abort();
+	}
+}
+#else /* RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV */
+void test_membarrier(void)
+{
+	fprintf(stderr, "rseq_offset_deref_addv is not implemented on this architecture. "
+			"Skipping membarrier test.\n");
+}
+#endif
+
 static void show_usage(int argc, char **argv)
 {
 	printf("Usage : %s <OPTIONS>\n",
@@ -1153,7 +1369,7 @@ static void show_usage(int argc, char **argv)
 	printf("	[-r N] Number of repetitions per thread (default 5000)\n");
 	printf("	[-d] Disable rseq system call (no initialization)\n");
 	printf("	[-D M] Disable rseq for each M threads\n");
-	printf("	[-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement\n");
+	printf("	[-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement, membarrie(r)\n");
 	printf("	[-M] Push into buffer and memcpy buffer with memory barriers.\n");
 	printf("	[-v] Verbose output.\n");
 	printf("	[-h] Show this help.\n");
@@ -1268,6 +1484,7 @@ int main(int argc, char **argv)
 			case 'i':
 			case 'b':
 			case 'm':
+			case 'r':
 				break;
 			default:
 				show_usage(argc, argv);
@@ -1320,6 +1537,10 @@ int main(int argc, char **argv)
 		printf_verbose("counter increment\n");
 		test_percpu_inc();
 		break;
+	case 'r':
+		printf_verbose("membarrier\n");
+		test_membarrier();
+		break;
 	}
 	if (!opt_disable_rseq && rseq_unregister_current_thread())
 		abort();
diff --git a/tools/testing/selftests/rseq/rseq-x86.h b/tools/testing/selftests/rseq/rseq-x86.h
index b2da6004fe30..640411518e46 100644
--- a/tools/testing/selftests/rseq/rseq-x86.h
+++ b/tools/testing/selftests/rseq/rseq-x86.h
@@ -279,6 +279,63 @@ error1:
 #endif
 }
 
+#define RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV
+
+/*
+ *   pval = *(ptr+off)
+ *  *pval += inc;
+ */
+static inline __attribute__((always_inline))
+int rseq_offset_deref_addv(intptr_t *ptr, off_t off, intptr_t inc, int cpu)
+{
+	RSEQ_INJECT_C(9)
+
+	__asm__ __volatile__ goto (
+		RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+#endif
+		/* Start rseq by storing table entry pointer into rseq_cs. */
+		RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
+		RSEQ_INJECT_ASM(3)
+#ifdef RSEQ_COMPARE_TWICE
+		RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
+#endif
+		/* get p+v */
+		"movq %[ptr], %%rbx\n\t"
+		"addq %[off], %%rbx\n\t"
+		/* get pv */
+		"movq (%%rbx), %%rcx\n\t"
+		/* *pv += inc */
+		"addq %[inc], (%%rcx)\n\t"
+		"2:\n\t"
+		RSEQ_INJECT_ASM(4)
+		RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+		: /* gcc asm goto does not allow outputs */
+		: [cpu_id]		"r" (cpu),
+		  [rseq_abi]		"r" (&__rseq_abi),
+		  /* final store input */
+		  [ptr]			"m" (*ptr),
+		  [off]			"er" (off),
+		  [inc]			"er" (inc)
+		: "memory", "cc", "rax", "rbx", "rcx"
+		  RSEQ_INJECT_CLOBBER
+		: abort
+#ifdef RSEQ_COMPARE_TWICE
+		  , error1
+#endif
+	);
+	return 0;
+abort:
+	RSEQ_INJECT_FAILED
+	return -1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+	rseq_bug("cpu_id comparison failed");
+#endif
+}
+
 static inline __attribute__((always_inline))
 int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
 				 intptr_t *v2, intptr_t newv2,
diff --git a/tools/testing/selftests/rseq/run_param_test.sh b/tools/testing/selftests/rseq/run_param_test.sh
index e426304fd4a0..f51bc83c9e41 100755
--- a/tools/testing/selftests/rseq/run_param_test.sh
+++ b/tools/testing/selftests/rseq/run_param_test.sh
@@ -15,6 +15,7 @@ TEST_LIST=(
 	"-T m"
 	"-T m -M"
 	"-T i"
+	"-T r"
 )
 
 TEST_NAME=(
@@ -25,6 +26,7 @@ TEST_NAME=(
 	"memcpy"
 	"memcpy with barrier"
 	"increment"
+	"membarrier"
 )
 IFS="$OLDIFS"
 
diff --git a/tools/testing/selftests/run_kselftest.sh b/tools/testing/selftests/run_kselftest.sh
new file mode 100755
index 000000000000..609a4ef9300e
--- /dev/null
+++ b/tools/testing/selftests/run_kselftest.sh
@@ -0,0 +1,93 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run installed kselftest tests.
+#
+BASE_DIR=$(realpath $(dirname $0))
+cd $BASE_DIR
+TESTS="$BASE_DIR"/kselftest-list.txt
+if [ ! -r "$TESTS" ] ; then
+	echo "$0: Could not find list of tests to run ($TESTS)" >&2
+	available=""
+else
+	available="$(cat "$TESTS")"
+fi
+
+. ./kselftest/runner.sh
+ROOT=$PWD
+
+usage()
+{
+	cat <<EOF
+Usage: $0 [OPTIONS]
+  -s | --summary		Print summary with detailed log in output.log
+  -t | --test COLLECTION:TEST	Run TEST from COLLECTION
+  -c | --collection COLLECTION	Run all tests from COLLECTION
+  -l | --list			List the available collection:test entries
+  -d | --dry-run		Don't actually run any tests
+  -h | --help			Show this usage info
+EOF
+	exit $1
+}
+
+COLLECTIONS=""
+TESTS=""
+dryrun=""
+while true; do
+	case "$1" in
+		-s | --summary)
+			logfile="$BASE_DIR"/output.log
+			cat /dev/null > $logfile
+			shift ;;
+		-t | --test)
+			TESTS="$TESTS $2"
+			shift 2 ;;
+		-c | --collection)
+			COLLECTIONS="$COLLECTIONS $2"
+			shift 2 ;;
+		-l | --list)
+			echo "$available"
+			exit 0 ;;
+		-n | --dry-run)
+			dryrun="echo"
+			shift ;;
+		-h | --help)
+			usage 0 ;;
+		"")
+			break ;;
+		*)
+			usage 1 ;;
+	esac
+done
+
+# Add all selected collections to the explicit test list.
+if [ -n "$COLLECTIONS" ]; then
+	for collection in $COLLECTIONS ; do
+		found="$(echo "$available" | grep "^$collection:")"
+		if [ -z "$found" ] ; then
+			echo "No such collection '$collection'" >&2
+			exit 1
+		fi
+		TESTS="$TESTS $found"
+	done
+fi
+# Replace available test list with explicitly selected tests.
+if [ -n "$TESTS" ]; then
+	valid=""
+	for test in $TESTS ; do
+		found="$(echo "$available" | grep "^${test}$")"
+		if [ -z "$found" ] ; then
+			echo "No such test '$test'" >&2
+			exit 1
+		fi
+		valid="$valid $found"
+	done
+	available="$(echo "$valid" | sed -e 's/ /\n/g')"
+fi
+
+collections=$(echo "$available" | cut -d: -f1 | uniq)
+for collection in $collections ; do
+	[ -w /dev/kmsg ] && echo "kselftest: Running tests in $collection" >> /dev/kmsg
+	tests=$(echo "$available" | grep "^$collection:" | cut -d: -f2)
+	($dryrun cd "$collection" && $dryrun run_many $tests)
+done
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 7a6d40286a42..4a180439ee9e 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -774,8 +774,15 @@ void *kill_thread(void *data)
 	return (void *)SIBLING_EXIT_UNKILLED;
 }
 
+enum kill_t {
+	KILL_THREAD,
+	KILL_PROCESS,
+	RET_UNKNOWN
+};
+
 /* Prepare a thread that will kill itself or both of us. */
-void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process)
+void kill_thread_or_group(struct __test_metadata *_metadata,
+			  enum kill_t kill_how)
 {
 	pthread_t thread;
 	void *status;
@@ -791,11 +798,12 @@ void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process)
 		.len = (unsigned short)ARRAY_SIZE(filter_thread),
 		.filter = filter_thread,
 	};
+	int kill = kill_how == KILL_PROCESS ? SECCOMP_RET_KILL_PROCESS : 0xAAAAAAAAA;
 	struct sock_filter filter_process[] = {
 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
 			offsetof(struct seccomp_data, nr)),
 		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
-		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_PROCESS),
+		BPF_STMT(BPF_RET|BPF_K, kill),
 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 	};
 	struct sock_fprog prog_process = {
@@ -808,13 +816,15 @@ void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process)
 	}
 
 	ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0,
-			     kill_process ? &prog_process : &prog_thread));
+			     kill_how == KILL_THREAD ? &prog_thread
+						     : &prog_process));
 
 	/*
 	 * Add the KILL_THREAD rule again to make sure that the KILL_PROCESS
 	 * flag cannot be downgraded by a new filter.
 	 */
-	ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread));
+	if (kill_how == KILL_PROCESS)
+		ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread));
 
 	/* Start a thread that will exit immediately. */
 	ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)false));
@@ -842,7 +852,7 @@ TEST(KILL_thread)
 	child_pid = fork();
 	ASSERT_LE(0, child_pid);
 	if (child_pid == 0) {
-		kill_thread_or_group(_metadata, false);
+		kill_thread_or_group(_metadata, KILL_THREAD);
 		_exit(38);
 	}
 
@@ -861,7 +871,7 @@ TEST(KILL_process)
 	child_pid = fork();
 	ASSERT_LE(0, child_pid);
 	if (child_pid == 0) {
-		kill_thread_or_group(_metadata, true);
+		kill_thread_or_group(_metadata, KILL_PROCESS);
 		_exit(38);
 	}
 
@@ -872,6 +882,27 @@ TEST(KILL_process)
 	ASSERT_EQ(SIGSYS, WTERMSIG(status));
 }
 
+TEST(KILL_unknown)
+{
+	int status;
+	pid_t child_pid;
+
+	child_pid = fork();
+	ASSERT_LE(0, child_pid);
+	if (child_pid == 0) {
+		kill_thread_or_group(_metadata, RET_UNKNOWN);
+		_exit(38);
+	}
+
+	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
+
+	/* If the entire process was killed, we'll see SIGSYS. */
+	EXPECT_TRUE(WIFSIGNALED(status)) {
+		TH_LOG("Unknown SECCOMP_RET is only killing the thread?");
+	}
+	ASSERT_EQ(SIGSYS, WTERMSIG(status));
+}
+
 /* TODO(wad) add 64-bit versus 32-bit arg tests. */
 TEST(arg_out_of_range)
 {
@@ -1667,70 +1698,148 @@ TEST_F(TRACE_poke, getpid_runs_normally)
 }
 
 #if defined(__x86_64__)
-# define ARCH_REGS	struct user_regs_struct
-# define SYSCALL_NUM	orig_rax
-# define SYSCALL_RET	rax
+# define ARCH_REGS		struct user_regs_struct
+# define SYSCALL_NUM(_regs)	(_regs).orig_rax
+# define SYSCALL_RET(_regs)	(_regs).rax
 #elif defined(__i386__)
-# define ARCH_REGS	struct user_regs_struct
-# define SYSCALL_NUM	orig_eax
-# define SYSCALL_RET	eax
+# define ARCH_REGS		struct user_regs_struct
+# define SYSCALL_NUM(_regs)	(_regs).orig_eax
+# define SYSCALL_RET(_regs)	(_regs).eax
 #elif defined(__arm__)
-# define ARCH_REGS	struct pt_regs
-# define SYSCALL_NUM	ARM_r7
-# define SYSCALL_RET	ARM_r0
+# define ARCH_REGS		struct pt_regs
+# define SYSCALL_NUM(_regs)	(_regs).ARM_r7
+# ifndef PTRACE_SET_SYSCALL
+#  define PTRACE_SET_SYSCALL   23
+# endif
+# define SYSCALL_NUM_SET(_regs, _nr)	\
+		EXPECT_EQ(0, ptrace(PTRACE_SET_SYSCALL, tracee, NULL, _nr))
+# define SYSCALL_RET(_regs)	(_regs).ARM_r0
 #elif defined(__aarch64__)
-# define ARCH_REGS	struct user_pt_regs
-# define SYSCALL_NUM	regs[8]
-# define SYSCALL_RET	regs[0]
+# define ARCH_REGS		struct user_pt_regs
+# define SYSCALL_NUM(_regs)	(_regs).regs[8]
+# ifndef NT_ARM_SYSTEM_CALL
+#  define NT_ARM_SYSTEM_CALL 0x404
+# endif
+# define SYSCALL_NUM_SET(_regs, _nr)				\
+	do {							\
+		struct iovec __v;				\
+		typeof(_nr) __nr = (_nr);			\
+		__v.iov_base = &__nr;				\
+		__v.iov_len = sizeof(__nr);			\
+		EXPECT_EQ(0, ptrace(PTRACE_SETREGSET, tracee,	\
+				    NT_ARM_SYSTEM_CALL, &__v));	\
+	} while (0)
+# define SYSCALL_RET(_regs)	(_regs).regs[0]
 #elif defined(__riscv) && __riscv_xlen == 64
-# define ARCH_REGS	struct user_regs_struct
-# define SYSCALL_NUM	a7
-# define SYSCALL_RET	a0
+# define ARCH_REGS		struct user_regs_struct
+# define SYSCALL_NUM(_regs)	(_regs).a7
+# define SYSCALL_RET(_regs)	(_regs).a0
 #elif defined(__csky__)
-# define ARCH_REGS	struct pt_regs
-#if defined(__CSKYABIV2__)
-# define SYSCALL_NUM	regs[3]
-#else
-# define SYSCALL_NUM	regs[9]
-#endif
-# define SYSCALL_RET	a0
+# define ARCH_REGS		struct pt_regs
+#  if defined(__CSKYABIV2__)
+#   define SYSCALL_NUM(_regs)	(_regs).regs[3]
+#  else
+#   define SYSCALL_NUM(_regs)	(_regs).regs[9]
+#  endif
+# define SYSCALL_RET(_regs)	(_regs).a0
 #elif defined(__hppa__)
-# define ARCH_REGS	struct user_regs_struct
-# define SYSCALL_NUM	gr[20]
-# define SYSCALL_RET	gr[28]
+# define ARCH_REGS		struct user_regs_struct
+# define SYSCALL_NUM(_regs)	(_regs).gr[20]
+# define SYSCALL_RET(_regs)	(_regs).gr[28]
 #elif defined(__powerpc__)
-# define ARCH_REGS	struct pt_regs
-# define SYSCALL_NUM	gpr[0]
-# define SYSCALL_RET	gpr[3]
+# define ARCH_REGS		struct pt_regs
+# define SYSCALL_NUM(_regs)	(_regs).gpr[0]
+# define SYSCALL_RET(_regs)	(_regs).gpr[3]
+# define SYSCALL_RET_SET(_regs, _val)				\
+	do {							\
+		typeof(_val) _result = (_val);			\
+		/*						\
+		 * A syscall error is signaled by CR0 SO bit	\
+		 * and the code is stored as a positive value.	\
+		 */						\
+		if (_result < 0) {				\
+			SYSCALL_RET(_regs) = -result;		\
+			(_regs).ccr |= 0x10000000;		\
+		} else {					\
+			SYSCALL_RET(_regs) = result;		\
+			(_regs).ccr &= ~0x10000000;		\
+		}						\
+	} while (0)
+# define SYSCALL_RET_SET_ON_PTRACE_EXIT
 #elif defined(__s390__)
-# define ARCH_REGS     s390_regs
-# define SYSCALL_NUM   gprs[2]
-# define SYSCALL_RET   gprs[2]
-# define SYSCALL_NUM_RET_SHARE_REG
+# define ARCH_REGS		s390_regs
+# define SYSCALL_NUM(_regs)	(_regs).gprs[2]
+# define SYSCALL_RET_SET(_regs, _val)			\
+		TH_LOG("Can't modify syscall return on this architecture")
 #elif defined(__mips__)
-# define ARCH_REGS	struct pt_regs
-# define SYSCALL_NUM	regs[2]
-# define SYSCALL_SYSCALL_NUM regs[4]
-# define SYSCALL_RET	regs[2]
-# define SYSCALL_NUM_RET_SHARE_REG
+# include <asm/unistd_nr_n32.h>
+# include <asm/unistd_nr_n64.h>
+# include <asm/unistd_nr_o32.h>
+# define ARCH_REGS		struct pt_regs
+# define SYSCALL_NUM(_regs)				\
+	({						\
+		typeof((_regs).regs[2]) _nr;		\
+		if ((_regs).regs[2] == __NR_O32_Linux)	\
+			_nr = (_regs).regs[4];		\
+		else					\
+			_nr = (_regs).regs[2];		\
+		_nr;					\
+	})
+# define SYSCALL_NUM_SET(_regs, _nr)			\
+	do {						\
+		if ((_regs).regs[2] == __NR_O32_Linux)	\
+			(_regs).regs[4] = _nr;		\
+		else					\
+			(_regs).regs[2] = _nr;		\
+	} while (0)
+# define SYSCALL_RET_SET(_regs, _val)			\
+		TH_LOG("Can't modify syscall return on this architecture")
 #elif defined(__xtensa__)
-# define ARCH_REGS	struct user_pt_regs
-# define SYSCALL_NUM	syscall
+# define ARCH_REGS		struct user_pt_regs
+# define SYSCALL_NUM(_regs)	(_regs).syscall
 /*
  * On xtensa syscall return value is in the register
  * a2 of the current window which is not fixed.
  */
-#define SYSCALL_RET(reg) a[(reg).windowbase * 4 + 2]
+#define SYSCALL_RET(_regs)	(_regs).a[(_regs).windowbase * 4 + 2]
 #elif defined(__sh__)
-# define ARCH_REGS	struct pt_regs
-# define SYSCALL_NUM	gpr[3]
-# define SYSCALL_RET	gpr[0]
+# define ARCH_REGS		struct pt_regs
+# define SYSCALL_NUM(_regs)	(_regs).gpr[3]
+# define SYSCALL_RET(_regs)	(_regs).gpr[0]
 #else
 # error "Do not know how to find your architecture's registers and syscalls"
 #endif
 
+/*
+ * Most architectures can change the syscall by just updating the
+ * associated register. This is the default if not defined above.
+ */
+#ifndef SYSCALL_NUM_SET
+# define SYSCALL_NUM_SET(_regs, _nr)		\
+	do {					\
+		SYSCALL_NUM(_regs) = (_nr);	\
+	} while (0)
+#endif
+/*
+ * Most architectures can change the syscall return value by just
+ * writing to the SYSCALL_RET register. This is the default if not
+ * defined above. If an architecture cannot set the return value
+ * (for example when the syscall and return value register is
+ * shared), report it with TH_LOG() in an arch-specific definition
+ * of SYSCALL_RET_SET() above, and leave SYSCALL_RET undefined.
+ */
+#if !defined(SYSCALL_RET) && !defined(SYSCALL_RET_SET)
+# error "One of SYSCALL_RET or SYSCALL_RET_SET is needed for this arch"
+#endif
+#ifndef SYSCALL_RET_SET
+# define SYSCALL_RET_SET(_regs, _val)		\
+	do {					\
+		SYSCALL_RET(_regs) = (_val);	\
+	} while (0)
+#endif
+
 /* When the syscall return can't be changed, stub out the tests for it. */
-#ifdef SYSCALL_NUM_RET_SHARE_REG
+#ifndef SYSCALL_RET
 # define EXPECT_SYSCALL_RETURN(val, action)	EXPECT_EQ(-1, action)
 #else
 # define EXPECT_SYSCALL_RETURN(val, action)		\
@@ -1745,116 +1854,92 @@ TEST_F(TRACE_poke, getpid_runs_normally)
 	} while (0)
 #endif
 
-/* Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for
+/*
+ * Some architectures (e.g. powerpc) can only set syscall
+ * return values on syscall exit during ptrace.
+ */
+const bool ptrace_entry_set_syscall_nr = true;
+const bool ptrace_entry_set_syscall_ret =
+#ifndef SYSCALL_RET_SET_ON_PTRACE_EXIT
+	true;
+#else
+	false;
+#endif
+
+/*
+ * Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for
  * architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux).
  */
 #if defined(__x86_64__) || defined(__i386__) || defined(__mips__)
-#define HAVE_GETREGS
+# define ARCH_GETREGS(_regs)	ptrace(PTRACE_GETREGS, tracee, 0, &(_regs))
+# define ARCH_SETREGS(_regs)	ptrace(PTRACE_SETREGS, tracee, 0, &(_regs))
+#else
+# define ARCH_GETREGS(_regs)	({					\
+		struct iovec __v;					\
+		__v.iov_base = &(_regs);				\
+		__v.iov_len = sizeof(_regs);				\
+		ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &__v);	\
+	})
+# define ARCH_SETREGS(_regs)	({					\
+		struct iovec __v;					\
+		__v.iov_base = &(_regs);				\
+		__v.iov_len = sizeof(_regs);				\
+		ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &__v);	\
+	})
 #endif
 
 /* Architecture-specific syscall fetching routine. */
 int get_syscall(struct __test_metadata *_metadata, pid_t tracee)
 {
 	ARCH_REGS regs;
-#ifdef HAVE_GETREGS
-	EXPECT_EQ(0, ptrace(PTRACE_GETREGS, tracee, 0, &regs)) {
-		TH_LOG("PTRACE_GETREGS failed");
-		return -1;
-	}
-#else
-	struct iovec iov;
 
-	iov.iov_base = &regs;
-	iov.iov_len = sizeof(regs);
-	EXPECT_EQ(0, ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov)) {
-		TH_LOG("PTRACE_GETREGSET failed");
+	EXPECT_EQ(0, ARCH_GETREGS(regs)) {
 		return -1;
 	}
-#endif
 
-#if defined(__mips__)
-	if (regs.SYSCALL_NUM == __NR_O32_Linux)
-		return regs.SYSCALL_SYSCALL_NUM;
-#endif
-	return regs.SYSCALL_NUM;
+	return SYSCALL_NUM(regs);
 }
 
 /* Architecture-specific syscall changing routine. */
-void change_syscall(struct __test_metadata *_metadata,
-		    pid_t tracee, int syscall, int result)
+void __change_syscall(struct __test_metadata *_metadata,
+		    pid_t tracee, long *syscall, long *ret)
 {
-	int ret;
-	ARCH_REGS regs;
-#ifdef HAVE_GETREGS
-	ret = ptrace(PTRACE_GETREGS, tracee, 0, &regs);
-#else
-	struct iovec iov;
-	iov.iov_base = &regs;
-	iov.iov_len = sizeof(regs);
-	ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov);
-#endif
-	EXPECT_EQ(0, ret) {}
+	ARCH_REGS orig, regs;
 
-#if defined(__x86_64__) || defined(__i386__) || defined(__powerpc__) || \
-	defined(__s390__) || defined(__hppa__) || defined(__riscv) || \
-	defined(__xtensa__) || defined(__csky__) || defined(__sh__)
-	{
-		regs.SYSCALL_NUM = syscall;
-	}
-#elif defined(__mips__)
-	{
-		if (regs.SYSCALL_NUM == __NR_O32_Linux)
-			regs.SYSCALL_SYSCALL_NUM = syscall;
-		else
-			regs.SYSCALL_NUM = syscall;
-	}
+	/* Do not get/set registers if we have nothing to do. */
+	if (!syscall && !ret)
+		return;
 
-#elif defined(__arm__)
-# ifndef PTRACE_SET_SYSCALL
-#  define PTRACE_SET_SYSCALL   23
-# endif
-	{
-		ret = ptrace(PTRACE_SET_SYSCALL, tracee, NULL, syscall);
-		EXPECT_EQ(0, ret);
+	EXPECT_EQ(0, ARCH_GETREGS(regs)) {
+		return;
 	}
+	orig = regs;
 
-#elif defined(__aarch64__)
-# ifndef NT_ARM_SYSTEM_CALL
-#  define NT_ARM_SYSTEM_CALL 0x404
-# endif
-	{
-		iov.iov_base = &syscall;
-		iov.iov_len = sizeof(syscall);
-		ret = ptrace(PTRACE_SETREGSET, tracee, NT_ARM_SYSTEM_CALL,
-			     &iov);
-		EXPECT_EQ(0, ret);
-	}
+	if (syscall)
+		SYSCALL_NUM_SET(regs, *syscall);
 
-#else
-	ASSERT_EQ(1, 0) {
-		TH_LOG("How is the syscall changed on this architecture?");
-	}
-#endif
+	if (ret)
+		SYSCALL_RET_SET(regs, *ret);
 
-	/* If syscall is skipped, change return value. */
-	if (syscall == -1)
-#ifdef SYSCALL_NUM_RET_SHARE_REG
-		TH_LOG("Can't modify syscall return on this architecture");
+	/* Flush any register changes made. */
+	if (memcmp(&orig, &regs, sizeof(orig)) != 0)
+		EXPECT_EQ(0, ARCH_SETREGS(regs));
+}
 
-#elif defined(__xtensa__)
-		regs.SYSCALL_RET(regs) = result;
-#else
-		regs.SYSCALL_RET = result;
-#endif
+/* Change only syscall number. */
+void change_syscall_nr(struct __test_metadata *_metadata,
+		       pid_t tracee, long syscall)
+{
+	__change_syscall(_metadata, tracee, &syscall, NULL);
+}
 
-#ifdef HAVE_GETREGS
-	ret = ptrace(PTRACE_SETREGS, tracee, 0, &regs);
-#else
-	iov.iov_base = &regs;
-	iov.iov_len = sizeof(regs);
-	ret = ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &iov);
-#endif
-	EXPECT_EQ(0, ret);
+/* Change syscall return value (and set syscall number to -1). */
+void change_syscall_ret(struct __test_metadata *_metadata,
+			pid_t tracee, long ret)
+{
+	long syscall = -1;
+
+	__change_syscall(_metadata, tracee, &syscall, &ret);
 }
 
 void tracer_seccomp(struct __test_metadata *_metadata, pid_t tracee,
@@ -1872,17 +1957,17 @@ void tracer_seccomp(struct __test_metadata *_metadata, pid_t tracee,
 	case 0x1002:
 		/* change getpid to getppid. */
 		EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee));
-		change_syscall(_metadata, tracee, __NR_getppid, 0);
+		change_syscall_nr(_metadata, tracee, __NR_getppid);
 		break;
 	case 0x1003:
 		/* skip gettid with valid return code. */
 		EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee));
-		change_syscall(_metadata, tracee, -1, 45000);
+		change_syscall_ret(_metadata, tracee, 45000);
 		break;
 	case 0x1004:
 		/* skip openat with error. */
 		EXPECT_EQ(__NR_openat, get_syscall(_metadata, tracee));
-		change_syscall(_metadata, tracee, -1, -ESRCH);
+		change_syscall_ret(_metadata, tracee, -ESRCH);
 		break;
 	case 0x1005:
 		/* do nothing (allow getppid) */
@@ -1897,12 +1982,21 @@ void tracer_seccomp(struct __test_metadata *_metadata, pid_t tracee,
 
 }
 
+FIXTURE(TRACE_syscall) {
+	struct sock_fprog prog;
+	pid_t tracer, mytid, mypid, parent;
+	long syscall_nr;
+};
+
 void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee,
 		   int status, void *args)
 {
-	int ret, nr;
+	int ret;
 	unsigned long msg;
 	static bool entry;
+	long syscall_nr_val, syscall_ret_val;
+	long *syscall_nr = NULL, *syscall_ret = NULL;
+	FIXTURE_DATA(TRACE_syscall) *self = args;
 
 	/*
 	 * The traditional way to tell PTRACE_SYSCALL entry/exit
@@ -1916,24 +2010,48 @@ void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee,
 	EXPECT_EQ(entry ? PTRACE_EVENTMSG_SYSCALL_ENTRY
 			: PTRACE_EVENTMSG_SYSCALL_EXIT, msg);
 
-	if (!entry)
-		return;
+	/*
+	 * Some architectures only support setting return values during
+	 * syscall exit under ptrace, and on exit the syscall number may
+	 * no longer be available. Therefore, save the initial sycall
+	 * number here, so it can be examined during both entry and exit
+	 * phases.
+	 */
+	if (entry)
+		self->syscall_nr = get_syscall(_metadata, tracee);
 
-	nr = get_syscall(_metadata, tracee);
+	/*
+	 * Depending on the architecture's syscall setting abilities, we
+	 * pick which things to set during this phase (entry or exit).
+	 */
+	if (entry == ptrace_entry_set_syscall_nr)
+		syscall_nr = &syscall_nr_val;
+	if (entry == ptrace_entry_set_syscall_ret)
+		syscall_ret = &syscall_ret_val;
+
+	/* Now handle the actual rewriting cases. */
+	switch (self->syscall_nr) {
+	case __NR_getpid:
+		syscall_nr_val = __NR_getppid;
+		/* Never change syscall return for this case. */
+		syscall_ret = NULL;
+		break;
+	case __NR_gettid:
+		syscall_nr_val = -1;
+		syscall_ret_val = 45000;
+		break;
+	case __NR_openat:
+		syscall_nr_val = -1;
+		syscall_ret_val = -ESRCH;
+		break;
+	default:
+		/* Unhandled, do nothing. */
+		return;
+	}
 
-	if (nr == __NR_getpid)
-		change_syscall(_metadata, tracee, __NR_getppid, 0);
-	if (nr == __NR_gettid)
-		change_syscall(_metadata, tracee, -1, 45000);
-	if (nr == __NR_openat)
-		change_syscall(_metadata, tracee, -1, -ESRCH);
+	__change_syscall(_metadata, tracee, syscall_nr, syscall_ret);
 }
 
-FIXTURE(TRACE_syscall) {
-	struct sock_fprog prog;
-	pid_t tracer, mytid, mypid, parent;
-};
-
 FIXTURE_VARIANT(TRACE_syscall) {
 	/*
 	 * All of the SECCOMP_RET_TRACE behaviors can be tested with either
@@ -1992,7 +2110,7 @@ FIXTURE_SETUP(TRACE_syscall)
 	self->tracer = setup_trace_fixture(_metadata,
 					   variant->use_ptrace ? tracer_ptrace
 							       : tracer_seccomp,
-					   NULL, variant->use_ptrace);
+					   self, variant->use_ptrace);
 
 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 	ASSERT_EQ(0, ret);
@@ -3142,11 +3260,11 @@ skip:
 static int user_notif_syscall(int nr, unsigned int flags)
 {
 	struct sock_filter filter[] = {
-		BPF_STMT(BPF_LD+BPF_W+BPF_ABS,
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
 			offsetof(struct seccomp_data, nr)),
-		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, nr, 0, 1),
-		BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_USER_NOTIF),
-		BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, nr, 0, 1),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_USER_NOTIF),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 	};
 
 	struct sock_fprog prog = {
@@ -3699,7 +3817,7 @@ TEST(user_notification_filter_empty)
 	long ret;
 	int status;
 	struct pollfd pollfd;
-	struct clone_args args = {
+	struct __clone_args args = {
 		.flags = CLONE_FILES,
 		.exit_signal = SIGCHLD,
 	};
@@ -3715,7 +3833,7 @@ TEST(user_notification_filter_empty)
 	if (pid == 0) {
 		int listener;
 
-		listener = user_notif_syscall(__NR_mknod, SECCOMP_FILTER_FLAG_NEW_LISTENER);
+		listener = user_notif_syscall(__NR_mknodat, SECCOMP_FILTER_FLAG_NEW_LISTENER);
 		if (listener < 0)
 			_exit(EXIT_FAILURE);
 
@@ -3753,7 +3871,7 @@ TEST(user_notification_filter_empty_threaded)
 	long ret;
 	int status;
 	struct pollfd pollfd;
-	struct clone_args args = {
+	struct __clone_args args = {
 		.flags = CLONE_FILES,
 		.exit_signal = SIGCHLD,
 	};
diff --git a/tools/testing/selftests/timers/Makefile b/tools/testing/selftests/timers/Makefile
index 7656c7ce79d9..0e73a16874c4 100644
--- a/tools/testing/selftests/timers/Makefile
+++ b/tools/testing/selftests/timers/Makefile
@@ -13,6 +13,7 @@ DESTRUCTIVE_TESTS = alarmtimer-suspend valid-adjtimex adjtick change_skew \
 
 TEST_GEN_PROGS_EXTENDED = $(DESTRUCTIVE_TESTS)
 
+TEST_FILES := settings
 
 include ../lib.mk
 
diff --git a/tools/testing/selftests/timers/settings b/tools/testing/selftests/timers/settings
new file mode 100644
index 000000000000..e7b9417537fb
--- /dev/null
+++ b/tools/testing/selftests/timers/settings
@@ -0,0 +1 @@
+timeout=0
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index a9026706d597..30873b19d04b 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -3,6 +3,23 @@
 uname_M := $(shell uname -m 2>/dev/null || echo not)
 MACHINE ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/')
 
+# Without this, failed build products remain, with up-to-date timestamps,
+# thus tricking Make (and you!) into believing that All Is Well, in subsequent
+# make invocations:
+.DELETE_ON_ERROR:
+
+# Avoid accidental wrong builds, due to built-in rules working just a little
+# bit too well--but not quite as well as required for our situation here.
+#
+# In other words, "make userfaultfd" is supposed to fail to build at all,
+# because this Makefile only supports either "make" (all), or "make /full/path".
+# However,  the built-in rules, if not suppressed, will pick up CFLAGS and the
+# initial LDLIBS (but not the target-specific LDLIBS, because those are only
+# set for the full path target!). This causes it to get pretty far into building
+# things despite using incorrect values such as an *occasionally* incomplete
+# LDLIBS.
+MAKEFLAGS += --no-builtin-rules
+
 CFLAGS = -Wall -I ../../../../usr/include $(EXTRA_CFLAGS)
 LDLIBS = -lrt
 TEST_GEN_FILES = compaction_test
diff --git a/tools/testing/selftests/vm/compaction_test.c b/tools/testing/selftests/vm/compaction_test.c
index bcec71250873..9b420140ba2b 100644
--- a/tools/testing/selftests/vm/compaction_test.c
+++ b/tools/testing/selftests/vm/compaction_test.c
@@ -18,7 +18,8 @@
 
 #include "../kselftest.h"
 
-#define MAP_SIZE 1048576
+#define MAP_SIZE_MB	100
+#define MAP_SIZE	(MAP_SIZE_MB * 1024 * 1024)
 
 struct map_list {
 	void *map;
@@ -165,7 +166,7 @@ int main(int argc, char **argv)
 	void *map = NULL;
 	unsigned long mem_free = 0;
 	unsigned long hugepage_size = 0;
-	unsigned long mem_fragmentable = 0;
+	long mem_fragmentable_MB = 0;
 
 	if (prereq() != 0) {
 		printf("Either the sysctl compact_unevictable_allowed is not\n"
@@ -190,9 +191,9 @@ int main(int argc, char **argv)
 		return -1;
 	}
 
-	mem_fragmentable = mem_free * 0.8 / 1024;
+	mem_fragmentable_MB = mem_free * 0.8 / 1024;
 
-	while (mem_fragmentable > 0) {
+	while (mem_fragmentable_MB > 0) {
 		map = mmap(NULL, MAP_SIZE, PROT_READ | PROT_WRITE,
 			   MAP_ANONYMOUS | MAP_PRIVATE | MAP_LOCKED, -1, 0);
 		if (map == MAP_FAILED)
@@ -213,7 +214,7 @@ int main(int argc, char **argv)
 		for (i = 0; i < MAP_SIZE; i += page_size)
 			*(unsigned long *)(map + i) = (unsigned long)map + i;
 
-		mem_fragmentable--;
+		mem_fragmentable_MB -= MAP_SIZE_MB;
 	}
 
 	for (entry = list; entry != NULL; entry = entry->next) {
diff --git a/tools/testing/selftests/vm/config b/tools/testing/selftests/vm/config
index 3ba674b64fa9..69dd0d1aa30b 100644
--- a/tools/testing/selftests/vm/config
+++ b/tools/testing/selftests/vm/config
@@ -3,3 +3,4 @@ CONFIG_USERFAULTFD=y
 CONFIG_TEST_VMALLOC=m
 CONFIG_DEVICE_PRIVATE=y
 CONFIG_TEST_HMM=m
+CONFIG_GUP_BENCHMARK=y
diff --git a/tools/testing/selftests/vm/gup_benchmark.c b/tools/testing/selftests/vm/gup_benchmark.c
index 43b4dfe161a2..1d4359341e44 100644
--- a/tools/testing/selftests/vm/gup_benchmark.c
+++ b/tools/testing/selftests/vm/gup_benchmark.c
@@ -15,12 +15,12 @@
 #define PAGE_SIZE sysconf(_SC_PAGESIZE)
 
 #define GUP_FAST_BENCHMARK	_IOWR('g', 1, struct gup_benchmark)
-#define GUP_LONGTERM_BENCHMARK	_IOWR('g', 2, struct gup_benchmark)
-#define GUP_BENCHMARK		_IOWR('g', 3, struct gup_benchmark)
+#define GUP_BENCHMARK		_IOWR('g', 2, struct gup_benchmark)
 
 /* Similar to above, but use FOLL_PIN instead of FOLL_GET. */
-#define PIN_FAST_BENCHMARK	_IOWR('g', 4, struct gup_benchmark)
-#define PIN_BENCHMARK		_IOWR('g', 5, struct gup_benchmark)
+#define PIN_FAST_BENCHMARK	_IOWR('g', 3, struct gup_benchmark)
+#define PIN_BENCHMARK		_IOWR('g', 4, struct gup_benchmark)
+#define PIN_LONGTERM_BENCHMARK	_IOWR('g', 5, struct gup_benchmark)
 
 /* Just the flags we need, copied from mm.h: */
 #define FOLL_WRITE	0x01	/* check pte is writable */
@@ -52,6 +52,9 @@ int main(int argc, char **argv)
 		case 'b':
 			cmd = PIN_BENCHMARK;
 			break;
+		case 'L':
+			cmd = PIN_LONGTERM_BENCHMARK;
+			break;
 		case 'm':
 			size = atoi(optarg) * MB;
 			break;
@@ -67,9 +70,6 @@ int main(int argc, char **argv)
 		case 'T':
 			thp = 0;
 			break;
-		case 'L':
-			cmd = GUP_LONGTERM_BENCHMARK;
-			break;
 		case 'U':
 			cmd = GUP_BENCHMARK;
 			break;
@@ -105,12 +105,16 @@ int main(int argc, char **argv)
 		gup.flags |= FOLL_WRITE;
 
 	fd = open("/sys/kernel/debug/gup_benchmark", O_RDWR);
-	if (fd == -1)
-		perror("open"), exit(1);
+	if (fd == -1) {
+		perror("open");
+		exit(1);
+	}
 
 	p = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, filed, 0);
-	if (p == MAP_FAILED)
-		perror("mmap"), exit(1);
+	if (p == MAP_FAILED) {
+		perror("mmap");
+		exit(1);
+	}
 	gup.addr = (unsigned long)p;
 
 	if (thp == 1)
@@ -123,8 +127,10 @@ int main(int argc, char **argv)
 
 	for (i = 0; i < repeats; i++) {
 		gup.size = size;
-		if (ioctl(fd, cmd, &gup))
-			perror("ioctl"), exit(1);
+		if (ioctl(fd, cmd, &gup)) {
+			perror("ioctl");
+			exit(1);
+		}
 
 		printf("Time: get:%lld put:%lld us", gup.get_delta_usec,
 			gup.put_delta_usec);
diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/vm/hmm-tests.c
index 93fc5cadce61..c9404ef9698e 100644
--- a/tools/testing/selftests/vm/hmm-tests.c
+++ b/tools/testing/selftests/vm/hmm-tests.c
@@ -45,7 +45,7 @@ struct hmm_buffer {
 #define TWOMEG		(1 << 21)
 #define HMM_BUFFER_SIZE (1024 << 12)
 #define HMM_PATH_MAX    64
-#define NTIMES		256
+#define NTIMES		10
 
 #define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1)))
 
@@ -680,7 +680,7 @@ TEST_F(hmm, anon_write_hugetlbfs)
 
 	n = gethugepagesizes(pagesizes, 4);
 	if (n <= 0)
-		return;
+		SKIP(return, "Huge page size could not be determined");
 	for (idx = 0; --n > 0; ) {
 		if (pagesizes[n] < pagesizes[idx])
 			idx = n;
@@ -694,7 +694,7 @@ TEST_F(hmm, anon_write_hugetlbfs)
 	buffer->ptr = get_hugepage_region(size, GHR_STRICT);
 	if (buffer->ptr == NULL) {
 		free(buffer);
-		return;
+		SKIP(return, "Huge page could not be allocated");
 	}
 
 	buffer->fd = -1;
diff --git a/tools/testing/selftests/vm/map_hugetlb.c b/tools/testing/selftests/vm/map_hugetlb.c
index 6af951900aa3..312889edb84a 100644
--- a/tools/testing/selftests/vm/map_hugetlb.c
+++ b/tools/testing/selftests/vm/map_hugetlb.c
@@ -83,7 +83,7 @@ int main(int argc, char **argv)
 	}
 
 	if (shift)
-		printf("%u kB hugepages\n", 1 << shift);
+		printf("%u kB hugepages\n", 1 << (shift - 10));
 	else
 		printf("Default size hugepages\n");
 	printf("Mapping %lu Mbytes\n", (unsigned long)length >> 20);
diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
index 61e5cfeb1350..9b0912a01777 100644
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ b/tools/testing/selftests/vm/userfaultfd.c
@@ -227,8 +227,10 @@ static void hugetlb_allocate_area(void **alloc_area)
 				  huge_fd, *alloc_area == area_src ? 0 :
 				  nr_pages * page_size);
 		if (area_alias == MAP_FAILED) {
-			if (munmap(*alloc_area, nr_pages * page_size) < 0)
-				perror("hugetlb munmap"), exit(1);
+			if (munmap(*alloc_area, nr_pages * page_size) < 0) {
+				perror("hugetlb munmap");
+				exit(1);
+			}
 			*alloc_area = NULL;
 			return;
 		}
@@ -337,9 +339,10 @@ static void wp_range(int ufd, __u64 start, __u64 len, bool wp)
 	/* Undo write-protect, do wakeup after that */
 	prms.mode = wp ? UFFDIO_WRITEPROTECT_MODE_WP : 0;
 
-	if (ioctl(ufd, UFFDIO_WRITEPROTECT, &prms))
-		fprintf(stderr, "clear WP failed for address 0x%Lx\n",
-			start), exit(1);
+	if (ioctl(ufd, UFFDIO_WRITEPROTECT, &prms)) {
+		fprintf(stderr, "clear WP failed for address 0x%Lx\n", start);
+		exit(1);
+	}
 }
 
 static void *locking_thread(void *arg)
@@ -359,8 +362,10 @@ static void *locking_thread(void *arg)
 			seed += cpu;
 		bzero(&rand, sizeof(rand));
 		bzero(&randstate, sizeof(randstate));
-		if (initstate_r(seed, randstate, sizeof(randstate), &rand))
-			fprintf(stderr, "srandom_r error\n"), exit(1);
+		if (initstate_r(seed, randstate, sizeof(randstate), &rand)) {
+			fprintf(stderr, "srandom_r error\n");
+			exit(1);
+		}
 	} else {
 		page_nr = -bounces;
 		if (!(bounces & BOUNCE_RACINGFAULTS))
@@ -369,12 +374,16 @@ static void *locking_thread(void *arg)
 
 	while (!finished) {
 		if (bounces & BOUNCE_RANDOM) {
-			if (random_r(&rand, &rand_nr))
-				fprintf(stderr, "random_r 1 error\n"), exit(1);
+			if (random_r(&rand, &rand_nr)) {
+				fprintf(stderr, "random_r 1 error\n");
+				exit(1);
+			}
 			page_nr = rand_nr;
 			if (sizeof(page_nr) > sizeof(rand_nr)) {
-				if (random_r(&rand, &rand_nr))
-					fprintf(stderr, "random_r 2 error\n"), exit(1);
+				if (random_r(&rand, &rand_nr)) {
+					fprintf(stderr, "random_r 2 error\n");
+					exit(1);
+				}
 				page_nr |= (((unsigned long) rand_nr) << 16) <<
 					   16;
 			}
@@ -385,11 +394,13 @@ static void *locking_thread(void *arg)
 		start = time(NULL);
 		if (bounces & BOUNCE_VERIFY) {
 			count = *area_count(area_dst, page_nr);
-			if (!count)
+			if (!count) {
 				fprintf(stderr,
 					"page_nr %lu wrong count %Lu %Lu\n",
 					page_nr, count,
-					count_verify[page_nr]), exit(1);
+					count_verify[page_nr]);
+				exit(1);
+			}
 
 
 			/*
@@ -401,11 +412,12 @@ static void *locking_thread(void *arg)
 			 */
 #if 1
 			if (!my_bcmp(area_dst + page_nr * page_size, zeropage,
-				     page_size))
+				     page_size)) {
 				fprintf(stderr,
 					"my_bcmp page_nr %lu wrong count %Lu %Lu\n",
-					page_nr, count,
-					count_verify[page_nr]), exit(1);
+					page_nr, count, count_verify[page_nr]);
+				exit(1);
+			}
 #else
 			unsigned long loops;
 
@@ -437,7 +449,7 @@ static void *locking_thread(void *arg)
 			fprintf(stderr,
 				"page_nr %lu memory corruption %Lu %Lu\n",
 				page_nr, count,
-				count_verify[page_nr]), exit(1);
+				count_verify[page_nr]); exit(1);
 		}
 		count++;
 		*area_count(area_dst, page_nr) = count_verify[page_nr] = count;
@@ -461,12 +473,14 @@ static void retry_copy_page(int ufd, struct uffdio_copy *uffdio_copy,
 				     offset);
 	if (ioctl(ufd, UFFDIO_COPY, uffdio_copy)) {
 		/* real retval in ufdio_copy.copy */
-		if (uffdio_copy->copy != -EEXIST)
+		if (uffdio_copy->copy != -EEXIST) {
 			fprintf(stderr, "UFFDIO_COPY retry error %Ld\n",
-				uffdio_copy->copy), exit(1);
+				uffdio_copy->copy);
+			exit(1);
+		}
 	} else {
 		fprintf(stderr,	"UFFDIO_COPY retry unexpected %Ld\n",
-			uffdio_copy->copy), exit(1);
+			uffdio_copy->copy); exit(1);
 	}
 }
 
@@ -474,9 +488,10 @@ static int __copy_page(int ufd, unsigned long offset, bool retry)
 {
 	struct uffdio_copy uffdio_copy;
 
-	if (offset >= nr_pages * page_size)
-		fprintf(stderr, "unexpected offset %lu\n",
-			offset), exit(1);
+	if (offset >= nr_pages * page_size) {
+		fprintf(stderr, "unexpected offset %lu\n", offset);
+		exit(1);
+	}
 	uffdio_copy.dst = (unsigned long) area_dst + offset;
 	uffdio_copy.src = (unsigned long) area_src + offset;
 	uffdio_copy.len = page_size;
@@ -487,12 +502,14 @@ static int __copy_page(int ufd, unsigned long offset, bool retry)
 	uffdio_copy.copy = 0;
 	if (ioctl(ufd, UFFDIO_COPY, &uffdio_copy)) {
 		/* real retval in ufdio_copy.copy */
-		if (uffdio_copy.copy != -EEXIST)
+		if (uffdio_copy.copy != -EEXIST) {
 			fprintf(stderr, "UFFDIO_COPY error %Ld\n",
-				uffdio_copy.copy), exit(1);
+				uffdio_copy.copy);
+			exit(1);
+		}
 	} else if (uffdio_copy.copy != page_size) {
 		fprintf(stderr, "UFFDIO_COPY unexpected copy %Ld\n",
-			uffdio_copy.copy), exit(1);
+			uffdio_copy.copy); exit(1);
 	} else {
 		if (test_uffdio_copy_eexist && retry) {
 			test_uffdio_copy_eexist = false;
@@ -521,11 +538,11 @@ static int uffd_read_msg(int ufd, struct uffd_msg *msg)
 		if (ret < 0) {
 			if (errno == EAGAIN)
 				return 1;
-			else
-				perror("blocking read error"), exit(1);
+			perror("blocking read error");
 		} else {
-			fprintf(stderr, "short read\n"), exit(1);
+			fprintf(stderr, "short read\n");
 		}
+		exit(1);
 	}
 
 	return 0;
@@ -536,9 +553,10 @@ static void uffd_handle_page_fault(struct uffd_msg *msg,
 {
 	unsigned long offset;
 
-	if (msg->event != UFFD_EVENT_PAGEFAULT)
-		fprintf(stderr, "unexpected msg event %u\n",
-			msg->event), exit(1);
+	if (msg->event != UFFD_EVENT_PAGEFAULT) {
+		fprintf(stderr, "unexpected msg event %u\n", msg->event);
+		exit(1);
+	}
 
 	if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WP) {
 		wp_range(uffd, msg->arg.pagefault.address, page_size, false);
@@ -546,8 +564,10 @@ static void uffd_handle_page_fault(struct uffd_msg *msg,
 	} else {
 		/* Missing page faults */
 		if (bounces & BOUNCE_VERIFY &&
-		    msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE)
-			fprintf(stderr, "unexpected write fault\n"), exit(1);
+		    msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE) {
+			fprintf(stderr, "unexpected write fault\n");
+			exit(1);
+		}
 
 		offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst;
 		offset &= ~(page_size-1);
@@ -574,25 +594,32 @@ static void *uffd_poll_thread(void *arg)
 
 	for (;;) {
 		ret = poll(pollfd, 2, -1);
-		if (!ret)
-			fprintf(stderr, "poll error %d\n", ret), exit(1);
-		if (ret < 0)
-			perror("poll"), exit(1);
+		if (!ret) {
+			fprintf(stderr, "poll error %d\n", ret);
+			exit(1);
+		}
+		if (ret < 0) {
+			perror("poll");
+			exit(1);
+		}
 		if (pollfd[1].revents & POLLIN) {
-			if (read(pollfd[1].fd, &tmp_chr, 1) != 1)
-				fprintf(stderr, "read pipefd error\n"),
-					exit(1);
+			if (read(pollfd[1].fd, &tmp_chr, 1) != 1) {
+				fprintf(stderr, "read pipefd error\n");
+				exit(1);
+			}
 			break;
 		}
-		if (!(pollfd[0].revents & POLLIN))
+		if (!(pollfd[0].revents & POLLIN)) {
 			fprintf(stderr, "pollfd[0].revents %d\n",
-				pollfd[0].revents), exit(1);
+				pollfd[0].revents);
+			exit(1);
+		}
 		if (uffd_read_msg(uffd, &msg))
 			continue;
 		switch (msg.event) {
 		default:
 			fprintf(stderr, "unexpected msg event %u\n",
-				msg.event), exit(1);
+				msg.event); exit(1);
 			break;
 		case UFFD_EVENT_PAGEFAULT:
 			uffd_handle_page_fault(&msg, stats);
@@ -606,8 +633,10 @@ static void *uffd_poll_thread(void *arg)
 			uffd_reg.range.start = msg.arg.remove.start;
 			uffd_reg.range.len = msg.arg.remove.end -
 				msg.arg.remove.start;
-			if (ioctl(uffd, UFFDIO_UNREGISTER, &uffd_reg.range))
-				fprintf(stderr, "remove failure\n"), exit(1);
+			if (ioctl(uffd, UFFDIO_UNREGISTER, &uffd_reg.range)) {
+				fprintf(stderr, "remove failure\n");
+				exit(1);
+			}
 			break;
 		case UFFD_EVENT_REMAP:
 			area_dst = (char *)(unsigned long)msg.arg.remap.to;
@@ -879,8 +908,10 @@ static int faulting_process(int signal_test)
 
 	area_dst = mremap(area_dst, nr_pages * page_size,  nr_pages * page_size,
 			  MREMAP_MAYMOVE | MREMAP_FIXED, area_src);
-	if (area_dst == MAP_FAILED)
-		perror("mremap"), exit(1);
+	if (area_dst == MAP_FAILED) {
+		perror("mremap");
+		exit(1);
+	}
 
 	for (; nr < nr_pages; nr++) {
 		count = *area_count(area_dst, nr);
@@ -888,7 +919,7 @@ static int faulting_process(int signal_test)
 			fprintf(stderr,
 				"nr %lu memory corruption %Lu %Lu\n",
 				nr, count,
-				count_verify[nr]), exit(1);
+				count_verify[nr]); exit(1);
 		}
 		/*
 		 * Trigger write protection if there is by writting
@@ -901,8 +932,10 @@ static int faulting_process(int signal_test)
 		return 1;
 
 	for (nr = 0; nr < nr_pages; nr++) {
-		if (my_bcmp(area_dst + nr * page_size, zeropage, page_size))
-			fprintf(stderr, "nr %lu is not zero\n", nr), exit(1);
+		if (my_bcmp(area_dst + nr * page_size, zeropage, page_size)) {
+			fprintf(stderr, "nr %lu is not zero\n", nr);
+			exit(1);
+		}
 	}
 
 	return 0;
@@ -916,12 +949,14 @@ static void retry_uffdio_zeropage(int ufd,
 				     uffdio_zeropage->range.len,
 				     offset);
 	if (ioctl(ufd, UFFDIO_ZEROPAGE, uffdio_zeropage)) {
-		if (uffdio_zeropage->zeropage != -EEXIST)
+		if (uffdio_zeropage->zeropage != -EEXIST) {
 			fprintf(stderr, "UFFDIO_ZEROPAGE retry error %Ld\n",
-				uffdio_zeropage->zeropage), exit(1);
+				uffdio_zeropage->zeropage);
+			exit(1);
+		}
 	} else {
 		fprintf(stderr, "UFFDIO_ZEROPAGE retry unexpected %Ld\n",
-			uffdio_zeropage->zeropage), exit(1);
+			uffdio_zeropage->zeropage); exit(1);
 	}
 }
 
@@ -933,9 +968,10 @@ static int __uffdio_zeropage(int ufd, unsigned long offset, bool retry)
 
 	has_zeropage = uffd_test_ops->expected_ioctls & (1 << _UFFDIO_ZEROPAGE);
 
-	if (offset >= nr_pages * page_size)
-		fprintf(stderr, "unexpected offset %lu\n",
-			offset), exit(1);
+	if (offset >= nr_pages * page_size) {
+		fprintf(stderr, "unexpected offset %lu\n", offset);
+		exit(1);
+	}
 	uffdio_zeropage.range.start = (unsigned long) area_dst + offset;
 	uffdio_zeropage.range.len = page_size;
 	uffdio_zeropage.mode = 0;
@@ -943,22 +979,26 @@ static int __uffdio_zeropage(int ufd, unsigned long offset, bool retry)
 	if (ret) {
 		/* real retval in ufdio_zeropage.zeropage */
 		if (has_zeropage) {
-			if (uffdio_zeropage.zeropage == -EEXIST)
-				fprintf(stderr, "UFFDIO_ZEROPAGE -EEXIST\n"),
-					exit(1);
-			else
+			if (uffdio_zeropage.zeropage == -EEXIST) {
+				fprintf(stderr, "UFFDIO_ZEROPAGE -EEXIST\n");
+				exit(1);
+			} else {
 				fprintf(stderr, "UFFDIO_ZEROPAGE error %Ld\n",
-					uffdio_zeropage.zeropage), exit(1);
+					uffdio_zeropage.zeropage);
+				exit(1);
+			}
 		} else {
-			if (uffdio_zeropage.zeropage != -EINVAL)
+			if (uffdio_zeropage.zeropage != -EINVAL) {
 				fprintf(stderr,
 					"UFFDIO_ZEROPAGE not -EINVAL %Ld\n",
-					uffdio_zeropage.zeropage), exit(1);
+					uffdio_zeropage.zeropage);
+				exit(1);
+			}
 		}
 	} else if (has_zeropage) {
 		if (uffdio_zeropage.zeropage != page_size) {
 			fprintf(stderr, "UFFDIO_ZEROPAGE unexpected %Ld\n",
-				uffdio_zeropage.zeropage), exit(1);
+				uffdio_zeropage.zeropage); exit(1);
 		} else {
 			if (test_uffdio_zeropage_eexist && retry) {
 				test_uffdio_zeropage_eexist = false;
@@ -970,7 +1010,7 @@ static int __uffdio_zeropage(int ufd, unsigned long offset, bool retry)
 	} else {
 		fprintf(stderr,
 			"UFFDIO_ZEROPAGE succeeded %Ld\n",
-			uffdio_zeropage.zeropage), exit(1);
+			uffdio_zeropage.zeropage); exit(1);
 	}
 
 	return 0;
@@ -1000,19 +1040,24 @@ static int userfaultfd_zeropage_test(void)
 	uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
 	if (test_uffdio_wp)
 		uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP;
-	if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
-		fprintf(stderr, "register failure\n"), exit(1);
+	if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) {
+		fprintf(stderr, "register failure\n");
+		exit(1);
+	}
 
 	expected_ioctls = uffd_test_ops->expected_ioctls;
 	if ((uffdio_register.ioctls & expected_ioctls) !=
-	    expected_ioctls)
+	    expected_ioctls) {
 		fprintf(stderr,
-			"unexpected missing ioctl for anon memory\n"),
-			exit(1);
+			"unexpected missing ioctl for anon memory\n");
+		exit(1);
+	}
 
 	if (uffdio_zeropage(uffd, 0)) {
-		if (my_bcmp(area_dst, zeropage, page_size))
-			fprintf(stderr, "zeropage is not zero\n"), exit(1);
+		if (my_bcmp(area_dst, zeropage, page_size)) {
+			fprintf(stderr, "zeropage is not zero\n");
+			exit(1);
+		}
 	}
 
 	close(uffd);
@@ -1047,32 +1092,41 @@ static int userfaultfd_events_test(void)
 	uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
 	if (test_uffdio_wp)
 		uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP;
-	if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
-		fprintf(stderr, "register failure\n"), exit(1);
+	if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) {
+		fprintf(stderr, "register failure\n");
+		exit(1);
+	}
 
 	expected_ioctls = uffd_test_ops->expected_ioctls;
-	if ((uffdio_register.ioctls & expected_ioctls) !=
-	    expected_ioctls)
-		fprintf(stderr,
-			"unexpected missing ioctl for anon memory\n"),
-			exit(1);
+	if ((uffdio_register.ioctls & expected_ioctls) != expected_ioctls) {
+		fprintf(stderr, "unexpected missing ioctl for anon memory\n");
+		exit(1);
+	}
 
-	if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats))
-		perror("uffd_poll_thread create"), exit(1);
+	if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats)) {
+		perror("uffd_poll_thread create");
+		exit(1);
+	}
 
 	pid = fork();
-	if (pid < 0)
-		perror("fork"), exit(1);
+	if (pid < 0) {
+		perror("fork");
+		exit(1);
+	}
 
 	if (!pid)
 		return faulting_process(0);
 
 	waitpid(pid, &err, 0);
-	if (err)
-		fprintf(stderr, "faulting process failed\n"), exit(1);
+	if (err) {
+		fprintf(stderr, "faulting process failed\n");
+		exit(1);
+	}
 
-	if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
-		perror("pipe write"), exit(1);
+	if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) {
+		perror("pipe write");
+		exit(1);
+	}
 	if (pthread_join(uffd_mon, NULL))
 		return 1;
 
@@ -1110,38 +1164,49 @@ static int userfaultfd_sig_test(void)
 	uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
 	if (test_uffdio_wp)
 		uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP;
-	if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
-		fprintf(stderr, "register failure\n"), exit(1);
+	if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) {
+		fprintf(stderr, "register failure\n");
+		exit(1);
+	}
 
 	expected_ioctls = uffd_test_ops->expected_ioctls;
-	if ((uffdio_register.ioctls & expected_ioctls) !=
-	    expected_ioctls)
-		fprintf(stderr,
-			"unexpected missing ioctl for anon memory\n"),
-			exit(1);
+	if ((uffdio_register.ioctls & expected_ioctls) != expected_ioctls) {
+		fprintf(stderr, "unexpected missing ioctl for anon memory\n");
+		exit(1);
+	}
 
-	if (faulting_process(1))
-		fprintf(stderr, "faulting process failed\n"), exit(1);
+	if (faulting_process(1)) {
+		fprintf(stderr, "faulting process failed\n");
+		exit(1);
+	}
 
 	if (uffd_test_ops->release_pages(area_dst))
 		return 1;
 
-	if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats))
-		perror("uffd_poll_thread create"), exit(1);
+	if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats)) {
+		perror("uffd_poll_thread create");
+		exit(1);
+	}
 
 	pid = fork();
-	if (pid < 0)
-		perror("fork"), exit(1);
+	if (pid < 0) {
+		perror("fork");
+		exit(1);
+	}
 
 	if (!pid)
 		exit(faulting_process(2));
 
 	waitpid(pid, &err, 0);
-	if (err)
-		fprintf(stderr, "faulting process failed\n"), exit(1);
+	if (err) {
+		fprintf(stderr, "faulting process failed\n");
+		exit(1);
+	}
 
-	if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
-		perror("pipe write"), exit(1);
+	if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) {
+		perror("pipe write");
+		exit(1);
+	}
 	if (pthread_join(uffd_mon, (void **)&userfaults))
 		return 1;
 
@@ -1395,7 +1460,7 @@ static void set_test_type(const char *type)
 		test_type = TEST_SHMEM;
 		uffd_test_ops = &shmem_uffd_test_ops;
 	} else {
-		fprintf(stderr, "Unknown test type: %s\n", type), exit(1);
+		fprintf(stderr, "Unknown test type: %s\n", type); exit(1);
 	}
 
 	if (test_type == TEST_HUGETLB)
@@ -1403,12 +1468,15 @@ static void set_test_type(const char *type)
 	else
 		page_size = sysconf(_SC_PAGE_SIZE);
 
-	if (!page_size)
-		fprintf(stderr, "Unable to determine page size\n"),
-				exit(2);
+	if (!page_size) {
+		fprintf(stderr, "Unable to determine page size\n");
+		exit(2);
+	}
 	if ((unsigned long) area_count(NULL, 0) + sizeof(unsigned long long) * 2
-	    > page_size)
-		fprintf(stderr, "Impossible to run this test\n"), exit(2);
+	    > page_size) {
+		fprintf(stderr, "Impossible to run this test\n");
+		exit(2);
+	}
 }
 
 static void sigalrm(int sig)
@@ -1425,8 +1493,10 @@ int main(int argc, char **argv)
 	if (argc < 4)
 		usage();
 
-	if (signal(SIGALRM, sigalrm) == SIG_ERR)
-		fprintf(stderr, "failed to arm SIGALRM"), exit(1);
+	if (signal(SIGALRM, sigalrm) == SIG_ERR) {
+		fprintf(stderr, "failed to arm SIGALRM");
+		exit(1);
+	}
 	alarm(ALARM_INTERVAL_SECS);
 
 	set_test_type(argv[1]);
diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c
index 998319553523..7161cfc2e60b 100644
--- a/tools/testing/selftests/x86/fsgsbase.c
+++ b/tools/testing/selftests/x86/fsgsbase.c
@@ -443,6 +443,68 @@ static void test_unexpected_base(void)
 
 #define USER_REGS_OFFSET(r) offsetof(struct user_regs_struct, r)
 
+static void test_ptrace_write_gs_read_base(void)
+{
+	int status;
+	pid_t child = fork();
+
+	if (child < 0)
+		err(1, "fork");
+
+	if (child == 0) {
+		printf("[RUN]\tPTRACE_POKE GS, read GSBASE back\n");
+
+		printf("[RUN]\tARCH_SET_GS to 1\n");
+		if (syscall(SYS_arch_prctl, ARCH_SET_GS, 1) != 0)
+			err(1, "ARCH_SET_GS");
+
+		if (ptrace(PTRACE_TRACEME, 0, NULL, NULL) != 0)
+			err(1, "PTRACE_TRACEME");
+
+		raise(SIGTRAP);
+		_exit(0);
+	}
+
+	wait(&status);
+
+	if (WSTOPSIG(status) == SIGTRAP) {
+		unsigned long base;
+		unsigned long gs_offset = USER_REGS_OFFSET(gs);
+		unsigned long base_offset = USER_REGS_OFFSET(gs_base);
+
+		/* Read the initial base.  It should be 1. */
+		base = ptrace(PTRACE_PEEKUSER, child, base_offset, NULL);
+		if (base == 1) {
+			printf("[OK]\tGSBASE started at 1\n");
+		} else {
+			nerrs++;
+			printf("[FAIL]\tGSBASE started at 0x%lx\n", base);
+		}
+
+		printf("[RUN]\tSet GS = 0x7, read GSBASE\n");
+
+		/* Poke an LDT selector into GS. */
+		if (ptrace(PTRACE_POKEUSER, child, gs_offset, 0x7) != 0)
+			err(1, "PTRACE_POKEUSER");
+
+		/* And read the base. */
+		base = ptrace(PTRACE_PEEKUSER, child, base_offset, NULL);
+
+		if (base == 0 || base == 1) {
+			printf("[OK]\tGSBASE reads as 0x%lx with invalid GS\n", base);
+		} else {
+			nerrs++;
+			printf("[FAIL]\tGSBASE=0x%lx (should be 0 or 1)\n", base);
+		}
+	}
+
+	ptrace(PTRACE_CONT, child, NULL, NULL);
+
+	wait(&status);
+	if (!WIFEXITED(status))
+		printf("[WARN]\tChild didn't exit cleanly.\n");
+}
+
 static void test_ptrace_write_gsbase(void)
 {
 	int status;
@@ -517,6 +579,9 @@ static void test_ptrace_write_gsbase(void)
 
 END:
 	ptrace(PTRACE_CONT, child, NULL, NULL);
+	wait(&status);
+	if (!WIFEXITED(status))
+		printf("[WARN]\tChild didn't exit cleanly.\n");
 }
 
 int main()
@@ -526,6 +591,9 @@ int main()
 	shared_scratch = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
 			      MAP_ANONYMOUS | MAP_SHARED, -1, 0);
 
+	/* Do these tests before we have an LDT. */
+	test_ptrace_write_gs_read_base();
+
 	/* Probe FSGSBASE */
 	sethandler(SIGILL, sigill, 0);
 	if (sigsetjmp(jmpbuf, 1) == 0) {
diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c
index c41f24b517f4..65c141ebfbbd 100644
--- a/tools/testing/selftests/x86/test_vsyscall.c
+++ b/tools/testing/selftests/x86/test_vsyscall.c
@@ -462,6 +462,17 @@ static int test_vsys_x(void)
 	return 0;
 }
 
+/*
+ * Debuggers expect ptrace() to be able to peek at the vsyscall page.
+ * Use process_vm_readv() as a proxy for ptrace() to test this.  We
+ * want it to work in the vsyscall=emulate case and to fail in the
+ * vsyscall=xonly case.
+ *
+ * It's worth noting that this ABI is a bit nutty.  write(2) can't
+ * read from the vsyscall page on any kernel version or mode.  The
+ * fact that ptrace() ever worked was a nice courtesy of old kernels,
+ * but the code to support it is fairly gross.
+ */
 static int test_process_vm_readv(void)
 {
 #ifdef __x86_64__
@@ -477,8 +488,12 @@ static int test_process_vm_readv(void)
 	remote.iov_len = 4096;
 	ret = process_vm_readv(getpid(), &local, 1, &remote, 1, 0);
 	if (ret != 4096) {
-		printf("[OK]\tprocess_vm_readv() failed (ret = %d, errno = %d)\n", ret, errno);
-		return 0;
+		/*
+		 * We expect process_vm_readv() to work if and only if the
+		 * vsyscall page is readable.
+		 */
+		printf("[%s]\tprocess_vm_readv() failed (ret = %d, errno = %d)\n", vsyscall_map_r ? "FAIL" : "OK", ret, errno);
+		return vsyscall_map_r ? 1 : 0;
 	}
 
 	if (vsyscall_map_r) {
@@ -488,6 +503,9 @@ static int test_process_vm_readv(void)
 			printf("[FAIL]\tIt worked but returned incorrect data\n");
 			return 1;
 		}
+	} else {
+		printf("[FAIL]\tprocess_rm_readv() succeeded, but it should have failed in this configuration\n");
+		return 1;
 	}
 #endif