diff options
Diffstat (limited to 'tools/testing')
364 files changed, 23733 insertions, 2432 deletions
diff --git a/tools/testing/kunit/configs/broken_on_uml.config b/tools/testing/kunit/configs/broken_on_uml.config index 239b9f03da2c..a7f0603d33f6 100644 --- a/tools/testing/kunit/configs/broken_on_uml.config +++ b/tools/testing/kunit/configs/broken_on_uml.config @@ -39,3 +39,4 @@ # CONFIG_QCOM_CPR is not set # CONFIG_RESET_BRCMSTB_RESCAL is not set # CONFIG_RESET_INTEL_GW is not set +# CONFIG_ADI_AXI_ADC is not set diff --git a/tools/testing/kunit/kunit.py b/tools/testing/kunit/kunit.py index 425ef40067e7..ebf5f5763dee 100755 --- a/tools/testing/kunit/kunit.py +++ b/tools/testing/kunit/kunit.py @@ -17,6 +17,7 @@ from collections import namedtuple from enum import Enum, auto import kunit_config +import kunit_json import kunit_kernel import kunit_parser @@ -30,9 +31,9 @@ KunitBuildRequest = namedtuple('KunitBuildRequest', KunitExecRequest = namedtuple('KunitExecRequest', ['timeout', 'build_dir', 'alltests']) KunitParseRequest = namedtuple('KunitParseRequest', - ['raw_output', 'input_data']) + ['raw_output', 'input_data', 'build_dir', 'json']) KunitRequest = namedtuple('KunitRequest', ['raw_output','timeout', 'jobs', - 'build_dir', 'alltests', + 'build_dir', 'alltests', 'json', 'make_options']) KernelDirectoryPath = sys.argv[0].split('tools/testing/kunit/')[0] @@ -113,12 +114,22 @@ def parse_tests(request: KunitParseRequest) -> KunitResult: test_result = kunit_parser.TestResult(kunit_parser.TestStatus.SUCCESS, [], 'Tests not Parsed.') + if request.raw_output: kunit_parser.raw_output(request.input_data) else: test_result = kunit_parser.parse_run_tests(request.input_data) parse_end = time.time() + if request.json: + json_obj = kunit_json.get_json_result( + test_result=test_result, + def_config='kunit_defconfig', + build_dir=request.build_dir, + json_path=request.json) + if request.json == 'stdout': + print(json_obj) + if test_result.status != kunit_parser.TestStatus.SUCCESS: return KunitResult(KunitStatus.TEST_FAILURE, test_result, parse_end - parse_start) @@ -151,7 
+162,9 @@ def run_tests(linux: kunit_kernel.LinuxSourceTree, return exec_result parse_request = KunitParseRequest(request.raw_output, - exec_result.result) + exec_result.result, + request.build_dir, + request.json) parse_result = parse_tests(parse_request) run_end = time.time() @@ -195,7 +208,12 @@ def add_exec_opts(parser): def add_parse_opts(parser): parser.add_argument('--raw_output', help='don\'t format output from kernel', action='store_true') - + parser.add_argument('--json', + nargs='?', + help='Stores test results in a JSON, and either ' + 'prints to stdout or saves to file if a ' + 'filename is specified', + type=str, const='stdout', default=None) def main(argv, linux=None): parser = argparse.ArgumentParser( @@ -237,10 +255,16 @@ def main(argv, linux=None): cli_args = parser.parse_args(argv) + if get_kernel_root_path(): + os.chdir(get_kernel_root_path()) + if cli_args.subcommand == 'run': if not os.path.exists(cli_args.build_dir): os.mkdir(cli_args.build_dir) + if not os.path.exists(kunit_kernel.kunitconfig_path): + create_default_kunitconfig() + if not linux: linux = kunit_kernel.LinuxSourceTree() @@ -249,14 +273,18 @@ def main(argv, linux=None): cli_args.jobs, cli_args.build_dir, cli_args.alltests, + cli_args.json, cli_args.make_options) result = run_tests(linux, request) if result.status != KunitStatus.SUCCESS: sys.exit(1) elif cli_args.subcommand == 'config': - if cli_args.build_dir: - if not os.path.exists(cli_args.build_dir): - os.mkdir(cli_args.build_dir) + if cli_args.build_dir and ( + not os.path.exists(cli_args.build_dir)): + os.mkdir(cli_args.build_dir) + + if not os.path.exists(kunit_kernel.kunitconfig_path): + create_default_kunitconfig() if not linux: linux = kunit_kernel.LinuxSourceTree() @@ -270,10 +298,6 @@ def main(argv, linux=None): if result.status != KunitStatus.SUCCESS: sys.exit(1) elif cli_args.subcommand == 'build': - if cli_args.build_dir: - if not os.path.exists(cli_args.build_dir): - os.mkdir(cli_args.build_dir) - if not linux: 
linux = kunit_kernel.LinuxSourceTree() @@ -288,10 +312,6 @@ def main(argv, linux=None): if result.status != KunitStatus.SUCCESS: sys.exit(1) elif cli_args.subcommand == 'exec': - if cli_args.build_dir: - if not os.path.exists(cli_args.build_dir): - os.mkdir(cli_args.build_dir) - if not linux: linux = kunit_kernel.LinuxSourceTree() @@ -300,7 +320,9 @@ def main(argv, linux=None): cli_args.alltests) exec_result = exec_tests(linux, exec_request) parse_request = KunitParseRequest(cli_args.raw_output, - exec_result.result) + exec_result.result, + cli_args.build_dir, + cli_args.json) result = parse_tests(parse_request) kunit_parser.print_with_timestamp(( 'Elapsed time: %.3fs\n') % ( @@ -314,7 +336,9 @@ def main(argv, linux=None): with open(cli_args.file, 'r') as f: kunit_output = f.read().splitlines() request = KunitParseRequest(cli_args.raw_output, - kunit_output) + kunit_output, + cli_args.build_dir, + cli_args.json) result = parse_tests(request) if result.status != KunitStatus.SUCCESS: sys.exit(1) diff --git a/tools/testing/kunit/kunit_json.py b/tools/testing/kunit/kunit_json.py new file mode 100644 index 000000000000..624b31b2dbd6 --- /dev/null +++ b/tools/testing/kunit/kunit_json.py @@ -0,0 +1,63 @@ +# SPDX-License-Identifier: GPL-2.0 +# +# Generates JSON from KUnit results according to +# KernelCI spec: https://github.com/kernelci/kernelci-doc/wiki/Test-API +# +# Copyright (C) 2020, Google LLC. 
+# Author: Heidi Fahim <heidifahim@google.com> + +import json +import os + +import kunit_parser + +from kunit_parser import TestStatus + +def get_json_result(test_result, def_config, build_dir, json_path): + sub_groups = [] + + # Each test suite is mapped to a KernelCI sub_group + for test_suite in test_result.suites: + sub_group = { + "name": test_suite.name, + "arch": "UM", + "defconfig": def_config, + "build_environment": build_dir, + "test_cases": [], + "lab_name": None, + "kernel": None, + "job": None, + "git_branch": "kselftest", + } + test_cases = [] + # TODO: Add attachments attribute in test_case with detailed + # failure message, see https://api.kernelci.org/schema-test-case.html#get + for case in test_suite.cases: + test_case = {"name": case.name, "status": "FAIL"} + if case.status == TestStatus.SUCCESS: + test_case["status"] = "PASS" + elif case.status == TestStatus.TEST_CRASHED: + test_case["status"] = "ERROR" + test_cases.append(test_case) + sub_group["test_cases"] = test_cases + sub_groups.append(sub_group) + test_group = { + "name": "KUnit Test Group", + "arch": "UM", + "defconfig": def_config, + "build_environment": build_dir, + "sub_groups": sub_groups, + "lab_name": None, + "kernel": None, + "job": None, + "git_branch": "kselftest", + } + json_obj = json.dumps(test_group, indent=4) + if json_path != 'stdout': + with open(json_path, 'w') as result_path: + result_path.write(json_obj) + root = __file__.split('tools/testing/kunit/')[0] + kunit_parser.print_with_timestamp( + "Test results stored in %s" % + os.path.join(root, result_path.name)) + return json_obj diff --git a/tools/testing/kunit/kunit_kernel.py b/tools/testing/kunit/kunit_kernel.py index e20e2056cb38..b557b1e93f98 100644 --- a/tools/testing/kunit/kunit_kernel.py +++ b/tools/testing/kunit/kunit_kernel.py @@ -36,9 +36,9 @@ class LinuxSourceTreeOperations(object): try: subprocess.check_output(['make', 'mrproper'], stderr=subprocess.STDOUT) except OSError as e: - raise ConfigError('Could 
not call make command: ' + e) + raise ConfigError('Could not call make command: ' + str(e)) except subprocess.CalledProcessError as e: - raise ConfigError(e.output) + raise ConfigError(e.output.decode()) def make_olddefconfig(self, build_dir, make_options): command = ['make', 'ARCH=um', 'olddefconfig'] @@ -49,22 +49,27 @@ class LinuxSourceTreeOperations(object): try: subprocess.check_output(command, stderr=subprocess.STDOUT) except OSError as e: - raise ConfigError('Could not call make command: ' + e) + raise ConfigError('Could not call make command: ' + str(e)) except subprocess.CalledProcessError as e: - raise ConfigError(e.output) + raise ConfigError(e.output.decode()) - def make_allyesconfig(self): + def make_allyesconfig(self, build_dir, make_options): kunit_parser.print_with_timestamp( 'Enabling all CONFIGs for UML...') + command = ['make', 'ARCH=um', 'allyesconfig'] + if make_options: + command.extend(make_options) + if build_dir: + command += ['O=' + build_dir] process = subprocess.Popen( - ['make', 'ARCH=um', 'allyesconfig'], + command, stdout=subprocess.DEVNULL, stderr=subprocess.STDOUT) process.wait() kunit_parser.print_with_timestamp( 'Disabling broken configs to run KUnit tests...') with ExitStack() as es: - config = open(KCONFIG_PATH, 'a') + config = open(get_kconfig_path(build_dir), 'a') disable = open(BROKEN_ALLCONFIG_PATH, 'r').read() config.write(disable) kunit_parser.print_with_timestamp( @@ -79,9 +84,9 @@ class LinuxSourceTreeOperations(object): try: subprocess.check_output(command, stderr=subprocess.STDOUT) except OSError as e: - raise BuildError('Could not call execute make: ' + e) + raise BuildError('Could not call execute make: ' + str(e)) except subprocess.CalledProcessError as e: - raise BuildError(e.output) + raise BuildError(e.output.decode()) def linux_bin(self, params, timeout, build_dir, outfile): """Runs the Linux UML binary. 
Must be named 'linux'.""" @@ -161,9 +166,9 @@ class LinuxSourceTree(object): return self.build_config(build_dir, make_options) def build_um_kernel(self, alltests, jobs, build_dir, make_options): - if alltests: - self._ops.make_allyesconfig() try: + if alltests: + self._ops.make_allyesconfig(build_dir, make_options) self._ops.make_olddefconfig(build_dir, make_options) self._ops.make(jobs, build_dir, make_options) except (ConfigError, BuildError) as e: diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py index f13e0c0d6663..8019e3dd4c32 100644 --- a/tools/testing/kunit/kunit_parser.py +++ b/tools/testing/kunit/kunit_parser.py @@ -45,10 +45,11 @@ class TestStatus(Enum): FAILURE = auto() TEST_CRASHED = auto() NO_TESTS = auto() + FAILURE_TO_PARSE_TESTS = auto() kunit_start_re = re.compile(r'TAP version [0-9]+$') kunit_end_re = re.compile('(List of all partitions:|' - 'Kernel panic - not syncing: VFS:|reboot: System halted)') + 'Kernel panic - not syncing: VFS:)') def isolate_kunit_output(kernel_output): started = False @@ -109,7 +110,7 @@ OkNotOkResult = namedtuple('OkNotOkResult', ['is_ok','description', 'text']) OK_NOT_OK_SUBTEST = re.compile(r'^[\s]+(ok|not ok) [0-9]+ - (.*)$') -OK_NOT_OK_MODULE = re.compile(r'^(ok|not ok) [0-9]+ - (.*)$') +OK_NOT_OK_MODULE = re.compile(r'^(ok|not ok) ([0-9]+) - (.*)$') def parse_ok_not_ok_test_case(lines: List[str], test_case: TestCase) -> bool: save_non_diagnositic(lines, test_case) @@ -197,7 +198,9 @@ def max_status(left: TestStatus, right: TestStatus) -> TestStatus: else: return TestStatus.SUCCESS -def parse_ok_not_ok_test_suite(lines: List[str], test_suite: TestSuite) -> bool: +def parse_ok_not_ok_test_suite(lines: List[str], + test_suite: TestSuite, + expected_suite_index: int) -> bool: consume_non_diagnositic(lines) if not lines: test_suite.status = TestStatus.TEST_CRASHED @@ -210,6 +213,12 @@ def parse_ok_not_ok_test_suite(lines: List[str], test_suite: TestSuite) -> bool: test_suite.status = 
TestStatus.SUCCESS else: test_suite.status = TestStatus.FAILURE + suite_index = int(match.group(2)) + if suite_index != expected_suite_index: + print_with_timestamp( + red('[ERROR] ') + 'expected_suite_index ' + + str(expected_suite_index) + ', but got ' + + str(suite_index)) return True else: return False @@ -222,7 +231,7 @@ def bubble_up_test_case_errors(test_suite: TestSuite) -> TestStatus: max_test_case_status = bubble_up_errors(lambda x: x.status, test_suite.cases) return max_status(max_test_case_status, test_suite.status) -def parse_test_suite(lines: List[str]) -> TestSuite: +def parse_test_suite(lines: List[str], expected_suite_index: int) -> TestSuite: if not lines: return None consume_non_diagnositic(lines) @@ -241,7 +250,7 @@ def parse_test_suite(lines: List[str]) -> TestSuite: break test_suite.cases.append(test_case) expected_test_case_num -= 1 - if parse_ok_not_ok_test_suite(lines, test_suite): + if parse_ok_not_ok_test_suite(lines, test_suite, expected_suite_index): test_suite.status = bubble_up_test_case_errors(test_suite) return test_suite elif not lines: @@ -261,6 +270,17 @@ def parse_tap_header(lines: List[str]) -> bool: else: return False +TEST_PLAN = re.compile(r'[0-9]+\.\.([0-9]+)') + +def parse_test_plan(lines: List[str]) -> int: + consume_non_diagnositic(lines) + match = TEST_PLAN.match(lines[0]) + if match: + lines.pop(0) + return int(match.group(1)) + else: + return None + def bubble_up_suite_errors(test_suite_list: List[TestSuite]) -> TestStatus: return bubble_up_errors(lambda x: x.status, test_suite_list) @@ -268,20 +288,33 @@ def parse_test_result(lines: List[str]) -> TestResult: consume_non_diagnositic(lines) if not lines or not parse_tap_header(lines): return TestResult(TestStatus.NO_TESTS, [], lines) + expected_test_suite_num = parse_test_plan(lines) + if not expected_test_suite_num: + return TestResult(TestStatus.FAILURE_TO_PARSE_TESTS, [], lines) test_suites = [] - test_suite = parse_test_suite(lines) - while test_suite: - 
test_suites.append(test_suite) - test_suite = parse_test_suite(lines) - return TestResult(bubble_up_suite_errors(test_suites), test_suites, lines) + for i in range(1, expected_test_suite_num + 1): + test_suite = parse_test_suite(lines, i) + if test_suite: + test_suites.append(test_suite) + else: + print_with_timestamp( + red('[ERROR] ') + ' expected ' + + str(expected_test_suite_num) + + ' test suites, but got ' + str(i - 2)) + break + test_suite = parse_test_suite(lines, -1) + if test_suite: + print_with_timestamp(red('[ERROR] ') + + 'got unexpected test suite: ' + test_suite.name) + if test_suites: + return TestResult(bubble_up_suite_errors(test_suites), test_suites, lines) + else: + return TestResult(TestStatus.NO_TESTS, [], lines) -def parse_run_tests(kernel_output) -> TestResult: +def print_and_count_results(test_result: TestResult) -> None: total_tests = 0 failed_tests = 0 crashed_tests = 0 - test_result = parse_test_result(list(isolate_kunit_output(kernel_output))) - if test_result.status == TestStatus.NO_TESTS: - print_with_timestamp(red('[ERROR] ') + 'no kunit output detected') for test_suite in test_result.suites: if test_suite.status == TestStatus.SUCCESS: print_suite_divider(green('[PASSED] ') + test_suite.name) @@ -303,6 +336,21 @@ def parse_run_tests(kernel_output) -> TestResult: print_with_timestamp(red('[FAILED] ') + test_case.name) print_log(map(yellow, test_case.log)) print_with_timestamp('') + return total_tests, failed_tests, crashed_tests + +def parse_run_tests(kernel_output) -> TestResult: + total_tests = 0 + failed_tests = 0 + crashed_tests = 0 + test_result = parse_test_result(list(isolate_kunit_output(kernel_output))) + if test_result.status == TestStatus.NO_TESTS: + print(red('[ERROR] ') + yellow('no tests run!')) + elif test_result.status == TestStatus.FAILURE_TO_PARSE_TESTS: + print(red('[ERROR] ') + yellow('could not parse test results!')) + else: + (total_tests, + failed_tests, + crashed_tests) = print_and_count_results(test_result) 
print_with_timestamp(DIVIDER) fmt = green if test_result.status == TestStatus.SUCCESS else red print_with_timestamp( diff --git a/tools/testing/kunit/kunit_tool_test.py b/tools/testing/kunit/kunit_tool_test.py index 287c74d821c3..99c3c5671ea4 100755 --- a/tools/testing/kunit/kunit_tool_test.py +++ b/tools/testing/kunit/kunit_tool_test.py @@ -11,11 +11,13 @@ from unittest import mock import tempfile, shutil # Handling test_tmpdir +import json import os import kunit_config import kunit_parser import kunit_kernel +import kunit_json import kunit test_tmpdir = '' @@ -230,6 +232,37 @@ class KUnitParserTest(unittest.TestCase): result = kunit_parser.parse_run_tests(file.readlines()) self.assertEqual('kunit-resource-test', result.suites[0].name) +class KUnitJsonTest(unittest.TestCase): + + def _json_for(self, log_file): + with(open(get_absolute_path(log_file))) as file: + test_result = kunit_parser.parse_run_tests(file) + json_obj = kunit_json.get_json_result( + test_result=test_result, + def_config='kunit_defconfig', + build_dir=None, + json_path='stdout') + return json.loads(json_obj) + + def test_failed_test_json(self): + result = self._json_for( + 'test_data/test_is_test_passed-failure.log') + self.assertEqual( + {'name': 'example_simple_test', 'status': 'FAIL'}, + result["sub_groups"][1]["test_cases"][0]) + + def test_crashed_test_json(self): + result = self._json_for( + 'test_data/test_is_test_passed-crash.log') + self.assertEqual( + {'name': 'example_simple_test', 'status': 'ERROR'}, + result["sub_groups"][1]["test_cases"][0]) + + def test_no_tests_json(self): + result = self._json_for( + 'test_data/test_is_test_passed-no_tests_run.log') + self.assertEqual(0, len(result['sub_groups'])) + class StrContains(str): def __eq__(self, other): return self in other diff --git a/tools/testing/kunit/test_data/test_is_test_passed-all_passed.log b/tools/testing/kunit/test_data/test_is_test_passed-all_passed.log index 62ebc0288355..bc0dc8fe35b7 100644 --- 
a/tools/testing/kunit/test_data/test_is_test_passed-all_passed.log +++ b/tools/testing/kunit/test_data/test_is_test_passed-all_passed.log @@ -1,4 +1,5 @@ TAP version 14 +1..2 # Subtest: sysctl_test 1..8 # sysctl_test_dointvec_null_tbl_data: sysctl_test_dointvec_null_tbl_data passed diff --git a/tools/testing/kunit/test_data/test_is_test_passed-crash.log b/tools/testing/kunit/test_data/test_is_test_passed-crash.log index 0b249870c8be..4d97f6708c4a 100644 --- a/tools/testing/kunit/test_data/test_is_test_passed-crash.log +++ b/tools/testing/kunit/test_data/test_is_test_passed-crash.log @@ -1,6 +1,7 @@ printk: console [tty0] enabled printk: console [mc-1] enabled TAP version 14 +1..2 # Subtest: sysctl_test 1..8 # sysctl_test_dointvec_null_tbl_data: sysctl_test_dointvec_null_tbl_data passed diff --git a/tools/testing/kunit/test_data/test_is_test_passed-failure.log b/tools/testing/kunit/test_data/test_is_test_passed-failure.log index 9e89d32d5667..7a416497e3be 100644 --- a/tools/testing/kunit/test_data/test_is_test_passed-failure.log +++ b/tools/testing/kunit/test_data/test_is_test_passed-failure.log @@ -1,4 +1,5 @@ TAP version 14 +1..2 # Subtest: sysctl_test 1..8 # sysctl_test_dointvec_null_tbl_data: sysctl_test_dointvec_null_tbl_data passed diff --git a/tools/testing/nvdimm/dax-dev.c b/tools/testing/nvdimm/dax-dev.c index 7e5d979e73cb..fb342a8c98d3 100644 --- a/tools/testing/nvdimm/dax-dev.c +++ b/tools/testing/nvdimm/dax-dev.c @@ -9,12 +9,19 @@ phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff, unsigned long size) { - struct resource *res = &dev_dax->region->res; - phys_addr_t addr; + int i; - addr = pgoff * PAGE_SIZE + res->start; - if (addr >= res->start && addr <= res->end) { - if (addr + size - 1 <= res->end) { + for (i = 0; i < dev_dax->nr_range; i++) { + struct dev_dax_range *dax_range = &dev_dax->ranges[i]; + struct range *range = &dax_range->range; + unsigned long long pgoff_end; + phys_addr_t addr; + + pgoff_end = dax_range->pgoff + 
PHYS_PFN(range_len(range)) - 1; + if (pgoff < dax_range->pgoff || pgoff > pgoff_end) + continue; + addr = PFN_PHYS(pgoff - dax_range->pgoff) + range->start; + if (addr + size - 1 <= range->end) { if (get_nfit_res(addr)) { struct page *page; @@ -23,9 +30,10 @@ phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff, page = vmalloc_to_page((void *)addr); return PFN_PHYS(page_to_pfn(page)); - } else - return addr; + } + return addr; } + break; } return -1; } diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c index 03e40b3b0106..c62d372d426f 100644 --- a/tools/testing/nvdimm/test/iomap.c +++ b/tools/testing/nvdimm/test/iomap.c @@ -126,7 +126,7 @@ static void dev_pagemap_percpu_release(struct percpu_ref *ref) void *__wrap_devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap) { int error; - resource_size_t offset = pgmap->res.start; + resource_size_t offset = pgmap->range.start; struct nfit_test_resource *nfit_res = get_nfit_res(offset); if (!nfit_res) diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c index a1a5dc645b40..2ac0fff6dad8 100644 --- a/tools/testing/nvdimm/test/nfit.c +++ b/tools/testing/nvdimm/test/nfit.c @@ -23,7 +23,8 @@ #include "nfit_test.h" #include "../watermark.h" -#include <asm/mcsafe_test.h> +#include <asm/copy_mc_test.h> +#include <asm/mce.h> /* * Generate an NFIT table to describe the following topology: @@ -3283,7 +3284,7 @@ static struct platform_driver nfit_test_driver = { .id_table = nfit_test_id, }; -static char mcsafe_buf[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE))); +static char copy_mc_buf[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE))); enum INJECT { INJECT_NONE, @@ -3291,7 +3292,7 @@ enum INJECT { INJECT_DST, }; -static void mcsafe_test_init(char *dst, char *src, size_t size) +static void copy_mc_test_init(char *dst, char *src, size_t size) { size_t i; @@ -3300,7 +3301,7 @@ static void mcsafe_test_init(char *dst, char *src, size_t size) 
src[i] = (char) i; } -static bool mcsafe_test_validate(unsigned char *dst, unsigned char *src, +static bool copy_mc_test_validate(unsigned char *dst, unsigned char *src, size_t size, unsigned long rem) { size_t i; @@ -3321,12 +3322,12 @@ static bool mcsafe_test_validate(unsigned char *dst, unsigned char *src, return true; } -void mcsafe_test(void) +void copy_mc_test(void) { char *inject_desc[] = { "none", "source", "destination" }; enum INJECT inj; - if (IS_ENABLED(CONFIG_MCSAFE_TEST)) { + if (IS_ENABLED(CONFIG_COPY_MC_TEST)) { pr_info("%s: run...\n", __func__); } else { pr_info("%s: disabled, skip.\n", __func__); @@ -3344,31 +3345,31 @@ void mcsafe_test(void) switch (inj) { case INJECT_NONE: - mcsafe_inject_src(NULL); - mcsafe_inject_dst(NULL); - dst = &mcsafe_buf[2048]; - src = &mcsafe_buf[1024 - i]; + copy_mc_inject_src(NULL); + copy_mc_inject_dst(NULL); + dst = &copy_mc_buf[2048]; + src = &copy_mc_buf[1024 - i]; expect = 0; break; case INJECT_SRC: - mcsafe_inject_src(&mcsafe_buf[1024]); - mcsafe_inject_dst(NULL); - dst = &mcsafe_buf[2048]; - src = &mcsafe_buf[1024 - i]; + copy_mc_inject_src(&copy_mc_buf[1024]); + copy_mc_inject_dst(NULL); + dst = &copy_mc_buf[2048]; + src = &copy_mc_buf[1024 - i]; expect = 512 - i; break; case INJECT_DST: - mcsafe_inject_src(NULL); - mcsafe_inject_dst(&mcsafe_buf[2048]); - dst = &mcsafe_buf[2048 - i]; - src = &mcsafe_buf[1024]; + copy_mc_inject_src(NULL); + copy_mc_inject_dst(&copy_mc_buf[2048]); + dst = &copy_mc_buf[2048 - i]; + src = &copy_mc_buf[1024]; expect = 512 - i; break; } - mcsafe_test_init(dst, src, 512); - rem = __memcpy_mcsafe(dst, src, 512); - valid = mcsafe_test_validate(dst, src, 512, expect); + copy_mc_test_init(dst, src, 512); + rem = copy_mc_fragile(dst, src, 512); + valid = copy_mc_test_validate(dst, src, 512, expect); if (rem == expect && valid) continue; pr_info("%s: copy(%#lx, %#lx, %d) off: %d rem: %ld %s expect: %ld\n", @@ -3380,8 +3381,8 @@ void mcsafe_test(void) } } - mcsafe_inject_src(NULL); - mcsafe_inject_dst(NULL); +
copy_mc_inject_src(NULL); + copy_mc_inject_dst(NULL); } static __init int nfit_test_init(void) @@ -3392,7 +3393,7 @@ static __init int nfit_test_init(void) libnvdimm_test(); acpi_nfit_test(); device_dax_test(); - mcsafe_test(); + copy_mc_test(); dax_pmem_test(); dax_pmem_core_test(); #ifdef CONFIG_DEV_DAX_PMEM_COMPAT diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c index 8995092d541e..3b796dd5e577 100644 --- a/tools/testing/radix-tree/idr-test.c +++ b/tools/testing/radix-tree/idr-test.c @@ -523,8 +523,27 @@ static void *ida_random_fn(void *arg) return NULL; } +static void *ida_leak_fn(void *arg) +{ + struct ida *ida = arg; + time_t s = time(NULL); + int i, ret; + + rcu_register_thread(); + + do for (i = 0; i < 1000; i++) { + ret = ida_alloc_range(ida, 128, 128, GFP_KERNEL); + if (ret >= 0) + ida_free(ida, 128); + } while (time(NULL) < s + 2); + + rcu_unregister_thread(); + return NULL; +} + void ida_thread_tests(void) { + DEFINE_IDA(ida); pthread_t threads[20]; int i; @@ -536,6 +555,16 @@ void ida_thread_tests(void) while (i--) pthread_join(threads[i], NULL); + + for (i = 0; i < ARRAY_SIZE(threads); i++) + if (pthread_create(&threads[i], NULL, ida_leak_fn, &ida)) { + perror("creating ida thread"); + exit(1); + } + + while (i--) + pthread_join(threads[i], NULL); + assert(ida_is_empty(&ida)); } void ida_tests(void) diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h index 4568248222ae..39867fd80c8f 100644 --- a/tools/testing/radix-tree/linux/kernel.h +++ b/tools/testing/radix-tree/linux/kernel.h @@ -22,4 +22,5 @@ #define __releases(x) #define __must_hold(x) +#define EXPORT_PER_CPU_SYMBOL_GPL(x) #endif /* _KERNEL_H */ diff --git a/tools/testing/radix-tree/linux/local_lock.h b/tools/testing/radix-tree/linux/local_lock.h new file mode 100644 index 000000000000..b3cf8b233ca4 --- /dev/null +++ b/tools/testing/radix-tree/linux/local_lock.h @@ -0,0 +1,8 @@ +#ifndef _LINUX_LOCAL_LOCK +#define 
_LINUX_LOCAL_LOCK +typedef struct { } local_lock_t; + +static inline void local_lock(local_lock_t *lock) { } +static inline void local_unlock(local_lock_t *lock) { } +#define INIT_LOCAL_LOCK(x) { } +#endif diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h index 34dab4d18744..7ef7067e942c 100644 --- a/tools/testing/radix-tree/test.h +++ b/tools/testing/radix-tree/test.h @@ -56,8 +56,4 @@ int root_tag_get(struct radix_tree_root *root, unsigned int tag); unsigned long node_maxindex(struct radix_tree_node *); unsigned long shift_maxindex(unsigned int shift); int radix_tree_cpu_dead(unsigned int cpu); -struct radix_tree_preload { - unsigned nr; - struct radix_tree_node *nodes; -}; extern struct radix_tree_preload radix_tree_preloads; diff --git a/tools/testing/scatterlist/Makefile b/tools/testing/scatterlist/Makefile index cbb003d9305e..c65233876622 100644 --- a/tools/testing/scatterlist/Makefile +++ b/tools/testing/scatterlist/Makefile @@ -14,7 +14,7 @@ targets: include $(TARGETS) main: $(OFILES) clean: - $(RM) $(TARGETS) $(OFILES) scatterlist.c linux/scatterlist.h linux/highmem.h linux/kmemleak.h asm/io.h + $(RM) $(TARGETS) $(OFILES) scatterlist.c linux/scatterlist.h linux/highmem.h linux/kmemleak.h linux/slab.h asm/io.h @rmdir asm scatterlist.c: ../../../lib/scatterlist.c @@ -28,4 +28,5 @@ include: ../../../include/linux/scatterlist.h @touch asm/io.h @touch linux/highmem.h @touch linux/kmemleak.h + @touch linux/slab.h @cp $< linux/scatterlist.h diff --git a/tools/testing/scatterlist/linux/mm.h b/tools/testing/scatterlist/linux/mm.h index 6f9ac14aa800..6ae907f375d2 100644 --- a/tools/testing/scatterlist/linux/mm.h +++ b/tools/testing/scatterlist/linux/mm.h @@ -114,6 +114,12 @@ static inline void *kmalloc(unsigned int size, unsigned int flags) return malloc(size); } +static inline void * +kmalloc_array(unsigned int n, unsigned int size, unsigned int flags) +{ + return malloc(n * size); +} + #define kfree(x) free(x) #define kmemleak_alloc(a, 
b, c, d) @@ -122,4 +128,33 @@ static inline void *kmalloc(unsigned int size, unsigned int flags) #define PageSlab(p) (0) #define flush_kernel_dcache_page(p) +#define MAX_ERRNO 4095 + +#define IS_ERR_VALUE(x) unlikely((unsigned long)(void *)(x) >= (unsigned long)-MAX_ERRNO) + +static inline void * __must_check ERR_PTR(long error) +{ + return (void *) error; +} + +static inline long __must_check PTR_ERR(__force const void *ptr) +{ + return (long) ptr; +} + +static inline bool __must_check IS_ERR(__force const void *ptr) +{ + return IS_ERR_VALUE((unsigned long)ptr); +} + +static inline int __must_check PTR_ERR_OR_ZERO(__force const void *ptr) +{ + if (IS_ERR(ptr)) + return PTR_ERR(ptr); + else + return 0; +} + +#define IS_ENABLED(x) (0) + #endif diff --git a/tools/testing/scatterlist/main.c b/tools/testing/scatterlist/main.c index 0a1464181226..b2c7e9f7b8d3 100644 --- a/tools/testing/scatterlist/main.c +++ b/tools/testing/scatterlist/main.c @@ -5,6 +5,15 @@ #define MAX_PAGES (64) +struct test { + int alloc_ret; + unsigned num_pages; + unsigned *pfn; + unsigned size; + unsigned int max_seg; + unsigned int expected_segments; +}; + static void set_pages(struct page **pages, const unsigned *array, unsigned num) { unsigned int i; @@ -17,17 +26,32 @@ static void set_pages(struct page **pages, const unsigned *array, unsigned num) #define pfn(...) 
(unsigned []){ __VA_ARGS__ } +static void fail(struct test *test, struct sg_table *st, const char *cond) +{ + unsigned int i; + + fprintf(stderr, "Failed on '%s'!\n\n", cond); + + printf("size = %u, max segment = %u, expected nents = %u\nst->nents = %u, st->orig_nents= %u\n", + test->size, test->max_seg, test->expected_segments, st->nents, + st->orig_nents); + + printf("%u input PFNs:", test->num_pages); + for (i = 0; i < test->num_pages; i++) + printf(" %x", test->pfn[i]); + printf("\n"); + + exit(1); +} + +#define VALIDATE(cond, st, test) \ + if (!(cond)) \ + fail((test), (st), #cond); + int main(void) { const unsigned int sgmax = SCATTERLIST_MAX_SEGMENT; - struct test { - int alloc_ret; - unsigned num_pages; - unsigned *pfn; - unsigned size; - unsigned int max_seg; - unsigned int expected_segments; - } *test, tests[] = { + struct test *test, tests[] = { { -EINVAL, 1, pfn(0), PAGE_SIZE, PAGE_SIZE + 1, 1 }, { -EINVAL, 1, pfn(0), PAGE_SIZE, 0, 1 }, { -EINVAL, 1, pfn(0), PAGE_SIZE, sgmax + 1, 1 }, @@ -55,20 +79,19 @@ int main(void) for (i = 0, test = tests; test->expected_segments; test++, i++) { struct page *pages[MAX_PAGES]; struct sg_table st; - int ret; + struct scatterlist *sg; set_pages(pages, test->pfn, test->num_pages); - ret = __sg_alloc_table_from_pages(&st, pages, test->num_pages, - 0, test->size, test->max_seg, - GFP_KERNEL); - assert(ret == test->alloc_ret); + sg = __sg_alloc_table_from_pages(&st, pages, test->num_pages, 0, + test->size, test->max_seg, NULL, 0, GFP_KERNEL); + assert(PTR_ERR_OR_ZERO(sg) == test->alloc_ret); if (test->alloc_ret) continue; - assert(st.nents == test->expected_segments); - assert(st.orig_nents == test->expected_segments); + VALIDATE(st.nents == test->expected_segments, &st, test); + VALIDATE(st.orig_nents == test->expected_segments, &st, test); sg_free_table(&st); } diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 9018f45d631d..d9c283503159 100644 --- a/tools/testing/selftests/Makefile 
+++ b/tools/testing/selftests/Makefile @@ -88,10 +88,10 @@ endif # of the targets gets built. FORCE_TARGETS ?= -# Clear LDFLAGS and MAKEFLAGS if called from main -# Makefile to avoid test build failures when test -# Makefile doesn't have explicit build rules. -ifeq (1,$(MAKELEVEL)) +# Clear LDFLAGS and MAKEFLAGS when implicit rules are missing. This provides +# implicit rules to sub-test Makefiles which avoids build failures in test +# Makefile that don't have explicit build rules. +ifeq (,$(LINK.c)) override LDFLAGS = override MAKEFLAGS = endif @@ -206,6 +206,7 @@ KSFT_INSTALL_PATH := $(abspath $(KSFT_INSTALL_PATH)) # Avoid changing the rest of the logic here and lib.mk. INSTALL_PATH := $(KSFT_INSTALL_PATH) ALL_SCRIPT := $(INSTALL_PATH)/run_kselftest.sh +TEST_LIST := $(INSTALL_PATH)/kselftest-list.txt install: all ifdef INSTALL_PATH @@ -214,6 +215,8 @@ ifdef INSTALL_PATH install -m 744 kselftest/module.sh $(INSTALL_PATH)/kselftest/ install -m 744 kselftest/runner.sh $(INSTALL_PATH)/kselftest/ install -m 744 kselftest/prefix.pl $(INSTALL_PATH)/kselftest/ + install -m 744 run_kselftest.sh $(INSTALL_PATH)/ + rm -f $(TEST_LIST) @ret=1; \ for TARGET in $(TARGETS); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ @@ -222,33 +225,18 @@ ifdef INSTALL_PATH ret=$$((ret * $$?)); \ done; exit $$ret; - @# Ask all targets to emit their test scripts - echo "#!/bin/sh" > $(ALL_SCRIPT) - echo "BASE_DIR=\$$(realpath \$$(dirname \$$0))" >> $(ALL_SCRIPT) - echo "cd \$$BASE_DIR" >> $(ALL_SCRIPT) - echo ". 
./kselftest/runner.sh" >> $(ALL_SCRIPT) - echo "ROOT=\$$PWD" >> $(ALL_SCRIPT) - echo "if [ \"\$$1\" = \"--summary\" ]; then" >> $(ALL_SCRIPT) - echo " logfile=\$$BASE_DIR/output.log" >> $(ALL_SCRIPT) - echo " cat /dev/null > \$$logfile" >> $(ALL_SCRIPT) - echo "fi" >> $(ALL_SCRIPT) - @# While building run_kselftest.sh skip also non-existent TARGET dirs: + @# Ask all targets to emit their test scripts + @# While building kselftest-list.txt skip also non-existent TARGET dirs: @# they could be the result of a build failure and should NOT be @# included in the generated runlist. for TARGET in $(TARGETS); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ [ ! -d $(INSTALL_PATH)/$$TARGET ] && echo "Skipping non-existent dir: $$TARGET" && continue; \ - echo "[ -w /dev/kmsg ] && echo \"kselftest: Running tests in $$TARGET\" >> /dev/kmsg" >> $(ALL_SCRIPT); \ - echo "cd $$TARGET" >> $(ALL_SCRIPT); \ - echo -n "run_many" >> $(ALL_SCRIPT); \ echo -n "Emit Tests for $$TARGET\n"; \ - $(MAKE) -s --no-print-directory OUTPUT=$$BUILD_TARGET -C $$TARGET emit_tests >> $(ALL_SCRIPT); \ - echo "" >> $(ALL_SCRIPT); \ - echo "cd \$$ROOT" >> $(ALL_SCRIPT); \ + $(MAKE) -s --no-print-directory OUTPUT=$$BUILD_TARGET COLLECTION=$$TARGET \ + -C $$TARGET emit_tests >> $(TEST_LIST); \ done; - - chmod u+x $(ALL_SCRIPT) else $(error Error: set INSTALL_PATH to use install) endif diff --git a/tools/testing/selftests/arm64/Makefile b/tools/testing/selftests/arm64/Makefile index 93b567d23c8b..2c9d012797a7 100644 --- a/tools/testing/selftests/arm64/Makefile +++ b/tools/testing/selftests/arm64/Makefile @@ -4,7 +4,7 @@ ARCH ?= $(shell uname -m 2>/dev/null || echo not) ifneq (,$(filter $(ARCH),aarch64 arm64)) -ARM64_SUBTARGETS ?= tags signal +ARM64_SUBTARGETS ?= tags signal pauth fp mte else ARM64_SUBTARGETS := endif diff --git a/tools/testing/selftests/arm64/fp/.gitignore b/tools/testing/selftests/arm64/fp/.gitignore new file mode 100644 index 000000000000..d66f76d2a650 --- /dev/null +++ 
b/tools/testing/selftests/arm64/fp/.gitignore @@ -0,0 +1,5 @@ +fpsimd-test +sve-probe-vls +sve-ptrace +sve-test +vlset diff --git a/tools/testing/selftests/arm64/fp/Makefile b/tools/testing/selftests/arm64/fp/Makefile new file mode 100644 index 000000000000..a57009d3a0dc --- /dev/null +++ b/tools/testing/selftests/arm64/fp/Makefile @@ -0,0 +1,17 @@ +# SPDX-License-Identifier: GPL-2.0 + +CFLAGS += -I../../../../../usr/include/ +TEST_GEN_PROGS := sve-ptrace sve-probe-vls +TEST_PROGS_EXTENDED := fpsimd-test fpsimd-stress sve-test sve-stress vlset + +all: $(TEST_GEN_PROGS) $(TEST_PROGS_EXTENDED) + +fpsimd-test: fpsimd-test.o + $(CC) -nostdlib $^ -o $@ +sve-ptrace: sve-ptrace.o sve-ptrace-asm.o +sve-probe-vls: sve-probe-vls.o +sve-test: sve-test.o + $(CC) -nostdlib $^ -o $@ +vlset: vlset.o + +include ../../lib.mk diff --git a/tools/testing/selftests/arm64/fp/README b/tools/testing/selftests/arm64/fp/README new file mode 100644 index 000000000000..03e3dad865d8 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/README @@ -0,0 +1,100 @@ +This directory contains a mix of tests integrated with kselftest and +standalone stress tests. + +kselftest tests +=============== + +sve-probe-vls - Checks the SVE vector length enumeration interface +sve-ptrace - Checks the SVE ptrace interface + +Running the non-kselftest tests +=============================== + +sve-stress performs an SVE context switch stress test, as described +below. + +(The fpsimd-stress test works the same way; just substitute "fpsimd" for +"sve" in the following commands.) + + +The test runs until killed by the user. 
+ +If no context switch error was detected, you will see output such as +the following: + +$ ./sve-stress +(wait for some time) +^C +Vector length: 512 bits +PID: 1573 +Terminated by signal 15, no error, iterations=9467, signals=1014 +Vector length: 512 bits +PID: 1575 +Terminated by signal 15, no error, iterations=9448, signals=1028 +Vector length: 512 bits +PID: 1577 +Terminated by signal 15, no error, iterations=9436, signals=1039 +Vector length: 512 bits +PID: 1579 +Terminated by signal 15, no error, iterations=9421, signals=1039 +Vector length: 512 bits +PID: 1581 +Terminated by signal 15, no error, iterations=9403, signals=1039 +Vector length: 512 bits +PID: 1583 +Terminated by signal 15, no error, iterations=9385, signals=1036 +Vector length: 512 bits +PID: 1585 +Terminated by signal 15, no error, iterations=9376, signals=1039 +Vector length: 512 bits +PID: 1587 +Terminated by signal 15, no error, iterations=9361, signals=1039 +Vector length: 512 bits +PID: 1589 +Terminated by signal 15, no error, iterations=9350, signals=1039 + + +If an error was detected, details of the mismatch will be printed +instead of "no error". + +Ideally, the test should be allowed to run for many minutes or hours +to maximise test coverage. + + +KVM stress testing +================== + +To try to reproduce the bugs that we have been observing, sve-stress +should be run in parallel in two KVM guests, while simultaneously +running on the host. + +1) Start 2 guests, using the following command for each: + +$ lkvm run --console=virtio -pconsole=hvc0 --sve Image + +(Depending on the hardware GIC implementation, you may also need +--irqchip=gicv3. New kvmtool defaults to that if appropriate, but I +can't remember whether my branch is new enough for that. Try without +the option first.) + +Kvmtool occupies the terminal until you kill it (Ctrl+A x), +or until the guest terminates. It is therefore recommended to run +each instance in a separate terminal (use screen or ssh etc.) 
This +allows multiple guests to be run in parallel while running other +commands on the host. + +Within the guest, the host filesystem is accessible, mounted on /host. + +2) Run the sve-stress on *each* guest with the Vector-Length set to 32: +guest$ ./vlset --inherit 32 ./sve-stress + +3) Run the sve-stress on the host with the maximum Vector-Length: +host$ ./vlset --inherit --max ./sve-stress + + +Again, the test should be allowed to run for many minutes or hours to +maximise test coverage. + +If no error is detected, you will see output from each sve-stress +instance similar to that illustrated above; otherwise details of the +observed mismatches will be printed. diff --git a/tools/testing/selftests/arm64/fp/asm-offsets.h b/tools/testing/selftests/arm64/fp/asm-offsets.h new file mode 100644 index 000000000000..a180851496ec --- /dev/null +++ b/tools/testing/selftests/arm64/fp/asm-offsets.h @@ -0,0 +1,11 @@ +#define sa_sz 32 +#define sa_flags 8 +#define sa_handler 0 +#define sa_mask_sz 8 +#define SIGUSR1 10 +#define SIGTERM 15 +#define SIGINT 2 +#define SIGABRT 6 +#define SA_NODEFER 1073741824 +#define SA_SIGINFO 4 +#define ucontext_regs 184 diff --git a/tools/testing/selftests/arm64/fp/assembler.h b/tools/testing/selftests/arm64/fp/assembler.h new file mode 100644 index 000000000000..8944f2189206 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/assembler.h @@ -0,0 +1,57 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (C) 2015-2019 ARM Limited. 
+// Original author: Dave Martin <Dave.Martin@arm.com> + +#ifndef ASSEMBLER_H +#define ASSEMBLER_H + +.macro __for from:req, to:req + .if (\from) == (\to) + _for__body %\from + .else + __for \from, %(\from) + ((\to) - (\from)) / 2 + __for %(\from) + ((\to) - (\from)) / 2 + 1, \to + .endif +.endm + +.macro _for var:req, from:req, to:req, insn:vararg + .macro _for__body \var:req + .noaltmacro + \insn + .altmacro + .endm + + .altmacro + __for \from, \to + .noaltmacro + + .purgem _for__body +.endm + +.macro function name + .macro endfunction + .type \name, @function + .purgem endfunction + .endm +\name: +.endm + +.macro define_accessor name, num, insn + .macro \name\()_entry n + \insn \n, 1 + ret + .endm + +function \name + adr x2, .L__accessor_tbl\@ + add x2, x2, x0, lsl #3 + br x2 + +.L__accessor_tbl\@: + _for x, 0, (\num) - 1, \name\()_entry \x +endfunction + + .purgem \name\()_entry +.endm + +#endif /* ! ASSEMBLER_H */ diff --git a/tools/testing/selftests/arm64/fp/fpsimd-stress b/tools/testing/selftests/arm64/fp/fpsimd-stress new file mode 100755 index 000000000000..781b5b022eaf --- /dev/null +++ b/tools/testing/selftests/arm64/fp/fpsimd-stress @@ -0,0 +1,60 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-only +# Copyright (C) 2015-2019 ARM Limited. +# Original author: Dave Martin <Dave.Martin@arm.com> + +set -ue + +NR_CPUS=`nproc` + +pids= +logs= + +cleanup () { + trap - INT TERM CHLD + set +e + + if [ -n "$pids" ]; then + kill $pids + wait $pids + pids= + fi + + if [ -n "$logs" ]; then + cat $logs + rm $logs + logs= + fi +} + +interrupt () { + cleanup + exit 0 +} + +child_died () { + cleanup + exit 1 +} + +trap interrupt INT TERM EXIT +trap child_died CHLD + +for x in `seq 0 $((NR_CPUS * 4))`; do + log=`mktemp` + logs=$logs\ $log + ./fpsimd-test >$log & + pids=$pids\ $! +done + +# Wait for all child processes to be created: +sleep 10 + +while :; do + kill -USR1 $pids +done & +pids=$pids\ $! 
+ +wait + +exit 1 diff --git a/tools/testing/selftests/arm64/fp/fpsimd-test.S b/tools/testing/selftests/arm64/fp/fpsimd-test.S new file mode 100644 index 000000000000..1c5556bdd11d --- /dev/null +++ b/tools/testing/selftests/arm64/fp/fpsimd-test.S @@ -0,0 +1,482 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (C) 2015-2019 ARM Limited. +// Original author: Dave Martin <Dave.Martin@arm.com> +// +// Simple FPSIMD context switch test +// Repeatedly writes unique test patterns into each FPSIMD register +// and reads them back to verify integrity. +// +// for x in `seq 1 NR_CPUS`; do fpsimd-test & pids=$pids\ $! ; done +// (leave it running for as long as you want...) +// kill $pids + +#include <asm/unistd.h> +#include "assembler.h" +#include "asm-offsets.h" + +#define NVR 32 +#define MAXVL_B (128 / 8) + +.macro _vldr Vn:req, Xt:req + ld1 {v\Vn\().2d}, [x\Xt] +.endm + +.macro _vstr Vn:req, Xt:req + st1 {v\Vn\().2d}, [x\Xt] +.endm + +// Generate accessor functions to read/write programmatically selected +// FPSIMD registers. +// x0 is the register index to access +// x1 is the memory address to read from (setv) or store to (getv) +// All clobber x0-x2 +define_accessor setv, NVR, _vldr +define_accessor getv, NVR, _vstr + +// Print a single character x0 to stdout +// Clobbers x0-x2,x8 +function putc + str x0, [sp, #-16]! 
+ + mov x0, #1 // STDOUT_FILENO + mov x1, sp + mov x2, #1 + mov x8, #__NR_write + svc #0 + + add sp, sp, #16 + ret +endfunction + +// Print a NUL-terminated string starting at address x0 to stdout +// Clobbers x0-x3,x8 +function puts + mov x1, x0 + + mov x2, #0 +0: ldrb w3, [x0], #1 + cbz w3, 1f + add x2, x2, #1 + b 0b + +1: mov w0, #1 // STDOUT_FILENO + mov x8, #__NR_write + svc #0 + + ret +endfunction + +// Utility macro to print a literal string +// Clobbers x0-x4,x8 +.macro puts string + .pushsection .rodata.str1.1, "aMS", 1 +.L__puts_literal\@: .string "\string" + .popsection + + ldr x0, =.L__puts_literal\@ + bl puts +.endm + +// Print an unsigned decimal number x0 to stdout +// Clobbers x0-x4,x8 +function putdec + mov x1, sp + str x30, [sp, #-32]! // Result can't be > 20 digits + + mov x2, #0 + strb w2, [x1, #-1]! // Write the NUL terminator + + mov x2, #10 +0: udiv x3, x0, x2 // div-mod loop to generate the digits + msub x0, x3, x2, x0 + add w0, w0, #'0' + strb w0, [x1, #-1]! + mov x0, x3 + cbnz x3, 0b + + ldrb w0, [x1] + cbnz w0, 1f + mov w0, #'0' // Print "0" for 0, not "" + strb w0, [x1, #-1]! + +1: mov x0, x1 + bl puts + + ldr x30, [sp], #32 + ret +endfunction + +// Print an unsigned decimal number x0 to stdout, followed by a newline +// Clobbers x0-x5,x8 +function putdecn + mov x5, x30 + + bl putdec + mov x0, #'\n' + bl putc + + ret x5 +endfunction + + +// Clobbers x0-x3,x8 +function puthexb + str x30, [sp, #-0x10]! + + mov w3, w0 + lsr w0, w0, #4 + bl puthexnibble + mov w0, w3 + + ldr x30, [sp], #0x10 + // fall through to puthexnibble +endfunction +// Clobbers x0-x2,x8 +function puthexnibble + and w0, w0, #0xf + cmp w0, #10 + blo 1f + add w0, w0, #'a' - ('9' + 1) +1: add w0, w0, #'0' + b putc +endfunction + +// x0=data in, x1=size in, clobbers x0-x5,x8 +function dumphex + str x30, [sp, #-0x10]! 
+ + mov x4, x0 + mov x5, x1 + +0: subs x5, x5, #1 + b.lo 1f + ldrb w0, [x4], #1 + bl puthexb + b 0b + +1: ldr x30, [sp], #0x10 + ret +endfunction + +// Declare some storage space to shadow the FPSIMD register contents: +.pushsection .text +.data +.align 4 +vref: + .space MAXVL_B * NVR +scratch: + .space MAXVL_B +.popsection + +// Trivial memory copy: copy x2 bytes, starting at address x1, to address x0. +// Clobbers x0-x3 +function memcpy + cmp x2, #0 + b.eq 1f +0: ldrb w3, [x1], #1 + strb w3, [x0], #1 + subs x2, x2, #1 + b.ne 0b +1: ret +endfunction + +// Generate a test pattern for storage in FPSIMD registers +// x0: pid (16 bits) +// x1: register number (6 bits) +// x2: generation (4 bits) +function pattern + orr w1, w0, w1, lsl #16 + orr w2, w1, w2, lsl #28 + + ldr x0, =scratch + mov w1, #MAXVL_B / 4 + +0: str w2, [x0], #4 + add w2, w2, #(1 << 22) + subs w1, w1, #1 + bne 0b + + ret +endfunction + +// Get the address of shadow data for FPSIMD V-register V<xn> +.macro _adrv xd, xn, nrtmp + ldr \xd, =vref + mov x\nrtmp, #16 + madd \xd, x\nrtmp, \xn, \xd +.endm + +// Set up test pattern in a FPSIMD V-register +// x0: pid +// x1: register number +// x2: generation +function setup_vreg + mov x4, x30 + + mov x6, x1 + bl pattern + _adrv x0, x6, 2 + mov x5, x0 + ldr x1, =scratch + bl memcpy + + mov x0, x6 + mov x1, x5 + bl setv + + ret x4 +endfunction + +// Fill x1 bytes starting at x0 with 0xae (for canary purposes) +// Clobbers x1, x2. +function memfill_ae + mov w2, #0xae + b memfill +endfunction + +// Fill x1 bytes starting at x0 with 0. +// Clobbers x1, x2. +function memclr + mov w2, #0 +endfunction + // fall through to memfill + +// Trivial memory fill: fill x1 bytes starting at address x0 with byte w2 +// Clobbers x1 +function memfill + cmp x1, #0 + b.eq 1f + +0: strb w2, [x0], #1 + subs x1, x1, #1 + b.ne 0b + +1: ret +endfunction + +// Trivial memory compare: compare x2 bytes starting at address x0 with +// bytes starting at address x1. 
+// Returns only if all bytes match; otherwise, the program is aborted. +// Clobbers x0-x5. +function memcmp + cbz x2, 1f + + mov x5, #0 +0: ldrb w3, [x0, x5] + ldrb w4, [x1, x5] + add x5, x5, #1 + cmp w3, w4 + b.ne barf + subs x2, x2, #1 + b.ne 0b + +1: ret +endfunction + +// Verify that a FPSIMD V-register matches its shadow in memory, else abort +// x0: reg number +// Clobbers x0-x5. +function check_vreg + mov x3, x30 + + _adrv x5, x0, 6 + mov x4, x0 + ldr x7, =scratch + + mov x0, x7 + mov x1, x6 + bl memfill_ae + + mov x0, x4 + mov x1, x7 + bl getv + + mov x0, x5 + mov x1, x7 + mov x2, x6 + mov x30, x3 + b memcmp +endfunction + +// Any SVE register modified here can cause corruption in the main +// thread -- but *only* the registers modified here. +function irritator_handler + // Increment the irritation signal count (x23): + ldr x0, [x2, #ucontext_regs + 8 * 23] + add x0, x0, #1 + str x0, [x2, #ucontext_regs + 8 * 23] + + // Corrupt some random V-regs + adr x0, .text + (irritator_handler - .text) / 16 * 16 + movi v0.8b, #7 + movi v9.16b, #9 + movi v31.8b, #31 + + ret +endfunction + +function terminate_handler + mov w21, w0 + mov x20, x2 + + puts "Terminated by signal " + mov w0, w21 + bl putdec + puts ", no error, iterations=" + ldr x0, [x20, #ucontext_regs + 8 * 22] + bl putdec + puts ", signals=" + ldr x0, [x20, #ucontext_regs + 8 * 23] + bl putdecn + + mov x0, #0 + mov x8, #__NR_exit + svc #0 +endfunction + +// w0: signal number +// x1: sa_action +// w2: sa_flags +// Clobbers x0-x6,x8 +function setsignal + str x30, [sp, #-((sa_sz + 15) / 16 * 16 + 16)]! 
+ + mov w4, w0 + mov x5, x1 + mov w6, w2 + + add x0, sp, #16 + mov x1, #sa_sz + bl memclr + + mov w0, w4 + add x1, sp, #16 + str w6, [x1, #sa_flags] + str x5, [x1, #sa_handler] + mov x2, #0 + mov x3, #sa_mask_sz + mov x8, #__NR_rt_sigaction + svc #0 + + cbz w0, 1f + + puts "sigaction failure\n" + b .Labort + +1: ldr x30, [sp], #((sa_sz + 15) / 16 * 16 + 16) + ret +endfunction + +// Main program entry point +.globl _start +function _start +_start: + // Sanity-check and report the vector length + + mov x19, #128 + cmp x19, #128 + b.lo 1f + cmp x19, #2048 + b.hi 1f + tst x19, #(8 - 1) + b.eq 2f + +1: puts "Bad vector length: " + mov x0, x19 + bl putdecn + b .Labort + +2: puts "Vector length:\t" + mov x0, x19 + bl putdec + puts " bits\n" + + // Obtain our PID, to ensure test pattern uniqueness between processes + + mov x8, #__NR_getpid + svc #0 + mov x20, x0 + + puts "PID:\t" + mov x0, x20 + bl putdecn + + mov x23, #0 // Irritation signal count + + mov w0, #SIGINT + adr x1, terminate_handler + mov w2, #SA_SIGINFO + bl setsignal + + mov w0, #SIGTERM + adr x1, terminate_handler + mov w2, #SA_SIGINFO + bl setsignal + + mov w0, #SIGUSR1 + adr x1, irritator_handler + mov w2, #SA_SIGINFO + orr w2, w2, #SA_NODEFER + bl setsignal + + mov x22, #0 // generation number, increments per iteration +.Ltest_loop: + + mov x21, #0 // Set up V-regs & shadow with test pattern +0: mov x0, x20 + mov x1, x21 + and x2, x22, #0xf + bl setup_vreg + add x21, x21, #1 + cmp x21, #NVR + b.lo 0b + +// Can't do this when SVE state is volatile across SVC: + mov x8, #__NR_sched_yield // Encourage preemption + svc #0 + + mov x21, #0 +0: mov x0, x21 + bl check_vreg + add x21, x21, #1 + cmp x21, #NVR + b.lo 0b + + add x22, x22, #1 + b .Ltest_loop + +.Labort: + mov x0, #0 + mov x1, #SIGABRT + mov x8, #__NR_kill + svc #0 +endfunction + +function barf + mov x10, x0 // expected data + mov x11, x1 // actual data + mov x12, x2 // data size + + puts "Mistatch: PID=" + mov x0, x20 + bl putdec + puts ", 
iteration=" + mov x0, x22 + bl putdec + puts ", reg=" + mov x0, x21 + bl putdecn + puts "\tExpected [" + mov x0, x10 + mov x1, x12 + bl dumphex + puts "]\n\tGot [" + mov x0, x11 + mov x1, x12 + bl dumphex + puts "]\n" + + mov x8, #__NR_exit + mov x1, #1 + svc #0 +endfunction diff --git a/tools/testing/selftests/arm64/fp/sve-probe-vls.c b/tools/testing/selftests/arm64/fp/sve-probe-vls.c new file mode 100644 index 000000000000..b29cbc642c57 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/sve-probe-vls.c @@ -0,0 +1,58 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015-2020 ARM Limited. + * Original author: Dave Martin <Dave.Martin@arm.com> + */ +#include <assert.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/auxv.h> +#include <sys/prctl.h> +#include <asm/sigcontext.h> + +#include "../../kselftest.h" + +int main(int argc, char **argv) +{ + unsigned int vq; + int vl; + static unsigned int vqs[SVE_VQ_MAX]; + unsigned int nvqs = 0; + + ksft_print_header(); + ksft_set_plan(2); + + if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) + ksft_exit_skip("SVE not available"); + + /* + * Enumerate up to SVE_VQ_MAX vector lengths + */ + for (vq = SVE_VQ_MAX; vq > 0; --vq) { + vl = prctl(PR_SVE_SET_VL, vq * 16); + if (vl == -1) + ksft_exit_fail_msg("PR_SVE_SET_VL failed: %s (%d)\n", + strerror(errno), errno); + + vl &= PR_SVE_VL_LEN_MASK; + + if (!sve_vl_valid(vl)) + ksft_exit_fail_msg("VL %d invalid\n", vl); + vq = sve_vq_from_vl(vl); + + if (!(nvqs < SVE_VQ_MAX)) + ksft_exit_fail_msg("Too many VLs %u >= SVE_VQ_MAX\n", + nvqs); + vqs[nvqs++] = vq; + } + ksft_test_result_pass("Enumerated %d vector lengths\n", nvqs); + ksft_test_result_pass("All vector lengths valid\n"); + + /* Print out the vector lengths in ascending order: */ + while (nvqs--) + ksft_print_msg("%u\n", 16 * vqs[nvqs]); + + ksft_exit_pass(); +} diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace-asm.S 
b/tools/testing/selftests/arm64/fp/sve-ptrace-asm.S new file mode 100644 index 000000000000..3e81f9fab574 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/sve-ptrace-asm.S @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (C) 2015-2019 ARM Limited. +// Original author: Dave Martin <Dave.Martin@arm.com> +#include <asm/unistd.h> + +.arch_extension sve + +.globl sve_store_patterns + +sve_store_patterns: + mov x1, x0 + + index z0.b, #0, #1 + str q0, [x1] + + mov w8, #__NR_getpid + svc #0 + str q0, [x1, #0x10] + + mov z1.d, z0.d + str q0, [x1, #0x20] + + mov w8, #__NR_getpid + svc #0 + str q0, [x1, #0x30] + + mov z1.d, z0.d + str q0, [x1, #0x40] + + ret + +.size sve_store_patterns, . - sve_store_patterns +.type sve_store_patterns, @function diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c new file mode 100644 index 000000000000..b2282be6f938 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c @@ -0,0 +1,336 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015-2020 ARM Limited. 
+ * Original author: Dave Martin <Dave.Martin@arm.com> + */ +#include <errno.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <sys/auxv.h> +#include <sys/ptrace.h> +#include <sys/types.h> +#include <sys/uio.h> +#include <sys/wait.h> +#include <asm/sigcontext.h> +#include <asm/ptrace.h> + +#include "../../kselftest.h" + +/* <linux/elf.h> and <sys/auxv.h> don't like each other, so: */ +#ifndef NT_ARM_SVE +#define NT_ARM_SVE 0x405 +#endif + +/* Number of registers filled in by sve_store_patterns */ +#define NR_VREGS 5 + +void sve_store_patterns(__uint128_t v[NR_VREGS]); + +static void dump(const void *buf, size_t size) +{ + size_t i; + const unsigned char *p = buf; + + for (i = 0; i < size; ++i) + printf(" %.2x", *p++); +} + +static int check_vregs(const __uint128_t vregs[NR_VREGS]) +{ + int i; + int ok = 1; + + for (i = 0; i < NR_VREGS; ++i) { + printf("# v[%d]:", i); + dump(&vregs[i], sizeof vregs[i]); + putchar('\n'); + + if (vregs[i] != vregs[0]) + ok = 0; + } + + return ok; +} + +static int do_child(void) +{ + if (ptrace(PTRACE_TRACEME, -1, NULL, NULL)) + ksft_exit_fail_msg("PTRACE_TRACEME", strerror(errno)); + + if (raise(SIGSTOP)) + ksft_exit_fail_msg("raise(SIGSTOP)", strerror(errno)); + + return EXIT_SUCCESS; +} + +static struct user_sve_header *get_sve(pid_t pid, void **buf, size_t *size) +{ + struct user_sve_header *sve; + void *p; + size_t sz = sizeof *sve; + struct iovec iov; + + while (1) { + if (*size < sz) { + p = realloc(*buf, sz); + if (!p) { + errno = ENOMEM; + goto error; + } + + *buf = p; + *size = sz; + } + + iov.iov_base = *buf; + iov.iov_len = sz; + if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_SVE, &iov)) + goto error; + + sve = *buf; + if (sve->size <= sz) + break; + + sz = sve->size; + } + + return sve; + +error: + return NULL; +} + +static int set_sve(pid_t pid, const struct user_sve_header *sve) +{ + struct iovec iov; + + iov.iov_base = (void *)sve; + iov.iov_len = sve->size; + 
return ptrace(PTRACE_SETREGSET, pid, NT_ARM_SVE, &iov); +} + +static void dump_sve_regs(const struct user_sve_header *sve, unsigned int num, + unsigned int vlmax) +{ + unsigned int vq; + unsigned int i; + + if ((sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_SVE) + ksft_exit_fail_msg("Dumping non-SVE register\n"); + + if (vlmax > sve->vl) + vlmax = sve->vl; + + vq = sve_vq_from_vl(sve->vl); + for (i = 0; i < num; ++i) { + printf("# z%u:", i); + dump((const char *)sve + SVE_PT_SVE_ZREG_OFFSET(vq, i), + vlmax); + printf("%s\n", vlmax == sve->vl ? "" : " ..."); + } +} + +static int do_parent(pid_t child) +{ + int ret = EXIT_FAILURE; + pid_t pid; + int status; + siginfo_t si; + void *svebuf = NULL, *newsvebuf; + size_t svebufsz = 0, newsvebufsz; + struct user_sve_header *sve, *new_sve; + struct user_fpsimd_state *fpsimd; + unsigned int i, j; + unsigned char *p; + unsigned int vq; + + /* Attach to the child */ + while (1) { + int sig; + + pid = wait(&status); + if (pid == -1) { + perror("wait"); + goto error; + } + + /* + * This should never happen but it's hard to flag in + * the framework. 
+ */ + if (pid != child) + continue; + + if (WIFEXITED(status) || WIFSIGNALED(status)) + ksft_exit_fail_msg("Child died unexpectedly\n"); + + ksft_test_result(WIFSTOPPED(status), "WIFSTOPPED(%d)\n", + status); + if (!WIFSTOPPED(status)) + goto error; + + sig = WSTOPSIG(status); + + if (ptrace(PTRACE_GETSIGINFO, pid, NULL, &si)) { + if (errno == ESRCH) + goto disappeared; + + if (errno == EINVAL) { + sig = 0; /* bust group-stop */ + goto cont; + } + + ksft_test_result_fail("PTRACE_GETSIGINFO: %s\n", + strerror(errno)); + goto error; + } + + if (sig == SIGSTOP && si.si_code == SI_TKILL && + si.si_pid == pid) + break; + + cont: + if (ptrace(PTRACE_CONT, pid, NULL, sig)) { + if (errno == ESRCH) + goto disappeared; + + ksft_test_result_fail("PTRACE_CONT: %s\n", + strerror(errno)); + goto error; + } + } + + sve = get_sve(pid, &svebuf, &svebufsz); + if (!sve) { + int e = errno; + + ksft_test_result_fail("get_sve: %s\n", strerror(errno)); + if (e == ESRCH) + goto disappeared; + + goto error; + } else { + ksft_test_result_pass("get_sve\n"); + } + + ksft_test_result((sve->flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_FPSIMD, + "FPSIMD registers\n"); + if ((sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_FPSIMD) + goto error; + + fpsimd = (struct user_fpsimd_state *)((char *)sve + + SVE_PT_FPSIMD_OFFSET); + for (i = 0; i < 32; ++i) { + p = (unsigned char *)&fpsimd->vregs[i]; + + for (j = 0; j < sizeof fpsimd->vregs[i]; ++j) + p[j] = j; + } + + if (set_sve(pid, sve)) { + int e = errno; + + ksft_test_result_fail("set_sve(FPSIMD): %s\n", + strerror(errno)); + if (e == ESRCH) + goto disappeared; + + goto error; + } + + vq = sve_vq_from_vl(sve->vl); + + newsvebufsz = SVE_PT_SVE_ZREG_OFFSET(vq, 1); + new_sve = newsvebuf = malloc(newsvebufsz); + if (!new_sve) { + errno = ENOMEM; + perror(NULL); + goto error; + } + + *new_sve = *sve; + new_sve->flags &= ~SVE_PT_REGS_MASK; + new_sve->flags |= SVE_PT_REGS_SVE; + memset((char *)new_sve + SVE_PT_SVE_ZREG_OFFSET(vq, 0), + 0, 
SVE_PT_SVE_ZREG_SIZE(vq)); + new_sve->size = SVE_PT_SVE_ZREG_OFFSET(vq, 1); + if (set_sve(pid, new_sve)) { + int e = errno; + + ksft_test_result_fail("set_sve(ZREG): %s\n", strerror(errno)); + if (e == ESRCH) + goto disappeared; + + goto error; + } + + new_sve = get_sve(pid, &newsvebuf, &newsvebufsz); + if (!new_sve) { + int e = errno; + + ksft_test_result_fail("get_sve(ZREG): %s\n", strerror(errno)); + if (e == ESRCH) + goto disappeared; + + goto error; + } + + ksft_test_result((new_sve->flags & SVE_PT_REGS_MASK) == SVE_PT_REGS_SVE, + "SVE registers\n"); + if ((new_sve->flags & SVE_PT_REGS_MASK) != SVE_PT_REGS_SVE) + goto error; + + dump_sve_regs(new_sve, 3, sizeof fpsimd->vregs[0]); + + p = (unsigned char *)new_sve + SVE_PT_SVE_ZREG_OFFSET(vq, 1); + for (i = 0; i < sizeof fpsimd->vregs[0]; ++i) { + unsigned char expected = i; + + if (__BYTE_ORDER == __BIG_ENDIAN) + expected = sizeof fpsimd->vregs[0] - 1 - expected; + + ksft_test_result(p[i] == expected, "p[%d] == expected\n", i); + if (p[i] != expected) + goto error; + } + + ret = EXIT_SUCCESS; + +error: + kill(child, SIGKILL); + +disappeared: + return ret; +} + +int main(void) +{ + int ret = EXIT_SUCCESS; + __uint128_t v[NR_VREGS]; + pid_t child; + + ksft_print_header(); + ksft_set_plan(20); + + if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) + ksft_exit_skip("SVE not available\n"); + + sve_store_patterns(v); + + if (!check_vregs(v)) + ksft_exit_fail_msg("Initial check_vregs() failed\n"); + + child = fork(); + if (!child) + return do_child(); + + if (do_parent(child)) + ret = EXIT_FAILURE; + + ksft_print_cnts(); + + return 0; +} diff --git a/tools/testing/selftests/arm64/fp/sve-stress b/tools/testing/selftests/arm64/fp/sve-stress new file mode 100755 index 000000000000..24dd0922cc02 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/sve-stress @@ -0,0 +1,59 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-only +# Copyright (C) 2015-2019 ARM Limited. 
+# Original author: Dave Martin <Dave.Martin@arm.com> + +set -ue + +NR_CPUS=`nproc` + +pids= +logs= + +cleanup () { + trap - INT TERM CHLD + set +e + + if [ -n "$pids" ]; then + kill $pids + wait $pids + pids= + fi + + if [ -n "$logs" ]; then + cat $logs + rm $logs + logs= + fi +} + +interrupt () { + cleanup + exit 0 +} + +child_died () { + cleanup + exit 1 +} + +trap interrupt INT TERM EXIT + +for x in `seq 0 $((NR_CPUS * 4))`; do + log=`mktemp` + logs=$logs\ $log + ./sve-test >$log & + pids=$pids\ $! +done + +# Wait for all child processes to be created: +sleep 10 + +while :; do + kill -USR1 $pids +done & +pids=$pids\ $! + +wait + +exit 1 diff --git a/tools/testing/selftests/arm64/fp/sve-test.S b/tools/testing/selftests/arm64/fp/sve-test.S new file mode 100644 index 000000000000..f95074c9b48b --- /dev/null +++ b/tools/testing/selftests/arm64/fp/sve-test.S @@ -0,0 +1,672 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (C) 2015-2019 ARM Limited. +// Original author: Dave Martin <Dave.Martin@arm.com> +// +// Simple Scalable Vector Extension context switch test +// Repeatedly writes unique test patterns into each SVE register +// and reads them back to verify integrity. +// +// for x in `seq 1 NR_CPUS`; do sve-test & pids=$pids\ $! ; done +// (leave it running for as long as you want...) +// kill $pids + +#include <asm/unistd.h> +#include "assembler.h" +#include "asm-offsets.h" + +#define NZR 32 +#define NPR 16 +#define MAXVL_B (2048 / 8) + +.arch_extension sve + +.macro _sve_ldr_v zt, xn + ldr z\zt, [x\xn] +.endm + +.macro _sve_str_v zt, xn + str z\zt, [x\xn] +.endm + +.macro _sve_ldr_p pt, xn + ldr p\pt, [x\xn] +.endm + +.macro _sve_str_p pt, xn + str p\pt, [x\xn] +.endm + +// Generate accessor functions to read/write programmatically selected +// SVE registers. 
+// x0 is the register index to access +// x1 is the memory address to read from (setz,setp) or store to (getz,getp) +// All clobber x0-x2 +define_accessor setz, NZR, _sve_ldr_v +define_accessor getz, NZR, _sve_str_v +define_accessor setp, NPR, _sve_ldr_p +define_accessor getp, NPR, _sve_str_p + +// Print a single character x0 to stdout +// Clobbers x0-x2,x8 +function putc + str x0, [sp, #-16]! + + mov x0, #1 // STDOUT_FILENO + mov x1, sp + mov x2, #1 + mov x8, #__NR_write + svc #0 + + add sp, sp, #16 + ret +endfunction + +// Print a NUL-terminated string starting at address x0 to stdout +// Clobbers x0-x3,x8 +function puts + mov x1, x0 + + mov x2, #0 +0: ldrb w3, [x0], #1 + cbz w3, 1f + add x2, x2, #1 + b 0b + +1: mov w0, #1 // STDOUT_FILENO + mov x8, #__NR_write + svc #0 + + ret +endfunction + +// Utility macro to print a literal string +// Clobbers x0-x4,x8 +.macro puts string + .pushsection .rodata.str1.1, "aMS", 1 +.L__puts_literal\@: .string "\string" + .popsection + + ldr x0, =.L__puts_literal\@ + bl puts +.endm + +// Print an unsigned decimal number x0 to stdout +// Clobbers x0-x4,x8 +function putdec + mov x1, sp + str x30, [sp, #-32]! // Result can't be > 20 digits + + mov x2, #0 + strb w2, [x1, #-1]! // Write the NUL terminator + + mov x2, #10 +0: udiv x3, x0, x2 // div-mod loop to generate the digits + msub x0, x3, x2, x0 + add w0, w0, #'0' + strb w0, [x1, #-1]! + mov x0, x3 + cbnz x3, 0b + + ldrb w0, [x1] + cbnz w0, 1f + mov w0, #'0' // Print "0" for 0, not "" + strb w0, [x1, #-1]! + +1: mov x0, x1 + bl puts + + ldr x30, [sp], #32 + ret +endfunction + +// Print an unsigned decimal number x0 to stdout, followed by a newline +// Clobbers x0-x5,x8 +function putdecn + mov x5, x30 + + bl putdec + mov x0, #'\n' + bl putc + + ret x5 +endfunction + +// Clobbers x0-x3,x8 +function puthexb + str x30, [sp, #-0x10]! 
+ + mov w3, w0 + lsr w0, w0, #4 + bl puthexnibble + mov w0, w3 + + ldr x30, [sp], #0x10 + // fall through to puthexnibble +endfunction +// Clobbers x0-x2,x8 +function puthexnibble + and w0, w0, #0xf + cmp w0, #10 + blo 1f + add w0, w0, #'a' - ('9' + 1) +1: add w0, w0, #'0' + b putc +endfunction + +// x0=data in, x1=size in, clobbers x0-x5,x8 +function dumphex + str x30, [sp, #-0x10]! + + mov x4, x0 + mov x5, x1 + +0: subs x5, x5, #1 + b.lo 1f + ldrb w0, [x4], #1 + bl puthexb + b 0b + +1: ldr x30, [sp], #0x10 + ret +endfunction + +// Declare some storage space to shadow the SVE register contents: +.pushsection .text +.data +.align 4 +zref: + .space MAXVL_B * NZR +pref: + .space MAXVL_B / 8 * NPR +ffrref: + .space MAXVL_B / 8 +scratch: + .space MAXVL_B +.popsection + +// Trivial memory copy: copy x2 bytes, starting at address x1, to address x0. +// Clobbers x0-x3 +function memcpy + cmp x2, #0 + b.eq 1f +0: ldrb w3, [x1], #1 + strb w3, [x0], #1 + subs x2, x2, #1 + b.ne 0b +1: ret +endfunction + +// Generate a test pattern for storage in SVE registers +// x0: pid (16 bits) +// x1: register number (6 bits) +// x2: generation (4 bits) + +// These values are used to construct a 32-bit pattern that is repeated in the +// scratch buffer as many times as will fit: +// bits 31:28 generation number (increments once per test_loop) +// bits 27:22 32-bit lane index +// bits 21:16 register number +// bits 15: 0 pid + +function pattern + orr w1, w0, w1, lsl #16 + orr w2, w1, w2, lsl #28 + + ldr x0, =scratch + mov w1, #MAXVL_B / 4 + +0: str w2, [x0], #4 + add w2, w2, #(1 << 22) + subs w1, w1, #1 + bne 0b + + ret +endfunction + +// Get the address of shadow data for SVE Z-register Z<xn> +.macro _adrz xd, xn, nrtmp + ldr \xd, =zref + rdvl x\nrtmp, #1 + madd \xd, x\nrtmp, \xn, \xd +.endm + +// Get the address of shadow data for SVE P-register P<xn - NZR> +.macro _adrp xd, xn, nrtmp + ldr \xd, =pref + rdvl x\nrtmp, #1 + lsr x\nrtmp, x\nrtmp, #3 + sub \xn, \xn, #NZR + madd \xd, x\nrtmp, 
\xn, \xd +.endm + +// Set up test pattern in a SVE Z-register +// x0: pid +// x1: register number +// x2: generation +function setup_zreg + mov x4, x30 + + mov x6, x1 + bl pattern + _adrz x0, x6, 2 + mov x5, x0 + ldr x1, =scratch + bl memcpy + + mov x0, x6 + mov x1, x5 + bl setz + + ret x4 +endfunction + +// Set up test pattern in a SVE P-register +// x0: pid +// x1: register number +// x2: generation +function setup_preg + mov x4, x30 + + mov x6, x1 + bl pattern + _adrp x0, x6, 2 + mov x5, x0 + ldr x1, =scratch + bl memcpy + + mov x0, x6 + mov x1, x5 + bl setp + + ret x4 +endfunction + +// Set up test pattern in the FFR +// x0: pid +// x2: generation +// Beware: corrupts P0. +function setup_ffr + mov x4, x30 + + bl pattern + ldr x0, =ffrref + ldr x1, =scratch + rdvl x2, #1 + lsr x2, x2, #3 + bl memcpy + + mov x0, #0 + ldr x1, =ffrref + bl setp + + wrffr p0.b + + ret x4 +endfunction + +// Fill x1 bytes starting at x0 with 0xae (for canary purposes) +// Clobbers x1, x2. +function memfill_ae + mov w2, #0xae + b memfill +endfunction + +// Fill x1 bytes starting at x0 with 0. +// Clobbers x1, x2. +function memclr + mov w2, #0 +endfunction + // fall through to memfill + +// Trivial memory fill: fill x1 bytes starting at address x0 with byte w2 +// Clobbers x1 +function memfill + cmp x1, #0 + b.eq 1f + +0: strb w2, [x0], #1 + subs x1, x1, #1 + b.ne 0b + +1: ret +endfunction + +// Trivial memory compare: compare x2 bytes starting at address x0 with +// bytes starting at address x1. +// Returns only if all bytes match; otherwise, the program is aborted. +// Clobbers x0-x5. +function memcmp + cbz x2, 2f + + stp x0, x1, [sp, #-0x20]! 
+ str x2, [sp, #0x10] + + mov x5, #0 +0: ldrb w3, [x0, x5] + ldrb w4, [x1, x5] + add x5, x5, #1 + cmp w3, w4 + b.ne 1f + subs x2, x2, #1 + b.ne 0b + +1: ldr x2, [sp, #0x10] + ldp x0, x1, [sp], #0x20 + b.ne barf + +2: ret +endfunction + +// Verify that a SVE Z-register matches its shadow in memory, else abort +// x0: reg number +// Clobbers x0-x7. +function check_zreg + mov x3, x30 + + _adrz x5, x0, 6 + mov x4, x0 + ldr x7, =scratch + + mov x0, x7 + mov x1, x6 + bl memfill_ae + + mov x0, x4 + mov x1, x7 + bl getz + + mov x0, x5 + mov x1, x7 + mov x2, x6 + mov x30, x3 + b memcmp +endfunction + +// Verify that a SVE P-register matches its shadow in memory, else abort +// x0: reg number +// Clobbers x0-x7. +function check_preg + mov x3, x30 + + _adrp x5, x0, 6 + mov x4, x0 + ldr x7, =scratch + + mov x0, x7 + mov x1, x6 + bl memfill_ae + + mov x0, x4 + mov x1, x7 + bl getp + + mov x0, x5 + mov x1, x7 + mov x2, x6 + mov x30, x3 + b memcmp +endfunction + +// Verify that the FFR matches its shadow in memory, else abort +// Beware -- corrupts P0. +// Clobbers x0-x5. +function check_ffr + mov x3, x30 + + ldr x4, =scratch + rdvl x5, #1 + lsr x5, x5, #3 + + mov x0, x4 + mov x1, x5 + bl memfill_ae + + rdffr p0.b + mov x0, #0 + mov x1, x4 + bl getp + + ldr x0, =ffrref + mov x1, x4 + mov x2, x5 + mov x30, x3 + b memcmp +endfunction + +// Any SVE register modified here can cause corruption in the main +// thread -- but *only* the registers modified here. 
+function irritator_handler + // Increment the irritation signal count (x23): + ldr x0, [x2, #ucontext_regs + 8 * 23] + add x0, x0, #1 + str x0, [x2, #ucontext_regs + 8 * 23] + + // Corrupt some random Z-regs + adr x0, .text + (irritator_handler - .text) / 16 * 16 + movi v0.8b, #1 + movi v9.16b, #2 + movi v31.8b, #3 + // And P0 + rdffr p0.b + // And FFR + wrffr p15.b + + ret +endfunction + +function terminate_handler + mov w21, w0 + mov x20, x2 + + puts "Terminated by signal " + mov w0, w21 + bl putdec + puts ", no error, iterations=" + ldr x0, [x20, #ucontext_regs + 8 * 22] + bl putdec + puts ", signals=" + ldr x0, [x20, #ucontext_regs + 8 * 23] + bl putdecn + + mov x0, #0 + mov x8, #__NR_exit + svc #0 +endfunction + +// w0: signal number +// x1: sa_action +// w2: sa_flags +// Clobbers x0-x6,x8 +function setsignal + str x30, [sp, #-((sa_sz + 15) / 16 * 16 + 16)]! + + mov w4, w0 + mov x5, x1 + mov w6, w2 + + add x0, sp, #16 + mov x1, #sa_sz + bl memclr + + mov w0, w4 + add x1, sp, #16 + str w6, [x1, #sa_flags] + str x5, [x1, #sa_handler] + mov x2, #0 + mov x3, #sa_mask_sz + mov x8, #__NR_rt_sigaction + svc #0 + + cbz w0, 1f + + puts "sigaction failure\n" + b .Labort + +1: ldr x30, [sp], #((sa_sz + 15) / 16 * 16 + 16) + ret +endfunction + +// Main program entry point +.globl _start +function _start +_start: + // Sanity-check and report the vector length + + rdvl x19, #8 + cmp x19, #128 + b.lo 1f + cmp x19, #2048 + b.hi 1f + tst x19, #(8 - 1) + b.eq 2f + +1: puts "Bad vector length: " + mov x0, x19 + bl putdecn + b .Labort + +2: puts "Vector length:\t" + mov x0, x19 + bl putdec + puts " bits\n" + + // Obtain our PID, to ensure test pattern uniqueness between processes + + mov x8, #__NR_getpid + svc #0 + mov x20, x0 + + puts "PID:\t" + mov x0, x20 + bl putdecn + + mov x23, #0 // Irritation signal count + + mov w0, #SIGINT + adr x1, terminate_handler + mov w2, #SA_SIGINFO + bl setsignal + + mov w0, #SIGTERM + adr x1, terminate_handler + mov w2, #SA_SIGINFO + bl 
setsignal + + mov w0, #SIGUSR1 + adr x1, irritator_handler + mov w2, #SA_SIGINFO + orr w2, w2, #SA_NODEFER + bl setsignal + + mov x22, #0 // generation number, increments per iteration +.Ltest_loop: + rdvl x0, #8 + cmp x0, x19 + b.ne vl_barf + + mov x21, #0 // Set up Z-regs & shadow with test pattern +0: mov x0, x20 + mov x1, x21 + and x2, x22, #0xf + bl setup_zreg + add x21, x21, #1 + cmp x21, #NZR + b.lo 0b + + mov x0, x20 // Set up FFR & shadow with test pattern + mov x1, #NZR + NPR + and x2, x22, #0xf + bl setup_ffr + +0: mov x0, x20 // Set up P-regs & shadow with test pattern + mov x1, x21 + and x2, x22, #0xf + bl setup_preg + add x21, x21, #1 + cmp x21, #NZR + NPR + b.lo 0b + +// Can't do this when SVE state is volatile across SVC: +// mov x8, #__NR_sched_yield // Encourage preemption +// svc #0 + + mov x21, #0 +0: mov x0, x21 + bl check_zreg + add x21, x21, #1 + cmp x21, #NZR + b.lo 0b + +0: mov x0, x21 + bl check_preg + add x21, x21, #1 + cmp x21, #NZR + NPR + b.lo 0b + + bl check_ffr + + add x22, x22, #1 + b .Ltest_loop + +.Labort: + mov x0, #0 + mov x1, #SIGABRT + mov x8, #__NR_kill + svc #0 +endfunction + +function barf +// fpsimd.c acitivty log dump hack +// ldr w0, =0xdeadc0de +// mov w8, #__NR_exit +// svc #0 +// end hack + mov x10, x0 // expected data + mov x11, x1 // actual data + mov x12, x2 // data size + + puts "Mistatch: PID=" + mov x0, x20 + bl putdec + puts ", iteration=" + mov x0, x22 + bl putdec + puts ", reg=" + mov x0, x21 + bl putdecn + puts "\tExpected [" + mov x0, x10 + mov x1, x12 + bl dumphex + puts "]\n\tGot [" + mov x0, x11 + mov x1, x12 + bl dumphex + puts "]\n" + + mov x8, #__NR_getpid + svc #0 +// fpsimd.c acitivty log dump hack +// ldr w0, =0xdeadc0de +// mov w8, #__NR_exit +// svc #0 +// ^ end of hack + mov x1, #SIGABRT + mov x8, #__NR_kill + svc #0 +// mov x8, #__NR_exit +// mov x1, #1 +// svc #0 +endfunction + +function vl_barf + mov x10, x0 + + puts "Bad active VL: " + mov x0, x10 + bl putdecn + + mov x8, #__NR_exit + mov 
x1, #1 + svc #0 +endfunction diff --git a/tools/testing/selftests/arm64/fp/vlset.c b/tools/testing/selftests/arm64/fp/vlset.c new file mode 100644 index 000000000000..308d27a68226 --- /dev/null +++ b/tools/testing/selftests/arm64/fp/vlset.c @@ -0,0 +1,155 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright (C) 2015-2019 ARM Limited. + * Original author: Dave Martin <Dave.Martin@arm.com> + */ +#define _GNU_SOURCE +#include <assert.h> +#include <errno.h> +#include <limits.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <getopt.h> +#include <unistd.h> +#include <sys/auxv.h> +#include <sys/prctl.h> +#include <asm/hwcap.h> +#include <asm/sigcontext.h> + +static int inherit = 0; +static int no_inherit = 0; +static int force = 0; +static unsigned long vl; + +static const struct option options[] = { + { "force", no_argument, NULL, 'f' }, + { "inherit", no_argument, NULL, 'i' }, + { "max", no_argument, NULL, 'M' }, + { "no-inherit", no_argument, &no_inherit, 1 }, + { "help", no_argument, NULL, '?' 
}, + {} +}; + +static char const *program_name; + +static int parse_options(int argc, char **argv) +{ + int c; + char *rest; + + program_name = strrchr(argv[0], '/'); + if (program_name) + ++program_name; + else + program_name = argv[0]; + + while ((c = getopt_long(argc, argv, "Mfhi", options, NULL)) != -1) + switch (c) { + case 'M': vl = SVE_VL_MAX; break; + case 'f': force = 1; break; + case 'i': inherit = 1; break; + case 0: break; + default: goto error; + } + + if (inherit && no_inherit) + goto error; + + if (!vl) { + /* vector length */ + if (optind >= argc) + goto error; + + errno = 0; + vl = strtoul(argv[optind], &rest, 0); + if (*rest) { + vl = ULONG_MAX; + errno = EINVAL; + } + if (vl == ULONG_MAX && errno) { + fprintf(stderr, "%s: %s: %s\n", + program_name, argv[optind], strerror(errno)); + goto error; + } + + ++optind; + } + + /* command */ + if (optind >= argc) + goto error; + + return 0; + +error: + fprintf(stderr, + "Usage: %s [-f | --force] " + "[-i | --inherit | --no-inherit] " + "{-M | --max | <vector length>} " + "<command> [<arguments> ...]\n", + program_name); + return -1; +} + +int main(int argc, char **argv) +{ + int ret = 126; /* same as sh(1) command-not-executable error */ + long flags; + char *path; + int t, e; + + if (parse_options(argc, argv)) + return 2; /* same as sh(1) builtin incorrect-usage */ + + if (vl & ~(vl & PR_SVE_VL_LEN_MASK)) { + fprintf(stderr, "%s: Invalid vector length %lu\n", + program_name, vl); + return 2; /* same as sh(1) builtin incorrect-usage */ + } + + if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) { + fprintf(stderr, "%s: Scalable Vector Extension not present\n", + program_name); + + if (!force) + goto error; + + fputs("Going ahead anyway (--force): " + "This is a debug option. 
Don't rely on it.\n", + stderr); + } + + flags = PR_SVE_SET_VL_ONEXEC; + if (inherit) + flags |= PR_SVE_VL_INHERIT; + + t = prctl(PR_SVE_SET_VL, vl | flags); + if (t < 0) { + fprintf(stderr, "%s: PR_SVE_SET_VL: %s\n", + program_name, strerror(errno)); + goto error; + } + + t = prctl(PR_SVE_GET_VL); + if (t == -1) { + fprintf(stderr, "%s: PR_SVE_GET_VL: %s\n", + program_name, strerror(errno)); + goto error; + } + flags = PR_SVE_VL_LEN_MASK; + flags = t & ~flags; + + assert(optind < argc); + path = argv[optind]; + + execvp(path, &argv[optind]); + e = errno; + if (errno == ENOENT) + ret = 127; /* same as sh(1) not-found error */ + fprintf(stderr, "%s: %s: %s\n", program_name, path, strerror(e)); + +error: + return ret; /* same as sh(1) not-executable error */ +} diff --git a/tools/testing/selftests/arm64/mte/.gitignore b/tools/testing/selftests/arm64/mte/.gitignore new file mode 100644 index 000000000000..bc3ac63f3314 --- /dev/null +++ b/tools/testing/selftests/arm64/mte/.gitignore @@ -0,0 +1,6 @@ +check_buffer_fill +check_tags_inclusion +check_child_memory +check_mmap_options +check_ksm_options +check_user_mem diff --git a/tools/testing/selftests/arm64/mte/Makefile b/tools/testing/selftests/arm64/mte/Makefile new file mode 100644 index 000000000000..2480226dfe57 --- /dev/null +++ b/tools/testing/selftests/arm64/mte/Makefile @@ -0,0 +1,29 @@ +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2020 ARM Limited + +CFLAGS += -std=gnu99 -I. +SRCS := $(filter-out mte_common_util.c,$(wildcard *.c)) +PROGS := $(patsubst %.c,%,$(SRCS)) + +#Add mte compiler option +ifneq ($(shell $(CC) --version 2>&1 | head -n 1 | grep gcc),) +CFLAGS += -march=armv8.5-a+memtag +endif + +#check if the compiler works well +mte_cc_support := $(shell if ($(CC) $(CFLAGS) -E -x c /dev/null -o /dev/null 2>&1) then echo "1"; fi) + +ifeq ($(mte_cc_support),1) +# Generated binaries to be installed by top KSFT script +TEST_GEN_PROGS := $(PROGS) + +# Get Kernel headers installed and use them. 
+KSFT_KHDR_INSTALL := 1 +endif + +# Include KSFT lib.mk. +include ../../lib.mk + +ifeq ($(mte_cc_support),1) +$(TEST_GEN_PROGS): mte_common_util.c mte_common_util.h mte_helper.S +endif diff --git a/tools/testing/selftests/arm64/mte/check_buffer_fill.c b/tools/testing/selftests/arm64/mte/check_buffer_fill.c new file mode 100644 index 000000000000..242635d79035 --- /dev/null +++ b/tools/testing/selftests/arm64/mte/check_buffer_fill.c @@ -0,0 +1,475 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2020 ARM Limited + +#define _GNU_SOURCE + +#include <stddef.h> +#include <stdio.h> +#include <string.h> + +#include "kselftest.h" +#include "mte_common_util.h" +#include "mte_def.h" + +#define OVERFLOW_RANGE MT_GRANULE_SIZE + +static int sizes[] = { + 1, 555, 1033, MT_GRANULE_SIZE - 1, MT_GRANULE_SIZE, + /* page size - 1*/ 0, /* page_size */ 0, /* page size + 1 */ 0 +}; + +enum mte_block_test_alloc { + UNTAGGED_TAGGED, + TAGGED_UNTAGGED, + TAGGED_TAGGED, + BLOCK_ALLOC_MAX, +}; + +static int check_buffer_by_byte(int mem_type, int mode) +{ + char *ptr; + int i, j, item; + bool err; + + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + item = sizeof(sizes)/sizeof(int); + + for (i = 0; i < item; i++) { + ptr = (char *)mte_allocate_memory(sizes[i], mem_type, 0, true); + if (check_allocated_memory(ptr, sizes[i], mem_type, true) != KSFT_PASS) + return KSFT_FAIL; + mte_initialize_current_context(mode, (uintptr_t)ptr, sizes[i]); + /* Set some value in tagged memory */ + for (j = 0; j < sizes[i]; j++) + ptr[j] = '1'; + mte_wait_after_trig(); + err = cur_mte_cxt.fault_valid; + /* Check the buffer whether it is filled. 
*/ + for (j = 0; j < sizes[i] && !err; j++) { + if (ptr[j] != '1') + err = true; + } + mte_free_memory((void *)ptr, sizes[i], mem_type, true); + + if (err) + break; + } + if (!err) + return KSFT_PASS; + else + return KSFT_FAIL; +} + +static int check_buffer_underflow_by_byte(int mem_type, int mode, + int underflow_range) +{ + char *ptr; + int i, j, item, last_index; + bool err; + char *und_ptr = NULL; + + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + item = sizeof(sizes)/sizeof(int); + for (i = 0; i < item; i++) { + ptr = (char *)mte_allocate_memory_tag_range(sizes[i], mem_type, 0, + underflow_range, 0); + if (check_allocated_memory_range(ptr, sizes[i], mem_type, + underflow_range, 0) != KSFT_PASS) + return KSFT_FAIL; + + mte_initialize_current_context(mode, (uintptr_t)ptr, -underflow_range); + last_index = 0; + /* Set some value in tagged memory and make the buffer underflow */ + for (j = sizes[i] - 1; (j >= -underflow_range) && + (cur_mte_cxt.fault_valid == false); j--) { + ptr[j] = '1'; + last_index = j; + } + mte_wait_after_trig(); + err = false; + /* Check whether the buffer is filled */ + for (j = 0; j < sizes[i]; j++) { + if (ptr[j] != '1') { + err = true; + ksft_print_msg("Buffer is not filled at index:%d of ptr:0x%lx\n", + j, ptr); + break; + } + } + if (err) + goto check_buffer_underflow_by_byte_err; + + switch (mode) { + case MTE_NONE_ERR: + if (cur_mte_cxt.fault_valid == true || last_index != -underflow_range) { + err = true; + break; + } + /* There were no fault so the underflow area should be filled */ + und_ptr = (char *) MT_CLEAR_TAG((size_t) ptr - underflow_range); + for (j = 0 ; j < underflow_range; j++) { + if (und_ptr[j] != '1') { + err = true; + break; + } + } + break; + case MTE_ASYNC_ERR: + /* Imprecise fault should occur otherwise return error */ + if (cur_mte_cxt.fault_valid == false) { + err = true; + break; + } + /* + * The imprecise fault is checked after the write to the buffer, + * so the underflow area before the fault should be 
filled. + */ + und_ptr = (char *) MT_CLEAR_TAG((size_t) ptr); + for (j = last_index ; j < 0 ; j++) { + if (und_ptr[j] != '1') { + err = true; + break; + } + } + break; + case MTE_SYNC_ERR: + /* Precise fault should occur otherwise return error */ + if (!cur_mte_cxt.fault_valid || (last_index != (-1))) { + err = true; + break; + } + /* Underflow area should not be filled */ + und_ptr = (char *) MT_CLEAR_TAG((size_t) ptr); + if (und_ptr[-1] == '1') + err = true; + break; + default: + err = true; + break; + } +check_buffer_underflow_by_byte_err: + mte_free_memory_tag_range((void *)ptr, sizes[i], mem_type, underflow_range, 0); + if (err) + break; + } + return (err ? KSFT_FAIL : KSFT_PASS); +} + +static int check_buffer_overflow_by_byte(int mem_type, int mode, + int overflow_range) +{ + char *ptr; + int i, j, item, last_index; + bool err; + size_t tagged_size, overflow_size; + char *over_ptr = NULL; + + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + item = sizeof(sizes)/sizeof(int); + for (i = 0; i < item; i++) { + ptr = (char *)mte_allocate_memory_tag_range(sizes[i], mem_type, 0, + 0, overflow_range); + if (check_allocated_memory_range(ptr, sizes[i], mem_type, + 0, overflow_range) != KSFT_PASS) + return KSFT_FAIL; + + tagged_size = MT_ALIGN_UP(sizes[i]); + + mte_initialize_current_context(mode, (uintptr_t)ptr, sizes[i] + overflow_range); + + /* Set some value in tagged memory and make the buffer overflow */ + for (j = 0, last_index = 0 ; (j < (sizes[i] + overflow_range)) && + (cur_mte_cxt.fault_valid == false); j++) { + ptr[j] = '1'; + last_index = j; + } + mte_wait_after_trig(); + err = false; + /* Check whether the buffer is filled */ + for (j = 0; j < sizes[i]; j++) { + if (ptr[j] != '1') { + err = true; + ksft_print_msg("Buffer is not filled at index:%d of ptr:0x%lx\n", + j, ptr); + break; + } + } + if (err) + goto check_buffer_overflow_by_byte_err; + + overflow_size = overflow_range - (tagged_size - sizes[i]); + + switch (mode) { + case MTE_NONE_ERR: + if
((cur_mte_cxt.fault_valid == true) || + (last_index != (sizes[i] + overflow_range - 1))) { + err = true; + break; + } + /* There was no fault so the overflow area should be filled */ + over_ptr = (char *) MT_CLEAR_TAG((size_t) ptr + tagged_size); + for (j = 0 ; j < overflow_size; j++) { + if (over_ptr[j] != '1') { + err = true; + break; + } + } + break; + case MTE_ASYNC_ERR: + /* Imprecise fault should occur otherwise return error */ + if (cur_mte_cxt.fault_valid == false) { + err = true; + break; + } + /* + * The imprecise fault is checked after the write to the buffer, + * so the overflow area should be filled before the fault. + */ + over_ptr = (char *) MT_CLEAR_TAG((size_t) ptr); + for (j = tagged_size ; j < last_index; j++) { + if (over_ptr[j] != '1') { + err = true; + break; + } + } + break; + case MTE_SYNC_ERR: + /* Precise fault should occur otherwise return error */ + if (!cur_mte_cxt.fault_valid || (last_index != tagged_size)) { + err = true; + break; + } + /* Overflow area should not be filled */ + over_ptr = (char *) MT_CLEAR_TAG((size_t) ptr + tagged_size); + for (j = 0 ; j < overflow_size; j++) { + if (over_ptr[j] == '1') + err = true; + } + break; + default: + err = true; + break; + } +check_buffer_overflow_by_byte_err: + mte_free_memory_tag_range((void *)ptr, sizes[i], mem_type, 0, overflow_range); + if (err) + break; + } + return (err ?
KSFT_FAIL : KSFT_PASS); +} + +static int check_buffer_by_block_iterate(int mem_type, int mode, size_t size) +{ + char *src, *dst; + int j, result = KSFT_PASS; + enum mte_block_test_alloc alloc_type = UNTAGGED_TAGGED; + + for (alloc_type = UNTAGGED_TAGGED; alloc_type < (int) BLOCK_ALLOC_MAX; alloc_type++) { + switch (alloc_type) { + case UNTAGGED_TAGGED: + src = (char *)mte_allocate_memory(size, mem_type, 0, false); + if (check_allocated_memory(src, size, mem_type, false) != KSFT_PASS) + return KSFT_FAIL; + + dst = (char *)mte_allocate_memory(size, mem_type, 0, true); + if (check_allocated_memory(dst, size, mem_type, true) != KSFT_PASS) { + mte_free_memory((void *)src, size, mem_type, false); + return KSFT_FAIL; + } + + break; + case TAGGED_UNTAGGED: + dst = (char *)mte_allocate_memory(size, mem_type, 0, false); + if (check_allocated_memory(dst, size, mem_type, false) != KSFT_PASS) + return KSFT_FAIL; + + src = (char *)mte_allocate_memory(size, mem_type, 0, true); + if (check_allocated_memory(src, size, mem_type, true) != KSFT_PASS) { + mte_free_memory((void *)dst, size, mem_type, false); + return KSFT_FAIL; + } + break; + case TAGGED_TAGGED: + src = (char *)mte_allocate_memory(size, mem_type, 0, true); + if (check_allocated_memory(src, size, mem_type, true) != KSFT_PASS) + return KSFT_FAIL; + + dst = (char *)mte_allocate_memory(size, mem_type, 0, true); + if (check_allocated_memory(dst, size, mem_type, true) != KSFT_PASS) { + mte_free_memory((void *)src, size, mem_type, true); + return KSFT_FAIL; + } + break; + default: + return KSFT_FAIL; + } + + cur_mte_cxt.fault_valid = false; + result = KSFT_PASS; + mte_initialize_current_context(mode, (uintptr_t)dst, size); + /* Set some value in memory and copy*/ + memset((void *)src, (int)'1', size); + memcpy((void *)dst, (void *)src, size); + mte_wait_after_trig(); + if (cur_mte_cxt.fault_valid) { + result = KSFT_FAIL; + goto check_buffer_by_block_err; + } + /* Check the buffer whether it is filled. 
*/ + for (j = 0; j < size; j++) { + if (src[j] != dst[j] || src[j] != '1') { + result = KSFT_FAIL; + break; + } + } +check_buffer_by_block_err: + mte_free_memory((void *)src, size, mem_type, + MT_FETCH_TAG((uintptr_t)src) ? true : false); + mte_free_memory((void *)dst, size, mem_type, + MT_FETCH_TAG((uintptr_t)dst) ? true : false); + if (result != KSFT_PASS) + return result; + } + return result; +} + +static int check_buffer_by_block(int mem_type, int mode) +{ + int i, item, result = KSFT_PASS; + + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + item = sizeof(sizes)/sizeof(int); + cur_mte_cxt.fault_valid = false; + for (i = 0; i < item; i++) { + result = check_buffer_by_block_iterate(mem_type, mode, sizes[i]); + if (result != KSFT_PASS) + break; + } + return result; +} + +static int compare_memory_tags(char *ptr, size_t size, int tag) +{ + int i, new_tag; + + for (i = 0 ; i < size ; i += MT_GRANULE_SIZE) { + new_tag = MT_FETCH_TAG((uintptr_t)(mte_get_tag_address(ptr + i))); + if (tag != new_tag) { + ksft_print_msg("FAIL: child mte tag mismatch\n"); + return KSFT_FAIL; + } + } + return KSFT_PASS; +} + +static int check_memory_initial_tags(int mem_type, int mode, int mapping) +{ + char *ptr; + int run, fd; + int total = sizeof(sizes)/sizeof(int); + + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + for (run = 0; run < total; run++) { + /* check initial tags for anonymous mmap */ + ptr = (char *)mte_allocate_memory(sizes[run], mem_type, mapping, false); + if (check_allocated_memory(ptr, sizes[run], mem_type, false) != KSFT_PASS) + return KSFT_FAIL; + if (compare_memory_tags(ptr, sizes[run], 0) != KSFT_PASS) { + mte_free_memory((void *)ptr, sizes[run], mem_type, false); + return KSFT_FAIL; + } + mte_free_memory((void *)ptr, sizes[run], mem_type, false); + + /* check initial tags for file mmap */ + fd = create_temp_file(); + if (fd == -1) + return KSFT_FAIL; + ptr = (char *)mte_allocate_file_memory(sizes[run], mem_type, mapping, false, fd); + if 
(check_allocated_memory(ptr, sizes[run], mem_type, false) != KSFT_PASS) { + close(fd); + return KSFT_FAIL; + } + if (compare_memory_tags(ptr, sizes[run], 0) != KSFT_PASS) { + mte_free_memory((void *)ptr, sizes[run], mem_type, false); + close(fd); + return KSFT_FAIL; + } + mte_free_memory((void *)ptr, sizes[run], mem_type, false); + close(fd); + } + return KSFT_PASS; +} + +int main(int argc, char *argv[]) +{ + int err; + size_t page_size = getpagesize(); + int item = sizeof(sizes)/sizeof(int); + + sizes[item - 3] = page_size - 1; + sizes[item - 2] = page_size; + sizes[item - 1] = page_size + 1; + + err = mte_default_setup(); + if (err) + return err; + + /* Register SIGSEGV handler */ + mte_register_signal(SIGSEGV, mte_default_handler); + + /* Buffer by byte tests */ + evaluate_test(check_buffer_by_byte(USE_MMAP, MTE_SYNC_ERR), + "Check buffer correctness by byte with sync err mode and mmap memory\n"); + evaluate_test(check_buffer_by_byte(USE_MMAP, MTE_ASYNC_ERR), + "Check buffer correctness by byte with async err mode and mmap memory\n"); + evaluate_test(check_buffer_by_byte(USE_MPROTECT, MTE_SYNC_ERR), + "Check buffer correctness by byte with sync err mode and mmap/mprotect memory\n"); + evaluate_test(check_buffer_by_byte(USE_MPROTECT, MTE_ASYNC_ERR), + "Check buffer correctness by byte with async err mode and mmap/mprotect memory\n"); + + /* Check buffer underflow with underflow size as 16 */ + evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_SYNC_ERR, MT_GRANULE_SIZE), + "Check buffer write underflow by byte with sync mode and mmap memory\n"); + evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_ASYNC_ERR, MT_GRANULE_SIZE), + "Check buffer write underflow by byte with async mode and mmap memory\n"); + evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_NONE_ERR, MT_GRANULE_SIZE), + "Check buffer write underflow by byte with tag check fault ignore and mmap memory\n"); + + /* Check buffer underflow with underflow size as page size */ + 
evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_SYNC_ERR, page_size), + "Check buffer write underflow by byte with sync mode and mmap memory\n"); + evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_ASYNC_ERR, page_size), + "Check buffer write underflow by byte with async mode and mmap memory\n"); + evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_NONE_ERR, page_size), + "Check buffer write underflow by byte with tag check fault ignore and mmap memory\n"); + + /* Check buffer overflow with overflow size as 16 */ + evaluate_test(check_buffer_overflow_by_byte(USE_MMAP, MTE_SYNC_ERR, MT_GRANULE_SIZE), + "Check buffer write overflow by byte with sync mode and mmap memory\n"); + evaluate_test(check_buffer_overflow_by_byte(USE_MMAP, MTE_ASYNC_ERR, MT_GRANULE_SIZE), + "Check buffer write overflow by byte with async mode and mmap memory\n"); + evaluate_test(check_buffer_overflow_by_byte(USE_MMAP, MTE_NONE_ERR, MT_GRANULE_SIZE), + "Check buffer write overflow by byte with tag fault ignore mode and mmap memory\n"); + + /* Buffer by block tests */ + evaluate_test(check_buffer_by_block(USE_MMAP, MTE_SYNC_ERR), + "Check buffer write correctness by block with sync mode and mmap memory\n"); + evaluate_test(check_buffer_by_block(USE_MMAP, MTE_ASYNC_ERR), + "Check buffer write correctness by block with async mode and mmap memory\n"); + evaluate_test(check_buffer_by_block(USE_MMAP, MTE_NONE_ERR), + "Check buffer write correctness by block with tag fault ignore and mmap memory\n"); + + /* Initial tags are supposed to be 0 */ + evaluate_test(check_memory_initial_tags(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE), + "Check initial tags with private mapping, sync error mode and mmap memory\n"); + evaluate_test(check_memory_initial_tags(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE), + "Check initial tags with private mapping, sync error mode and mmap/mprotect memory\n"); + evaluate_test(check_memory_initial_tags(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED), + "Check initial tags 
with shared mapping, sync error mode and mmap memory\n"); + evaluate_test(check_memory_initial_tags(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED), + "Check initial tags with shared mapping, sync error mode and mmap/mprotect memory\n"); + + mte_restore_setup(); + ksft_print_cnts(); + return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL; +} diff --git a/tools/testing/selftests/arm64/mte/check_child_memory.c b/tools/testing/selftests/arm64/mte/check_child_memory.c new file mode 100644 index 000000000000..97bebdecd29e --- /dev/null +++ b/tools/testing/selftests/arm64/mte/check_child_memory.c @@ -0,0 +1,195 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2020 ARM Limited + +#define _GNU_SOURCE + +#include <errno.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ucontext.h> +#include <sys/wait.h> + +#include "kselftest.h" +#include "mte_common_util.h" +#include "mte_def.h" + +#define BUFFER_SIZE (5 * MT_GRANULE_SIZE) +#define RUNS (MT_TAG_COUNT) +#define UNDERFLOW MT_GRANULE_SIZE +#define OVERFLOW MT_GRANULE_SIZE + +static size_t page_size; +static int sizes[] = { + 1, 537, 989, 1269, MT_GRANULE_SIZE - 1, MT_GRANULE_SIZE, + /* page size - 1*/ 0, /* page_size */ 0, /* page size + 1 */ 0 +}; + +static int check_child_tag_inheritance(char *ptr, int size, int mode) +{ + int i, parent_tag, child_tag, fault, child_status; + pid_t child; + + parent_tag = MT_FETCH_TAG((uintptr_t)ptr); + fault = 0; + + child = fork(); + if (child == -1) { + ksft_print_msg("FAIL: child process creation\n"); + return KSFT_FAIL; + } else if (child == 0) { + mte_initialize_current_context(mode, (uintptr_t)ptr, size); + /* Do copy on write */ + memset(ptr, '1', size); + mte_wait_after_trig(); + if (cur_mte_cxt.fault_valid == true) { + fault = 1; + goto check_child_tag_inheritance_err; + } + for (i = 0 ; i < size ; i += MT_GRANULE_SIZE) { + child_tag = MT_FETCH_TAG((uintptr_t)(mte_get_tag_address(ptr + i))); + if (parent_tag != child_tag) { + 
ksft_print_msg("FAIL: child mte tag mismatch\n"); + fault = 1; + goto check_child_tag_inheritance_err; + } + } + mte_initialize_current_context(mode, (uintptr_t)ptr, -UNDERFLOW); + memset(ptr - UNDERFLOW, '2', UNDERFLOW); + mte_wait_after_trig(); + if (cur_mte_cxt.fault_valid == false) { + fault = 1; + goto check_child_tag_inheritance_err; + } + mte_initialize_current_context(mode, (uintptr_t)ptr, size + OVERFLOW); + memset(ptr + size, '3', OVERFLOW); + mte_wait_after_trig(); + if (cur_mte_cxt.fault_valid == false) { + fault = 1; + goto check_child_tag_inheritance_err; + } +check_child_tag_inheritance_err: + _exit(fault); + } + /* Wait for child process to terminate */ + wait(&child_status); + if (WIFEXITED(child_status)) + fault = WEXITSTATUS(child_status); + else + fault = 1; + return (fault) ? KSFT_FAIL : KSFT_PASS; +} + +static int check_child_memory_mapping(int mem_type, int mode, int mapping) +{ + char *ptr; + int run, result; + int item = sizeof(sizes)/sizeof(int); + + item = sizeof(sizes)/sizeof(int); + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + for (run = 0; run < item; run++) { + ptr = (char *)mte_allocate_memory_tag_range(sizes[run], mem_type, mapping, + UNDERFLOW, OVERFLOW); + if (check_allocated_memory_range(ptr, sizes[run], mem_type, + UNDERFLOW, OVERFLOW) != KSFT_PASS) + return KSFT_FAIL; + result = check_child_tag_inheritance(ptr, sizes[run], mode); + mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type, UNDERFLOW, OVERFLOW); + if (result == KSFT_FAIL) + return result; + } + return KSFT_PASS; +} + +static int check_child_file_mapping(int mem_type, int mode, int mapping) +{ + char *ptr, *map_ptr; + int run, fd, map_size, result = KSFT_PASS; + int total = sizeof(sizes)/sizeof(int); + + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + for (run = 0; run < total; run++) { + fd = create_temp_file(); + if (fd == -1) + return KSFT_FAIL; + + map_size = sizes[run] + OVERFLOW + UNDERFLOW; + map_ptr = (char *)mte_allocate_file_memory(map_size, 
mem_type, mapping, false, fd); + if (check_allocated_memory(map_ptr, map_size, mem_type, false) != KSFT_PASS) { + close(fd); + return KSFT_FAIL; + } + ptr = map_ptr + UNDERFLOW; + mte_initialize_current_context(mode, (uintptr_t)ptr, sizes[run]); + /* Only mte enabled memory will allow tag insertion */ + ptr = mte_insert_tags((void *)ptr, sizes[run]); + if (!ptr || cur_mte_cxt.fault_valid == true) { + ksft_print_msg("FAIL: Insert tags on file based memory\n"); + munmap((void *)map_ptr, map_size); + close(fd); + return KSFT_FAIL; + } + result = check_child_tag_inheritance(ptr, sizes[run], mode); + mte_clear_tags((void *)ptr, sizes[run]); + munmap((void *)map_ptr, map_size); + close(fd); + if (result != KSFT_PASS) + return KSFT_FAIL; + } + return KSFT_PASS; +} + +int main(int argc, char *argv[]) +{ + int err; + int item = sizeof(sizes)/sizeof(int); + + page_size = getpagesize(); + if (!page_size) { + ksft_print_msg("ERR: Unable to get page size\n"); + return KSFT_FAIL; + } + sizes[item - 3] = page_size - 1; + sizes[item - 2] = page_size; + sizes[item - 1] = page_size + 1; + + err = mte_default_setup(); + if (err) + return err; + + /* Register SIGSEGV handler */ + mte_register_signal(SIGSEGV, mte_default_handler); + mte_register_signal(SIGBUS, mte_default_handler); + + evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE), + "Check child anonymous memory with private mapping, precise mode and mmap memory\n"); + evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED), + "Check child anonymous memory with shared mapping, precise mode and mmap memory\n"); + evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE), + "Check child anonymous memory with private mapping, imprecise mode and mmap memory\n"); + evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED), + "Check child anonymous memory with shared mapping, imprecise mode and mmap memory\n"); + 
evaluate_test(check_child_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE), + "Check child anonymous memory with private mapping, precise mode and mmap/mprotect memory\n"); + evaluate_test(check_child_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED), + "Check child anonymous memory with shared mapping, precise mode and mmap/mprotect memory\n"); + + evaluate_test(check_child_file_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE), + "Check child file memory with private mapping, precise mode and mmap memory\n"); + evaluate_test(check_child_file_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED), + "Check child file memory with shared mapping, precise mode and mmap memory\n"); + evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE), + "Check child file memory with private mapping, imprecise mode and mmap memory\n"); + evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED), + "Check child file memory with shared mapping, imprecise mode and mmap memory\n"); + evaluate_test(check_child_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE), + "Check child file memory with private mapping, precise mode and mmap/mprotect memory\n"); + evaluate_test(check_child_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED), + "Check child file memory with shared mapping, precise mode and mmap/mprotect memory\n"); + + mte_restore_setup(); + ksft_print_cnts(); + return ksft_get_fail_cnt() == 0 ? 
KSFT_PASS : KSFT_FAIL; +} diff --git a/tools/testing/selftests/arm64/mte/check_ksm_options.c b/tools/testing/selftests/arm64/mte/check_ksm_options.c new file mode 100644 index 000000000000..bc41ae630c86 --- /dev/null +++ b/tools/testing/selftests/arm64/mte/check_ksm_options.c @@ -0,0 +1,159 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2020 ARM Limited + +#define _GNU_SOURCE + +#include <errno.h> +#include <fcntl.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ucontext.h> +#include <sys/mman.h> + +#include "kselftest.h" +#include "mte_common_util.h" +#include "mte_def.h" + +#define TEST_UNIT 10 +#define PATH_KSM "/sys/kernel/mm/ksm/" +#define MAX_LOOP 4 + +static size_t page_sz; +static unsigned long ksm_sysfs[5]; + +static unsigned long read_sysfs(char *str) +{ + FILE *f; + unsigned long val = 0; + + f = fopen(str, "r"); + if (!f) { + ksft_print_msg("ERR: missing %s\n", str); + return 0; + } + fscanf(f, "%lu", &val); + fclose(f); + return val; +} + +static void write_sysfs(char *str, unsigned long val) +{ + FILE *f; + + f = fopen(str, "w"); + if (!f) { + ksft_print_msg("ERR: missing %s\n", str); + return; + } + fprintf(f, "%lu", val); + fclose(f); +} + +static void mte_ksm_setup(void) +{ + ksm_sysfs[0] = read_sysfs(PATH_KSM "merge_across_nodes"); + write_sysfs(PATH_KSM "merge_across_nodes", 1); + ksm_sysfs[1] = read_sysfs(PATH_KSM "sleep_millisecs"); + write_sysfs(PATH_KSM "sleep_millisecs", 0); + ksm_sysfs[2] = read_sysfs(PATH_KSM "run"); + write_sysfs(PATH_KSM "run", 1); + ksm_sysfs[3] = read_sysfs(PATH_KSM "max_page_sharing"); + write_sysfs(PATH_KSM "max_page_sharing", ksm_sysfs[3] + TEST_UNIT); + ksm_sysfs[4] = read_sysfs(PATH_KSM "pages_to_scan"); + write_sysfs(PATH_KSM "pages_to_scan", ksm_sysfs[4] + TEST_UNIT); +} + +static void mte_ksm_restore(void) +{ + write_sysfs(PATH_KSM "merge_across_nodes", ksm_sysfs[0]); + write_sysfs(PATH_KSM "sleep_millisecs", ksm_sysfs[1]); + write_sysfs(PATH_KSM "run", 
ksm_sysfs[2]); + write_sysfs(PATH_KSM "max_page_sharing", ksm_sysfs[3]); + write_sysfs(PATH_KSM "pages_to_scan", ksm_sysfs[4]); +} + +static void mte_ksm_scan(void) +{ + int cur_count = read_sysfs(PATH_KSM "full_scans"); + int scan_count = cur_count + 1; + int max_loop_count = MAX_LOOP; + + while ((cur_count < scan_count) && max_loop_count) { + sleep(1); + cur_count = read_sysfs(PATH_KSM "full_scans"); + max_loop_count--; + } +#ifdef DEBUG + ksft_print_msg("INFO: pages_shared=%lu pages_sharing=%lu\n", + read_sysfs(PATH_KSM "pages_shared"), + read_sysfs(PATH_KSM "pages_sharing")); +#endif +} + +static int check_madvise_options(int mem_type, int mode, int mapping) +{ + char *ptr; + int err, ret; + + err = KSFT_FAIL; + if (access(PATH_KSM, F_OK) == -1) { + ksft_print_msg("ERR: Kernel KSM config not enabled\n"); + return err; + } + + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + ptr = mte_allocate_memory(TEST_UNIT * page_sz, mem_type, mapping, true); + if (check_allocated_memory(ptr, TEST_UNIT * page_sz, mem_type, false) != KSFT_PASS) + return KSFT_FAIL; + + /* Insert same data in all the pages */ + memset(ptr, 'A', TEST_UNIT * page_sz); + ret = madvise(ptr, TEST_UNIT * page_sz, MADV_MERGEABLE); + if (ret) { + ksft_print_msg("ERR: madvise failed to set MADV_UNMERGEABLE\n"); + goto madvise_err; + } + mte_ksm_scan(); + /* Tagged pages should not merge */ + if ((read_sysfs(PATH_KSM "pages_shared") < 1) || + (read_sysfs(PATH_KSM "pages_sharing") < (TEST_UNIT - 1))) + err = KSFT_PASS; +madvise_err: + mte_free_memory(ptr, TEST_UNIT * page_sz, mem_type, true); + return err; +} + +int main(int argc, char *argv[]) +{ + int err; + + err = mte_default_setup(); + if (err) + return err; + page_sz = getpagesize(); + if (!page_sz) { + ksft_print_msg("ERR: Unable to get page size\n"); + return KSFT_FAIL; + } + /* Register signal handlers */ + mte_register_signal(SIGBUS, mte_default_handler); + mte_register_signal(SIGSEGV, mte_default_handler); + /* Enable KSM */ + mte_ksm_setup(); 
+ + evaluate_test(check_madvise_options(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE), + "Check KSM mte page merge for private mapping, sync mode and mmap memory\n"); + evaluate_test(check_madvise_options(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE), + "Check KSM mte page merge for private mapping, async mode and mmap memory\n"); + evaluate_test(check_madvise_options(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED), + "Check KSM mte page merge for shared mapping, sync mode and mmap memory\n"); + evaluate_test(check_madvise_options(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED), + "Check KSM mte page merge for shared mapping, async mode and mmap memory\n"); + + mte_ksm_restore(); + mte_restore_setup(); + ksft_print_cnts(); + return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL; +} diff --git a/tools/testing/selftests/arm64/mte/check_mmap_options.c b/tools/testing/selftests/arm64/mte/check_mmap_options.c new file mode 100644 index 000000000000..33b13b86199b --- /dev/null +++ b/tools/testing/selftests/arm64/mte/check_mmap_options.c @@ -0,0 +1,262 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2020 ARM Limited + +#define _GNU_SOURCE + +#include <errno.h> +#include <fcntl.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ucontext.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <sys/types.h> + +#include "kselftest.h" +#include "mte_common_util.h" +#include "mte_def.h" + +#define RUNS (MT_TAG_COUNT) +#define UNDERFLOW MT_GRANULE_SIZE +#define OVERFLOW MT_GRANULE_SIZE +#define TAG_CHECK_ON 0 +#define TAG_CHECK_OFF 1 + +static size_t page_size; +static int sizes[] = { + 1, 537, 989, 1269, MT_GRANULE_SIZE - 1, MT_GRANULE_SIZE, + /* page size - 1*/ 0, /* page_size */ 0, /* page size + 1 */ 0 +}; + +static int check_mte_memory(char *ptr, int size, int mode, int tag_check) +{ + mte_initialize_current_context(mode, (uintptr_t)ptr, size); + memset(ptr, '1', size); + mte_wait_after_trig(); + if (cur_mte_cxt.fault_valid == true) + return KSFT_FAIL; 
+ + mte_initialize_current_context(mode, (uintptr_t)ptr, -UNDERFLOW); + memset(ptr - UNDERFLOW, '2', UNDERFLOW); + mte_wait_after_trig(); + if (cur_mte_cxt.fault_valid == false && tag_check == TAG_CHECK_ON) + return KSFT_FAIL; + if (cur_mte_cxt.fault_valid == true && tag_check == TAG_CHECK_OFF) + return KSFT_FAIL; + + mte_initialize_current_context(mode, (uintptr_t)ptr, size + OVERFLOW); + memset(ptr + size, '3', OVERFLOW); + mte_wait_after_trig(); + if (cur_mte_cxt.fault_valid == false && tag_check == TAG_CHECK_ON) + return KSFT_FAIL; + if (cur_mte_cxt.fault_valid == true && tag_check == TAG_CHECK_OFF) + return KSFT_FAIL; + + return KSFT_PASS; +} + +static int check_anonymous_memory_mapping(int mem_type, int mode, int mapping, int tag_check) +{ + char *ptr, *map_ptr; + int run, result, map_size; + int item = sizeof(sizes)/sizeof(int); + + item = sizeof(sizes)/sizeof(int); + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + for (run = 0; run < item; run++) { + map_size = sizes[run] + OVERFLOW + UNDERFLOW; + map_ptr = (char *)mte_allocate_memory(map_size, mem_type, mapping, false); + if (check_allocated_memory(map_ptr, map_size, mem_type, false) != KSFT_PASS) + return KSFT_FAIL; + + ptr = map_ptr + UNDERFLOW; + mte_initialize_current_context(mode, (uintptr_t)ptr, sizes[run]); + /* Only mte enabled memory will allow tag insertion */ + ptr = mte_insert_tags((void *)ptr, sizes[run]); + if (!ptr || cur_mte_cxt.fault_valid == true) { + ksft_print_msg("FAIL: Insert tags on anonymous mmap memory\n"); + munmap((void *)map_ptr, map_size); + return KSFT_FAIL; + } + result = check_mte_memory(ptr, sizes[run], mode, tag_check); + mte_clear_tags((void *)ptr, sizes[run]); + mte_free_memory((void *)map_ptr, map_size, mem_type, false); + if (result == KSFT_FAIL) + return KSFT_FAIL; + } + return KSFT_PASS; +} + +static int check_file_memory_mapping(int mem_type, int mode, int mapping, int tag_check) +{ + char *ptr, *map_ptr; + int run, fd, map_size; + int total = 
sizeof(sizes)/sizeof(int); + int result = KSFT_PASS; + + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + for (run = 0; run < total; run++) { + fd = create_temp_file(); + if (fd == -1) + return KSFT_FAIL; + + map_size = sizes[run] + UNDERFLOW + OVERFLOW; + map_ptr = (char *)mte_allocate_file_memory(map_size, mem_type, mapping, false, fd); + if (check_allocated_memory(map_ptr, map_size, mem_type, false) != KSFT_PASS) { + close(fd); + return KSFT_FAIL; + } + ptr = map_ptr + UNDERFLOW; + mte_initialize_current_context(mode, (uintptr_t)ptr, sizes[run]); + /* Only mte enabled memory will allow tag insertion */ + ptr = mte_insert_tags((void *)ptr, sizes[run]); + if (!ptr || cur_mte_cxt.fault_valid == true) { + ksft_print_msg("FAIL: Insert tags on file based memory\n"); + munmap((void *)map_ptr, map_size); + close(fd); + return KSFT_FAIL; + } + result = check_mte_memory(ptr, sizes[run], mode, tag_check); + mte_clear_tags((void *)ptr, sizes[run]); + munmap((void *)map_ptr, map_size); + close(fd); + if (result == KSFT_FAIL) + break; + } + return result; +} + +static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping) +{ + char *ptr, *map_ptr; + int run, prot_flag, result, fd, map_size; + int total = sizeof(sizes)/sizeof(int); + + prot_flag = PROT_READ | PROT_WRITE; + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + for (run = 0; run < total; run++) { + map_size = sizes[run] + OVERFLOW + UNDERFLOW; + ptr = (char *)mte_allocate_memory_tag_range(sizes[run], mem_type, mapping, + UNDERFLOW, OVERFLOW); + if (check_allocated_memory_range(ptr, sizes[run], mem_type, + UNDERFLOW, OVERFLOW) != KSFT_PASS) + return KSFT_FAIL; + map_ptr = ptr - UNDERFLOW; + /* Try to clear PROT_MTE property and verify it by tag checking */ + if (mprotect(map_ptr, map_size, prot_flag)) { + mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type, + UNDERFLOW, OVERFLOW); + ksft_print_msg("FAIL: mprotect not ignoring clear PROT_MTE property\n"); + return KSFT_FAIL; + } + result = 
check_mte_memory(ptr, sizes[run], mode, TAG_CHECK_ON); + mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type, UNDERFLOW, OVERFLOW); + if (result != KSFT_PASS) + return KSFT_FAIL; + + fd = create_temp_file(); + if (fd == -1) + return KSFT_FAIL; + ptr = (char *)mte_allocate_file_memory_tag_range(sizes[run], mem_type, mapping, + UNDERFLOW, OVERFLOW, fd); + if (check_allocated_memory_range(ptr, sizes[run], mem_type, + UNDERFLOW, OVERFLOW) != KSFT_PASS) { + close(fd); + return KSFT_FAIL; + } + map_ptr = ptr - UNDERFLOW; + /* Try to clear PROT_MTE property and verify it by tag checking */ + if (mprotect(map_ptr, map_size, prot_flag)) { + ksft_print_msg("FAIL: mprotect not ignoring clear PROT_MTE property\n"); + mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type, + UNDERFLOW, OVERFLOW); + close(fd); + return KSFT_FAIL; + } + result = check_mte_memory(ptr, sizes[run], mode, TAG_CHECK_ON); + mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type, UNDERFLOW, OVERFLOW); + close(fd); + if (result != KSFT_PASS) + return KSFT_FAIL; + } + return KSFT_PASS; +} + +int main(int argc, char *argv[]) +{ + int err; + int item = sizeof(sizes)/sizeof(int); + + err = mte_default_setup(); + if (err) + return err; + page_size = getpagesize(); + if (!page_size) { + ksft_print_msg("ERR: Unable to get page size\n"); + return KSFT_FAIL; + } + sizes[item - 3] = page_size - 1; + sizes[item - 2] = page_size; + sizes[item - 1] = page_size + 1; + + /* Register signal handlers */ + mte_register_signal(SIGBUS, mte_default_handler); + mte_register_signal(SIGSEGV, mte_default_handler); + + mte_enable_pstate_tco(); + evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_OFF), + "Check anonymous memory with private mapping, sync error mode, mmap memory and tag check off\n"); + evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_OFF), + "Check file memory with private mapping, sync error mode, mmap/mprotect 
memory and tag check off\n"); + + mte_disable_pstate_tco(); + evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_NONE_ERR, MAP_PRIVATE, TAG_CHECK_OFF), + "Check anonymous memory with private mapping, no error mode, mmap memory and tag check off\n"); + evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_NONE_ERR, MAP_PRIVATE, TAG_CHECK_OFF), + "Check file memory with private mapping, no error mode, mmap/mprotect memory and tag check off\n"); + + evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), + "Check anonymous memory with private mapping, sync error mode, mmap memory and tag check on\n"); + evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), + "Check anonymous memory with private mapping, sync error mode, mmap/mprotect memory and tag check on\n"); + evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON), + "Check anonymous memory with shared mapping, sync error mode, mmap memory and tag check on\n"); + evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON), + "Check anonymous memory with shared mapping, sync error mode, mmap/mprotect memory and tag check on\n"); + evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), + "Check anonymous memory with private mapping, async error mode, mmap memory and tag check on\n"); + evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), + "Check anonymous memory with private mapping, async error mode, mmap/mprotect memory and tag check on\n"); + evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON), + "Check anonymous memory with shared mapping, async error mode, mmap memory and tag check on\n"); + evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON), + "Check 
anonymous memory with shared mapping, async error mode, mmap/mprotect memory and tag check on\n"); + + evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), + "Check file memory with private mapping, sync error mode, mmap memory and tag check on\n"); + evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), + "Check file memory with private mapping, sync error mode, mmap/mprotect memory and tag check on\n"); + evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON), + "Check file memory with shared mapping, sync error mode, mmap memory and tag check on\n"); + evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON), + "Check file memory with shared mapping, sync error mode, mmap/mprotect memory and tag check on\n"); + evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), + "Check file memory with private mapping, async error mode, mmap memory and tag check on\n"); + evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON), + "Check file memory with private mapping, async error mode, mmap/mprotect memory and tag check on\n"); + evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON), + "Check file memory with shared mapping, async error mode, mmap memory and tag check on\n"); + evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON), + "Check file memory with shared mapping, async error mode, mmap/mprotect memory and tag check on\n"); + + evaluate_test(check_clear_prot_mte_flag(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE), + "Check clear PROT_MTE flags with private mapping, sync error mode and mmap memory\n"); + evaluate_test(check_clear_prot_mte_flag(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE), + "Check clear PROT_MTE flags with private mapping and sync error mode and 
mmap/mprotect memory\n"); + + mte_restore_setup(); + ksft_print_cnts(); + return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL; +} diff --git a/tools/testing/selftests/arm64/mte/check_tags_inclusion.c b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c new file mode 100644 index 000000000000..94d245a0ed56 --- /dev/null +++ b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c @@ -0,0 +1,185 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2020 ARM Limited + +#define _GNU_SOURCE + +#include <errno.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <ucontext.h> +#include <sys/wait.h> + +#include "kselftest.h" +#include "mte_common_util.h" +#include "mte_def.h" + +#define BUFFER_SIZE (5 * MT_GRANULE_SIZE) +#define RUNS (MT_TAG_COUNT * 2) +#define MTE_LAST_TAG_MASK (0x7FFF) + +static int verify_mte_pointer_validity(char *ptr, int mode) +{ + mte_initialize_current_context(mode, (uintptr_t)ptr, BUFFER_SIZE); + /* Check the validity of the tagged pointer */ + memset((void *)ptr, '1', BUFFER_SIZE); + mte_wait_after_trig(); + if (cur_mte_cxt.fault_valid) + return KSFT_FAIL; + /* Proceed further for nonzero tags */ + if (!MT_FETCH_TAG((uintptr_t)ptr)) + return KSFT_PASS; + mte_initialize_current_context(mode, (uintptr_t)ptr, BUFFER_SIZE + 1); + /* Check the validity outside the range */ + ptr[BUFFER_SIZE] = '2'; + mte_wait_after_trig(); + if (!cur_mte_cxt.fault_valid) + return KSFT_FAIL; + else + return KSFT_PASS; +} + +static int check_single_included_tags(int mem_type, int mode) +{ + char *ptr; + int tag, run, result = KSFT_PASS; + + ptr = (char *)mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false); + if (check_allocated_memory(ptr, BUFFER_SIZE + MT_GRANULE_SIZE, + mem_type, false) != KSFT_PASS) + return KSFT_FAIL; + + for (tag = 0; (tag < MT_TAG_COUNT) && (result == KSFT_PASS); tag++) { + mte_switch_mode(mode, MT_INCLUDE_VALID_TAG(tag)); + /* Try to catch a excluded tag by a number of 
tries. */ + for (run = 0; (run < RUNS) && (result == KSFT_PASS); run++) { + ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE); + /* Check tag value */ + if (MT_FETCH_TAG((uintptr_t)ptr) == tag) { + ksft_print_msg("FAIL: wrong tag = 0x%x with include mask=0x%x\n", + MT_FETCH_TAG((uintptr_t)ptr), + MT_INCLUDE_VALID_TAG(tag)); + result = KSFT_FAIL; + break; + } + result = verify_mte_pointer_validity(ptr, mode); + } + } + mte_free_memory_tag_range((void *)ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE); + return result; +} + +static int check_multiple_included_tags(int mem_type, int mode) +{ + char *ptr; + int tag, run, result = KSFT_PASS; + unsigned long excl_mask = 0; + + ptr = (char *)mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false); + if (check_allocated_memory(ptr, BUFFER_SIZE + MT_GRANULE_SIZE, + mem_type, false) != KSFT_PASS) + return KSFT_FAIL; + + for (tag = 0; (tag < MT_TAG_COUNT - 1) && (result == KSFT_PASS); tag++) { + excl_mask |= 1 << tag; + mte_switch_mode(mode, MT_INCLUDE_VALID_TAGS(excl_mask)); + /* Try to catch a excluded tag by a number of tries. 
*/ + for (run = 0; (run < RUNS) && (result == KSFT_PASS); run++) { + ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE); + /* Check tag value */ + if (MT_FETCH_TAG((uintptr_t)ptr) < tag) { + ksft_print_msg("FAIL: wrong tag = 0x%x with include mask=0x%x\n", + MT_FETCH_TAG((uintptr_t)ptr), + MT_INCLUDE_VALID_TAGS(excl_mask)); + result = KSFT_FAIL; + break; + } + result = verify_mte_pointer_validity(ptr, mode); + } + } + mte_free_memory_tag_range((void *)ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE); + return result; +} + +static int check_all_included_tags(int mem_type, int mode) +{ + char *ptr; + int run, result = KSFT_PASS; + + ptr = (char *)mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false); + if (check_allocated_memory(ptr, BUFFER_SIZE + MT_GRANULE_SIZE, + mem_type, false) != KSFT_PASS) + return KSFT_FAIL; + + mte_switch_mode(mode, MT_INCLUDE_TAG_MASK); + /* Try to catch a excluded tag by a number of tries. */ + for (run = 0; (run < RUNS) && (result == KSFT_PASS); run++) { + ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE); + /* + * Here tag byte can be between 0x0 to 0xF (full allowed range) + * so no need to match so just verify if it is writable. + */ + result = verify_mte_pointer_validity(ptr, mode); + } + mte_free_memory_tag_range((void *)ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE); + return result; +} + +static int check_none_included_tags(int mem_type, int mode) +{ + char *ptr; + int run; + + ptr = (char *)mte_allocate_memory(BUFFER_SIZE, mem_type, 0, false); + if (check_allocated_memory(ptr, BUFFER_SIZE, mem_type, false) != KSFT_PASS) + return KSFT_FAIL; + + mte_switch_mode(mode, MT_EXCLUDE_TAG_MASK); + /* Try to catch a excluded tag by a number of tries. 
*/ + for (run = 0; run < RUNS; run++) { + ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE); + /* Here all tags exluded so tag value generated should be 0 */ + if (MT_FETCH_TAG((uintptr_t)ptr)) { + ksft_print_msg("FAIL: included tag value found\n"); + mte_free_memory((void *)ptr, BUFFER_SIZE, mem_type, true); + return KSFT_FAIL; + } + mte_initialize_current_context(mode, (uintptr_t)ptr, BUFFER_SIZE); + /* Check the write validity of the untagged pointer */ + memset((void *)ptr, '1', BUFFER_SIZE); + mte_wait_after_trig(); + if (cur_mte_cxt.fault_valid) + break; + } + mte_free_memory((void *)ptr, BUFFER_SIZE, mem_type, false); + if (cur_mte_cxt.fault_valid) + return KSFT_FAIL; + else + return KSFT_PASS; +} + +int main(int argc, char *argv[]) +{ + int err; + + err = mte_default_setup(); + if (err) + return err; + + /* Register SIGSEGV handler */ + mte_register_signal(SIGSEGV, mte_default_handler); + + evaluate_test(check_single_included_tags(USE_MMAP, MTE_SYNC_ERR), + "Check an included tag value with sync mode\n"); + evaluate_test(check_multiple_included_tags(USE_MMAP, MTE_SYNC_ERR), + "Check different included tags value with sync mode\n"); + evaluate_test(check_none_included_tags(USE_MMAP, MTE_SYNC_ERR), + "Check none included tags value with sync mode\n"); + evaluate_test(check_all_included_tags(USE_MMAP, MTE_SYNC_ERR), + "Check all included tags value with sync mode\n"); + + mte_restore_setup(); + ksft_print_cnts(); + return ksft_get_fail_cnt() == 0 ? 
KSFT_PASS : KSFT_FAIL; +} diff --git a/tools/testing/selftests/arm64/mte/check_user_mem.c b/tools/testing/selftests/arm64/mte/check_user_mem.c new file mode 100644 index 000000000000..594e98e76880 --- /dev/null +++ b/tools/testing/selftests/arm64/mte/check_user_mem.c @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2020 ARM Limited + +#define _GNU_SOURCE + +#include <errno.h> +#include <fcntl.h> +#include <signal.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <ucontext.h> +#include <unistd.h> +#include <sys/mman.h> + +#include "kselftest.h" +#include "mte_common_util.h" +#include "mte_def.h" + +static size_t page_sz; + +static int check_usermem_access_fault(int mem_type, int mode, int mapping) +{ + int fd, i, err; + char val = 'A'; + size_t len, read_len; + void *ptr, *ptr_next; + + err = KSFT_FAIL; + len = 2 * page_sz; + mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG); + fd = create_temp_file(); + if (fd == -1) + return KSFT_FAIL; + for (i = 0; i < len; i++) + write(fd, &val, sizeof(val)); + lseek(fd, 0, 0); + ptr = mte_allocate_memory(len, mem_type, mapping, true); + if (check_allocated_memory(ptr, len, mem_type, true) != KSFT_PASS) { + close(fd); + return KSFT_FAIL; + } + mte_initialize_current_context(mode, (uintptr_t)ptr, len); + /* Copy from file into buffer with valid tag */ + read_len = read(fd, ptr, len); + mte_wait_after_trig(); + if (cur_mte_cxt.fault_valid || read_len < len) + goto usermem_acc_err; + /* Verify same pattern is read */ + for (i = 0; i < len; i++) + if (*(char *)(ptr + i) != val) + break; + if (i < len) + goto usermem_acc_err; + + /* Tag the next half of memory with different value */ + ptr_next = (void *)((unsigned long)ptr + page_sz); + ptr_next = mte_insert_new_tag(ptr_next); + mte_set_tag_address_range(ptr_next, page_sz); + + lseek(fd, 0, 0); + /* Copy from file into buffer with invalid tag */ + read_len = read(fd, ptr, len); + mte_wait_after_trig(); + /* + * Accessing user memory in 
kernel with invalid tag should fail in sync + * mode without fault but may not fail in async mode as per the + * implemented MTE userspace support in Arm64 kernel. + */ + if (mode == MTE_SYNC_ERR && + !cur_mte_cxt.fault_valid && read_len < len) { + err = KSFT_PASS; + } else if (mode == MTE_ASYNC_ERR && + !cur_mte_cxt.fault_valid && read_len == len) { + err = KSFT_PASS; + } +usermem_acc_err: + mte_free_memory((void *)ptr, len, mem_type, true); + close(fd); + return err; +} + +int main(int argc, char *argv[]) +{ + int err; + + page_sz = getpagesize(); + if (!page_sz) { + ksft_print_msg("ERR: Unable to get page size\n"); + return KSFT_FAIL; + } + err = mte_default_setup(); + if (err) + return err; + /* Register signal handlers */ + mte_register_signal(SIGSEGV, mte_default_handler); + + evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE), + "Check memory access from kernel in sync mode, private mapping and mmap memory\n"); + evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED), + "Check memory access from kernel in sync mode, shared mapping and mmap memory\n"); + + evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE), + "Check memory access from kernel in async mode, private mapping and mmap memory\n"); + evaluate_test(check_usermem_access_fault(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED), + "Check memory access from kernel in async mode, shared mapping and mmap memory\n"); + + mte_restore_setup(); + ksft_print_cnts(); + return ksft_get_fail_cnt() == 0 ? 
KSFT_PASS : KSFT_FAIL; +} diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.c b/tools/testing/selftests/arm64/mte/mte_common_util.c new file mode 100644 index 000000000000..39f8908988ea --- /dev/null +++ b/tools/testing/selftests/arm64/mte/mte_common_util.c @@ -0,0 +1,341 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2020 ARM Limited + +#include <fcntl.h> +#include <sched.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> + +#include <linux/auxvec.h> +#include <sys/auxv.h> +#include <sys/mman.h> +#include <sys/prctl.h> + +#include <asm/hwcap.h> + +#include "kselftest.h" +#include "mte_common_util.h" +#include "mte_def.h" + +#define INIT_BUFFER_SIZE 256 + +struct mte_fault_cxt cur_mte_cxt; +static unsigned int mte_cur_mode; +static unsigned int mte_cur_pstate_tco; + +void mte_default_handler(int signum, siginfo_t *si, void *uc) +{ + unsigned long addr = (unsigned long)si->si_addr; + + if (signum == SIGSEGV) { +#ifdef DEBUG + ksft_print_msg("INFO: SIGSEGV signal at pc=%lx, fault addr=%lx, si_code=%lx\n", + ((ucontext_t *)uc)->uc_mcontext.pc, addr, si->si_code); +#endif + if (si->si_code == SEGV_MTEAERR) { + if (cur_mte_cxt.trig_si_code == si->si_code) + cur_mte_cxt.fault_valid = true; + return; + } + /* Compare the context for precise error */ + else if (si->si_code == SEGV_MTESERR) { + if (cur_mte_cxt.trig_si_code == si->si_code && + ((cur_mte_cxt.trig_range >= 0 && + addr >= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) && + addr <= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)) || + (cur_mte_cxt.trig_range < 0 && + addr <= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) && + addr >= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)))) { + cur_mte_cxt.fault_valid = true; + /* Adjust the pc by 4 */ + ((ucontext_t *)uc)->uc_mcontext.pc += 4; + } else { + ksft_print_msg("Invalid MTE synchronous exception caught!\n"); + exit(1); + } + } else { + ksft_print_msg("Unknown SIGSEGV exception caught!\n"); + 
exit(1); + } + } else if (signum == SIGBUS) { + ksft_print_msg("INFO: SIGBUS signal at pc=%lx, fault addr=%lx, si_code=%lx\n", + ((ucontext_t *)uc)->uc_mcontext.pc, addr, si->si_code); + if ((cur_mte_cxt.trig_range >= 0 && + addr >= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) && + addr <= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)) || + (cur_mte_cxt.trig_range < 0 && + addr <= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) && + addr >= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range))) { + cur_mte_cxt.fault_valid = true; + /* Adjust the pc by 4 */ + ((ucontext_t *)uc)->uc_mcontext.pc += 4; + } + } +} + +void mte_register_signal(int signal, void (*handler)(int, siginfo_t *, void *)) +{ + struct sigaction sa; + + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO; + sigemptyset(&sa.sa_mask); + sigaction(signal, &sa, NULL); +} + +void mte_wait_after_trig(void) +{ + sched_yield(); +} + +void *mte_insert_tags(void *ptr, size_t size) +{ + void *tag_ptr; + int align_size; + + if (!ptr || (unsigned long)(ptr) & MT_ALIGN_GRANULE) { + ksft_print_msg("FAIL: Addr=%lx: invalid\n", ptr); + return NULL; + } + align_size = MT_ALIGN_UP(size); + tag_ptr = mte_insert_random_tag(ptr); + mte_set_tag_address_range(tag_ptr, align_size); + return tag_ptr; +} + +void mte_clear_tags(void *ptr, size_t size) +{ + if (!ptr || (unsigned long)(ptr) & MT_ALIGN_GRANULE) { + ksft_print_msg("FAIL: Addr=%lx: invalid\n", ptr); + return; + } + size = MT_ALIGN_UP(size); + ptr = (void *)MT_CLEAR_TAG((unsigned long)ptr); + mte_clear_tag_address_range(ptr, size); +} + +static void *__mte_allocate_memory_range(size_t size, int mem_type, int mapping, + size_t range_before, size_t range_after, + bool tags, int fd) +{ + void *ptr; + int prot_flag, map_flag; + size_t entire_size = size + range_before + range_after; + + if (mem_type != USE_MALLOC && mem_type != USE_MMAP && + mem_type != USE_MPROTECT) { + ksft_print_msg("FAIL: Invalid allocate request\n"); + return NULL; + } + if (mem_type == 
USE_MALLOC) + return malloc(entire_size) + range_before; + + prot_flag = PROT_READ | PROT_WRITE; + if (mem_type == USE_MMAP) + prot_flag |= PROT_MTE; + + map_flag = mapping; + if (fd == -1) + map_flag = MAP_ANONYMOUS | map_flag; + if (!(mapping & MAP_SHARED)) + map_flag |= MAP_PRIVATE; + ptr = mmap(NULL, entire_size, prot_flag, map_flag, fd, 0); + if (ptr == MAP_FAILED) { + ksft_print_msg("FAIL: mmap allocation\n"); + return NULL; + } + if (mem_type == USE_MPROTECT) { + if (mprotect(ptr, entire_size, prot_flag | PROT_MTE)) { + munmap(ptr, size); + ksft_print_msg("FAIL: mprotect PROT_MTE property\n"); + return NULL; + } + } + if (tags) + ptr = mte_insert_tags(ptr + range_before, size); + return ptr; +} + +void *mte_allocate_memory_tag_range(size_t size, int mem_type, int mapping, + size_t range_before, size_t range_after) +{ + return __mte_allocate_memory_range(size, mem_type, mapping, range_before, + range_after, true, -1); +} + +void *mte_allocate_memory(size_t size, int mem_type, int mapping, bool tags) +{ + return __mte_allocate_memory_range(size, mem_type, mapping, 0, 0, tags, -1); +} + +void *mte_allocate_file_memory(size_t size, int mem_type, int mapping, bool tags, int fd) +{ + int index; + char buffer[INIT_BUFFER_SIZE]; + + if (mem_type != USE_MPROTECT && mem_type != USE_MMAP) { + ksft_print_msg("FAIL: Invalid mmap file request\n"); + return NULL; + } + /* Initialize the file for mappable size */ + lseek(fd, 0, SEEK_SET); + for (index = INIT_BUFFER_SIZE; index < size; index += INIT_BUFFER_SIZE) + write(fd, buffer, INIT_BUFFER_SIZE); + index -= INIT_BUFFER_SIZE; + write(fd, buffer, size - index); + return __mte_allocate_memory_range(size, mem_type, mapping, 0, 0, tags, fd); +} + +void *mte_allocate_file_memory_tag_range(size_t size, int mem_type, int mapping, + size_t range_before, size_t range_after, int fd) +{ + int index; + char buffer[INIT_BUFFER_SIZE]; + int map_size = size + range_before + range_after; + + if (mem_type != USE_MPROTECT && mem_type != 
USE_MMAP) { + ksft_print_msg("FAIL: Invalid mmap file request\n"); + return NULL; + } + /* Initialize the file for mappable size */ + lseek(fd, 0, SEEK_SET); + for (index = INIT_BUFFER_SIZE; index < map_size; index += INIT_BUFFER_SIZE) + write(fd, buffer, INIT_BUFFER_SIZE); + index -= INIT_BUFFER_SIZE; + write(fd, buffer, map_size - index); + return __mte_allocate_memory_range(size, mem_type, mapping, range_before, + range_after, true, fd); +} + +static void __mte_free_memory_range(void *ptr, size_t size, int mem_type, + size_t range_before, size_t range_after, bool tags) +{ + switch (mem_type) { + case USE_MALLOC: + free(ptr - range_before); + break; + case USE_MMAP: + case USE_MPROTECT: + if (tags) + mte_clear_tags(ptr, size); + munmap(ptr - range_before, size + range_before + range_after); + break; + default: + ksft_print_msg("FAIL: Invalid free request\n"); + break; + } +} + +void mte_free_memory_tag_range(void *ptr, size_t size, int mem_type, + size_t range_before, size_t range_after) +{ + __mte_free_memory_range(ptr, size, mem_type, range_before, range_after, true); +} + +void mte_free_memory(void *ptr, size_t size, int mem_type, bool tags) +{ + __mte_free_memory_range(ptr, size, mem_type, 0, 0, tags); +} + +void mte_initialize_current_context(int mode, uintptr_t ptr, ssize_t range) +{ + cur_mte_cxt.fault_valid = false; + cur_mte_cxt.trig_addr = ptr; + cur_mte_cxt.trig_range = range; + if (mode == MTE_SYNC_ERR) + cur_mte_cxt.trig_si_code = SEGV_MTESERR; + else if (mode == MTE_ASYNC_ERR) + cur_mte_cxt.trig_si_code = SEGV_MTEAERR; + else + cur_mte_cxt.trig_si_code = 0; +} + +int mte_switch_mode(int mte_option, unsigned long incl_mask) +{ + unsigned long en = 0; + + if (!(mte_option == MTE_SYNC_ERR || mte_option == MTE_ASYNC_ERR || + mte_option == MTE_NONE_ERR || incl_mask <= MTE_ALLOW_NON_ZERO_TAG)) { + ksft_print_msg("FAIL: Invalid mte config option\n"); + return -EINVAL; + } + en = PR_TAGGED_ADDR_ENABLE; + if (mte_option == MTE_SYNC_ERR) + en |= 
PR_MTE_TCF_SYNC; + else if (mte_option == MTE_ASYNC_ERR) + en |= PR_MTE_TCF_ASYNC; + else if (mte_option == MTE_NONE_ERR) + en |= PR_MTE_TCF_NONE; + + en |= (incl_mask << PR_MTE_TAG_SHIFT); + /* Enable address tagging ABI, mte error reporting mode and tag inclusion mask. */ + if (!prctl(PR_SET_TAGGED_ADDR_CTRL, en, 0, 0, 0) == 0) { + ksft_print_msg("FAIL:prctl PR_SET_TAGGED_ADDR_CTRL for mte mode\n"); + return -EINVAL; + } + return 0; +} + +#define ID_AA64PFR1_MTE_SHIFT 8 +#define ID_AA64PFR1_MTE 2 + +int mte_default_setup(void) +{ + unsigned long hwcaps = getauxval(AT_HWCAP); + unsigned long en = 0; + int ret; + + if (!(hwcaps & HWCAP_CPUID)) { + ksft_print_msg("FAIL: CPUID registers unavailable\n"); + return KSFT_FAIL; + } + /* Read ID_AA64PFR1_EL1 register */ + asm volatile("mrs %0, id_aa64pfr1_el1" : "=r"(hwcaps) : : "memory"); + if (((hwcaps >> ID_AA64PFR1_MTE_SHIFT) & MT_TAG_MASK) != ID_AA64PFR1_MTE) { + ksft_print_msg("FAIL: MTE features unavailable\n"); + return KSFT_SKIP; + } + /* Get current mte mode */ + ret = prctl(PR_GET_TAGGED_ADDR_CTRL, en, 0, 0, 0); + if (ret < 0) { + ksft_print_msg("FAIL:prctl PR_GET_TAGGED_ADDR_CTRL with error =%d\n", ret); + return KSFT_FAIL; + } + if (ret & PR_MTE_TCF_SYNC) + mte_cur_mode = MTE_SYNC_ERR; + else if (ret & PR_MTE_TCF_ASYNC) + mte_cur_mode = MTE_ASYNC_ERR; + else if (ret & PR_MTE_TCF_NONE) + mte_cur_mode = MTE_NONE_ERR; + + mte_cur_pstate_tco = mte_get_pstate_tco(); + /* Disable PSTATE.TCO */ + mte_disable_pstate_tco(); + return 0; +} + +void mte_restore_setup(void) +{ + mte_switch_mode(mte_cur_mode, MTE_ALLOW_NON_ZERO_TAG); + if (mte_cur_pstate_tco == MT_PSTATE_TCO_EN) + mte_enable_pstate_tco(); + else if (mte_cur_pstate_tco == MT_PSTATE_TCO_DIS) + mte_disable_pstate_tco(); +} + +int create_temp_file(void) +{ + int fd; + char filename[] = "/dev/shm/tmp_XXXXXX"; + + /* Create a file in the tmpfs filesystem */ + fd = mkstemp(&filename[0]); + if (fd == -1) { + ksft_print_msg("FAIL: Unable to open temporary file\n"); 
+ return 0; + } + unlink(&filename[0]); + return fd; +} diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.h b/tools/testing/selftests/arm64/mte/mte_common_util.h new file mode 100644 index 000000000000..195a7d1879e6 --- /dev/null +++ b/tools/testing/selftests/arm64/mte/mte_common_util.h @@ -0,0 +1,118 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2020 ARM Limited */ + +#ifndef _MTE_COMMON_UTIL_H +#define _MTE_COMMON_UTIL_H + +#include <signal.h> +#include <stdbool.h> +#include <stdlib.h> +#include <sys/auxv.h> +#include <sys/mman.h> +#include <sys/prctl.h> +#include "mte_def.h" +#include "kselftest.h" + +enum mte_mem_type { + USE_MALLOC, + USE_MMAP, + USE_MPROTECT, +}; + +enum mte_mode { + MTE_NONE_ERR, + MTE_SYNC_ERR, + MTE_ASYNC_ERR, +}; + +struct mte_fault_cxt { + /* Address start which triggers mte tag fault */ + unsigned long trig_addr; + /* Address range for mte tag fault and negative value means underflow */ + ssize_t trig_range; + /* siginfo si code */ + unsigned long trig_si_code; + /* Flag to denote if correct fault caught */ + bool fault_valid; +}; + +extern struct mte_fault_cxt cur_mte_cxt; + +/* MTE utility functions */ +void mte_default_handler(int signum, siginfo_t *si, void *uc); +void mte_register_signal(int signal, void (*handler)(int, siginfo_t *, void *)); +void mte_wait_after_trig(void); +void *mte_allocate_memory(size_t size, int mem_type, int mapping, bool tags); +void *mte_allocate_memory_tag_range(size_t size, int mem_type, int mapping, + size_t range_before, size_t range_after); +void *mte_allocate_file_memory(size_t size, int mem_type, int mapping, + bool tags, int fd); +void *mte_allocate_file_memory_tag_range(size_t size, int mem_type, int mapping, + size_t range_before, size_t range_after, int fd); +void mte_free_memory(void *ptr, size_t size, int mem_type, bool tags); +void mte_free_memory_tag_range(void *ptr, size_t size, int mem_type, + size_t range_before, size_t range_after); +void *mte_insert_tags(void 
*ptr, size_t size); +void mte_clear_tags(void *ptr, size_t size); +int mte_default_setup(void); +void mte_restore_setup(void); +int mte_switch_mode(int mte_option, unsigned long incl_mask); +void mte_initialize_current_context(int mode, uintptr_t ptr, ssize_t range); + +/* Common utility functions */ +int create_temp_file(void); + +/* Assembly MTE utility functions */ +void *mte_insert_random_tag(void *ptr); +void *mte_insert_new_tag(void *ptr); +void *mte_get_tag_address(void *ptr); +void mte_set_tag_address_range(void *ptr, int range); +void mte_clear_tag_address_range(void *ptr, int range); +void mte_disable_pstate_tco(void); +void mte_enable_pstate_tco(void); +unsigned int mte_get_pstate_tco(void); + +/* Test framework static inline functions/macros */ +static inline void evaluate_test(int err, const char *msg) +{ + if (err == KSFT_PASS) + ksft_test_result_pass(msg); + else if (err == KSFT_FAIL) + ksft_test_result_fail(msg); +} + +static inline int check_allocated_memory(void *ptr, size_t size, + int mem_type, bool tags) +{ + if (ptr == NULL) { + ksft_print_msg("FAIL: memory allocation\n"); + return KSFT_FAIL; + } + + if (tags && !MT_FETCH_TAG((uintptr_t)ptr)) { + ksft_print_msg("FAIL: tag not found at addr(%p)\n", ptr); + mte_free_memory((void *)ptr, size, mem_type, false); + return KSFT_FAIL; + } + + return KSFT_PASS; +} + +static inline int check_allocated_memory_range(void *ptr, size_t size, int mem_type, + size_t range_before, size_t range_after) +{ + if (ptr == NULL) { + ksft_print_msg("FAIL: memory allocation\n"); + return KSFT_FAIL; + } + + if (!MT_FETCH_TAG((uintptr_t)ptr)) { + ksft_print_msg("FAIL: tag not found at addr(%p)\n", ptr); + mte_free_memory_tag_range((void *)ptr, size, mem_type, range_before, + range_after); + return KSFT_FAIL; + } + return KSFT_PASS; +} + +#endif /* _MTE_COMMON_UTIL_H */ diff --git a/tools/testing/selftests/arm64/mte/mte_def.h b/tools/testing/selftests/arm64/mte/mte_def.h new file mode 100644 index 
000000000000..9b188254b61a --- /dev/null +++ b/tools/testing/selftests/arm64/mte/mte_def.h @@ -0,0 +1,60 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2020 ARM Limited */ + +/* + * Below definitions may be found in kernel headers; however, they are + * redefined here to decouple the MTE selftests compilations from them. + */ +#ifndef SEGV_MTEAERR +#define SEGV_MTEAERR 8 +#endif +#ifndef SEGV_MTESERR +#define SEGV_MTESERR 9 +#endif +#ifndef PROT_MTE +#define PROT_MTE 0x20 +#endif +#ifndef HWCAP2_MTE +#define HWCAP2_MTE (1 << 18) +#endif + +#ifndef PR_MTE_TCF_SHIFT +#define PR_MTE_TCF_SHIFT 1 +#endif +#ifndef PR_MTE_TCF_NONE +#define PR_MTE_TCF_NONE (0UL << PR_MTE_TCF_SHIFT) +#endif +#ifndef PR_MTE_TCF_SYNC +#define PR_MTE_TCF_SYNC (1UL << PR_MTE_TCF_SHIFT) +#endif +#ifndef PR_MTE_TCF_ASYNC +#define PR_MTE_TCF_ASYNC (2UL << PR_MTE_TCF_SHIFT) +#endif +#ifndef PR_MTE_TAG_SHIFT +#define PR_MTE_TAG_SHIFT 3 +#endif + +/* MTE Hardware feature definitions below. */ +#define MT_TAG_SHIFT 56 +#define MT_TAG_MASK 0xFUL +#define MT_FREE_TAG 0x0UL +#define MT_GRANULE_SIZE 16 +#define MT_TAG_COUNT 16 +#define MT_INCLUDE_TAG_MASK 0xFFFF +#define MT_EXCLUDE_TAG_MASK 0x0 + +#define MT_ALIGN_GRANULE (MT_GRANULE_SIZE - 1) +#define MT_CLEAR_TAG(x) ((x) & ~(MT_TAG_MASK << MT_TAG_SHIFT)) +#define MT_SET_TAG(x, y) ((x) | ((y) << MT_TAG_SHIFT)) +#define MT_FETCH_TAG(x) (((x) >> MT_TAG_SHIFT) & (MT_TAG_MASK)) +#define MT_ALIGN_UP(x) (((x) + MT_ALIGN_GRANULE) & ~(MT_ALIGN_GRANULE)) + +#define MT_PSTATE_TCO_SHIFT 25 +#define MT_PSTATE_TCO_MASK (~(0x1 << MT_PSTATE_TCO_SHIFT)) +#define MT_PSTATE_TCO_EN 1 +#define MT_PSTATE_TCO_DIS 0 + +#define MT_EXCLUDE_TAG(x) (1 << (x)) +#define MT_INCLUDE_VALID_TAG(x) (MT_INCLUDE_TAG_MASK ^ MT_EXCLUDE_TAG(x)) +#define MT_INCLUDE_VALID_TAGS(x) (MT_INCLUDE_TAG_MASK ^ (x)) +#define MTE_ALLOW_NON_ZERO_TAG MT_INCLUDE_VALID_TAG(0) diff --git a/tools/testing/selftests/arm64/mte/mte_helper.S b/tools/testing/selftests/arm64/mte/mte_helper.S new file mode
100644 index 000000000000..a02c04cd0aac --- /dev/null +++ b/tools/testing/selftests/arm64/mte/mte_helper.S @@ -0,0 +1,128 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2020 ARM Limited */ + +#include "mte_def.h" + +#define ENTRY(name) \ + .globl name ;\ + .p2align 2;\ + .type name, @function ;\ +name: + +#define ENDPROC(name) \ + .size name, .-name ; + + .text +/* + * mte_insert_random_tag: Insert random tag and might be same as the source tag if + * the source pointer has it. + * Input: + * x0 - source pointer with a tag/no-tag + * Return: + * x0 - pointer with random tag + */ +ENTRY(mte_insert_random_tag) + irg x0, x0, xzr + ret +ENDPROC(mte_insert_random_tag) + +/* + * mte_insert_new_tag: Insert new tag and different from the source tag if + * source pointer has it. + * Input: + * x0 - source pointer with a tag/no-tag + * Return: + * x0 - pointer with random tag + */ +ENTRY(mte_insert_new_tag) + gmi x1, x0, xzr + irg x0, x0, x1 + ret +ENDPROC(mte_insert_new_tag) + +/* + * mte_get_tag_address: Get the tag from given address. 
+ * Input: + * x0 - source pointer + * Return: + * x0 - pointer with appended tag + */ +ENTRY(mte_get_tag_address) + ldg x0, [x0] + ret +ENDPROC(mte_get_tag_address) + +/* + * mte_set_tag_address_range: Set the tag range from the given address + * Input: + * x0 - source pointer with tag data + * x1 - range + * Return: + * none + */ +ENTRY(mte_set_tag_address_range) + cbz x1, 2f +1: + stg x0, [x0, #0x0] + add x0, x0, #MT_GRANULE_SIZE + sub x1, x1, #MT_GRANULE_SIZE + cbnz x1, 1b +2: + ret +ENDPROC(mte_set_tag_address_range) + +/* + * mte_clear_tag_address_range: Clear the tag range from the given address + * Input: + * x0 - source pointer with tag data + * x1 - range + * Return: + * none + */ +ENTRY(mte_clear_tag_address_range) + cbz x1, 2f +1: + stzg x0, [x0, #0x0] + add x0, x0, #MT_GRANULE_SIZE + sub x1, x1, #MT_GRANULE_SIZE + cbnz x1, 1b +2: + ret +ENDPROC(mte_clear_tag_address_range) + +/* + * mte_enable_pstate_tco: Enable PSTATE.TCO (tag check override) field + * Input: + * none + * Return: + * none + */ +ENTRY(mte_enable_pstate_tco) + msr tco, #MT_PSTATE_TCO_EN + ret +ENDPROC(mte_enable_pstate_tco) + +/* + * mte_disable_pstate_tco: Disable PSTATE.TCO (tag check override) field + * Input: + * none + * Return: + * none + */ +ENTRY(mte_disable_pstate_tco) + msr tco, #MT_PSTATE_TCO_DIS + ret +ENDPROC(mte_disable_pstate_tco) + +/* + * mte_get_pstate_tco: Get PSTATE.TCO (tag check override) field + * Input: + * none + * Return: + * x0 + */ +ENTRY(mte_get_pstate_tco) + mrs x0, tco + ubfx x0, x0, #MT_PSTATE_TCO_SHIFT, #1 + ret +ENDPROC(mte_get_pstate_tco) diff --git a/tools/testing/selftests/arm64/pauth/.gitignore b/tools/testing/selftests/arm64/pauth/.gitignore new file mode 100644 index 000000000000..155137d92722 --- /dev/null +++ b/tools/testing/selftests/arm64/pauth/.gitignore @@ -0,0 +1,2 @@ +exec_target +pac diff --git a/tools/testing/selftests/arm64/pauth/Makefile b/tools/testing/selftests/arm64/pauth/Makefile new file mode 100644 index 000000000000..72e290b0b10c
--- /dev/null +++ b/tools/testing/selftests/arm64/pauth/Makefile @@ -0,0 +1,39 @@ +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2020 ARM Limited + +# preserve CC value from top level Makefile +ifeq ($(CC),cc) +CC := $(CROSS_COMPILE)gcc +endif + +CFLAGS += -mbranch-protection=pac-ret +# check if the compiler supports ARMv8.3 and branch protection with PAuth +pauth_cc_support := $(shell if ($(CC) $(CFLAGS) -march=armv8.3-a -E -x c /dev/null -o /dev/null 2>&1) then echo "1"; fi) + +ifeq ($(pauth_cc_support),1) +TEST_GEN_PROGS := pac +TEST_GEN_FILES := pac_corruptor.o helper.o +TEST_GEN_PROGS_EXTENDED := exec_target +endif + +include ../../lib.mk + +ifeq ($(pauth_cc_support),1) +# pac* and aut* instructions are not available on architectures before +# ARMv8.3. Therefore target ARMv8.3 wherever they are used directly +$(OUTPUT)/pac_corruptor.o: pac_corruptor.S + $(CC) -c $^ -o $@ $(CFLAGS) -march=armv8.3-a + +$(OUTPUT)/helper.o: helper.c + $(CC) -c $^ -o $@ $(CFLAGS) -march=armv8.3-a + +# when -mbranch-protection is enabled and the target architecture is ARMv8.3 or +# greater, gcc emits pac* instructions which are not in HINT NOP space, +# preventing the tests from occurring at all.
Compile for ARMv8.2 so tests can +# run on earlier targets and print a meaningful error messages +$(OUTPUT)/exec_target: exec_target.c $(OUTPUT)/helper.o + $(CC) $^ -o $@ $(CFLAGS) -march=armv8.2-a + +$(OUTPUT)/pac: pac.c $(OUTPUT)/pac_corruptor.o $(OUTPUT)/helper.o + $(CC) $^ -o $@ $(CFLAGS) -march=armv8.2-a +endif diff --git a/tools/testing/selftests/arm64/pauth/exec_target.c b/tools/testing/selftests/arm64/pauth/exec_target.c new file mode 100644 index 000000000000..4435600ca400 --- /dev/null +++ b/tools/testing/selftests/arm64/pauth/exec_target.c @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2020 ARM Limited + +#include <stdio.h> +#include <stdlib.h> +#include <sys/auxv.h> + +#include "helper.h" + +int main(void) +{ + struct signatures signed_vals; + unsigned long hwcaps; + size_t val; + + fread(&val, sizeof(size_t), 1, stdin); + + /* don't try to execute illegal (unimplemented) instructions) caller + * should have checked this and keep worker simple + */ + hwcaps = getauxval(AT_HWCAP); + + if (hwcaps & HWCAP_PACA) { + signed_vals.keyia = keyia_sign(val); + signed_vals.keyib = keyib_sign(val); + signed_vals.keyda = keyda_sign(val); + signed_vals.keydb = keydb_sign(val); + } + signed_vals.keyg = (hwcaps & HWCAP_PACG) ? 
keyg_sign(val) : 0; + + fwrite(&signed_vals, sizeof(struct signatures), 1, stdout); + + return 0; +} diff --git a/tools/testing/selftests/arm64/pauth/helper.c b/tools/testing/selftests/arm64/pauth/helper.c new file mode 100644 index 000000000000..2c201e7d0d50 --- /dev/null +++ b/tools/testing/selftests/arm64/pauth/helper.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2020 ARM Limited + +#include "helper.h" + +size_t keyia_sign(size_t ptr) +{ + asm volatile("paciza %0" : "+r" (ptr)); + return ptr; +} + +size_t keyib_sign(size_t ptr) +{ + asm volatile("pacizb %0" : "+r" (ptr)); + return ptr; +} + +size_t keyda_sign(size_t ptr) +{ + asm volatile("pacdza %0" : "+r" (ptr)); + return ptr; +} + +size_t keydb_sign(size_t ptr) +{ + asm volatile("pacdzb %0" : "+r" (ptr)); + return ptr; +} + +size_t keyg_sign(size_t ptr) +{ + /* output is encoded in the upper 32 bits */ + size_t dest = 0; + size_t modifier = 0; + + asm volatile("pacga %0, %1, %2" : "=r" (dest) : "r" (ptr), "r" (modifier)); + + return dest; +} diff --git a/tools/testing/selftests/arm64/pauth/helper.h b/tools/testing/selftests/arm64/pauth/helper.h new file mode 100644 index 000000000000..652496c7b411 --- /dev/null +++ b/tools/testing/selftests/arm64/pauth/helper.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2020 ARM Limited */ + +#ifndef _HELPER_H_ +#define _HELPER_H_ + +#include <stdlib.h> + +#define NKEYS 5 + +struct signatures { + size_t keyia; + size_t keyib; + size_t keyda; + size_t keydb; + size_t keyg; +}; + +void pac_corruptor(void); + +/* PAuth sign a value with key ia and modifier value 0 */ +size_t keyia_sign(size_t val); +size_t keyib_sign(size_t val); +size_t keyda_sign(size_t val); +size_t keydb_sign(size_t val); +size_t keyg_sign(size_t val); + +#endif diff --git a/tools/testing/selftests/arm64/pauth/pac.c b/tools/testing/selftests/arm64/pauth/pac.c new file mode 100644 index 000000000000..592fe538506e --- /dev/null +++ 
b/tools/testing/selftests/arm64/pauth/pac.c @@ -0,0 +1,368 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (C) 2020 ARM Limited + +#define _GNU_SOURCE + +#include <sys/auxv.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <signal.h> +#include <setjmp.h> +#include <sched.h> + +#include "../../kselftest_harness.h" +#include "helper.h" + +#define PAC_COLLISION_ATTEMPTS 10 +/* + * The kernel sets TBID by default. So bits 55 and above should remain + * untouched no matter what. + * The VA space size is 48 bits. Bigger is opt-in. + */ +#define PAC_MASK (~0xff80ffffffffffff) +#define ARBITRARY_VALUE (0x1234) +#define ASSERT_PAUTH_ENABLED() \ +do { \ + unsigned long hwcaps = getauxval(AT_HWCAP); \ + /* data key instructions are not in NOP space. This prevents a SIGILL */ \ + ASSERT_NE(0, hwcaps & HWCAP_PACA) TH_LOG("PAUTH not enabled"); \ +} while (0) +#define ASSERT_GENERIC_PAUTH_ENABLED() \ +do { \ + unsigned long hwcaps = getauxval(AT_HWCAP); \ + /* generic key instructions are not in NOP space. 
This prevents a SIGILL */ \ + ASSERT_NE(0, hwcaps & HWCAP_PACG) TH_LOG("Generic PAUTH not enabled"); \ +} while (0) + +void sign_specific(struct signatures *sign, size_t val) +{ + sign->keyia = keyia_sign(val); + sign->keyib = keyib_sign(val); + sign->keyda = keyda_sign(val); + sign->keydb = keydb_sign(val); +} + +void sign_all(struct signatures *sign, size_t val) +{ + sign->keyia = keyia_sign(val); + sign->keyib = keyib_sign(val); + sign->keyda = keyda_sign(val); + sign->keydb = keydb_sign(val); + sign->keyg = keyg_sign(val); +} + +int n_same(struct signatures *old, struct signatures *new, int nkeys) +{ + int res = 0; + + res += old->keyia == new->keyia; + res += old->keyib == new->keyib; + res += old->keyda == new->keyda; + res += old->keydb == new->keydb; + if (nkeys == NKEYS) + res += old->keyg == new->keyg; + + return res; +} + +int n_same_single_set(struct signatures *sign, int nkeys) +{ + size_t vals[nkeys]; + int same = 0; + + vals[0] = sign->keyia & PAC_MASK; + vals[1] = sign->keyib & PAC_MASK; + vals[2] = sign->keyda & PAC_MASK; + vals[3] = sign->keydb & PAC_MASK; + + if (nkeys >= 4) + vals[4] = sign->keyg & PAC_MASK; + + for (int i = 0; i < nkeys - 1; i++) { + for (int j = i + 1; j < nkeys; j++) { + if (vals[i] == vals[j]) + same += 1; + } + } + return same; +} + +int exec_sign_all(struct signatures *signed_vals, size_t val) +{ + int new_stdin[2]; + int new_stdout[2]; + int status; + int i; + ssize_t ret; + pid_t pid; + cpu_set_t mask; + + ret = pipe(new_stdin); + if (ret == -1) { + perror("pipe returned error"); + return -1; + } + + ret = pipe(new_stdout); + if (ret == -1) { + perror("pipe returned error"); + return -1; + } + + /* + * pin this process and all its children to a single CPU, so it can also + * guarantee a context switch with its child + */ + sched_getaffinity(0, sizeof(mask), &mask); + + for (i = 0; i < sizeof(cpu_set_t); i++) + if (CPU_ISSET(i, &mask)) + break; + + CPU_ZERO(&mask); + CPU_SET(i, &mask); + sched_setaffinity(0, sizeof(mask), 
&mask); + + pid = fork(); + // child + if (pid == 0) { + dup2(new_stdin[0], STDIN_FILENO); + if (ret == -1) { + perror("dup2 returned error"); + exit(1); + } + + dup2(new_stdout[1], STDOUT_FILENO); + if (ret == -1) { + perror("dup2 returned error"); + exit(1); + } + + close(new_stdin[0]); + close(new_stdin[1]); + close(new_stdout[0]); + close(new_stdout[1]); + + ret = execl("exec_target", "exec_target", (char *)NULL); + if (ret == -1) { + perror("exec returned error"); + exit(1); + } + } + + close(new_stdin[0]); + close(new_stdout[1]); + + ret = write(new_stdin[1], &val, sizeof(size_t)); + if (ret == -1) { + perror("write returned error"); + return -1; + } + + /* + * wait for the worker to finish, so that read() reads all data + * will also context switch with worker so that this function can be used + * for context switch tests + */ + waitpid(pid, &status, 0); + if (WIFEXITED(status) == 0) { + fprintf(stderr, "worker exited unexpectedly\n"); + return -1; + } + if (WEXITSTATUS(status) != 0) { + fprintf(stderr, "worker exited with error\n"); + return -1; + } + + ret = read(new_stdout[0], signed_vals, sizeof(struct signatures)); + if (ret == -1) { + perror("read returned error"); + return -1; + } + + return 0; +} + +sigjmp_buf jmpbuf; +void pac_signal_handler(int signum, siginfo_t *si, void *uc) +{ + if (signum == SIGSEGV || signum == SIGILL) + siglongjmp(jmpbuf, 1); +} + +/* check that a corrupted PAC results in SIGSEGV or SIGILL */ +TEST(corrupt_pac) +{ + struct sigaction sa; + + ASSERT_PAUTH_ENABLED(); + if (sigsetjmp(jmpbuf, 1) == 0) { + sa.sa_sigaction = pac_signal_handler; + sa.sa_flags = SA_SIGINFO | SA_RESETHAND; + sigemptyset(&sa.sa_mask); + + sigaction(SIGSEGV, &sa, NULL); + sigaction(SIGILL, &sa, NULL); + + pac_corruptor(); + ASSERT_TRUE(0) TH_LOG("SIGSEGV/SIGILL signal did not occur"); + } +} + +/* + * There are no separate pac* and aut* controls so checking only the pac* + * instructions is sufficient + */ +TEST(pac_instructions_not_nop) +{ + size_t 
keyia = 0; + size_t keyib = 0; + size_t keyda = 0; + size_t keydb = 0; + + ASSERT_PAUTH_ENABLED(); + + for (int i = 0; i < PAC_COLLISION_ATTEMPTS; i++) { + keyia |= keyia_sign(i) & PAC_MASK; + keyib |= keyib_sign(i) & PAC_MASK; + keyda |= keyda_sign(i) & PAC_MASK; + keydb |= keydb_sign(i) & PAC_MASK; + } + + ASSERT_NE(0, keyia) TH_LOG("keyia instructions did nothing"); + ASSERT_NE(0, keyib) TH_LOG("keyib instructions did nothing"); + ASSERT_NE(0, keyda) TH_LOG("keyda instructions did nothing"); + ASSERT_NE(0, keydb) TH_LOG("keydb instructions did nothing"); +} + +TEST(pac_instructions_not_nop_generic) +{ + size_t keyg = 0; + + ASSERT_GENERIC_PAUTH_ENABLED(); + + for (int i = 0; i < PAC_COLLISION_ATTEMPTS; i++) + keyg |= keyg_sign(i) & PAC_MASK; + + ASSERT_NE(0, keyg) TH_LOG("keyg instructions did nothing"); +} + +TEST(single_thread_different_keys) +{ + int same = 10; + int nkeys = NKEYS; + int tmp; + struct signatures signed_vals; + unsigned long hwcaps = getauxval(AT_HWCAP); + + /* generic and data key instructions are not in NOP space. This prevents a SIGILL */ + ASSERT_NE(0, hwcaps & HWCAP_PACA) TH_LOG("PAUTH not enabled"); + if (!(hwcaps & HWCAP_PACG)) { + TH_LOG("WARNING: Generic PAUTH not enabled. Skipping generic key checks"); + nkeys = NKEYS - 1; + } + + /* + * In Linux the PAC field can be up to 7 bits wide. Even if keys are + * different, there is about 5% chance for PACs to collide with + * different addresses. This chance rapidly increases with fewer bits + * allocated for the PAC (e.g. wider address). A comparison of the keys + * directly will be more reliable. 
+ * All signed values need to be different at least once out of n + * attempts to be certain that the keys are different + */ + for (int i = 0; i < PAC_COLLISION_ATTEMPTS; i++) { + if (nkeys == NKEYS) + sign_all(&signed_vals, i); + else + sign_specific(&signed_vals, i); + + tmp = n_same_single_set(&signed_vals, nkeys); + if (tmp < same) + same = tmp; + } + + ASSERT_EQ(0, same) TH_LOG("%d keys clashed every time", same); +} + +/* + * fork() does not change keys. Only exec() does so call a worker program. + * Its only job is to sign a value and report back the results + */ +TEST(exec_changed_keys) +{ + struct signatures new_keys; + struct signatures old_keys; + int ret; + int same = 10; + int nkeys = NKEYS; + unsigned long hwcaps = getauxval(AT_HWCAP); + + /* generic and data key instructions are not in NOP space. This prevents a SIGILL */ + ASSERT_NE(0, hwcaps & HWCAP_PACA) TH_LOG("PAUTH not enabled"); + if (!(hwcaps & HWCAP_PACG)) { + TH_LOG("WARNING: Generic PAUTH not enabled. Skipping generic key checks"); + nkeys = NKEYS - 1; + } + + for (int i = 0; i < PAC_COLLISION_ATTEMPTS; i++) { + ret = exec_sign_all(&new_keys, i); + ASSERT_EQ(0, ret) TH_LOG("failed to run worker"); + + if (nkeys == NKEYS) + sign_all(&old_keys, i); + else + sign_specific(&old_keys, i); + + ret = n_same(&old_keys, &new_keys, nkeys); + if (ret < same) + same = ret; + } + + ASSERT_EQ(0, same) TH_LOG("exec() did not change %d keys", same); +} + +TEST(context_switch_keep_keys) +{ + int ret; + struct signatures trash; + struct signatures before; + struct signatures after; + + ASSERT_PAUTH_ENABLED(); + + sign_specific(&before, ARBITRARY_VALUE); + + /* will context switch with a process with different keys at least once */ + ret = exec_sign_all(&trash, ARBITRARY_VALUE); + ASSERT_EQ(0, ret) TH_LOG("failed to run worker"); + + sign_specific(&after, ARBITRARY_VALUE); + + ASSERT_EQ(before.keyia, after.keyia) TH_LOG("keyia changed after context switching"); + ASSERT_EQ(before.keyib, after.keyib)
TH_LOG("keyib changed after context switching"); + ASSERT_EQ(before.keyda, after.keyda) TH_LOG("keyda changed after context switching"); + ASSERT_EQ(before.keydb, after.keydb) TH_LOG("keydb changed after context switching"); +} + +TEST(context_switch_keep_keys_generic) +{ + int ret; + struct signatures trash; + size_t before; + size_t after; + + ASSERT_GENERIC_PAUTH_ENABLED(); + + before = keyg_sign(ARBITRARY_VALUE); + + /* will context switch with a process with different keys at least once */ + ret = exec_sign_all(&trash, ARBITRARY_VALUE); + ASSERT_EQ(0, ret) TH_LOG("failed to run worker"); + + after = keyg_sign(ARBITRARY_VALUE); + + ASSERT_EQ(before, after) TH_LOG("keyg changed after context switching"); +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/arm64/pauth/pac_corruptor.S b/tools/testing/selftests/arm64/pauth/pac_corruptor.S new file mode 100644 index 000000000000..aa6588050752 --- /dev/null +++ b/tools/testing/selftests/arm64/pauth/pac_corruptor.S @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2020 ARM Limited */ + +.global pac_corruptor + +.text +/* + * Corrupting a single bit of the PAC ensures the authentication will fail. It + * also guarantees no possible collision. 
TCR_EL1.TBI0 is set by default so no + * top byte PAC is tested + */ + pac_corruptor: + paciasp + + /* corrupt the top bit of the PAC */ + eor lr, lr, #1 << 53 + + autiasp + ret diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index 9a0946ddb705..3ab1200e172f 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -13,9 +13,7 @@ test_verifier_log feature test_sock test_sock_addr -test_sock_fields urandom_read -test_btf test_sockmap test_lirc_mode2_user get_cgroup_id_user diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index daf186f88a63..542768f5195b 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -33,9 +33,9 @@ LDLIBS += -lcap -lelf -lz -lrt -lpthread # Order correspond to 'make run_tests' order TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \ test_verifier_log test_dev_cgroup test_tcpbpf_user \ - test_sock test_btf test_sockmap get_cgroup_id_user test_socket_cookie \ + test_sock test_sockmap get_cgroup_id_user test_socket_cookie \ test_cgroup_storage \ - test_netcnt test_tcpnotify_user test_sock_fields test_sysctl \ + test_netcnt test_tcpnotify_user test_sysctl \ test_progs-no_alu32 \ test_current_pid_tgid_new_ns @@ -68,7 +68,8 @@ TEST_PROGS := test_kmod.sh \ test_tc_edt.sh \ test_xdping.sh \ test_bpftool_build.sh \ - test_bpftool.sh + test_bpftool.sh \ + test_bpftool_metadata.sh \ TEST_PROGS_EXTENDED := with_addr.sh \ with_tunnels.sh \ @@ -176,6 +177,11 @@ $(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \ $(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \ OUTPUT=$(BUILD_DIR)/bpftool/ \ prefix= DESTDIR=$(SCRATCH_DIR)/ install + $(Q)mkdir -p $(BUILD_DIR)/bpftool/Documentation + $(Q)RST2MAN_OPTS="--exit-status=1" $(MAKE) $(submake_extras) \ + -C $(BPFTOOLDIR)/Documentation \ + OUTPUT=$(BUILD_DIR)/bpftool/Documentation/ \ + 
prefix= DESTDIR=$(SCRATCH_DIR)/ install $(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \ ../../../include/uapi/linux/bpf.h \ @@ -316,7 +322,7 @@ $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.o: \ $(TRUNNER_BPF_PROGS_DIR)/%.c \ $(TRUNNER_BPF_PROGS_DIR)/*.h \ $$(INCLUDE_DIR)/vmlinux.h \ - $$(BPFOBJ) | $(TRUNNER_OUTPUT) + $(wildcard $(BPFDIR)/bpf_*.h) | $(TRUNNER_OUTPUT) $$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@, \ $(TRUNNER_BPF_CFLAGS), \ $(TRUNNER_BPF_LDFLAGS)) diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst index e885d351595f..ac9eda830187 100644 --- a/tools/testing/selftests/bpf/README.rst +++ b/tools/testing/selftests/bpf/README.rst @@ -7,6 +7,44 @@ General instructions on running selftests can be found in Additional information about selftest failures are documented here. +profiler[23] test failures with clang/llvm <12.0.0 +================================================== + +With clang/llvm <12.0.0, the profiler[23] test may fail. +The symptom looks like + +.. code-block:: c + + // r9 is a pointer to map_value + // r7 is a scalar + 17: bf 96 00 00 00 00 00 00 r6 = r9 + 18: 0f 76 00 00 00 00 00 00 r6 += r7 + math between map_value pointer and register with unbounded min value is not allowed + + // the instructions below will not be seen in the verifier log + 19: a5 07 01 00 01 01 00 00 if r7 < 257 goto +1 + 20: bf 96 00 00 00 00 00 00 r6 = r9 + // r6 is used here + +The verifier will reject such code with above error. +At insn 18 the r7 is indeed unbounded. The later insn 19 checks the bounds and +the insn 20 undoes map_value addition. It is currently impossible for the +verifier to understand such speculative pointer arithmetic. +Hence + https://reviews.llvm.org/D85570 +addresses it on the compiler side. It was committed on llvm 12. + +The corresponding C code +.. 
code-block:: c + + for (int i = 0; i < MAX_CGROUPS_PATH_DEPTH; i++) { + filepart_length = bpf_probe_read_str(payload, ...); + if (filepart_length <= MAX_PATH) { + barrier_var(filepart_length); // workaround + payload += filepart_length; + } + } + bpf_iter test failures with clang/llvm 10.0.0 ============================================= @@ -43,3 +81,24 @@ This is due to a llvm BPF backend bug. The fix https://reviews.llvm.org/D78466 has been pushed to llvm 10.x release branch and will be available in 10.0.1. The fix is available in llvm 11.0.0 trunk. + +BPF CO-RE-based tests and Clang version +======================================= + +A set of selftests use BPF target-specific built-ins, which might require +bleeding-edge Clang versions (Clang 12 nightly at this time). + +A few sub-tests of the core_reloc test suite (part of test_progs test runner) require +the following built-ins, listed with corresponding Clang diffs introducing +them to Clang/LLVM. These sub-tests are going to be skipped if Clang is too +old to support them; they shouldn't cause build failures or runtime test +failures: + + - __builtin_btf_type_id() ([0], [1], [2]); + - __builtin_preserve_type_info(), __builtin_preserve_enum_value() ([3], [4]).
+ + [0] https://reviews.llvm.org/D74572 + [1] https://reviews.llvm.org/D74668 + [2] https://reviews.llvm.org/D85174 + [3] https://reviews.llvm.org/D83878 + [4] https://reviews.llvm.org/D83242 diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c index 944ad4721c83..332ed2f7b402 100644 --- a/tools/testing/selftests/bpf/bench.c +++ b/tools/testing/selftests/bpf/bench.c @@ -311,12 +311,12 @@ extern const struct bench bench_rename_kretprobe; extern const struct bench bench_rename_rawtp; extern const struct bench bench_rename_fentry; extern const struct bench bench_rename_fexit; -extern const struct bench bench_rename_fmodret; extern const struct bench bench_trig_base; extern const struct bench bench_trig_tp; extern const struct bench bench_trig_rawtp; extern const struct bench bench_trig_kprobe; extern const struct bench bench_trig_fentry; +extern const struct bench bench_trig_fentry_sleep; extern const struct bench bench_trig_fmodret; extern const struct bench bench_rb_libbpf; extern const struct bench bench_rb_custom; @@ -332,12 +332,12 @@ static const struct bench *benchs[] = { &bench_rename_rawtp, &bench_rename_fentry, &bench_rename_fexit, - &bench_rename_fmodret, &bench_trig_base, &bench_trig_tp, &bench_trig_rawtp, &bench_trig_kprobe, &bench_trig_fentry, + &bench_trig_fentry_sleep, &bench_trig_fmodret, &bench_rb_libbpf, &bench_rb_custom, @@ -462,4 +462,3 @@ int main(int argc, char **argv) return 0; } - diff --git a/tools/testing/selftests/bpf/benchs/bench_rename.c b/tools/testing/selftests/bpf/benchs/bench_rename.c index e74cff40f4fe..a967674098ad 100644 --- a/tools/testing/selftests/bpf/benchs/bench_rename.c +++ b/tools/testing/selftests/bpf/benchs/bench_rename.c @@ -106,12 +106,6 @@ static void setup_fexit() attach_bpf(ctx.skel->progs.prog5); } -static void setup_fmodret() -{ - setup_ctx(); - attach_bpf(ctx.skel->progs.prog6); -} - static void *consumer(void *input) { return NULL; @@ -182,14 +176,3 @@ const struct bench 
bench_rename_fexit = { .report_progress = hits_drops_report_progress, .report_final = hits_drops_report_final, }; - -const struct bench bench_rename_fmodret = { - .name = "rename-fmodret", - .validate = validate, - .setup = setup_fmodret, - .producer_thread = producer, - .consumer_thread = consumer, - .measure = measure, - .report_progress = hits_drops_report_progress, - .report_final = hits_drops_report_final, -}; diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c index 49c22832f216..2a0b6c9885a4 100644 --- a/tools/testing/selftests/bpf/benchs/bench_trigger.c +++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c @@ -90,6 +90,12 @@ static void trigger_fentry_setup() attach_bpf(ctx.skel->progs.bench_trigger_fentry); } +static void trigger_fentry_sleep_setup() +{ + setup_ctx(); + attach_bpf(ctx.skel->progs.bench_trigger_fentry_sleep); +} + static void trigger_fmodret_setup() { setup_ctx(); @@ -155,6 +161,17 @@ const struct bench bench_trig_fentry = { .report_final = hits_drops_report_final, }; +const struct bench bench_trig_fentry_sleep = { + .name = "trig-fentry-sleep", + .validate = trigger_validate, + .setup = trigger_fentry_sleep_setup, + .producer_thread = trigger_producer, + .consumer_thread = trigger_consumer, + .measure = trigger_measure, + .report_progress = hits_drops_report_progress, + .report_final = hits_drops_report_final, +}; + const struct bench bench_trig_fmodret = { .name = "trig-fmodret", .validate = trigger_validate, diff --git a/tools/testing/selftests/bpf/bpf_tcp_helpers.h b/tools/testing/selftests/bpf/bpf_tcp_helpers.h index 5bf2fe9b1efa..2915664c335d 100644 --- a/tools/testing/selftests/bpf/bpf_tcp_helpers.h +++ b/tools/testing/selftests/bpf/bpf_tcp_helpers.h @@ -16,6 +16,7 @@ BPF_PROG(name, args) struct sock_common { unsigned char skc_state; + __u16 skc_num; } __attribute__((preserve_access_index)); enum sk_pacing { @@ -45,6 +46,10 @@ struct inet_connection_sock { __u64 
icsk_ca_priv[104 / sizeof(__u64)]; } __attribute__((preserve_access_index)); +struct request_sock { + struct sock_common __req_common; +} __attribute__((preserve_access_index)); + struct tcp_sock { struct inet_connection_sock inet_conn; @@ -115,14 +120,6 @@ enum tcp_ca_event { CA_EVENT_ECN_IS_CE = 5, }; -enum tcp_ca_state { - TCP_CA_Open = 0, - TCP_CA_Disorder = 1, - TCP_CA_CWR = 2, - TCP_CA_Recovery = 3, - TCP_CA_Loss = 4 -}; - struct ack_sample { __u32 pkts_acked; __s32 rtt_us; diff --git a/tools/testing/selftests/bpf/flow_dissector_load.h b/tools/testing/selftests/bpf/flow_dissector_load.h index daeaeb518894..7290401ec172 100644 --- a/tools/testing/selftests/bpf/flow_dissector_load.h +++ b/tools/testing/selftests/bpf/flow_dissector_load.h @@ -23,7 +23,13 @@ static inline int bpf_flow_load(struct bpf_object **obj, if (ret) return ret; - main_prog = bpf_object__find_program_by_title(*obj, section_name); + main_prog = NULL; + bpf_object__for_each_program(prog, *obj) { + if (strcmp(section_name, bpf_program__section_name(prog)) == 0) { + main_prog = prog; + break; + } + } if (!main_prog) return -1; diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c index f56655690f9b..12ee40284da0 100644 --- a/tools/testing/selftests/bpf/network_helpers.c +++ b/tools/testing/selftests/bpf/network_helpers.c @@ -104,6 +104,43 @@ error_close: return -1; } +int fastopen_connect(int server_fd, const char *data, unsigned int data_len, + int timeout_ms) +{ + struct sockaddr_storage addr; + socklen_t addrlen = sizeof(addr); + struct sockaddr_in *addr_in; + int fd, ret; + + if (getsockname(server_fd, (struct sockaddr *)&addr, &addrlen)) { + log_err("Failed to get server addr"); + return -1; + } + + addr_in = (struct sockaddr_in *)&addr; + fd = socket(addr_in->sin_family, SOCK_STREAM, 0); + if (fd < 0) { + log_err("Failed to create client socket"); + return -1; + } + + if (settimeo(fd, timeout_ms)) + goto error_close; + + ret = 
sendto(fd, data, data_len, MSG_FASTOPEN, (struct sockaddr *)&addr, + addrlen); + if (ret != data_len) { + log_err("sendto(data, %u) != %d\n", data_len, ret); + goto error_close; + } + + return fd; + +error_close: + save_errno_close(fd); + return -1; +} + static int connect_fd_to_addr(int fd, const struct sockaddr_storage *addr, socklen_t addrlen) diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h index c3728f6667e4..7205f8afdba1 100644 --- a/tools/testing/selftests/bpf/network_helpers.h +++ b/tools/testing/selftests/bpf/network_helpers.h @@ -37,6 +37,8 @@ int start_server(int family, int type, const char *addr, __u16 port, int timeout_ms); int connect_to_fd(int server_fd, int timeout_ms); int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms); +int fastopen_connect(int server_fd, const char *data, unsigned int data_len, + int timeout_ms); int make_sockaddr(int family, const char *addr_str, __u16 port, struct sockaddr_storage *addr, socklen_t *len); diff --git a/tools/testing/selftests/bpf/prog_tests/align.c b/tools/testing/selftests/bpf/prog_tests/align.c index c548aded6585..52414058a627 100644 --- a/tools/testing/selftests/bpf/prog_tests/align.c +++ b/tools/testing/selftests/bpf/prog_tests/align.c @@ -195,13 +195,13 @@ static struct bpf_align_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { {7, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {8, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {8, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"}, {9, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {10, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {10, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"}, {11, "R4_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, - {12, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {12, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"}, {13, "R4_w=inv(id=0,umax_value=1020,var_off=(0x0; 
0x3fc))"}, - {14, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {14, "R4_w=inv(id=1,umax_value=255,var_off=(0x0; 0xff))"}, {15, "R4_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, {16, "R4_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, }, @@ -518,7 +518,7 @@ static struct bpf_align_test tests[] = { * the total offset is 4-byte aligned and meets the * load's requirements. */ - {20, "R5=pkt(id=1,off=0,r=4,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc)"}, + {20, "R5=pkt(id=2,off=0,r=4,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc)"}, }, }, @@ -561,18 +561,18 @@ static struct bpf_align_test tests[] = { /* Adding 14 makes R6 be (4n+2) */ {11, "R6_w=inv(id=0,umin_value=14,umax_value=74,var_off=(0x2; 0x7c))"}, /* Subtracting from packet pointer overflows ubounds */ - {13, "R5_w=pkt(id=1,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c)"}, + {13, "R5_w=pkt(id=2,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c)"}, /* New unknown value in R7 is (4n), >= 76 */ {15, "R7_w=inv(id=0,umin_value=76,umax_value=1096,var_off=(0x0; 0x7fc))"}, /* Adding it to packet pointer gives nice bounds again */ - {16, "R5_w=pkt(id=2,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0xfffffffc)"}, + {16, "R5_w=pkt(id=3,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0xfffffffc)"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (0) * which is 2. Then the variable offset is (4n+2), so * the total offset is 4-byte aligned and meets the * load's requirements. 
*/ - {20, "R5=pkt(id=2,off=0,r=4,umin_value=2,umax_value=1082,var_off=(0x2; 0xfffffffc)"}, + {20, "R5=pkt(id=3,off=0,r=4,umin_value=2,umax_value=1082,var_off=(0x2; 0xfffffffc)"}, }, }, }; diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c index 7375d9a6d242..448885b95eed 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c @@ -7,6 +7,7 @@ #include "bpf_iter_task.skel.h" #include "bpf_iter_task_stack.skel.h" #include "bpf_iter_task_file.skel.h" +#include "bpf_iter_task_btf.skel.h" #include "bpf_iter_tcp4.skel.h" #include "bpf_iter_tcp6.skel.h" #include "bpf_iter_udp4.skel.h" @@ -132,20 +133,118 @@ static void test_task_stack(void) bpf_iter_task_stack__destroy(skel); } +static void *do_nothing(void *arg) +{ + pthread_exit(arg); +} + static void test_task_file(void) { struct bpf_iter_task_file *skel; + pthread_t thread_id; + void *ret; skel = bpf_iter_task_file__open_and_load(); if (CHECK(!skel, "bpf_iter_task_file__open_and_load", "skeleton open_and_load failed\n")) return; + skel->bss->tgid = getpid(); + + if (CHECK(pthread_create(&thread_id, NULL, &do_nothing, NULL), + "pthread_create", "pthread_create failed\n")) + goto done; + do_dummy_read(skel->progs.dump_task_file); + if (CHECK(pthread_join(thread_id, &ret) || ret != NULL, + "pthread_join", "pthread_join failed\n")) + goto done; + + CHECK(skel->bss->count != 0, "check_count", + "invalid non pthread file visit count %d\n", skel->bss->count); + +done: bpf_iter_task_file__destroy(skel); } +#define TASKBUFSZ 32768 + +static char taskbuf[TASKBUFSZ]; + +static int do_btf_read(struct bpf_iter_task_btf *skel) +{ + struct bpf_program *prog = skel->progs.dump_task_struct; + struct bpf_iter_task_btf__bss *bss = skel->bss; + int iter_fd = -1, len = 0, bufleft = TASKBUFSZ; + struct bpf_link *link; + char *buf = taskbuf; + int ret = 0; + + link = bpf_program__attach_iter(prog, NULL); + if 
(CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) + return ret; + + iter_fd = bpf_iter_create(bpf_link__fd(link)); + if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n")) + goto free_link; + + do { + len = read(iter_fd, buf, bufleft); + if (len > 0) { + buf += len; + bufleft -= len; + } + } while (len > 0); + + if (bss->skip) { + printf("%s:SKIP:no __builtin_btf_type_id\n", __func__); + ret = 1; + test__skip(); + goto free_link; + } + + if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno))) + goto free_link; + + CHECK(strstr(taskbuf, "(struct task_struct)") == NULL, + "check for btf representation of task_struct in iter data", + "struct task_struct not found"); +free_link: + if (iter_fd > 0) + close(iter_fd); + bpf_link__destroy(link); + return ret; +} + +static void test_task_btf(void) +{ + struct bpf_iter_task_btf__bss *bss; + struct bpf_iter_task_btf *skel; + int ret; + + skel = bpf_iter_task_btf__open_and_load(); + if (CHECK(!skel, "bpf_iter_task_btf__open_and_load", + "skeleton open_and_load failed\n")) + return; + + bss = skel->bss; + + ret = do_btf_read(skel); + if (ret) + goto cleanup; + + if (CHECK(bss->tasks == 0, "check if iterated over tasks", + "no task iteration, did BPF program run?\n")) + goto cleanup; + + CHECK(bss->seq_err != 0, "check for unexpected err", + "bpf_seq_printf_btf returned %ld", bss->seq_err); + +cleanup: + bpf_iter_task_btf__destroy(skel); +} + static void test_tcp4(void) { struct bpf_iter_tcp4 *skel; @@ -331,7 +430,7 @@ static void test_overflow(bool test_e2big_overflow, bool ret1) struct bpf_map_info map_info = {}; struct bpf_iter_test_kern4 *skel; struct bpf_link *link; - __u32 page_size; + __u32 iter_size; char *buf; skel = bpf_iter_test_kern4__open(); @@ -353,19 +452,19 @@ static void test_overflow(bool test_e2big_overflow, bool ret1) "map_creation failed: %s\n", strerror(errno))) goto free_map1; - /* bpf_seq_printf kernel buffer is one page, so one map + /* bpf_seq_printf kernel buffer is 8 
pages, so one map * bpf_seq_write will mostly fill it, and the other map * will partially fill and then trigger overflow and need * bpf_seq_read restart. */ - page_size = sysconf(_SC_PAGE_SIZE); + iter_size = sysconf(_SC_PAGE_SIZE) << 3; if (test_e2big_overflow) { - skel->rodata->print_len = (page_size + 8) / 8; - expected_read_len = 2 * (page_size + 8); + skel->rodata->print_len = (iter_size + 8) / 8; + expected_read_len = 2 * (iter_size + 8); } else if (!ret1) { - skel->rodata->print_len = (page_size - 8) / 8; - expected_read_len = 2 * (page_size - 8); + skel->rodata->print_len = (iter_size - 8) / 8; + expected_read_len = 2 * (iter_size - 8); } else { skel->rodata->print_len = 1; expected_read_len = 2 * 8; @@ -936,6 +1035,8 @@ void test_bpf_iter(void) test_task_stack(); if (test__start_subtest("task_file")) test_task_file(); + if (test__start_subtest("task_btf")) + test_task_btf(); if (test__start_subtest("tcp4")) test_tcp4(); if (test__start_subtest("tcp6")) diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c index e9f2f12ba06b..e698ee6bb6c2 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c @@ -49,6 +49,7 @@ void test_bpf_verif_scale(void) { "test_verif_scale3.o", BPF_PROG_TYPE_SCHED_CLS }, { "pyperf_global.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, + { "pyperf_subprogs.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, /* full unroll by llvm */ { "pyperf50.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, @@ -86,6 +87,9 @@ void test_bpf_verif_scale(void) { "strobemeta_nounroll1.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, { "strobemeta_nounroll2.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, + /* non-inlined subprogs */ + { "strobemeta_subprogs.o", BPF_PROG_TYPE_RAW_TRACEPOINT }, + { "test_sysctl_loop1.o", BPF_PROG_TYPE_CGROUP_SYSCTL }, { "test_sysctl_loop2.o", BPF_PROG_TYPE_CGROUP_SYSCTL }, diff --git a/tools/testing/selftests/bpf/test_btf.c 
b/tools/testing/selftests/bpf/prog_tests/btf.c index c75fc6447186..93162484c2ca 100644 --- a/tools/testing/selftests/bpf/test_btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -24,40 +24,17 @@ #include "bpf_rlimit.h" #include "bpf_util.h" -#include "test_btf.h" +#include "../test_btf.h" +#include "test_progs.h" #define MAX_INSNS 512 #define MAX_SUBPROGS 16 -static uint32_t pass_cnt; -static uint32_t error_cnt; -static uint32_t skip_cnt; +static int duration = 0; +static bool always_log; -#define CHECK(condition, format...) ({ \ - int __ret = !!(condition); \ - if (__ret) { \ - fprintf(stderr, "%s:%d:FAIL ", __func__, __LINE__); \ - fprintf(stderr, format); \ - } \ - __ret; \ -}) - -static int count_result(int err) -{ - if (err) - error_cnt++; - else - pass_cnt++; - - fprintf(stderr, "\n"); - return err; -} - -static int __base_pr(enum libbpf_print_level level __attribute__((unused)), - const char *format, va_list args) -{ - return vfprintf(stderr, format, args); -} +#undef CHECK +#define CHECK(condition, format...) 
_CHECK(condition, "check", duration, format) #define BTF_END_RAW 0xdeadbeef #define NAME_TBD 0xdeadb33f @@ -69,21 +46,6 @@ static int __base_pr(enum libbpf_print_level level __attribute__((unused)), #define MAX_NR_RAW_U32 1024 #define BTF_LOG_BUF_SIZE 65535 -static struct args { - unsigned int raw_test_num; - unsigned int file_test_num; - unsigned int get_info_test_num; - unsigned int info_raw_test_num; - unsigned int dedup_test_num; - bool raw_test; - bool file_test; - bool get_info_test; - bool pprint_test; - bool always_log; - bool info_raw_test; - bool dedup_test; -} args; - static char btf_log_buf[BTF_LOG_BUF_SIZE]; static struct btf_header hdr_tmpl = { @@ -3664,7 +3626,7 @@ done: return raw_btf; } -static int do_test_raw(unsigned int test_num) +static void do_test_raw(unsigned int test_num) { struct btf_raw_test *test = &raw_tests[test_num - 1]; struct bpf_create_map_attr create_attr = {}; @@ -3674,15 +3636,16 @@ static int do_test_raw(unsigned int test_num) void *raw_btf; int err; - fprintf(stderr, "BTF raw test[%u] (%s): ", test_num, test->descr); + if (!test__start_subtest(test->descr)) + return; + raw_btf = btf_raw_create(&hdr_tmpl, test->raw_types, test->str_sec, test->str_sec_size, &raw_btf_size, NULL); - if (!raw_btf) - return -1; + return; hdr = raw_btf; @@ -3694,7 +3657,7 @@ static int do_test_raw(unsigned int test_num) *btf_log_buf = '\0'; btf_fd = bpf_load_btf(raw_btf, raw_btf_size, btf_log_buf, BTF_LOG_BUF_SIZE, - args.always_log); + always_log); free(raw_btf); err = ((btf_fd == -1) != test->btf_load_err); @@ -3725,32 +3688,12 @@ static int do_test_raw(unsigned int test_num) map_fd, test->map_create_err); done: - if (!err) - fprintf(stderr, "OK"); - - if (*btf_log_buf && (err || args.always_log)) + if (*btf_log_buf && (err || always_log)) fprintf(stderr, "\n%s", btf_log_buf); - if (btf_fd != -1) close(btf_fd); if (map_fd != -1) close(map_fd); - - return err; -} - -static int test_raw(void) -{ - unsigned int i; - int err = 0; - - if 
(args.raw_test_num) - return count_result(do_test_raw(args.raw_test_num)); - - for (i = 1; i <= ARRAY_SIZE(raw_tests); i++) - err |= count_result(do_test_raw(i)); - - return err; } struct btf_get_info_test { @@ -3814,11 +3757,6 @@ const struct btf_get_info_test get_info_tests[] = { }, }; -static inline __u64 ptr_to_u64(const void *ptr) -{ - return (__u64)(unsigned long)ptr; -} - static int test_big_btf_info(unsigned int test_num) { const struct btf_get_info_test *test = &get_info_tests[test_num - 1]; @@ -3851,7 +3789,7 @@ static int test_big_btf_info(unsigned int test_num) btf_fd = bpf_load_btf(raw_btf, raw_btf_size, btf_log_buf, BTF_LOG_BUF_SIZE, - args.always_log); + always_log); if (CHECK(btf_fd == -1, "errno:%d", errno)) { err = -1; goto done; @@ -3892,7 +3830,7 @@ static int test_big_btf_info(unsigned int test_num) fprintf(stderr, "OK"); done: - if (*btf_log_buf && (err || args.always_log)) + if (*btf_log_buf && (err || always_log)) fprintf(stderr, "\n%s", btf_log_buf); free(raw_btf); @@ -3939,7 +3877,7 @@ static int test_btf_id(unsigned int test_num) btf_fd[0] = bpf_load_btf(raw_btf, raw_btf_size, btf_log_buf, BTF_LOG_BUF_SIZE, - args.always_log); + always_log); if (CHECK(btf_fd[0] == -1, "errno:%d", errno)) { err = -1; goto done; @@ -4024,7 +3962,7 @@ static int test_btf_id(unsigned int test_num) fprintf(stderr, "OK"); done: - if (*btf_log_buf && (err || args.always_log)) + if (*btf_log_buf && (err || always_log)) fprintf(stderr, "\n%s", btf_log_buf); free(raw_btf); @@ -4039,7 +3977,7 @@ done: return err; } -static int do_test_get_info(unsigned int test_num) +static void do_test_get_info(unsigned int test_num) { const struct btf_get_info_test *test = &get_info_tests[test_num - 1]; unsigned int raw_btf_size, user_btf_size, expected_nbytes; @@ -4048,11 +3986,14 @@ static int do_test_get_info(unsigned int test_num) int btf_fd = -1, err, ret; uint32_t info_len; - fprintf(stderr, "BTF GET_INFO test[%u] (%s): ", - test_num, test->descr); + if 
(!test__start_subtest(test->descr)) + return; - if (test->special_test) - return test->special_test(test_num); + if (test->special_test) { + err = test->special_test(test_num); + if (CHECK(err, "failed: %d\n", err)) + return; + } raw_btf = btf_raw_create(&hdr_tmpl, test->raw_types, @@ -4061,7 +4002,7 @@ static int do_test_get_info(unsigned int test_num) &raw_btf_size, NULL); if (!raw_btf) - return -1; + return; *btf_log_buf = '\0'; @@ -4073,7 +4014,7 @@ static int do_test_get_info(unsigned int test_num) btf_fd = bpf_load_btf(raw_btf, raw_btf_size, btf_log_buf, BTF_LOG_BUF_SIZE, - args.always_log); + always_log); if (CHECK(btf_fd == -1, "errno:%d", errno)) { err = -1; goto done; @@ -4114,7 +4055,7 @@ static int do_test_get_info(unsigned int test_num) fprintf(stderr, "OK"); done: - if (*btf_log_buf && (err || args.always_log)) + if (*btf_log_buf && (err || always_log)) fprintf(stderr, "\n%s", btf_log_buf); free(raw_btf); @@ -4122,22 +4063,6 @@ done: if (btf_fd != -1) close(btf_fd); - - return err; -} - -static int test_get_info(void) -{ - unsigned int i; - int err = 0; - - if (args.get_info_test_num) - return count_result(do_test_get_info(args.get_info_test_num)); - - for (i = 1; i <= ARRAY_SIZE(get_info_tests); i++) - err |= count_result(do_test_get_info(i)); - - return err; } struct btf_file_test { @@ -4151,7 +4076,7 @@ static struct btf_file_test file_tests[] = { { .file = "test_btf_nokv.o", .btf_kv_notfound = true, }, }; -static int do_test_file(unsigned int test_num) +static void do_test_file(unsigned int test_num) { const struct btf_file_test *test = &file_tests[test_num - 1]; const char *expected_fnames[] = {"_dummy_tracepoint", @@ -4169,17 +4094,17 @@ static int do_test_file(unsigned int test_num) struct bpf_map *map; int i, err, prog_fd; - fprintf(stderr, "BTF libbpf test[%u] (%s): ", test_num, - test->file); + if (!test__start_subtest(test->file)) + return; btf = btf__parse_elf(test->file, &btf_ext); if (IS_ERR(btf)) { if (PTR_ERR(btf) == -ENOENT) { - 
fprintf(stderr, "SKIP. No ELF %s found", BTF_ELF_SEC); - skip_cnt++; - return 0; + printf("%s:SKIP: No ELF %s found", __func__, BTF_ELF_SEC); + test__skip(); + return; } - return PTR_ERR(btf); + return; } btf__free(btf); @@ -4188,7 +4113,7 @@ static int do_test_file(unsigned int test_num) obj = bpf_object__open(test->file); if (CHECK(IS_ERR(obj), "obj: %ld", PTR_ERR(obj))) - return PTR_ERR(obj); + return; prog = bpf_program__next(NULL, obj); if (CHECK(!prog, "Cannot find bpf_prog")) { @@ -4310,21 +4235,6 @@ skip: done: free(func_info); bpf_object__close(obj); - return err; -} - -static int test_file(void) -{ - unsigned int i; - int err = 0; - - if (args.file_test_num) - return count_result(do_test_file(args.file_test_num)); - - for (i = 1; i <= ARRAY_SIZE(file_tests); i++) - err |= count_result(do_test_file(i)); - - return err; } const char *pprint_enum_str[] = { @@ -4428,7 +4338,7 @@ static struct btf_raw_test pprint_test_template[] = { .value_size = sizeof(struct pprint_mapv), .key_type_id = 3, /* unsigned int */ .value_type_id = 16, /* struct pprint_mapv */ - .max_entries = 128 * 1024, + .max_entries = 128, }, { @@ -4493,7 +4403,7 @@ static struct btf_raw_test pprint_test_template[] = { .value_size = sizeof(struct pprint_mapv), .key_type_id = 3, /* unsigned int */ .value_type_id = 16, /* struct pprint_mapv */ - .max_entries = 128 * 1024, + .max_entries = 128, }, { @@ -4564,7 +4474,7 @@ static struct btf_raw_test pprint_test_template[] = { .value_size = sizeof(struct pprint_mapv), .key_type_id = 3, /* unsigned int */ .value_type_id = 16, /* struct pprint_mapv */ - .max_entries = 128 * 1024, + .max_entries = 128, }, #ifdef __SIZEOF_INT128__ @@ -4591,7 +4501,7 @@ static struct btf_raw_test pprint_test_template[] = { .value_size = sizeof(struct pprint_mapv_int128), .key_type_id = 1, .value_type_id = 4, - .max_entries = 128 * 1024, + .max_entries = 128, .mapv_kind = PPRINT_MAPV_KIND_INT128, }, #endif @@ -4790,7 +4700,7 @@ static int check_line(const char 
*expected_line, int nexpected_line, } -static int do_test_pprint(int test_num) +static void do_test_pprint(int test_num) { const struct btf_raw_test *test = &pprint_test_template[test_num]; enum pprint_mapv_kind_t mapv_kind = test->mapv_kind; @@ -4809,18 +4719,20 @@ static int do_test_pprint(int test_num) uint8_t *raw_btf; ssize_t nread; - fprintf(stderr, "%s(#%d)......", test->descr, test_num); + if (!test__start_subtest(test->descr)) + return; + raw_btf = btf_raw_create(&hdr_tmpl, test->raw_types, test->str_sec, test->str_sec_size, &raw_btf_size, NULL); if (!raw_btf) - return -1; + return; *btf_log_buf = '\0'; btf_fd = bpf_load_btf(raw_btf, raw_btf_size, btf_log_buf, BTF_LOG_BUF_SIZE, - args.always_log); + always_log); free(raw_btf); if (CHECK(btf_fd == -1, "errno:%d", errno)) { @@ -4971,7 +4883,7 @@ done: free(mapv); if (!err) fprintf(stderr, "OK"); - if (*btf_log_buf && (err || args.always_log)) + if (*btf_log_buf && (err || always_log)) fprintf(stderr, "\n%s", btf_log_buf); if (btf_fd != -1) close(btf_fd); @@ -4981,14 +4893,11 @@ done: fclose(pin_file); unlink(pin_path); free(line); - - return err; } -static int test_pprint(void) +static void test_pprint(void) { unsigned int i; - int err = 0; /* test various maps with the first test template */ for (i = 0; i < ARRAY_SIZE(pprint_tests_meta); i++) { @@ -4999,7 +4908,7 @@ static int test_pprint(void) pprint_test_template[0].lossless_map = pprint_tests_meta[i].lossless_map; pprint_test_template[0].percpu_map = pprint_tests_meta[i].percpu_map; - err |= count_result(do_test_pprint(0)); + do_test_pprint(0); } /* test rest test templates with the first map */ @@ -5010,10 +4919,8 @@ static int test_pprint(void) pprint_test_template[i].ordered_map = pprint_tests_meta[0].ordered_map; pprint_test_template[i].lossless_map = pprint_tests_meta[0].lossless_map; pprint_test_template[i].percpu_map = pprint_tests_meta[0].percpu_map; - err |= count_result(do_test_pprint(i)); + do_test_pprint(i); } - - return err; } #define 
BPF_LINE_INFO_ENC(insn_off, file_off, line_off, line_num, line_col) \ @@ -6178,7 +6085,7 @@ done: return err; } -static int do_test_info_raw(unsigned int test_num) +static void do_test_info_raw(unsigned int test_num) { const struct prog_info_raw_test *test = &info_raw_tests[test_num - 1]; unsigned int raw_btf_size, linfo_str_off, linfo_size; @@ -6187,18 +6094,19 @@ static int do_test_info_raw(unsigned int test_num) const char *ret_next_str; union bpf_attr attr = {}; - fprintf(stderr, "BTF prog info raw test[%u] (%s): ", test_num, test->descr); + if (!test__start_subtest(test->descr)) + return; + raw_btf = btf_raw_create(&hdr_tmpl, test->raw_types, test->str_sec, test->str_sec_size, &raw_btf_size, &ret_next_str); - if (!raw_btf) - return -1; + return; *btf_log_buf = '\0'; btf_fd = bpf_load_btf(raw_btf, raw_btf_size, btf_log_buf, BTF_LOG_BUF_SIZE, - args.always_log); + always_log); free(raw_btf); if (CHECK(btf_fd == -1, "invalid btf_fd errno:%d", errno)) { @@ -6206,7 +6114,7 @@ static int do_test_info_raw(unsigned int test_num) goto done; } - if (*btf_log_buf && args.always_log) + if (*btf_log_buf && always_log) fprintf(stderr, "\n%s", btf_log_buf); *btf_log_buf = '\0'; @@ -6261,10 +6169,7 @@ static int do_test_info_raw(unsigned int test_num) goto done; done: - if (!err) - fprintf(stderr, "OK"); - - if (*btf_log_buf && (err || args.always_log)) + if (*btf_log_buf && (err || always_log)) fprintf(stderr, "\n%s", btf_log_buf); if (btf_fd != -1) @@ -6274,22 +6179,6 @@ done: if (!IS_ERR(patched_linfo)) free(patched_linfo); - - return err; -} - -static int test_info_raw(void) -{ - unsigned int i; - int err = 0; - - if (args.info_raw_test_num) - return count_result(do_test_info_raw(args.info_raw_test_num)); - - for (i = 1; i <= ARRAY_SIZE(info_raw_tests); i++) - err |= count_result(do_test_info_raw(i)); - - return err; } struct btf_raw_data { @@ -6754,7 +6643,7 @@ static void dump_btf_strings(const char *strs, __u32 len) } } -static int do_test_dedup(unsigned int test_num) 
+static void do_test_dedup(unsigned int test_num) { const struct btf_dedup_test *test = &dedup_tests[test_num - 1]; __u32 test_nr_types, expect_nr_types, test_btf_size, expect_btf_size; @@ -6769,13 +6658,15 @@ static int do_test_dedup(unsigned int test_num) void *raw_btf; int err = 0, i; - fprintf(stderr, "BTF dedup test[%u] (%s):", test_num, test->descr); + if (!test__start_subtest(test->descr)) + return; raw_btf = btf_raw_create(&hdr_tmpl, test->input.raw_types, test->input.str_sec, test->input.str_sec_size, &raw_btf_size, &ret_test_next_str); if (!raw_btf) - return -1; + return; + test_btf = btf__new((__u8 *)raw_btf, raw_btf_size); free(raw_btf); if (CHECK(IS_ERR(test_btf), "invalid test_btf errno:%ld", @@ -6789,7 +6680,7 @@ static int do_test_dedup(unsigned int test_num) test->expect.str_sec_size, &raw_btf_size, &ret_expect_next_str); if (!raw_btf) - return -1; + return; expect_btf = btf__new((__u8 *)raw_btf, raw_btf_size); free(raw_btf); if (CHECK(IS_ERR(expect_btf), "invalid expect_btf errno:%ld", @@ -6894,174 +6785,27 @@ static int do_test_dedup(unsigned int test_num) } done: - if (!err) - fprintf(stderr, "OK"); if (!IS_ERR(test_btf)) btf__free(test_btf); if (!IS_ERR(expect_btf)) btf__free(expect_btf); - - return err; } -static int test_dedup(void) +void test_btf(void) { - unsigned int i; - int err = 0; + int i; - if (args.dedup_test_num) - return count_result(do_test_dedup(args.dedup_test_num)); + always_log = env.verbosity > VERBOSE_NONE; + for (i = 1; i <= ARRAY_SIZE(raw_tests); i++) + do_test_raw(i); + for (i = 1; i <= ARRAY_SIZE(get_info_tests); i++) + do_test_get_info(i); + for (i = 1; i <= ARRAY_SIZE(file_tests); i++) + do_test_file(i); + for (i = 1; i <= ARRAY_SIZE(info_raw_tests); i++) + do_test_info_raw(i); for (i = 1; i <= ARRAY_SIZE(dedup_tests); i++) - err |= count_result(do_test_dedup(i)); - - return err; -} - -static void usage(const char *cmd) -{ - fprintf(stderr, "Usage: %s [-l] [[-r btf_raw_test_num (1 - %zu)] |\n" - "\t[-g 
btf_get_info_test_num (1 - %zu)] |\n" - "\t[-f btf_file_test_num (1 - %zu)] |\n" - "\t[-k btf_prog_info_raw_test_num (1 - %zu)] |\n" - "\t[-p (pretty print test)] |\n" - "\t[-d btf_dedup_test_num (1 - %zu)]]\n", - cmd, ARRAY_SIZE(raw_tests), ARRAY_SIZE(get_info_tests), - ARRAY_SIZE(file_tests), ARRAY_SIZE(info_raw_tests), - ARRAY_SIZE(dedup_tests)); -} - -static int parse_args(int argc, char **argv) -{ - const char *optstr = "hlpk:f:r:g:d:"; - int opt; - - while ((opt = getopt(argc, argv, optstr)) != -1) { - switch (opt) { - case 'l': - args.always_log = true; - break; - case 'f': - args.file_test_num = atoi(optarg); - args.file_test = true; - break; - case 'r': - args.raw_test_num = atoi(optarg); - args.raw_test = true; - break; - case 'g': - args.get_info_test_num = atoi(optarg); - args.get_info_test = true; - break; - case 'p': - args.pprint_test = true; - break; - case 'k': - args.info_raw_test_num = atoi(optarg); - args.info_raw_test = true; - break; - case 'd': - args.dedup_test_num = atoi(optarg); - args.dedup_test = true; - break; - case 'h': - usage(argv[0]); - exit(0); - default: - usage(argv[0]); - return -1; - } - } - - if (args.raw_test_num && - (args.raw_test_num < 1 || - args.raw_test_num > ARRAY_SIZE(raw_tests))) { - fprintf(stderr, "BTF raw test number must be [1 - %zu]\n", - ARRAY_SIZE(raw_tests)); - return -1; - } - - if (args.file_test_num && - (args.file_test_num < 1 || - args.file_test_num > ARRAY_SIZE(file_tests))) { - fprintf(stderr, "BTF file test number must be [1 - %zu]\n", - ARRAY_SIZE(file_tests)); - return -1; - } - - if (args.get_info_test_num && - (args.get_info_test_num < 1 || - args.get_info_test_num > ARRAY_SIZE(get_info_tests))) { - fprintf(stderr, "BTF get info test number must be [1 - %zu]\n", - ARRAY_SIZE(get_info_tests)); - return -1; - } - - if (args.info_raw_test_num && - (args.info_raw_test_num < 1 || - args.info_raw_test_num > ARRAY_SIZE(info_raw_tests))) { - fprintf(stderr, "BTF prog info raw test number must be [1 - 
%zu]\n", - ARRAY_SIZE(info_raw_tests)); - return -1; - } - - if (args.dedup_test_num && - (args.dedup_test_num < 1 || - args.dedup_test_num > ARRAY_SIZE(dedup_tests))) { - fprintf(stderr, "BTF dedup test number must be [1 - %zu]\n", - ARRAY_SIZE(dedup_tests)); - return -1; - } - - return 0; -} - -static void print_summary(void) -{ - fprintf(stderr, "PASS:%u SKIP:%u FAIL:%u\n", - pass_cnt - skip_cnt, skip_cnt, error_cnt); -} - -int main(int argc, char **argv) -{ - int err = 0; - - err = parse_args(argc, argv); - if (err) - return err; - - if (args.always_log) - libbpf_set_print(__base_pr); - - if (args.raw_test) - err |= test_raw(); - - if (args.get_info_test) - err |= test_get_info(); - - if (args.file_test) - err |= test_file(); - - if (args.pprint_test) - err |= test_pprint(); - - if (args.info_raw_test) - err |= test_info_raw(); - - if (args.dedup_test) - err |= test_dedup(); - - if (args.raw_test || args.get_info_test || args.file_test || - args.pprint_test || args.info_raw_test || args.dedup_test) - goto done; - - err |= test_raw(); - err |= test_get_info(); - err |= test_file(); - err |= test_info_raw(); - err |= test_dedup(); - -done: - print_summary(); - return err; + do_test_dedup(i); + test_pprint(); } diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c index 39fb81d9daeb..c60091ee8a21 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c @@ -129,6 +129,109 @@ done: return err; } +static char *dump_buf; +static size_t dump_buf_sz; +static FILE *dump_buf_file; + +void test_btf_dump_incremental(void) +{ + struct btf *btf = NULL; + struct btf_dump *d = NULL; + struct btf_dump_opts opts; + int id, err, i; + + dump_buf_file = open_memstream(&dump_buf, &dump_buf_sz); + if (!ASSERT_OK_PTR(dump_buf_file, "dump_memstream")) + return; + btf = btf__new_empty(); + if (!ASSERT_OK_PTR(btf, "new_empty")) + goto err_out; + opts.ctx = 
dump_buf_file; + d = btf_dump__new(btf, NULL, &opts, btf_dump_printf); + if (!ASSERT_OK(libbpf_get_error(d), "btf_dump__new")) + goto err_out; + + /* First, generate BTF corresponding to the following C code: + * + * enum { VAL = 1 }; + * + * struct s { int x; }; + * + */ + id = btf__add_enum(btf, NULL, 4); + ASSERT_EQ(id, 1, "enum_id"); + err = btf__add_enum_value(btf, "VAL", 1); + ASSERT_OK(err, "enum_val_ok"); + + id = btf__add_int(btf, "int", 4, BTF_INT_SIGNED); + ASSERT_EQ(id, 2, "int_id"); + + id = btf__add_struct(btf, "s", 4); + ASSERT_EQ(id, 3, "struct_id"); + err = btf__add_field(btf, "x", 2, 0, 0); + ASSERT_OK(err, "field_ok"); + + for (i = 1; i <= btf__get_nr_types(btf); i++) { + err = btf_dump__dump_type(d, i); + ASSERT_OK(err, "dump_type_ok"); + } + + fflush(dump_buf_file); + dump_buf[dump_buf_sz] = 0; /* some libc implementations don't do this */ + ASSERT_STREQ(dump_buf, +"enum {\n" +" VAL = 1,\n" +"};\n" +"\n" +"struct s {\n" +" int x;\n" +"};\n\n", "c_dump1"); + + /* Now, after dumping original BTF, append another struct that embeds + * anonymous enum. It also has a name conflict with the first struct: + * + * struct s___2 { + * enum { VAL___2 = 1 } x; + * struct s s; + * }; + * + * This will test that btf_dump'er maintains internal state properly. + * Note that VAL___2 enum value. 
It's because we've already emitted + * that enum as a global anonymous enum, so btf_dump will ensure that + * enum values don't conflict; + * + */ + fseek(dump_buf_file, 0, SEEK_SET); + + id = btf__add_struct(btf, "s", 4); + ASSERT_EQ(id, 4, "struct_id"); + err = btf__add_field(btf, "x", 1, 0, 0); + ASSERT_OK(err, "field_ok"); + err = btf__add_field(btf, "s", 3, 32, 0); + ASSERT_OK(err, "field_ok"); + + for (i = 1; i <= btf__get_nr_types(btf); i++) { + err = btf_dump__dump_type(d, i); + ASSERT_OK(err, "dump_type_ok"); + } + + fflush(dump_buf_file); + dump_buf[dump_buf_sz] = 0; /* some libc implementations don't do this */ + ASSERT_STREQ(dump_buf, +"struct s___2 {\n" +" enum {\n" +" VAL___2 = 1,\n" +" } x;\n" +" struct s s;\n" +"};\n\n" , "c_dump1"); + +err_out: + fclose(dump_buf_file); + free(dump_buf); + btf_dump__free(d); + btf__free(btf); +} + void test_btf_dump() { int i; @@ -140,4 +243,6 @@ void test_btf_dump() { test_btf_dump_case(i, &btf_dump_test_cases[i]); } + if (test__start_subtest("btf_dump: incremental")) + test_btf_dump_incremental(); } diff --git a/tools/testing/selftests/bpf/prog_tests/btf_endian.c b/tools/testing/selftests/bpf/prog_tests/btf_endian.c new file mode 100644 index 000000000000..8c52d72c876e --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/btf_endian.c @@ -0,0 +1,101 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#define _GNU_SOURCE +#include <string.h> +#include <byteswap.h> +#include <test_progs.h> +#include <bpf/btf.h> + +static int duration = 0; + +void test_btf_endian() { +#if __BYTE_ORDER == __LITTLE_ENDIAN + enum btf_endianness endian = BTF_LITTLE_ENDIAN; +#elif __BYTE_ORDER == __BIG_ENDIAN + enum btf_endianness endian = BTF_BIG_ENDIAN; +#else +#error "Unrecognized __BYTE_ORDER" +#endif + enum btf_endianness swap_endian = 1 - endian; + struct btf *btf = NULL, *swap_btf = NULL; + const void *raw_data, *swap_raw_data; + const struct btf_type *t; + const struct btf_header *hdr; + __u32 raw_sz, 
swap_raw_sz; + int var_id; + + /* Load BTF in native endianness */ + btf = btf__parse_elf("btf_dump_test_case_syntax.o", NULL); + if (!ASSERT_OK_PTR(btf, "parse_native_btf")) + goto err_out; + + ASSERT_EQ(btf__endianness(btf), endian, "endian"); + btf__set_endianness(btf, swap_endian); + ASSERT_EQ(btf__endianness(btf), swap_endian, "endian"); + + /* Get raw BTF data in non-native endianness... */ + raw_data = btf__get_raw_data(btf, &raw_sz); + if (!ASSERT_OK_PTR(raw_data, "raw_data_inverted")) + goto err_out; + + /* ...and open it as a new BTF instance */ + swap_btf = btf__new(raw_data, raw_sz); + if (!ASSERT_OK_PTR(swap_btf, "parse_swap_btf")) + goto err_out; + + ASSERT_EQ(btf__endianness(swap_btf), swap_endian, "endian"); + ASSERT_EQ(btf__get_nr_types(swap_btf), btf__get_nr_types(btf), "nr_types"); + + swap_raw_data = btf__get_raw_data(swap_btf, &swap_raw_sz); + if (!ASSERT_OK_PTR(swap_raw_data, "swap_raw_data")) + goto err_out; + + /* both raw data should be identical (with non-native endianness) */ + ASSERT_OK(memcmp(raw_data, swap_raw_data, raw_sz), "mem_identical"); + + /* make sure that at least BTF header data is really swapped */ + hdr = swap_raw_data; + ASSERT_EQ(bswap_16(hdr->magic), BTF_MAGIC, "btf_magic_swapped"); + ASSERT_EQ(raw_sz, swap_raw_sz, "raw_sizes"); + + /* swap it back to native endianness */ + btf__set_endianness(swap_btf, endian); + swap_raw_data = btf__get_raw_data(swap_btf, &swap_raw_sz); + if (!ASSERT_OK_PTR(swap_raw_data, "swap_raw_data")) + goto err_out; + + /* now header should have native BTF_MAGIC */ + hdr = swap_raw_data; + ASSERT_EQ(hdr->magic, BTF_MAGIC, "btf_magic_native"); + ASSERT_EQ(raw_sz, swap_raw_sz, "raw_sizes"); + + /* now modify original BTF */ + var_id = btf__add_var(btf, "some_var", BTF_VAR_GLOBAL_ALLOCATED, 1); + CHECK(var_id <= 0, "var_id", "failed %d\n", var_id); + + btf__free(swap_btf); + swap_btf = NULL; + + btf__set_endianness(btf, swap_endian); + raw_data = btf__get_raw_data(btf, &raw_sz); + if 
(!ASSERT_OK_PTR(raw_data, "raw_data_inverted")) + goto err_out; + + /* and re-open swapped raw data again */ + swap_btf = btf__new(raw_data, raw_sz); + if (!ASSERT_OK_PTR(swap_btf, "parse_swap_btf")) + goto err_out; + + ASSERT_EQ(btf__endianness(swap_btf), swap_endian, "endian"); + ASSERT_EQ(btf__get_nr_types(swap_btf), btf__get_nr_types(btf), "nr_types"); + + /* the type should appear as if it was stored in native endianness */ + t = btf__type_by_id(swap_btf, var_id); + ASSERT_STREQ(btf__str_by_offset(swap_btf, t->name_off), "some_var", "var_name"); + ASSERT_EQ(btf_var(t)->linkage, BTF_VAR_GLOBAL_ALLOCATED, "var_linkage"); + ASSERT_EQ(t->type, 1, "var_type"); + +err_out: + btf__free(btf); + btf__free(swap_btf); +} diff --git a/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c b/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c index 6ccecbd39476..76ebe4c250f1 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c +++ b/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c @@ -53,12 +53,12 @@ static int kern_sync_rcu(void) return err; } -void test_btf_map_in_map(void) +static void test_lookup_update(void) { - int err, key = 0, val, i; + int map1_fd, map2_fd, map3_fd, map4_fd, map5_fd, map1_id, map2_id; + int outer_arr_fd, outer_hash_fd, outer_arr_dyn_fd; struct test_btf_map_in_map *skel; - int outer_arr_fd, outer_hash_fd; - int fd, map1_fd, map2_fd, map1_id, map2_id; + int err, key = 0, val, i, fd; skel = test_btf_map_in_map__open_and_load(); if (CHECK(!skel, "skel_open", "failed to open&load skeleton\n")) @@ -70,32 +70,45 @@ void test_btf_map_in_map(void) map1_fd = bpf_map__fd(skel->maps.inner_map1); map2_fd = bpf_map__fd(skel->maps.inner_map2); + map3_fd = bpf_map__fd(skel->maps.inner_map3); + map4_fd = bpf_map__fd(skel->maps.inner_map4); + map5_fd = bpf_map__fd(skel->maps.inner_map5); + outer_arr_dyn_fd = bpf_map__fd(skel->maps.outer_arr_dyn); outer_arr_fd = bpf_map__fd(skel->maps.outer_arr); outer_hash_fd = 
bpf_map__fd(skel->maps.outer_hash); - /* inner1 = input, inner2 = input + 1 */ - map1_fd = bpf_map__fd(skel->maps.inner_map1); + /* inner1 = input, inner2 = input + 1, inner3 = input + 2 */ bpf_map_update_elem(outer_arr_fd, &key, &map1_fd, 0); - map2_fd = bpf_map__fd(skel->maps.inner_map2); bpf_map_update_elem(outer_hash_fd, &key, &map2_fd, 0); + bpf_map_update_elem(outer_arr_dyn_fd, &key, &map3_fd, 0); skel->bss->input = 1; usleep(1); - bpf_map_lookup_elem(map1_fd, &key, &val); CHECK(val != 1, "inner1", "got %d != exp %d\n", val, 1); bpf_map_lookup_elem(map2_fd, &key, &val); CHECK(val != 2, "inner2", "got %d != exp %d\n", val, 2); + bpf_map_lookup_elem(map3_fd, &key, &val); + CHECK(val != 3, "inner3", "got %d != exp %d\n", val, 3); - /* inner1 = input + 1, inner2 = input */ + /* inner2 = input, inner1 = input + 1, inner4 = input + 2 */ bpf_map_update_elem(outer_arr_fd, &key, &map2_fd, 0); bpf_map_update_elem(outer_hash_fd, &key, &map1_fd, 0); + bpf_map_update_elem(outer_arr_dyn_fd, &key, &map4_fd, 0); skel->bss->input = 3; usleep(1); - bpf_map_lookup_elem(map1_fd, &key, &val); CHECK(val != 4, "inner1", "got %d != exp %d\n", val, 4); bpf_map_lookup_elem(map2_fd, &key, &val); CHECK(val != 3, "inner2", "got %d != exp %d\n", val, 3); + bpf_map_lookup_elem(map4_fd, &key, &val); + CHECK(val != 5, "inner4", "got %d != exp %d\n", val, 5); + + /* inner5 = input + 2 */ + bpf_map_update_elem(outer_arr_dyn_fd, &key, &map5_fd, 0); + skel->bss->input = 5; + usleep(1); + bpf_map_lookup_elem(map5_fd, &key, &val); + CHECK(val != 7, "inner5", "got %d != exp %d\n", val, 7); for (i = 0; i < 5; i++) { val = i % 2 ? map1_fd : map2_fd; @@ -106,7 +119,13 @@ void test_btf_map_in_map(void) } err = bpf_map_update_elem(outer_arr_fd, &key, &val, 0); if (CHECK_FAIL(err)) { - printf("failed to update hash_of_maps on iter #%d\n", i); + printf("failed to update array_of_maps on iter #%d\n", i); + goto cleanup; + } + val = i % 2 ? 
map4_fd : map5_fd; + err = bpf_map_update_elem(outer_arr_dyn_fd, &key, &val, 0); + if (CHECK_FAIL(err)) { + printf("failed to update array_of_maps (dyn) on iter #%d\n", i); goto cleanup; } } @@ -143,3 +162,36 @@ void test_btf_map_in_map(void) cleanup: test_btf_map_in_map__destroy(skel); } + +static void test_diff_size(void) +{ + struct test_btf_map_in_map *skel; + int err, inner_map_fd, zero = 0; + + skel = test_btf_map_in_map__open_and_load(); + if (CHECK(!skel, "skel_open", "failed to open&load skeleton\n")) + return; + + inner_map_fd = bpf_map__fd(skel->maps.sockarr_sz2); + err = bpf_map_update_elem(bpf_map__fd(skel->maps.outer_sockarr), &zero, + &inner_map_fd, 0); + CHECK(err, "outer_sockarr inner map size check", + "cannot use a different size inner_map\n"); + + inner_map_fd = bpf_map__fd(skel->maps.inner_map_sz2); + err = bpf_map_update_elem(bpf_map__fd(skel->maps.outer_arr), &zero, + &inner_map_fd, 0); + CHECK(!err, "outer_arr inner map size check", + "incorrectly updated with a different size inner_map\n"); + + test_btf_map_in_map__destroy(skel); +} + +void test_btf_map_in_map(void) +{ + if (test__start_subtest("lookup_update")) + test_lookup_update(); + + if (test__start_subtest("diff_size")) + test_diff_size(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c b/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c new file mode 100644 index 000000000000..86ccf37e26b3 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c @@ -0,0 +1,234 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#define _GNU_SOURCE +#include <netinet/in.h> +#include <arpa/inet.h> +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <sched.h> +#include <linux/compiler.h> +#include <bpf/libbpf.h> + +#include "network_helpers.h" +#include "test_progs.h" +#include "test_btf_skc_cls_ingress.skel.h" + +static struct test_btf_skc_cls_ingress *skel; +struct 
sockaddr_in6 srv_sa6; +static __u32 duration; + +#define PROG_PIN_FILE "/sys/fs/bpf/btf_skc_cls_ingress" + +static int write_sysctl(const char *sysctl, const char *value) +{ + int fd, err, len; + + fd = open(sysctl, O_WRONLY); + if (CHECK(fd == -1, "open sysctl", "open(%s): %s (%d)\n", + sysctl, strerror(errno), errno)) + return -1; + + len = strlen(value); + err = write(fd, value, len); + close(fd); + if (CHECK(err != len, "write sysctl", + "write(%s, %s, %d): err:%d %s (%d)\n", + sysctl, value, len, err, strerror(errno), errno)) + return -1; + + return 0; +} + +static int prepare_netns(void) +{ + if (CHECK(unshare(CLONE_NEWNET), "create netns", + "unshare(CLONE_NEWNET): %s (%d)", + strerror(errno), errno)) + return -1; + + if (CHECK(system("ip link set dev lo up"), + "ip link set dev lo up", "failed\n")) + return -1; + + if (CHECK(system("tc qdisc add dev lo clsact"), + "tc qdisc add dev lo clsact", "failed\n")) + return -1; + + if (CHECK(system("tc filter add dev lo ingress bpf direct-action object-pinned " PROG_PIN_FILE), + "install tc cls-prog at ingress", "failed\n")) + return -1; + + /* Ensure 20 bytes options (i.e. in total 40 bytes tcp header) for the + * bpf_tcp_gen_syncookie() helper. 
+ */ + if (write_sysctl("/proc/sys/net/ipv4/tcp_window_scaling", "1") || + write_sysctl("/proc/sys/net/ipv4/tcp_timestamps", "1") || + write_sysctl("/proc/sys/net/ipv4/tcp_sack", "1")) + return -1; + + return 0; +} + +static void reset_test(void) +{ + memset(&skel->bss->srv_sa6, 0, sizeof(skel->bss->srv_sa6)); + skel->bss->listen_tp_sport = 0; + skel->bss->req_sk_sport = 0; + skel->bss->recv_cookie = 0; + skel->bss->gen_cookie = 0; + skel->bss->linum = 0; +} + +static void print_err_line(void) +{ + if (skel->bss->linum) + printf("bpf prog error at line %u\n", skel->bss->linum); +} + +static void test_conn(void) +{ + int listen_fd = -1, cli_fd = -1, err; + socklen_t addrlen = sizeof(srv_sa6); + int srv_port; + + if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "1")) + return; + + listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0); + if (CHECK_FAIL(listen_fd == -1)) + return; + + err = getsockname(listen_fd, (struct sockaddr *)&srv_sa6, &addrlen); + if (CHECK(err, "getsockname(listen_fd)", "err:%d errno:%d\n", err, + errno)) + goto done; + memcpy(&skel->bss->srv_sa6, &srv_sa6, sizeof(srv_sa6)); + srv_port = ntohs(srv_sa6.sin6_port); + + cli_fd = connect_to_fd(listen_fd, 0); + if (CHECK_FAIL(cli_fd == -1)) + goto done; + + if (CHECK(skel->bss->listen_tp_sport != srv_port || + skel->bss->req_sk_sport != srv_port, + "Unexpected sk src port", + "listen_tp_sport:%u req_sk_sport:%u expected:%u\n", + skel->bss->listen_tp_sport, skel->bss->req_sk_sport, + srv_port)) + goto done; + + if (CHECK(skel->bss->gen_cookie || skel->bss->recv_cookie, + "Unexpected syncookie states", + "gen_cookie:%u recv_cookie:%u\n", + skel->bss->gen_cookie, skel->bss->recv_cookie)) + goto done; + + CHECK(skel->bss->linum, "bpf prog detected error", "at line %u\n", + skel->bss->linum); + +done: + if (listen_fd != -1) + close(listen_fd); + if (cli_fd != -1) + close(cli_fd); +} + +static void test_syncookie(void) +{ + int listen_fd = -1, cli_fd = -1, err; + socklen_t addrlen = 
sizeof(srv_sa6); + int srv_port; + + /* Enforce syncookie mode */ + if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "2")) + return; + + listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0); + if (CHECK_FAIL(listen_fd == -1)) + return; + + err = getsockname(listen_fd, (struct sockaddr *)&srv_sa6, &addrlen); + if (CHECK(err, "getsockname(listen_fd)", "err:%d errno:%d\n", err, + errno)) + goto done; + memcpy(&skel->bss->srv_sa6, &srv_sa6, sizeof(srv_sa6)); + srv_port = ntohs(srv_sa6.sin6_port); + + cli_fd = connect_to_fd(listen_fd, 0); + if (CHECK_FAIL(cli_fd == -1)) + goto done; + + if (CHECK(skel->bss->listen_tp_sport != srv_port, + "Unexpected tp src port", + "listen_tp_sport:%u expected:%u\n", + skel->bss->listen_tp_sport, srv_port)) + goto done; + + if (CHECK(skel->bss->req_sk_sport, + "Unexpected req_sk src port", + "req_sk_sport:%u expected:0\n", + skel->bss->req_sk_sport)) + goto done; + + if (CHECK(!skel->bss->gen_cookie || + skel->bss->gen_cookie != skel->bss->recv_cookie, + "Unexpected syncookie states", + "gen_cookie:%u recv_cookie:%u\n", + skel->bss->gen_cookie, skel->bss->recv_cookie)) + goto done; + + CHECK(skel->bss->linum, "bpf prog detected error", "at line %u\n", + skel->bss->linum); + +done: + if (listen_fd != -1) + close(listen_fd); + if (cli_fd != -1) + close(cli_fd); +} + +struct test { + const char *desc; + void (*run)(void); +}; + +#define DEF_TEST(name) { #name, test_##name } +static struct test tests[] = { + DEF_TEST(conn), + DEF_TEST(syncookie), +}; + +void test_btf_skc_cls_ingress(void) +{ + int i, err; + + skel = test_btf_skc_cls_ingress__open_and_load(); + if (CHECK(!skel, "test_btf_skc_cls_ingress__open_and_load", "failed\n")) + return; + + err = bpf_program__pin(skel->progs.cls_ingress, PROG_PIN_FILE); + if (CHECK(err, "bpf_program__pin", + "cannot pin bpf prog to %s. 
err:%d\n", PROG_PIN_FILE, err)) { + test_btf_skc_cls_ingress__destroy(skel); + return; + } + + for (i = 0; i < ARRAY_SIZE(tests); i++) { + if (!test__start_subtest(tests[i].desc)) + continue; + + if (prepare_netns()) + break; + + tests[i].run(); + + print_err_line(); + reset_test(); + } + + bpf_program__unpin(skel->progs.cls_ingress, PROG_PIN_FILE); + test_btf_skc_cls_ingress__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/btf_write.c b/tools/testing/selftests/bpf/prog_tests/btf_write.c new file mode 100644 index 000000000000..314e1e7c36df --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/btf_write.c @@ -0,0 +1,244 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include <test_progs.h> +#include <bpf/btf.h> + +static int duration = 0; + +void test_btf_write() { + const struct btf_var_secinfo *vi; + const struct btf_type *t; + const struct btf_member *m; + const struct btf_enum *v; + const struct btf_param *p; + struct btf *btf; + int id, err, str_off; + + btf = btf__new_empty(); + if (CHECK(IS_ERR(btf), "new_empty", "failed: %ld\n", PTR_ERR(btf))) + return; + + str_off = btf__find_str(btf, "int"); + ASSERT_EQ(str_off, -ENOENT, "int_str_missing_off"); + + str_off = btf__add_str(btf, "int"); + ASSERT_EQ(str_off, 1, "int_str_off"); + + str_off = btf__find_str(btf, "int"); + ASSERT_EQ(str_off, 1, "int_str_found_off"); + + /* BTF_KIND_INT */ + id = btf__add_int(btf, "int", 4, BTF_INT_SIGNED); + ASSERT_EQ(id, 1, "int_id"); + + t = btf__type_by_id(btf, 1); + /* should re-use previously added "int" string */ + ASSERT_EQ(t->name_off, str_off, "int_name_off"); + ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "int", "int_name"); + ASSERT_EQ(btf_kind(t), BTF_KIND_INT, "int_kind"); + ASSERT_EQ(t->size, 4, "int_sz"); + ASSERT_EQ(btf_int_encoding(t), BTF_INT_SIGNED, "int_enc"); + ASSERT_EQ(btf_int_bits(t), 32, "int_bits"); + + /* invalid int size */ + id = btf__add_int(btf, "bad sz int", 7, 0); + ASSERT_ERR(id, 
"int_bad_sz"); + /* invalid encoding */ + id = btf__add_int(btf, "bad enc int", 4, 123); + ASSERT_ERR(id, "int_bad_enc"); + /* NULL name */ + id = btf__add_int(btf, NULL, 4, 0); + ASSERT_ERR(id, "int_bad_null_name"); + /* empty name */ + id = btf__add_int(btf, "", 4, 0); + ASSERT_ERR(id, "int_bad_empty_name"); + + /* PTR/CONST/VOLATILE/RESTRICT */ + id = btf__add_ptr(btf, 1); + ASSERT_EQ(id, 2, "ptr_id"); + t = btf__type_by_id(btf, 2); + ASSERT_EQ(btf_kind(t), BTF_KIND_PTR, "ptr_kind"); + ASSERT_EQ(t->type, 1, "ptr_type"); + + id = btf__add_const(btf, 5); /* points forward to restrict */ + ASSERT_EQ(id, 3, "const_id"); + t = btf__type_by_id(btf, 3); + ASSERT_EQ(btf_kind(t), BTF_KIND_CONST, "const_kind"); + ASSERT_EQ(t->type, 5, "const_type"); + + id = btf__add_volatile(btf, 3); + ASSERT_EQ(id, 4, "volatile_id"); + t = btf__type_by_id(btf, 4); + ASSERT_EQ(btf_kind(t), BTF_KIND_VOLATILE, "volatile_kind"); + ASSERT_EQ(t->type, 3, "volatile_type"); + + id = btf__add_restrict(btf, 4); + ASSERT_EQ(id, 5, "restrict_id"); + t = btf__type_by_id(btf, 5); + ASSERT_EQ(btf_kind(t), BTF_KIND_RESTRICT, "restrict_kind"); + ASSERT_EQ(t->type, 4, "restrict_type"); + + /* ARRAY */ + id = btf__add_array(btf, 1, 2, 10); /* int *[10] */ + ASSERT_EQ(id, 6, "array_id"); + t = btf__type_by_id(btf, 6); + ASSERT_EQ(btf_kind(t), BTF_KIND_ARRAY, "array_kind"); + ASSERT_EQ(btf_array(t)->index_type, 1, "array_index_type"); + ASSERT_EQ(btf_array(t)->type, 2, "array_elem_type"); + ASSERT_EQ(btf_array(t)->nelems, 10, "array_nelems"); + + /* STRUCT */ + err = btf__add_field(btf, "field", 1, 0, 0); + ASSERT_ERR(err, "no_struct_field"); + id = btf__add_struct(btf, "s1", 8); + ASSERT_EQ(id, 7, "struct_id"); + err = btf__add_field(btf, "f1", 1, 0, 0); + ASSERT_OK(err, "f1_res"); + err = btf__add_field(btf, "f2", 1, 32, 16); + ASSERT_OK(err, "f2_res"); + + t = btf__type_by_id(btf, 7); + ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "s1", "struct_name"); + ASSERT_EQ(btf_kind(t), BTF_KIND_STRUCT, 
"struct_kind"); + ASSERT_EQ(btf_vlen(t), 2, "struct_vlen"); + ASSERT_EQ(btf_kflag(t), true, "struct_kflag"); + ASSERT_EQ(t->size, 8, "struct_sz"); + m = btf_members(t) + 0; + ASSERT_STREQ(btf__str_by_offset(btf, m->name_off), "f1", "f1_name"); + ASSERT_EQ(m->type, 1, "f1_type"); + ASSERT_EQ(btf_member_bit_offset(t, 0), 0, "f1_bit_off"); + ASSERT_EQ(btf_member_bitfield_size(t, 0), 0, "f1_bit_sz"); + m = btf_members(t) + 1; + ASSERT_STREQ(btf__str_by_offset(btf, m->name_off), "f2", "f2_name"); + ASSERT_EQ(m->type, 1, "f2_type"); + ASSERT_EQ(btf_member_bit_offset(t, 1), 32, "f2_bit_off"); + ASSERT_EQ(btf_member_bitfield_size(t, 1), 16, "f2_bit_sz"); + + /* UNION */ + id = btf__add_union(btf, "u1", 8); + ASSERT_EQ(id, 8, "union_id"); + + /* invalid, non-zero offset */ + err = btf__add_field(btf, "field", 1, 1, 0); + ASSERT_ERR(err, "no_struct_field"); + + err = btf__add_field(btf, "f1", 1, 0, 16); + ASSERT_OK(err, "f1_res"); + + t = btf__type_by_id(btf, 8); + ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "u1", "union_name"); + ASSERT_EQ(btf_kind(t), BTF_KIND_UNION, "union_kind"); + ASSERT_EQ(btf_vlen(t), 1, "union_vlen"); + ASSERT_EQ(btf_kflag(t), true, "union_kflag"); + ASSERT_EQ(t->size, 8, "union_sz"); + m = btf_members(t) + 0; + ASSERT_STREQ(btf__str_by_offset(btf, m->name_off), "f1", "f1_name"); + ASSERT_EQ(m->type, 1, "f1_type"); + ASSERT_EQ(btf_member_bit_offset(t, 0), 0, "f1_bit_off"); + ASSERT_EQ(btf_member_bitfield_size(t, 0), 16, "f1_bit_sz"); + + /* ENUM */ + id = btf__add_enum(btf, "e1", 4); + ASSERT_EQ(id, 9, "enum_id"); + err = btf__add_enum_value(btf, "v1", 1); + ASSERT_OK(err, "v1_res"); + err = btf__add_enum_value(btf, "v2", 2); + ASSERT_OK(err, "v2_res"); + + t = btf__type_by_id(btf, 9); + ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "e1", "enum_name"); + ASSERT_EQ(btf_kind(t), BTF_KIND_ENUM, "enum_kind"); + ASSERT_EQ(btf_vlen(t), 2, "enum_vlen"); + ASSERT_EQ(t->size, 4, "enum_sz"); + v = btf_enum(t) + 0; + 
ASSERT_STREQ(btf__str_by_offset(btf, v->name_off), "v1", "v1_name"); + ASSERT_EQ(v->val, 1, "v1_val"); + v = btf_enum(t) + 1; + ASSERT_STREQ(btf__str_by_offset(btf, v->name_off), "v2", "v2_name"); + ASSERT_EQ(v->val, 2, "v2_val"); + + /* FWDs */ + id = btf__add_fwd(btf, "struct_fwd", BTF_FWD_STRUCT); + ASSERT_EQ(id, 10, "struct_fwd_id"); + t = btf__type_by_id(btf, 10); + ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "struct_fwd", "fwd_name"); + ASSERT_EQ(btf_kind(t), BTF_KIND_FWD, "fwd_kind"); + ASSERT_EQ(btf_kflag(t), 0, "fwd_kflag"); + + id = btf__add_fwd(btf, "union_fwd", BTF_FWD_UNION); + ASSERT_EQ(id, 11, "union_fwd_id"); + t = btf__type_by_id(btf, 11); + ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "union_fwd", "fwd_name"); + ASSERT_EQ(btf_kind(t), BTF_KIND_FWD, "fwd_kind"); + ASSERT_EQ(btf_kflag(t), 1, "fwd_kflag"); + + id = btf__add_fwd(btf, "enum_fwd", BTF_FWD_ENUM); + ASSERT_EQ(id, 12, "enum_fwd_id"); + t = btf__type_by_id(btf, 12); + ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "enum_fwd", "fwd_name"); + ASSERT_EQ(btf_kind(t), BTF_KIND_ENUM, "enum_fwd_kind"); + ASSERT_EQ(btf_vlen(t), 0, "enum_fwd_kind"); + ASSERT_EQ(t->size, 4, "enum_fwd_sz"); + + /* TYPEDEF */ + id = btf__add_typedef(btf, "typedef1", 1); + ASSERT_EQ(id, 13, "typedef_fwd_id"); + t = btf__type_by_id(btf, 13); + ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "typedef1", "typedef_name"); + ASSERT_EQ(btf_kind(t), BTF_KIND_TYPEDEF, "typedef_kind"); + ASSERT_EQ(t->type, 1, "typedef_type"); + + /* FUNC & FUNC_PROTO */ + id = btf__add_func(btf, "func1", BTF_FUNC_GLOBAL, 15); + ASSERT_EQ(id, 14, "func_id"); + t = btf__type_by_id(btf, 14); + ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "func1", "func_name"); + ASSERT_EQ(t->type, 15, "func_type"); + ASSERT_EQ(btf_kind(t), BTF_KIND_FUNC, "func_kind"); + ASSERT_EQ(btf_vlen(t), BTF_FUNC_GLOBAL, "func_vlen"); + + id = btf__add_func_proto(btf, 1); + ASSERT_EQ(id, 15, "func_proto_id"); + err = btf__add_func_param(btf, "p1", 1); 
+ ASSERT_OK(err, "p1_res"); + err = btf__add_func_param(btf, "p2", 2); + ASSERT_OK(err, "p2_res"); + + t = btf__type_by_id(btf, 15); + ASSERT_EQ(btf_kind(t), BTF_KIND_FUNC_PROTO, "func_proto_kind"); + ASSERT_EQ(btf_vlen(t), 2, "func_proto_vlen"); + ASSERT_EQ(t->type, 1, "func_proto_ret_type"); + p = btf_params(t) + 0; + ASSERT_STREQ(btf__str_by_offset(btf, p->name_off), "p1", "p1_name"); + ASSERT_EQ(p->type, 1, "p1_type"); + p = btf_params(t) + 1; + ASSERT_STREQ(btf__str_by_offset(btf, p->name_off), "p2", "p2_name"); + ASSERT_EQ(p->type, 2, "p2_type"); + + /* VAR */ + id = btf__add_var(btf, "var1", BTF_VAR_GLOBAL_ALLOCATED, 1); + ASSERT_EQ(id, 16, "var_id"); + t = btf__type_by_id(btf, 16); + ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "var1", "var_name"); + ASSERT_EQ(btf_kind(t), BTF_KIND_VAR, "var_kind"); + ASSERT_EQ(t->type, 1, "var_type"); + ASSERT_EQ(btf_var(t)->linkage, BTF_VAR_GLOBAL_ALLOCATED, "var_type"); + + /* DATASECT */ + id = btf__add_datasec(btf, "datasec1", 12); + ASSERT_EQ(id, 17, "datasec_id"); + err = btf__add_datasec_var_info(btf, 1, 4, 8); + ASSERT_OK(err, "v1_res"); + + t = btf__type_by_id(btf, 17); + ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "datasec1", "datasec_name"); + ASSERT_EQ(t->size, 12, "datasec_sz"); + ASSERT_EQ(btf_kind(t), BTF_KIND_DATASEC, "datasec_kind"); + ASSERT_EQ(btf_vlen(t), 1, "datasec_vlen"); + vi = btf_var_secinfos(t) + 0; + ASSERT_EQ(vi->type, 1, "v1_type"); + ASSERT_EQ(vi->offset, 4, "v1_off"); + ASSERT_EQ(vi->size, 8, "v1_sz"); + + btf__free(btf); +} diff --git a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c index f259085cca6a..9781d85cb223 100644 --- a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c @@ -12,10 +12,13 @@ #include "progs/test_cls_redirect.h" #include "test_cls_redirect.skel.h" +#include "test_cls_redirect_subprogs.skel.h" #define ENCAP_IP INADDR_LOOPBACK #define 
ENCAP_PORT (1234) +static int duration = 0; + struct addr_port { in_port_t port; union { @@ -361,30 +364,18 @@ static void close_fds(int *fds, int n) close(fds[i]); } -void test_cls_redirect(void) +static void test_cls_redirect_common(struct bpf_program *prog) { - struct test_cls_redirect *skel = NULL; struct bpf_prog_test_run_attr tattr = {}; int families[] = { AF_INET, AF_INET6 }; struct sockaddr_storage ss; struct sockaddr *addr; socklen_t slen; int i, j, err; - int servers[__NR_KIND][ARRAY_SIZE(families)] = {}; int conns[__NR_KIND][ARRAY_SIZE(families)] = {}; struct tuple tuples[__NR_KIND][ARRAY_SIZE(families)]; - skel = test_cls_redirect__open(); - if (CHECK_FAIL(!skel)) - return; - - skel->rodata->ENCAPSULATION_IP = htonl(ENCAP_IP); - skel->rodata->ENCAPSULATION_PORT = htons(ENCAP_PORT); - - if (CHECK_FAIL(test_cls_redirect__load(skel))) - goto cleanup; - addr = (struct sockaddr *)&ss; for (i = 0; i < ARRAY_SIZE(families); i++) { slen = prepare_addr(&ss, families[i]); @@ -402,7 +393,7 @@ void test_cls_redirect(void) goto cleanup; } - tattr.prog_fd = bpf_program__fd(skel->progs.cls_redirect); + tattr.prog_fd = bpf_program__fd(prog); for (i = 0; i < ARRAY_SIZE(tests); i++) { struct test_cfg *test = &tests[i]; @@ -450,7 +441,58 @@ void test_cls_redirect(void) } cleanup: - test_cls_redirect__destroy(skel); close_fds((int *)servers, sizeof(servers) / sizeof(servers[0][0])); close_fds((int *)conns, sizeof(conns) / sizeof(conns[0][0])); } + +static void test_cls_redirect_inlined(void) +{ + struct test_cls_redirect *skel; + int err; + + skel = test_cls_redirect__open(); + if (CHECK(!skel, "skel_open", "failed\n")) + return; + + skel->rodata->ENCAPSULATION_IP = htonl(ENCAP_IP); + skel->rodata->ENCAPSULATION_PORT = htons(ENCAP_PORT); + + err = test_cls_redirect__load(skel); + if (CHECK(err, "skel_load", "failed: %d\n", err)) + goto cleanup; + + test_cls_redirect_common(skel->progs.cls_redirect); + +cleanup: + test_cls_redirect__destroy(skel); +} + +static void 
test_cls_redirect_subprogs(void) +{ + struct test_cls_redirect_subprogs *skel; + int err; + + skel = test_cls_redirect_subprogs__open(); + if (CHECK(!skel, "skel_open", "failed\n")) + return; + + skel->rodata->ENCAPSULATION_IP = htonl(ENCAP_IP); + skel->rodata->ENCAPSULATION_PORT = htons(ENCAP_PORT); + + err = test_cls_redirect_subprogs__load(skel); + if (CHECK(err, "skel_load", "failed: %d\n", err)) + goto cleanup; + + test_cls_redirect_common(skel->progs.cls_redirect); + +cleanup: + test_cls_redirect_subprogs__destroy(skel); +} + +void test_cls_redirect(void) +{ + if (test__start_subtest("cls_redirect_inlined")) + test_cls_redirect_inlined(); + if (test__start_subtest("cls_redirect_subprogs")) + test_cls_redirect_subprogs(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/core_autosize.c b/tools/testing/selftests/bpf/prog_tests/core_autosize.c new file mode 100644 index 000000000000..981c251453d9 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/core_autosize.c @@ -0,0 +1,225 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include <test_progs.h> +#include <bpf/btf.h> + +/* real layout and sizes according to test's (32-bit) BTF + * needs to be defined before skeleton is included */ +struct test_struct___real { + unsigned int ptr; /* can't use `void *`, it is always 8 byte in BPF target */ + unsigned int val2; + unsigned long long val1; + unsigned short val3; + unsigned char val4; + unsigned char _pad; +}; + +#include "test_core_autosize.skel.h" + +static int duration = 0; + +static struct { + unsigned long long ptr_samesized; + unsigned long long val1_samesized; + unsigned long long val2_samesized; + unsigned long long val3_samesized; + unsigned long long val4_samesized; + struct test_struct___real output_samesized; + + unsigned long long ptr_downsized; + unsigned long long val1_downsized; + unsigned long long val2_downsized; + unsigned long long val3_downsized; + unsigned long long val4_downsized; + struct 
test_struct___real output_downsized; + + unsigned long long ptr_probed; + unsigned long long val1_probed; + unsigned long long val2_probed; + unsigned long long val3_probed; + unsigned long long val4_probed; + + unsigned long long ptr_signed; + unsigned long long val1_signed; + unsigned long long val2_signed; + unsigned long long val3_signed; + unsigned long long val4_signed; + struct test_struct___real output_signed; +} out; + +void test_core_autosize(void) +{ + char btf_file[] = "/tmp/core_autosize.btf.XXXXXX"; + int err, fd = -1, zero = 0; + int char_id, short_id, int_id, long_long_id, void_ptr_id, id; + struct test_core_autosize* skel = NULL; + struct bpf_object_load_attr load_attr = {}; + struct bpf_program *prog; + struct bpf_map *bss_map; + struct btf *btf = NULL; + size_t written; + const void *raw_data; + __u32 raw_sz; + FILE *f = NULL; + + btf = btf__new_empty(); + if (!ASSERT_OK_PTR(btf, "empty_btf")) + return; + /* Emit the following struct with 32-bit pointer size: + * + * struct test_struct { + * void *ptr; + * unsigned long val2; + * unsigned long long val1; + * unsigned short val3; + * unsigned char val4; + * char: 8; + * }; + * + * This struct is going to be used as the "kernel BTF" for this test. + * It's equivalent memory-layout-wise to test_struct__real above. 
+ */ + + /* force 32-bit pointer size */ + btf__set_pointer_size(btf, 4); + + char_id = btf__add_int(btf, "unsigned char", 1, 0); + ASSERT_EQ(char_id, 1, "char_id"); + short_id = btf__add_int(btf, "unsigned short", 2, 0); + ASSERT_EQ(short_id, 2, "short_id"); + /* "long unsigned int" of 4 byte size tells BTF that sizeof(void *) == 4 */ + int_id = btf__add_int(btf, "long unsigned int", 4, 0); + ASSERT_EQ(int_id, 3, "int_id"); + long_long_id = btf__add_int(btf, "unsigned long long", 8, 0); + ASSERT_EQ(long_long_id, 4, "long_long_id"); + void_ptr_id = btf__add_ptr(btf, 0); + ASSERT_EQ(void_ptr_id, 5, "void_ptr_id"); + + id = btf__add_struct(btf, "test_struct", 20 /* bytes */); + ASSERT_EQ(id, 6, "struct_id"); + err = btf__add_field(btf, "ptr", void_ptr_id, 0, 0); + err = err ?: btf__add_field(btf, "val2", int_id, 32, 0); + err = err ?: btf__add_field(btf, "val1", long_long_id, 64, 0); + err = err ?: btf__add_field(btf, "val3", short_id, 128, 0); + err = err ?: btf__add_field(btf, "val4", char_id, 144, 0); + ASSERT_OK(err, "struct_fields"); + + fd = mkstemp(btf_file); + if (CHECK(fd < 0, "btf_tmp", "failed to create file: %d\n", fd)) + goto cleanup; + f = fdopen(fd, "w"); + if (!ASSERT_OK_PTR(f, "btf_fdopen")) + goto cleanup; + + raw_data = btf__get_raw_data(btf, &raw_sz); + if (!ASSERT_OK_PTR(raw_data, "raw_data")) + goto cleanup; + written = fwrite(raw_data, 1, raw_sz, f); + if (CHECK(written != raw_sz, "btf_write", "written: %zu, errno: %d\n", written, errno)) + goto cleanup; + fflush(f); + fclose(f); + f = NULL; + close(fd); + fd = -1; + + /* open and load BPF program with custom BTF as the kernel BTF */ + skel = test_core_autosize__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + /* disable handle_signed() for now */ + prog = bpf_object__find_program_by_name(skel->obj, "handle_signed"); + if (!ASSERT_OK_PTR(prog, "prog_find")) + goto cleanup; + bpf_program__set_autoload(prog, false); + + load_attr.obj = skel->obj; + load_attr.target_btf_path = 
btf_file; + err = bpf_object__load_xattr(&load_attr); + if (!ASSERT_OK(err, "prog_load")) + goto cleanup; + + prog = bpf_object__find_program_by_name(skel->obj, "handle_samesize"); + if (!ASSERT_OK_PTR(prog, "prog_find")) + goto cleanup; + skel->links.handle_samesize = bpf_program__attach(prog); + if (!ASSERT_OK_PTR(skel->links.handle_samesize, "prog_attach")) + goto cleanup; + + prog = bpf_object__find_program_by_name(skel->obj, "handle_downsize"); + if (!ASSERT_OK_PTR(prog, "prog_find")) + goto cleanup; + skel->links.handle_downsize = bpf_program__attach(prog); + if (!ASSERT_OK_PTR(skel->links.handle_downsize, "prog_attach")) + goto cleanup; + + prog = bpf_object__find_program_by_name(skel->obj, "handle_probed"); + if (!ASSERT_OK_PTR(prog, "prog_find")) + goto cleanup; + skel->links.handle_probed = bpf_program__attach(prog); + if (!ASSERT_OK_PTR(skel->links.handle_probed, "prog_attach")) + goto cleanup; + + usleep(1); + + bss_map = bpf_object__find_map_by_name(skel->obj, "test_cor.bss"); + if (!ASSERT_OK_PTR(bss_map, "bss_map_find")) + goto cleanup; + + err = bpf_map_lookup_elem(bpf_map__fd(bss_map), &zero, (void *)&out); + if (!ASSERT_OK(err, "bss_lookup")) + goto cleanup; + + ASSERT_EQ(out.ptr_samesized, 0x01020304, "ptr_samesized"); + ASSERT_EQ(out.val1_samesized, 0x1020304050607080, "val1_samesized"); + ASSERT_EQ(out.val2_samesized, 0x0a0b0c0d, "val2_samesized"); + ASSERT_EQ(out.val3_samesized, 0xfeed, "val3_samesized"); + ASSERT_EQ(out.val4_samesized, 0xb9, "val4_samesized"); + ASSERT_EQ(out.output_samesized.ptr, 0x01020304, "ptr_samesized"); + ASSERT_EQ(out.output_samesized.val1, 0x1020304050607080, "val1_samesized"); + ASSERT_EQ(out.output_samesized.val2, 0x0a0b0c0d, "val2_samesized"); + ASSERT_EQ(out.output_samesized.val3, 0xfeed, "val3_samesized"); + ASSERT_EQ(out.output_samesized.val4, 0xb9, "val4_samesized"); + + ASSERT_EQ(out.ptr_downsized, 0x01020304, "ptr_downsized"); + ASSERT_EQ(out.val1_downsized, 0x1020304050607080, "val1_downsized"); + 
ASSERT_EQ(out.val2_downsized, 0x0a0b0c0d, "val2_downsized"); + ASSERT_EQ(out.val3_downsized, 0xfeed, "val3_downsized"); + ASSERT_EQ(out.val4_downsized, 0xb9, "val4_downsized"); + ASSERT_EQ(out.output_downsized.ptr, 0x01020304, "ptr_downsized"); + ASSERT_EQ(out.output_downsized.val1, 0x1020304050607080, "val1_downsized"); + ASSERT_EQ(out.output_downsized.val2, 0x0a0b0c0d, "val2_downsized"); + ASSERT_EQ(out.output_downsized.val3, 0xfeed, "val3_downsized"); + ASSERT_EQ(out.output_downsized.val4, 0xb9, "val4_downsized"); + + ASSERT_EQ(out.ptr_probed, 0x01020304, "ptr_probed"); + ASSERT_EQ(out.val1_probed, 0x1020304050607080, "val1_probed"); + ASSERT_EQ(out.val2_probed, 0x0a0b0c0d, "val2_probed"); + ASSERT_EQ(out.val3_probed, 0xfeed, "val3_probed"); + ASSERT_EQ(out.val4_probed, 0xb9, "val4_probed"); + + test_core_autosize__destroy(skel); + skel = NULL; + + /* now re-load with handle_signed() enabled, it should fail loading */ + skel = test_core_autosize__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + load_attr.obj = skel->obj; + load_attr.target_btf_path = btf_file; + err = bpf_object__load_xattr(&load_attr); + if (!ASSERT_ERR(err, "bad_prog_load")) + goto cleanup; + +cleanup: + if (f) + fclose(f); + if (fd >= 0) + close(fd); + remove(btf_file); + btf__free(btf); + test_core_autosize__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c index a54eafc5e4b3..30e40ff4b0d8 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c +++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c @@ -3,6 +3,9 @@ #include "progs/core_reloc_types.h" #include <sys/mman.h> #include <sys/syscall.h> +#include <bpf/btf.h> + +static int duration = 0; #define STRUCT_TO_CHAR_PTR(struct_name) (const char *)&(struct struct_name) @@ -177,14 +180,13 @@ .fails = true, \ } -#define EXISTENCE_CASE_COMMON(name) \ +#define FIELD_EXISTS_CASE_COMMON(name) \ .case_name = #name, \ .bpf_obj_file = 
"test_core_reloc_existence.o", \ - .btf_src_file = "btf__core_reloc_" #name ".o", \ - .relaxed_core_relocs = true + .btf_src_file = "btf__core_reloc_" #name ".o" \ -#define EXISTENCE_ERR_CASE(name) { \ - EXISTENCE_CASE_COMMON(name), \ +#define FIELD_EXISTS_ERR_CASE(name) { \ + FIELD_EXISTS_CASE_COMMON(name), \ .fails = true, \ } @@ -253,6 +255,61 @@ .fails = true, \ } +#define TYPE_BASED_CASE_COMMON(name) \ + .case_name = #name, \ + .bpf_obj_file = "test_core_reloc_type_based.o", \ + .btf_src_file = "btf__core_reloc_" #name ".o" \ + +#define TYPE_BASED_CASE(name, ...) { \ + TYPE_BASED_CASE_COMMON(name), \ + .output = STRUCT_TO_CHAR_PTR(core_reloc_type_based_output) \ + __VA_ARGS__, \ + .output_len = sizeof(struct core_reloc_type_based_output), \ +} + +#define TYPE_BASED_ERR_CASE(name) { \ + TYPE_BASED_CASE_COMMON(name), \ + .fails = true, \ +} + +#define TYPE_ID_CASE_COMMON(name) \ + .case_name = #name, \ + .bpf_obj_file = "test_core_reloc_type_id.o", \ + .btf_src_file = "btf__core_reloc_" #name ".o" \ + +#define TYPE_ID_CASE(name, setup_fn) { \ + TYPE_ID_CASE_COMMON(name), \ + .output = STRUCT_TO_CHAR_PTR(core_reloc_type_id_output) {}, \ + .output_len = sizeof(struct core_reloc_type_id_output), \ + .setup = setup_fn, \ +} + +#define TYPE_ID_ERR_CASE(name) { \ + TYPE_ID_CASE_COMMON(name), \ + .fails = true, \ +} + +#define ENUMVAL_CASE_COMMON(name) \ + .case_name = #name, \ + .bpf_obj_file = "test_core_reloc_enumval.o", \ + .btf_src_file = "btf__core_reloc_" #name ".o" \ + +#define ENUMVAL_CASE(name, ...) 
{ \ + ENUMVAL_CASE_COMMON(name), \ + .output = STRUCT_TO_CHAR_PTR(core_reloc_enumval_output) \ + __VA_ARGS__, \ + .output_len = sizeof(struct core_reloc_enumval_output), \ +} + +#define ENUMVAL_ERR_CASE(name) { \ + ENUMVAL_CASE_COMMON(name), \ + .fails = true, \ +} + +struct core_reloc_test_case; + +typedef int (*setup_test_fn)(struct core_reloc_test_case *test); + struct core_reloc_test_case { const char *case_name; const char *bpf_obj_file; @@ -264,8 +321,136 @@ struct core_reloc_test_case { bool fails; bool relaxed_core_relocs; bool direct_raw_tp; + setup_test_fn setup; }; +static int find_btf_type(const struct btf *btf, const char *name, __u32 kind) +{ + int id; + + id = btf__find_by_name_kind(btf, name, kind); + if (CHECK(id <= 0, "find_type_id", "failed to find '%s', kind %d: %d\n", name, kind, id)) + return -1; + + return id; +} + +static int setup_type_id_case_local(struct core_reloc_test_case *test) +{ + struct core_reloc_type_id_output *exp = (void *)test->output; + struct btf *local_btf = btf__parse(test->bpf_obj_file, NULL); + struct btf *targ_btf = btf__parse(test->btf_src_file, NULL); + const struct btf_type *t; + const char *name; + int i; + + if (CHECK(IS_ERR(local_btf), "local_btf", "failed: %ld\n", PTR_ERR(local_btf)) || + CHECK(IS_ERR(targ_btf), "targ_btf", "failed: %ld\n", PTR_ERR(targ_btf))) { + btf__free(local_btf); + btf__free(targ_btf); + return -EINVAL; + } + + exp->local_anon_struct = -1; + exp->local_anon_union = -1; + exp->local_anon_enum = -1; + exp->local_anon_func_proto_ptr = -1; + exp->local_anon_void_ptr = -1; + exp->local_anon_arr = -1; + + for (i = 1; i <= btf__get_nr_types(local_btf); i++) + { + t = btf__type_by_id(local_btf, i); + /* we are interested only in anonymous types */ + if (t->name_off) + continue; + + if (btf_is_struct(t) && btf_vlen(t) && + (name = btf__name_by_offset(local_btf, btf_members(t)[0].name_off)) && + strcmp(name, "marker_field") == 0) { + exp->local_anon_struct = i; + } else if (btf_is_union(t) && 
btf_vlen(t) && + (name = btf__name_by_offset(local_btf, btf_members(t)[0].name_off)) && + strcmp(name, "marker_field") == 0) { + exp->local_anon_union = i; + } else if (btf_is_enum(t) && btf_vlen(t) && + (name = btf__name_by_offset(local_btf, btf_enum(t)[0].name_off)) && + strcmp(name, "MARKER_ENUM_VAL") == 0) { + exp->local_anon_enum = i; + } else if (btf_is_ptr(t) && (t = btf__type_by_id(local_btf, t->type))) { + if (btf_is_func_proto(t) && (t = btf__type_by_id(local_btf, t->type)) && + btf_is_int(t) && (name = btf__name_by_offset(local_btf, t->name_off)) && + strcmp(name, "_Bool") == 0) { + /* ptr -> func_proto -> _Bool */ + exp->local_anon_func_proto_ptr = i; + } else if (btf_is_void(t)) { + /* ptr -> void */ + exp->local_anon_void_ptr = i; + } + } else if (btf_is_array(t) && (t = btf__type_by_id(local_btf, btf_array(t)->type)) && + btf_is_int(t) && (name = btf__name_by_offset(local_btf, t->name_off)) && + strcmp(name, "_Bool") == 0) { + /* _Bool[] */ + exp->local_anon_arr = i; + } + } + + exp->local_struct = find_btf_type(local_btf, "a_struct", BTF_KIND_STRUCT); + exp->local_union = find_btf_type(local_btf, "a_union", BTF_KIND_UNION); + exp->local_enum = find_btf_type(local_btf, "an_enum", BTF_KIND_ENUM); + exp->local_int = find_btf_type(local_btf, "int", BTF_KIND_INT); + exp->local_struct_typedef = find_btf_type(local_btf, "named_struct_typedef", BTF_KIND_TYPEDEF); + exp->local_func_proto_typedef = find_btf_type(local_btf, "func_proto_typedef", BTF_KIND_TYPEDEF); + exp->local_arr_typedef = find_btf_type(local_btf, "arr_typedef", BTF_KIND_TYPEDEF); + + btf__free(local_btf); + btf__free(targ_btf); + return 0; +} + +static int setup_type_id_case_success(struct core_reloc_test_case *test) { + struct core_reloc_type_id_output *exp = (void *)test->output; + struct btf *targ_btf = btf__parse(test->btf_src_file, NULL); + int err; + + err = setup_type_id_case_local(test); + if (err) + return err; + + targ_btf = btf__parse(test->btf_src_file, NULL); + + 
exp->targ_struct = find_btf_type(targ_btf, "a_struct", BTF_KIND_STRUCT); + exp->targ_union = find_btf_type(targ_btf, "a_union", BTF_KIND_UNION); + exp->targ_enum = find_btf_type(targ_btf, "an_enum", BTF_KIND_ENUM); + exp->targ_int = find_btf_type(targ_btf, "int", BTF_KIND_INT); + exp->targ_struct_typedef = find_btf_type(targ_btf, "named_struct_typedef", BTF_KIND_TYPEDEF); + exp->targ_func_proto_typedef = find_btf_type(targ_btf, "func_proto_typedef", BTF_KIND_TYPEDEF); + exp->targ_arr_typedef = find_btf_type(targ_btf, "arr_typedef", BTF_KIND_TYPEDEF); + + btf__free(targ_btf); + return 0; +} + +static int setup_type_id_case_failure(struct core_reloc_test_case *test) +{ + struct core_reloc_type_id_output *exp = (void *)test->output; + int err; + + err = setup_type_id_case_local(test); + if (err) + return err; + + exp->targ_struct = 0; + exp->targ_union = 0; + exp->targ_enum = 0; + exp->targ_int = 0; + exp->targ_struct_typedef = 0; + exp->targ_func_proto_typedef = 0; + exp->targ_arr_typedef = 0; + + return 0; +} + static struct core_reloc_test_case test_cases[] = { /* validate we can find kernel image and use its BTF for relocs */ { @@ -364,7 +549,7 @@ static struct core_reloc_test_case test_cases[] = { /* validate field existence checks */ { - EXISTENCE_CASE_COMMON(existence), + FIELD_EXISTS_CASE_COMMON(existence), .input = STRUCT_TO_CHAR_PTR(core_reloc_existence) { .a = 1, .b = 2, @@ -388,7 +573,7 @@ static struct core_reloc_test_case test_cases[] = { .output_len = sizeof(struct core_reloc_existence_output), }, { - EXISTENCE_CASE_COMMON(existence___minimal), + FIELD_EXISTS_CASE_COMMON(existence___minimal), .input = STRUCT_TO_CHAR_PTR(core_reloc_existence___minimal) { .a = 42, }, @@ -408,12 +593,12 @@ static struct core_reloc_test_case test_cases[] = { .output_len = sizeof(struct core_reloc_existence_output), }, - EXISTENCE_ERR_CASE(existence__err_int_sz), - EXISTENCE_ERR_CASE(existence__err_int_type), - EXISTENCE_ERR_CASE(existence__err_int_kind), - 
EXISTENCE_ERR_CASE(existence__err_arr_kind), - EXISTENCE_ERR_CASE(existence__err_arr_value_type), - EXISTENCE_ERR_CASE(existence__err_struct_type), + FIELD_EXISTS_ERR_CASE(existence__err_int_sz), + FIELD_EXISTS_ERR_CASE(existence__err_int_type), + FIELD_EXISTS_ERR_CASE(existence__err_int_kind), + FIELD_EXISTS_ERR_CASE(existence__err_arr_kind), + FIELD_EXISTS_ERR_CASE(existence__err_arr_value_type), + FIELD_EXISTS_ERR_CASE(existence__err_struct_type), /* bitfield relocation checks */ BITFIELDS_CASE(bitfields, { @@ -452,11 +637,117 @@ static struct core_reloc_test_case test_cases[] = { /* size relocation checks */ SIZE_CASE(size), SIZE_CASE(size___diff_sz), + SIZE_ERR_CASE(size___err_ambiguous), + + /* validate type existence and size relocations */ + TYPE_BASED_CASE(type_based, { + .struct_exists = 1, + .union_exists = 1, + .enum_exists = 1, + .typedef_named_struct_exists = 1, + .typedef_anon_struct_exists = 1, + .typedef_struct_ptr_exists = 1, + .typedef_int_exists = 1, + .typedef_enum_exists = 1, + .typedef_void_ptr_exists = 1, + .typedef_func_proto_exists = 1, + .typedef_arr_exists = 1, + .struct_sz = sizeof(struct a_struct), + .union_sz = sizeof(union a_union), + .enum_sz = sizeof(enum an_enum), + .typedef_named_struct_sz = sizeof(named_struct_typedef), + .typedef_anon_struct_sz = sizeof(anon_struct_typedef), + .typedef_struct_ptr_sz = sizeof(struct_ptr_typedef), + .typedef_int_sz = sizeof(int_typedef), + .typedef_enum_sz = sizeof(enum_typedef), + .typedef_void_ptr_sz = sizeof(void_ptr_typedef), + .typedef_func_proto_sz = sizeof(func_proto_typedef), + .typedef_arr_sz = sizeof(arr_typedef), + }), + TYPE_BASED_CASE(type_based___all_missing, { + /* all zeros */ + }), + TYPE_BASED_CASE(type_based___diff_sz, { + .struct_exists = 1, + .union_exists = 1, + .enum_exists = 1, + .typedef_named_struct_exists = 1, + .typedef_anon_struct_exists = 1, + .typedef_struct_ptr_exists = 1, + .typedef_int_exists = 1, + .typedef_enum_exists = 1, + .typedef_void_ptr_exists = 1, + 
.typedef_func_proto_exists = 1, + .typedef_arr_exists = 1, + .struct_sz = sizeof(struct a_struct___diff_sz), + .union_sz = sizeof(union a_union___diff_sz), + .enum_sz = sizeof(enum an_enum___diff_sz), + .typedef_named_struct_sz = sizeof(named_struct_typedef___diff_sz), + .typedef_anon_struct_sz = sizeof(anon_struct_typedef___diff_sz), + .typedef_struct_ptr_sz = sizeof(struct_ptr_typedef___diff_sz), + .typedef_int_sz = sizeof(int_typedef___diff_sz), + .typedef_enum_sz = sizeof(enum_typedef___diff_sz), + .typedef_void_ptr_sz = sizeof(void_ptr_typedef___diff_sz), + .typedef_func_proto_sz = sizeof(func_proto_typedef___diff_sz), + .typedef_arr_sz = sizeof(arr_typedef___diff_sz), + }), + TYPE_BASED_CASE(type_based___incompat, { + .enum_exists = 1, + .enum_sz = sizeof(enum an_enum), + }), + TYPE_BASED_CASE(type_based___fn_wrong_args, { + .struct_exists = 1, + .struct_sz = sizeof(struct a_struct), + }), + + /* BTF_TYPE_ID_LOCAL/BTF_TYPE_ID_TARGET tests */ + TYPE_ID_CASE(type_id, setup_type_id_case_success), + TYPE_ID_CASE(type_id___missing_targets, setup_type_id_case_failure), + + /* Enumerator value existence and value relocations */ + ENUMVAL_CASE(enumval, { + .named_val1_exists = true, + .named_val2_exists = true, + .named_val3_exists = true, + .anon_val1_exists = true, + .anon_val2_exists = true, + .anon_val3_exists = true, + .named_val1 = 1, + .named_val2 = 2, + .anon_val1 = 0x10, + .anon_val2 = 0x20, + }), + ENUMVAL_CASE(enumval___diff, { + .named_val1_exists = true, + .named_val2_exists = true, + .named_val3_exists = true, + .anon_val1_exists = true, + .anon_val2_exists = true, + .anon_val3_exists = true, + .named_val1 = 101, + .named_val2 = 202, + .anon_val1 = 0x11, + .anon_val2 = 0x22, + }), + ENUMVAL_CASE(enumval___val3_missing, { + .named_val1_exists = true, + .named_val2_exists = true, + .named_val3_exists = false, + .anon_val1_exists = true, + .anon_val2_exists = true, + .anon_val3_exists = false, + .named_val1 = 111, + .named_val2 = 222, + .anon_val1 = 0x111, 
+ .anon_val2 = 0x222, + }), + ENUMVAL_ERR_CASE(enumval___err_missing), }; struct data { char in[256]; char out[256]; + bool skip; uint64_t my_pid_tgid; }; @@ -472,7 +763,7 @@ void test_core_reloc(void) struct bpf_object_load_attr load_attr = {}; struct core_reloc_test_case *test_case; const char *tp_name, *probe_name; - int err, duration = 0, i, equal; + int err, i, equal; struct bpf_link *link = NULL; struct bpf_map *data_map; struct bpf_program *prog; @@ -488,11 +779,13 @@ void test_core_reloc(void) if (!test__start_subtest(test_case->case_name)) continue; - DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, - .relaxed_core_relocs = test_case->relaxed_core_relocs, - ); + if (test_case->setup) { + err = test_case->setup(test_case); + if (CHECK(err, "test_setup", "test #%d setup failed: %d\n", i, err)) + continue; + } - obj = bpf_object__open_file(test_case->bpf_obj_file, &opts); + obj = bpf_object__open_file(test_case->bpf_obj_file, NULL); if (CHECK(IS_ERR(obj), "obj_open", "failed to open '%s': %ld\n", test_case->bpf_obj_file, PTR_ERR(obj))) continue; @@ -515,15 +808,10 @@ void test_core_reloc(void) load_attr.log_level = 0; load_attr.target_btf_path = test_case->btf_src_file; err = bpf_object__load_xattr(&load_attr); - if (test_case->fails) { - CHECK(!err, "obj_load_fail", - "should fail to load prog '%s'\n", probe_name); + if (err) { + if (!test_case->fails) + CHECK(false, "obj_load", "failed to load prog '%s': %d\n", probe_name, err); goto cleanup; - } else { - if (CHECK(err, "obj_load", - "failed to load prog '%s': %d\n", - probe_name, err)) - goto cleanup; } data_map = bpf_object__find_map_by_name(obj, "test_cor.bss"); @@ -551,6 +839,16 @@ void test_core_reloc(void) /* trigger test run */ usleep(1); + if (data->skip) { + test__skip(); + goto cleanup; + } + + if (test_case->fails) { + CHECK(false, "obj_load_fail", "should fail to load prog '%s'\n", probe_name); + goto cleanup; + } + equal = memcmp(data->out, test_case->output, test_case->output_len) == 0; if 
(CHECK(!equal, "check_result", diff --git a/tools/testing/selftests/bpf/prog_tests/d_path.c b/tools/testing/selftests/bpf/prog_tests/d_path.c new file mode 100644 index 000000000000..0a577a248d34 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/d_path.c @@ -0,0 +1,157 @@ +// SPDX-License-Identifier: GPL-2.0 +#define _GNU_SOURCE +#include <test_progs.h> +#include <sys/stat.h> +#include <linux/sched.h> +#include <sys/syscall.h> + +#define MAX_PATH_LEN 128 +#define MAX_FILES 7 + +#include "test_d_path.skel.h" + +static int duration; + +static struct { + __u32 cnt; + char paths[MAX_FILES][MAX_PATH_LEN]; +} src; + +static int set_pathname(int fd, pid_t pid) +{ + char buf[MAX_PATH_LEN]; + + snprintf(buf, MAX_PATH_LEN, "/proc/%d/fd/%d", pid, fd); + return readlink(buf, src.paths[src.cnt++], MAX_PATH_LEN); +} + +static int trigger_fstat_events(pid_t pid) +{ + int sockfd = -1, procfd = -1, devfd = -1; + int localfd = -1, indicatorfd = -1; + int pipefd[2] = { -1, -1 }; + struct stat fileStat; + int ret = -1; + + /* unmountable pseudo-filesystems */ + if (CHECK(pipe(pipefd) < 0, "trigger", "pipe failed\n")) + return ret; + /* unmountable pseudo-filesystems */ + sockfd = socket(AF_INET, SOCK_STREAM, 0); + if (CHECK(sockfd < 0, "trigger", "socket failed\n")) + goto out_close; + /* mountable pseudo-filesystems */ + procfd = open("/proc/self/comm", O_RDONLY); + if (CHECK(procfd < 0, "trigger", "open /proc/self/comm failed\n")) + goto out_close; + devfd = open("/dev/urandom", O_RDONLY); + if (CHECK(devfd < 0, "trigger", "open /dev/urandom failed\n")) + goto out_close; + localfd = open("/tmp/d_path_loadgen.txt", O_CREAT | O_RDONLY, 0644); + if (CHECK(localfd < 0, "trigger", "open /tmp/d_path_loadgen.txt failed\n")) + goto out_close; + /* bpf_d_path will return path with (deleted) */ + remove("/tmp/d_path_loadgen.txt"); + indicatorfd = open("/tmp/", O_PATH); + if (CHECK(indicatorfd < 0, "trigger", "open /tmp/ failed\n")) + goto out_close; + + ret = set_pathname(pipefd[0], 
pid); + if (CHECK(ret < 0, "trigger", "set_pathname failed for pipe[0]\n")) + goto out_close; + ret = set_pathname(pipefd[1], pid); + if (CHECK(ret < 0, "trigger", "set_pathname failed for pipe[1]\n")) + goto out_close; + ret = set_pathname(sockfd, pid); + if (CHECK(ret < 0, "trigger", "set_pathname failed for socket\n")) + goto out_close; + ret = set_pathname(procfd, pid); + if (CHECK(ret < 0, "trigger", "set_pathname failed for proc\n")) + goto out_close; + ret = set_pathname(devfd, pid); + if (CHECK(ret < 0, "trigger", "set_pathname failed for dev\n")) + goto out_close; + ret = set_pathname(localfd, pid); + if (CHECK(ret < 0, "trigger", "set_pathname failed for file\n")) + goto out_close; + ret = set_pathname(indicatorfd, pid); + if (CHECK(ret < 0, "trigger", "set_pathname failed for dir\n")) + goto out_close; + + /* triggers vfs_getattr */ + fstat(pipefd[0], &fileStat); + fstat(pipefd[1], &fileStat); + fstat(sockfd, &fileStat); + fstat(procfd, &fileStat); + fstat(devfd, &fileStat); + fstat(localfd, &fileStat); + fstat(indicatorfd, &fileStat); + +out_close: + /* triggers filp_close */ + close(pipefd[0]); + close(pipefd[1]); + close(sockfd); + close(procfd); + close(devfd); + close(localfd); + close(indicatorfd); + return ret; +} + +void test_d_path(void) +{ + struct test_d_path__bss *bss; + struct test_d_path *skel; + int err; + + skel = test_d_path__open_and_load(); + if (CHECK(!skel, "setup", "d_path skeleton failed\n")) + goto cleanup; + + err = test_d_path__attach(skel); + if (CHECK(err, "setup", "attach failed: %d\n", err)) + goto cleanup; + + bss = skel->bss; + bss->my_pid = getpid(); + + err = trigger_fstat_events(bss->my_pid); + if (err < 0) + goto cleanup; + + if (CHECK(!bss->called_stat, + "stat", + "trampoline for security_inode_getattr was not called\n")) + goto cleanup; + + if (CHECK(!bss->called_close, + "close", + "trampoline for filp_close was not called\n")) + goto cleanup; + + for (int i = 0; i < MAX_FILES; i++) { + CHECK(strncmp(src.paths[i], 
bss->paths_stat[i], MAX_PATH_LEN), + "check", + "failed to get stat path[%d]: %s vs %s\n", + i, src.paths[i], bss->paths_stat[i]); + CHECK(strncmp(src.paths[i], bss->paths_close[i], MAX_PATH_LEN), + "check", + "failed to get close path[%d]: %s vs %s\n", + i, src.paths[i], bss->paths_close[i]); + /* The d_path helper returns size plus NUL char, hence + 1 */ + CHECK(bss->rets_stat[i] != strlen(bss->paths_stat[i]) + 1, + "check", + "failed to match stat return [%d]: %d vs %zd [%s]\n", + i, bss->rets_stat[i], strlen(bss->paths_stat[i]) + 1, + bss->paths_stat[i]); + CHECK(bss->rets_close[i] != strlen(bss->paths_stat[i]) + 1, + "check", + "failed to match stat return [%d]: %d vs %zd [%s]\n", + i, bss->rets_close[i], strlen(bss->paths_close[i]) + 1, + bss->paths_stat[i]); + } + +cleanup: + test_d_path__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c index 197d0d217b56..5c0448910426 100644 --- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c +++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c @@ -2,36 +2,79 @@ /* Copyright (c) 2019 Facebook */ #include <test_progs.h> #include <network_helpers.h> +#include <bpf/btf.h> + +typedef int (*test_cb)(struct bpf_object *obj); + +static int check_data_map(struct bpf_object *obj, int prog_cnt, bool reset) +{ + struct bpf_map *data_map = NULL, *map; + __u64 *result = NULL; + const int zero = 0; + __u32 duration = 0; + int ret = -1, i; + + result = malloc((prog_cnt + 32 /* spare */) * sizeof(__u64)); + if (CHECK(!result, "alloc_memory", "failed to alloc memory")) + return -ENOMEM; + + bpf_object__for_each_map(map, obj) + if (bpf_map__is_internal(map)) { + data_map = map; + break; + } + if (CHECK(!data_map, "find_data_map", "data map not found\n")) + goto out; + + ret = bpf_map_lookup_elem(bpf_map__fd(data_map), &zero, result); + if (CHECK(ret, "get_result", + "failed to get output data: %d\n", ret)) + goto out; + + for (i = 
0; i < prog_cnt; i++) { + if (CHECK(result[i] != 1, "result", + "fexit_bpf2bpf result[%d] failed err %llu\n", + i, result[i])) + goto out; + result[i] = 0; + } + if (reset) { + ret = bpf_map_update_elem(bpf_map__fd(data_map), &zero, result, 0); + if (CHECK(ret, "reset_result", "failed to reset result\n")) + goto out; + } + + ret = 0; +out: + free(result); + return ret; +} static void test_fexit_bpf2bpf_common(const char *obj_file, const char *target_obj_file, int prog_cnt, const char **prog_name, - bool run_prog) + bool run_prog, + test_cb cb) { - struct bpf_object *obj = NULL, *pkt_obj; - int err, pkt_fd, i; - struct bpf_link **link = NULL; + struct bpf_object *obj = NULL, *tgt_obj; struct bpf_program **prog = NULL; + struct bpf_link **link = NULL; __u32 duration = 0, retval; - struct bpf_map *data_map; - const int zero = 0; - __u64 *result = NULL; + int err, tgt_fd, i; err = bpf_prog_load(target_obj_file, BPF_PROG_TYPE_UNSPEC, - &pkt_obj, &pkt_fd); + &tgt_obj, &tgt_fd); if (CHECK(err, "tgt_prog_load", "file %s err %d errno %d\n", target_obj_file, err, errno)) return; DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, - .attach_prog_fd = pkt_fd, + .attach_prog_fd = tgt_fd, ); link = calloc(sizeof(struct bpf_link *), prog_cnt); prog = calloc(sizeof(struct bpf_program *), prog_cnt); - result = malloc((prog_cnt + 32 /* spare */) * sizeof(__u64)); - if (CHECK(!link || !prog || !result, "alloc_memory", - "failed to alloc memory")) + if (CHECK(!link || !prog, "alloc_memory", "failed to alloc memory")) goto close_prog; obj = bpf_object__open_file(obj_file, &opts); @@ -53,39 +96,33 @@ static void test_fexit_bpf2bpf_common(const char *obj_file, goto close_prog; } - if (!run_prog) - goto close_prog; + if (cb) { + err = cb(obj); + if (err) + goto close_prog; + } - data_map = bpf_object__find_map_by_name(obj, "fexit_bp.bss"); - if (CHECK(!data_map, "find_data_map", "data map not found\n")) + if (!run_prog) goto close_prog; - err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, 
sizeof(pkt_v6), + err = bpf_prog_test_run(tgt_fd, 1, &pkt_v6, sizeof(pkt_v6), NULL, NULL, &retval, &duration); CHECK(err || retval, "ipv6", "err %d errno %d retval %d duration %d\n", err, errno, retval, duration); - err = bpf_map_lookup_elem(bpf_map__fd(data_map), &zero, result); - if (CHECK(err, "get_result", - "failed to get output data: %d\n", err)) + if (check_data_map(obj, prog_cnt, false)) goto close_prog; - for (i = 0; i < prog_cnt; i++) - if (CHECK(result[i] != 1, "result", "fexit_bpf2bpf failed err %llu\n", - result[i])) - goto close_prog; - close_prog: for (i = 0; i < prog_cnt; i++) if (!IS_ERR_OR_NULL(link[i])) bpf_link__destroy(link[i]); if (!IS_ERR_OR_NULL(obj)) bpf_object__close(obj); - bpf_object__close(pkt_obj); + bpf_object__close(tgt_obj); free(link); free(prog); - free(result); } static void test_target_no_callees(void) @@ -96,7 +133,7 @@ static void test_target_no_callees(void) test_fexit_bpf2bpf_common("./fexit_bpf2bpf_simple.o", "./test_pkt_md_access.o", ARRAY_SIZE(prog_name), - prog_name, true); + prog_name, true, NULL); } static void test_target_yes_callees(void) @@ -110,7 +147,7 @@ static void test_target_yes_callees(void) test_fexit_bpf2bpf_common("./fexit_bpf2bpf.o", "./test_pkt_access.o", ARRAY_SIZE(prog_name), - prog_name, true); + prog_name, true, NULL); } static void test_func_replace(void) @@ -123,11 +160,12 @@ static void test_func_replace(void) "freplace/get_skb_len", "freplace/get_skb_ifindex", "freplace/get_constant", + "freplace/test_pkt_write_access_subprog", }; test_fexit_bpf2bpf_common("./fexit_bpf2bpf.o", "./test_pkt_access.o", ARRAY_SIZE(prog_name), - prog_name, true); + prog_name, true, NULL); } static void test_func_replace_verify(void) @@ -138,13 +176,198 @@ static void test_func_replace_verify(void) test_fexit_bpf2bpf_common("./freplace_connect4.o", "./connect4_prog.o", ARRAY_SIZE(prog_name), - prog_name, false); + prog_name, false, NULL); +} + +static int test_second_attach(struct bpf_object *obj) +{ + const char 
*prog_name = "freplace/get_constant"; + const char *tgt_name = prog_name + 9; /* cut off freplace/ */ + const char *tgt_obj_file = "./test_pkt_access.o"; + struct bpf_program *prog = NULL; + struct bpf_object *tgt_obj; + __u32 duration = 0, retval; + struct bpf_link *link; + int err = 0, tgt_fd; + + prog = bpf_object__find_program_by_title(obj, prog_name); + if (CHECK(!prog, "find_prog", "prog %s not found\n", prog_name)) + return -ENOENT; + + err = bpf_prog_load(tgt_obj_file, BPF_PROG_TYPE_UNSPEC, + &tgt_obj, &tgt_fd); + if (CHECK(err, "second_prog_load", "file %s err %d errno %d\n", + tgt_obj_file, err, errno)) + return err; + + link = bpf_program__attach_freplace(prog, tgt_fd, tgt_name); + if (CHECK(IS_ERR(link), "second_link", "failed to attach second link prog_fd %d tgt_fd %d\n", bpf_program__fd(prog), tgt_fd)) + goto out; + + err = bpf_prog_test_run(tgt_fd, 1, &pkt_v6, sizeof(pkt_v6), + NULL, NULL, &retval, &duration); + if (CHECK(err || retval, "ipv6", + "err %d errno %d retval %d duration %d\n", + err, errno, retval, duration)) + goto out; + + err = check_data_map(obj, 1, true); + if (err) + goto out; + +out: + bpf_link__destroy(link); + bpf_object__close(tgt_obj); + return err; +} + +static void test_func_replace_multi(void) +{ + const char *prog_name[] = { + "freplace/get_constant", + }; + test_fexit_bpf2bpf_common("./freplace_get_constant.o", + "./test_pkt_access.o", + ARRAY_SIZE(prog_name), + prog_name, true, test_second_attach); +} + +static void test_fmod_ret_freplace(void) +{ + struct bpf_object *freplace_obj = NULL, *pkt_obj, *fmod_obj = NULL; + const char *freplace_name = "./freplace_get_constant.o"; + const char *fmod_ret_name = "./fmod_ret_freplace.o"; + DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts); + const char *tgt_name = "./test_pkt_access.o"; + struct bpf_link *freplace_link = NULL; + struct bpf_program *prog; + __u32 duration = 0; + int err, pkt_fd; + + err = bpf_prog_load(tgt_name, BPF_PROG_TYPE_UNSPEC, + &pkt_obj, &pkt_fd); + /* the 
target prog should load fine */ + if (CHECK(err, "tgt_prog_load", "file %s err %d errno %d\n", + tgt_name, err, errno)) + return; + opts.attach_prog_fd = pkt_fd; + + freplace_obj = bpf_object__open_file(freplace_name, &opts); + if (CHECK(IS_ERR_OR_NULL(freplace_obj), "freplace_obj_open", + "failed to open %s: %ld\n", freplace_name, + PTR_ERR(freplace_obj))) + goto out; + + err = bpf_object__load(freplace_obj); + if (CHECK(err, "freplace_obj_load", "err %d\n", err)) + goto out; + + prog = bpf_program__next(NULL, freplace_obj); + freplace_link = bpf_program__attach_trace(prog); + if (CHECK(IS_ERR(freplace_link), "freplace_attach_trace", "failed to link\n")) + goto out; + + opts.attach_prog_fd = bpf_program__fd(prog); + fmod_obj = bpf_object__open_file(fmod_ret_name, &opts); + if (CHECK(IS_ERR_OR_NULL(fmod_obj), "fmod_obj_open", + "failed to open %s: %ld\n", fmod_ret_name, + PTR_ERR(fmod_obj))) + goto out; + + err = bpf_object__load(fmod_obj); + if (CHECK(!err, "fmod_obj_load", "loading fmod_ret should fail\n")) + goto out; + +out: + bpf_link__destroy(freplace_link); + bpf_object__close(freplace_obj); + bpf_object__close(fmod_obj); + bpf_object__close(pkt_obj); +} + + +static void test_func_sockmap_update(void) +{ + const char *prog_name[] = { + "freplace/cls_redirect", + }; + test_fexit_bpf2bpf_common("./freplace_cls_redirect.o", + "./test_cls_redirect.o", + ARRAY_SIZE(prog_name), + prog_name, false, NULL); +} + +static void test_obj_load_failure_common(const char *obj_file, + const char *target_obj_file) + +{ + /* + * standalone test that asserts failure to load freplace prog + * because of invalid return code. 
+ */ + struct bpf_object *obj = NULL, *pkt_obj; + int err, pkt_fd; + __u32 duration = 0; + + err = bpf_prog_load(target_obj_file, BPF_PROG_TYPE_UNSPEC, + &pkt_obj, &pkt_fd); + /* the target prog should load fine */ + if (CHECK(err, "tgt_prog_load", "file %s err %d errno %d\n", + target_obj_file, err, errno)) + return; + DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, + .attach_prog_fd = pkt_fd, + ); + + obj = bpf_object__open_file(obj_file, &opts); + if (CHECK(IS_ERR_OR_NULL(obj), "obj_open", + "failed to open %s: %ld\n", obj_file, + PTR_ERR(obj))) + goto close_prog; + + /* It should fail to load the program */ + err = bpf_object__load(obj); + if (CHECK(!err, "bpf_obj_load should fail", "err %d\n", err)) + goto close_prog; + +close_prog: + if (!IS_ERR_OR_NULL(obj)) + bpf_object__close(obj); + bpf_object__close(pkt_obj); +} + +static void test_func_replace_return_code(void) +{ + /* test invalid return code in the replaced program */ + test_obj_load_failure_common("./freplace_connect_v4_prog.o", + "./connect4_prog.o"); +} + +static void test_func_map_prog_compatibility(void) +{ + /* test with spin lock map value in the replaced program */ + test_obj_load_failure_common("./freplace_attach_probe.o", + "./test_attach_probe.o"); } void test_fexit_bpf2bpf(void) { - test_target_no_callees(); - test_target_yes_callees(); - test_func_replace(); - test_func_replace_verify(); + if (test__start_subtest("target_no_callees")) + test_target_no_callees(); + if (test__start_subtest("target_yes_callees")) + test_target_yes_callees(); + if (test__start_subtest("func_replace")) + test_func_replace(); + if (test__start_subtest("func_replace_verify")) + test_func_replace_verify(); + if (test__start_subtest("func_sockmap_update")) + test_func_sockmap_update(); + if (test__start_subtest("func_replace_return_code")) + test_func_replace_return_code(); + if (test__start_subtest("func_map_prog_compatibility")) + test_func_map_prog_compatibility(); + if 
(test__start_subtest("func_replace_multi")) + test_func_replace_multi(); + if (test__start_subtest("fmod_ret_freplace")) + test_fmod_ret_freplace(); } diff --git a/tools/testing/selftests/bpf/prog_tests/global_data_init.c b/tools/testing/selftests/bpf/prog_tests/global_data_init.c index 3bdaa5a40744..ee46b11f1f9a 100644 --- a/tools/testing/selftests/bpf/prog_tests/global_data_init.c +++ b/tools/testing/selftests/bpf/prog_tests/global_data_init.c @@ -12,7 +12,8 @@ void test_global_data_init(void) size_t sz; obj = bpf_object__open_file(file, NULL); - if (CHECK_FAIL(!obj)) + err = libbpf_get_error(obj); + if (CHECK_FAIL(err)) return; map = bpf_object__find_map_by_name(obj, "test_glo.rodata"); diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms.c b/tools/testing/selftests/bpf/prog_tests/ksyms.c index e3d6777226a8..b295969b263b 100644 --- a/tools/testing/selftests/bpf/prog_tests/ksyms.c +++ b/tools/testing/selftests/bpf/prog_tests/ksyms.c @@ -7,39 +7,28 @@ static int duration; -static __u64 kallsyms_find(const char *sym) -{ - char type, name[500]; - __u64 addr, res = 0; - FILE *f; - - f = fopen("/proc/kallsyms", "r"); - if (CHECK(!f, "kallsyms_fopen", "failed to open: %d\n", errno)) - return 0; - - while (fscanf(f, "%llx %c %499s%*[^\n]\n", &addr, &type, name) > 0) { - if (strcmp(name, sym) == 0) { - res = addr; - goto out; - } - } - - CHECK(false, "not_found", "symbol %s not found\n", sym); -out: - fclose(f); - return res; -} - void test_ksyms(void) { - __u64 link_fops_addr = kallsyms_find("bpf_link_fops"); const char *btf_path = "/sys/kernel/btf/vmlinux"; struct test_ksyms *skel; struct test_ksyms__data *data; + __u64 link_fops_addr, per_cpu_start_addr; struct stat st; __u64 btf_size; int err; + err = kallsyms_find("bpf_link_fops", &link_fops_addr); + if (CHECK(err == -EINVAL, "kallsyms_fopen", "failed to open: %d\n", errno)) + return; + if (CHECK(err == -ENOENT, "ksym_find", "symbol 'bpf_link_fops' not found\n")) + return; + + err = 
kallsyms_find("__per_cpu_start", &per_cpu_start_addr); + if (CHECK(err == -EINVAL, "kallsyms_fopen", "failed to open: %d\n", errno)) + return; + if (CHECK(err == -ENOENT, "ksym_find", "symbol 'per_cpu_start' not found\n")) + return; + if (CHECK(stat(btf_path, &st), "stat_btf", "err %d\n", errno)) return; btf_size = st.st_size; @@ -63,8 +52,9 @@ void test_ksyms(void) "got %llu, exp %llu\n", data->out__bpf_link_fops1, (__u64)0); CHECK(data->out__btf_size != btf_size, "btf_size", "got %llu, exp %llu\n", data->out__btf_size, btf_size); - CHECK(data->out__per_cpu_start != 0, "__per_cpu_start", - "got %llu, exp %llu\n", data->out__per_cpu_start, (__u64)0); + CHECK(data->out__per_cpu_start != per_cpu_start_addr, "__per_cpu_start", + "got %llu, exp %llu\n", data->out__per_cpu_start, + per_cpu_start_addr); cleanup: test_ksyms__destroy(skel); diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c new file mode 100644 index 000000000000..28e26bd3e0ca --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Google */ + +#include <test_progs.h> +#include <bpf/libbpf.h> +#include <bpf/btf.h> +#include "test_ksyms_btf.skel.h" + +static int duration; + +void test_ksyms_btf(void) +{ + __u64 runqueues_addr, bpf_prog_active_addr; + __u32 this_rq_cpu; + int this_bpf_prog_active; + struct test_ksyms_btf *skel = NULL; + struct test_ksyms_btf__data *data; + struct btf *btf; + int percpu_datasec; + int err; + + err = kallsyms_find("runqueues", &runqueues_addr); + if (CHECK(err == -EINVAL, "kallsyms_fopen", "failed to open: %d\n", errno)) + return; + if (CHECK(err == -ENOENT, "ksym_find", "symbol 'runqueues' not found\n")) + return; + + err = kallsyms_find("bpf_prog_active", &bpf_prog_active_addr); + if (CHECK(err == -EINVAL, "kallsyms_fopen", "failed to open: %d\n", errno)) + return; + if (CHECK(err == -ENOENT, "ksym_find", "symbol 
'bpf_prog_active' not found\n")) + return; + + btf = libbpf_find_kernel_btf(); + if (CHECK(IS_ERR(btf), "btf_exists", "failed to load kernel BTF: %ld\n", + PTR_ERR(btf))) + return; + + percpu_datasec = btf__find_by_name_kind(btf, ".data..percpu", + BTF_KIND_DATASEC); + if (percpu_datasec < 0) { + printf("%s:SKIP:no PERCPU DATASEC in kernel btf\n", + __func__); + test__skip(); + goto cleanup; + } + + skel = test_ksyms_btf__open_and_load(); + if (CHECK(!skel, "skel_open", "failed to open and load skeleton\n")) + goto cleanup; + + err = test_ksyms_btf__attach(skel); + if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) + goto cleanup; + + /* trigger tracepoint */ + usleep(1); + + data = skel->data; + CHECK(data->out__runqueues_addr != runqueues_addr, "runqueues_addr", + "got %llu, exp %llu\n", + (unsigned long long)data->out__runqueues_addr, + (unsigned long long)runqueues_addr); + CHECK(data->out__bpf_prog_active_addr != bpf_prog_active_addr, "bpf_prog_active_addr", + "got %llu, exp %llu\n", + (unsigned long long)data->out__bpf_prog_active_addr, + (unsigned long long)bpf_prog_active_addr); + + CHECK(data->out__rq_cpu == -1, "rq_cpu", + "got %u, exp != -1\n", data->out__rq_cpu); + CHECK(data->out__bpf_prog_active < 0, "bpf_prog_active", + "got %d, exp >= 0\n", data->out__bpf_prog_active); + CHECK(data->out__cpu_0_rq_cpu != 0, "cpu_rq(0)->cpu", + "got %u, exp 0\n", data->out__cpu_0_rq_cpu); + + this_rq_cpu = data->out__this_rq_cpu; + CHECK(this_rq_cpu != data->out__rq_cpu, "this_rq_cpu", + "got %u, exp %u\n", this_rq_cpu, data->out__rq_cpu); + + this_bpf_prog_active = data->out__this_bpf_prog_active; + CHECK(this_bpf_prog_active != data->out__bpf_prog_active, "this_bpf_prog_active", + "got %d, exp %d\n", this_bpf_prog_active, + data->out__bpf_prog_active); + +cleanup: + btf__free(btf); + test_ksyms_btf__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c index 
c2d373e294bb..8073105548ff 100644 --- a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c +++ b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c @@ -80,9 +80,8 @@ out: void test_l4lb_all(void) { - const char *file1 = "./test_l4lb.o"; - const char *file2 = "./test_l4lb_noinline.o"; - - test_l4lb(file1); - test_l4lb(file2); + if (test__start_subtest("l4lb_inline")) + test_l4lb("test_l4lb.o"); + if (test__start_subtest("l4lb_noinline")) + test_l4lb("test_l4lb_noinline.o"); } diff --git a/tools/testing/selftests/bpf/prog_tests/metadata.c b/tools/testing/selftests/bpf/prog_tests/metadata.c new file mode 100644 index 000000000000..2c53eade88e3 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/metadata.c @@ -0,0 +1,141 @@ +// SPDX-License-Identifier: GPL-2.0-only + +/* + * Copyright 2020 Google LLC. + */ + +#include <test_progs.h> +#include <cgroup_helpers.h> +#include <network_helpers.h> + +#include "metadata_unused.skel.h" +#include "metadata_used.skel.h" + +static int duration; + +static int prog_holds_map(int prog_fd, int map_fd) +{ + struct bpf_prog_info prog_info = {}; + struct bpf_prog_info map_info = {}; + __u32 prog_info_len; + __u32 map_info_len; + __u32 *map_ids; + int nr_maps; + int ret; + int i; + + map_info_len = sizeof(map_info); + ret = bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len); + if (ret) + return -errno; + + prog_info_len = sizeof(prog_info); + ret = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &prog_info_len); + if (ret) + return -errno; + + map_ids = calloc(prog_info.nr_map_ids, sizeof(__u32)); + if (!map_ids) + return -ENOMEM; + + nr_maps = prog_info.nr_map_ids; + memset(&prog_info, 0, sizeof(prog_info)); + prog_info.nr_map_ids = nr_maps; + prog_info.map_ids = ptr_to_u64(map_ids); + prog_info_len = sizeof(prog_info); + + ret = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &prog_info_len); + if (ret) { + ret = -errno; + goto free_map_ids; + } + + ret = -ENOENT; + for (i = 0; i < prog_info.nr_map_ids; i++) { + if (map_ids[i] == 
map_info.id) { + ret = 0; + break; + } + } + +free_map_ids: + free(map_ids); + return ret; +} + +static void test_metadata_unused(void) +{ + struct metadata_unused *obj; + int err; + + obj = metadata_unused__open_and_load(); + if (CHECK(!obj, "skel-load", "errno %d", errno)) + return; + + err = prog_holds_map(bpf_program__fd(obj->progs.prog), + bpf_map__fd(obj->maps.rodata)); + if (CHECK(err, "prog-holds-rodata", "errno: %d", err)) + return; + + /* Assert that we can access the metadata in skel and the values are + * what we expect. + */ + if (CHECK(strncmp(obj->rodata->bpf_metadata_a, "foo", + sizeof(obj->rodata->bpf_metadata_a)), + "bpf_metadata_a", "expected \"foo\", value differ")) + goto close_bpf_object; + if (CHECK(obj->rodata->bpf_metadata_b != 1, "bpf_metadata_b", + "expected 1, got %d", obj->rodata->bpf_metadata_b)) + goto close_bpf_object; + + /* Assert that binding metadata map to prog again succeeds. */ + err = bpf_prog_bind_map(bpf_program__fd(obj->progs.prog), + bpf_map__fd(obj->maps.rodata), NULL); + CHECK(err, "rebind_map", "errno %d, expected 0", errno); + +close_bpf_object: + metadata_unused__destroy(obj); +} + +static void test_metadata_used(void) +{ + struct metadata_used *obj; + int err; + + obj = metadata_used__open_and_load(); + if (CHECK(!obj, "skel-load", "errno %d", errno)) + return; + + err = prog_holds_map(bpf_program__fd(obj->progs.prog), + bpf_map__fd(obj->maps.rodata)); + if (CHECK(err, "prog-holds-rodata", "errno: %d", err)) + return; + + /* Assert that we can access the metadata in skel and the values are + * what we expect. + */ + if (CHECK(strncmp(obj->rodata->bpf_metadata_a, "bar", + sizeof(obj->rodata->bpf_metadata_a)), + "metadata_a", "expected \"bar\", value differ")) + goto close_bpf_object; + if (CHECK(obj->rodata->bpf_metadata_b != 2, "metadata_b", + "expected 2, got %d", obj->rodata->bpf_metadata_b)) + goto close_bpf_object; + + /* Assert that binding metadata map to prog again succeeds. 
*/ + err = bpf_prog_bind_map(bpf_program__fd(obj->progs.prog), + bpf_map__fd(obj->maps.rodata), NULL); + CHECK(err, "rebind_map", "errno %d, expected 0", errno); + +close_bpf_object: + metadata_used__destroy(obj); +} + +void test_metadata(void) +{ + if (test__start_subtest("unused")) + test_metadata_unused(); + + if (test__start_subtest("used")) + test_metadata_used(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/pe_preserve_elems.c b/tools/testing/selftests/bpf/prog_tests/pe_preserve_elems.c new file mode 100644 index 000000000000..673d38395253 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/pe_preserve_elems.c @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2019 Facebook */ +#include <test_progs.h> +#include <linux/bpf.h> +#include "test_pe_preserve_elems.skel.h" + +static int duration; + +static void test_one_map(struct bpf_map *map, struct bpf_program *prog, + bool has_share_pe) +{ + int err, key = 0, pfd = -1, mfd = bpf_map__fd(map); + DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts); + struct perf_event_attr attr = { + .size = sizeof(struct perf_event_attr), + .type = PERF_TYPE_SOFTWARE, + .config = PERF_COUNT_SW_CPU_CLOCK, + }; + + pfd = syscall(__NR_perf_event_open, &attr, 0 /* pid */, + -1 /* cpu 0 */, -1 /* group id */, 0 /* flags */); + if (CHECK(pfd < 0, "perf_event_open", "failed\n")) + return; + + err = bpf_map_update_elem(mfd, &key, &pfd, BPF_ANY); + close(pfd); + if (CHECK(err < 0, "bpf_map_update_elem", "failed\n")) + return; + + err = bpf_prog_test_run_opts(bpf_program__fd(prog), &opts); + if (CHECK(err < 0, "bpf_prog_test_run_opts", "failed\n")) + return; + if (CHECK(opts.retval != 0, "bpf_perf_event_read_value", + "failed with %d\n", opts.retval)) + return; + + /* closing mfd, prog still holds a reference on map */ + close(mfd); + + err = bpf_prog_test_run_opts(bpf_program__fd(prog), &opts); + if (CHECK(err < 0, "bpf_prog_test_run_opts", "failed\n")) + return; + + if (has_share_pe) { + 
CHECK(opts.retval != 0, "bpf_perf_event_read_value", + "failed with %d\n", opts.retval); + } else { + CHECK(opts.retval != -ENOENT, "bpf_perf_event_read_value", + "should have failed with %d, but got %d\n", -ENOENT, + opts.retval); + } +} + +void test_pe_preserve_elems(void) +{ + struct test_pe_preserve_elems *skel; + + skel = test_pe_preserve_elems__open_and_load(); + if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) + return; + + test_one_map(skel->maps.array_1, skel->progs.read_array_1, false); + test_one_map(skel->maps.array_2, skel->progs.read_array_2, true); + + test_pe_preserve_elems__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c index c33ec180b3f2..ca9f0895ec84 100644 --- a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c +++ b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c @@ -7,6 +7,8 @@ #include "test_perf_buffer.skel.h" #include "bpf/libbpf_internal.h" +static int duration; + /* AddressSanitizer sometimes crashes due to data dereference below, due to * this being mmap()'ed memory. 
Disable instrumentation with * no_sanitize_address attribute @@ -24,13 +26,31 @@ static void on_sample(void *ctx, int cpu, void *data, __u32 size) CPU_SET(cpu, cpu_seen); } +int trigger_on_cpu(int cpu) +{ + cpu_set_t cpu_set; + int err; + + CPU_ZERO(&cpu_set); + CPU_SET(cpu, &cpu_set); + + err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set); + if (err && CHECK(err, "set_affinity", "cpu #%d, err %d\n", cpu, err)) + return err; + + usleep(1); + + return 0; +} + void test_perf_buffer(void) { - int err, on_len, nr_on_cpus = 0, nr_cpus, i, duration = 0; + int err, on_len, nr_on_cpus = 0, nr_cpus, i; struct perf_buffer_opts pb_opts = {}; struct test_perf_buffer *skel; - cpu_set_t cpu_set, cpu_seen; + cpu_set_t cpu_seen; struct perf_buffer *pb; + int last_fd = -1, fd; bool *online; nr_cpus = libbpf_num_possible_cpus(); @@ -63,6 +83,9 @@ void test_perf_buffer(void) if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb))) goto out_close; + CHECK(perf_buffer__epoll_fd(pb) < 0, "epoll_fd", + "bad fd: %d\n", perf_buffer__epoll_fd(pb)); + /* trigger kprobe on every CPU */ CPU_ZERO(&cpu_seen); for (i = 0; i < nr_cpus; i++) { @@ -71,16 +94,8 @@ void test_perf_buffer(void) continue; } - CPU_ZERO(&cpu_set); - CPU_SET(i, &cpu_set); - - err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), - &cpu_set); - if (err && CHECK(err, "set_affinity", "cpu #%d, err %d\n", - i, err)) + if (trigger_on_cpu(i)) goto out_close; - - usleep(1); } /* read perf buffer */ @@ -92,6 +107,34 @@ void test_perf_buffer(void) "expect %d, seen %d\n", nr_on_cpus, CPU_COUNT(&cpu_seen))) goto out_free_pb; + if (CHECK(perf_buffer__buffer_cnt(pb) != nr_cpus, "buf_cnt", + "got %zu, expected %d\n", perf_buffer__buffer_cnt(pb), nr_cpus)) + goto out_close; + + for (i = 0; i < nr_cpus; i++) { + if (i >= on_len || !online[i]) + continue; + + fd = perf_buffer__buffer_fd(pb, i); + CHECK(fd < 0 || last_fd == fd, "fd_check", "last fd %d == fd %d\n", last_fd, fd); + last_fd = fd; + + err = 
perf_buffer__consume_buffer(pb, i); + if (CHECK(err, "drain_buf", "cpu %d, err %d\n", i, err)) + goto out_close; + + CPU_CLR(i, &cpu_seen); + if (trigger_on_cpu(i)) + goto out_close; + + err = perf_buffer__consume_buffer(pb, i); + if (CHECK(err, "consume_buf", "cpu %d, err %d\n", i, err)) + goto out_close; + + if (CHECK(!CPU_ISSET(i, &cpu_seen), "cpu_seen", "cpu %d not seen\n", i)) + goto out_close; + } + out_free_pb: perf_buffer__free(pb); out_close: diff --git a/tools/testing/selftests/bpf/prog_tests/pinning.c b/tools/testing/selftests/bpf/prog_tests/pinning.c index 041952524c55..fcf54b3a1dd0 100644 --- a/tools/testing/selftests/bpf/prog_tests/pinning.c +++ b/tools/testing/selftests/bpf/prog_tests/pinning.c @@ -37,7 +37,7 @@ void test_pinning(void) struct stat statbuf = {}; struct bpf_object *obj; struct bpf_map *map; - int err; + int err, map_fd; DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, .pin_root_path = custpath, ); @@ -213,6 +213,53 @@ void test_pinning(void) if (CHECK(err, "stat custpinpath", "err %d errno %d\n", err, errno)) goto out; + /* remove the custom pin path to re-test it with reuse fd below */ + err = unlink(custpinpath); + if (CHECK(err, "unlink custpinpath", "err %d errno %d\n", err, errno)) + goto out; + + err = rmdir(custpath); + if (CHECK(err, "rmdir custpindir", "err %d errno %d\n", err, errno)) + goto out; + + bpf_object__close(obj); + + /* test pinning at custom path with reuse fd */ + obj = bpf_object__open_file(file, NULL); + err = libbpf_get_error(obj); + if (CHECK(err, "default open", "err %d errno %d\n", err, errno)) { + obj = NULL; + goto out; + } + + map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(__u32), + sizeof(__u64), 1, 0); + if (CHECK(map_fd < 0, "create pinmap manually", "fd %d\n", map_fd)) + goto out; + + map = bpf_object__find_map_by_name(obj, "pinmap"); + if (CHECK(!map, "find map", "NULL map")) + goto close_map_fd; + + err = bpf_map__reuse_fd(map, map_fd); + if (CHECK(err, "reuse pinmap fd", "err %d errno %d\n", 
err, errno)) + goto close_map_fd; + + err = bpf_map__set_pin_path(map, custpinpath); + if (CHECK(err, "set pin path", "err %d errno %d\n", err, errno)) + goto close_map_fd; + + err = bpf_object__load(obj); + if (CHECK(err, "custom load", "err %d errno %d\n", err, errno)) + goto close_map_fd; + + /* check that pinmap was pinned at the custom path */ + err = stat(custpinpath, &statbuf); + if (CHECK(err, "stat custpinpath", "err %d errno %d\n", err, errno)) + goto close_map_fd; + +close_map_fd: + close(map_fd); out: unlink(pinpath); unlink(nopinpath); diff --git a/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c b/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c new file mode 100644 index 000000000000..c5fb191874ac --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Copyright (c) 2019 Facebook */ +#include <test_progs.h> +#include <linux/bpf.h> +#include "bpf/libbpf_internal.h" +#include "test_raw_tp_test_run.skel.h" + +static int duration; + +void test_raw_tp_test_run(void) +{ + struct bpf_prog_test_run_attr test_attr = {}; + int comm_fd = -1, err, nr_online, i, prog_fd; + __u64 args[2] = {0x1234ULL, 0x5678ULL}; + int expected_retval = 0x1234 + 0x5678; + struct test_raw_tp_test_run *skel; + char buf[] = "new_name"; + bool *online = NULL; + DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts, + .ctx_in = args, + .ctx_size_in = sizeof(args), + .flags = BPF_F_TEST_RUN_ON_CPU, + ); + + err = parse_cpu_mask_file("/sys/devices/system/cpu/online", &online, + &nr_online); + if (CHECK(err, "parse_cpu_mask_file", "err %d\n", err)) + return; + + skel = test_raw_tp_test_run__open_and_load(); + if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) + goto cleanup; + + err = test_raw_tp_test_run__attach(skel); + if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) + goto cleanup; + + comm_fd = open("/proc/self/comm", O_WRONLY|O_TRUNC); + if (CHECK(comm_fd < 0, "open 
/proc/self/comm", "err %d\n", errno)) + goto cleanup; + + err = write(comm_fd, buf, sizeof(buf)); + CHECK(err < 0, "task rename", "err %d", errno); + + CHECK(skel->bss->count == 0, "check_count", "didn't increase\n"); + CHECK(skel->data->on_cpu != 0xffffffff, "check_on_cpu", "got wrong value\n"); + + prog_fd = bpf_program__fd(skel->progs.rename); + test_attr.prog_fd = prog_fd; + test_attr.ctx_in = args; + test_attr.ctx_size_in = sizeof(__u64); + + err = bpf_prog_test_run_xattr(&test_attr); + CHECK(err == 0, "test_run", "should fail for too small ctx\n"); + + test_attr.ctx_size_in = sizeof(args); + err = bpf_prog_test_run_xattr(&test_attr); + CHECK(err < 0, "test_run", "err %d\n", errno); + CHECK(test_attr.retval != expected_retval, "check_retval", + "expect 0x%x, got 0x%x\n", expected_retval, test_attr.retval); + + for (i = 0; i < nr_online; i++) { + if (!online[i]) + continue; + + opts.cpu = i; + opts.retval = 0; + err = bpf_prog_test_run_opts(prog_fd, &opts); + CHECK(err < 0, "test_run_opts", "err %d\n", errno); + CHECK(skel->data->on_cpu != i, "check_on_cpu", + "expect %d got %d\n", i, skel->data->on_cpu); + CHECK(opts.retval != expected_retval, + "check_retval", "expect 0x%x, got 0x%x\n", + expected_retval, opts.retval); + } + + /* invalid cpu ID should fail with ENXIO */ + opts.cpu = 0xffffffff; + err = bpf_prog_test_run_opts(prog_fd, &opts); + CHECK(err != -1 || errno != ENXIO, + "test_run_opts_fail", + "should failed with ENXIO\n"); + + /* non-zero cpu w/o BPF_F_TEST_RUN_ON_CPU should fail with EINVAL */ + opts.cpu = 1; + opts.flags = 0; + err = bpf_prog_test_run_opts(prog_fd, &opts); + CHECK(err != -1 || errno != EINVAL, + "test_run_opts_fail", + "should failed with EINVAL\n"); + +cleanup: + close(comm_fd); + test_raw_tp_test_run__destroy(skel); + free(online); +} diff --git a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c index fc0d7f4f02cf..ac1ee10cffd8 100644 --- 
a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c +++ b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c @@ -27,7 +27,7 @@ void test_reference_tracking(void) const char *title; /* Ignore .text sections */ - title = bpf_program__title(prog, false); + title = bpf_program__section_name(prog); if (strstr(title, ".text") != NULL) continue; diff --git a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c index 3b127cab4864..6ace5e9efec1 100644 --- a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c +++ b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c @@ -28,6 +28,12 @@ struct symbol test_symbols[] = { { "func", BTF_KIND_FUNC, -1 }, }; +/* Align the .BTF_ids section to 4 bytes */ +asm ( +".pushsection " BTF_IDS_SECTION " ,\"a\"; \n" +".balign 4, 0; \n" +".popsection; \n"); + BTF_ID_LIST(test_list_local) BTF_ID_UNUSED BTF_ID(typedef, S) @@ -47,6 +53,15 @@ BTF_ID(struct, S) BTF_ID(union, U) BTF_ID(func, func) +BTF_SET_START(test_set) +BTF_ID(typedef, S) +BTF_ID(typedef, T) +BTF_ID(typedef, U) +BTF_ID(struct, S) +BTF_ID(union, U) +BTF_ID(func, func) +BTF_SET_END(test_set) + static int __resolve_symbol(struct btf *btf, int type_id) { @@ -116,12 +131,40 @@ int test_resolve_btfids(void) */ for (j = 0; j < ARRAY_SIZE(test_lists); j++) { test_list = test_lists[j]; - for (i = 0; i < ARRAY_SIZE(test_symbols) && !ret; i++) { + for (i = 0; i < ARRAY_SIZE(test_symbols); i++) { ret = CHECK(test_list[i] != test_symbols[i].id, "id_check", "wrong ID for %s (%d != %d)\n", test_symbols[i].name, test_list[i], test_symbols[i].id); + if (ret) + return ret; + } + } + + /* Check BTF_SET_START(test_set) IDs */ + for (i = 0; i < test_set.cnt; i++) { + bool found = false; + + for (j = 0; j < ARRAY_SIZE(test_symbols); j++) { + if (test_symbols[j].id != test_set.ids[i]) + continue; + found = true; + break; + } + + ret = CHECK(!found, "id_check", + "ID %d not found in test_symbols\n", + 
test_set.ids[i]); + if (ret) + break; + + if (i > 0) { + ret = CHECK(test_set.ids[i - 1] > test_set.ids[i], + "sort_check", + "test_set is not sorted\n"); + if (ret) + break; } } diff --git a/tools/testing/selftests/bpf/prog_tests/sk_assign.c b/tools/testing/selftests/bpf/prog_tests/sk_assign.c index 47fa04adc147..3a469099f30d 100644 --- a/tools/testing/selftests/bpf/prog_tests/sk_assign.c +++ b/tools/testing/selftests/bpf/prog_tests/sk_assign.c @@ -49,7 +49,7 @@ configure_stack(void) sprintf(tc_cmd, "%s %s %s %s", "tc filter add dev lo ingress bpf", "direct-action object-file ./test_sk_assign.o", "section classifier/sk_assign_test", - (env.verbosity < VERBOSE_VERY) ? " 2>/dev/null" : ""); + (env.verbosity < VERBOSE_VERY) ? " 2>/dev/null" : "verbose"); if (CHECK(system(tc_cmd), "BPF load failed;", "run with -vv for more info\n")) return false; @@ -265,9 +265,10 @@ void test_sk_assign(void) TEST("ipv6 udp port redir", AF_INET6, SOCK_DGRAM, false), TEST("ipv6 udp addr redir", AF_INET6, SOCK_DGRAM, true), }; - int server = -1; + __s64 server = -1; int server_map; int self_net; + int i; self_net = open(NS_SELF, O_RDONLY); if (CHECK_FAIL(self_net < 0)) { @@ -286,7 +287,7 @@ void test_sk_assign(void) goto cleanup; } - for (int i = 0; i < ARRAY_SIZE(tests) && !READ_ONCE(stop); i++) { + for (i = 0; i < ARRAY_SIZE(tests) && !READ_ONCE(stop); i++) { struct test_sk_cfg *test = &tests[i]; const struct sockaddr *addr; const int zero = 0; diff --git a/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c b/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c new file mode 100644 index 000000000000..686b40f11a45 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include <linux/btf.h> +#include "netif_receive_skb.skel.h" + +/* Demonstrate that bpf_snprintf_btf succeeds and that various data types + * are formatted correctly. 
+ */ +void test_snprintf_btf(void) +{ + struct netif_receive_skb *skel; + struct netif_receive_skb__bss *bss; + int err, duration = 0; + + skel = netif_receive_skb__open(); + if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) + return; + + err = netif_receive_skb__load(skel); + if (CHECK(err, "skel_load", "failed to load skeleton: %d\n", err)) + goto cleanup; + + bss = skel->bss; + + err = netif_receive_skb__attach(skel); + if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) + goto cleanup; + + /* generate receive event */ + err = system("ping -c 1 127.0.0.1 > /dev/null"); + if (CHECK(err, "system", "ping failed: %d\n", err)) + goto cleanup; + + if (bss->skip) { + printf("%s:SKIP:no __builtin_btf_type_id\n", __func__); + test__skip(); + goto cleanup; + } + + /* + * Make sure netif_receive_skb program was triggered + * and it set expected return values from bpf_trace_printk()s + * and all tests ran. + */ + if (CHECK(bss->ret <= 0, + "bpf_snprintf_btf: got return value", + "ret <= 0 %ld test %d\n", bss->ret, bss->ran_subtests)) + goto cleanup; + + if (CHECK(bss->ran_subtests == 0, "check if subtests ran", + "no subtests ran, did BPF program run?")) + goto cleanup; + + if (CHECK(bss->num_subtests != bss->ran_subtests, + "check all subtests ran", + "only ran %d of %d tests\n", bss->num_subtests, + bss->ran_subtests)) + goto cleanup; + +cleanup: + netif_receive_skb__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/sock_fields.c b/tools/testing/selftests/bpf/prog_tests/sock_fields.c new file mode 100644 index 000000000000..af87118e748e --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/sock_fields.c @@ -0,0 +1,382 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Facebook */ + +#include <netinet/in.h> +#include <arpa/inet.h> +#include <unistd.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> + +#include <bpf/bpf.h> +#include <bpf/libbpf.h> +#include <linux/compiler.h> + +#include 
"network_helpers.h" +#include "cgroup_helpers.h" +#include "test_progs.h" +#include "bpf_rlimit.h" +#include "test_sock_fields.skel.h" + +enum bpf_linum_array_idx { + EGRESS_LINUM_IDX, + INGRESS_LINUM_IDX, + __NR_BPF_LINUM_ARRAY_IDX, +}; + +struct bpf_spinlock_cnt { + struct bpf_spin_lock lock; + __u32 cnt; +}; + +#define PARENT_CGROUP "/test-bpf-sock-fields" +#define CHILD_CGROUP "/test-bpf-sock-fields/child" +#define DATA "Hello BPF!" +#define DATA_LEN sizeof(DATA) + +static struct sockaddr_in6 srv_sa6, cli_sa6; +static int sk_pkt_out_cnt10_fd; +static struct test_sock_fields *skel; +static int sk_pkt_out_cnt_fd; +static __u64 parent_cg_id; +static __u64 child_cg_id; +static int linum_map_fd; +static __u32 duration; + +static __u32 egress_linum_idx = EGRESS_LINUM_IDX; +static __u32 ingress_linum_idx = INGRESS_LINUM_IDX; + +static void print_sk(const struct bpf_sock *sk, const char *prefix) +{ + char src_ip4[24], dst_ip4[24]; + char src_ip6[64], dst_ip6[64]; + + inet_ntop(AF_INET, &sk->src_ip4, src_ip4, sizeof(src_ip4)); + inet_ntop(AF_INET6, &sk->src_ip6, src_ip6, sizeof(src_ip6)); + inet_ntop(AF_INET, &sk->dst_ip4, dst_ip4, sizeof(dst_ip4)); + inet_ntop(AF_INET6, &sk->dst_ip6, dst_ip6, sizeof(dst_ip6)); + + printf("%s: state:%u bound_dev_if:%u family:%u type:%u protocol:%u mark:%u priority:%u " + "src_ip4:%x(%s) src_ip6:%x:%x:%x:%x(%s) src_port:%u " + "dst_ip4:%x(%s) dst_ip6:%x:%x:%x:%x(%s) dst_port:%u\n", + prefix, + sk->state, sk->bound_dev_if, sk->family, sk->type, sk->protocol, + sk->mark, sk->priority, + sk->src_ip4, src_ip4, + sk->src_ip6[0], sk->src_ip6[1], sk->src_ip6[2], sk->src_ip6[3], + src_ip6, sk->src_port, + sk->dst_ip4, dst_ip4, + sk->dst_ip6[0], sk->dst_ip6[1], sk->dst_ip6[2], sk->dst_ip6[3], + dst_ip6, ntohs(sk->dst_port)); +} + +static void print_tp(const struct bpf_tcp_sock *tp, const char *prefix) +{ + printf("%s: snd_cwnd:%u srtt_us:%u rtt_min:%u snd_ssthresh:%u rcv_nxt:%u " + "snd_nxt:%u snd:una:%u mss_cache:%u ecn_flags:%u " + 
"rate_delivered:%u rate_interval_us:%u packets_out:%u " + "retrans_out:%u total_retrans:%u segs_in:%u data_segs_in:%u " + "segs_out:%u data_segs_out:%u lost_out:%u sacked_out:%u " + "bytes_received:%llu bytes_acked:%llu\n", + prefix, + tp->snd_cwnd, tp->srtt_us, tp->rtt_min, tp->snd_ssthresh, + tp->rcv_nxt, tp->snd_nxt, tp->snd_una, tp->mss_cache, + tp->ecn_flags, tp->rate_delivered, tp->rate_interval_us, + tp->packets_out, tp->retrans_out, tp->total_retrans, + tp->segs_in, tp->data_segs_in, tp->segs_out, + tp->data_segs_out, tp->lost_out, tp->sacked_out, + tp->bytes_received, tp->bytes_acked); +} + +static void check_result(void) +{ + struct bpf_tcp_sock srv_tp, cli_tp, listen_tp; + struct bpf_sock srv_sk, cli_sk, listen_sk; + __u32 ingress_linum, egress_linum; + int err; + + err = bpf_map_lookup_elem(linum_map_fd, &egress_linum_idx, + &egress_linum); + CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)", + "err:%d errno:%d\n", err, errno); + + err = bpf_map_lookup_elem(linum_map_fd, &ingress_linum_idx, + &ingress_linum); + CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)", + "err:%d errno:%d\n", err, errno); + + memcpy(&srv_sk, &skel->bss->srv_sk, sizeof(srv_sk)); + memcpy(&srv_tp, &skel->bss->srv_tp, sizeof(srv_tp)); + memcpy(&cli_sk, &skel->bss->cli_sk, sizeof(cli_sk)); + memcpy(&cli_tp, &skel->bss->cli_tp, sizeof(cli_tp)); + memcpy(&listen_sk, &skel->bss->listen_sk, sizeof(listen_sk)); + memcpy(&listen_tp, &skel->bss->listen_tp, sizeof(listen_tp)); + + print_sk(&listen_sk, "listen_sk"); + print_sk(&srv_sk, "srv_sk"); + print_sk(&cli_sk, "cli_sk"); + print_tp(&listen_tp, "listen_tp"); + print_tp(&srv_tp, "srv_tp"); + print_tp(&cli_tp, "cli_tp"); + + CHECK(listen_sk.state != 10 || + listen_sk.family != AF_INET6 || + listen_sk.protocol != IPPROTO_TCP || + memcmp(listen_sk.src_ip6, &in6addr_loopback, + sizeof(listen_sk.src_ip6)) || + listen_sk.dst_ip6[0] || listen_sk.dst_ip6[1] || + listen_sk.dst_ip6[2] || listen_sk.dst_ip6[3] || + listen_sk.src_port != 
ntohs(srv_sa6.sin6_port) || + listen_sk.dst_port, + "listen_sk", + "Unexpected. Check listen_sk output. ingress_linum:%u\n", + ingress_linum); + + CHECK(srv_sk.state == 10 || + !srv_sk.state || + srv_sk.family != AF_INET6 || + srv_sk.protocol != IPPROTO_TCP || + memcmp(srv_sk.src_ip6, &in6addr_loopback, + sizeof(srv_sk.src_ip6)) || + memcmp(srv_sk.dst_ip6, &in6addr_loopback, + sizeof(srv_sk.dst_ip6)) || + srv_sk.src_port != ntohs(srv_sa6.sin6_port) || + srv_sk.dst_port != cli_sa6.sin6_port, + "srv_sk", "Unexpected. Check srv_sk output. egress_linum:%u\n", + egress_linum); + + CHECK(!skel->bss->lsndtime, "srv_tp", "Unexpected lsndtime:0\n"); + + CHECK(cli_sk.state == 10 || + !cli_sk.state || + cli_sk.family != AF_INET6 || + cli_sk.protocol != IPPROTO_TCP || + memcmp(cli_sk.src_ip6, &in6addr_loopback, + sizeof(cli_sk.src_ip6)) || + memcmp(cli_sk.dst_ip6, &in6addr_loopback, + sizeof(cli_sk.dst_ip6)) || + cli_sk.src_port != ntohs(cli_sa6.sin6_port) || + cli_sk.dst_port != srv_sa6.sin6_port, + "cli_sk", "Unexpected. Check cli_sk output. egress_linum:%u\n", + egress_linum); + + CHECK(listen_tp.data_segs_out || + listen_tp.data_segs_in || + listen_tp.total_retrans || + listen_tp.bytes_acked, + "listen_tp", + "Unexpected. Check listen_tp output. ingress_linum:%u\n", + ingress_linum); + + CHECK(srv_tp.data_segs_out != 2 || + srv_tp.data_segs_in || + srv_tp.snd_cwnd != 10 || + srv_tp.total_retrans || + srv_tp.bytes_acked < 2 * DATA_LEN, + "srv_tp", "Unexpected. Check srv_tp output. egress_linum:%u\n", + egress_linum); + + CHECK(cli_tp.data_segs_out || + cli_tp.data_segs_in != 2 || + cli_tp.snd_cwnd != 10 || + cli_tp.total_retrans || + cli_tp.bytes_received < 2 * DATA_LEN, + "cli_tp", "Unexpected. Check cli_tp output. 
egress_linum:%u\n", + egress_linum); + + CHECK(skel->bss->parent_cg_id != parent_cg_id, + "parent_cg_id", "%zu != %zu\n", + (size_t)skel->bss->parent_cg_id, (size_t)parent_cg_id); + + CHECK(skel->bss->child_cg_id != child_cg_id, + "child_cg_id", "%zu != %zu\n", + (size_t)skel->bss->child_cg_id, (size_t)child_cg_id); +} + +static void check_sk_pkt_out_cnt(int accept_fd, int cli_fd) +{ + struct bpf_spinlock_cnt pkt_out_cnt = {}, pkt_out_cnt10 = {}; + int err; + + pkt_out_cnt.cnt = ~0; + pkt_out_cnt10.cnt = ~0; + err = bpf_map_lookup_elem(sk_pkt_out_cnt_fd, &accept_fd, &pkt_out_cnt); + if (!err) + err = bpf_map_lookup_elem(sk_pkt_out_cnt10_fd, &accept_fd, + &pkt_out_cnt10); + + /* The bpf prog only counts for fullsock and + * passive connection did not become fullsock until 3WHS + * had been finished, so the bpf prog only counted two data + * packet out. + */ + CHECK(err || pkt_out_cnt.cnt < 0xeB9F + 2 || + pkt_out_cnt10.cnt < 0xeB9F + 20, + "bpf_map_lookup_elem(sk_pkt_out_cnt, &accept_fd)", + "err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u\n", + err, errno, pkt_out_cnt.cnt, pkt_out_cnt10.cnt); + + pkt_out_cnt.cnt = ~0; + pkt_out_cnt10.cnt = ~0; + err = bpf_map_lookup_elem(sk_pkt_out_cnt_fd, &cli_fd, &pkt_out_cnt); + if (!err) + err = bpf_map_lookup_elem(sk_pkt_out_cnt10_fd, &cli_fd, + &pkt_out_cnt10); + /* Active connection is fullsock from the beginning. + * 1 SYN and 1 ACK during 3WHS + * 2 Acks on data packet. + * + * The bpf_prog initialized it to 0xeB9F. 
+ */ + CHECK(err || pkt_out_cnt.cnt < 0xeB9F + 4 || + pkt_out_cnt10.cnt < 0xeB9F + 40, + "bpf_map_lookup_elem(sk_pkt_out_cnt, &cli_fd)", + "err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u\n", + err, errno, pkt_out_cnt.cnt, pkt_out_cnt10.cnt); +} + +static int init_sk_storage(int sk_fd, __u32 pkt_out_cnt) +{ + struct bpf_spinlock_cnt scnt = {}; + int err; + + scnt.cnt = pkt_out_cnt; + err = bpf_map_update_elem(sk_pkt_out_cnt_fd, &sk_fd, &scnt, + BPF_NOEXIST); + if (CHECK(err, "bpf_map_update_elem(sk_pkt_out_cnt_fd)", + "err:%d errno:%d\n", err, errno)) + return err; + + err = bpf_map_update_elem(sk_pkt_out_cnt10_fd, &sk_fd, &scnt, + BPF_NOEXIST); + if (CHECK(err, "bpf_map_update_elem(sk_pkt_out_cnt10_fd)", + "err:%d errno:%d\n", err, errno)) + return err; + + return 0; +} + +static void test(void) +{ + int listen_fd = -1, cli_fd = -1, accept_fd = -1, err, i; + socklen_t addrlen = sizeof(struct sockaddr_in6); + char buf[DATA_LEN]; + + /* Prepare listen_fd */ + listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0); + /* start_server() has logged the error details */ + if (CHECK_FAIL(listen_fd == -1)) + goto done; + + err = getsockname(listen_fd, (struct sockaddr *)&srv_sa6, &addrlen); + if (CHECK(err, "getsockname(listen_fd)", "err:%d errno:%d\n", err, + errno)) + goto done; + memcpy(&skel->bss->srv_sa6, &srv_sa6, sizeof(srv_sa6)); + + cli_fd = connect_to_fd(listen_fd, 0); + if (CHECK_FAIL(cli_fd == -1)) + goto done; + + err = getsockname(cli_fd, (struct sockaddr *)&cli_sa6, &addrlen); + if (CHECK(err, "getsockname(cli_fd)", "err:%d errno:%d\n", + err, errno)) + goto done; + + accept_fd = accept(listen_fd, NULL, NULL); + if (CHECK(accept_fd == -1, "accept(listen_fd)", + "accept_fd:%d errno:%d\n", + accept_fd, errno)) + goto done; + + if (init_sk_storage(accept_fd, 0xeB9F)) + goto done; + + for (i = 0; i < 2; i++) { + /* Send some data from accept_fd to cli_fd. + * MSG_EOR to stop kernel from coalescing two pkts. 
+ */ + err = send(accept_fd, DATA, DATA_LEN, MSG_EOR); + if (CHECK(err != DATA_LEN, "send(accept_fd)", + "err:%d errno:%d\n", err, errno)) + goto done; + + err = recv(cli_fd, buf, DATA_LEN, 0); + if (CHECK(err != DATA_LEN, "recv(cli_fd)", "err:%d errno:%d\n", + err, errno)) + goto done; + } + + shutdown(cli_fd, SHUT_WR); + err = recv(accept_fd, buf, 1, 0); + if (CHECK(err, "recv(accept_fd) for fin", "err:%d errno:%d\n", + err, errno)) + goto done; + shutdown(accept_fd, SHUT_WR); + err = recv(cli_fd, buf, 1, 0); + if (CHECK(err, "recv(cli_fd) for fin", "err:%d errno:%d\n", + err, errno)) + goto done; + check_sk_pkt_out_cnt(accept_fd, cli_fd); + check_result(); + +done: + if (accept_fd != -1) + close(accept_fd); + if (cli_fd != -1) + close(cli_fd); + if (listen_fd != -1) + close(listen_fd); +} + +void test_sock_fields(void) +{ + struct bpf_link *egress_link = NULL, *ingress_link = NULL; + int parent_cg_fd = -1, child_cg_fd = -1; + + /* Create a cgroup, get fd, and join it */ + parent_cg_fd = test__join_cgroup(PARENT_CGROUP); + if (CHECK_FAIL(parent_cg_fd < 0)) + return; + parent_cg_id = get_cgroup_id(PARENT_CGROUP); + if (CHECK_FAIL(!parent_cg_id)) + goto done; + + child_cg_fd = test__join_cgroup(CHILD_CGROUP); + if (CHECK_FAIL(child_cg_fd < 0)) + goto done; + child_cg_id = get_cgroup_id(CHILD_CGROUP); + if (CHECK_FAIL(!child_cg_id)) + goto done; + + skel = test_sock_fields__open_and_load(); + if (CHECK(!skel, "test_sock_fields__open_and_load", "failed\n")) + goto done; + + egress_link = bpf_program__attach_cgroup(skel->progs.egress_read_sock_fields, + child_cg_fd); + if (CHECK(IS_ERR(egress_link), "attach_cgroup(egress)", "err:%ld\n", + PTR_ERR(egress_link))) + goto done; + + ingress_link = bpf_program__attach_cgroup(skel->progs.ingress_read_sock_fields, + child_cg_fd); + if (CHECK(IS_ERR(ingress_link), "attach_cgroup(ingress)", "err:%ld\n", + PTR_ERR(ingress_link))) + goto done; + + linum_map_fd = bpf_map__fd(skel->maps.linum_map); + sk_pkt_out_cnt_fd = 
bpf_map__fd(skel->maps.sk_pkt_out_cnt); + sk_pkt_out_cnt10_fd = bpf_map__fd(skel->maps.sk_pkt_out_cnt10); + + test(); + +done: + bpf_link__destroy(egress_link); + bpf_link__destroy(ingress_link); + test_sock_fields__destroy(skel); + if (child_cg_fd != -1) + close(child_cg_fd); + if (parent_cg_fd != -1) + close(parent_cg_fd); +} diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c index 96e7b7f84c65..85f73261fab0 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c +++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c @@ -4,6 +4,9 @@ #include "test_progs.h" #include "test_skmsg_load_helpers.skel.h" +#include "test_sockmap_update.skel.h" +#include "test_sockmap_invalid_update.skel.h" +#include "bpf_iter_sockmap.skel.h" #define TCP_REPAIR 19 /* TCP sock is under repair right now */ @@ -45,6 +48,37 @@ error: return -1; } +static void compare_cookies(struct bpf_map *src, struct bpf_map *dst) +{ + __u32 i, max_entries = bpf_map__max_entries(src); + int err, duration = 0, src_fd, dst_fd; + + src_fd = bpf_map__fd(src); + dst_fd = bpf_map__fd(dst); + + for (i = 0; i < max_entries; i++) { + __u64 src_cookie, dst_cookie; + + err = bpf_map_lookup_elem(src_fd, &i, &src_cookie); + if (err && errno == ENOENT) { + err = bpf_map_lookup_elem(dst_fd, &i, &dst_cookie); + CHECK(!err, "map_lookup_elem(dst)", "element %u not deleted\n", i); + CHECK(err && errno != ENOENT, "map_lookup_elem(dst)", "%s\n", + strerror(errno)); + continue; + } + if (CHECK(err, "lookup_elem(src)", "%s\n", strerror(errno))) + continue; + + err = bpf_map_lookup_elem(dst_fd, &i, &dst_cookie); + if (CHECK(err, "lookup_elem(dst)", "%s\n", strerror(errno))) + continue; + + CHECK(dst_cookie != src_cookie, "cookie mismatch", + "%llu != %llu (pos %u)\n", dst_cookie, src_cookie, i); + } +} + /* Create a map, populate it with one socket, and free the map. 
*/ static void test_sockmap_create_update_free(enum bpf_map_type map_type) { @@ -101,6 +135,151 @@ out: test_skmsg_load_helpers__destroy(skel); } +static void test_sockmap_update(enum bpf_map_type map_type) +{ + struct bpf_prog_test_run_attr tattr; + int err, prog, src, duration = 0; + struct test_sockmap_update *skel; + struct bpf_map *dst_map; + const __u32 zero = 0; + char dummy[14] = {0}; + __s64 sk; + + sk = connected_socket_v4(); + if (CHECK(sk == -1, "connected_socket_v4", "cannot connect\n")) + return; + + skel = test_sockmap_update__open_and_load(); + if (CHECK(!skel, "open_and_load", "cannot load skeleton\n")) + goto close_sk; + + prog = bpf_program__fd(skel->progs.copy_sock_map); + src = bpf_map__fd(skel->maps.src); + if (map_type == BPF_MAP_TYPE_SOCKMAP) + dst_map = skel->maps.dst_sock_map; + else + dst_map = skel->maps.dst_sock_hash; + + err = bpf_map_update_elem(src, &zero, &sk, BPF_NOEXIST); + if (CHECK(err, "update_elem(src)", "errno=%u\n", errno)) + goto out; + + tattr = (struct bpf_prog_test_run_attr){ + .prog_fd = prog, + .repeat = 1, + .data_in = dummy, + .data_size_in = sizeof(dummy), + }; + + err = bpf_prog_test_run_xattr(&tattr); + if (CHECK_ATTR(err || !tattr.retval, "bpf_prog_test_run", + "errno=%u retval=%u\n", errno, tattr.retval)) + goto out; + + compare_cookies(skel->maps.src, dst_map); + +out: + test_sockmap_update__destroy(skel); +close_sk: + close(sk); +} + +static void test_sockmap_invalid_update(void) +{ + struct test_sockmap_invalid_update *skel; + int duration = 0; + + skel = test_sockmap_invalid_update__open_and_load(); + if (CHECK(skel, "open_and_load", "verifier accepted map_update\n")) + test_sockmap_invalid_update__destroy(skel); +} + +static void test_sockmap_copy(enum bpf_map_type map_type) +{ + DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts); + int err, len, src_fd, iter_fd, duration = 0; + union bpf_iter_link_info linfo = {}; + __u32 i, num_sockets, num_elems; + struct bpf_iter_sockmap *skel; + __s64 *sock_fd = NULL; + 
struct bpf_link *link; + struct bpf_map *src; + char buf[64]; + + skel = bpf_iter_sockmap__open_and_load(); + if (CHECK(!skel, "bpf_iter_sockmap__open_and_load", "skeleton open_and_load failed\n")) + return; + + if (map_type == BPF_MAP_TYPE_SOCKMAP) { + src = skel->maps.sockmap; + num_elems = bpf_map__max_entries(src); + num_sockets = num_elems - 1; + } else { + src = skel->maps.sockhash; + num_elems = bpf_map__max_entries(src) - 1; + num_sockets = num_elems; + } + + sock_fd = calloc(num_sockets, sizeof(*sock_fd)); + if (CHECK(!sock_fd, "calloc(sock_fd)", "failed to allocate\n")) + goto out; + + for (i = 0; i < num_sockets; i++) + sock_fd[i] = -1; + + src_fd = bpf_map__fd(src); + + for (i = 0; i < num_sockets; i++) { + sock_fd[i] = connected_socket_v4(); + if (CHECK(sock_fd[i] == -1, "connected_socket_v4", "cannot connect\n")) + goto out; + + err = bpf_map_update_elem(src_fd, &i, &sock_fd[i], BPF_NOEXIST); + if (CHECK(err, "map_update", "failed: %s\n", strerror(errno))) + goto out; + } + + linfo.map.map_fd = src_fd; + opts.link_info = &linfo; + opts.link_info_len = sizeof(linfo); + link = bpf_program__attach_iter(skel->progs.copy, &opts); + if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n")) + goto out; + + iter_fd = bpf_iter_create(bpf_link__fd(link)); + if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n")) + goto free_link; + + /* do some tests */ + while ((len = read(iter_fd, buf, sizeof(buf))) > 0) + ; + if (CHECK(len < 0, "read", "failed: %s\n", strerror(errno))) + goto close_iter; + + /* test results */ + if (CHECK(skel->bss->elems != num_elems, "elems", "got %u expected %u\n", + skel->bss->elems, num_elems)) + goto close_iter; + + if (CHECK(skel->bss->socks != num_sockets, "socks", "got %u expected %u\n", + skel->bss->socks, num_sockets)) + goto close_iter; + + compare_cookies(src, skel->maps.dst); + +close_iter: + close(iter_fd); +free_link: + bpf_link__destroy(link); +out: + for (i = 0; sock_fd && i < num_sockets; i++) + if (sock_fd[i] 
>= 0) + close(sock_fd[i]); + if (sock_fd) + free(sock_fd); + bpf_iter_sockmap__destroy(skel); +} + void test_sockmap_basic(void) { if (test__start_subtest("sockmap create_update_free")) @@ -111,4 +290,14 @@ void test_sockmap_basic(void) test_skmsg_helpers(BPF_MAP_TYPE_SOCKMAP); if (test__start_subtest("sockhash sk_msg load helpers")) test_skmsg_helpers(BPF_MAP_TYPE_SOCKHASH); + if (test__start_subtest("sockmap update")) + test_sockmap_update(BPF_MAP_TYPE_SOCKMAP); + if (test__start_subtest("sockhash update")) + test_sockmap_update(BPF_MAP_TYPE_SOCKHASH); + if (test__start_subtest("sockmap update in unsafe context")) + test_sockmap_invalid_update(); + if (test__start_subtest("sockmap copy")) + test_sockmap_copy(BPF_MAP_TYPE_SOCKMAP); + if (test__start_subtest("sockhash copy")) + test_sockmap_copy(BPF_MAP_TYPE_SOCKHASH); } diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c index 5f54c6aec7f0..b25c9c45c148 100644 --- a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c +++ b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c @@ -45,9 +45,9 @@ static int getsetsockopt(void) goto err; } - if (*(int *)big_buf != 0x08) { + if (*big_buf != 0x08) { log_err("Unexpected getsockopt(IP_TOS) optval 0x%x != 0x08", - *(int *)big_buf); + (int)*big_buf); goto err; } diff --git a/tools/testing/selftests/bpf/prog_tests/subprogs.c b/tools/testing/selftests/bpf/prog_tests/subprogs.c new file mode 100644 index 000000000000..a00abf58c037 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/subprogs.c @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include <test_progs.h> +#include <time.h> +#include "test_subprogs.skel.h" + +static int duration; + +void test_subprogs(void) +{ + struct test_subprogs *skel; + int err; + + skel = test_subprogs__open_and_load(); + if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) + return; + + err = test_subprogs__attach(skel); + if 
(CHECK(err, "skel_attach", "failed to attach skeleton: %d\n", err)) + goto cleanup; + + usleep(1); + + CHECK(skel->bss->res1 != 12, "res1", "got %d, exp %d\n", skel->bss->res1, 12); + CHECK(skel->bss->res2 != 17, "res2", "got %d, exp %d\n", skel->bss->res2, 17); + CHECK(skel->bss->res3 != 19, "res3", "got %d, exp %d\n", skel->bss->res3, 19); + CHECK(skel->bss->res4 != 36, "res4", "got %d, exp %d\n", skel->bss->res4, 36); + +cleanup: + test_subprogs__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c index bb8fe646dd9f..ee27d68d2a1c 100644 --- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c +++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> +#include <network_helpers.h> /* test_tailcall_1 checks basic functionality by patching multiple locations * in a single program for a single tail call slot with nop->jmp, jmp->nop @@ -472,6 +473,329 @@ out: bpf_object__close(obj); } +/* test_tailcall_bpf2bpf_1 purpose is to make sure that tailcalls are working + * correctly in correlation with BPF subprograms + */ +static void test_tailcall_bpf2bpf_1(void) +{ + int err, map_fd, prog_fd, main_fd, i; + struct bpf_map *prog_array; + struct bpf_program *prog; + struct bpf_object *obj; + __u32 retval, duration; + char prog_name[32]; + + err = bpf_prog_load("tailcall_bpf2bpf1.o", BPF_PROG_TYPE_SCHED_CLS, + &obj, &prog_fd); + if (CHECK_FAIL(err)) + return; + + prog = bpf_object__find_program_by_title(obj, "classifier"); + if (CHECK_FAIL(!prog)) + goto out; + + main_fd = bpf_program__fd(prog); + if (CHECK_FAIL(main_fd < 0)) + goto out; + + prog_array = bpf_object__find_map_by_name(obj, "jmp_table"); + if (CHECK_FAIL(!prog_array)) + goto out; + + map_fd = bpf_map__fd(prog_array); + if (CHECK_FAIL(map_fd < 0)) + goto out; + + /* nop -> jmp */ + for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { + 
snprintf(prog_name, sizeof(prog_name), "classifier/%i", i); + + prog = bpf_object__find_program_by_title(obj, prog_name); + if (CHECK_FAIL(!prog)) + goto out; + + prog_fd = bpf_program__fd(prog); + if (CHECK_FAIL(prog_fd < 0)) + goto out; + + err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY); + if (CHECK_FAIL(err)) + goto out; + } + + err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0, + 0, &retval, &duration); + CHECK(err || retval != 1, "tailcall", + "err %d errno %d retval %d\n", err, errno, retval); + + /* jmp -> nop, call subprog that will do tailcall */ + i = 1; + err = bpf_map_delete_elem(map_fd, &i); + if (CHECK_FAIL(err)) + goto out; + + err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0, + 0, &retval, &duration); + CHECK(err || retval != 0, "tailcall", "err %d errno %d retval %d\n", + err, errno, retval); + + /* make sure that subprog can access ctx and entry prog that + * called this subprog can properly return + */ + i = 0; + err = bpf_map_delete_elem(map_fd, &i); + if (CHECK_FAIL(err)) + goto out; + + err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0, + 0, &retval, &duration); + CHECK(err || retval != sizeof(pkt_v4) * 2, + "tailcall", "err %d errno %d retval %d\n", + err, errno, retval); +out: + bpf_object__close(obj); +} + +/* test_tailcall_bpf2bpf_2 checks that the count value of the tail call limit + * enforcement matches with expectations when tailcall is preceded with + * bpf2bpf call. 
+ */ +static void test_tailcall_bpf2bpf_2(void) +{ + int err, map_fd, prog_fd, main_fd, data_fd, i, val; + struct bpf_map *prog_array, *data_map; + struct bpf_program *prog; + struct bpf_object *obj; + __u32 retval, duration; + char buff[128] = {}; + + err = bpf_prog_load("tailcall_bpf2bpf2.o", BPF_PROG_TYPE_SCHED_CLS, + &obj, &prog_fd); + if (CHECK_FAIL(err)) + return; + + prog = bpf_object__find_program_by_title(obj, "classifier"); + if (CHECK_FAIL(!prog)) + goto out; + + main_fd = bpf_program__fd(prog); + if (CHECK_FAIL(main_fd < 0)) + goto out; + + prog_array = bpf_object__find_map_by_name(obj, "jmp_table"); + if (CHECK_FAIL(!prog_array)) + goto out; + + map_fd = bpf_map__fd(prog_array); + if (CHECK_FAIL(map_fd < 0)) + goto out; + + prog = bpf_object__find_program_by_title(obj, "classifier/0"); + if (CHECK_FAIL(!prog)) + goto out; + + prog_fd = bpf_program__fd(prog); + if (CHECK_FAIL(prog_fd < 0)) + goto out; + + i = 0; + err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY); + if (CHECK_FAIL(err)) + goto out; + + err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, + &duration, &retval, NULL); + CHECK(err || retval != 1, "tailcall", "err %d errno %d retval %d\n", + err, errno, retval); + + data_map = bpf_object__find_map_by_name(obj, "tailcall.bss"); + if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map))) + return; + + data_fd = bpf_map__fd(data_map); + if (CHECK_FAIL(map_fd < 0)) + return; + + i = 0; + err = bpf_map_lookup_elem(data_fd, &i, &val); + CHECK(err || val != 33, "tailcall count", "err %d errno %d count %d\n", + err, errno, val); + + i = 0; + err = bpf_map_delete_elem(map_fd, &i); + if (CHECK_FAIL(err)) + goto out; + + err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0, + &duration, &retval, NULL); + CHECK(err || retval != 0, "tailcall", "err %d errno %d retval %d\n", + err, errno, retval); +out: + bpf_object__close(obj); +} + +/* test_tailcall_bpf2bpf_3 checks that non-trivial amount of stack (up to + * 256 bytes) can be 
used within bpf subprograms that have the tailcalls + * in them + */ +static void test_tailcall_bpf2bpf_3(void) +{ + int err, map_fd, prog_fd, main_fd, i; + struct bpf_map *prog_array; + struct bpf_program *prog; + struct bpf_object *obj; + __u32 retval, duration; + char prog_name[32]; + + err = bpf_prog_load("tailcall_bpf2bpf3.o", BPF_PROG_TYPE_SCHED_CLS, + &obj, &prog_fd); + if (CHECK_FAIL(err)) + return; + + prog = bpf_object__find_program_by_title(obj, "classifier"); + if (CHECK_FAIL(!prog)) + goto out; + + main_fd = bpf_program__fd(prog); + if (CHECK_FAIL(main_fd < 0)) + goto out; + + prog_array = bpf_object__find_map_by_name(obj, "jmp_table"); + if (CHECK_FAIL(!prog_array)) + goto out; + + map_fd = bpf_map__fd(prog_array); + if (CHECK_FAIL(map_fd < 0)) + goto out; + + for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { + snprintf(prog_name, sizeof(prog_name), "classifier/%i", i); + + prog = bpf_object__find_program_by_title(obj, prog_name); + if (CHECK_FAIL(!prog)) + goto out; + + prog_fd = bpf_program__fd(prog); + if (CHECK_FAIL(prog_fd < 0)) + goto out; + + err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY); + if (CHECK_FAIL(err)) + goto out; + } + + err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0, + &duration, &retval, NULL); + CHECK(err || retval != sizeof(pkt_v4) * 3, + "tailcall", "err %d errno %d retval %d\n", + err, errno, retval); + + i = 1; + err = bpf_map_delete_elem(map_fd, &i); + if (CHECK_FAIL(err)) + goto out; + + err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0, + &duration, &retval, NULL); + CHECK(err || retval != sizeof(pkt_v4), + "tailcall", "err %d errno %d retval %d\n", + err, errno, retval); + + i = 0; + err = bpf_map_delete_elem(map_fd, &i); + if (CHECK_FAIL(err)) + goto out; + + err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0, + &duration, &retval, NULL); + CHECK(err || retval != sizeof(pkt_v4) * 2, + "tailcall", "err %d errno %d retval %d\n", + err, errno, retval); +out: + 
bpf_object__close(obj); +} + +/* test_tailcall_bpf2bpf_4 checks that tailcall counter is correctly preserved + * across tailcalls combined with bpf2bpf calls. for making sure that tailcall + * counter behaves correctly, bpf program will go through following flow: + * + * entry -> entry_subprog -> tailcall0 -> bpf_func0 -> subprog0 -> + * -> tailcall1 -> bpf_func1 -> subprog1 -> tailcall2 -> bpf_func2 -> + * subprog2 [here bump global counter] --------^ + * + * We go through first two tailcalls and start counting from the subprog2 where + * the loop begins. At the end of the test make sure that the global counter is + * equal to 31, because tailcall counter includes the first two tailcalls + * whereas global counter is incremented only on loop presented on flow above. + */ +static void test_tailcall_bpf2bpf_4(void) +{ + int err, map_fd, prog_fd, main_fd, data_fd, i, val; + struct bpf_map *prog_array, *data_map; + struct bpf_program *prog; + struct bpf_object *obj; + __u32 retval, duration; + char prog_name[32]; + + err = bpf_prog_load("tailcall_bpf2bpf4.o", BPF_PROG_TYPE_SCHED_CLS, + &obj, &prog_fd); + if (CHECK_FAIL(err)) + return; + + prog = bpf_object__find_program_by_title(obj, "classifier"); + if (CHECK_FAIL(!prog)) + goto out; + + main_fd = bpf_program__fd(prog); + if (CHECK_FAIL(main_fd < 0)) + goto out; + + prog_array = bpf_object__find_map_by_name(obj, "jmp_table"); + if (CHECK_FAIL(!prog_array)) + goto out; + + map_fd = bpf_map__fd(prog_array); + if (CHECK_FAIL(map_fd < 0)) + goto out; + + for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) { + snprintf(prog_name, sizeof(prog_name), "classifier/%i", i); + + prog = bpf_object__find_program_by_title(obj, prog_name); + if (CHECK_FAIL(!prog)) + goto out; + + prog_fd = bpf_program__fd(prog); + if (CHECK_FAIL(prog_fd < 0)) + goto out; + + err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY); + if (CHECK_FAIL(err)) + goto out; + } + + err = bpf_prog_test_run(main_fd, 1, &pkt_v4, sizeof(pkt_v4), 0, + 
&duration, &retval, NULL); + CHECK(err || retval != sizeof(pkt_v4) * 3, "tailcall", "err %d errno %d retval %d\n", + err, errno, retval); + + data_map = bpf_object__find_map_by_name(obj, "tailcall.bss"); + if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map))) + return; + + data_fd = bpf_map__fd(data_map); + if (CHECK_FAIL(map_fd < 0)) + return; + + i = 0; + err = bpf_map_lookup_elem(data_fd, &i, &val); + CHECK(err || val != 31, "tailcall count", "err %d errno %d count %d\n", + err, errno, val); + +out: + bpf_object__close(obj); +} + void test_tailcalls(void) { if (test__start_subtest("tailcall_1")) @@ -484,4 +808,12 @@ void test_tailcalls(void) test_tailcall_4(); if (test__start_subtest("tailcall_5")) test_tailcall_5(); + if (test__start_subtest("tailcall_bpf2bpf_1")) + test_tailcall_bpf2bpf_1(); + if (test__start_subtest("tailcall_bpf2bpf_2")) + test_tailcall_bpf2bpf_2(); + if (test__start_subtest("tailcall_bpf2bpf_3")) + test_tailcall_bpf2bpf_3(); + if (test__start_subtest("tailcall_bpf2bpf_4")) + test_tailcall_bpf2bpf_4(); } diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c new file mode 100644 index 000000000000..c85174cdcb77 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c @@ -0,0 +1,610 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#define _GNU_SOURCE +#include <sched.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/socket.h> +#include <linux/compiler.h> + +#include "test_progs.h" +#include "cgroup_helpers.h" +#include "network_helpers.h" +#include "test_tcp_hdr_options.h" +#include "test_tcp_hdr_options.skel.h" +#include "test_misc_tcp_hdr_options.skel.h" + +#define LO_ADDR6 "::1" +#define CG_NAME "/tcpbpf-hdr-opt-test" + +struct bpf_test_option exp_passive_estab_in; +struct bpf_test_option exp_active_estab_in; +struct bpf_test_option exp_passive_fin_in; +struct bpf_test_option exp_active_fin_in; +struct 
hdr_stg exp_passive_hdr_stg; +struct hdr_stg exp_active_hdr_stg = { .active = true, }; + +static struct test_misc_tcp_hdr_options *misc_skel; +static struct test_tcp_hdr_options *skel; +static int lport_linum_map_fd; +static int hdr_stg_map_fd; +static __u32 duration; +static int cg_fd; + +struct sk_fds { + int srv_fd; + int passive_fd; + int active_fd; + int passive_lport; + int active_lport; +}; + +static int create_netns(void) +{ + if (CHECK(unshare(CLONE_NEWNET), "create netns", + "unshare(CLONE_NEWNET): %s (%d)", + strerror(errno), errno)) + return -1; + + if (CHECK(system("ip link set dev lo up"), "run ip cmd", + "failed to bring lo link up\n")) + return -1; + + return 0; +} + +static int write_sysctl(const char *sysctl, const char *value) +{ + int fd, err, len; + + fd = open(sysctl, O_WRONLY); + if (CHECK(fd == -1, "open sysctl", "open(%s): %s (%d)\n", + sysctl, strerror(errno), errno)) + return -1; + + len = strlen(value); + err = write(fd, value, len); + close(fd); + if (CHECK(err != len, "write sysctl", + "write(%s, %s): err:%d %s (%d)\n", + sysctl, value, err, strerror(errno), errno)) + return -1; + + return 0; +} + +static void print_hdr_stg(const struct hdr_stg *hdr_stg, const char *prefix) +{ + fprintf(stderr, "%s{active:%u, resend_syn:%u, syncookie:%u, fastopen:%u}\n", + prefix ? : "", hdr_stg->active, hdr_stg->resend_syn, + hdr_stg->syncookie, hdr_stg->fastopen); +} + +static void print_option(const struct bpf_test_option *opt, const char *prefix) +{ + fprintf(stderr, "%s{flags:0x%x, max_delack_ms:%u, rand:0x%x}\n", + prefix ? 
: "", opt->flags, opt->max_delack_ms, opt->rand); +} + +static void sk_fds_close(struct sk_fds *sk_fds) +{ + close(sk_fds->srv_fd); + close(sk_fds->passive_fd); + close(sk_fds->active_fd); +} + +static int sk_fds_shutdown(struct sk_fds *sk_fds) +{ + int ret, abyte; + + shutdown(sk_fds->active_fd, SHUT_WR); + ret = read(sk_fds->passive_fd, &abyte, sizeof(abyte)); + if (CHECK(ret != 0, "read-after-shutdown(passive_fd):", + "ret:%d %s (%d)\n", + ret, strerror(errno), errno)) + return -1; + + shutdown(sk_fds->passive_fd, SHUT_WR); + ret = read(sk_fds->active_fd, &abyte, sizeof(abyte)); + if (CHECK(ret != 0, "read-after-shutdown(active_fd):", + "ret:%d %s (%d)\n", + ret, strerror(errno), errno)) + return -1; + + return 0; +} + +static int sk_fds_connect(struct sk_fds *sk_fds, bool fast_open) +{ + const char fast[] = "FAST!!!"; + struct sockaddr_in6 addr6; + socklen_t len; + + sk_fds->srv_fd = start_server(AF_INET6, SOCK_STREAM, LO_ADDR6, 0, 0); + if (CHECK(sk_fds->srv_fd == -1, "start_server", "%s (%d)\n", + strerror(errno), errno)) + goto error; + + if (fast_open) + sk_fds->active_fd = fastopen_connect(sk_fds->srv_fd, fast, + sizeof(fast), 0); + else + sk_fds->active_fd = connect_to_fd(sk_fds->srv_fd, 0); + + if (CHECK_FAIL(sk_fds->active_fd == -1)) { + close(sk_fds->srv_fd); + goto error; + } + + len = sizeof(addr6); + if (CHECK(getsockname(sk_fds->srv_fd, (struct sockaddr *)&addr6, + &len), "getsockname(srv_fd)", "%s (%d)\n", + strerror(errno), errno)) + goto error_close; + sk_fds->passive_lport = ntohs(addr6.sin6_port); + + len = sizeof(addr6); + if (CHECK(getsockname(sk_fds->active_fd, (struct sockaddr *)&addr6, + &len), "getsockname(active_fd)", "%s (%d)\n", + strerror(errno), errno)) + goto error_close; + sk_fds->active_lport = ntohs(addr6.sin6_port); + + sk_fds->passive_fd = accept(sk_fds->srv_fd, NULL, 0); + if (CHECK(sk_fds->passive_fd == -1, "accept(srv_fd)", "%s (%d)\n", + strerror(errno), errno)) + goto error_close; + + if (fast_open) { + char 
bytes_in[sizeof(fast)]; + int ret; + + ret = read(sk_fds->passive_fd, bytes_in, sizeof(bytes_in)); + if (CHECK(ret != sizeof(fast), "read fastopen syn data", + "expected=%lu actual=%d\n", sizeof(fast), ret)) { + close(sk_fds->passive_fd); + goto error_close; + } + } + + return 0; + +error_close: + close(sk_fds->active_fd); + close(sk_fds->srv_fd); + +error: + memset(sk_fds, -1, sizeof(*sk_fds)); + return -1; +} + +static int check_hdr_opt(const struct bpf_test_option *exp, + const struct bpf_test_option *act, + const char *hdr_desc) +{ + if (CHECK(memcmp(exp, act, sizeof(*exp)), + "expected-vs-actual", "unexpected %s\n", hdr_desc)) { + print_option(exp, "expected: "); + print_option(act, " actual: "); + return -1; + } + + return 0; +} + +static int check_hdr_stg(const struct hdr_stg *exp, int fd, + const char *stg_desc) +{ + struct hdr_stg act; + + if (CHECK(bpf_map_lookup_elem(hdr_stg_map_fd, &fd, &act), + "map_lookup(hdr_stg_map_fd)", "%s %s (%d)\n", + stg_desc, strerror(errno), errno)) + return -1; + + if (CHECK(memcmp(exp, &act, sizeof(*exp)), + "expected-vs-actual", "unexpected %s\n", stg_desc)) { + print_hdr_stg(exp, "expected: "); + print_hdr_stg(&act, " actual: "); + return -1; + } + + return 0; +} + +static int check_error_linum(const struct sk_fds *sk_fds) +{ + unsigned int nr_errors = 0; + struct linum_err linum_err; + int lport; + + lport = sk_fds->passive_lport; + if (!bpf_map_lookup_elem(lport_linum_map_fd, &lport, &linum_err)) { + fprintf(stderr, + "bpf prog error out at lport:passive(%d), linum:%u err:%d\n", + lport, linum_err.linum, linum_err.err); + nr_errors++; + } + + lport = sk_fds->active_lport; + if (!bpf_map_lookup_elem(lport_linum_map_fd, &lport, &linum_err)) { + fprintf(stderr, + "bpf prog error out at lport:active(%d), linum:%u err:%d\n", + lport, linum_err.linum, linum_err.err); + nr_errors++; + } + + return nr_errors; +} + +static void check_hdr_and_close_fds(struct sk_fds *sk_fds) +{ + const __u32 expected_inherit_cb_flags = + 
BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG | + BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG | + BPF_SOCK_OPS_STATE_CB_FLAG; + + if (sk_fds_shutdown(sk_fds)) + goto check_linum; + + if (CHECK(expected_inherit_cb_flags != skel->bss->inherit_cb_flags, + "Unexpected inherit_cb_flags", "0x%x != 0x%x\n", + skel->bss->inherit_cb_flags, expected_inherit_cb_flags)) + goto check_linum; + + if (check_hdr_stg(&exp_passive_hdr_stg, sk_fds->passive_fd, + "passive_hdr_stg")) + goto check_linum; + + if (check_hdr_stg(&exp_active_hdr_stg, sk_fds->active_fd, + "active_hdr_stg")) + goto check_linum; + + if (check_hdr_opt(&exp_passive_estab_in, &skel->bss->passive_estab_in, + "passive_estab_in")) + goto check_linum; + + if (check_hdr_opt(&exp_active_estab_in, &skel->bss->active_estab_in, + "active_estab_in")) + goto check_linum; + + if (check_hdr_opt(&exp_passive_fin_in, &skel->bss->passive_fin_in, + "passive_fin_in")) + goto check_linum; + + check_hdr_opt(&exp_active_fin_in, &skel->bss->active_fin_in, + "active_fin_in"); + +check_linum: + CHECK_FAIL(check_error_linum(sk_fds)); + sk_fds_close(sk_fds); +} + +static void prepare_out(void) +{ + skel->bss->active_syn_out = exp_passive_estab_in; + skel->bss->passive_synack_out = exp_active_estab_in; + + skel->bss->active_fin_out = exp_passive_fin_in; + skel->bss->passive_fin_out = exp_active_fin_in; +} + +static void reset_test(void) +{ + size_t optsize = sizeof(struct bpf_test_option); + int lport, err; + + memset(&skel->bss->passive_synack_out, 0, optsize); + memset(&skel->bss->passive_fin_out, 0, optsize); + + memset(&skel->bss->passive_estab_in, 0, optsize); + memset(&skel->bss->passive_fin_in, 0, optsize); + + memset(&skel->bss->active_syn_out, 0, optsize); + memset(&skel->bss->active_fin_out, 0, optsize); + + memset(&skel->bss->active_estab_in, 0, optsize); + memset(&skel->bss->active_fin_in, 0, optsize); + + skel->bss->inherit_cb_flags = 0; + + skel->data->test_kind = TCPOPT_EXP; + skel->data->test_magic = 0xeB9F; + + 
memset(&exp_passive_estab_in, 0, optsize); + memset(&exp_active_estab_in, 0, optsize); + memset(&exp_passive_fin_in, 0, optsize); + memset(&exp_active_fin_in, 0, optsize); + + memset(&exp_passive_hdr_stg, 0, sizeof(exp_passive_hdr_stg)); + memset(&exp_active_hdr_stg, 0, sizeof(exp_active_hdr_stg)); + exp_active_hdr_stg.active = true; + + err = bpf_map_get_next_key(lport_linum_map_fd, NULL, &lport); + while (!err) { + bpf_map_delete_elem(lport_linum_map_fd, &lport); + err = bpf_map_get_next_key(lport_linum_map_fd, &lport, &lport); + } +} + +static void fastopen_estab(void) +{ + struct bpf_link *link; + struct sk_fds sk_fds; + + hdr_stg_map_fd = bpf_map__fd(skel->maps.hdr_stg_map); + lport_linum_map_fd = bpf_map__fd(skel->maps.lport_linum_map); + + exp_passive_estab_in.flags = OPTION_F_RAND | OPTION_F_MAX_DELACK_MS; + exp_passive_estab_in.rand = 0xfa; + exp_passive_estab_in.max_delack_ms = 11; + + exp_active_estab_in.flags = OPTION_F_RAND | OPTION_F_MAX_DELACK_MS; + exp_active_estab_in.rand = 0xce; + exp_active_estab_in.max_delack_ms = 22; + + exp_passive_hdr_stg.fastopen = true; + + prepare_out(); + + /* Allow fastopen without fastopen cookie */ + if (write_sysctl("/proc/sys/net/ipv4/tcp_fastopen", "1543")) + return; + + link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd); + if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n", + PTR_ERR(link))) + return; + + if (sk_fds_connect(&sk_fds, true)) { + bpf_link__destroy(link); + return; + } + + check_hdr_and_close_fds(&sk_fds); + bpf_link__destroy(link); +} + +static void syncookie_estab(void) +{ + struct bpf_link *link; + struct sk_fds sk_fds; + + hdr_stg_map_fd = bpf_map__fd(skel->maps.hdr_stg_map); + lport_linum_map_fd = bpf_map__fd(skel->maps.lport_linum_map); + + exp_passive_estab_in.flags = OPTION_F_RAND | OPTION_F_MAX_DELACK_MS; + exp_passive_estab_in.rand = 0xfa; + exp_passive_estab_in.max_delack_ms = 11; + + exp_active_estab_in.flags = OPTION_F_RAND | OPTION_F_MAX_DELACK_MS | + OPTION_F_RESEND; + 
exp_active_estab_in.rand = 0xce; + exp_active_estab_in.max_delack_ms = 22; + + exp_passive_hdr_stg.syncookie = true; + exp_active_hdr_stg.resend_syn = true, + + prepare_out(); + + /* Clear the RESEND to ensure the bpf prog can learn + * want_cookie and set the RESEND by itself. + */ + skel->bss->passive_synack_out.flags &= ~OPTION_F_RESEND; + + /* Enforce syncookie mode */ + if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "2")) + return; + + link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd); + if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n", + PTR_ERR(link))) + return; + + if (sk_fds_connect(&sk_fds, false)) { + bpf_link__destroy(link); + return; + } + + check_hdr_and_close_fds(&sk_fds); + bpf_link__destroy(link); +} + +static void fin(void) +{ + struct bpf_link *link; + struct sk_fds sk_fds; + + hdr_stg_map_fd = bpf_map__fd(skel->maps.hdr_stg_map); + lport_linum_map_fd = bpf_map__fd(skel->maps.lport_linum_map); + + exp_passive_fin_in.flags = OPTION_F_RAND; + exp_passive_fin_in.rand = 0xfa; + + exp_active_fin_in.flags = OPTION_F_RAND; + exp_active_fin_in.rand = 0xce; + + prepare_out(); + + if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "1")) + return; + + link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd); + if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n", + PTR_ERR(link))) + return; + + if (sk_fds_connect(&sk_fds, false)) { + bpf_link__destroy(link); + return; + } + + check_hdr_and_close_fds(&sk_fds); + bpf_link__destroy(link); +} + +static void __simple_estab(bool exprm) +{ + struct bpf_link *link; + struct sk_fds sk_fds; + + hdr_stg_map_fd = bpf_map__fd(skel->maps.hdr_stg_map); + lport_linum_map_fd = bpf_map__fd(skel->maps.lport_linum_map); + + exp_passive_estab_in.flags = OPTION_F_RAND | OPTION_F_MAX_DELACK_MS; + exp_passive_estab_in.rand = 0xfa; + exp_passive_estab_in.max_delack_ms = 11; + + exp_active_estab_in.flags = OPTION_F_RAND | OPTION_F_MAX_DELACK_MS; + exp_active_estab_in.rand = 0xce; + 
exp_active_estab_in.max_delack_ms = 22; + + prepare_out(); + + if (!exprm) { + skel->data->test_kind = 0xB9; + skel->data->test_magic = 0; + } + + if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "1")) + return; + + link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd); + if (CHECK(IS_ERR(link), "attach_cgroup(estab)", "err: %ld\n", + PTR_ERR(link))) + return; + + if (sk_fds_connect(&sk_fds, false)) { + bpf_link__destroy(link); + return; + } + + check_hdr_and_close_fds(&sk_fds); + bpf_link__destroy(link); +} + +static void no_exprm_estab(void) +{ + __simple_estab(false); +} + +static void simple_estab(void) +{ + __simple_estab(true); +} + +static void misc(void) +{ + const char send_msg[] = "MISC!!!"; + char recv_msg[sizeof(send_msg)]; + const unsigned int nr_data = 2; + struct bpf_link *link; + struct sk_fds sk_fds; + int i, ret; + + lport_linum_map_fd = bpf_map__fd(misc_skel->maps.lport_linum_map); + + if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "1")) + return; + + link = bpf_program__attach_cgroup(misc_skel->progs.misc_estab, cg_fd); + if (CHECK(IS_ERR(link), "attach_cgroup(misc_estab)", "err: %ld\n", + PTR_ERR(link))) + return; + + if (sk_fds_connect(&sk_fds, false)) { + bpf_link__destroy(link); + return; + } + + for (i = 0; i < nr_data; i++) { + /* MSG_EOR to ensure skb will not be combined */ + ret = send(sk_fds.active_fd, send_msg, sizeof(send_msg), + MSG_EOR); + if (CHECK(ret != sizeof(send_msg), "send(msg)", "ret:%d\n", + ret)) + goto check_linum; + + ret = read(sk_fds.passive_fd, recv_msg, sizeof(recv_msg)); + if (CHECK(ret != sizeof(send_msg), "read(msg)", "ret:%d\n", + ret)) + goto check_linum; + } + + if (sk_fds_shutdown(&sk_fds)) + goto check_linum; + + CHECK(misc_skel->bss->nr_syn != 1, "unexpected nr_syn", + "expected (1) != actual (%u)\n", + misc_skel->bss->nr_syn); + + CHECK(misc_skel->bss->nr_data != nr_data, "unexpected nr_data", + "expected (%u) != actual (%u)\n", + nr_data, misc_skel->bss->nr_data); + + /* The last ACK may 
have been delayed, so it is either 1 or 2. */ + CHECK(misc_skel->bss->nr_pure_ack != 1 && + misc_skel->bss->nr_pure_ack != 2, + "unexpected nr_pure_ack", + "expected (1 or 2) != actual (%u)\n", + misc_skel->bss->nr_pure_ack); + + CHECK(misc_skel->bss->nr_fin != 1, "unexpected nr_fin", + "expected (1) != actual (%u)\n", + misc_skel->bss->nr_fin); + +check_linum: + CHECK_FAIL(check_error_linum(&sk_fds)); + sk_fds_close(&sk_fds); + bpf_link__destroy(link); +} + +struct test { + const char *desc; + void (*run)(void); +}; + +#define DEF_TEST(name) { #name, name } +static struct test tests[] = { + DEF_TEST(simple_estab), + DEF_TEST(no_exprm_estab), + DEF_TEST(syncookie_estab), + DEF_TEST(fastopen_estab), + DEF_TEST(fin), + DEF_TEST(misc), +}; + +void test_tcp_hdr_options(void) +{ + int i; + + skel = test_tcp_hdr_options__open_and_load(); + if (CHECK(!skel, "open and load skel", "failed")) + return; + + misc_skel = test_misc_tcp_hdr_options__open_and_load(); + if (CHECK(!misc_skel, "open and load misc test skel", "failed")) + goto skel_destroy; + + cg_fd = test__join_cgroup(CG_NAME); + if (CHECK_FAIL(cg_fd < 0)) + goto skel_destroy; + + for (i = 0; i < ARRAY_SIZE(tests); i++) { + if (!test__start_subtest(tests[i].desc)) + continue; + + if (create_netns()) + break; + + tests[i].run(); + + reset_test(); + } + + close(cg_fd); +skel_destroy: + test_misc_tcp_hdr_options__destroy(misc_skel); + test_tcp_hdr_options__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/test_bpffs.c b/tools/testing/selftests/bpf/prog_tests/test_bpffs.c new file mode 100644 index 000000000000..172c999e523c --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_bpffs.c @@ -0,0 +1,94 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#define _GNU_SOURCE +#include <sched.h> +#include <sys/mount.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <test_progs.h> + +#define TDIR "/sys/kernel/debug" + +static int read_iter(char *file) +{ + /* 1024 
should be enough to get contiguous 4 "iter" letters at some point */ + char buf[1024]; + int fd, len; + + fd = open(file, 0); + if (fd < 0) + return -1; + while ((len = read(fd, buf, sizeof(buf))) > 0) + if (strstr(buf, "iter")) { + close(fd); + return 0; + } + close(fd); + return -1; +} + +static int fn(void) +{ + int err, duration = 0; + + err = unshare(CLONE_NEWNS); + if (CHECK(err, "unshare", "failed: %d\n", errno)) + goto out; + + err = mount("", "/", "", MS_REC | MS_PRIVATE, NULL); + if (CHECK(err, "mount /", "failed: %d\n", errno)) + goto out; + + err = umount(TDIR); + if (CHECK(err, "umount " TDIR, "failed: %d\n", errno)) + goto out; + + err = mount("none", TDIR, "tmpfs", 0, NULL); + if (CHECK(err, "mount", "mount root failed: %d\n", errno)) + goto out; + + err = mkdir(TDIR "/fs1", 0777); + if (CHECK(err, "mkdir "TDIR"/fs1", "failed: %d\n", errno)) + goto out; + err = mkdir(TDIR "/fs2", 0777); + if (CHECK(err, "mkdir "TDIR"/fs2", "failed: %d\n", errno)) + goto out; + + err = mount("bpf", TDIR "/fs1", "bpf", 0, NULL); + if (CHECK(err, "mount bpffs "TDIR"/fs1", "failed: %d\n", errno)) + goto out; + err = mount("bpf", TDIR "/fs2", "bpf", 0, NULL); + if (CHECK(err, "mount bpffs " TDIR "/fs2", "failed: %d\n", errno)) + goto out; + + err = read_iter(TDIR "/fs1/maps.debug"); + if (CHECK(err, "reading " TDIR "/fs1/maps.debug", "failed\n")) + goto out; + err = read_iter(TDIR "/fs2/progs.debug"); + if (CHECK(err, "reading " TDIR "/fs2/progs.debug", "failed\n")) + goto out; +out: + umount(TDIR "/fs1"); + umount(TDIR "/fs2"); + rmdir(TDIR "/fs1"); + rmdir(TDIR "/fs2"); + umount(TDIR); + exit(err); +} + +void test_test_bpffs(void) +{ + int err, duration = 0, status = 0; + pid_t pid; + + pid = fork(); + if (CHECK(pid == -1, "clone", "clone failed %d", errno)) + return; + if (pid == 0) + fn(); + err = waitpid(pid, &status, 0); + if (CHECK(err == -1 && errno != ECHILD, "waitpid", "failed %d", errno)) + return; + if (CHECK(WEXITSTATUS(status), "bpffs test ", "failed %d", 
WEXITSTATUS(status))) + return; +} diff --git a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c new file mode 100644 index 000000000000..91cd6f357246 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright (C) 2020 Google LLC. + */ + +#include <test_progs.h> +#include <linux/limits.h> + +#include "local_storage.skel.h" +#include "network_helpers.h" + +int create_and_unlink_file(void) +{ + char fname[PATH_MAX] = "/tmp/fileXXXXXX"; + int fd; + + fd = mkstemp(fname); + if (fd < 0) + return fd; + + close(fd); + unlink(fname); + return 0; +} + +void test_test_local_storage(void) +{ + struct local_storage *skel = NULL; + int err, duration = 0, serv_sk = -1; + + skel = local_storage__open_and_load(); + if (CHECK(!skel, "skel_load", "lsm skeleton failed\n")) + goto close_prog; + + err = local_storage__attach(skel); + if (CHECK(err, "attach", "lsm attach failed: %d\n", err)) + goto close_prog; + + skel->bss->monitored_pid = getpid(); + + err = create_and_unlink_file(); + if (CHECK(err < 0, "exec_cmd", "err %d errno %d\n", err, errno)) + goto close_prog; + + CHECK(skel->data->inode_storage_result != 0, "inode_storage_result", + "inode_local_storage not set\n"); + + serv_sk = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0); + if (CHECK(serv_sk < 0, "start_server", "failed to start server\n")) + goto close_prog; + + CHECK(skel->data->sk_storage_result != 0, "sk_storage_result", + "sk_local_storage not set\n"); + + close(serv_sk); + +close_prog: + local_storage__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/test_lsm.c b/tools/testing/selftests/bpf/prog_tests/test_lsm.c index b17eb2045c1d..6ab29226c99b 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_lsm.c +++ b/tools/testing/selftests/bpf/prog_tests/test_lsm.c @@ -10,6 +10,7 @@ #include <unistd.h> #include <malloc.h> #include 
<stdlib.h> +#include <unistd.h> #include "lsm.skel.h" @@ -55,6 +56,7 @@ void test_test_lsm(void) { struct lsm *skel = NULL; int err, duration = 0; + int buf = 1234; skel = lsm__open_and_load(); if (CHECK(!skel, "skel_load", "lsm skeleton failed\n")) @@ -81,6 +83,13 @@ void test_test_lsm(void) CHECK(skel->bss->mprotect_count != 1, "mprotect_count", "mprotect_count = %d\n", skel->bss->mprotect_count); + syscall(__NR_setdomainname, &buf, -2L); + syscall(__NR_setdomainname, 0, -3L); + syscall(__NR_setdomainname, ~0L, -4L); + + CHECK(skel->bss->copy_test != 3, "copy_test", + "copy_test = %d\n", skel->bss->copy_test); + close_prog: lsm__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/test_overhead.c b/tools/testing/selftests/bpf/prog_tests/test_overhead.c index 2702df2b2343..9966685866fd 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_overhead.c +++ b/tools/testing/selftests/bpf/prog_tests/test_overhead.c @@ -61,10 +61,9 @@ void test_test_overhead(void) const char *raw_tp_name = "raw_tp/task_rename"; const char *fentry_name = "fentry/__set_task_comm"; const char *fexit_name = "fexit/__set_task_comm"; - const char *fmodret_name = "fmod_ret/__set_task_comm"; const char *kprobe_func = "__set_task_comm"; struct bpf_program *kprobe_prog, *kretprobe_prog, *raw_tp_prog; - struct bpf_program *fentry_prog, *fexit_prog, *fmodret_prog; + struct bpf_program *fentry_prog, *fexit_prog; struct bpf_object *obj; struct bpf_link *link; int err, duration = 0; @@ -97,11 +96,6 @@ void test_test_overhead(void) if (CHECK(!fexit_prog, "find_probe", "prog '%s' not found\n", fexit_name)) goto cleanup; - fmodret_prog = bpf_object__find_program_by_title(obj, fmodret_name); - if (CHECK(!fmodret_prog, "find_probe", - "prog '%s' not found\n", fmodret_name)) - goto cleanup; - err = bpf_object__load(obj); if (CHECK(err, "obj_load", "err %d\n", err)) goto cleanup; @@ -148,12 +142,6 @@ void test_test_overhead(void) test_run("fexit"); bpf_link__destroy(link); - /* attach 
fmod_ret */ - link = bpf_program__attach_trace(fmodret_prog); - if (CHECK(IS_ERR(link), "attach fmod_ret", "err %ld\n", PTR_ERR(link))) - goto cleanup; - test_run("fmod_ret"); - bpf_link__destroy(link); cleanup: prctl(PR_SET_NAME, comm, 0L, 0L, 0L); bpf_object__close(obj); diff --git a/tools/testing/selftests/bpf/prog_tests/test_profiler.c b/tools/testing/selftests/bpf/prog_tests/test_profiler.c new file mode 100644 index 000000000000..4ca275101ee0 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_profiler.c @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include <test_progs.h> +#include "progs/profiler.h" +#include "profiler1.skel.h" +#include "profiler2.skel.h" +#include "profiler3.skel.h" + +static int sanity_run(struct bpf_program *prog) +{ + struct bpf_prog_test_run_attr test_attr = {}; + __u64 args[] = {1, 2, 3}; + __u32 duration = 0; + int err, prog_fd; + + prog_fd = bpf_program__fd(prog); + test_attr.prog_fd = prog_fd; + test_attr.ctx_in = args; + test_attr.ctx_size_in = sizeof(args); + err = bpf_prog_test_run_xattr(&test_attr); + if (CHECK(err || test_attr.retval, "test_run", + "err %d errno %d retval %d duration %d\n", + err, errno, test_attr.retval, duration)) + return -1; + return 0; +} + +void test_test_profiler(void) +{ + struct profiler1 *profiler1_skel = NULL; + struct profiler2 *profiler2_skel = NULL; + struct profiler3 *profiler3_skel = NULL; + __u32 duration = 0; + int err; + + profiler1_skel = profiler1__open_and_load(); + if (CHECK(!profiler1_skel, "profiler1_skel_load", "profiler1 skeleton failed\n")) + goto cleanup; + + err = profiler1__attach(profiler1_skel); + if (CHECK(err, "profiler1_attach", "profiler1 attach failed: %d\n", err)) + goto cleanup; + + if (sanity_run(profiler1_skel->progs.raw_tracepoint__sched_process_exec)) + goto cleanup; + + profiler2_skel = profiler2__open_and_load(); + if (CHECK(!profiler2_skel, "profiler2_skel_load", "profiler2 skeleton failed\n")) + goto 
cleanup; + + err = profiler2__attach(profiler2_skel); + if (CHECK(err, "profiler2_attach", "profiler2 attach failed: %d\n", err)) + goto cleanup; + + if (sanity_run(profiler2_skel->progs.raw_tracepoint__sched_process_exec)) + goto cleanup; + + profiler3_skel = profiler3__open_and_load(); + if (CHECK(!profiler3_skel, "profiler3_skel_load", "profiler3 skeleton failed\n")) + goto cleanup; + + err = profiler3__attach(profiler3_skel); + if (CHECK(err, "profiler3_attach", "profiler3 attach failed: %d\n", err)) + goto cleanup; + + if (sanity_run(profiler3_skel->progs.raw_tracepoint__sched_process_exec)) + goto cleanup; +cleanup: + profiler1__destroy(profiler1_skel); + profiler2__destroy(profiler2_skel); + profiler3__destroy(profiler3_skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/trace_ext.c b/tools/testing/selftests/bpf/prog_tests/trace_ext.c new file mode 100644 index 000000000000..924441d4362d --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/trace_ext.c @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: GPL-2.0 + +#define _GNU_SOURCE +#include <test_progs.h> +#include <network_helpers.h> +#include <sys/stat.h> +#include <linux/sched.h> +#include <sys/syscall.h> + +#include "test_pkt_md_access.skel.h" +#include "test_trace_ext.skel.h" +#include "test_trace_ext_tracing.skel.h" + +static __u32 duration; + +void test_trace_ext(void) +{ + struct test_pkt_md_access *skel_pkt = NULL; + struct test_trace_ext_tracing *skel_trace = NULL; + struct test_trace_ext_tracing__bss *bss_trace; + struct test_trace_ext *skel_ext = NULL; + struct test_trace_ext__bss *bss_ext; + int err, pkt_fd, ext_fd; + struct bpf_program *prog; + char buf[100]; + __u32 retval; + __u64 len; + + /* open/load/attach test_pkt_md_access */ + skel_pkt = test_pkt_md_access__open_and_load(); + if (CHECK(!skel_pkt, "setup", "classifier/test_pkt_md_access open failed\n")) + goto cleanup; + + err = test_pkt_md_access__attach(skel_pkt); + if (CHECK(err, "setup", "classifier/test_pkt_md_access 
attach failed: %d\n", err)) + goto cleanup; + + prog = skel_pkt->progs.test_pkt_md_access; + pkt_fd = bpf_program__fd(prog); + + /* open extension */ + skel_ext = test_trace_ext__open(); + if (CHECK(!skel_ext, "setup", "freplace/test_pkt_md_access open failed\n")) + goto cleanup; + + /* set extension's attach target - test_pkt_md_access */ + prog = skel_ext->progs.test_pkt_md_access_new; + bpf_program__set_attach_target(prog, pkt_fd, "test_pkt_md_access"); + + /* load/attach extension */ + err = test_trace_ext__load(skel_ext); + if (CHECK(err, "setup", "freplace/test_pkt_md_access load failed\n")) { + libbpf_strerror(err, buf, sizeof(buf)); + fprintf(stderr, "%s\n", buf); + goto cleanup; + } + + err = test_trace_ext__attach(skel_ext); + if (CHECK(err, "setup", "freplace/test_pkt_md_access attach failed: %d\n", err)) + goto cleanup; + + prog = skel_ext->progs.test_pkt_md_access_new; + ext_fd = bpf_program__fd(prog); + + /* open tracing */ + skel_trace = test_trace_ext_tracing__open(); + if (CHECK(!skel_trace, "setup", "tracing/test_pkt_md_access_new open failed\n")) + goto cleanup; + + /* set tracing's attach target - fentry */ + prog = skel_trace->progs.fentry; + bpf_program__set_attach_target(prog, ext_fd, "test_pkt_md_access_new"); + + /* set tracing's attach target - fexit */ + prog = skel_trace->progs.fexit; + bpf_program__set_attach_target(prog, ext_fd, "test_pkt_md_access_new"); + + /* load/attach tracing */ + err = test_trace_ext_tracing__load(skel_trace); + if (CHECK(err, "setup", "tracing/test_pkt_md_access_new load failed\n")) { + libbpf_strerror(err, buf, sizeof(buf)); + fprintf(stderr, "%s\n", buf); + goto cleanup; + } + + err = test_trace_ext_tracing__attach(skel_trace); + if (CHECK(err, "setup", "tracing/test_pkt_md_access_new attach failed: %d\n", err)) + goto cleanup; + + /* trigger the test */ + err = bpf_prog_test_run(pkt_fd, 1, &pkt_v4, sizeof(pkt_v4), + NULL, NULL, &retval, &duration); + CHECK(err || retval, "run", "err %d errno %d retval %d\n", 
err, errno, retval); + + bss_ext = skel_ext->bss; + bss_trace = skel_trace->bss; + + len = bss_ext->ext_called; + + CHECK(bss_ext->ext_called == 0, + "check", "failed to trigger freplace/test_pkt_md_access\n"); + CHECK(bss_trace->fentry_called != len, + "check", "failed to trigger fentry/test_pkt_md_access_new\n"); + CHECK(bss_trace->fexit_called != len, + "check", "failed to trigger fexit/test_pkt_md_access_new\n"); + +cleanup: + test_trace_ext_tracing__destroy(skel_trace); + test_trace_ext__destroy(skel_ext); + test_pkt_md_access__destroy(skel_pkt); +} diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c b/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c index f284f72158ef..0281095de266 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c @@ -1,11 +1,12 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> #include <network_helpers.h> +#include "test_xdp_noinline.skel.h" void test_xdp_noinline(void) { - const char *file = "./test_xdp_noinline.o"; unsigned int nr_cpus = bpf_num_possible_cpus(); + struct test_xdp_noinline *skel; struct vip key = {.protocol = 6}; struct vip_meta { __u32 flags; @@ -24,59 +25,43 @@ void test_xdp_noinline(void) __u8 flags; } real_def = {.dst = MAGIC_VAL}; __u32 ch_key = 11, real_num = 3; - __u32 duration, retval, size; - int err, i, prog_fd, map_fd; + __u32 duration = 0, retval, size; + int err, i; __u64 bytes = 0, pkts = 0; - struct bpf_object *obj; char buf[128]; u32 *magic = (u32 *)buf; - err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); - if (CHECK_FAIL(err)) + skel = test_xdp_noinline__open_and_load(); + if (CHECK(!skel, "skel_open_and_load", "failed\n")) return; - map_fd = bpf_find_map(__func__, obj, "vip_map"); - if (map_fd < 0) - goto out; - bpf_map_update_elem(map_fd, &key, &value, 0); + bpf_map_update_elem(bpf_map__fd(skel->maps.vip_map), &key, &value, 0); + bpf_map_update_elem(bpf_map__fd(skel->maps.ch_rings), 
&ch_key, &real_num, 0); + bpf_map_update_elem(bpf_map__fd(skel->maps.reals), &real_num, &real_def, 0); - map_fd = bpf_find_map(__func__, obj, "ch_rings"); - if (map_fd < 0) - goto out; - bpf_map_update_elem(map_fd, &ch_key, &real_num, 0); - - map_fd = bpf_find_map(__func__, obj, "reals"); - if (map_fd < 0) - goto out; - bpf_map_update_elem(map_fd, &real_num, &real_def, 0); - - err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v4, sizeof(pkt_v4), + err = bpf_prog_test_run(bpf_program__fd(skel->progs.balancer_ingress_v4), + NUM_ITER, &pkt_v4, sizeof(pkt_v4), buf, &size, &retval, &duration); CHECK(err || retval != 1 || size != 54 || *magic != MAGIC_VAL, "ipv4", "err %d errno %d retval %d size %d magic %x\n", err, errno, retval, size, *magic); - err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v6, sizeof(pkt_v6), + err = bpf_prog_test_run(bpf_program__fd(skel->progs.balancer_ingress_v6), + NUM_ITER, &pkt_v6, sizeof(pkt_v6), buf, &size, &retval, &duration); CHECK(err || retval != 1 || size != 74 || *magic != MAGIC_VAL, "ipv6", "err %d errno %d retval %d size %d magic %x\n", err, errno, retval, size, *magic); - map_fd = bpf_find_map(__func__, obj, "stats"); - if (map_fd < 0) - goto out; - bpf_map_lookup_elem(map_fd, &stats_key, stats); + bpf_map_lookup_elem(bpf_map__fd(skel->maps.stats), &stats_key, stats); for (i = 0; i < nr_cpus; i++) { bytes += stats[i].bytes; pkts += stats[i].pkts; } - if (CHECK_FAIL(bytes != MAGIC_BYTES * NUM_ITER * 2 || - pkts != NUM_ITER * 2)) { - printf("test_xdp_noinline:FAIL:stats %lld %lld\n", - bytes, pkts); - } -out: - bpf_object__close(obj); + CHECK(bytes != MAGIC_BYTES * NUM_ITER * 2 || pkts != NUM_ITER * 2, + "stats", "bytes %lld pkts %lld\n", + (unsigned long long)bytes, (unsigned long long)pkts); + test_xdp_noinline__destroy(skel); } diff --git a/tools/testing/selftests/bpf/progs/bpf_cubic.c b/tools/testing/selftests/bpf/progs/bpf_cubic.c index ef574087f1e1..6939bfd8690f 100644 --- a/tools/testing/selftests/bpf/progs/bpf_cubic.c +++ 
b/tools/testing/selftests/bpf/progs/bpf_cubic.c @@ -15,6 +15,8 @@ */ #include <linux/bpf.h> +#include <linux/stddef.h> +#include <linux/tcp.h> #include "bpf_tcp_helpers.h" char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/bpf_dctcp.c b/tools/testing/selftests/bpf/progs/bpf_dctcp.c index 3fb4260570b1..4dc1a967776a 100644 --- a/tools/testing/selftests/bpf/progs/bpf_dctcp.c +++ b/tools/testing/selftests/bpf/progs/bpf_dctcp.c @@ -9,6 +9,8 @@ #include <stddef.h> #include <linux/bpf.h> #include <linux/types.h> +#include <linux/stddef.h> +#include <linux/tcp.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> #include "bpf_tcp_helpers.h" diff --git a/tools/testing/selftests/bpf/progs/bpf_flow.c b/tools/testing/selftests/bpf/progs/bpf_flow.c index de6de9221518..5a65f6b51377 100644 --- a/tools/testing/selftests/bpf/progs/bpf_flow.c +++ b/tools/testing/selftests/bpf/progs/bpf_flow.c @@ -118,18 +118,18 @@ static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto) switch (proto) { case bpf_htons(ETH_P_IP): - bpf_tail_call(skb, &jmp_table, IP); + bpf_tail_call_static(skb, &jmp_table, IP); break; case bpf_htons(ETH_P_IPV6): - bpf_tail_call(skb, &jmp_table, IPV6); + bpf_tail_call_static(skb, &jmp_table, IPV6); break; case bpf_htons(ETH_P_MPLS_MC): case bpf_htons(ETH_P_MPLS_UC): - bpf_tail_call(skb, &jmp_table, MPLS); + bpf_tail_call_static(skb, &jmp_table, MPLS); break; case bpf_htons(ETH_P_8021Q): case bpf_htons(ETH_P_8021AD): - bpf_tail_call(skb, &jmp_table, VLAN); + bpf_tail_call_static(skb, &jmp_table, VLAN); break; default: /* Protocol not supported */ @@ -246,10 +246,10 @@ static __always_inline int parse_ipv6_proto(struct __sk_buff *skb, __u8 nexthdr) switch (nexthdr) { case IPPROTO_HOPOPTS: case IPPROTO_DSTOPTS: - bpf_tail_call(skb, &jmp_table, IPV6OP); + bpf_tail_call_static(skb, &jmp_table, IPV6OP); break; case IPPROTO_FRAGMENT: - bpf_tail_call(skb, &jmp_table, IPV6FR); + bpf_tail_call_static(skb, 
&jmp_table, IPV6FR); break; default: return parse_ip_proto(skb, nexthdr); diff --git a/tools/testing/selftests/bpf/progs/bpf_iter.h b/tools/testing/selftests/bpf/progs/bpf_iter.h index c196280df90d..6a1255465fd6 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter.h +++ b/tools/testing/selftests/bpf/progs/bpf_iter.h @@ -13,6 +13,12 @@ #define udp6_sock udp6_sock___not_used #define bpf_iter__bpf_map_elem bpf_iter__bpf_map_elem___not_used #define bpf_iter__bpf_sk_storage_map bpf_iter__bpf_sk_storage_map___not_used +#define bpf_iter__sockmap bpf_iter__sockmap___not_used +#define btf_ptr btf_ptr___not_used +#define BTF_F_COMPACT BTF_F_COMPACT___not_used +#define BTF_F_NONAME BTF_F_NONAME___not_used +#define BTF_F_PTR_RAW BTF_F_PTR_RAW___not_used +#define BTF_F_ZERO BTF_F_ZERO___not_used #include "vmlinux.h" #undef bpf_iter_meta #undef bpf_iter__bpf_map @@ -26,6 +32,12 @@ #undef udp6_sock #undef bpf_iter__bpf_map_elem #undef bpf_iter__bpf_sk_storage_map +#undef bpf_iter__sockmap +#undef btf_ptr +#undef BTF_F_COMPACT +#undef BTF_F_NONAME +#undef BTF_F_PTR_RAW +#undef BTF_F_ZERO struct bpf_iter_meta { struct seq_file *seq; @@ -96,3 +108,23 @@ struct bpf_iter__bpf_sk_storage_map { struct sock *sk; void *value; }; + +struct bpf_iter__sockmap { + struct bpf_iter_meta *meta; + struct bpf_map *map; + void *key; + struct sock *sk; +}; + +struct btf_ptr { + void *ptr; + __u32 type_id; + __u32 flags; +}; + +enum { + BTF_F_COMPACT = (1ULL << 0), + BTF_F_NONAME = (1ULL << 1), + BTF_F_PTR_RAW = (1ULL << 2), + BTF_F_ZERO = (1ULL << 3), +}; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c index 07ddbfdbcab7..6dfce3fd68bc 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c @@ -47,7 +47,10 @@ int dump_bpf_hash_map(struct bpf_iter__bpf_map_elem *ctx) __u32 seq_num = ctx->meta->seq_num; struct bpf_map *map = ctx->map; 
struct key_t *key = ctx->key; + struct key_t tmp_key; __u64 *val = ctx->value; + __u64 tmp_val = 0; + int ret; if (in_test_mode) { /* test mode is used by selftests to @@ -61,6 +64,18 @@ int dump_bpf_hash_map(struct bpf_iter__bpf_map_elem *ctx) if (key == (void *)0 || val == (void *)0) return 0; + /* update the value and then delete the <key, value> pair. + * it should not impact the existing 'val' which is still + * accessible under rcu. + */ + __builtin_memcpy(&tmp_key, key, sizeof(struct key_t)); + ret = bpf_map_update_elem(&hashmap1, &tmp_key, &tmp_val, 0); + if (ret) + return 0; + ret = bpf_map_delete_elem(&hashmap1, &tmp_key); + if (ret) + return 0; + key_sum_a += key->a; key_sum_b += key->b; key_sum_c += key->c; diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_sockmap.c b/tools/testing/selftests/bpf/progs/bpf_iter_sockmap.c new file mode 100644 index 000000000000..f3af0e30cead --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_sockmap.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Cloudflare */ +#include "bpf_iter.h" +#include "bpf_tracing_net.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <errno.h> + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, 64); + __type(key, __u32); + __type(value, __u64); +} sockmap SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_SOCKHASH); + __uint(max_entries, 64); + __type(key, __u32); + __type(value, __u64); +} sockhash SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_SOCKHASH); + __uint(max_entries, 64); + __type(key, __u32); + __type(value, __u64); +} dst SEC(".maps"); + +__u32 elems = 0; +__u32 socks = 0; + +SEC("iter/sockmap") +int copy(struct bpf_iter__sockmap *ctx) +{ + struct sock *sk = ctx->sk; + __u32 tmp, *key = ctx->key; + int ret; + + if (!key) + return 0; + + elems++; + + /* We need a temporary buffer on the stack, since the verifier doesn't + * let us use the 
pointer from the context as an argument to the helper. + */ + tmp = *key; + + if (sk) { + socks++; + return bpf_map_update_elem(&dst, &tmp, sk, 0) != 0; + } + + ret = bpf_map_delete_elem(&dst, &tmp); + return ret && ret != -ENOENT; +} diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c new file mode 100644 index 000000000000..a1ddc36f13ec --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020, Oracle and/or its affiliates. */ +#include "bpf_iter.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> + +#include <errno.h> + +char _license[] SEC("license") = "GPL"; + +long tasks = 0; +long seq_err = 0; +bool skip = false; + +SEC("iter/task") +int dump_task_struct(struct bpf_iter__task *ctx) +{ + struct seq_file *seq = ctx->meta->seq; + struct task_struct *task = ctx->task; + static struct btf_ptr ptr = { }; + long ret; + +#if __has_builtin(__builtin_btf_type_id) + ptr.type_id = bpf_core_type_id_kernel(struct task_struct); + ptr.ptr = task; + + if (ctx->meta->seq_num == 0) + BPF_SEQ_PRINTF(seq, "Raw BTF task\n"); + + ret = bpf_seq_printf_btf(seq, &ptr, sizeof(ptr), 0); + switch (ret) { + case 0: + tasks++; + break; + case -ERANGE: + /* NULL task or task->fs, don't count it as an error. 
*/ + break; + case -E2BIG: + return 1; + default: + seq_err = ret; + break; + } +#else + skip = true; +#endif + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c index 8b787baa2654..b2f7c7c5f952 100644 --- a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c +++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c @@ -6,6 +6,9 @@ char _license[] SEC("license") = "GPL"; +int count = 0; +int tgid = 0; + SEC("iter/task_file") int dump_task_file(struct bpf_iter__task_file *ctx) { @@ -17,8 +20,13 @@ int dump_task_file(struct bpf_iter__task_file *ctx) if (task == (void *)0 || file == (void *)0) return 0; - if (ctx->meta->seq_num == 0) + if (ctx->meta->seq_num == 0) { + count = 0; BPF_SEQ_PRINTF(seq, " tgid gid fd file\n"); + } + + if (tgid == task->tgid && task->tgid != task->pid) + count++; BPF_SEQ_PRINTF(seq, "%8d %8d %8d %lx\n", task->tgid, task->pid, fd, (long)file->f_op); diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval.c new file mode 100644 index 000000000000..48e62f3f074f --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_enumval x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___diff.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___diff.c new file mode 100644 index 000000000000..53e5e5a76888 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___diff.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_enumval___diff x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___err_missing.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___err_missing.c new file mode 100644 index 000000000000..d024fb2ac06e --- /dev/null +++ 
b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___err_missing.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_enumval___err_missing x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___val3_missing.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___val3_missing.c new file mode 100644 index 000000000000..9de6595d250c --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___val3_missing.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_enumval___val3_missing x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_size___err_ambiguous.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_size___err_ambiguous.c new file mode 100644 index 000000000000..f3e9904df9c2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_size___err_ambiguous.c @@ -0,0 +1,4 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_size___err_ambiguous1 x, + struct core_reloc_size___err_ambiguous2 y) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based.c new file mode 100644 index 000000000000..fc3f69e58c71 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_type_based x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___all_missing.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___all_missing.c new file mode 100644 index 000000000000..51511648b4ec --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___all_missing.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_type_based___all_missing x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff_sz.c 
b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff_sz.c new file mode 100644 index 000000000000..67db3dceb279 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff_sz.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_type_based___diff_sz x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___fn_wrong_args.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___fn_wrong_args.c new file mode 100644 index 000000000000..b357fc65431d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___fn_wrong_args.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_type_based___fn_wrong_args x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___incompat.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___incompat.c new file mode 100644 index 000000000000..8ddf20d33d9e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___incompat.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_type_based___incompat x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_id.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_id.c new file mode 100644 index 000000000000..abbe5bddcefd --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_id.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_type_id x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_id___missing_targets.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_id___missing_targets.c new file mode 100644 index 000000000000..24e7caf4f013 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_id___missing_targets.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_type_id___missing_targets x) {} diff --git 
a/tools/testing/selftests/bpf/progs/btf_ptr.h b/tools/testing/selftests/bpf/progs/btf_ptr.h new file mode 100644 index 000000000000..c3c9797c67db --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf_ptr.h @@ -0,0 +1,27 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2020, Oracle and/or its affiliates. */ +/* "undefine" structs in vmlinux.h, because we "override" them below */ +#define btf_ptr btf_ptr___not_used +#define BTF_F_COMPACT BTF_F_COMPACT___not_used +#define BTF_F_NONAME BTF_F_NONAME___not_used +#define BTF_F_PTR_RAW BTF_F_PTR_RAW___not_used +#define BTF_F_ZERO BTF_F_ZERO___not_used +#include "vmlinux.h" +#undef btf_ptr +#undef BTF_F_COMPACT +#undef BTF_F_NONAME +#undef BTF_F_PTR_RAW +#undef BTF_F_ZERO + +struct btf_ptr { + void *ptr; + __u32 type_id; + __u32 flags; +}; + +enum { + BTF_F_COMPACT = (1ULL << 0), + BTF_F_NONAME = (1ULL << 1), + BTF_F_PTR_RAW = (1ULL << 2), + BTF_F_ZERO = (1ULL << 3), +}; diff --git a/tools/testing/selftests/bpf/progs/connect4_prog.c b/tools/testing/selftests/bpf/progs/connect4_prog.c index b1b2773c0b9d..a943d394fd3a 100644 --- a/tools/testing/selftests/bpf/progs/connect4_prog.c +++ b/tools/testing/selftests/bpf/progs/connect4_prog.c @@ -23,6 +23,10 @@ #define TCP_CA_NAME_MAX 16 #endif +#ifndef TCP_NOTSENT_LOWAT +#define TCP_NOTSENT_LOWAT 25 +#endif + #ifndef IFNAMSIZ #define IFNAMSIZ 16 #endif @@ -128,6 +132,18 @@ static __inline int set_keepalive(struct bpf_sock_addr *ctx) return 0; } +static __inline int set_notsent_lowat(struct bpf_sock_addr *ctx) +{ + int lowat = 65535; + + if (ctx->type == SOCK_STREAM) { + if (bpf_setsockopt(ctx, SOL_TCP, TCP_NOTSENT_LOWAT, &lowat, sizeof(lowat))) + return 1; + } + + return 0; +} + SEC("cgroup/connect4") int connect_v4_prog(struct bpf_sock_addr *ctx) { @@ -148,6 +164,9 @@ int connect_v4_prog(struct bpf_sock_addr *ctx) if (set_keepalive(ctx)) return 0; + if (set_notsent_lowat(ctx)) + return 0; + if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM) return 0; else if 
(ctx->type == SOCK_STREAM) diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h index 69139ed66216..e6e616cb7bc9 100644 --- a/tools/testing/selftests/bpf/progs/core_reloc_types.h +++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h @@ -652,7 +652,7 @@ struct core_reloc_misc_extensible { }; /* - * EXISTENCE + * FIELD EXISTENCE */ struct core_reloc_existence_output { int a_exists; @@ -809,3 +809,353 @@ struct core_reloc_size___diff_sz { void *ptr_field; enum { OTHER_VALUE = 0xFFFFFFFFFFFFFFFF } enum_field; }; + +/* Error case of two candidates with the fields (int_field) at the same + * offset, but with differing final relocation values: size 4 vs size 1 + */ +struct core_reloc_size___err_ambiguous1 { + /* int at offset 0 */ + int int_field; + + struct { int x; } struct_field; + union { int x; } union_field; + int arr_field[4]; + void *ptr_field; + enum { VALUE___1 = 123 } enum_field; +}; + +struct core_reloc_size___err_ambiguous2 { + /* char at offset 0 */ + char int_field; + + struct { int x; } struct_field; + union { int x; } union_field; + int arr_field[4]; + void *ptr_field; + enum { VALUE___2 = 123 } enum_field; +}; + +/* + * TYPE EXISTENCE & SIZE + */ +struct core_reloc_type_based_output { + bool struct_exists; + bool union_exists; + bool enum_exists; + bool typedef_named_struct_exists; + bool typedef_anon_struct_exists; + bool typedef_struct_ptr_exists; + bool typedef_int_exists; + bool typedef_enum_exists; + bool typedef_void_ptr_exists; + bool typedef_func_proto_exists; + bool typedef_arr_exists; + + int struct_sz; + int union_sz; + int enum_sz; + int typedef_named_struct_sz; + int typedef_anon_struct_sz; + int typedef_struct_ptr_sz; + int typedef_int_sz; + int typedef_enum_sz; + int typedef_void_ptr_sz; + int typedef_func_proto_sz; + int typedef_arr_sz; +}; + +struct a_struct { + int x; +}; + +union a_union { + int y; + int z; +}; + +typedef struct a_struct named_struct_typedef; + 
+typedef struct { int x, y, z; } anon_struct_typedef; + +typedef struct { + int a, b, c; +} *struct_ptr_typedef; + +enum an_enum { + AN_ENUM_VAL1 = 1, + AN_ENUM_VAL2 = 2, + AN_ENUM_VAL3 = 3, +}; + +typedef int int_typedef; + +typedef enum { TYPEDEF_ENUM_VAL1, TYPEDEF_ENUM_VAL2 } enum_typedef; + +typedef void *void_ptr_typedef; + +typedef int (*func_proto_typedef)(long); + +typedef char arr_typedef[20]; + +struct core_reloc_type_based { + struct a_struct f1; + union a_union f2; + enum an_enum f3; + named_struct_typedef f4; + anon_struct_typedef f5; + struct_ptr_typedef f6; + int_typedef f7; + enum_typedef f8; + void_ptr_typedef f9; + func_proto_typedef f10; + arr_typedef f11; +}; + +/* no types in target */ +struct core_reloc_type_based___all_missing { +}; + +/* different type sizes, extra modifiers, anon vs named enums, etc */ +struct a_struct___diff_sz { + long x; + int y; + char z; +}; + +union a_union___diff_sz { + char yy; + char zz; +}; + +typedef struct a_struct___diff_sz named_struct_typedef___diff_sz; + +typedef struct { long xx, yy, zzz; } anon_struct_typedef___diff_sz; + +typedef struct { + char aa[1], bb[2], cc[3]; +} *struct_ptr_typedef___diff_sz; + +enum an_enum___diff_sz { + AN_ENUM_VAL1___diff_sz = 0x123412341234, + AN_ENUM_VAL2___diff_sz = 2, +}; + +typedef unsigned long int_typedef___diff_sz; + +typedef enum an_enum___diff_sz enum_typedef___diff_sz; + +typedef const void * const void_ptr_typedef___diff_sz; + +typedef int_typedef___diff_sz (*func_proto_typedef___diff_sz)(char); + +typedef int arr_typedef___diff_sz[2]; + +struct core_reloc_type_based___diff_sz { + struct a_struct___diff_sz f1; + union a_union___diff_sz f2; + enum an_enum___diff_sz f3; + named_struct_typedef___diff_sz f4; + anon_struct_typedef___diff_sz f5; + struct_ptr_typedef___diff_sz f6; + int_typedef___diff_sz f7; + enum_typedef___diff_sz f8; + void_ptr_typedef___diff_sz f9; + func_proto_typedef___diff_sz f10; + arr_typedef___diff_sz f11; +}; + +/* incompatibilities between 
target and local types */ +union a_struct___incompat { /* union instead of struct */ + int x; +}; + +struct a_union___incompat { /* struct instead of union */ + int y; + int z; +}; + +/* typedef to union, not to struct */ +typedef union a_struct___incompat named_struct_typedef___incompat; + +/* typedef to void pointer, instead of struct */ +typedef void *anon_struct_typedef___incompat; + +/* extra pointer indirection */ +typedef struct { + int a, b, c; +} **struct_ptr_typedef___incompat; + +/* typedef of a struct with int, instead of int */ +typedef struct { int x; } int_typedef___incompat; + +/* typedef to func_proto, instead of enum */ +typedef int (*enum_typedef___incompat)(void); + +/* pointer to char instead of void */ +typedef char *void_ptr_typedef___incompat; + +/* void return type instead of int */ +typedef void (*func_proto_typedef___incompat)(long); + +/* multi-dimensional array instead of a single-dimensional */ +typedef int arr_typedef___incompat[20][2]; + +struct core_reloc_type_based___incompat { + union a_struct___incompat f1; + struct a_union___incompat f2; + /* the only valid one is enum, to check that something still succeeds */ + enum an_enum f3; + named_struct_typedef___incompat f4; + anon_struct_typedef___incompat f5; + struct_ptr_typedef___incompat f6; + int_typedef___incompat f7; + enum_typedef___incompat f8; + void_ptr_typedef___incompat f9; + func_proto_typedef___incompat f10; + arr_typedef___incompat f11; +}; + +/* func_proto with incompatible signature */ +typedef void (*func_proto_typedef___fn_wrong_ret1)(long); +typedef int * (*func_proto_typedef___fn_wrong_ret2)(long); +typedef struct { int x; } int_struct_typedef; +typedef int_struct_typedef (*func_proto_typedef___fn_wrong_ret3)(long); +typedef int (*func_proto_typedef___fn_wrong_arg)(void *); +typedef int (*func_proto_typedef___fn_wrong_arg_cnt1)(long, long); +typedef int (*func_proto_typedef___fn_wrong_arg_cnt2)(void); + +struct core_reloc_type_based___fn_wrong_args { + /* one 
valid type to make sure relos still work */ + struct a_struct f1; + func_proto_typedef___fn_wrong_ret1 f2; + func_proto_typedef___fn_wrong_ret2 f3; + func_proto_typedef___fn_wrong_ret3 f4; + func_proto_typedef___fn_wrong_arg f5; + func_proto_typedef___fn_wrong_arg_cnt1 f6; + func_proto_typedef___fn_wrong_arg_cnt2 f7; +}; + +/* + * TYPE ID MAPPING (LOCAL AND TARGET) + */ +struct core_reloc_type_id_output { + int local_anon_struct; + int local_anon_union; + int local_anon_enum; + int local_anon_func_proto_ptr; + int local_anon_void_ptr; + int local_anon_arr; + + int local_struct; + int local_union; + int local_enum; + int local_int; + int local_struct_typedef; + int local_func_proto_typedef; + int local_arr_typedef; + + int targ_struct; + int targ_union; + int targ_enum; + int targ_int; + int targ_struct_typedef; + int targ_func_proto_typedef; + int targ_arr_typedef; +}; + +struct core_reloc_type_id { + struct a_struct f1; + union a_union f2; + enum an_enum f3; + named_struct_typedef f4; + func_proto_typedef f5; + arr_typedef f6; +}; + +struct core_reloc_type_id___missing_targets { + /* nothing */ +}; + +/* + * ENUMERATOR VALUE EXISTENCE AND VALUE RELOCATION + */ +struct core_reloc_enumval_output { + bool named_val1_exists; + bool named_val2_exists; + bool named_val3_exists; + bool anon_val1_exists; + bool anon_val2_exists; + bool anon_val3_exists; + + int named_val1; + int named_val2; + int anon_val1; + int anon_val2; +}; + +enum named_enum { + NAMED_ENUM_VAL1 = 1, + NAMED_ENUM_VAL2 = 2, + NAMED_ENUM_VAL3 = 3, +}; + +typedef enum { + ANON_ENUM_VAL1 = 0x10, + ANON_ENUM_VAL2 = 0x20, + ANON_ENUM_VAL3 = 0x30, +} anon_enum; + +struct core_reloc_enumval { + enum named_enum f1; + anon_enum f2; +}; + +/* differing enumerator values */ +enum named_enum___diff { + NAMED_ENUM_VAL1___diff = 101, + NAMED_ENUM_VAL2___diff = 202, + NAMED_ENUM_VAL3___diff = 303, +}; + +typedef enum { + ANON_ENUM_VAL1___diff = 0x11, + ANON_ENUM_VAL2___diff = 0x22, + ANON_ENUM_VAL3___diff = 0x33, +} 
anon_enum___diff; + +struct core_reloc_enumval___diff { + enum named_enum___diff f1; + anon_enum___diff f2; +}; + +/* missing (optional) third enum value */ +enum named_enum___val3_missing { + NAMED_ENUM_VAL1___val3_missing = 111, + NAMED_ENUM_VAL2___val3_missing = 222, +}; + +typedef enum { + ANON_ENUM_VAL1___val3_missing = 0x111, + ANON_ENUM_VAL2___val3_missing = 0x222, +} anon_enum___val3_missing; + +struct core_reloc_enumval___val3_missing { + enum named_enum___val3_missing f1; + anon_enum___val3_missing f2; +}; + +/* missing (mandatory) second enum value, should fail */ +enum named_enum___err_missing { + NAMED_ENUM_VAL1___err_missing = 1, + NAMED_ENUM_VAL3___err_missing = 3, +}; + +typedef enum { + ANON_ENUM_VAL1___err_missing = 0x111, + ANON_ENUM_VAL3___err_missing = 0x222, +} anon_enum___err_missing; + +struct core_reloc_enumval___err_missing { + enum named_enum___err_missing f1; + anon_enum___err_missing f2; +}; diff --git a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c index 98e1efe14549..49a84a3a2306 100644 --- a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c +++ b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c @@ -1,8 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2019 Facebook */ #include <linux/stddef.h> +#include <linux/if_ether.h> #include <linux/ipv6.h> #include <linux/bpf.h> +#include <linux/tcp.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> #include <bpf/bpf_tracing.h> @@ -151,4 +153,29 @@ int new_get_constant(long val) test_get_constant = 1; return test_get_constant; /* original get_constant() returns val - 122 */ } + +__u64 test_pkt_write_access_subprog = 0; +SEC("freplace/test_pkt_write_access_subprog") +int new_test_pkt_write_access_subprog(struct __sk_buff *skb, __u32 off) +{ + + void *data = (void *)(long)skb->data; + void *data_end = (void *)(long)skb->data_end; + struct tcphdr *tcp; + + if (off > sizeof(struct ethhdr) + sizeof(struct ipv6hdr)) + 
return -1; + + tcp = data + off; + if (tcp + 1 > data_end) + return -1; + + /* make modifications to the packet data */ + tcp->check++; + tcp->syn = 0; + + test_pkt_write_access_subprog = 1; + return 0; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/fmod_ret_freplace.c b/tools/testing/selftests/bpf/progs/fmod_ret_freplace.c new file mode 100644 index 000000000000..c8943ccee6c0 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/fmod_ret_freplace.c @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +volatile __u64 test_fmod_ret = 0; +SEC("fmod_ret/security_new_get_constant") +int BPF_PROG(fmod_ret_test, long val, int ret) +{ + test_fmod_ret = 1; + return 120; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/freplace_attach_probe.c b/tools/testing/selftests/bpf/progs/freplace_attach_probe.c new file mode 100644 index 000000000000..bb2a77c5b62b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/freplace_attach_probe.c @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook + +#include <linux/ptrace.h> +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +#define VAR_NUM 2 + +struct hmap_elem { + struct bpf_spin_lock lock; + int var[VAR_NUM]; +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, struct hmap_elem); +} hash_map SEC(".maps"); + +SEC("freplace/handle_kprobe") +int new_handle_kprobe(struct pt_regs *ctx) +{ + struct hmap_elem zero = {}, *val; + int key = 0; + + val = bpf_map_lookup_elem(&hash_map, &key); + if (!val) + return 1; + /* spin_lock in hash map */ + bpf_spin_lock(&val->lock); + val->var[0] = 99; + bpf_spin_unlock(&val->lock); + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git 
a/tools/testing/selftests/bpf/progs/freplace_cls_redirect.c b/tools/testing/selftests/bpf/progs/freplace_cls_redirect.c new file mode 100644 index 000000000000..68a5a9db928a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/freplace_cls_redirect.c @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook + +#include <linux/stddef.h> +#include <linux/bpf.h> +#include <linux/pkt_cls.h> +#include <bpf/bpf_endian.h> +#include <bpf/bpf_helpers.h> + +struct bpf_map_def SEC("maps") sock_map = { + .type = BPF_MAP_TYPE_SOCKMAP, + .key_size = sizeof(int), + .value_size = sizeof(int), + .max_entries = 2, +}; + +SEC("freplace/cls_redirect") +int freplace_cls_redirect_test(struct __sk_buff *skb) +{ + int ret = 0; + const int zero = 0; + struct bpf_sock *sk; + + sk = bpf_map_lookup_elem(&sock_map, &zero); + if (!sk) + return TC_ACT_SHOT; + + ret = bpf_map_update_elem(&sock_map, &zero, sk, 0); + bpf_sk_release(sk); + + return ret == 0 ? TC_ACT_OK : TC_ACT_SHOT; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c b/tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c new file mode 100644 index 000000000000..544e5ac90461 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook + +#include <linux/stddef.h> +#include <linux/ipv6.h> +#include <linux/bpf.h> +#include <linux/in.h> +#include <sys/socket.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> + +SEC("freplace/connect_v4_prog") +int new_connect_v4_prog(struct bpf_sock_addr *ctx) +{ + // return value thats in invalid range + return 255; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/freplace_get_constant.c b/tools/testing/selftests/bpf/progs/freplace_get_constant.c new file mode 100644 index 000000000000..705e4b64dfc2 --- /dev/null +++ 
b/tools/testing/selftests/bpf/progs/freplace_get_constant.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> + +volatile __u64 test_get_constant = 0; +SEC("freplace/get_constant") +int security_new_get_constant(long val) +{ + if (val != 123) + return 0; + test_get_constant = 1; + return test_get_constant; /* original get_constant() returns val - 122 */ +} +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/local_storage.c b/tools/testing/selftests/bpf/progs/local_storage.c new file mode 100644 index 000000000000..0758ba229ae0 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/local_storage.c @@ -0,0 +1,140 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Copyright 2020 Google LLC. + */ + +#include <errno.h> +#include <linux/bpf.h> +#include <stdbool.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +#define DUMMY_STORAGE_VALUE 0xdeadbeef + +int monitored_pid = 0; +int inode_storage_result = -1; +int sk_storage_result = -1; + +struct dummy_storage { + __u32 value; +}; + +struct { + __uint(type, BPF_MAP_TYPE_INODE_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct dummy_storage); +} inode_storage_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_SK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC | BPF_F_CLONE); + __type(key, int); + __type(value, struct dummy_storage); +} sk_storage_map SEC(".maps"); + +/* TODO Use vmlinux.h once BTF pruning for embedded types is fixed. 
+ */ +struct sock {} __attribute__((preserve_access_index)); +struct sockaddr {} __attribute__((preserve_access_index)); +struct socket { + struct sock *sk; +} __attribute__((preserve_access_index)); + +struct inode {} __attribute__((preserve_access_index)); +struct dentry { + struct inode *d_inode; +} __attribute__((preserve_access_index)); +struct file { + struct inode *f_inode; +} __attribute__((preserve_access_index)); + + +SEC("lsm/inode_unlink") +int BPF_PROG(unlink_hook, struct inode *dir, struct dentry *victim) +{ + __u32 pid = bpf_get_current_pid_tgid() >> 32; + struct dummy_storage *storage; + + if (pid != monitored_pid) + return 0; + + storage = bpf_inode_storage_get(&inode_storage_map, victim->d_inode, 0, + BPF_SK_STORAGE_GET_F_CREATE); + if (!storage) + return 0; + + if (storage->value == DUMMY_STORAGE_VALUE) + inode_storage_result = -1; + + inode_storage_result = + bpf_inode_storage_delete(&inode_storage_map, victim->d_inode); + + return 0; +} + +SEC("lsm/socket_bind") +int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address, + int addrlen) +{ + __u32 pid = bpf_get_current_pid_tgid() >> 32; + struct dummy_storage *storage; + + if (pid != monitored_pid) + return 0; + + storage = bpf_sk_storage_get(&sk_storage_map, sock->sk, 0, + BPF_SK_STORAGE_GET_F_CREATE); + if (!storage) + return 0; + + if (storage->value == DUMMY_STORAGE_VALUE) + sk_storage_result = -1; + + sk_storage_result = bpf_sk_storage_delete(&sk_storage_map, sock->sk); + return 0; +} + +SEC("lsm/socket_post_create") +int BPF_PROG(socket_post_create, struct socket *sock, int family, int type, + int protocol, int kern) +{ + __u32 pid = bpf_get_current_pid_tgid() >> 32; + struct dummy_storage *storage; + + if (pid != monitored_pid) + return 0; + + storage = bpf_sk_storage_get(&sk_storage_map, sock->sk, 0, + BPF_SK_STORAGE_GET_F_CREATE); + if (!storage) + return 0; + + storage->value = DUMMY_STORAGE_VALUE; + + return 0; +} + +SEC("lsm/file_open") +int BPF_PROG(file_open, struct 
file *file) +{ + __u32 pid = bpf_get_current_pid_tgid() >> 32; + struct dummy_storage *storage; + + if (pid != monitored_pid) + return 0; + + if (!file->f_inode) + return 0; + + storage = bpf_inode_storage_get(&inode_storage_map, file->f_inode, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (!storage) + return 0; + + storage->value = DUMMY_STORAGE_VALUE; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/lsm.c b/tools/testing/selftests/bpf/progs/lsm.c index b4598d4bc4f7..ff4d343b94b5 100644 --- a/tools/testing/selftests/bpf/progs/lsm.c +++ b/tools/testing/selftests/bpf/progs/lsm.c @@ -9,6 +9,27 @@ #include <bpf/bpf_tracing.h> #include <errno.h> +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} array SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} hash SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_LRU_HASH); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} lru_hash SEC(".maps"); + char _license[] SEC("license") = "GPL"; int monitored_pid = 0; @@ -36,13 +57,54 @@ int BPF_PROG(test_int_hook, struct vm_area_struct *vma, return ret; } -SEC("lsm/bprm_committed_creds") +SEC("lsm.s/bprm_committed_creds") int BPF_PROG(test_void_hook, struct linux_binprm *bprm) { __u32 pid = bpf_get_current_pid_tgid() >> 32; + char args[64]; + __u32 key = 0; + __u64 *value; if (monitored_pid == pid) bprm_count++; + bpf_copy_from_user(args, sizeof(args), (void *)bprm->vma->vm_mm->arg_start); + bpf_copy_from_user(args, sizeof(args), (void *)bprm->mm->arg_start); + + value = bpf_map_lookup_elem(&array, &key); + if (value) + *value = 0; + value = bpf_map_lookup_elem(&hash, &key); + if (value) + *value = 0; + value = bpf_map_lookup_elem(&lru_hash, &key); + if (value) + *value = 0; + + return 0; +} +SEC("lsm/task_free") /* lsm/ is ok, lsm.s/ fails */ +int BPF_PROG(test_task_free, struct task_struct 
*task) +{ + return 0; +} + +int copy_test = 0; + +SEC("fentry.s/__x64_sys_setdomainname") +int BPF_PROG(test_sys_setdomainname, struct pt_regs *regs) +{ + void *ptr = (void *)PT_REGS_PARM1(regs); + int len = PT_REGS_PARM2(regs); + int buf = 0; + long ret; + + ret = bpf_copy_from_user(&buf, sizeof(buf), ptr); + if (len == -2 && ret == 0 && buf == 1234) + copy_test++; + if (len == -3 && ret == -EFAULT) + copy_test++; + if (len == -4 && ret == -EFAULT) + copy_test++; return 0; } diff --git a/tools/testing/selftests/bpf/progs/map_ptr_kern.c b/tools/testing/selftests/bpf/progs/map_ptr_kern.c index 473665cac67e..c325405751e2 100644 --- a/tools/testing/selftests/bpf/progs/map_ptr_kern.c +++ b/tools/testing/selftests/bpf/progs/map_ptr_kern.c @@ -82,6 +82,14 @@ static inline int check_default(struct bpf_map *indirect, return 1; } +static __noinline int +check_default_noinline(struct bpf_map *indirect, struct bpf_map *direct) +{ + VERIFY(check(indirect, direct, sizeof(__u32), sizeof(__u32), + MAX_ENTRIES)); + return 1; +} + typedef struct { int counter; } atomic_t; @@ -107,7 +115,7 @@ static inline int check_hash(void) struct bpf_map *map = (struct bpf_map *)&m_hash; int i; - VERIFY(check_default(&hash->map, map)); + VERIFY(check_default_noinline(&hash->map, map)); VERIFY(hash->n_buckets == MAX_ENTRIES); VERIFY(hash->elem_size == 64); @@ -589,7 +597,7 @@ static inline int check_stack(void) return 1; } -struct bpf_sk_storage_map { +struct bpf_local_storage_map { struct bpf_map map; } __attribute__((preserve_access_index)); @@ -602,8 +610,8 @@ struct { static inline int check_sk_storage(void) { - struct bpf_sk_storage_map *sk_storage = - (struct bpf_sk_storage_map *)&m_sk_storage; + struct bpf_local_storage_map *sk_storage = + (struct bpf_local_storage_map *)&m_sk_storage; struct bpf_map *map = (struct bpf_map *)&m_sk_storage; VERIFY(check(&sk_storage->map, map, sizeof(__u32), sizeof(__u32), 0)); diff --git a/tools/testing/selftests/bpf/progs/metadata_unused.c 
b/tools/testing/selftests/bpf/progs/metadata_unused.c new file mode 100644 index 000000000000..672a0d19f8d0 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/metadata_unused.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +volatile const char bpf_metadata_a[] SEC(".rodata") = "foo"; +volatile const int bpf_metadata_b SEC(".rodata") = 1; + +SEC("cgroup_skb/egress") +int prog(struct xdp_md *ctx) +{ + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/metadata_used.c b/tools/testing/selftests/bpf/progs/metadata_used.c new file mode 100644 index 000000000000..b7198e65383d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/metadata_used.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +volatile const char bpf_metadata_a[] SEC(".rodata") = "bar"; +volatile const int bpf_metadata_b SEC(".rodata") = 2; + +SEC("cgroup_skb/egress") +int prog(struct xdp_md *ctx) +{ + return bpf_metadata_b ? 1 : 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/netif_receive_skb.c b/tools/testing/selftests/bpf/progs/netif_receive_skb.c new file mode 100644 index 000000000000..6b670039ea67 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/netif_receive_skb.c @@ -0,0 +1,249 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020, Oracle and/or its affiliates. 
*/ + +#include "btf_ptr.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_core_read.h> + +#include <errno.h> + +long ret = 0; +int num_subtests = 0; +int ran_subtests = 0; +bool skip = false; + +#define STRSIZE 2048 +#define EXPECTED_STRSIZE 256 + +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#endif + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, char[STRSIZE]); +} strdata SEC(".maps"); + +static int __strncmp(const void *m1, const void *m2, size_t len) +{ + const unsigned char *s1 = m1; + const unsigned char *s2 = m2; + int i, delta = 0; + + for (i = 0; i < len; i++) { + delta = s1[i] - s2[i]; + if (delta || s1[i] == 0 || s2[i] == 0) + break; + } + return delta; +} + +#if __has_builtin(__builtin_btf_type_id) +#define TEST_BTF(_str, _type, _flags, _expected, ...) \ + do { \ + static const char _expectedval[EXPECTED_STRSIZE] = \ + _expected; \ + static const char _ptrtype[64] = #_type; \ + __u64 _hflags = _flags | BTF_F_COMPACT; \ + static _type _ptrdata = __VA_ARGS__; \ + static struct btf_ptr _ptr = { }; \ + int _cmp; \ + \ + ++num_subtests; \ + if (ret < 0) \ + break; \ + ++ran_subtests; \ + _ptr.ptr = &_ptrdata; \ + _ptr.type_id = bpf_core_type_id_kernel(_type); \ + if (_ptr.type_id <= 0) { \ + ret = -EINVAL; \ + break; \ + } \ + ret = bpf_snprintf_btf(_str, STRSIZE, \ + &_ptr, sizeof(_ptr), _hflags); \ + if (ret) \ + break; \ + _cmp = __strncmp(_str, _expectedval, EXPECTED_STRSIZE); \ + if (_cmp != 0) { \ + bpf_printk("(%d) got %s", _cmp, _str); \ + bpf_printk("(%d) expected %s", _cmp, \ + _expectedval); \ + ret = -EBADMSG; \ + break; \ + } \ + } while (0) +#endif + +/* Use where expected data string matches its stringified declaration */ +#define TEST_BTF_C(_str, _type, _flags, ...) 
\ + TEST_BTF(_str, _type, _flags, "(" #_type ")" #__VA_ARGS__, \ + __VA_ARGS__) + +/* TRACE_EVENT(netif_receive_skb, + * TP_PROTO(struct sk_buff *skb), + */ +SEC("tp_btf/netif_receive_skb") +int BPF_PROG(trace_netif_receive_skb, struct sk_buff *skb) +{ + static __u64 flags[] = { 0, BTF_F_COMPACT, BTF_F_ZERO, BTF_F_PTR_RAW, + BTF_F_NONAME, BTF_F_COMPACT | BTF_F_ZERO | + BTF_F_PTR_RAW | BTF_F_NONAME }; + static struct btf_ptr p = { }; + __u32 key = 0; + int i, __ret; + char *str; + +#if __has_builtin(__builtin_btf_type_id) + str = bpf_map_lookup_elem(&strdata, &key); + if (!str) + return 0; + + /* Ensure we can write skb string representation */ + p.type_id = bpf_core_type_id_kernel(struct sk_buff); + p.ptr = skb; + for (i = 0; i < ARRAY_SIZE(flags); i++) { + ++num_subtests; + ret = bpf_snprintf_btf(str, STRSIZE, &p, sizeof(p), 0); + if (ret < 0) + bpf_printk("returned %d when writing skb", ret); + ++ran_subtests; + } + + /* Check invalid ptr value */ + p.ptr = 0; + __ret = bpf_snprintf_btf(str, STRSIZE, &p, sizeof(p), 0); + if (__ret >= 0) { + bpf_printk("printing NULL should generate error, got (%d)", + __ret); + ret = -ERANGE; + } + + /* Verify type display for various types. 
*/ + + /* simple int */ + TEST_BTF_C(str, int, 0, 1234); + TEST_BTF(str, int, BTF_F_NONAME, "1234", 1234); + /* zero value should be printed at toplevel */ + TEST_BTF(str, int, 0, "(int)0", 0); + TEST_BTF(str, int, BTF_F_NONAME, "0", 0); + TEST_BTF(str, int, BTF_F_ZERO, "(int)0", 0); + TEST_BTF(str, int, BTF_F_NONAME | BTF_F_ZERO, "0", 0); + TEST_BTF_C(str, int, 0, -4567); + TEST_BTF(str, int, BTF_F_NONAME, "-4567", -4567); + + /* simple char */ + TEST_BTF_C(str, char, 0, 100); + TEST_BTF(str, char, BTF_F_NONAME, "100", 100); + /* zero value should be printed at toplevel */ + TEST_BTF(str, char, 0, "(char)0", 0); + TEST_BTF(str, char, BTF_F_NONAME, "0", 0); + TEST_BTF(str, char, BTF_F_ZERO, "(char)0", 0); + TEST_BTF(str, char, BTF_F_NONAME | BTF_F_ZERO, "0", 0); + + /* simple typedef */ + TEST_BTF_C(str, uint64_t, 0, 100); + TEST_BTF(str, u64, BTF_F_NONAME, "1", 1); + /* zero value should be printed at toplevel */ + TEST_BTF(str, u64, 0, "(u64)0", 0); + TEST_BTF(str, u64, BTF_F_NONAME, "0", 0); + TEST_BTF(str, u64, BTF_F_ZERO, "(u64)0", 0); + TEST_BTF(str, u64, BTF_F_NONAME|BTF_F_ZERO, "0", 0); + + /* typedef struct */ + TEST_BTF_C(str, atomic_t, 0, {.counter = (int)1,}); + TEST_BTF(str, atomic_t, BTF_F_NONAME, "{1,}", {.counter = 1,}); + /* typedef with 0 value should be printed at toplevel */ + TEST_BTF(str, atomic_t, 0, "(atomic_t){}", {.counter = 0,}); + TEST_BTF(str, atomic_t, BTF_F_NONAME, "{}", {.counter = 0,}); + TEST_BTF(str, atomic_t, BTF_F_ZERO, "(atomic_t){.counter = (int)0,}", + {.counter = 0,}); + TEST_BTF(str, atomic_t, BTF_F_NONAME|BTF_F_ZERO, + "{0,}", {.counter = 0,}); + + /* enum where enum value does (and does not) exist */ + TEST_BTF_C(str, enum bpf_cmd, 0, BPF_MAP_CREATE); + TEST_BTF(str, enum bpf_cmd, 0, "(enum bpf_cmd)BPF_MAP_CREATE", 0); + TEST_BTF(str, enum bpf_cmd, BTF_F_NONAME, "BPF_MAP_CREATE", + BPF_MAP_CREATE); + TEST_BTF(str, enum bpf_cmd, BTF_F_NONAME|BTF_F_ZERO, + "BPF_MAP_CREATE", 0); + + TEST_BTF(str, enum bpf_cmd, BTF_F_ZERO, 
"(enum bpf_cmd)BPF_MAP_CREATE", + BPF_MAP_CREATE); + TEST_BTF(str, enum bpf_cmd, BTF_F_NONAME|BTF_F_ZERO, + "BPF_MAP_CREATE", BPF_MAP_CREATE); + TEST_BTF_C(str, enum bpf_cmd, 0, 2000); + TEST_BTF(str, enum bpf_cmd, BTF_F_NONAME, "2000", 2000); + + /* simple struct */ + TEST_BTF_C(str, struct btf_enum, 0, + {.name_off = (__u32)3,.val = (__s32)-1,}); + TEST_BTF(str, struct btf_enum, BTF_F_NONAME, "{3,-1,}", + { .name_off = 3, .val = -1,}); + TEST_BTF(str, struct btf_enum, BTF_F_NONAME, "{-1,}", + { .name_off = 0, .val = -1,}); + TEST_BTF(str, struct btf_enum, BTF_F_NONAME|BTF_F_ZERO, "{0,-1,}", + { .name_off = 0, .val = -1,}); + /* empty struct should be printed */ + TEST_BTF(str, struct btf_enum, 0, "(struct btf_enum){}", + { .name_off = 0, .val = 0,}); + TEST_BTF(str, struct btf_enum, BTF_F_NONAME, "{}", + { .name_off = 0, .val = 0,}); + TEST_BTF(str, struct btf_enum, BTF_F_ZERO, + "(struct btf_enum){.name_off = (__u32)0,.val = (__s32)0,}", + { .name_off = 0, .val = 0,}); + + /* struct with pointers */ + TEST_BTF(str, struct list_head, BTF_F_PTR_RAW, + "(struct list_head){.next = (struct list_head *)0x0000000000000001,}", + { .next = (struct list_head *)1 }); + /* NULL pointer should not be displayed */ + TEST_BTF(str, struct list_head, BTF_F_PTR_RAW, + "(struct list_head){}", + { .next = (struct list_head *)0 }); + + /* struct with char array */ + TEST_BTF(str, struct bpf_prog_info, 0, + "(struct bpf_prog_info){.name = (char[])['f','o','o',],}", + { .name = "foo",}); + TEST_BTF(str, struct bpf_prog_info, BTF_F_NONAME, + "{['f','o','o',],}", + {.name = "foo",}); + /* leading null char means do not display string */ + TEST_BTF(str, struct bpf_prog_info, 0, + "(struct bpf_prog_info){}", + {.name = {'\0', 'f', 'o', 'o'}}); + /* handle non-printable characters */ + TEST_BTF(str, struct bpf_prog_info, 0, + "(struct bpf_prog_info){.name = (char[])[1,2,3,],}", + { .name = {1, 2, 3, 0}}); + + /* struct with non-char array */ + TEST_BTF(str, struct __sk_buff, 0, + "(struct 
__sk_buff){.cb = (__u32[])[1,2,3,4,5,],}", + { .cb = {1, 2, 3, 4, 5,},}); + TEST_BTF(str, struct __sk_buff, BTF_F_NONAME, + "{[1,2,3,4,5,],}", + { .cb = { 1, 2, 3, 4, 5},}); + /* For non-char, arrays, show non-zero values only */ + TEST_BTF(str, struct __sk_buff, 0, + "(struct __sk_buff){.cb = (__u32[])[1,],}", + { .cb = { 0, 0, 1, 0, 0},}); + + /* struct with bitfields */ + TEST_BTF_C(str, struct bpf_insn, 0, + {.code = (__u8)1,.dst_reg = (__u8)0x2,.src_reg = (__u8)0x3,.off = (__s16)4,.imm = (__s32)5,}); + TEST_BTF(str, struct bpf_insn, BTF_F_NONAME, "{1,0x2,0x3,4,5,}", + {.code = 1, .dst_reg = 0x2, .src_reg = 0x3, .off = 4, + .imm = 5,}); +#else + skip = true; +#endif + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/profiler.h b/tools/testing/selftests/bpf/progs/profiler.h new file mode 100644 index 000000000000..3bac4fdd4bdf --- /dev/null +++ b/tools/testing/selftests/bpf/progs/profiler.h @@ -0,0 +1,177 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#pragma once + +#define TASK_COMM_LEN 16 +#define MAX_ANCESTORS 4 +#define MAX_PATH 256 +#define KILL_TARGET_LEN 64 +#define CTL_MAXNAME 10 +#define MAX_ARGS_LEN 4096 +#define MAX_FILENAME_LEN 512 +#define MAX_ENVIRON_LEN 8192 +#define MAX_PATH_DEPTH 32 +#define MAX_FILEPATH_LENGTH (MAX_PATH_DEPTH * MAX_PATH) +#define MAX_CGROUPS_PATH_DEPTH 8 + +#define MAX_METADATA_PAYLOAD_LEN TASK_COMM_LEN + +#define MAX_CGROUP_PAYLOAD_LEN \ + (MAX_PATH * 2 + (MAX_PATH * MAX_CGROUPS_PATH_DEPTH)) + +#define MAX_CAP_PAYLOAD_LEN (MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN) + +#define MAX_SYSCTL_PAYLOAD_LEN \ + (MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN + CTL_MAXNAME + MAX_PATH) + +#define MAX_KILL_PAYLOAD_LEN \ + (MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN + TASK_COMM_LEN + \ + KILL_TARGET_LEN) + +#define MAX_EXEC_PAYLOAD_LEN \ + (MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN + MAX_FILENAME_LEN + \ + MAX_ARGS_LEN + 
MAX_ENVIRON_LEN) + +#define MAX_FILEMOD_PAYLOAD_LEN \ + (MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN + MAX_FILEPATH_LENGTH + \ + MAX_FILEPATH_LENGTH) + +enum data_type { + INVALID_EVENT, + EXEC_EVENT, + FORK_EVENT, + KILL_EVENT, + SYSCTL_EVENT, + FILEMOD_EVENT, + MAX_DATA_TYPE_EVENT +}; + +enum filemod_type { + FMOD_OPEN, + FMOD_LINK, + FMOD_SYMLINK, +}; + +struct ancestors_data_t { + pid_t ancestor_pids[MAX_ANCESTORS]; + uint32_t ancestor_exec_ids[MAX_ANCESTORS]; + uint64_t ancestor_start_times[MAX_ANCESTORS]; + uint32_t num_ancestors; +}; + +struct var_metadata_t { + enum data_type type; + pid_t pid; + uint32_t exec_id; + uid_t uid; + gid_t gid; + uint64_t start_time; + uint32_t cpu_id; + uint64_t bpf_stats_num_perf_events; + uint64_t bpf_stats_start_ktime_ns; + uint8_t comm_length; +}; + +struct cgroup_data_t { + ino_t cgroup_root_inode; + ino_t cgroup_proc_inode; + uint64_t cgroup_root_mtime; + uint64_t cgroup_proc_mtime; + uint16_t cgroup_root_length; + uint16_t cgroup_proc_length; + uint16_t cgroup_full_length; + int cgroup_full_path_root_pos; +}; + +struct var_sysctl_data_t { + struct var_metadata_t meta; + struct cgroup_data_t cgroup_data; + struct ancestors_data_t ancestors_info; + uint8_t sysctl_val_length; + uint16_t sysctl_path_length; + char payload[MAX_SYSCTL_PAYLOAD_LEN]; +}; + +struct var_kill_data_t { + struct var_metadata_t meta; + struct cgroup_data_t cgroup_data; + struct ancestors_data_t ancestors_info; + pid_t kill_target_pid; + int kill_sig; + uint32_t kill_count; + uint64_t last_kill_time; + uint8_t kill_target_name_length; + uint8_t kill_target_cgroup_proc_length; + char payload[MAX_KILL_PAYLOAD_LEN]; + size_t payload_length; +}; + +struct var_exec_data_t { + struct var_metadata_t meta; + struct cgroup_data_t cgroup_data; + pid_t parent_pid; + uint32_t parent_exec_id; + uid_t parent_uid; + uint64_t parent_start_time; + uint16_t bin_path_length; + uint16_t cmdline_length; + uint16_t environment_length; + char 
payload[MAX_EXEC_PAYLOAD_LEN]; +}; + +struct var_fork_data_t { + struct var_metadata_t meta; + pid_t parent_pid; + uint32_t parent_exec_id; + uint64_t parent_start_time; + char payload[MAX_METADATA_PAYLOAD_LEN]; +}; + +struct var_filemod_data_t { + struct var_metadata_t meta; + struct cgroup_data_t cgroup_data; + enum filemod_type fmod_type; + unsigned int dst_flags; + uint32_t src_device_id; + uint32_t dst_device_id; + ino_t src_inode; + ino_t dst_inode; + uint16_t src_filepath_length; + uint16_t dst_filepath_length; + char payload[MAX_FILEMOD_PAYLOAD_LEN]; +}; + +struct profiler_config_struct { + bool fetch_cgroups_from_bpf; + ino_t cgroup_fs_inode; + ino_t cgroup_login_session_inode; + uint64_t kill_signals_mask; + ino_t inode_filter; + uint32_t stale_info_secs; + bool use_variable_buffers; + bool read_environ_from_exec; + bool enable_cgroup_v1_resolver; +}; + +struct bpf_func_stats_data { + uint64_t time_elapsed_ns; + uint64_t num_executions; + uint64_t num_perf_events; +}; + +struct bpf_func_stats_ctx { + uint64_t start_time_ns; + struct bpf_func_stats_data* bpf_func_stats_data_val; +}; + +enum bpf_function_id { + profiler_bpf_proc_sys_write, + profiler_bpf_sched_process_exec, + profiler_bpf_sched_process_exit, + profiler_bpf_sys_enter_kill, + profiler_bpf_do_filp_open_ret, + profiler_bpf_sched_process_fork, + profiler_bpf_vfs_link, + profiler_bpf_vfs_symlink, + profiler_bpf_max_function_id +}; diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h new file mode 100644 index 000000000000..00578311a423 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/profiler.inc.h @@ -0,0 +1,969 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#include <vmlinux.h> +#include <bpf/bpf_core_read.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +#include "profiler.h" + +#ifndef NULL +#define NULL 0 +#endif + +#define O_WRONLY 00000001 +#define O_RDWR 00000002 +#define 
O_DIRECTORY 00200000 +#define __O_TMPFILE 020000000 +#define O_TMPFILE (__O_TMPFILE | O_DIRECTORY) +#define MAX_ERRNO 4095 +#define S_IFMT 00170000 +#define S_IFSOCK 0140000 +#define S_IFLNK 0120000 +#define S_IFREG 0100000 +#define S_IFBLK 0060000 +#define S_IFDIR 0040000 +#define S_IFCHR 0020000 +#define S_IFIFO 0010000 +#define S_ISUID 0004000 +#define S_ISGID 0002000 +#define S_ISVTX 0001000 +#define S_ISLNK(m) (((m)&S_IFMT) == S_IFLNK) +#define S_ISDIR(m) (((m)&S_IFMT) == S_IFDIR) +#define S_ISCHR(m) (((m)&S_IFMT) == S_IFCHR) +#define S_ISBLK(m) (((m)&S_IFMT) == S_IFBLK) +#define S_ISFIFO(m) (((m)&S_IFMT) == S_IFIFO) +#define S_ISSOCK(m) (((m)&S_IFMT) == S_IFSOCK) +#define IS_ERR_VALUE(x) (unsigned long)(void*)(x) >= (unsigned long)-MAX_ERRNO + +#define KILL_DATA_ARRAY_SIZE 8 + +struct var_kill_data_arr_t { + struct var_kill_data_t array[KILL_DATA_ARRAY_SIZE]; +}; + +union any_profiler_data_t { + struct var_exec_data_t var_exec; + struct var_kill_data_t var_kill; + struct var_sysctl_data_t var_sysctl; + struct var_filemod_data_t var_filemod; + struct var_fork_data_t var_fork; + struct var_kill_data_arr_t var_kill_data_arr; +}; + +volatile struct profiler_config_struct bpf_config = {}; + +#define FETCH_CGROUPS_FROM_BPF (bpf_config.fetch_cgroups_from_bpf) +#define CGROUP_FS_INODE (bpf_config.cgroup_fs_inode) +#define CGROUP_LOGIN_SESSION_INODE \ + (bpf_config.cgroup_login_session_inode) +#define KILL_SIGNALS (bpf_config.kill_signals_mask) +#define STALE_INFO (bpf_config.stale_info_secs) +#define INODE_FILTER (bpf_config.inode_filter) +#define READ_ENVIRON_FROM_EXEC (bpf_config.read_environ_from_exec) +#define ENABLE_CGROUP_V1_RESOLVER (bpf_config.enable_cgroup_v1_resolver) + +struct kernfs_iattrs___52 { + struct iattr ia_iattr; +}; + +struct kernfs_node___52 { + union /* kernfs_node_id */ { + struct { + u32 ino; + u32 generation; + }; + u64 id; + } id; +}; + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 1); + __type(key, u32); + 
__type(value, union any_profiler_data_t); +} data_heap SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); +} events SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, KILL_DATA_ARRAY_SIZE); + __type(key, u32); + __type(value, struct var_kill_data_arr_t); +} var_tpid_to_data SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, profiler_bpf_max_function_id); + __type(key, u32); + __type(value, struct bpf_func_stats_data); +} bpf_func_stats SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, u32); + __type(value, bool); + __uint(max_entries, 16); +} allowed_devices SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, u64); + __type(value, bool); + __uint(max_entries, 1024); +} allowed_file_inodes SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, u64); + __type(value, bool); + __uint(max_entries, 1024); +} allowed_directory_inodes SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, u32); + __type(value, bool); + __uint(max_entries, 16); +} disallowed_exec_inodes SEC(".maps"); + +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof(arr[0])) +#endif + +static INLINE bool IS_ERR(const void* ptr) +{ + return IS_ERR_VALUE((unsigned long)ptr); +} + +static INLINE u32 get_userspace_pid() +{ + return bpf_get_current_pid_tgid() >> 32; +} + +static INLINE bool is_init_process(u32 tgid) +{ + return tgid == 1 || tgid == 0; +} + +static INLINE unsigned long +probe_read_lim(void* dst, void* src, unsigned long len, unsigned long max) +{ + len = len < max ? 
len : max; + if (len > 1) { + if (bpf_probe_read(dst, len, src)) + return 0; + } else if (len == 1) { + if (bpf_probe_read(dst, 1, src)) + return 0; + } + return len; +} + +static INLINE int get_var_spid_index(struct var_kill_data_arr_t* arr_struct, + int spid) +{ +#ifdef UNROLL +#pragma unroll +#endif + for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++) + if (arr_struct->array[i].meta.pid == spid) + return i; + return -1; +} + +static INLINE void populate_ancestors(struct task_struct* task, + struct ancestors_data_t* ancestors_data) +{ + struct task_struct* parent = task; + u32 num_ancestors, ppid; + + ancestors_data->num_ancestors = 0; +#ifdef UNROLL +#pragma unroll +#endif + for (num_ancestors = 0; num_ancestors < MAX_ANCESTORS; num_ancestors++) { + parent = BPF_CORE_READ(parent, real_parent); + if (parent == NULL) + break; + ppid = BPF_CORE_READ(parent, tgid); + if (is_init_process(ppid)) + break; + ancestors_data->ancestor_pids[num_ancestors] = ppid; + ancestors_data->ancestor_exec_ids[num_ancestors] = + BPF_CORE_READ(parent, self_exec_id); + ancestors_data->ancestor_start_times[num_ancestors] = + BPF_CORE_READ(parent, start_time); + ancestors_data->num_ancestors = num_ancestors; + } +} + +static INLINE void* read_full_cgroup_path(struct kernfs_node* cgroup_node, + struct kernfs_node* cgroup_root_node, + void* payload, + int* root_pos) +{ + void* payload_start = payload; + size_t filepart_length; + +#ifdef UNROLL +#pragma unroll +#endif + for (int i = 0; i < MAX_CGROUPS_PATH_DEPTH; i++) { + filepart_length = + bpf_probe_read_str(payload, MAX_PATH, BPF_CORE_READ(cgroup_node, name)); + if (!cgroup_node) + return payload; + if (cgroup_node == cgroup_root_node) + *root_pos = payload - payload_start; + if (filepart_length <= MAX_PATH) { + barrier_var(filepart_length); + payload += filepart_length; + } + cgroup_node = BPF_CORE_READ(cgroup_node, parent); + } + return payload; +} + +static ino_t get_inode_from_kernfs(struct kernfs_node* node) +{ + struct 
kernfs_node___52* node52 = (void*)node; + + if (bpf_core_field_exists(node52->id.ino)) { + barrier_var(node52); + return BPF_CORE_READ(node52, id.ino); + } else { + barrier_var(node); + return (u64)BPF_CORE_READ(node, id); + } +} + +int pids_cgrp_id = 1; + +static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data, + struct task_struct* task, + void* payload) +{ + struct kernfs_node* root_kernfs = + BPF_CORE_READ(task, nsproxy, cgroup_ns, root_cset, dfl_cgrp, kn); + struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn); + + if (ENABLE_CGROUP_V1_RESOLVER) { +#ifdef UNROLL +#pragma unroll +#endif + for (int i = 0; i < CGROUP_SUBSYS_COUNT; i++) { + struct cgroup_subsys_state* subsys = + BPF_CORE_READ(task, cgroups, subsys[i]); + if (subsys != NULL) { + int subsys_id = BPF_CORE_READ(subsys, ss, id); + if (subsys_id == pids_cgrp_id) { + proc_kernfs = BPF_CORE_READ(subsys, cgroup, kn); + root_kernfs = BPF_CORE_READ(subsys, ss, root, kf_root, kn); + break; + } + } + } + } + + cgroup_data->cgroup_root_inode = get_inode_from_kernfs(root_kernfs); + cgroup_data->cgroup_proc_inode = get_inode_from_kernfs(proc_kernfs); + + if (bpf_core_field_exists(root_kernfs->iattr->ia_mtime)) { + cgroup_data->cgroup_root_mtime = + BPF_CORE_READ(root_kernfs, iattr, ia_mtime.tv_nsec); + cgroup_data->cgroup_proc_mtime = + BPF_CORE_READ(proc_kernfs, iattr, ia_mtime.tv_nsec); + } else { + struct kernfs_iattrs___52* root_iattr = + (struct kernfs_iattrs___52*)BPF_CORE_READ(root_kernfs, iattr); + cgroup_data->cgroup_root_mtime = + BPF_CORE_READ(root_iattr, ia_iattr.ia_mtime.tv_nsec); + + struct kernfs_iattrs___52* proc_iattr = + (struct kernfs_iattrs___52*)BPF_CORE_READ(proc_kernfs, iattr); + cgroup_data->cgroup_proc_mtime = + BPF_CORE_READ(proc_iattr, ia_iattr.ia_mtime.tv_nsec); + } + + cgroup_data->cgroup_root_length = 0; + cgroup_data->cgroup_proc_length = 0; + cgroup_data->cgroup_full_length = 0; + + size_t cgroup_root_length = + 
bpf_probe_read_str(payload, MAX_PATH, BPF_CORE_READ(root_kernfs, name)); + barrier_var(cgroup_root_length); + if (cgroup_root_length <= MAX_PATH) { + barrier_var(cgroup_root_length); + cgroup_data->cgroup_root_length = cgroup_root_length; + payload += cgroup_root_length; + } + + size_t cgroup_proc_length = + bpf_probe_read_str(payload, MAX_PATH, BPF_CORE_READ(proc_kernfs, name)); + barrier_var(cgroup_proc_length); + if (cgroup_proc_length <= MAX_PATH) { + barrier_var(cgroup_proc_length); + cgroup_data->cgroup_proc_length = cgroup_proc_length; + payload += cgroup_proc_length; + } + + if (FETCH_CGROUPS_FROM_BPF) { + cgroup_data->cgroup_full_path_root_pos = -1; + void* payload_end_pos = read_full_cgroup_path(proc_kernfs, root_kernfs, payload, + &cgroup_data->cgroup_full_path_root_pos); + cgroup_data->cgroup_full_length = payload_end_pos - payload; + payload = payload_end_pos; + } + + return (void*)payload; +} + +static INLINE void* populate_var_metadata(struct var_metadata_t* metadata, + struct task_struct* task, + u32 pid, void* payload) +{ + u64 uid_gid = bpf_get_current_uid_gid(); + + metadata->uid = (u32)uid_gid; + metadata->gid = uid_gid >> 32; + metadata->pid = pid; + metadata->exec_id = BPF_CORE_READ(task, self_exec_id); + metadata->start_time = BPF_CORE_READ(task, start_time); + metadata->comm_length = 0; + + size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm); + barrier_var(comm_length); + if (comm_length <= TASK_COMM_LEN) { + barrier_var(comm_length); + metadata->comm_length = comm_length; + payload += comm_length; + } + + return (void*)payload; +} + +static INLINE struct var_kill_data_t* +get_var_kill_data(struct pt_regs* ctx, int spid, int tpid, int sig) +{ + int zero = 0; + struct var_kill_data_t* kill_data = bpf_map_lookup_elem(&data_heap, &zero); + + if (kill_data == NULL) + return NULL; + struct task_struct* task = (struct task_struct*)bpf_get_current_task(); + + void* payload = populate_var_metadata(&kill_data->meta, task, 
spid, kill_data->payload); + payload = populate_cgroup_info(&kill_data->cgroup_data, task, payload); + size_t payload_length = payload - (void*)kill_data->payload; + kill_data->payload_length = payload_length; + populate_ancestors(task, &kill_data->ancestors_info); + kill_data->meta.type = KILL_EVENT; + kill_data->kill_target_pid = tpid; + kill_data->kill_sig = sig; + kill_data->kill_count = 1; + kill_data->last_kill_time = bpf_ktime_get_ns(); + return kill_data; +} + +static INLINE int trace_var_sys_kill(void* ctx, int tpid, int sig) +{ + if ((KILL_SIGNALS & (1ULL << sig)) == 0) + return 0; + + u32 spid = get_userspace_pid(); + struct var_kill_data_arr_t* arr_struct = bpf_map_lookup_elem(&var_tpid_to_data, &tpid); + + if (arr_struct == NULL) { + struct var_kill_data_t* kill_data = get_var_kill_data(ctx, spid, tpid, sig); + int zero = 0; + + if (kill_data == NULL) + return 0; + arr_struct = bpf_map_lookup_elem(&data_heap, &zero); + if (arr_struct == NULL) + return 0; + bpf_probe_read(&arr_struct->array[0], sizeof(arr_struct->array[0]), kill_data); + } else { + int index = get_var_spid_index(arr_struct, spid); + + if (index == -1) { + struct var_kill_data_t* kill_data = + get_var_kill_data(ctx, spid, tpid, sig); + if (kill_data == NULL) + return 0; +#ifdef UNROLL +#pragma unroll +#endif + for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++) + if (arr_struct->array[i].meta.pid == 0) { + bpf_probe_read(&arr_struct->array[i], + sizeof(arr_struct->array[i]), kill_data); + bpf_map_update_elem(&var_tpid_to_data, &tpid, + arr_struct, 0); + + return 0; + } + return 0; + } + + struct var_kill_data_t* kill_data = &arr_struct->array[index]; + + u64 delta_sec = + (bpf_ktime_get_ns() - kill_data->last_kill_time) / 1000000000; + + if (delta_sec < STALE_INFO) { + kill_data->kill_count++; + kill_data->last_kill_time = bpf_ktime_get_ns(); + bpf_probe_read(&arr_struct->array[index], + sizeof(arr_struct->array[index]), + kill_data); + } else { + struct var_kill_data_t* kill_data = 
+ get_var_kill_data(ctx, spid, tpid, sig); + if (kill_data == NULL) + return 0; + bpf_probe_read(&arr_struct->array[index], + sizeof(arr_struct->array[index]), + kill_data); + } + } + bpf_map_update_elem(&var_tpid_to_data, &tpid, arr_struct, 0); + return 0; +} + +static INLINE void bpf_stats_enter(struct bpf_func_stats_ctx* bpf_stat_ctx, + enum bpf_function_id func_id) +{ + int func_id_key = func_id; + + bpf_stat_ctx->start_time_ns = bpf_ktime_get_ns(); + bpf_stat_ctx->bpf_func_stats_data_val = + bpf_map_lookup_elem(&bpf_func_stats, &func_id_key); + if (bpf_stat_ctx->bpf_func_stats_data_val) + bpf_stat_ctx->bpf_func_stats_data_val->num_executions++; +} + +static INLINE void bpf_stats_exit(struct bpf_func_stats_ctx* bpf_stat_ctx) +{ + if (bpf_stat_ctx->bpf_func_stats_data_val) + bpf_stat_ctx->bpf_func_stats_data_val->time_elapsed_ns += + bpf_ktime_get_ns() - bpf_stat_ctx->start_time_ns; +} + +static INLINE void +bpf_stats_pre_submit_var_perf_event(struct bpf_func_stats_ctx* bpf_stat_ctx, + struct var_metadata_t* meta) +{ + if (bpf_stat_ctx->bpf_func_stats_data_val) { + bpf_stat_ctx->bpf_func_stats_data_val->num_perf_events++; + meta->bpf_stats_num_perf_events = + bpf_stat_ctx->bpf_func_stats_data_val->num_perf_events; + } + meta->bpf_stats_start_ktime_ns = bpf_stat_ctx->start_time_ns; + meta->cpu_id = bpf_get_smp_processor_id(); +} + +static INLINE size_t +read_absolute_file_path_from_dentry(struct dentry* filp_dentry, void* payload) +{ + size_t length = 0; + size_t filepart_length; + struct dentry* parent_dentry; + +#ifdef UNROLL +#pragma unroll +#endif + for (int i = 0; i < MAX_PATH_DEPTH; i++) { + filepart_length = bpf_probe_read_str(payload, MAX_PATH, + BPF_CORE_READ(filp_dentry, d_name.name)); + barrier_var(filepart_length); + if (filepart_length > MAX_PATH) + break; + barrier_var(filepart_length); + payload += filepart_length; + length += filepart_length; + + parent_dentry = BPF_CORE_READ(filp_dentry, d_parent); + if (filp_dentry == parent_dentry) + break; + 
filp_dentry = parent_dentry; + } + + return length; +} + +static INLINE bool +is_ancestor_in_allowed_inodes(struct dentry* filp_dentry) +{ + struct dentry* parent_dentry; +#ifdef UNROLL +#pragma unroll +#endif + for (int i = 0; i < MAX_PATH_DEPTH; i++) { + u64 dir_ino = BPF_CORE_READ(filp_dentry, d_inode, i_ino); + bool* allowed_dir = bpf_map_lookup_elem(&allowed_directory_inodes, &dir_ino); + + if (allowed_dir != NULL) + return true; + parent_dentry = BPF_CORE_READ(filp_dentry, d_parent); + if (filp_dentry == parent_dentry) + break; + filp_dentry = parent_dentry; + } + return false; +} + +static INLINE bool is_dentry_allowed_for_filemod(struct dentry* file_dentry, + u32* device_id, + u64* file_ino) +{ + u32 dev_id = BPF_CORE_READ(file_dentry, d_sb, s_dev); + *device_id = dev_id; + bool* allowed_device = bpf_map_lookup_elem(&allowed_devices, &dev_id); + + if (allowed_device == NULL) + return false; + + u64 ino = BPF_CORE_READ(file_dentry, d_inode, i_ino); + *file_ino = ino; + bool* allowed_file = bpf_map_lookup_elem(&allowed_file_inodes, &ino); + + if (allowed_file == NULL) + if (!is_ancestor_in_allowed_inodes(BPF_CORE_READ(file_dentry, d_parent))) + return false; + return true; +} + +SEC("kprobe/proc_sys_write") +ssize_t BPF_KPROBE(kprobe__proc_sys_write, + struct file* filp, const char* buf, + size_t count, loff_t* ppos) +{ + struct bpf_func_stats_ctx stats_ctx; + bpf_stats_enter(&stats_ctx, profiler_bpf_proc_sys_write); + + u32 pid = get_userspace_pid(); + int zero = 0; + struct var_sysctl_data_t* sysctl_data = + bpf_map_lookup_elem(&data_heap, &zero); + if (!sysctl_data) + goto out; + + struct task_struct* task = (struct task_struct*)bpf_get_current_task(); + sysctl_data->meta.type = SYSCTL_EVENT; + void* payload = populate_var_metadata(&sysctl_data->meta, task, pid, sysctl_data->payload); + payload = populate_cgroup_info(&sysctl_data->cgroup_data, task, payload); + + populate_ancestors(task, &sysctl_data->ancestors_info); + + sysctl_data->sysctl_val_length = 
0; + sysctl_data->sysctl_path_length = 0; + + size_t sysctl_val_length = bpf_probe_read_str(payload, CTL_MAXNAME, buf); + barrier_var(sysctl_val_length); + if (sysctl_val_length <= CTL_MAXNAME) { + barrier_var(sysctl_val_length); + sysctl_data->sysctl_val_length = sysctl_val_length; + payload += sysctl_val_length; + } + + size_t sysctl_path_length = bpf_probe_read_str(payload, MAX_PATH, + BPF_CORE_READ(filp, f_path.dentry, d_name.name)); + barrier_var(sysctl_path_length); + if (sysctl_path_length <= MAX_PATH) { + barrier_var(sysctl_path_length); + sysctl_data->sysctl_path_length = sysctl_path_length; + payload += sysctl_path_length; + } + + bpf_stats_pre_submit_var_perf_event(&stats_ctx, &sysctl_data->meta); + unsigned long data_len = payload - (void*)sysctl_data; + data_len = data_len > sizeof(struct var_sysctl_data_t) + ? sizeof(struct var_sysctl_data_t) + : data_len; + bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, sysctl_data, data_len); +out: + bpf_stats_exit(&stats_ctx); + return 0; +} + +SEC("tracepoint/syscalls/sys_enter_kill") +int tracepoint__syscalls__sys_enter_kill(struct trace_event_raw_sys_enter* ctx) +{ + struct bpf_func_stats_ctx stats_ctx; + + bpf_stats_enter(&stats_ctx, profiler_bpf_sys_enter_kill); + int pid = ctx->args[0]; + int sig = ctx->args[1]; + int ret = trace_var_sys_kill(ctx, pid, sig); + bpf_stats_exit(&stats_ctx); + return ret; +}; + +SEC("raw_tracepoint/sched_process_exit") +int raw_tracepoint__sched_process_exit(void* ctx) +{ + int zero = 0; + struct bpf_func_stats_ctx stats_ctx; + bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_exit); + + u32 tpid = get_userspace_pid(); + + struct var_kill_data_arr_t* arr_struct = bpf_map_lookup_elem(&var_tpid_to_data, &tpid); + struct var_kill_data_t* kill_data = bpf_map_lookup_elem(&data_heap, &zero); + + if (arr_struct == NULL || kill_data == NULL) + goto out; + + struct task_struct* task = (struct task_struct*)bpf_get_current_task(); + struct kernfs_node* proc_kernfs = 
BPF_CORE_READ(task, cgroups, dfl_cgrp, kn); + +#ifdef UNROLL +#pragma unroll +#endif + for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++) { + struct var_kill_data_t* past_kill_data = &arr_struct->array[i]; + + if (past_kill_data != NULL && past_kill_data->kill_target_pid == tpid) { + bpf_probe_read(kill_data, sizeof(*past_kill_data), past_kill_data); + void* payload = kill_data->payload; + size_t offset = kill_data->payload_length; + if (offset >= MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN) + return 0; + payload += offset; + + kill_data->kill_target_name_length = 0; + kill_data->kill_target_cgroup_proc_length = 0; + + size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm); + barrier_var(comm_length); + if (comm_length <= TASK_COMM_LEN) { + barrier_var(comm_length); + kill_data->kill_target_name_length = comm_length; + payload += comm_length; + } + + size_t cgroup_proc_length = bpf_probe_read_str(payload, KILL_TARGET_LEN, + BPF_CORE_READ(proc_kernfs, name)); + barrier_var(cgroup_proc_length); + if (cgroup_proc_length <= KILL_TARGET_LEN) { + barrier_var(cgroup_proc_length); + kill_data->kill_target_cgroup_proc_length = cgroup_proc_length; + payload += cgroup_proc_length; + } + + bpf_stats_pre_submit_var_perf_event(&stats_ctx, &kill_data->meta); + unsigned long data_len = (void*)payload - (void*)kill_data; + data_len = data_len > sizeof(struct var_kill_data_t) + ? 
sizeof(struct var_kill_data_t) + : data_len; + bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, kill_data, data_len); + } + } + bpf_map_delete_elem(&var_tpid_to_data, &tpid); +out: + bpf_stats_exit(&stats_ctx); + return 0; +} + +SEC("raw_tracepoint/sched_process_exec") +int raw_tracepoint__sched_process_exec(struct bpf_raw_tracepoint_args* ctx) +{ + struct bpf_func_stats_ctx stats_ctx; + bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_exec); + + struct linux_binprm* bprm = (struct linux_binprm*)ctx->args[2]; + u64 inode = BPF_CORE_READ(bprm, file, f_inode, i_ino); + + bool* should_filter_binprm = bpf_map_lookup_elem(&disallowed_exec_inodes, &inode); + if (should_filter_binprm != NULL) + goto out; + + int zero = 0; + struct var_exec_data_t* proc_exec_data = bpf_map_lookup_elem(&data_heap, &zero); + if (!proc_exec_data) + goto out; + + if (INODE_FILTER && inode != INODE_FILTER) + return 0; + + u32 pid = get_userspace_pid(); + struct task_struct* task = (struct task_struct*)bpf_get_current_task(); + + proc_exec_data->meta.type = EXEC_EVENT; + proc_exec_data->bin_path_length = 0; + proc_exec_data->cmdline_length = 0; + proc_exec_data->environment_length = 0; + void* payload = populate_var_metadata(&proc_exec_data->meta, task, pid, + proc_exec_data->payload); + payload = populate_cgroup_info(&proc_exec_data->cgroup_data, task, payload); + + struct task_struct* parent_task = BPF_CORE_READ(task, real_parent); + proc_exec_data->parent_pid = BPF_CORE_READ(parent_task, tgid); + proc_exec_data->parent_uid = BPF_CORE_READ(parent_task, real_cred, uid.val); + proc_exec_data->parent_exec_id = BPF_CORE_READ(parent_task, self_exec_id); + proc_exec_data->parent_start_time = BPF_CORE_READ(parent_task, start_time); + + const char* filename = BPF_CORE_READ(bprm, filename); + size_t bin_path_length = bpf_probe_read_str(payload, MAX_FILENAME_LEN, filename); + barrier_var(bin_path_length); + if (bin_path_length <= MAX_FILENAME_LEN) { + barrier_var(bin_path_length); + 
proc_exec_data->bin_path_length = bin_path_length; + payload += bin_path_length; + } + + void* arg_start = (void*)BPF_CORE_READ(task, mm, arg_start); + void* arg_end = (void*)BPF_CORE_READ(task, mm, arg_end); + unsigned int cmdline_length = probe_read_lim(payload, arg_start, + arg_end - arg_start, MAX_ARGS_LEN); + + if (cmdline_length <= MAX_ARGS_LEN) { + barrier_var(cmdline_length); + proc_exec_data->cmdline_length = cmdline_length; + payload += cmdline_length; + } + + if (READ_ENVIRON_FROM_EXEC) { + void* env_start = (void*)BPF_CORE_READ(task, mm, env_start); + void* env_end = (void*)BPF_CORE_READ(task, mm, env_end); + unsigned long env_len = probe_read_lim(payload, env_start, + env_end - env_start, MAX_ENVIRON_LEN); + if (cmdline_length <= MAX_ENVIRON_LEN) { + proc_exec_data->environment_length = env_len; + payload += env_len; + } + } + + bpf_stats_pre_submit_var_perf_event(&stats_ctx, &proc_exec_data->meta); + unsigned long data_len = payload - (void*)proc_exec_data; + data_len = data_len > sizeof(struct var_exec_data_t) + ? 
sizeof(struct var_exec_data_t) + : data_len; + bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, proc_exec_data, data_len); +out: + bpf_stats_exit(&stats_ctx); + return 0; +} + +SEC("kretprobe/do_filp_open") +int kprobe_ret__do_filp_open(struct pt_regs* ctx) +{ + struct bpf_func_stats_ctx stats_ctx; + bpf_stats_enter(&stats_ctx, profiler_bpf_do_filp_open_ret); + + struct file* filp = (struct file*)PT_REGS_RC_CORE(ctx); + + if (filp == NULL || IS_ERR(filp)) + goto out; + unsigned int flags = BPF_CORE_READ(filp, f_flags); + if ((flags & (O_RDWR | O_WRONLY)) == 0) + goto out; + if ((flags & O_TMPFILE) > 0) + goto out; + struct inode* file_inode = BPF_CORE_READ(filp, f_inode); + umode_t mode = BPF_CORE_READ(file_inode, i_mode); + if (S_ISDIR(mode) || S_ISCHR(mode) || S_ISBLK(mode) || S_ISFIFO(mode) || + S_ISSOCK(mode)) + goto out; + + struct dentry* filp_dentry = BPF_CORE_READ(filp, f_path.dentry); + u32 device_id = 0; + u64 file_ino = 0; + if (!is_dentry_allowed_for_filemod(filp_dentry, &device_id, &file_ino)) + goto out; + + int zero = 0; + struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero); + if (!filemod_data) + goto out; + + u32 pid = get_userspace_pid(); + struct task_struct* task = (struct task_struct*)bpf_get_current_task(); + + filemod_data->meta.type = FILEMOD_EVENT; + filemod_data->fmod_type = FMOD_OPEN; + filemod_data->dst_flags = flags; + filemod_data->src_inode = 0; + filemod_data->dst_inode = file_ino; + filemod_data->src_device_id = 0; + filemod_data->dst_device_id = device_id; + filemod_data->src_filepath_length = 0; + filemod_data->dst_filepath_length = 0; + + void* payload = populate_var_metadata(&filemod_data->meta, task, pid, + filemod_data->payload); + payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload); + + size_t len = read_absolute_file_path_from_dentry(filp_dentry, payload); + barrier_var(len); + if (len <= MAX_FILEPATH_LENGTH) { + barrier_var(len); + payload += len; + 
filemod_data->dst_filepath_length = len; + } + bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta); + unsigned long data_len = payload - (void*)filemod_data; + data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len; + bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len); +out: + bpf_stats_exit(&stats_ctx); + return 0; +} + +SEC("kprobe/vfs_link") +int BPF_KPROBE(kprobe__vfs_link, + struct dentry* old_dentry, struct inode* dir, + struct dentry* new_dentry, struct inode** delegated_inode) +{ + struct bpf_func_stats_ctx stats_ctx; + bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_link); + + u32 src_device_id = 0; + u64 src_file_ino = 0; + u32 dst_device_id = 0; + u64 dst_file_ino = 0; + if (!is_dentry_allowed_for_filemod(old_dentry, &src_device_id, &src_file_ino) && + !is_dentry_allowed_for_filemod(new_dentry, &dst_device_id, &dst_file_ino)) + goto out; + + int zero = 0; + struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero); + if (!filemod_data) + goto out; + + u32 pid = get_userspace_pid(); + struct task_struct* task = (struct task_struct*)bpf_get_current_task(); + + filemod_data->meta.type = FILEMOD_EVENT; + filemod_data->fmod_type = FMOD_LINK; + filemod_data->dst_flags = 0; + filemod_data->src_inode = src_file_ino; + filemod_data->dst_inode = dst_file_ino; + filemod_data->src_device_id = src_device_id; + filemod_data->dst_device_id = dst_device_id; + filemod_data->src_filepath_length = 0; + filemod_data->dst_filepath_length = 0; + + void* payload = populate_var_metadata(&filemod_data->meta, task, pid, + filemod_data->payload); + payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload); + + size_t len = read_absolute_file_path_from_dentry(old_dentry, payload); + barrier_var(len); + if (len <= MAX_FILEPATH_LENGTH) { + barrier_var(len); + payload += len; + filemod_data->src_filepath_length = len; + } + + len = read_absolute_file_path_from_dentry(new_dentry, 
payload); + barrier_var(len); + if (len <= MAX_FILEPATH_LENGTH) { + barrier_var(len); + payload += len; + filemod_data->dst_filepath_length = len; + } + + bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta); + unsigned long data_len = payload - (void*)filemod_data; + data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len; + bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len); +out: + bpf_stats_exit(&stats_ctx); + return 0; +} + +SEC("kprobe/vfs_symlink") +int BPF_KPROBE(kprobe__vfs_symlink, struct inode* dir, struct dentry* dentry, + const char* oldname) +{ + struct bpf_func_stats_ctx stats_ctx; + bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_symlink); + + u32 dst_device_id = 0; + u64 dst_file_ino = 0; + if (!is_dentry_allowed_for_filemod(dentry, &dst_device_id, &dst_file_ino)) + goto out; + + int zero = 0; + struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero); + if (!filemod_data) + goto out; + + u32 pid = get_userspace_pid(); + struct task_struct* task = (struct task_struct*)bpf_get_current_task(); + + filemod_data->meta.type = FILEMOD_EVENT; + filemod_data->fmod_type = FMOD_SYMLINK; + filemod_data->dst_flags = 0; + filemod_data->src_inode = 0; + filemod_data->dst_inode = dst_file_ino; + filemod_data->src_device_id = 0; + filemod_data->dst_device_id = dst_device_id; + filemod_data->src_filepath_length = 0; + filemod_data->dst_filepath_length = 0; + + void* payload = populate_var_metadata(&filemod_data->meta, task, pid, + filemod_data->payload); + payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload); + + size_t len = bpf_probe_read_str(payload, MAX_FILEPATH_LENGTH, oldname); + barrier_var(len); + if (len <= MAX_FILEPATH_LENGTH) { + barrier_var(len); + payload += len; + filemod_data->src_filepath_length = len; + } + len = read_absolute_file_path_from_dentry(dentry, payload); + barrier_var(len); + if (len <= MAX_FILEPATH_LENGTH) { + barrier_var(len); 
+ payload += len; + filemod_data->dst_filepath_length = len; + } + bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta); + unsigned long data_len = payload - (void*)filemod_data; + data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len; + bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len); +out: + bpf_stats_exit(&stats_ctx); + return 0; +} + +SEC("raw_tracepoint/sched_process_fork") +int raw_tracepoint__sched_process_fork(struct bpf_raw_tracepoint_args* ctx) +{ + struct bpf_func_stats_ctx stats_ctx; + bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_fork); + + int zero = 0; + struct var_fork_data_t* fork_data = bpf_map_lookup_elem(&data_heap, &zero); + if (!fork_data) + goto out; + + struct task_struct* parent = (struct task_struct*)ctx->args[0]; + struct task_struct* child = (struct task_struct*)ctx->args[1]; + fork_data->meta.type = FORK_EVENT; + + void* payload = populate_var_metadata(&fork_data->meta, child, + BPF_CORE_READ(child, pid), fork_data->payload); + fork_data->parent_pid = BPF_CORE_READ(parent, pid); + fork_data->parent_exec_id = BPF_CORE_READ(parent, self_exec_id); + fork_data->parent_start_time = BPF_CORE_READ(parent, start_time); + bpf_stats_pre_submit_var_perf_event(&stats_ctx, &fork_data->meta); + + unsigned long data_len = payload - (void*)fork_data; + data_len = data_len > sizeof(*fork_data) ? 
sizeof(*fork_data) : data_len; + bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, fork_data, data_len); +out: + bpf_stats_exit(&stats_ctx); + return 0; +} +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/profiler1.c b/tools/testing/selftests/bpf/progs/profiler1.c new file mode 100644 index 000000000000..4df9088bfc00 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/profiler1.c @@ -0,0 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#define barrier_var(var) asm volatile("" : "=r"(var) : "0"(var)) +#define UNROLL +#define INLINE __always_inline +#include "profiler.inc.h" diff --git a/tools/testing/selftests/bpf/progs/profiler2.c b/tools/testing/selftests/bpf/progs/profiler2.c new file mode 100644 index 000000000000..0f32a3cbf556 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/profiler2.c @@ -0,0 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#define barrier_var(var) /**/ +/* undef #define UNROLL */ +#define INLINE /**/ +#include "profiler.inc.h" diff --git a/tools/testing/selftests/bpf/progs/profiler3.c b/tools/testing/selftests/bpf/progs/profiler3.c new file mode 100644 index 000000000000..6249fc31ccb0 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/profiler3.c @@ -0,0 +1,6 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#define barrier_var(var) /**/ +#define UNROLL +#define INLINE __noinline +#include "profiler.inc.h" diff --git a/tools/testing/selftests/bpf/progs/pyperf.h b/tools/testing/selftests/bpf/progs/pyperf.h index cc615b82b56e..2fb7adafb6b6 100644 --- a/tools/testing/selftests/bpf/progs/pyperf.h +++ b/tools/testing/selftests/bpf/progs/pyperf.h @@ -67,7 +67,12 @@ typedef struct { void* co_name; // PyCodeObject.co_name } FrameData; -static __always_inline void *get_thread_state(void *tls_base, PidData *pidData) +#ifdef SUBPROGS +__noinline +#else +__always_inline +#endif +static void 
*get_thread_state(void *tls_base, PidData *pidData) { void* thread_state; int key; @@ -155,7 +160,9 @@ struct { } stackmap SEC(".maps"); #ifdef GLOBAL_FUNC -__attribute__((noinline)) +__noinline +#elif defined(SUBPROGS) +static __noinline #else static __always_inline #endif diff --git a/tools/testing/selftests/bpf/progs/pyperf_subprogs.c b/tools/testing/selftests/bpf/progs/pyperf_subprogs.c new file mode 100644 index 000000000000..60e27a7f0cca --- /dev/null +++ b/tools/testing/selftests/bpf/progs/pyperf_subprogs.c @@ -0,0 +1,5 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ +#define STACK_MAX_LEN 50 +#define SUBPROGS +#include "pyperf.h" diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h index ad61b722a9de..7de534f38c3f 100644 --- a/tools/testing/selftests/bpf/progs/strobemeta.h +++ b/tools/testing/selftests/bpf/progs/strobemeta.h @@ -266,8 +266,12 @@ struct tls_index { uint64_t offset; }; -static __always_inline void *calc_location(struct strobe_value_loc *loc, - void *tls_base) +#ifdef SUBPROGS +__noinline +#else +__always_inline +#endif +static void *calc_location(struct strobe_value_loc *loc, void *tls_base) { /* * tls_mode value is: @@ -327,10 +331,15 @@ static __always_inline void *calc_location(struct strobe_value_loc *loc, : NULL; } -static __always_inline void read_int_var(struct strobemeta_cfg *cfg, - size_t idx, void *tls_base, - struct strobe_value_generic *value, - struct strobemeta_payload *data) +#ifdef SUBPROGS +__noinline +#else +__always_inline +#endif +static void read_int_var(struct strobemeta_cfg *cfg, + size_t idx, void *tls_base, + struct strobe_value_generic *value, + struct strobemeta_payload *data) { void *location = calc_location(&cfg->int_locs[idx], tls_base); if (!location) @@ -440,8 +449,13 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg, * read_strobe_meta returns NULL, if no metadata was read; otherwise returns * pointer to 
*right after* payload ends */ -static __always_inline void *read_strobe_meta(struct task_struct *task, - struct strobemeta_payload *data) +#ifdef SUBPROGS +__noinline +#else +__always_inline +#endif +static void *read_strobe_meta(struct task_struct *task, + struct strobemeta_payload *data) { pid_t pid = bpf_get_current_pid_tgid() >> 32; struct strobe_value_generic value = {0}; diff --git a/tools/testing/selftests/bpf/progs/strobemeta_subprogs.c b/tools/testing/selftests/bpf/progs/strobemeta_subprogs.c new file mode 100644 index 000000000000..b6c01f8fc559 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/strobemeta_subprogs.c @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +// Copyright (c) 2019 Facebook + +#define STROBE_MAX_INTS 2 +#define STROBE_MAX_STRS 25 +#define STROBE_MAX_MAPS 13 +#define STROBE_MAX_MAP_ENTRIES 20 +#define NO_UNROLL +#define SUBPROGS +#include "strobemeta.h" diff --git a/tools/testing/selftests/bpf/progs/tailcall1.c b/tools/testing/selftests/bpf/progs/tailcall1.c index 1f407e65ae52..7115bcefbe8a 100644 --- a/tools/testing/selftests/bpf/progs/tailcall1.c +++ b/tools/testing/selftests/bpf/progs/tailcall1.c @@ -26,20 +26,20 @@ int entry(struct __sk_buff *skb) /* Multiple locations to make sure we patch * all of them. 
*/ - bpf_tail_call(skb, &jmp_table, 0); - bpf_tail_call(skb, &jmp_table, 0); - bpf_tail_call(skb, &jmp_table, 0); - bpf_tail_call(skb, &jmp_table, 0); - - bpf_tail_call(skb, &jmp_table, 1); - bpf_tail_call(skb, &jmp_table, 1); - bpf_tail_call(skb, &jmp_table, 1); - bpf_tail_call(skb, &jmp_table, 1); - - bpf_tail_call(skb, &jmp_table, 2); - bpf_tail_call(skb, &jmp_table, 2); - bpf_tail_call(skb, &jmp_table, 2); - bpf_tail_call(skb, &jmp_table, 2); + bpf_tail_call_static(skb, &jmp_table, 0); + bpf_tail_call_static(skb, &jmp_table, 0); + bpf_tail_call_static(skb, &jmp_table, 0); + bpf_tail_call_static(skb, &jmp_table, 0); + + bpf_tail_call_static(skb, &jmp_table, 1); + bpf_tail_call_static(skb, &jmp_table, 1); + bpf_tail_call_static(skb, &jmp_table, 1); + bpf_tail_call_static(skb, &jmp_table, 1); + + bpf_tail_call_static(skb, &jmp_table, 2); + bpf_tail_call_static(skb, &jmp_table, 2); + bpf_tail_call_static(skb, &jmp_table, 2); + bpf_tail_call_static(skb, &jmp_table, 2); return 3; } diff --git a/tools/testing/selftests/bpf/progs/tailcall2.c b/tools/testing/selftests/bpf/progs/tailcall2.c index a093e739cf0e..0431e4fe7efd 100644 --- a/tools/testing/selftests/bpf/progs/tailcall2.c +++ b/tools/testing/selftests/bpf/progs/tailcall2.c @@ -13,14 +13,14 @@ struct { SEC("classifier/0") int bpf_func_0(struct __sk_buff *skb) { - bpf_tail_call(skb, &jmp_table, 1); + bpf_tail_call_static(skb, &jmp_table, 1); return 0; } SEC("classifier/1") int bpf_func_1(struct __sk_buff *skb) { - bpf_tail_call(skb, &jmp_table, 2); + bpf_tail_call_static(skb, &jmp_table, 2); return 1; } @@ -33,25 +33,25 @@ int bpf_func_2(struct __sk_buff *skb) SEC("classifier/3") int bpf_func_3(struct __sk_buff *skb) { - bpf_tail_call(skb, &jmp_table, 4); + bpf_tail_call_static(skb, &jmp_table, 4); return 3; } SEC("classifier/4") int bpf_func_4(struct __sk_buff *skb) { - bpf_tail_call(skb, &jmp_table, 3); + bpf_tail_call_static(skb, &jmp_table, 3); return 4; } SEC("classifier") int entry(struct __sk_buff *skb) { - 
bpf_tail_call(skb, &jmp_table, 0); + bpf_tail_call_static(skb, &jmp_table, 0); /* Check multi-prog update. */ - bpf_tail_call(skb, &jmp_table, 2); + bpf_tail_call_static(skb, &jmp_table, 2); /* Check tail call limit. */ - bpf_tail_call(skb, &jmp_table, 3); + bpf_tail_call_static(skb, &jmp_table, 3); return 3; } diff --git a/tools/testing/selftests/bpf/progs/tailcall3.c b/tools/testing/selftests/bpf/progs/tailcall3.c index cabda877cf0a..739dc2a51e74 100644 --- a/tools/testing/selftests/bpf/progs/tailcall3.c +++ b/tools/testing/selftests/bpf/progs/tailcall3.c @@ -16,14 +16,14 @@ SEC("classifier/0") int bpf_func_0(struct __sk_buff *skb) { count++; - bpf_tail_call(skb, &jmp_table, 0); + bpf_tail_call_static(skb, &jmp_table, 0); return 1; } SEC("classifier") int entry(struct __sk_buff *skb) { - bpf_tail_call(skb, &jmp_table, 0); + bpf_tail_call_static(skb, &jmp_table, 0); return 0; } diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf1.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf1.c new file mode 100644 index 000000000000..0103f3dd9f02 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf1.c @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +struct { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(max_entries, 2); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} jmp_table SEC(".maps"); + +#define TAIL_FUNC(x) \ + SEC("classifier/" #x) \ + int bpf_func_##x(struct __sk_buff *skb) \ + { \ + return x; \ + } +TAIL_FUNC(0) +TAIL_FUNC(1) + +static __noinline +int subprog_tail(struct __sk_buff *skb) +{ + bpf_tail_call_static(skb, &jmp_table, 0); + + return skb->len * 2; +} + +SEC("classifier") +int entry(struct __sk_buff *skb) +{ + bpf_tail_call_static(skb, &jmp_table, 1); + + return subprog_tail(skb); +} + +char __license[] SEC("license") = "GPL"; +int _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c 
b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c new file mode 100644 index 000000000000..7b1c04183824 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_legacy.h" + +struct { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(max_entries, 1); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} jmp_table SEC(".maps"); + +static __noinline +int subprog_tail(struct __sk_buff *skb) +{ + if (load_byte(skb, 0)) + bpf_tail_call_static(skb, &jmp_table, 1); + else + bpf_tail_call_static(skb, &jmp_table, 0); + return 1; +} + +static volatile int count; + +SEC("classifier/0") +int bpf_func_0(struct __sk_buff *skb) +{ + count++; + return subprog_tail(skb); +} + +SEC("classifier") +int entry(struct __sk_buff *skb) +{ + bpf_tail_call_static(skb, &jmp_table, 0); + + return 0; +} + +char __license[] SEC("license") = "GPL"; +int _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c new file mode 100644 index 000000000000..0d5482bea6c9 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include "bpf_legacy.h" + +struct { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(max_entries, 2); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} jmp_table SEC(".maps"); + +__noinline +int subprog_tail2(struct __sk_buff *skb) +{ + volatile char arr[64] = {}; + + if (load_word(skb, 0) || load_half(skb, 0)) + bpf_tail_call_static(skb, &jmp_table, 10); + else + bpf_tail_call_static(skb, &jmp_table, 1); + + return skb->len; +} + +static __noinline +int subprog_tail(struct __sk_buff *skb) +{ + volatile char arr[64] = {}; + + bpf_tail_call_static(skb, &jmp_table, 0); + + return 
skb->len * 2; +} + +SEC("classifier/0") +int bpf_func_0(struct __sk_buff *skb) +{ + volatile char arr[128] = {}; + + return subprog_tail2(skb); +} + +SEC("classifier/1") +int bpf_func_1(struct __sk_buff *skb) +{ + volatile char arr[128] = {}; + + return skb->len * 3; +} + +SEC("classifier") +int entry(struct __sk_buff *skb) +{ + volatile char arr[128] = {}; + + return subprog_tail(skb); +} + +char __license[] SEC("license") = "GPL"; +int _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c new file mode 100644 index 000000000000..9a1b166b7fbe --- /dev/null +++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +struct { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __uint(max_entries, 3); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); +} jmp_table SEC(".maps"); + +static volatile int count; + +__noinline +int subprog_tail_2(struct __sk_buff *skb) +{ + bpf_tail_call_static(skb, &jmp_table, 2); + return skb->len * 3; +} + +__noinline +int subprog_tail_1(struct __sk_buff *skb) +{ + bpf_tail_call_static(skb, &jmp_table, 1); + return skb->len * 2; +} + +__noinline +int subprog_tail(struct __sk_buff *skb) +{ + bpf_tail_call_static(skb, &jmp_table, 0); + return skb->len; +} + +SEC("classifier/1") +int bpf_func_1(struct __sk_buff *skb) +{ + return subprog_tail_2(skb); +} + +SEC("classifier/2") +int bpf_func_2(struct __sk_buff *skb) +{ + count++; + return subprog_tail_2(skb); +} + +SEC("classifier/0") +int bpf_func_0(struct __sk_buff *skb) +{ + return subprog_tail_1(skb); +} + +SEC("classifier") +int entry(struct __sk_buff *skb) +{ + return subprog_tail(skb); +} + +char __license[] SEC("license") = "GPL"; +int _version SEC("version") = 1; diff --git a/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c 
b/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c index e5093796be97..c1e0c8c7c55f 100644 --- a/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c +++ b/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c @@ -11,6 +11,13 @@ struct inner_map { } inner_map1 SEC(".maps"), inner_map2 SEC(".maps"); +struct inner_map_sz2 { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(max_entries, 2); + __type(key, int); + __type(value, int); +} inner_map_sz2 SEC(".maps"); + struct outer_arr { __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); __uint(max_entries, 3); @@ -34,6 +41,43 @@ struct outer_arr { .values = { (void *)&inner_map1, 0, (void *)&inner_map2 }, }; +struct inner_map_sz3 { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(map_flags, BPF_F_INNER_MAP); + __uint(max_entries, 3); + __type(key, int); + __type(value, int); +} inner_map3 SEC(".maps"), + inner_map4 SEC(".maps"); + +struct inner_map_sz4 { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(map_flags, BPF_F_INNER_MAP); + __uint(max_entries, 5); + __type(key, int); + __type(value, int); +} inner_map5 SEC(".maps"); + +struct outer_arr_dyn { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, 3); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); + __array(values, struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(map_flags, BPF_F_INNER_MAP); + __uint(max_entries, 1); + __type(key, int); + __type(value, int); + }); +} outer_arr_dyn SEC(".maps") = { + .values = { + [0] = (void *)&inner_map3, + [1] = (void *)&inner_map4, + [2] = (void *)&inner_map5, + }, +}; + struct outer_hash { __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS); __uint(max_entries, 5); @@ -50,6 +94,30 @@ struct outer_hash { }, }; +struct sockarr_sz1 { + __uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY); + __uint(max_entries, 1); + __type(key, int); + __type(value, int); +} sockarr_sz1 SEC(".maps"); + +struct sockarr_sz2 { + __uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY); + __uint(max_entries, 2); + __type(key, int); + __type(value, 
int); +} sockarr_sz2 SEC(".maps"); + +struct outer_sockarr_sz1 { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, 1); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); + __array(values, struct sockarr_sz1); +} outer_sockarr SEC(".maps") = { + .values = { (void *)&sockarr_sz1 }, +}; + int input = 0; SEC("raw_tp/sys_enter") @@ -70,6 +138,12 @@ int handle__sys_enter(void *ctx) val = input + 1; bpf_map_update_elem(inner_map, &key, &val, 0); + inner_map = bpf_map_lookup_elem(&outer_arr_dyn, &key); + if (!inner_map) + return 1; + val = input + 2; + bpf_map_update_elem(inner_map, &key, &val, 0); + return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c b/tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c new file mode 100644 index 000000000000..9a6b85dd52d2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c @@ -0,0 +1,174 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include <string.h> +#include <errno.h> +#include <netinet/in.h> +#include <linux/stddef.h> +#include <linux/bpf.h> +#include <linux/ipv6.h> +#include <linux/tcp.h> +#include <linux/if_ether.h> +#include <linux/pkt_cls.h> + +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> +#include "bpf_tcp_helpers.h" + +struct sockaddr_in6 srv_sa6 = {}; +__u16 listen_tp_sport = 0; +__u16 req_sk_sport = 0; +__u32 recv_cookie = 0; +__u32 gen_cookie = 0; +__u32 linum = 0; + +#define LOG() ({ if (!linum) linum = __LINE__; }) + +static void test_syncookie_helper(struct ipv6hdr *ip6h, struct tcphdr *th, + struct tcp_sock *tp, + struct __sk_buff *skb) +{ + if (th->syn) { + __s64 mss_cookie; + void *data_end; + + data_end = (void *)(long)(skb->data_end); + + if (th->doff * 4 != 40) { + LOG(); + return; + } + + if ((void *)th + 40 > data_end) { + LOG(); + return; + } + + mss_cookie = bpf_tcp_gen_syncookie(tp, ip6h, sizeof(*ip6h), + th, 40); + if (mss_cookie < 0) { + if (mss_cookie != 
-ENOENT) + LOG(); + } else { + gen_cookie = (__u32)mss_cookie; + } + } else if (gen_cookie) { + /* It was in cookie mode */ + int ret = bpf_tcp_check_syncookie(tp, ip6h, sizeof(*ip6h), + th, sizeof(*th)); + + if (ret < 0) { + if (ret != -ENOENT) + LOG(); + } else { + recv_cookie = bpf_ntohl(th->ack_seq) - 1; + } + } +} + +static int handle_ip6_tcp(struct ipv6hdr *ip6h, struct __sk_buff *skb) +{ + struct bpf_sock_tuple *tuple; + struct bpf_sock *bpf_skc; + unsigned int tuple_len; + struct tcphdr *th; + void *data_end; + + data_end = (void *)(long)(skb->data_end); + + th = (struct tcphdr *)(ip6h + 1); + if (th + 1 > data_end) + return TC_ACT_OK; + + /* Is it the testing traffic? */ + if (th->dest != srv_sa6.sin6_port) + return TC_ACT_OK; + + tuple_len = sizeof(tuple->ipv6); + tuple = (struct bpf_sock_tuple *)&ip6h->saddr; + if ((void *)tuple + tuple_len > data_end) { + LOG(); + return TC_ACT_OK; + } + + bpf_skc = bpf_skc_lookup_tcp(skb, tuple, tuple_len, + BPF_F_CURRENT_NETNS, 0); + if (!bpf_skc) { + LOG(); + return TC_ACT_OK; + } + + if (bpf_skc->state == BPF_TCP_NEW_SYN_RECV) { + struct request_sock *req_sk; + + req_sk = (struct request_sock *)bpf_skc_to_tcp_request_sock(bpf_skc); + if (!req_sk) { + LOG(); + goto release; + } + + if (bpf_sk_assign(skb, req_sk, 0)) { + LOG(); + goto release; + } + + req_sk_sport = req_sk->__req_common.skc_num; + + bpf_sk_release(req_sk); + return TC_ACT_OK; + } else if (bpf_skc->state == BPF_TCP_LISTEN) { + struct tcp_sock *tp; + + tp = bpf_skc_to_tcp_sock(bpf_skc); + if (!tp) { + LOG(); + goto release; + } + + if (bpf_sk_assign(skb, tp, 0)) { + LOG(); + goto release; + } + + listen_tp_sport = tp->inet_conn.icsk_inet.sk.__sk_common.skc_num; + + test_syncookie_helper(ip6h, th, tp, skb); + bpf_sk_release(tp); + return TC_ACT_OK; + } + + if (bpf_sk_assign(skb, bpf_skc, 0)) + LOG(); + +release: + bpf_sk_release(bpf_skc); + return TC_ACT_OK; +} + +SEC("classifier/ingress") +int cls_ingress(struct __sk_buff *skb) +{ + struct ipv6hdr 
*ip6h; + struct ethhdr *eth; + void *data_end; + + data_end = (void *)(long)(skb->data_end); + + eth = (struct ethhdr *)(long)(skb->data); + if (eth + 1 > data_end) + return TC_ACT_OK; + + if (eth->h_proto != bpf_htons(ETH_P_IPV6)) + return TC_ACT_OK; + + ip6h = (struct ipv6hdr *)(eth + 1); + if (ip6h + 1 > data_end) + return TC_ACT_OK; + + if (ip6h->nexthdr == IPPROTO_TCP) + return handle_ip6_tcp(ip6h, skb); + + return TC_ACT_OK; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect.c b/tools/testing/selftests/bpf/progs/test_cls_redirect.c index f0b72e86bee5..c9f8464996ea 100644 --- a/tools/testing/selftests/bpf/progs/test_cls_redirect.c +++ b/tools/testing/selftests/bpf/progs/test_cls_redirect.c @@ -22,6 +22,12 @@ #include "test_cls_redirect.h" +#ifdef SUBPROGS +#define INLINING __noinline +#else +#define INLINING __always_inline +#endif + #define offsetofend(TYPE, MEMBER) \ (offsetof(TYPE, MEMBER) + sizeof((((TYPE *)0)->MEMBER))) @@ -125,7 +131,7 @@ typedef struct buf { uint8_t *const tail; } buf_t; -static size_t buf_off(const buf_t *buf) +static __always_inline size_t buf_off(const buf_t *buf) { /* Clang seems to optimize constructs like * a - b + c @@ -145,7 +151,7 @@ static size_t buf_off(const buf_t *buf) return off; } -static bool buf_copy(buf_t *buf, void *dst, size_t len) +static __always_inline bool buf_copy(buf_t *buf, void *dst, size_t len) { if (bpf_skb_load_bytes(buf->skb, buf_off(buf), dst, len)) { return false; @@ -155,7 +161,7 @@ static bool buf_copy(buf_t *buf, void *dst, size_t len) return true; } -static bool buf_skip(buf_t *buf, const size_t len) +static __always_inline bool buf_skip(buf_t *buf, const size_t len) { /* Check whether off + len is valid in the non-linear part. 
*/ if (buf_off(buf) + len > buf->skb->len) { @@ -173,7 +179,7 @@ static bool buf_skip(buf_t *buf, const size_t len) * If scratch is not NULL, the function will attempt to load non-linear * data via bpf_skb_load_bytes. On success, scratch is returned. */ -static void *buf_assign(buf_t *buf, const size_t len, void *scratch) +static __always_inline void *buf_assign(buf_t *buf, const size_t len, void *scratch) { if (buf->head + len > buf->tail) { if (scratch == NULL) { @@ -188,7 +194,7 @@ static void *buf_assign(buf_t *buf, const size_t len, void *scratch) return ptr; } -static bool pkt_skip_ipv4_options(buf_t *buf, const struct iphdr *ipv4) +static INLINING bool pkt_skip_ipv4_options(buf_t *buf, const struct iphdr *ipv4) { if (ipv4->ihl <= 5) { return true; @@ -197,13 +203,13 @@ static bool pkt_skip_ipv4_options(buf_t *buf, const struct iphdr *ipv4) return buf_skip(buf, (ipv4->ihl - 5) * 4); } -static bool ipv4_is_fragment(const struct iphdr *ip) +static INLINING bool ipv4_is_fragment(const struct iphdr *ip) { uint16_t frag_off = ip->frag_off & bpf_htons(IP_OFFSET_MASK); return (ip->frag_off & bpf_htons(IP_MF)) != 0 || frag_off > 0; } -static struct iphdr *pkt_parse_ipv4(buf_t *pkt, struct iphdr *scratch) +static __always_inline struct iphdr *pkt_parse_ipv4(buf_t *pkt, struct iphdr *scratch) { struct iphdr *ipv4 = buf_assign(pkt, sizeof(*ipv4), scratch); if (ipv4 == NULL) { @@ -222,7 +228,7 @@ static struct iphdr *pkt_parse_ipv4(buf_t *pkt, struct iphdr *scratch) } /* Parse the L4 ports from a packet, assuming a layout like TCP or UDP. 
*/ -static bool pkt_parse_icmp_l4_ports(buf_t *pkt, flow_ports_t *ports) +static INLINING bool pkt_parse_icmp_l4_ports(buf_t *pkt, flow_ports_t *ports) { if (!buf_copy(pkt, ports, sizeof(*ports))) { return false; @@ -237,7 +243,7 @@ static bool pkt_parse_icmp_l4_ports(buf_t *pkt, flow_ports_t *ports) return true; } -static uint16_t pkt_checksum_fold(uint32_t csum) +static INLINING uint16_t pkt_checksum_fold(uint32_t csum) { /* The highest reasonable value for an IPv4 header * checksum requires two folds, so we just do that always. @@ -247,7 +253,7 @@ static uint16_t pkt_checksum_fold(uint32_t csum) return (uint16_t)~csum; } -static void pkt_ipv4_checksum(struct iphdr *iph) +static INLINING void pkt_ipv4_checksum(struct iphdr *iph) { iph->check = 0; @@ -268,10 +274,11 @@ static void pkt_ipv4_checksum(struct iphdr *iph) iph->check = pkt_checksum_fold(acc); } -static bool pkt_skip_ipv6_extension_headers(buf_t *pkt, - const struct ipv6hdr *ipv6, - uint8_t *upper_proto, - bool *is_fragment) +static INLINING +bool pkt_skip_ipv6_extension_headers(buf_t *pkt, + const struct ipv6hdr *ipv6, + uint8_t *upper_proto, + bool *is_fragment) { /* We understand five extension headers. * https://tools.ietf.org/html/rfc8200#section-4.1 states that all @@ -336,7 +343,7 @@ static bool pkt_skip_ipv6_extension_headers(buf_t *pkt, * scratch is allocated on the stack. However, this usage should be safe since * it's the callers stack after all. 
*/ -static inline __attribute__((__always_inline__)) struct ipv6hdr * +static __always_inline struct ipv6hdr * pkt_parse_ipv6(buf_t *pkt, struct ipv6hdr *scratch, uint8_t *proto, bool *is_fragment) { @@ -354,20 +361,20 @@ pkt_parse_ipv6(buf_t *pkt, struct ipv6hdr *scratch, uint8_t *proto, /* Global metrics, per CPU */ -struct bpf_map_def metrics_map SEC("maps") = { - .type = BPF_MAP_TYPE_PERCPU_ARRAY, - .key_size = sizeof(unsigned int), - .value_size = sizeof(metrics_t), - .max_entries = 1, -}; - -static metrics_t *get_global_metrics(void) +struct { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __uint(max_entries, 1); + __type(key, unsigned int); + __type(value, metrics_t); +} metrics_map SEC(".maps"); + +static INLINING metrics_t *get_global_metrics(void) { uint64_t key = 0; return bpf_map_lookup_elem(&metrics_map, &key); } -static ret_t accept_locally(struct __sk_buff *skb, encap_headers_t *encap) +static INLINING ret_t accept_locally(struct __sk_buff *skb, encap_headers_t *encap) { const int payload_off = sizeof(*encap) + @@ -388,8 +395,8 @@ static ret_t accept_locally(struct __sk_buff *skb, encap_headers_t *encap) return bpf_redirect(skb->ifindex, BPF_F_INGRESS); } -static ret_t forward_with_gre(struct __sk_buff *skb, encap_headers_t *encap, - struct in_addr *next_hop, metrics_t *metrics) +static INLINING ret_t forward_with_gre(struct __sk_buff *skb, encap_headers_t *encap, + struct in_addr *next_hop, metrics_t *metrics) { metrics->forwarded_packets_total_gre++; @@ -509,8 +516,8 @@ static ret_t forward_with_gre(struct __sk_buff *skb, encap_headers_t *encap, return bpf_redirect(skb->ifindex, 0); } -static ret_t forward_to_next_hop(struct __sk_buff *skb, encap_headers_t *encap, - struct in_addr *next_hop, metrics_t *metrics) +static INLINING ret_t forward_to_next_hop(struct __sk_buff *skb, encap_headers_t *encap, + struct in_addr *next_hop, metrics_t *metrics) { /* swap L2 addresses */ /* This assumes that packets are received from a router. 
@@ -546,7 +553,7 @@ static ret_t forward_to_next_hop(struct __sk_buff *skb, encap_headers_t *encap, return bpf_redirect(skb->ifindex, 0); } -static ret_t skip_next_hops(buf_t *pkt, int n) +static INLINING ret_t skip_next_hops(buf_t *pkt, int n) { switch (n) { case 1: @@ -566,8 +573,8 @@ static ret_t skip_next_hops(buf_t *pkt, int n) * pkt is positioned just after the variable length GLB header * iff the call is successful. */ -static ret_t get_next_hop(buf_t *pkt, encap_headers_t *encap, - struct in_addr *next_hop) +static INLINING ret_t get_next_hop(buf_t *pkt, encap_headers_t *encap, + struct in_addr *next_hop) { if (encap->unigue.next_hop > encap->unigue.hop_count) { return TC_ACT_SHOT; @@ -601,8 +608,8 @@ static ret_t get_next_hop(buf_t *pkt, encap_headers_t *encap, * return value, and calling code works while still being "generic" to * IPv4 and IPv6. */ -static uint64_t fill_tuple(struct bpf_sock_tuple *tuple, void *iph, - uint64_t iphlen, uint16_t sport, uint16_t dport) +static INLINING uint64_t fill_tuple(struct bpf_sock_tuple *tuple, void *iph, + uint64_t iphlen, uint16_t sport, uint16_t dport) { switch (iphlen) { case sizeof(struct iphdr): { @@ -630,9 +637,9 @@ static uint64_t fill_tuple(struct bpf_sock_tuple *tuple, void *iph, } } -static verdict_t classify_tcp(struct __sk_buff *skb, - struct bpf_sock_tuple *tuple, uint64_t tuplen, - void *iph, struct tcphdr *tcp) +static INLINING verdict_t classify_tcp(struct __sk_buff *skb, + struct bpf_sock_tuple *tuple, uint64_t tuplen, + void *iph, struct tcphdr *tcp) { struct bpf_sock *sk = bpf_skc_lookup_tcp(skb, tuple, tuplen, BPF_F_CURRENT_NETNS, 0); @@ -663,8 +670,8 @@ static verdict_t classify_tcp(struct __sk_buff *skb, return UNKNOWN; } -static verdict_t classify_udp(struct __sk_buff *skb, - struct bpf_sock_tuple *tuple, uint64_t tuplen) +static INLINING verdict_t classify_udp(struct __sk_buff *skb, + struct bpf_sock_tuple *tuple, uint64_t tuplen) { struct bpf_sock *sk = bpf_sk_lookup_udp(skb, tuple, tuplen, 
BPF_F_CURRENT_NETNS, 0); @@ -681,9 +688,9 @@ static verdict_t classify_udp(struct __sk_buff *skb, return UNKNOWN; } -static verdict_t classify_icmp(struct __sk_buff *skb, uint8_t proto, - struct bpf_sock_tuple *tuple, uint64_t tuplen, - metrics_t *metrics) +static INLINING verdict_t classify_icmp(struct __sk_buff *skb, uint8_t proto, + struct bpf_sock_tuple *tuple, uint64_t tuplen, + metrics_t *metrics) { switch (proto) { case IPPROTO_TCP: @@ -698,7 +705,7 @@ static verdict_t classify_icmp(struct __sk_buff *skb, uint8_t proto, } } -static verdict_t process_icmpv4(buf_t *pkt, metrics_t *metrics) +static INLINING verdict_t process_icmpv4(buf_t *pkt, metrics_t *metrics) { struct icmphdr icmp; if (!buf_copy(pkt, &icmp, sizeof(icmp))) { @@ -745,7 +752,7 @@ static verdict_t process_icmpv4(buf_t *pkt, metrics_t *metrics) sizeof(tuple.ipv4), metrics); } -static verdict_t process_icmpv6(buf_t *pkt, metrics_t *metrics) +static INLINING verdict_t process_icmpv6(buf_t *pkt, metrics_t *metrics) { struct icmp6hdr icmp6; if (!buf_copy(pkt, &icmp6, sizeof(icmp6))) { @@ -797,8 +804,8 @@ static verdict_t process_icmpv6(buf_t *pkt, metrics_t *metrics) metrics); } -static verdict_t process_tcp(buf_t *pkt, void *iph, uint64_t iphlen, - metrics_t *metrics) +static INLINING verdict_t process_tcp(buf_t *pkt, void *iph, uint64_t iphlen, + metrics_t *metrics) { metrics->l4_protocol_packets_total_tcp++; @@ -819,8 +826,8 @@ static verdict_t process_tcp(buf_t *pkt, void *iph, uint64_t iphlen, return classify_tcp(pkt->skb, &tuple, tuplen, iph, tcp); } -static verdict_t process_udp(buf_t *pkt, void *iph, uint64_t iphlen, - metrics_t *metrics) +static INLINING verdict_t process_udp(buf_t *pkt, void *iph, uint64_t iphlen, + metrics_t *metrics) { metrics->l4_protocol_packets_total_udp++; @@ -837,7 +844,7 @@ static verdict_t process_udp(buf_t *pkt, void *iph, uint64_t iphlen, return classify_udp(pkt->skb, &tuple, tuplen); } -static verdict_t process_ipv4(buf_t *pkt, metrics_t *metrics) +static 
INLINING verdict_t process_ipv4(buf_t *pkt, metrics_t *metrics) { metrics->l3_protocol_packets_total_ipv4++; @@ -874,7 +881,7 @@ static verdict_t process_ipv4(buf_t *pkt, metrics_t *metrics) } } -static verdict_t process_ipv6(buf_t *pkt, metrics_t *metrics) +static INLINING verdict_t process_ipv6(buf_t *pkt, metrics_t *metrics) { metrics->l3_protocol_packets_total_ipv6++; diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect_subprogs.c b/tools/testing/selftests/bpf/progs/test_cls_redirect_subprogs.c new file mode 100644 index 000000000000..eed26b70e3a2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_cls_redirect_subprogs.c @@ -0,0 +1,2 @@ +#define SUBPROGS +#include "test_cls_redirect.c" diff --git a/tools/testing/selftests/bpf/progs/test_core_autosize.c b/tools/testing/selftests/bpf/progs/test_core_autosize.c new file mode 100644 index 000000000000..44f5aa2e8956 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_core_autosize.c @@ -0,0 +1,172 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include <linux/bpf.h> +#include <stdint.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> + +char _license[] SEC("license") = "GPL"; + +/* fields of exactly the same size */ +struct test_struct___samesize { + void *ptr; + unsigned long long val1; + unsigned int val2; + unsigned short val3; + unsigned char val4; +} __attribute((preserve_access_index)); + +/* unsigned fields that have to be downsized by libbpf */ +struct test_struct___downsize { + void *ptr; + unsigned long val1; + unsigned long val2; + unsigned long val3; + unsigned long val4; + /* total sz: 40 */ +} __attribute__((preserve_access_index)); + +/* fields with signed integers of wrong size, should be rejected */ +struct test_struct___signed { + void *ptr; + long val1; + long val2; + long val3; + long val4; +} __attribute((preserve_access_index)); + +/* real layout and sizes according to test's (32-bit) BTF */ +struct test_struct___real { + 
unsigned int ptr; /* can't use `void *`, it is always 8 byte in BPF target */ + unsigned int val2; + unsigned long long val1; + unsigned short val3; + unsigned char val4; + unsigned char _pad; + /* total sz: 20 */ +}; + +struct test_struct___real input = { + .ptr = 0x01020304, + .val1 = 0x1020304050607080, + .val2 = 0x0a0b0c0d, + .val3 = 0xfeed, + .val4 = 0xb9, + ._pad = 0xff, /* make sure no accidental zeros are present */ +}; + +unsigned long long ptr_samesized = 0; +unsigned long long val1_samesized = 0; +unsigned long long val2_samesized = 0; +unsigned long long val3_samesized = 0; +unsigned long long val4_samesized = 0; +struct test_struct___real output_samesized = {}; + +unsigned long long ptr_downsized = 0; +unsigned long long val1_downsized = 0; +unsigned long long val2_downsized = 0; +unsigned long long val3_downsized = 0; +unsigned long long val4_downsized = 0; +struct test_struct___real output_downsized = {}; + +unsigned long long ptr_probed = 0; +unsigned long long val1_probed = 0; +unsigned long long val2_probed = 0; +unsigned long long val3_probed = 0; +unsigned long long val4_probed = 0; + +unsigned long long ptr_signed = 0; +unsigned long long val1_signed = 0; +unsigned long long val2_signed = 0; +unsigned long long val3_signed = 0; +unsigned long long val4_signed = 0; +struct test_struct___real output_signed = {}; + +SEC("raw_tp/sys_exit") +int handle_samesize(void *ctx) +{ + struct test_struct___samesize *in = (void *)&input; + struct test_struct___samesize *out = (void *)&output_samesized; + + ptr_samesized = (unsigned long long)in->ptr; + val1_samesized = in->val1; + val2_samesized = in->val2; + val3_samesized = in->val3; + val4_samesized = in->val4; + + out->ptr = in->ptr; + out->val1 = in->val1; + out->val2 = in->val2; + out->val3 = in->val3; + out->val4 = in->val4; + + return 0; +} + +SEC("raw_tp/sys_exit") +int handle_downsize(void *ctx) +{ + struct test_struct___downsize *in = (void *)&input; + struct test_struct___downsize *out = (void 
*)&output_downsized; + + ptr_downsized = (unsigned long long)in->ptr; + val1_downsized = in->val1; + val2_downsized = in->val2; + val3_downsized = in->val3; + val4_downsized = in->val4; + + out->ptr = in->ptr; + out->val1 = in->val1; + out->val2 = in->val2; + out->val3 = in->val3; + out->val4 = in->val4; + + return 0; +} + +SEC("raw_tp/sys_enter") +int handle_probed(void *ctx) +{ + struct test_struct___downsize *in = (void *)&input; + __u64 tmp; + + tmp = 0; + bpf_core_read(&tmp, bpf_core_field_size(in->ptr), &in->ptr); + ptr_probed = tmp; + + tmp = 0; + bpf_core_read(&tmp, bpf_core_field_size(in->val1), &in->val1); + val1_probed = tmp; + + tmp = 0; + bpf_core_read(&tmp, bpf_core_field_size(in->val2), &in->val2); + val2_probed = tmp; + + tmp = 0; + bpf_core_read(&tmp, bpf_core_field_size(in->val3), &in->val3); + val3_probed = tmp; + + tmp = 0; + bpf_core_read(&tmp, bpf_core_field_size(in->val4), &in->val4); + val4_probed = tmp; + + return 0; +} + +SEC("raw_tp/sys_enter") +int handle_signed(void *ctx) +{ + struct test_struct___signed *in = (void *)&input; + struct test_struct___signed *out = (void *)&output_signed; + + val2_signed = in->val2; + val3_signed = in->val3; + val4_signed = in->val4; + + out->val2= in->val2; + out->val3= in->val3; + out->val4= in->val4; + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_enumval.c b/tools/testing/selftests/bpf/progs/test_core_reloc_enumval.c new file mode 100644 index 000000000000..e7ef3dada2bf --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_enumval.c @@ -0,0 +1,72 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook + +#include <linux/bpf.h> +#include <stdint.h> +#include <stdbool.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> + +char _license[] SEC("license") = "GPL"; + +struct { + char in[256]; + char out[256]; + bool skip; +} data = {}; + +enum named_enum { + NAMED_ENUM_VAL1 = 1, + NAMED_ENUM_VAL2 = 2, + NAMED_ENUM_VAL3 = 3, +}; + 
+typedef enum { + ANON_ENUM_VAL1 = 0x10, + ANON_ENUM_VAL2 = 0x20, + ANON_ENUM_VAL3 = 0x30, +} anon_enum; + +struct core_reloc_enumval_output { + bool named_val1_exists; + bool named_val2_exists; + bool named_val3_exists; + bool anon_val1_exists; + bool anon_val2_exists; + bool anon_val3_exists; + + int named_val1; + int named_val2; + int anon_val1; + int anon_val2; +}; + +SEC("raw_tracepoint/sys_enter") +int test_core_enumval(void *ctx) +{ +#if __has_builtin(__builtin_preserve_enum_value) + struct core_reloc_enumval_output *out = (void *)&data.out; + enum named_enum named = 0; + anon_enum anon = 0; + + out->named_val1_exists = bpf_core_enum_value_exists(named, NAMED_ENUM_VAL1); + out->named_val2_exists = bpf_core_enum_value_exists(enum named_enum, NAMED_ENUM_VAL2); + out->named_val3_exists = bpf_core_enum_value_exists(enum named_enum, NAMED_ENUM_VAL3); + + out->anon_val1_exists = bpf_core_enum_value_exists(anon, ANON_ENUM_VAL1); + out->anon_val2_exists = bpf_core_enum_value_exists(anon_enum, ANON_ENUM_VAL2); + out->anon_val3_exists = bpf_core_enum_value_exists(anon_enum, ANON_ENUM_VAL3); + + out->named_val1 = bpf_core_enum_value(named, NAMED_ENUM_VAL1); + out->named_val2 = bpf_core_enum_value(named, NAMED_ENUM_VAL2); + /* NAMED_ENUM_VAL3 value is optional */ + + out->anon_val1 = bpf_core_enum_value(anon, ANON_ENUM_VAL1); + out->anon_val2 = bpf_core_enum_value(anon, ANON_ENUM_VAL2); + /* ANON_ENUM_VAL3 value is optional */ +#else + data.skip = true; +#endif + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c b/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c index aba928fd60d3..145028b52ad8 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c @@ -3,6 +3,7 @@ #include <linux/bpf.h> #include <stdint.h> +#include <stdbool.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_core_read.h> @@ -11,6 +12,7 @@ char _license[] SEC("license") = "GPL"; 
struct { char in[256]; char out[256]; + bool skip; uint64_t my_pid_tgid; } data = {}; diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c b/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c new file mode 100644 index 000000000000..fb60f8195c53 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c @@ -0,0 +1,110 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook + +#include <linux/bpf.h> +#include <stdint.h> +#include <stdbool.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> + +char _license[] SEC("license") = "GPL"; + +struct { + char in[256]; + char out[256]; + bool skip; +} data = {}; + +struct a_struct { + int x; +}; + +union a_union { + int y; + int z; +}; + +typedef struct a_struct named_struct_typedef; + +typedef struct { int x, y, z; } anon_struct_typedef; + +typedef struct { + int a, b, c; +} *struct_ptr_typedef; + +enum an_enum { + AN_ENUM_VAL1 = 1, + AN_ENUM_VAL2 = 2, + AN_ENUM_VAL3 = 3, +}; + +typedef int int_typedef; + +typedef enum { TYPEDEF_ENUM_VAL1, TYPEDEF_ENUM_VAL2 } enum_typedef; + +typedef void *void_ptr_typedef; + +typedef int (*func_proto_typedef)(long); + +typedef char arr_typedef[20]; + +struct core_reloc_type_based_output { + bool struct_exists; + bool union_exists; + bool enum_exists; + bool typedef_named_struct_exists; + bool typedef_anon_struct_exists; + bool typedef_struct_ptr_exists; + bool typedef_int_exists; + bool typedef_enum_exists; + bool typedef_void_ptr_exists; + bool typedef_func_proto_exists; + bool typedef_arr_exists; + + int struct_sz; + int union_sz; + int enum_sz; + int typedef_named_struct_sz; + int typedef_anon_struct_sz; + int typedef_struct_ptr_sz; + int typedef_int_sz; + int typedef_enum_sz; + int typedef_void_ptr_sz; + int typedef_func_proto_sz; + int typedef_arr_sz; +}; + +SEC("raw_tracepoint/sys_enter") +int test_core_type_based(void *ctx) +{ +#if __has_builtin(__builtin_preserve_type_info) + struct 
core_reloc_type_based_output *out = (void *)&data.out; + + out->struct_exists = bpf_core_type_exists(struct a_struct); + out->union_exists = bpf_core_type_exists(union a_union); + out->enum_exists = bpf_core_type_exists(enum an_enum); + out->typedef_named_struct_exists = bpf_core_type_exists(named_struct_typedef); + out->typedef_anon_struct_exists = bpf_core_type_exists(anon_struct_typedef); + out->typedef_struct_ptr_exists = bpf_core_type_exists(struct_ptr_typedef); + out->typedef_int_exists = bpf_core_type_exists(int_typedef); + out->typedef_enum_exists = bpf_core_type_exists(enum_typedef); + out->typedef_void_ptr_exists = bpf_core_type_exists(void_ptr_typedef); + out->typedef_func_proto_exists = bpf_core_type_exists(func_proto_typedef); + out->typedef_arr_exists = bpf_core_type_exists(arr_typedef); + + out->struct_sz = bpf_core_type_size(struct a_struct); + out->union_sz = bpf_core_type_size(union a_union); + out->enum_sz = bpf_core_type_size(enum an_enum); + out->typedef_named_struct_sz = bpf_core_type_size(named_struct_typedef); + out->typedef_anon_struct_sz = bpf_core_type_size(anon_struct_typedef); + out->typedef_struct_ptr_sz = bpf_core_type_size(struct_ptr_typedef); + out->typedef_int_sz = bpf_core_type_size(int_typedef); + out->typedef_enum_sz = bpf_core_type_size(enum_typedef); + out->typedef_void_ptr_sz = bpf_core_type_size(void_ptr_typedef); + out->typedef_func_proto_sz = bpf_core_type_size(func_proto_typedef); + out->typedef_arr_sz = bpf_core_type_size(arr_typedef); +#else + data.skip = true; +#endif + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c b/tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c new file mode 100644 index 000000000000..22aba3f6e344 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c @@ -0,0 +1,115 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook + +#include <linux/bpf.h> +#include <stdint.h> +#include <stdbool.h> +#include 
<bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> + +char _license[] SEC("license") = "GPL"; + +struct { + char in[256]; + char out[256]; + bool skip; +} data = {}; + +/* some types are shared with test_core_reloc_type_based.c */ +struct a_struct { + int x; +}; + +union a_union { + int y; + int z; +}; + +enum an_enum { + AN_ENUM_VAL1 = 1, + AN_ENUM_VAL2 = 2, + AN_ENUM_VAL3 = 3, +}; + +typedef struct a_struct named_struct_typedef; + +typedef int (*func_proto_typedef)(long); + +typedef char arr_typedef[20]; + +struct core_reloc_type_id_output { + int local_anon_struct; + int local_anon_union; + int local_anon_enum; + int local_anon_func_proto_ptr; + int local_anon_void_ptr; + int local_anon_arr; + + int local_struct; + int local_union; + int local_enum; + int local_int; + int local_struct_typedef; + int local_func_proto_typedef; + int local_arr_typedef; + + int targ_struct; + int targ_union; + int targ_enum; + int targ_int; + int targ_struct_typedef; + int targ_func_proto_typedef; + int targ_arr_typedef; +}; + +/* preserve types even if Clang doesn't support built-in */ +struct a_struct t1 = {}; +union a_union t2 = {}; +enum an_enum t3 = 0; +named_struct_typedef t4 = {}; +func_proto_typedef t5 = 0; +arr_typedef t6 = {}; + +SEC("raw_tracepoint/sys_enter") +int test_core_type_id(void *ctx) +{ + /* We use __builtin_btf_type_id() in this tests, but up until the time + * __builtin_preserve_type_info() was added it contained a bug that + * would make this test fail. The bug was fixed ([0]) with addition of + * __builtin_preserve_type_info(), though, so that's what we are using + * to detect whether this test has to be executed, however strange + * that might look like. 
+ * + * [0] https://reviews.llvm.org/D85174 + */ +#if __has_builtin(__builtin_preserve_type_info) + struct core_reloc_type_id_output *out = (void *)&data.out; + + out->local_anon_struct = bpf_core_type_id_local(struct { int marker_field; }); + out->local_anon_union = bpf_core_type_id_local(union { int marker_field; }); + out->local_anon_enum = bpf_core_type_id_local(enum { MARKER_ENUM_VAL = 123 }); + out->local_anon_func_proto_ptr = bpf_core_type_id_local(_Bool(*)(int)); + out->local_anon_void_ptr = bpf_core_type_id_local(void *); + out->local_anon_arr = bpf_core_type_id_local(_Bool[47]); + + out->local_struct = bpf_core_type_id_local(struct a_struct); + out->local_union = bpf_core_type_id_local(union a_union); + out->local_enum = bpf_core_type_id_local(enum an_enum); + out->local_int = bpf_core_type_id_local(int); + out->local_struct_typedef = bpf_core_type_id_local(named_struct_typedef); + out->local_func_proto_typedef = bpf_core_type_id_local(func_proto_typedef); + out->local_arr_typedef = bpf_core_type_id_local(arr_typedef); + + out->targ_struct = bpf_core_type_id_kernel(struct a_struct); + out->targ_union = bpf_core_type_id_kernel(union a_union); + out->targ_enum = bpf_core_type_id_kernel(enum an_enum); + out->targ_int = bpf_core_type_id_kernel(int); + out->targ_struct_typedef = bpf_core_type_id_kernel(named_struct_typedef); + out->targ_func_proto_typedef = bpf_core_type_id_kernel(func_proto_typedef); + out->targ_arr_typedef = bpf_core_type_id_kernel(arr_typedef); +#else + data.skip = true; +#endif + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_d_path.c b/tools/testing/selftests/bpf/progs/test_d_path.c new file mode 100644 index 000000000000..84e1f883f97b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_d_path.c @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +#define MAX_PATH_LEN 128 +#define MAX_FILES 7 + +pid_t my_pid = 0; +__u32 
cnt_stat = 0; +__u32 cnt_close = 0; +char paths_stat[MAX_FILES][MAX_PATH_LEN] = {}; +char paths_close[MAX_FILES][MAX_PATH_LEN] = {}; +int rets_stat[MAX_FILES] = {}; +int rets_close[MAX_FILES] = {}; + +int called_stat = 0; +int called_close = 0; + +SEC("fentry/security_inode_getattr") +int BPF_PROG(prog_stat, struct path *path, struct kstat *stat, + __u32 request_mask, unsigned int query_flags) +{ + pid_t pid = bpf_get_current_pid_tgid() >> 32; + __u32 cnt = cnt_stat; + int ret; + + called_stat = 1; + + if (pid != my_pid) + return 0; + + if (cnt >= MAX_FILES) + return 0; + ret = bpf_d_path(path, paths_stat[cnt], MAX_PATH_LEN); + + rets_stat[cnt] = ret; + cnt_stat++; + return 0; +} + +SEC("fentry/filp_close") +int BPF_PROG(prog_close, struct file *file, void *id) +{ + pid_t pid = bpf_get_current_pid_tgid() >> 32; + __u32 cnt = cnt_close; + int ret; + + called_close = 1; + + if (pid != my_pid) + return 0; + + if (cnt >= MAX_FILES) + return 0; + ret = bpf_d_path(&file->f_path, + paths_close[cnt], MAX_PATH_LEN); + + rets_close[cnt] = ret; + cnt_close++; + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_ksyms_btf.c b/tools/testing/selftests/bpf/progs/test_ksyms_btf.c new file mode 100644 index 000000000000..bb8ea9270f29 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_ksyms_btf.c @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Google */ + +#include "vmlinux.h" + +#include <bpf/bpf_helpers.h> + +__u64 out__runqueues_addr = -1; +__u64 out__bpf_prog_active_addr = -1; + +__u32 out__rq_cpu = -1; /* percpu struct fields */ +int out__bpf_prog_active = -1; /* percpu int */ + +__u32 out__this_rq_cpu = -1; +int out__this_bpf_prog_active = -1; + +__u32 out__cpu_0_rq_cpu = -1; /* cpu_rq(0)->cpu */ + +extern const struct rq runqueues __ksym; /* struct type global var. */ +extern const int bpf_prog_active __ksym; /* int type global var. 
*/ + +SEC("raw_tp/sys_enter") +int handler(const void *ctx) +{ + struct rq *rq; + int *active; + __u32 cpu; + + out__runqueues_addr = (__u64)&runqueues; + out__bpf_prog_active_addr = (__u64)&bpf_prog_active; + + cpu = bpf_get_smp_processor_id(); + + /* test bpf_per_cpu_ptr() */ + rq = (struct rq *)bpf_per_cpu_ptr(&runqueues, cpu); + if (rq) + out__rq_cpu = rq->cpu; + active = (int *)bpf_per_cpu_ptr(&bpf_prog_active, cpu); + if (active) + out__bpf_prog_active = *active; + + rq = (struct rq *)bpf_per_cpu_ptr(&runqueues, 0); + if (rq) /* should always be valid, but we can't spare the check. */ + out__cpu_0_rq_cpu = rq->cpu; + + /* test bpf_this_cpu_ptr */ + rq = (struct rq *)bpf_this_cpu_ptr(&runqueues); + out__this_rq_cpu = rq->cpu; + active = (int *)bpf_this_cpu_ptr(&bpf_prog_active); + out__this_bpf_prog_active = *active; + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c b/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c index 28351936a438..b9e2753f4f91 100644 --- a/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c +++ b/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c @@ -17,9 +17,7 @@ #include "test_iptunnel_common.h" #include <bpf/bpf_endian.h> -int _version SEC("version") = 1; - -static __u32 rol32(__u32 word, unsigned int shift) +static __always_inline __u32 rol32(__u32 word, unsigned int shift) { return (word << shift) | (word >> ((-shift) & 31)); } @@ -52,7 +50,7 @@ static __u32 rol32(__u32 word, unsigned int shift) typedef unsigned int u32; -static u32 jhash(const void *key, u32 length, u32 initval) +static __noinline u32 jhash(const void *key, u32 length, u32 initval) { u32 a, b, c; const unsigned char *k = key; @@ -88,7 +86,7 @@ static u32 jhash(const void *key, u32 length, u32 initval) return c; } -static u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval) +static __noinline u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval) { a += initval; b += initval; @@ 
-97,7 +95,7 @@ static u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval) return c; } -static u32 jhash_2words(u32 a, u32 b, u32 initval) +static __noinline u32 jhash_2words(u32 a, u32 b, u32 initval) { return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2)); } @@ -200,8 +198,7 @@ struct { __type(value, struct ctl_value); } ctl_array SEC(".maps"); -static __u32 get_packet_hash(struct packet_description *pckt, - bool ipv6) +static __noinline __u32 get_packet_hash(struct packet_description *pckt, bool ipv6) { if (ipv6) return jhash_2words(jhash(pckt->srcv6, 16, MAX_VIPS), @@ -210,10 +207,10 @@ static __u32 get_packet_hash(struct packet_description *pckt, return jhash_2words(pckt->src, pckt->ports, CH_RINGS_SIZE); } -static bool get_packet_dst(struct real_definition **real, - struct packet_description *pckt, - struct vip_meta *vip_info, - bool is_ipv6) +static __noinline bool get_packet_dst(struct real_definition **real, + struct packet_description *pckt, + struct vip_meta *vip_info, + bool is_ipv6) { __u32 hash = get_packet_hash(pckt, is_ipv6); __u32 key = RING_SIZE * vip_info->vip_num + hash % RING_SIZE; @@ -233,8 +230,8 @@ static bool get_packet_dst(struct real_definition **real, return true; } -static int parse_icmpv6(void *data, void *data_end, __u64 off, - struct packet_description *pckt) +static __noinline int parse_icmpv6(void *data, void *data_end, __u64 off, + struct packet_description *pckt) { struct icmp6hdr *icmp_hdr; struct ipv6hdr *ip6h; @@ -255,8 +252,8 @@ static int parse_icmpv6(void *data, void *data_end, __u64 off, return TC_ACT_UNSPEC; } -static int parse_icmp(void *data, void *data_end, __u64 off, - struct packet_description *pckt) +static __noinline int parse_icmp(void *data, void *data_end, __u64 off, + struct packet_description *pckt) { struct icmphdr *icmp_hdr; struct iphdr *iph; @@ -280,8 +277,8 @@ static int parse_icmp(void *data, void *data_end, __u64 off, return TC_ACT_UNSPEC; } -static bool parse_udp(void *data, __u64 off, 
void *data_end, - struct packet_description *pckt) +static __noinline bool parse_udp(void *data, __u64 off, void *data_end, + struct packet_description *pckt) { struct udphdr *udp; udp = data + off; @@ -299,8 +296,8 @@ static bool parse_udp(void *data, __u64 off, void *data_end, return true; } -static bool parse_tcp(void *data, __u64 off, void *data_end, - struct packet_description *pckt) +static __noinline bool parse_tcp(void *data, __u64 off, void *data_end, + struct packet_description *pckt) { struct tcphdr *tcp; @@ -321,8 +318,8 @@ static bool parse_tcp(void *data, __u64 off, void *data_end, return true; } -static int process_packet(void *data, __u64 off, void *data_end, - bool is_ipv6, struct __sk_buff *skb) +static __noinline int process_packet(void *data, __u64 off, void *data_end, + bool is_ipv6, struct __sk_buff *skb) { void *pkt_start = (void *)(long)skb->data; struct packet_description pckt = {}; diff --git a/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c b/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c new file mode 100644 index 000000000000..6077a025092c --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c @@ -0,0 +1,325 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include <stddef.h> +#include <errno.h> +#include <stdbool.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <linux/ipv6.h> +#include <linux/tcp.h> +#include <linux/socket.h> +#include <linux/bpf.h> +#include <linux/types.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> +#define BPF_PROG_TEST_TCP_HDR_OPTIONS +#include "test_tcp_hdr_options.h" + +__u16 last_addr16_n = __bpf_htons(1); +__u16 active_lport_n = 0; +__u16 active_lport_h = 0; +__u16 passive_lport_n = 0; +__u16 passive_lport_h = 0; + +/* options received at passive side */ +unsigned int nr_pure_ack = 0; +unsigned int nr_data = 0; +unsigned int nr_syn = 0; +unsigned int nr_fin = 0; + +/* Check the header received from 
the active side */ +static int __check_active_hdr_in(struct bpf_sock_ops *skops, bool check_syn) +{ + union { + struct tcphdr th; + struct ipv6hdr ip6; + struct tcp_exprm_opt exprm_opt; + struct tcp_opt reg_opt; + __u8 data[100]; /* IPv6 (40) + Max TCP hdr (60) */ + } hdr = {}; + __u64 load_flags = check_syn ? BPF_LOAD_HDR_OPT_TCP_SYN : 0; + struct tcphdr *pth; + int ret; + + hdr.reg_opt.kind = 0xB9; + + /* The option is 4 bytes long instead of 2 bytes */ + ret = bpf_load_hdr_opt(skops, &hdr.reg_opt, 2, load_flags); + if (ret != -ENOSPC) + RET_CG_ERR(ret); + + /* Test searching magic with regular kind */ + hdr.reg_opt.len = 4; + ret = bpf_load_hdr_opt(skops, &hdr.reg_opt, sizeof(hdr.reg_opt), + load_flags); + if (ret != -EINVAL) + RET_CG_ERR(ret); + + hdr.reg_opt.len = 0; + ret = bpf_load_hdr_opt(skops, &hdr.reg_opt, sizeof(hdr.reg_opt), + load_flags); + if (ret != 4 || hdr.reg_opt.len != 4 || hdr.reg_opt.kind != 0xB9 || + hdr.reg_opt.data[0] != 0xfa || hdr.reg_opt.data[1] != 0xce) + RET_CG_ERR(ret); + + /* Test searching experimental option with invalid kind length */ + hdr.exprm_opt.kind = TCPOPT_EXP; + hdr.exprm_opt.len = 5; + hdr.exprm_opt.magic = 0; + ret = bpf_load_hdr_opt(skops, &hdr.exprm_opt, sizeof(hdr.exprm_opt), + load_flags); + if (ret != -EINVAL) + RET_CG_ERR(ret); + + /* Test searching experimental option with 0 magic value */ + hdr.exprm_opt.len = 4; + ret = bpf_load_hdr_opt(skops, &hdr.exprm_opt, sizeof(hdr.exprm_opt), + load_flags); + if (ret != -ENOMSG) + RET_CG_ERR(ret); + + hdr.exprm_opt.magic = __bpf_htons(0xeB9F); + ret = bpf_load_hdr_opt(skops, &hdr.exprm_opt, sizeof(hdr.exprm_opt), + load_flags); + if (ret != 4 || hdr.exprm_opt.len != 4 || + hdr.exprm_opt.kind != TCPOPT_EXP || + hdr.exprm_opt.magic != __bpf_htons(0xeB9F)) + RET_CG_ERR(ret); + + if (!check_syn) + return CG_OK; + + /* Test loading from skops->syn_skb if sk_state == TCP_NEW_SYN_RECV + * + * Test loading from tp->saved_syn for other sk_state. 
+ */ + ret = bpf_getsockopt(skops, SOL_TCP, TCP_BPF_SYN_IP, &hdr.ip6, + sizeof(hdr.ip6)); + if (ret != -ENOSPC) + RET_CG_ERR(ret); + + if (hdr.ip6.saddr.s6_addr16[7] != last_addr16_n || + hdr.ip6.daddr.s6_addr16[7] != last_addr16_n) + RET_CG_ERR(0); + + ret = bpf_getsockopt(skops, SOL_TCP, TCP_BPF_SYN_IP, &hdr, sizeof(hdr)); + if (ret < 0) + RET_CG_ERR(ret); + + pth = (struct tcphdr *)(&hdr.ip6 + 1); + if (pth->dest != passive_lport_n || pth->source != active_lport_n) + RET_CG_ERR(0); + + ret = bpf_getsockopt(skops, SOL_TCP, TCP_BPF_SYN, &hdr, sizeof(hdr)); + if (ret < 0) + RET_CG_ERR(ret); + + if (hdr.th.dest != passive_lport_n || hdr.th.source != active_lport_n) + RET_CG_ERR(0); + + return CG_OK; +} + +static int check_active_syn_in(struct bpf_sock_ops *skops) +{ + return __check_active_hdr_in(skops, true); +} + +static int check_active_hdr_in(struct bpf_sock_ops *skops) +{ + struct tcphdr *th; + + if (__check_active_hdr_in(skops, false) == CG_ERR) + return CG_ERR; + + th = skops->skb_data; + if (th + 1 > skops->skb_data_end) + RET_CG_ERR(0); + + if (tcp_hdrlen(th) < skops->skb_len) + nr_data++; + + if (th->fin) + nr_fin++; + + if (th->ack && !th->fin && tcp_hdrlen(th) == skops->skb_len) + nr_pure_ack++; + + return CG_OK; +} + +static int active_opt_len(struct bpf_sock_ops *skops) +{ + int err; + + /* Reserve more than enough to allow the -EEXIST test in + * the write_active_opt(). 
+ */ + err = bpf_reserve_hdr_opt(skops, 12, 0); + if (err) + RET_CG_ERR(err); + + return CG_OK; +} + +static int write_active_opt(struct bpf_sock_ops *skops) +{ + struct tcp_exprm_opt exprm_opt = {}; + struct tcp_opt win_scale_opt = {}; + struct tcp_opt reg_opt = {}; + struct tcphdr *th; + int err, ret; + + exprm_opt.kind = TCPOPT_EXP; + exprm_opt.len = 4; + exprm_opt.magic = __bpf_htons(0xeB9F); + + reg_opt.kind = 0xB9; + reg_opt.len = 4; + reg_opt.data[0] = 0xfa; + reg_opt.data[1] = 0xce; + + win_scale_opt.kind = TCPOPT_WINDOW; + + err = bpf_store_hdr_opt(skops, &exprm_opt, sizeof(exprm_opt), 0); + if (err) + RET_CG_ERR(err); + + /* Store the same exprm option */ + err = bpf_store_hdr_opt(skops, &exprm_opt, sizeof(exprm_opt), 0); + if (err != -EEXIST) + RET_CG_ERR(err); + + err = bpf_store_hdr_opt(skops, &reg_opt, sizeof(reg_opt), 0); + if (err) + RET_CG_ERR(err); + err = bpf_store_hdr_opt(skops, &reg_opt, sizeof(reg_opt), 0); + if (err != -EEXIST) + RET_CG_ERR(err); + + /* Check the option has been written and can be searched */ + ret = bpf_load_hdr_opt(skops, &exprm_opt, sizeof(exprm_opt), 0); + if (ret != 4 || exprm_opt.len != 4 || exprm_opt.kind != TCPOPT_EXP || + exprm_opt.magic != __bpf_htons(0xeB9F)) + RET_CG_ERR(ret); + + reg_opt.len = 0; + ret = bpf_load_hdr_opt(skops, &reg_opt, sizeof(reg_opt), 0); + if (ret != 4 || reg_opt.len != 4 || reg_opt.kind != 0xB9 || + reg_opt.data[0] != 0xfa || reg_opt.data[1] != 0xce) + RET_CG_ERR(ret); + + th = skops->skb_data; + if (th + 1 > skops->skb_data_end) + RET_CG_ERR(0); + + if (th->syn) { + active_lport_h = skops->local_port; + active_lport_n = th->source; + + /* Search the win scale option written by kernel + * in the SYN packet. + */ + ret = bpf_load_hdr_opt(skops, &win_scale_opt, + sizeof(win_scale_opt), 0); + if (ret != 3 || win_scale_opt.len != 3 || + win_scale_opt.kind != TCPOPT_WINDOW) + RET_CG_ERR(ret); + + /* Write the win scale option that kernel + * has already written. 
+ */ + err = bpf_store_hdr_opt(skops, &win_scale_opt, + sizeof(win_scale_opt), 0); + if (err != -EEXIST) + RET_CG_ERR(err); + } + + return CG_OK; +} + +static int handle_hdr_opt_len(struct bpf_sock_ops *skops) +{ + __u8 tcp_flags = skops_tcp_flags(skops); + + if ((tcp_flags & TCPHDR_SYNACK) == TCPHDR_SYNACK) + /* Check the SYN from bpf_sock_ops_kern->syn_skb */ + return check_active_syn_in(skops); + + /* Passive side should have cleared the write hdr cb by now */ + if (skops->local_port == passive_lport_h) + RET_CG_ERR(0); + + return active_opt_len(skops); +} + +static int handle_write_hdr_opt(struct bpf_sock_ops *skops) +{ + if (skops->local_port == passive_lport_h) + RET_CG_ERR(0); + + return write_active_opt(skops); +} + +static int handle_parse_hdr(struct bpf_sock_ops *skops) +{ + /* Passive side is not writing any non-standard/unknown + * option, so the active side should never be called. + */ + if (skops->local_port == active_lport_h) + RET_CG_ERR(0); + + return check_active_hdr_in(skops); +} + +static int handle_passive_estab(struct bpf_sock_ops *skops) +{ + int err; + + /* No more write hdr cb */ + bpf_sock_ops_cb_flags_set(skops, + skops->bpf_sock_ops_cb_flags & + ~BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG); + + /* Recheck the SYN but check the tp->saved_syn this time */ + err = check_active_syn_in(skops); + if (err == CG_ERR) + return err; + + nr_syn++; + + /* The ack has header option written by the active side also */ + return check_active_hdr_in(skops); +} + +SEC("sockops/misc_estab") +int misc_estab(struct bpf_sock_ops *skops) +{ + int true_val = 1; + + switch (skops->op) { + case BPF_SOCK_OPS_TCP_LISTEN_CB: + passive_lport_h = skops->local_port; + passive_lport_n = __bpf_htons(passive_lport_h); + bpf_setsockopt(skops, SOL_TCP, TCP_SAVE_SYN, + &true_val, sizeof(true_val)); + set_hdr_cb_flags(skops, 0); + break; + case BPF_SOCK_OPS_TCP_CONNECT_CB: + set_hdr_cb_flags(skops, 0); + break; + case BPF_SOCK_OPS_PARSE_HDR_OPT_CB: + return handle_parse_hdr(skops); + 
case BPF_SOCK_OPS_HDR_OPT_LEN_CB: + return handle_hdr_opt_len(skops); + case BPF_SOCK_OPS_WRITE_HDR_OPT_CB: + return handle_write_hdr_opt(skops); + case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: + return handle_passive_estab(skops); + } + + return CG_OK; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_overhead.c b/tools/testing/selftests/bpf/progs/test_overhead.c index 42403d088abc..abb7344b531f 100644 --- a/tools/testing/selftests/bpf/progs/test_overhead.c +++ b/tools/testing/selftests/bpf/progs/test_overhead.c @@ -39,10 +39,4 @@ int BPF_PROG(prog5, struct task_struct *tsk, const char *buf, bool exec) return 0; } -SEC("fmod_ret/__set_task_comm") -int BPF_PROG(prog6, struct task_struct *tsk, const char *buf, bool exec) -{ - return !tsk; -} - char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_pe_preserve_elems.c b/tools/testing/selftests/bpf/progs/test_pe_preserve_elems.c new file mode 100644 index 000000000000..fb22de7c365d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_pe_preserve_elems.c @@ -0,0 +1,38 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Facebook +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +struct { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(max_entries, 1); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); +} array_1 SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __uint(max_entries, 1); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); + __uint(map_flags, BPF_F_PRESERVE_ELEMS); +} array_2 SEC(".maps"); + +SEC("raw_tp/sched_switch") +int BPF_PROG(read_array_1) +{ + struct bpf_perf_event_value val; + + return bpf_perf_event_read_value(&array_1, 0, &val, sizeof(val)); +} + +SEC("raw_tp/task_rename") +int BPF_PROG(read_array_2) +{ + struct bpf_perf_event_value val; + + return bpf_perf_event_read_value(&array_2, 0, &val, 
sizeof(val)); +} + +char LICENSE[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_pkt_access.c b/tools/testing/selftests/bpf/progs/test_pkt_access.c index e72eba4a93d2..852051064507 100644 --- a/tools/testing/selftests/bpf/progs/test_pkt_access.c +++ b/tools/testing/selftests/bpf/progs/test_pkt_access.c @@ -79,6 +79,24 @@ int get_skb_ifindex(int val, struct __sk_buff *skb, int var) return skb->ifindex * val * var; } +__attribute__ ((noinline)) +int test_pkt_write_access_subprog(struct __sk_buff *skb, __u32 off) +{ + void *data = (void *)(long)skb->data; + void *data_end = (void *)(long)skb->data_end; + struct tcphdr *tcp = NULL; + + if (off > sizeof(struct ethhdr) + sizeof(struct ipv6hdr)) + return -1; + + tcp = data + off; + if (tcp + 1 > data_end) + return -1; + /* make modification to the packet data */ + tcp->check++; + return 0; +} + SEC("classifier/test_pkt_access") int test_pkt_access(struct __sk_buff *skb) { @@ -117,6 +135,8 @@ int test_pkt_access(struct __sk_buff *skb) if (test_pkt_access_subprog3(3, skb) != skb->len * 3 * skb->ifindex) return TC_ACT_SHOT; if (tcp) { + if (test_pkt_write_access_subprog(skb, (void *)tcp - data)) + return TC_ACT_SHOT; if (((void *)(tcp) + 20) > data_end || proto != 6) return TC_ACT_SHOT; barrier(); /* to force ordering of checks */ diff --git a/tools/testing/selftests/bpf/progs/test_raw_tp_test_run.c b/tools/testing/selftests/bpf/progs/test_raw_tp_test_run.c new file mode 100644 index 000000000000..4c63cc87b9d0 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_raw_tp_test_run.c @@ -0,0 +1,24 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +__u32 count = 0; +__u32 on_cpu = 0xffffffff; + +SEC("raw_tp/task_rename") +int BPF_PROG(rename, struct task_struct *task, char *comm) +{ + + count++; + if ((__u64) task == 0x1234ULL && (__u64) comm == 0x5678ULL) { + on_cpu = 
bpf_get_smp_processor_id(); + return (long)task + (long)comm; + } + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup.c b/tools/testing/selftests/bpf/progs/test_sk_lookup.c index bbf8296f4d66..1032b292af5b 100644 --- a/tools/testing/selftests/bpf/progs/test_sk_lookup.c +++ b/tools/testing/selftests/bpf/progs/test_sk_lookup.c @@ -19,6 +19,17 @@ #define IP6(aaaa, bbbb, cccc, dddd) \ { bpf_htonl(aaaa), bpf_htonl(bbbb), bpf_htonl(cccc), bpf_htonl(dddd) } +/* Macros for least-significant byte and word accesses. */ +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#define LSE_INDEX(index, size) (index) +#else +#define LSE_INDEX(index, size) ((size) - (index) - 1) +#endif +#define LSB(value, index) \ + (((__u8 *)&(value))[LSE_INDEX((index), sizeof(value))]) +#define LSW(value, index) \ + (((__u16 *)&(value))[LSE_INDEX((index), sizeof(value) / 2)]) + #define MAX_SOCKS 32 struct { @@ -369,171 +380,146 @@ int ctx_narrow_access(struct bpf_sk_lookup *ctx) { struct bpf_sock *sk; int err, family; - __u16 *half; - __u8 *byte; bool v4; v4 = (ctx->family == AF_INET); /* Narrow loads from family field */ - byte = (__u8 *)&ctx->family; - half = (__u16 *)&ctx->family; - if (byte[0] != (v4 ? AF_INET : AF_INET6) || - byte[1] != 0 || byte[2] != 0 || byte[3] != 0) + if (LSB(ctx->family, 0) != (v4 ? AF_INET : AF_INET6) || + LSB(ctx->family, 1) != 0 || LSB(ctx->family, 2) != 0 || LSB(ctx->family, 3) != 0) return SK_DROP; - if (half[0] != (v4 ? AF_INET : AF_INET6)) + if (LSW(ctx->family, 0) != (v4 ? 
AF_INET : AF_INET6)) return SK_DROP; - byte = (__u8 *)&ctx->protocol; - if (byte[0] != IPPROTO_TCP || - byte[1] != 0 || byte[2] != 0 || byte[3] != 0) + /* Narrow loads from protocol field */ + if (LSB(ctx->protocol, 0) != IPPROTO_TCP || + LSB(ctx->protocol, 1) != 0 || LSB(ctx->protocol, 2) != 0 || LSB(ctx->protocol, 3) != 0) return SK_DROP; - half = (__u16 *)&ctx->protocol; - if (half[0] != IPPROTO_TCP) + if (LSW(ctx->protocol, 0) != IPPROTO_TCP) return SK_DROP; /* Narrow loads from remote_port field. Expect non-0 value. */ - byte = (__u8 *)&ctx->remote_port; - if (byte[0] == 0 && byte[1] == 0 && byte[2] == 0 && byte[3] == 0) + if (LSB(ctx->remote_port, 0) == 0 && LSB(ctx->remote_port, 1) == 0 && + LSB(ctx->remote_port, 2) == 0 && LSB(ctx->remote_port, 3) == 0) return SK_DROP; - half = (__u16 *)&ctx->remote_port; - if (half[0] == 0) + if (LSW(ctx->remote_port, 0) == 0) return SK_DROP; /* Narrow loads from local_port field. Expect DST_PORT. */ - byte = (__u8 *)&ctx->local_port; - if (byte[0] != ((DST_PORT >> 0) & 0xff) || - byte[1] != ((DST_PORT >> 8) & 0xff) || - byte[2] != 0 || byte[3] != 0) + if (LSB(ctx->local_port, 0) != ((DST_PORT >> 0) & 0xff) || + LSB(ctx->local_port, 1) != ((DST_PORT >> 8) & 0xff) || + LSB(ctx->local_port, 2) != 0 || LSB(ctx->local_port, 3) != 0) return SK_DROP; - half = (__u16 *)&ctx->local_port; - if (half[0] != DST_PORT) + if (LSW(ctx->local_port, 0) != DST_PORT) return SK_DROP; /* Narrow loads from IPv4 fields */ if (v4) { /* Expect non-0.0.0.0 in remote_ip4 */ - byte = (__u8 *)&ctx->remote_ip4; - if (byte[0] == 0 && byte[1] == 0 && - byte[2] == 0 && byte[3] == 0) + if (LSB(ctx->remote_ip4, 0) == 0 && LSB(ctx->remote_ip4, 1) == 0 && + LSB(ctx->remote_ip4, 2) == 0 && LSB(ctx->remote_ip4, 3) == 0) return SK_DROP; - half = (__u16 *)&ctx->remote_ip4; - if (half[0] == 0 && half[1] == 0) + if (LSW(ctx->remote_ip4, 0) == 0 && LSW(ctx->remote_ip4, 1) == 0) return SK_DROP; /* Expect DST_IP4 in local_ip4 */ - byte = (__u8 *)&ctx->local_ip4; - if 
(byte[0] != ((DST_IP4 >> 0) & 0xff) || - byte[1] != ((DST_IP4 >> 8) & 0xff) || - byte[2] != ((DST_IP4 >> 16) & 0xff) || - byte[3] != ((DST_IP4 >> 24) & 0xff)) + if (LSB(ctx->local_ip4, 0) != ((DST_IP4 >> 0) & 0xff) || + LSB(ctx->local_ip4, 1) != ((DST_IP4 >> 8) & 0xff) || + LSB(ctx->local_ip4, 2) != ((DST_IP4 >> 16) & 0xff) || + LSB(ctx->local_ip4, 3) != ((DST_IP4 >> 24) & 0xff)) return SK_DROP; - half = (__u16 *)&ctx->local_ip4; - if (half[0] != ((DST_IP4 >> 0) & 0xffff) || - half[1] != ((DST_IP4 >> 16) & 0xffff)) + if (LSW(ctx->local_ip4, 0) != ((DST_IP4 >> 0) & 0xffff) || + LSW(ctx->local_ip4, 1) != ((DST_IP4 >> 16) & 0xffff)) return SK_DROP; } else { /* Expect 0.0.0.0 IPs when family != AF_INET */ - byte = (__u8 *)&ctx->remote_ip4; - if (byte[0] != 0 || byte[1] != 0 && - byte[2] != 0 || byte[3] != 0) + if (LSB(ctx->remote_ip4, 0) != 0 || LSB(ctx->remote_ip4, 1) != 0 || + LSB(ctx->remote_ip4, 2) != 0 || LSB(ctx->remote_ip4, 3) != 0) return SK_DROP; - half = (__u16 *)&ctx->remote_ip4; - if (half[0] != 0 || half[1] != 0) + if (LSW(ctx->remote_ip4, 0) != 0 || LSW(ctx->remote_ip4, 1) != 0) return SK_DROP; - byte = (__u8 *)&ctx->local_ip4; - if (byte[0] != 0 || byte[1] != 0 && - byte[2] != 0 || byte[3] != 0) + if (LSB(ctx->local_ip4, 0) != 0 || LSB(ctx->local_ip4, 1) != 0 || + LSB(ctx->local_ip4, 2) != 0 || LSB(ctx->local_ip4, 3) != 0) return SK_DROP; - half = (__u16 *)&ctx->local_ip4; - if (half[0] != 0 || half[1] != 0) + if (LSW(ctx->local_ip4, 0) != 0 || LSW(ctx->local_ip4, 1) != 0) return SK_DROP; } /* Narrow loads from IPv6 fields */ if (!v4) { - /* Expenct non-:: IP in remote_ip6 */ - byte = (__u8 *)&ctx->remote_ip6; - if (byte[0] == 0 && byte[1] == 0 && - byte[2] == 0 && byte[3] == 0 && - byte[4] == 0 && byte[5] == 0 && - byte[6] == 0 && byte[7] == 0 && - byte[8] == 0 && byte[9] == 0 && - byte[10] == 0 && byte[11] == 0 && - byte[12] == 0 && byte[13] == 0 && - byte[14] == 0 && byte[15] == 0) + /* Expect non-:: IP in remote_ip6 */ + if (LSB(ctx->remote_ip6[0], 
0) == 0 && LSB(ctx->remote_ip6[0], 1) == 0 && + LSB(ctx->remote_ip6[0], 2) == 0 && LSB(ctx->remote_ip6[0], 3) == 0 && + LSB(ctx->remote_ip6[1], 0) == 0 && LSB(ctx->remote_ip6[1], 1) == 0 && + LSB(ctx->remote_ip6[1], 2) == 0 && LSB(ctx->remote_ip6[1], 3) == 0 && + LSB(ctx->remote_ip6[2], 0) == 0 && LSB(ctx->remote_ip6[2], 1) == 0 && + LSB(ctx->remote_ip6[2], 2) == 0 && LSB(ctx->remote_ip6[2], 3) == 0 && + LSB(ctx->remote_ip6[3], 0) == 0 && LSB(ctx->remote_ip6[3], 1) == 0 && + LSB(ctx->remote_ip6[3], 2) == 0 && LSB(ctx->remote_ip6[3], 3) == 0) return SK_DROP; - half = (__u16 *)&ctx->remote_ip6; - if (half[0] == 0 && half[1] == 0 && - half[2] == 0 && half[3] == 0 && - half[4] == 0 && half[5] == 0 && - half[6] == 0 && half[7] == 0) + if (LSW(ctx->remote_ip6[0], 0) == 0 && LSW(ctx->remote_ip6[0], 1) == 0 && + LSW(ctx->remote_ip6[1], 0) == 0 && LSW(ctx->remote_ip6[1], 1) == 0 && + LSW(ctx->remote_ip6[2], 0) == 0 && LSW(ctx->remote_ip6[2], 1) == 0 && + LSW(ctx->remote_ip6[3], 0) == 0 && LSW(ctx->remote_ip6[3], 1) == 0) return SK_DROP; - /* Expect DST_IP6 in local_ip6 */ - byte = (__u8 *)&ctx->local_ip6; - if (byte[0] != ((DST_IP6[0] >> 0) & 0xff) || - byte[1] != ((DST_IP6[0] >> 8) & 0xff) || - byte[2] != ((DST_IP6[0] >> 16) & 0xff) || - byte[3] != ((DST_IP6[0] >> 24) & 0xff) || - byte[4] != ((DST_IP6[1] >> 0) & 0xff) || - byte[5] != ((DST_IP6[1] >> 8) & 0xff) || - byte[6] != ((DST_IP6[1] >> 16) & 0xff) || - byte[7] != ((DST_IP6[1] >> 24) & 0xff) || - byte[8] != ((DST_IP6[2] >> 0) & 0xff) || - byte[9] != ((DST_IP6[2] >> 8) & 0xff) || - byte[10] != ((DST_IP6[2] >> 16) & 0xff) || - byte[11] != ((DST_IP6[2] >> 24) & 0xff) || - byte[12] != ((DST_IP6[3] >> 0) & 0xff) || - byte[13] != ((DST_IP6[3] >> 8) & 0xff) || - byte[14] != ((DST_IP6[3] >> 16) & 0xff) || - byte[15] != ((DST_IP6[3] >> 24) & 0xff)) + if (LSB(ctx->local_ip6[0], 0) != ((DST_IP6[0] >> 0) & 0xff) || + LSB(ctx->local_ip6[0], 1) != ((DST_IP6[0] >> 8) & 0xff) || + LSB(ctx->local_ip6[0], 2) != ((DST_IP6[0] >> 16) & 
0xff) || + LSB(ctx->local_ip6[0], 3) != ((DST_IP6[0] >> 24) & 0xff) || + LSB(ctx->local_ip6[1], 0) != ((DST_IP6[1] >> 0) & 0xff) || + LSB(ctx->local_ip6[1], 1) != ((DST_IP6[1] >> 8) & 0xff) || + LSB(ctx->local_ip6[1], 2) != ((DST_IP6[1] >> 16) & 0xff) || + LSB(ctx->local_ip6[1], 3) != ((DST_IP6[1] >> 24) & 0xff) || + LSB(ctx->local_ip6[2], 0) != ((DST_IP6[2] >> 0) & 0xff) || + LSB(ctx->local_ip6[2], 1) != ((DST_IP6[2] >> 8) & 0xff) || + LSB(ctx->local_ip6[2], 2) != ((DST_IP6[2] >> 16) & 0xff) || + LSB(ctx->local_ip6[2], 3) != ((DST_IP6[2] >> 24) & 0xff) || + LSB(ctx->local_ip6[3], 0) != ((DST_IP6[3] >> 0) & 0xff) || + LSB(ctx->local_ip6[3], 1) != ((DST_IP6[3] >> 8) & 0xff) || + LSB(ctx->local_ip6[3], 2) != ((DST_IP6[3] >> 16) & 0xff) || + LSB(ctx->local_ip6[3], 3) != ((DST_IP6[3] >> 24) & 0xff)) return SK_DROP; - half = (__u16 *)&ctx->local_ip6; - if (half[0] != ((DST_IP6[0] >> 0) & 0xffff) || - half[1] != ((DST_IP6[0] >> 16) & 0xffff) || - half[2] != ((DST_IP6[1] >> 0) & 0xffff) || - half[3] != ((DST_IP6[1] >> 16) & 0xffff) || - half[4] != ((DST_IP6[2] >> 0) & 0xffff) || - half[5] != ((DST_IP6[2] >> 16) & 0xffff) || - half[6] != ((DST_IP6[3] >> 0) & 0xffff) || - half[7] != ((DST_IP6[3] >> 16) & 0xffff)) + if (LSW(ctx->local_ip6[0], 0) != ((DST_IP6[0] >> 0) & 0xffff) || + LSW(ctx->local_ip6[0], 1) != ((DST_IP6[0] >> 16) & 0xffff) || + LSW(ctx->local_ip6[1], 0) != ((DST_IP6[1] >> 0) & 0xffff) || + LSW(ctx->local_ip6[1], 1) != ((DST_IP6[1] >> 16) & 0xffff) || + LSW(ctx->local_ip6[2], 0) != ((DST_IP6[2] >> 0) & 0xffff) || + LSW(ctx->local_ip6[2], 1) != ((DST_IP6[2] >> 16) & 0xffff) || + LSW(ctx->local_ip6[3], 0) != ((DST_IP6[3] >> 0) & 0xffff) || + LSW(ctx->local_ip6[3], 1) != ((DST_IP6[3] >> 16) & 0xffff)) return SK_DROP; } else { /* Expect :: IPs when family != AF_INET6 */ - byte = (__u8 *)&ctx->remote_ip6; - if (byte[0] != 0 || byte[1] != 0 || - byte[2] != 0 || byte[3] != 0 || - byte[4] != 0 || byte[5] != 0 || - byte[6] != 0 || byte[7] != 0 || - byte[8] != 0 || 
byte[9] != 0 || - byte[10] != 0 || byte[11] != 0 || - byte[12] != 0 || byte[13] != 0 || - byte[14] != 0 || byte[15] != 0) + if (LSB(ctx->remote_ip6[0], 0) != 0 || LSB(ctx->remote_ip6[0], 1) != 0 || + LSB(ctx->remote_ip6[0], 2) != 0 || LSB(ctx->remote_ip6[0], 3) != 0 || + LSB(ctx->remote_ip6[1], 0) != 0 || LSB(ctx->remote_ip6[1], 1) != 0 || + LSB(ctx->remote_ip6[1], 2) != 0 || LSB(ctx->remote_ip6[1], 3) != 0 || + LSB(ctx->remote_ip6[2], 0) != 0 || LSB(ctx->remote_ip6[2], 1) != 0 || + LSB(ctx->remote_ip6[2], 2) != 0 || LSB(ctx->remote_ip6[2], 3) != 0 || + LSB(ctx->remote_ip6[3], 0) != 0 || LSB(ctx->remote_ip6[3], 1) != 0 || + LSB(ctx->remote_ip6[3], 2) != 0 || LSB(ctx->remote_ip6[3], 3) != 0) return SK_DROP; - half = (__u16 *)&ctx->remote_ip6; - if (half[0] != 0 || half[1] != 0 || - half[2] != 0 || half[3] != 0 || - half[4] != 0 || half[5] != 0 || - half[6] != 0 || half[7] != 0) + if (LSW(ctx->remote_ip6[0], 0) != 0 || LSW(ctx->remote_ip6[0], 1) != 0 || + LSW(ctx->remote_ip6[1], 0) != 0 || LSW(ctx->remote_ip6[1], 1) != 0 || + LSW(ctx->remote_ip6[2], 0) != 0 || LSW(ctx->remote_ip6[2], 1) != 0 || + LSW(ctx->remote_ip6[3], 0) != 0 || LSW(ctx->remote_ip6[3], 1) != 0) return SK_DROP; - byte = (__u8 *)&ctx->local_ip6; - if (byte[0] != 0 || byte[1] != 0 || - byte[2] != 0 || byte[3] != 0 || - byte[4] != 0 || byte[5] != 0 || - byte[6] != 0 || byte[7] != 0 || - byte[8] != 0 || byte[9] != 0 || - byte[10] != 0 || byte[11] != 0 || - byte[12] != 0 || byte[13] != 0 || - byte[14] != 0 || byte[15] != 0) + if (LSB(ctx->local_ip6[0], 0) != 0 || LSB(ctx->local_ip6[0], 1) != 0 || + LSB(ctx->local_ip6[0], 2) != 0 || LSB(ctx->local_ip6[0], 3) != 0 || + LSB(ctx->local_ip6[1], 0) != 0 || LSB(ctx->local_ip6[1], 1) != 0 || + LSB(ctx->local_ip6[1], 2) != 0 || LSB(ctx->local_ip6[1], 3) != 0 || + LSB(ctx->local_ip6[2], 0) != 0 || LSB(ctx->local_ip6[2], 1) != 0 || + LSB(ctx->local_ip6[2], 2) != 0 || LSB(ctx->local_ip6[2], 3) != 0 || + LSB(ctx->local_ip6[3], 0) != 0 || LSB(ctx->local_ip6[3], 1) != 
0 || + LSB(ctx->local_ip6[3], 2) != 0 || LSB(ctx->local_ip6[3], 3) != 0) return SK_DROP; - half = (__u16 *)&ctx->local_ip6; - if (half[0] != 0 || half[1] != 0 || - half[2] != 0 || half[3] != 0 || - half[4] != 0 || half[5] != 0 || - half[6] != 0 || half[7] != 0) + if (LSW(ctx->remote_ip6[0], 0) != 0 || LSW(ctx->remote_ip6[0], 1) != 0 || + LSW(ctx->remote_ip6[1], 0) != 0 || LSW(ctx->remote_ip6[1], 1) != 0 || + LSW(ctx->remote_ip6[2], 0) != 0 || LSW(ctx->remote_ip6[2], 1) != 0 || + LSW(ctx->remote_ip6[3], 0) != 0 || LSW(ctx->remote_ip6[3], 1) != 0) return SK_DROP; } diff --git a/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c b/tools/testing/selftests/bpf/progs/test_sock_fields.c index 9bcaa37f476a..81b57b9aaaea 100644 --- a/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c +++ b/tools/testing/selftests/bpf/progs/test_sock_fields.c @@ -7,19 +7,7 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> - -enum bpf_addr_array_idx { - ADDR_SRV_IDX, - ADDR_CLI_IDX, - __NR_BPF_ADDR_ARRAY_IDX, -}; - -enum bpf_result_array_idx { - EGRESS_SRV_IDX, - EGRESS_CLI_IDX, - INGRESS_LISTEN_IDX, - __NR_BPF_RESULT_ARRAY_IDX, -}; +#include "bpf_tcp_helpers.h" enum bpf_linum_array_idx { EGRESS_LINUM_IDX, @@ -29,27 +17,6 @@ enum bpf_linum_array_idx { struct { __uint(type, BPF_MAP_TYPE_ARRAY); - __uint(max_entries, __NR_BPF_ADDR_ARRAY_IDX); - __type(key, __u32); - __type(value, struct sockaddr_in6); -} addr_map SEC(".maps"); - -struct { - __uint(type, BPF_MAP_TYPE_ARRAY); - __uint(max_entries, __NR_BPF_RESULT_ARRAY_IDX); - __type(key, __u32); - __type(value, struct bpf_sock); -} sock_result_map SEC(".maps"); - -struct { - __uint(type, BPF_MAP_TYPE_ARRAY); - __uint(max_entries, __NR_BPF_RESULT_ARRAY_IDX); - __type(key, __u32); - __type(value, struct bpf_tcp_sock); -} tcp_sock_result_map SEC(".maps"); - -struct { - __uint(type, BPF_MAP_TYPE_ARRAY); __uint(max_entries, __NR_BPF_LINUM_ARRAY_IDX); __type(key, __u32); __type(value, __u32); @@ -74,6 +41,17 @@ struct { 
__type(value, struct bpf_spinlock_cnt); } sk_pkt_out_cnt10 SEC(".maps"); +struct bpf_tcp_sock listen_tp = {}; +struct sockaddr_in6 srv_sa6 = {}; +struct bpf_tcp_sock cli_tp = {}; +struct bpf_tcp_sock srv_tp = {}; +struct bpf_sock listen_sk = {}; +struct bpf_sock srv_sk = {}; +struct bpf_sock cli_sk = {}; +__u64 parent_cg_id = 0; +__u64 child_cg_id = 0; +__u64 lsndtime = 0; + static bool is_loopback6(__u32 *a6) { return !a6[0] && !a6[1] && !a6[2] && a6[3] == bpf_htonl(1); @@ -130,62 +108,86 @@ static void tpcpy(struct bpf_tcp_sock *dst, dst->bytes_acked = src->bytes_acked; } -#define RETURN { \ +/* Always return CG_OK so that no pkt will be filtered out */ +#define CG_OK 1 + +#define RET_LOG() ({ \ linum = __LINE__; \ - bpf_map_update_elem(&linum_map, &linum_idx, &linum, 0); \ - return 1; \ -} + bpf_map_update_elem(&linum_map, &linum_idx, &linum, BPF_NOEXIST); \ + return CG_OK; \ +}) SEC("cgroup_skb/egress") int egress_read_sock_fields(struct __sk_buff *skb) { struct bpf_spinlock_cnt cli_cnt_init = { .lock = 0, .cnt = 0xeB9F }; - __u32 srv_idx = ADDR_SRV_IDX, cli_idx = ADDR_CLI_IDX, result_idx; struct bpf_spinlock_cnt *pkt_out_cnt, *pkt_out_cnt10; - struct sockaddr_in6 *srv_sa6, *cli_sa6; struct bpf_tcp_sock *tp, *tp_ret; struct bpf_sock *sk, *sk_ret; __u32 linum, linum_idx; + struct tcp_sock *ktp; linum_idx = EGRESS_LINUM_IDX; sk = skb->sk; - if (!sk || sk->state == 10) - RETURN; + if (!sk) + RET_LOG(); + /* Not the testing egress traffic or + * TCP_LISTEN (10) socket will be copied at the ingress side. 
+ */ + if (sk->family != AF_INET6 || !is_loopback6(sk->src_ip6) || + sk->state == 10) + return CG_OK; + + if (sk->src_port == bpf_ntohs(srv_sa6.sin6_port)) { + /* Server socket */ + sk_ret = &srv_sk; + tp_ret = &srv_tp; + } else if (sk->dst_port == srv_sa6.sin6_port) { + /* Client socket */ + sk_ret = &cli_sk; + tp_ret = &cli_tp; + } else { + /* Not the testing egress traffic */ + return CG_OK; + } + + /* It must be a fullsock for cgroup_skb/egress prog */ sk = bpf_sk_fullsock(sk); - if (!sk || sk->family != AF_INET6 || sk->protocol != IPPROTO_TCP || - !is_loopback6(sk->src_ip6)) - RETURN; + if (!sk) + RET_LOG(); + + /* Not the testing egress traffic */ + if (sk->protocol != IPPROTO_TCP) + return CG_OK; tp = bpf_tcp_sock(sk); if (!tp) - RETURN; + RET_LOG(); - srv_sa6 = bpf_map_lookup_elem(&addr_map, &srv_idx); - cli_sa6 = bpf_map_lookup_elem(&addr_map, &cli_idx); - if (!srv_sa6 || !cli_sa6) - RETURN; + skcpy(sk_ret, sk); + tpcpy(tp_ret, tp); - if (sk->src_port == bpf_ntohs(srv_sa6->sin6_port)) - result_idx = EGRESS_SRV_IDX; - else if (sk->src_port == bpf_ntohs(cli_sa6->sin6_port)) - result_idx = EGRESS_CLI_IDX; - else - RETURN; + if (sk_ret == &srv_sk) { + ktp = bpf_skc_to_tcp_sock(sk); - sk_ret = bpf_map_lookup_elem(&sock_result_map, &result_idx); - tp_ret = bpf_map_lookup_elem(&tcp_sock_result_map, &result_idx); - if (!sk_ret || !tp_ret) - RETURN; + if (!ktp) + RET_LOG(); - skcpy(sk_ret, sk); - tpcpy(tp_ret, tp); + lsndtime = ktp->lsndtime; + + child_cg_id = bpf_sk_cgroup_id(ktp); + if (!child_cg_id) + RET_LOG(); + + parent_cg_id = bpf_sk_ancestor_cgroup_id(ktp, 2); + if (!parent_cg_id) + RET_LOG(); - if (result_idx == EGRESS_SRV_IDX) { /* The userspace has created it for srv sk */ - pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, sk, 0, 0); - pkt_out_cnt10 = bpf_sk_storage_get(&sk_pkt_out_cnt10, sk, + pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, ktp, 0, 0); + pkt_out_cnt10 = bpf_sk_storage_get(&sk_pkt_out_cnt10, ktp, 0, 0); } else { pkt_out_cnt = 
bpf_sk_storage_get(&sk_pkt_out_cnt, sk, @@ -197,7 +199,7 @@ int egress_read_sock_fields(struct __sk_buff *skb) } if (!pkt_out_cnt || !pkt_out_cnt10) - RETURN; + RET_LOG(); /* Even both cnt and cnt10 have lock defined in their BTF, * intentionally one cnt takes lock while one does not @@ -208,48 +210,44 @@ int egress_read_sock_fields(struct __sk_buff *skb) pkt_out_cnt10->cnt += 10; bpf_spin_unlock(&pkt_out_cnt10->lock); - RETURN; + return CG_OK; } SEC("cgroup_skb/ingress") int ingress_read_sock_fields(struct __sk_buff *skb) { - __u32 srv_idx = ADDR_SRV_IDX, result_idx = INGRESS_LISTEN_IDX; - struct bpf_tcp_sock *tp, *tp_ret; - struct bpf_sock *sk, *sk_ret; - struct sockaddr_in6 *srv_sa6; + struct bpf_tcp_sock *tp; __u32 linum, linum_idx; + struct bpf_sock *sk; linum_idx = INGRESS_LINUM_IDX; sk = skb->sk; - if (!sk || sk->family != AF_INET6 || !is_loopback6(sk->src_ip6)) - RETURN; + if (!sk) + RET_LOG(); - srv_sa6 = bpf_map_lookup_elem(&addr_map, &srv_idx); - if (!srv_sa6 || sk->src_port != bpf_ntohs(srv_sa6->sin6_port)) - RETURN; + /* Not the testing ingress traffic to the server */ + if (sk->family != AF_INET6 || !is_loopback6(sk->src_ip6) || + sk->src_port != bpf_ntohs(srv_sa6.sin6_port)) + return CG_OK; - if (sk->state != 10 && sk->state != 12) - RETURN; + /* Only interested in TCP_LISTEN */ + if (sk->state != 10) + return CG_OK; - sk = bpf_get_listener_sock(sk); + /* It must be a fullsock for cgroup_skb/ingress prog */ + sk = bpf_sk_fullsock(sk); if (!sk) - RETURN; + RET_LOG(); tp = bpf_tcp_sock(sk); if (!tp) - RETURN; - - sk_ret = bpf_map_lookup_elem(&sock_result_map, &result_idx); - tp_ret = bpf_map_lookup_elem(&tcp_sock_result_map, &result_idx); - if (!sk_ret || !tp_ret) - RETURN; + RET_LOG(); - skcpy(sk_ret, sk); - tpcpy(tp_ret, tp); + skcpy(&listen_sk, sk); + tpcpy(&listen_tp, tp); - RETURN; + return CG_OK; } char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_invalid_update.c 
b/tools/testing/selftests/bpf/progs/test_sockmap_invalid_update.c new file mode 100644 index 000000000000..02a59e220cbc --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_sockmap_invalid_update.c @@ -0,0 +1,23 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Cloudflare +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> + +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} map SEC(".maps"); + +SEC("sockops") +int bpf_sockmap(struct bpf_sock_ops *skops) +{ + __u32 key = 0; + + if (skops->sk) + bpf_map_update_elem(&map, &key, skops->sk, 0); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h index 3dca4c2e2418..1858435de7aa 100644 --- a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h +++ b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h @@ -131,39 +131,55 @@ int bpf_prog2(struct __sk_buff *skb) } -SEC("sk_skb3") -int bpf_prog3(struct __sk_buff *skb) +static inline void bpf_write_pass(struct __sk_buff *skb, int offset) { - const int one = 1; - int err, *f, ret = SK_PASS; + int err = bpf_skb_pull_data(skb, 6 + offset); void *data_end; char *c; - err = bpf_skb_pull_data(skb, 19); if (err) - goto tls_out; + return; c = (char *)(long)skb->data; data_end = (void *)(long)skb->data_end; - if (c + 18 < data_end) - memcpy(&c[13], "PASS", 4); + if (c + 5 + offset < data_end) + memcpy(c + offset, "PASS", 4); +} + +SEC("sk_skb3") +int bpf_prog3(struct __sk_buff *skb) +{ + int err, *f, ret = SK_PASS; + const int one = 1; + f = bpf_map_lookup_elem(&sock_skb_opts, &one); if (f && *f) { __u64 flags = 0; ret = 0; flags = *f; + + err = bpf_skb_adjust_room(skb, -13, 0, 0); + if (err) + return SK_DROP; + err = bpf_skb_adjust_room(skb, 4, 0, 0); + if (err) + return SK_DROP; + bpf_write_pass(skb, 0); #ifdef SOCKMAP return bpf_sk_redirect_map(skb, 
&tls_sock_map, ret, flags); #else return bpf_sk_redirect_hash(skb, &tls_sock_map, &ret, flags); #endif } - f = bpf_map_lookup_elem(&sock_skb_opts, &one); if (f && *f) ret = SK_DROP; + err = bpf_skb_adjust_room(skb, 4, 0, 0); + if (err) + return SK_DROP; + bpf_write_pass(skb, 13); tls_out: return ret; } diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_update.c b/tools/testing/selftests/bpf/progs/test_sockmap_update.c new file mode 100644 index 000000000000..9d0c9f28cab2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_sockmap_update.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2020 Cloudflare +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> + +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} src SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} dst_sock_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_SOCKHASH); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u64); +} dst_sock_hash SEC(".maps"); + +SEC("classifier/copy_sock_map") +int copy_sock_map(void *ctx) +{ + struct bpf_sock *sk; + bool failed = false; + __u32 key = 0; + + sk = bpf_map_lookup_elem(&src, &key); + if (!sk) + return SK_DROP; + + if (bpf_map_update_elem(&dst_sock_map, &key, sk, 0)) + failed = true; + + if (bpf_map_update_elem(&dst_sock_hash, &key, sk, 0)) + failed = true; + + bpf_sk_release(sk); + return failed ? 
SK_DROP : SK_PASS; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_subprogs.c b/tools/testing/selftests/bpf/progs/test_subprogs.c new file mode 100644 index 000000000000..d3c5673c0218 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_subprogs.c @@ -0,0 +1,103 @@ +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> + +const char LICENSE[] SEC("license") = "GPL"; + +__noinline int sub1(int x) +{ + return x + 1; +} + +static __noinline int sub5(int v); + +__noinline int sub2(int y) +{ + return sub5(y + 2); +} + +static __noinline int sub3(int z) +{ + return z + 3 + sub1(4); +} + +static __noinline int sub4(int w) +{ + return w + sub3(5) + sub1(6); +} + +/* sub5() is an identitify function, just to test weirder functions layout and + * call patterns + */ +static __noinline int sub5(int v) +{ + return sub1(v) - 1; /* compensates sub1()'s + 1 */ +} + +/* unfortunately verifier rejects `struct task_struct *t` as an unkown pointer + * type, so we need to accept pointer as integer and then cast it inside the + * function + */ +__noinline int get_task_tgid(uintptr_t t) +{ + /* this ensures that CO-RE relocs work in multi-subprogs .text */ + return BPF_CORE_READ((struct task_struct *)(void *)t, tgid); +} + +int res1 = 0; +int res2 = 0; +int res3 = 0; +int res4 = 0; + +SEC("raw_tp/sys_enter") +int prog1(void *ctx) +{ + /* perform some CO-RE relocations to ensure they work with multi-prog + * sections correctly + */ + struct task_struct *t = (void *)bpf_get_current_task(); + + if (!BPF_CORE_READ(t, pid) || !get_task_tgid((uintptr_t)t)) + return 1; + + res1 = sub1(1) + sub3(2); /* (1 + 1) + (2 + 3 + (4 + 1)) = 12 */ + return 0; +} + +SEC("raw_tp/sys_exit") +int prog2(void *ctx) +{ + struct task_struct *t = (void *)bpf_get_current_task(); + + if (!BPF_CORE_READ(t, pid) || !get_task_tgid((uintptr_t)t)) + return 1; + + res2 = sub2(3) + sub3(4); /* (3 + 2) + (4 + 3 + (4 + 1)) = 17 */ + return 
0; +} + +/* prog3 has the same section name as prog1 */ +SEC("raw_tp/sys_enter") +int prog3(void *ctx) +{ + struct task_struct *t = (void *)bpf_get_current_task(); + + if (!BPF_CORE_READ(t, pid) || !get_task_tgid((uintptr_t)t)) + return 1; + + res3 = sub3(5) + 6; /* (5 + 3 + (4 + 1)) + 6 = 19 */ + return 0; +} + +/* prog4 has the same section name as prog2 */ +SEC("raw_tp/sys_exit") +int prog4(void *ctx) +{ + struct task_struct *t = (void *)bpf_get_current_task(); + + if (!BPF_CORE_READ(t, pid) || !get_task_tgid((uintptr_t)t)) + return 1; + + res4 = sub4(7) + sub1(8); /* (7 + (5 + 3 + (4 + 1)) + (6 + 1)) + (8 + 1) = 36 */ + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c b/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c index 458b0d69133e..553a282d816a 100644 --- a/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c +++ b/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c @@ -18,11 +18,11 @@ #define MAX_ULONG_STR_LEN 7 #define MAX_VALUE_STR_LEN (TCP_MEM_LOOPS * MAX_ULONG_STR_LEN) +const char tcp_mem_name[] = "net/ipv4/tcp_mem/very_very_very_very_long_pointless_string"; static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx) { - volatile char tcp_mem_name[] = "net/ipv4/tcp_mem/very_very_very_very_long_pointless_string"; unsigned char i; - char name[64]; + char name[sizeof(tcp_mem_name)]; int ret; memset(name, 0, sizeof(name)); diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c b/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c index b2e6f9b0894d..2b64bc563a12 100644 --- a/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c +++ b/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c @@ -18,11 +18,11 @@ #define MAX_ULONG_STR_LEN 7 #define MAX_VALUE_STR_LEN (TCP_MEM_LOOPS * MAX_ULONG_STR_LEN) +const char tcp_mem_name[] = "net/ipv4/tcp_mem/very_very_very_very_long_pointless_string_to_stress_byte_loop"; static __attribute__((noinline)) int is_tcp_mem(struct bpf_sysctl *ctx) { - volatile char 
tcp_mem_name[] = "net/ipv4/tcp_mem/very_very_very_very_long_pointless_string_to_stress_byte_loop"; unsigned char i; - char name[64]; + char name[sizeof(tcp_mem_name)]; int ret; memset(name, 0, sizeof(name)); diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c index 50525235380e..5489823c83fc 100644 --- a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c +++ b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c @@ -19,11 +19,11 @@ #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) #endif +const char tcp_mem_name[] = "net/ipv4/tcp_mem"; static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx) { - char tcp_mem_name[] = "net/ipv4/tcp_mem"; unsigned char i; - char name[64]; + char name[sizeof(tcp_mem_name)]; int ret; memset(name, 0, sizeof(name)); diff --git a/tools/testing/selftests/bpf/progs/test_tc_neigh.c b/tools/testing/selftests/bpf/progs/test_tc_neigh.c new file mode 100644 index 000000000000..fe182616b112 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_tc_neigh.c @@ -0,0 +1,148 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stdint.h> +#include <stdbool.h> + +#include <linux/bpf.h> +#include <linux/stddef.h> +#include <linux/pkt_cls.h> +#include <linux/if_ether.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/ipv6.h> + +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> + +#ifndef ctx_ptr +# define ctx_ptr(field) (void *)(long)(field) +#endif + +#define ip4_src 0xac100164 /* 172.16.1.100 */ +#define ip4_dst 0xac100264 /* 172.16.2.100 */ + +#define ip6_src { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ + 0x00, 0x01, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe } +#define ip6_dst { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \ + 0x00, 0x02, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe } + +#ifndef v6_equal +# define v6_equal(a, b) (a.s6_addr32[0] == b.s6_addr32[0] && \ + a.s6_addr32[1] == b.s6_addr32[1] && \ + a.s6_addr32[2] == b.s6_addr32[2] && \ + 
a.s6_addr32[3] == b.s6_addr32[3]) +#endif + +enum { + dev_src, + dev_dst, +}; + +struct bpf_map_def SEC("maps") ifindex_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(int), + .max_entries = 2, +}; + +static __always_inline bool is_remote_ep_v4(struct __sk_buff *skb, + __be32 addr) +{ + void *data_end = ctx_ptr(skb->data_end); + void *data = ctx_ptr(skb->data); + struct iphdr *ip4h; + + if (data + sizeof(struct ethhdr) > data_end) + return false; + + ip4h = (struct iphdr *)(data + sizeof(struct ethhdr)); + if ((void *)(ip4h + 1) > data_end) + return false; + + return ip4h->daddr == addr; +} + +static __always_inline bool is_remote_ep_v6(struct __sk_buff *skb, + struct in6_addr addr) +{ + void *data_end = ctx_ptr(skb->data_end); + void *data = ctx_ptr(skb->data); + struct ipv6hdr *ip6h; + + if (data + sizeof(struct ethhdr) > data_end) + return false; + + ip6h = (struct ipv6hdr *)(data + sizeof(struct ethhdr)); + if ((void *)(ip6h + 1) > data_end) + return false; + + return v6_equal(ip6h->daddr, addr); +} + +static __always_inline int get_dev_ifindex(int which) +{ + int *ifindex = bpf_map_lookup_elem(&ifindex_map, &which); + + return ifindex ? *ifindex : 0; +} + +SEC("chk_egress") int tc_chk(struct __sk_buff *skb) +{ + void *data_end = ctx_ptr(skb->data_end); + void *data = ctx_ptr(skb->data); + __u32 *raw = data; + + if (data + sizeof(struct ethhdr) > data_end) + return TC_ACT_SHOT; + + return !raw[0] && !raw[1] && !raw[2] ? 
TC_ACT_SHOT : TC_ACT_OK; +} + +SEC("dst_ingress") int tc_dst(struct __sk_buff *skb) +{ + __u8 zero[ETH_ALEN * 2]; + bool redirect = false; + + switch (skb->protocol) { + case __bpf_constant_htons(ETH_P_IP): + redirect = is_remote_ep_v4(skb, __bpf_constant_htonl(ip4_src)); + break; + case __bpf_constant_htons(ETH_P_IPV6): + redirect = is_remote_ep_v6(skb, (struct in6_addr)ip6_src); + break; + } + + if (!redirect) + return TC_ACT_OK; + + __builtin_memset(&zero, 0, sizeof(zero)); + if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0) + return TC_ACT_SHOT; + + return bpf_redirect_neigh(get_dev_ifindex(dev_src), 0); +} + +SEC("src_ingress") int tc_src(struct __sk_buff *skb) +{ + __u8 zero[ETH_ALEN * 2]; + bool redirect = false; + + switch (skb->protocol) { + case __bpf_constant_htons(ETH_P_IP): + redirect = is_remote_ep_v4(skb, __bpf_constant_htonl(ip4_dst)); + break; + case __bpf_constant_htons(ETH_P_IPV6): + redirect = is_remote_ep_v6(skb, (struct in6_addr)ip6_dst); + break; + } + + if (!redirect) + return TC_ACT_OK; + + __builtin_memset(&zero, 0, sizeof(zero)); + if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0) + return TC_ACT_SHOT; + + return bpf_redirect_neigh(get_dev_ifindex(dev_dst), 0); +} + +char __license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_tc_peer.c b/tools/testing/selftests/bpf/progs/test_tc_peer.c new file mode 100644 index 000000000000..fc84a7685aa2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_tc_peer.c @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stdint.h> +#include <stdbool.h> + +#include <linux/bpf.h> +#include <linux/stddef.h> +#include <linux/pkt_cls.h> + +#include <bpf/bpf_helpers.h> + +enum { + dev_src, + dev_dst, +}; + +struct bpf_map_def SEC("maps") ifindex_map = { + .type = BPF_MAP_TYPE_ARRAY, + .key_size = sizeof(int), + .value_size = sizeof(int), + .max_entries = 2, +}; + +static __always_inline int get_dev_ifindex(int which) +{ + int *ifindex = 
bpf_map_lookup_elem(&ifindex_map, &which); + + return ifindex ? *ifindex : 0; +} + +SEC("chk_egress") int tc_chk(struct __sk_buff *skb) +{ + return TC_ACT_SHOT; +} + +SEC("dst_ingress") int tc_dst(struct __sk_buff *skb) +{ + return bpf_redirect_peer(get_dev_ifindex(dev_src), 0); +} + +SEC("src_ingress") int tc_src(struct __sk_buff *skb) +{ + return bpf_redirect_peer(get_dev_ifindex(dev_dst), 0); +} + +char __license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c b/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c new file mode 100644 index 000000000000..678bd0fad29e --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c @@ -0,0 +1,626 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include <stddef.h> +#include <errno.h> +#include <stdbool.h> +#include <sys/types.h> +#include <sys/socket.h> +#include <linux/tcp.h> +#include <linux/socket.h> +#include <linux/bpf.h> +#include <linux/types.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> +#define BPF_PROG_TEST_TCP_HDR_OPTIONS +#include "test_tcp_hdr_options.h" + +#ifndef sizeof_field +#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER)) +#endif + +__u8 test_kind = TCPOPT_EXP; +__u16 test_magic = 0xeB9F; +__u32 inherit_cb_flags = 0; + +struct bpf_test_option passive_synack_out = {}; +struct bpf_test_option passive_fin_out = {}; + +struct bpf_test_option passive_estab_in = {}; +struct bpf_test_option passive_fin_in = {}; + +struct bpf_test_option active_syn_out = {}; +struct bpf_test_option active_fin_out = {}; + +struct bpf_test_option active_estab_in = {}; +struct bpf_test_option active_fin_in = {}; + +struct { + __uint(type, BPF_MAP_TYPE_SK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct hdr_stg); +} hdr_stg_map SEC(".maps"); + +static bool skops_want_cookie(const struct bpf_sock_ops *skops) +{ + return skops->args[0] == 
BPF_WRITE_HDR_TCP_SYNACK_COOKIE; +} + +static bool skops_current_mss(const struct bpf_sock_ops *skops) +{ + return skops->args[0] == BPF_WRITE_HDR_TCP_CURRENT_MSS; +} + +static __u8 option_total_len(__u8 flags) +{ + __u8 i, len = 1; /* +1 for flags */ + + if (!flags) + return 0; + + /* RESEND bit does not use a byte */ + for (i = OPTION_RESEND + 1; i < __NR_OPTION_FLAGS; i++) + len += !!TEST_OPTION_FLAGS(flags, i); + + if (test_kind == TCPOPT_EXP) + return len + TCP_BPF_EXPOPT_BASE_LEN; + else + return len + 2; /* +1 kind, +1 kind-len */ +} + +static void write_test_option(const struct bpf_test_option *test_opt, + __u8 *data) +{ + __u8 offset = 0; + + data[offset++] = test_opt->flags; + if (TEST_OPTION_FLAGS(test_opt->flags, OPTION_MAX_DELACK_MS)) + data[offset++] = test_opt->max_delack_ms; + + if (TEST_OPTION_FLAGS(test_opt->flags, OPTION_RAND)) + data[offset++] = test_opt->rand; +} + +static int store_option(struct bpf_sock_ops *skops, + const struct bpf_test_option *test_opt) +{ + union { + struct tcp_exprm_opt exprm; + struct tcp_opt regular; + } write_opt; + int err; + + if (test_kind == TCPOPT_EXP) { + write_opt.exprm.kind = TCPOPT_EXP; + write_opt.exprm.len = option_total_len(test_opt->flags); + write_opt.exprm.magic = __bpf_htons(test_magic); + write_opt.exprm.data32 = 0; + write_test_option(test_opt, write_opt.exprm.data); + err = bpf_store_hdr_opt(skops, &write_opt.exprm, + sizeof(write_opt.exprm), 0); + } else { + write_opt.regular.kind = test_kind; + write_opt.regular.len = option_total_len(test_opt->flags); + write_opt.regular.data32 = 0; + write_test_option(test_opt, write_opt.regular.data); + err = bpf_store_hdr_opt(skops, &write_opt.regular, + sizeof(write_opt.regular), 0); + } + + if (err) + RET_CG_ERR(err); + + return CG_OK; +} + +static int parse_test_option(struct bpf_test_option *opt, const __u8 *start) +{ + opt->flags = *start++; + + if (TEST_OPTION_FLAGS(opt->flags, OPTION_MAX_DELACK_MS)) + opt->max_delack_ms = *start++; + + if 
(TEST_OPTION_FLAGS(opt->flags, OPTION_RAND)) + opt->rand = *start++; + + return 0; +} + +static int load_option(struct bpf_sock_ops *skops, + struct bpf_test_option *test_opt, bool from_syn) +{ + union { + struct tcp_exprm_opt exprm; + struct tcp_opt regular; + } search_opt; + int ret, load_flags = from_syn ? BPF_LOAD_HDR_OPT_TCP_SYN : 0; + + if (test_kind == TCPOPT_EXP) { + search_opt.exprm.kind = TCPOPT_EXP; + search_opt.exprm.len = 4; + search_opt.exprm.magic = __bpf_htons(test_magic); + search_opt.exprm.data32 = 0; + ret = bpf_load_hdr_opt(skops, &search_opt.exprm, + sizeof(search_opt.exprm), load_flags); + if (ret < 0) + return ret; + return parse_test_option(test_opt, search_opt.exprm.data); + } else { + search_opt.regular.kind = test_kind; + search_opt.regular.len = 0; + search_opt.regular.data32 = 0; + ret = bpf_load_hdr_opt(skops, &search_opt.regular, + sizeof(search_opt.regular), load_flags); + if (ret < 0) + return ret; + return parse_test_option(test_opt, search_opt.regular.data); + } +} + +static int synack_opt_len(struct bpf_sock_ops *skops) +{ + struct bpf_test_option test_opt = {}; + __u8 optlen; + int err; + + if (!passive_synack_out.flags) + return CG_OK; + + err = load_option(skops, &test_opt, true); + + /* bpf_test_option is not found */ + if (err == -ENOMSG) + return CG_OK; + + if (err) + RET_CG_ERR(err); + + optlen = option_total_len(passive_synack_out.flags); + if (optlen) { + err = bpf_reserve_hdr_opt(skops, optlen, 0); + if (err) + RET_CG_ERR(err); + } + + return CG_OK; +} + +static int write_synack_opt(struct bpf_sock_ops *skops) +{ + struct bpf_test_option opt; + + if (!passive_synack_out.flags) + /* We should not even be called since no header + * space has been reserved. 
+ */ + RET_CG_ERR(0); + + opt = passive_synack_out; + if (skops_want_cookie(skops)) + SET_OPTION_FLAGS(opt.flags, OPTION_RESEND); + + return store_option(skops, &opt); +} + +static int syn_opt_len(struct bpf_sock_ops *skops) +{ + __u8 optlen; + int err; + + if (!active_syn_out.flags) + return CG_OK; + + optlen = option_total_len(active_syn_out.flags); + if (optlen) { + err = bpf_reserve_hdr_opt(skops, optlen, 0); + if (err) + RET_CG_ERR(err); + } + + return CG_OK; +} + +static int write_syn_opt(struct bpf_sock_ops *skops) +{ + if (!active_syn_out.flags) + RET_CG_ERR(0); + + return store_option(skops, &active_syn_out); +} + +static int fin_opt_len(struct bpf_sock_ops *skops) +{ + struct bpf_test_option *opt; + struct hdr_stg *hdr_stg; + __u8 optlen; + int err; + + if (!skops->sk) + RET_CG_ERR(0); + + hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0); + if (!hdr_stg) + RET_CG_ERR(0); + + if (hdr_stg->active) + opt = &active_fin_out; + else + opt = &passive_fin_out; + + optlen = option_total_len(opt->flags); + if (optlen) { + err = bpf_reserve_hdr_opt(skops, optlen, 0); + if (err) + RET_CG_ERR(err); + } + + return CG_OK; +} + +static int write_fin_opt(struct bpf_sock_ops *skops) +{ + struct bpf_test_option *opt; + struct hdr_stg *hdr_stg; + + if (!skops->sk) + RET_CG_ERR(0); + + hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0); + if (!hdr_stg) + RET_CG_ERR(0); + + if (hdr_stg->active) + opt = &active_fin_out; + else + opt = &passive_fin_out; + + if (!opt->flags) + RET_CG_ERR(0); + + return store_option(skops, opt); +} + +static int resend_in_ack(struct bpf_sock_ops *skops) +{ + struct hdr_stg *hdr_stg; + + if (!skops->sk) + return -1; + + hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0); + if (!hdr_stg) + return -1; + + return !!hdr_stg->resend_syn; +} + +static int nodata_opt_len(struct bpf_sock_ops *skops) +{ + int resend; + + resend = resend_in_ack(skops); + if (resend < 0) + RET_CG_ERR(0); + + if (resend) + return 
syn_opt_len(skops); + + return CG_OK; +} + +static int write_nodata_opt(struct bpf_sock_ops *skops) +{ + int resend; + + resend = resend_in_ack(skops); + if (resend < 0) + RET_CG_ERR(0); + + if (resend) + return write_syn_opt(skops); + + return CG_OK; +} + +static int data_opt_len(struct bpf_sock_ops *skops) +{ + /* Same as the nodata version. Mostly to show + * an example usage on skops->skb_len. + */ + return nodata_opt_len(skops); +} + +static int write_data_opt(struct bpf_sock_ops *skops) +{ + return write_nodata_opt(skops); +} + +static int current_mss_opt_len(struct bpf_sock_ops *skops) +{ + /* Reserve maximum that may be needed */ + int err; + + err = bpf_reserve_hdr_opt(skops, option_total_len(OPTION_MASK), 0); + if (err) + RET_CG_ERR(err); + + return CG_OK; +} + +static int handle_hdr_opt_len(struct bpf_sock_ops *skops) +{ + __u8 tcp_flags = skops_tcp_flags(skops); + + if ((tcp_flags & TCPHDR_SYNACK) == TCPHDR_SYNACK) + return synack_opt_len(skops); + + if (tcp_flags & TCPHDR_SYN) + return syn_opt_len(skops); + + if (tcp_flags & TCPHDR_FIN) + return fin_opt_len(skops); + + if (skops_current_mss(skops)) + /* The kernel is calculating the MSS */ + return current_mss_opt_len(skops); + + if (skops->skb_len) + return data_opt_len(skops); + + return nodata_opt_len(skops); +} + +static int handle_write_hdr_opt(struct bpf_sock_ops *skops) +{ + __u8 tcp_flags = skops_tcp_flags(skops); + struct tcphdr *th; + + if ((tcp_flags & TCPHDR_SYNACK) == TCPHDR_SYNACK) + return write_synack_opt(skops); + + if (tcp_flags & TCPHDR_SYN) + return write_syn_opt(skops); + + if (tcp_flags & TCPHDR_FIN) + return write_fin_opt(skops); + + th = skops->skb_data; + if (th + 1 > skops->skb_data_end) + RET_CG_ERR(0); + + if (skops->skb_len > tcp_hdrlen(th)) + return write_data_opt(skops); + + return write_nodata_opt(skops); +} + +static int set_delack_max(struct bpf_sock_ops *skops, __u8 max_delack_ms) +{ + __u32 max_delack_us = max_delack_ms * 1000; + + return bpf_setsockopt(skops, 
SOL_TCP, TCP_BPF_DELACK_MAX, + &max_delack_us, sizeof(max_delack_us)); +} + +static int set_rto_min(struct bpf_sock_ops *skops, __u8 peer_max_delack_ms) +{ + __u32 min_rto_us = peer_max_delack_ms * 1000; + + return bpf_setsockopt(skops, SOL_TCP, TCP_BPF_RTO_MIN, &min_rto_us, + sizeof(min_rto_us)); +} + +static int handle_active_estab(struct bpf_sock_ops *skops) +{ + struct hdr_stg init_stg = { + .active = true, + }; + int err; + + err = load_option(skops, &active_estab_in, false); + if (err && err != -ENOMSG) + RET_CG_ERR(err); + + init_stg.resend_syn = TEST_OPTION_FLAGS(active_estab_in.flags, + OPTION_RESEND); + if (!skops->sk || !bpf_sk_storage_get(&hdr_stg_map, skops->sk, + &init_stg, + BPF_SK_STORAGE_GET_F_CREATE)) + RET_CG_ERR(0); + + if (init_stg.resend_syn) + /* Don't clear the write_hdr cb now because + * the ACK may get lost and retransmit may + * be needed. + * + * PARSE_ALL_HDR cb flag is set to learn if this + * resend_syn option has received by the peer. + * + * The header option will be resent until a valid + * packet is received at handle_parse_hdr() + * and all hdr cb flags will be cleared in + * handle_parse_hdr(). + */ + set_parse_all_hdr_cb_flags(skops); + else if (!active_fin_out.flags) + /* No options will be written from now */ + clear_hdr_cb_flags(skops); + + if (active_syn_out.max_delack_ms) { + err = set_delack_max(skops, active_syn_out.max_delack_ms); + if (err) + RET_CG_ERR(err); + } + + if (active_estab_in.max_delack_ms) { + err = set_rto_min(skops, active_estab_in.max_delack_ms); + if (err) + RET_CG_ERR(err); + } + + return CG_OK; +} + +static int handle_passive_estab(struct bpf_sock_ops *skops) +{ + struct hdr_stg init_stg = {}; + struct tcphdr *th; + int err; + + inherit_cb_flags = skops->bpf_sock_ops_cb_flags; + + err = load_option(skops, &passive_estab_in, true); + if (err == -ENOENT) { + /* saved_syn is not found. It was in syncookie mode. 
+ * We have asked the active side to resend the options + * in ACK, so try to find the bpf_test_option from ACK now. + */ + err = load_option(skops, &passive_estab_in, false); + init_stg.syncookie = true; + } + + /* ENOMSG: The bpf_test_option is not found which is fine. + * Bail out now for all other errors. + */ + if (err && err != -ENOMSG) + RET_CG_ERR(err); + + th = skops->skb_data; + if (th + 1 > skops->skb_data_end) + RET_CG_ERR(0); + + if (th->syn) { + /* Fastopen */ + + /* Cannot clear cb_flags to stop write_hdr cb. + * synack is not sent yet for fast open. + * Even it was, the synack may need to be retransmitted. + * + * PARSE_ALL_HDR cb flag is set to learn + * if synack has reached the peer. + * All cb_flags will be cleared in handle_parse_hdr(). + */ + set_parse_all_hdr_cb_flags(skops); + init_stg.fastopen = true; + } else if (!passive_fin_out.flags) { + /* No options will be written from now */ + clear_hdr_cb_flags(skops); + } + + if (!skops->sk || + !bpf_sk_storage_get(&hdr_stg_map, skops->sk, &init_stg, + BPF_SK_STORAGE_GET_F_CREATE)) + RET_CG_ERR(0); + + if (passive_synack_out.max_delack_ms) { + err = set_delack_max(skops, passive_synack_out.max_delack_ms); + if (err) + RET_CG_ERR(err); + } + + if (passive_estab_in.max_delack_ms) { + err = set_rto_min(skops, passive_estab_in.max_delack_ms); + if (err) + RET_CG_ERR(err); + } + + return CG_OK; +} + +static int handle_parse_hdr(struct bpf_sock_ops *skops) +{ + struct hdr_stg *hdr_stg; + struct tcphdr *th; + + if (!skops->sk) + RET_CG_ERR(0); + + th = skops->skb_data; + if (th + 1 > skops->skb_data_end) + RET_CG_ERR(0); + + hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0); + if (!hdr_stg) + RET_CG_ERR(0); + + if (hdr_stg->resend_syn || hdr_stg->fastopen) + /* The PARSE_ALL_HDR cb flag was turned on + * to ensure that the previously written + * options have reached the peer. 
+ * Those previously written option includes: + * - Active side: resend_syn in ACK during syncookie + * or + * - Passive side: SYNACK during fastopen + * + * A valid packet has been received here after + * the 3WHS, so the PARSE_ALL_HDR cb flag + * can be cleared now. + */ + clear_parse_all_hdr_cb_flags(skops); + + if (hdr_stg->resend_syn && !active_fin_out.flags) + /* Active side resent the syn option in ACK + * because the server was in syncookie mode. + * A valid packet has been received, so + * clear header cb flags if there is no + * more option to send. + */ + clear_hdr_cb_flags(skops); + + if (hdr_stg->fastopen && !passive_fin_out.flags) + /* Passive side was in fastopen. + * A valid packet has been received, so + * the SYNACK has reached the peer. + * Clear header cb flags if there is no more + * option to send. + */ + clear_hdr_cb_flags(skops); + + if (th->fin) { + struct bpf_test_option *fin_opt; + int err; + + if (hdr_stg->active) + fin_opt = &active_fin_in; + else + fin_opt = &passive_fin_in; + + err = load_option(skops, fin_opt, false); + if (err && err != -ENOMSG) + RET_CG_ERR(err); + } + + return CG_OK; +} + +SEC("sockops/estab") +int estab(struct bpf_sock_ops *skops) +{ + int true_val = 1; + + switch (skops->op) { + case BPF_SOCK_OPS_TCP_LISTEN_CB: + bpf_setsockopt(skops, SOL_TCP, TCP_SAVE_SYN, + &true_val, sizeof(true_val)); + set_hdr_cb_flags(skops, BPF_SOCK_OPS_STATE_CB_FLAG); + break; + case BPF_SOCK_OPS_TCP_CONNECT_CB: + set_hdr_cb_flags(skops, 0); + break; + case BPF_SOCK_OPS_PARSE_HDR_OPT_CB: + return handle_parse_hdr(skops); + case BPF_SOCK_OPS_HDR_OPT_LEN_CB: + return handle_hdr_opt_len(skops); + case BPF_SOCK_OPS_WRITE_HDR_OPT_CB: + return handle_write_hdr_opt(skops); + case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB: + return handle_passive_estab(skops); + case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB: + return handle_active_estab(skops); + } + + return CG_OK; +} + +char _license[] SEC("license") = "GPL"; diff --git 
a/tools/testing/selftests/bpf/progs/test_trace_ext.c b/tools/testing/selftests/bpf/progs/test_trace_ext.c new file mode 100644 index 000000000000..d19a634d0e78 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_trace_ext.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2019 Facebook +#include <linux/bpf.h> +#include <stdbool.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> +#include <bpf/bpf_tracing.h> + +__u64 ext_called = 0; + +SEC("freplace/test_pkt_md_access") +int test_pkt_md_access_new(struct __sk_buff *skb) +{ + ext_called = skb->len; + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_trace_ext_tracing.c b/tools/testing/selftests/bpf/progs/test_trace_ext_tracing.c new file mode 100644 index 000000000000..52f3baf98f20 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_trace_ext_tracing.c @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +__u64 fentry_called = 0; + +SEC("fentry/test_pkt_md_access_new") +int BPF_PROG(fentry, struct sk_buff *skb) +{ + fentry_called = skb->len; + return 0; +} + +__u64 fexit_called = 0; + +SEC("fexit/test_pkt_md_access_new") +int BPF_PROG(fexit, struct sk_buff *skb) +{ + fexit_called = skb->len; + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_vmlinux.c b/tools/testing/selftests/bpf/progs/test_vmlinux.c index 29fa09d6a6c6..e9dfa0313d1b 100644 --- a/tools/testing/selftests/bpf/progs/test_vmlinux.c +++ b/tools/testing/selftests/bpf/progs/test_vmlinux.c @@ -19,12 +19,14 @@ SEC("tp/syscalls/sys_enter_nanosleep") int handle__tp(struct trace_event_raw_sys_enter *args) { struct __kernel_timespec *ts; + long tv_nsec; if (args->id != __NR_nanosleep) return 0; ts = (void *)args->args[0]; - if (BPF_CORE_READ(ts, tv_nsec) != MY_TV_NSEC) + if (bpf_probe_read_user(&tv_nsec, 
sizeof(ts->tv_nsec), &ts->tv_nsec) || + tv_nsec != MY_TV_NSEC) return 0; tp_called = true; @@ -35,12 +37,14 @@ SEC("raw_tp/sys_enter") int BPF_PROG(handle__raw_tp, struct pt_regs *regs, long id) { struct __kernel_timespec *ts; + long tv_nsec; if (id != __NR_nanosleep) return 0; ts = (void *)PT_REGS_PARM1_CORE(regs); - if (BPF_CORE_READ(ts, tv_nsec) != MY_TV_NSEC) + if (bpf_probe_read_user(&tv_nsec, sizeof(ts->tv_nsec), &ts->tv_nsec) || + tv_nsec != MY_TV_NSEC) return 0; raw_tp_called = true; @@ -51,12 +55,14 @@ SEC("tp_btf/sys_enter") int BPF_PROG(handle__tp_btf, struct pt_regs *regs, long id) { struct __kernel_timespec *ts; + long tv_nsec; if (id != __NR_nanosleep) return 0; ts = (void *)PT_REGS_PARM1_CORE(regs); - if (BPF_CORE_READ(ts, tv_nsec) != MY_TV_NSEC) + if (bpf_probe_read_user(&tv_nsec, sizeof(ts->tv_nsec), &ts->tv_nsec) || + tv_nsec != MY_TV_NSEC) return 0; tp_btf_called = true; diff --git a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c index 8beecec166d9..3a67921f62b5 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c @@ -16,7 +16,7 @@ #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> -static __u32 rol32(__u32 word, unsigned int shift) +static __always_inline __u32 rol32(__u32 word, unsigned int shift) { return (word << shift) | (word >> ((-shift) & 31)); } @@ -49,7 +49,7 @@ static __u32 rol32(__u32 word, unsigned int shift) typedef unsigned int u32; -static __attribute__ ((noinline)) +static __noinline u32 jhash(const void *key, u32 length, u32 initval) { u32 a, b, c; @@ -86,7 +86,7 @@ u32 jhash(const void *key, u32 length, u32 initval) return c; } -__attribute__ ((noinline)) +__noinline u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval) { a += initval; @@ -96,7 +96,7 @@ u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval) return c; } -__attribute__ ((noinline)) +__noinline u32 jhash_2words(u32 a, u32 b, 
u32 initval) { return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2)); @@ -213,7 +213,7 @@ struct eth_hdr { unsigned short eth_proto; }; -static inline __u64 calc_offset(bool is_ipv6, bool is_icmp) +static __noinline __u64 calc_offset(bool is_ipv6, bool is_icmp) { __u64 off = sizeof(struct eth_hdr); if (is_ipv6) { @@ -797,8 +797,8 @@ out: return XDP_DROP; } -__attribute__ ((section("xdp-test"), used)) -int balancer_ingress(struct xdp_md *ctx) +SEC("xdp-test-v4") +int balancer_ingress_v4(struct xdp_md *ctx) { void *data = (void *)(long)ctx->data; void *data_end = (void *)(long)ctx->data_end; @@ -812,11 +812,27 @@ int balancer_ingress(struct xdp_md *ctx) eth_proto = bpf_ntohs(eth->eth_proto); if (eth_proto == ETH_P_IP) return process_packet(data, nh_off, data_end, 0, ctx); - else if (eth_proto == ETH_P_IPV6) + else + return XDP_DROP; +} + +SEC("xdp-test-v6") +int balancer_ingress_v6(struct xdp_md *ctx) +{ + void *data = (void *)(long)ctx->data; + void *data_end = (void *)(long)ctx->data_end; + struct eth_hdr *eth = data; + __u32 eth_proto; + __u32 nh_off; + + nh_off = sizeof(struct eth_hdr); + if (data + nh_off > data_end) + return XDP_DROP; + eth_proto = bpf_ntohs(eth->eth_proto); + if (eth_proto == ETH_P_IPV6) return process_packet(data, nh_off, data_end, 1, ctx); else return XDP_DROP; } -char _license[] __attribute__ ((section("license"), used)) = "GPL"; -int _version __attribute__ ((section("version"), used)) = 1; +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c index 8b36b6640e7e..9a4d09590b3d 100644 --- a/tools/testing/selftests/bpf/progs/trigger_bench.c +++ b/tools/testing/selftests/bpf/progs/trigger_bench.c @@ -39,6 +39,13 @@ int bench_trigger_fentry(void *ctx) return 0; } +SEC("fentry.s/__x64_sys_getpgid") +int bench_trigger_fentry_sleep(void *ctx) +{ + __sync_add_and_fetch(&hits, 1); + return 0; +} + SEC("fmod_ret/__x64_sys_getpgid") int 
bench_trigger_fmodret(void *ctx) { diff --git a/tools/testing/selftests/bpf/test_bpftool_build.sh b/tools/testing/selftests/bpf/test_bpftool_build.sh index ac349a5cea7e..2db3c60e1e61 100755 --- a/tools/testing/selftests/bpf/test_bpftool_build.sh +++ b/tools/testing/selftests/bpf/test_bpftool_build.sh @@ -85,6 +85,23 @@ make_with_tmpdir() { echo } +make_doc_and_clean() { + echo -e "\$PWD: $PWD" + echo -e "command: make -s $* doc >/dev/null" + RST2MAN_OPTS="--exit-status=1" make $J -s $* doc + if [ $? -ne 0 ] ; then + ERROR=1 + printf "FAILURE: Errors or warnings when building documentation\n" + fi + ( + if [ $# -ge 1 ] ; then + cd ${@: -1} + fi + make -s doc-clean + ) + echo +} + echo "Trying to build bpftool" echo -e "... through kbuild\n" @@ -145,3 +162,7 @@ make_and_clean make_with_tmpdir OUTPUT make_with_tmpdir O + +echo -e "Checking documentation build\n" +# From tools/bpf/bpftool +make_doc_and_clean diff --git a/tools/testing/selftests/bpf/test_bpftool_metadata.sh b/tools/testing/selftests/bpf/test_bpftool_metadata.sh new file mode 100755 index 000000000000..1bf81b49457a --- /dev/null +++ b/tools/testing/selftests/bpf/test_bpftool_metadata.sh @@ -0,0 +1,82 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +TESTNAME=bpftool_metadata +BPF_FS=$(awk '$3 == "bpf" {print $2; exit}' /proc/mounts) +BPF_DIR=$BPF_FS/test_$TESTNAME + +_cleanup() +{ + set +e + rm -rf $BPF_DIR 2> /dev/null +} + +cleanup_skip() +{ + echo "selftests: $TESTNAME [SKIP]" + _cleanup + + exit $ksft_skip +} + +cleanup() +{ + if [ "$?" = 0 ]; then + echo "selftests: $TESTNAME [PASS]" + else + echo "selftests: $TESTNAME [FAILED]" + fi + _cleanup +} + +if [ $(id -u) -ne 0 ]; then + echo "selftests: $TESTNAME [SKIP] Need root privileges" + exit $ksft_skip +fi + +if [ -z "$BPF_FS" ]; then + echo "selftests: $TESTNAME [SKIP] Could not run test without bpffs mounted" + exit $ksft_skip +fi + +if ! 
bpftool version > /dev/null 2>&1; then + echo "selftests: $TESTNAME [SKIP] Could not run test without bpftool" + exit $ksft_skip +fi + +set -e + +trap cleanup_skip EXIT + +mkdir $BPF_DIR + +trap cleanup EXIT + +bpftool prog load metadata_unused.o $BPF_DIR/unused + +METADATA_PLAIN="$(bpftool prog)" +echo "$METADATA_PLAIN" | grep 'a = "foo"' > /dev/null +echo "$METADATA_PLAIN" | grep 'b = 1' > /dev/null + +bpftool prog --json | grep '"metadata":{"a":"foo","b":1}' > /dev/null + +bpftool map | grep 'metadata.rodata' > /dev/null + +rm $BPF_DIR/unused + +bpftool prog load metadata_used.o $BPF_DIR/used + +METADATA_PLAIN="$(bpftool prog)" +echo "$METADATA_PLAIN" | grep 'a = "bar"' > /dev/null +echo "$METADATA_PLAIN" | grep 'b = 2' > /dev/null + +bpftool prog --json | grep '"metadata":{"a":"bar","b":2}' > /dev/null + +bpftool map | grep 'metadata.rodata' > /dev/null + +rm $BPF_DIR/used + +exit 0 diff --git a/tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c b/tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c index ed253f252cd0..ec53b1ef90d2 100644 --- a/tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c +++ b/tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c @@ -156,4 +156,5 @@ cleanup: bpf_object__close(obj); } } + return 0; } diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 754cf611723e..0d92ebcb335d 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -1274,6 +1274,8 @@ static void __run_parallel(unsigned int tasks, pid_t pid[tasks]; int i; + fflush(stdout); + for (i = 0; i < tasks; i++) { pid[i] = fork(); if (pid[i] == 0) { diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index b1e4dadacd9b..22943b58d752 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -618,7 +618,9 @@ int cd_flavor_subdir(const char *exec_name) if (!flavor) return 0; 
flavor++; - fprintf(stdout, "Switching to flavor '%s' subdirectory...\n", flavor); + if (env.verbosity > VERBOSE_NONE) + fprintf(stdout, "Switching to flavor '%s' subdirectory...\n", flavor); + return chdir(flavor); } diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h index dbb820dde138..238f5f61189e 100644 --- a/tools/testing/selftests/bpf/test_progs.h +++ b/tools/testing/selftests/bpf/test_progs.h @@ -130,6 +130,69 @@ extern int test__join_cgroup(const char *path); #define CHECK_ATTR(condition, tag, format...) \ _CHECK(condition, tag, tattr.duration, format) +#define ASSERT_EQ(actual, expected, name) ({ \ + static int duration = 0; \ + typeof(actual) ___act = (actual); \ + typeof(expected) ___exp = (expected); \ + bool ___ok = ___act == ___exp; \ + CHECK(!___ok, (name), \ + "unexpected %s: actual %lld != expected %lld\n", \ + (name), (long long)(___act), (long long)(___exp)); \ + ___ok; \ +}) + +#define ASSERT_STREQ(actual, expected, name) ({ \ + static int duration = 0; \ + const char *___act = actual; \ + const char *___exp = expected; \ + bool ___ok = strcmp(___act, ___exp) == 0; \ + CHECK(!___ok, (name), \ + "unexpected %s: actual '%s' != expected '%s'\n", \ + (name), ___act, ___exp); \ + ___ok; \ +}) + +#define ASSERT_OK(res, name) ({ \ + static int duration = 0; \ + long long ___res = (res); \ + bool ___ok = ___res == 0; \ + CHECK(!___ok, (name), "unexpected error: %lld\n", ___res); \ + ___ok; \ +}) + +#define ASSERT_ERR(res, name) ({ \ + static int duration = 0; \ + long long ___res = (res); \ + bool ___ok = ___res < 0; \ + CHECK(!___ok, (name), "unexpected success: %lld\n", ___res); \ + ___ok; \ +}) + +#define ASSERT_NULL(ptr, name) ({ \ + static int duration = 0; \ + const void *___res = (ptr); \ + bool ___ok = !___res; \ + CHECK(!___ok, (name), "unexpected pointer: %p\n", ___res); \ + ___ok; \ +}) + +#define ASSERT_OK_PTR(ptr, name) ({ \ + static int duration = 0; \ + const void *___res = (ptr); \ + bool 
___ok = !IS_ERR_OR_NULL(___res); \ + CHECK(!___ok, (name), \ + "unexpected error: %ld\n", PTR_ERR(___res)); \ + ___ok; \ +}) + +#define ASSERT_ERR_PTR(ptr, name) ({ \ + static int duration = 0; \ + const void *___res = (ptr); \ + bool ___ok = IS_ERR(___res); \ + CHECK(!___ok, (name), "unexpected pointer: %p\n", ___res); \ + ___ok; \ +}) + static inline __u64 ptr_to_u64(const void *ptr) { return (__u64) (unsigned long) ptr; diff --git a/tools/testing/selftests/bpf/test_sock_fields.c b/tools/testing/selftests/bpf/test_sock_fields.c deleted file mode 100644 index 6c9f269c396d..000000000000 --- a/tools/testing/selftests/bpf/test_sock_fields.c +++ /dev/null @@ -1,482 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* Copyright (c) 2019 Facebook */ - -#include <sys/socket.h> -#include <sys/epoll.h> -#include <netinet/in.h> -#include <arpa/inet.h> -#include <unistd.h> -#include <stdlib.h> -#include <string.h> -#include <errno.h> - -#include <bpf/bpf.h> -#include <bpf/libbpf.h> - -#include "cgroup_helpers.h" -#include "bpf_rlimit.h" - -enum bpf_addr_array_idx { - ADDR_SRV_IDX, - ADDR_CLI_IDX, - __NR_BPF_ADDR_ARRAY_IDX, -}; - -enum bpf_result_array_idx { - EGRESS_SRV_IDX, - EGRESS_CLI_IDX, - INGRESS_LISTEN_IDX, - __NR_BPF_RESULT_ARRAY_IDX, -}; - -enum bpf_linum_array_idx { - EGRESS_LINUM_IDX, - INGRESS_LINUM_IDX, - __NR_BPF_LINUM_ARRAY_IDX, -}; - -struct bpf_spinlock_cnt { - struct bpf_spin_lock lock; - __u32 cnt; -}; - -#define CHECK(condition, tag, format...) ({ \ - int __ret = !!(condition); \ - if (__ret) { \ - printf("%s(%d):FAIL:%s ", __func__, __LINE__, tag); \ - printf(format); \ - printf("\n"); \ - exit(-1); \ - } \ -}) - -#define TEST_CGROUP "/test-bpf-sock-fields" -#define DATA "Hello BPF!" 
-#define DATA_LEN sizeof(DATA) - -static struct sockaddr_in6 srv_sa6, cli_sa6; -static int sk_pkt_out_cnt10_fd; -static int sk_pkt_out_cnt_fd; -static int linum_map_fd; -static int addr_map_fd; -static int tp_map_fd; -static int sk_map_fd; - -static __u32 addr_srv_idx = ADDR_SRV_IDX; -static __u32 addr_cli_idx = ADDR_CLI_IDX; - -static __u32 egress_srv_idx = EGRESS_SRV_IDX; -static __u32 egress_cli_idx = EGRESS_CLI_IDX; -static __u32 ingress_listen_idx = INGRESS_LISTEN_IDX; - -static __u32 egress_linum_idx = EGRESS_LINUM_IDX; -static __u32 ingress_linum_idx = INGRESS_LINUM_IDX; - -static void init_loopback6(struct sockaddr_in6 *sa6) -{ - memset(sa6, 0, sizeof(*sa6)); - sa6->sin6_family = AF_INET6; - sa6->sin6_addr = in6addr_loopback; -} - -static void print_sk(const struct bpf_sock *sk) -{ - char src_ip4[24], dst_ip4[24]; - char src_ip6[64], dst_ip6[64]; - - inet_ntop(AF_INET, &sk->src_ip4, src_ip4, sizeof(src_ip4)); - inet_ntop(AF_INET6, &sk->src_ip6, src_ip6, sizeof(src_ip6)); - inet_ntop(AF_INET, &sk->dst_ip4, dst_ip4, sizeof(dst_ip4)); - inet_ntop(AF_INET6, &sk->dst_ip6, dst_ip6, sizeof(dst_ip6)); - - printf("state:%u bound_dev_if:%u family:%u type:%u protocol:%u mark:%u priority:%u " - "src_ip4:%x(%s) src_ip6:%x:%x:%x:%x(%s) src_port:%u " - "dst_ip4:%x(%s) dst_ip6:%x:%x:%x:%x(%s) dst_port:%u\n", - sk->state, sk->bound_dev_if, sk->family, sk->type, sk->protocol, - sk->mark, sk->priority, - sk->src_ip4, src_ip4, - sk->src_ip6[0], sk->src_ip6[1], sk->src_ip6[2], sk->src_ip6[3], - src_ip6, sk->src_port, - sk->dst_ip4, dst_ip4, - sk->dst_ip6[0], sk->dst_ip6[1], sk->dst_ip6[2], sk->dst_ip6[3], - dst_ip6, ntohs(sk->dst_port)); -} - -static void print_tp(const struct bpf_tcp_sock *tp) -{ - printf("snd_cwnd:%u srtt_us:%u rtt_min:%u snd_ssthresh:%u rcv_nxt:%u " - "snd_nxt:%u snd:una:%u mss_cache:%u ecn_flags:%u " - "rate_delivered:%u rate_interval_us:%u packets_out:%u " - "retrans_out:%u total_retrans:%u segs_in:%u data_segs_in:%u " - "segs_out:%u data_segs_out:%u 
lost_out:%u sacked_out:%u " - "bytes_received:%llu bytes_acked:%llu\n", - tp->snd_cwnd, tp->srtt_us, tp->rtt_min, tp->snd_ssthresh, - tp->rcv_nxt, tp->snd_nxt, tp->snd_una, tp->mss_cache, - tp->ecn_flags, tp->rate_delivered, tp->rate_interval_us, - tp->packets_out, tp->retrans_out, tp->total_retrans, - tp->segs_in, tp->data_segs_in, tp->segs_out, - tp->data_segs_out, tp->lost_out, tp->sacked_out, - tp->bytes_received, tp->bytes_acked); -} - -static void check_result(void) -{ - struct bpf_tcp_sock srv_tp, cli_tp, listen_tp; - struct bpf_sock srv_sk, cli_sk, listen_sk; - __u32 ingress_linum, egress_linum; - int err; - - err = bpf_map_lookup_elem(linum_map_fd, &egress_linum_idx, - &egress_linum); - CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)", - "err:%d errno:%d", err, errno); - - err = bpf_map_lookup_elem(linum_map_fd, &ingress_linum_idx, - &ingress_linum); - CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)", - "err:%d errno:%d", err, errno); - - err = bpf_map_lookup_elem(sk_map_fd, &egress_srv_idx, &srv_sk); - CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &egress_srv_idx)", - "err:%d errno:%d", err, errno); - err = bpf_map_lookup_elem(tp_map_fd, &egress_srv_idx, &srv_tp); - CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &egress_srv_idx)", - "err:%d errno:%d", err, errno); - - err = bpf_map_lookup_elem(sk_map_fd, &egress_cli_idx, &cli_sk); - CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &egress_cli_idx)", - "err:%d errno:%d", err, errno); - err = bpf_map_lookup_elem(tp_map_fd, &egress_cli_idx, &cli_tp); - CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &egress_cli_idx)", - "err:%d errno:%d", err, errno); - - err = bpf_map_lookup_elem(sk_map_fd, &ingress_listen_idx, &listen_sk); - CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &ingress_listen_idx)", - "err:%d errno:%d", err, errno); - err = bpf_map_lookup_elem(tp_map_fd, &ingress_listen_idx, &listen_tp); - CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &ingress_listen_idx)", - "err:%d 
errno:%d", err, errno); - - printf("listen_sk: "); - print_sk(&listen_sk); - printf("\n"); - - printf("srv_sk: "); - print_sk(&srv_sk); - printf("\n"); - - printf("cli_sk: "); - print_sk(&cli_sk); - printf("\n"); - - printf("listen_tp: "); - print_tp(&listen_tp); - printf("\n"); - - printf("srv_tp: "); - print_tp(&srv_tp); - printf("\n"); - - printf("cli_tp: "); - print_tp(&cli_tp); - printf("\n"); - - CHECK(listen_sk.state != 10 || - listen_sk.family != AF_INET6 || - listen_sk.protocol != IPPROTO_TCP || - memcmp(listen_sk.src_ip6, &in6addr_loopback, - sizeof(listen_sk.src_ip6)) || - listen_sk.dst_ip6[0] || listen_sk.dst_ip6[1] || - listen_sk.dst_ip6[2] || listen_sk.dst_ip6[3] || - listen_sk.src_port != ntohs(srv_sa6.sin6_port) || - listen_sk.dst_port, - "Unexpected listen_sk", - "Check listen_sk output. ingress_linum:%u", - ingress_linum); - - CHECK(srv_sk.state == 10 || - !srv_sk.state || - srv_sk.family != AF_INET6 || - srv_sk.protocol != IPPROTO_TCP || - memcmp(srv_sk.src_ip6, &in6addr_loopback, - sizeof(srv_sk.src_ip6)) || - memcmp(srv_sk.dst_ip6, &in6addr_loopback, - sizeof(srv_sk.dst_ip6)) || - srv_sk.src_port != ntohs(srv_sa6.sin6_port) || - srv_sk.dst_port != cli_sa6.sin6_port, - "Unexpected srv_sk", "Check srv_sk output. egress_linum:%u", - egress_linum); - - CHECK(cli_sk.state == 10 || - !cli_sk.state || - cli_sk.family != AF_INET6 || - cli_sk.protocol != IPPROTO_TCP || - memcmp(cli_sk.src_ip6, &in6addr_loopback, - sizeof(cli_sk.src_ip6)) || - memcmp(cli_sk.dst_ip6, &in6addr_loopback, - sizeof(cli_sk.dst_ip6)) || - cli_sk.src_port != ntohs(cli_sa6.sin6_port) || - cli_sk.dst_port != srv_sa6.sin6_port, - "Unexpected cli_sk", "Check cli_sk output. egress_linum:%u", - egress_linum); - - CHECK(listen_tp.data_segs_out || - listen_tp.data_segs_in || - listen_tp.total_retrans || - listen_tp.bytes_acked, - "Unexpected listen_tp", "Check listen_tp output. 
ingress_linum:%u", - ingress_linum); - - CHECK(srv_tp.data_segs_out != 2 || - srv_tp.data_segs_in || - srv_tp.snd_cwnd != 10 || - srv_tp.total_retrans || - srv_tp.bytes_acked != 2 * DATA_LEN, - "Unexpected srv_tp", "Check srv_tp output. egress_linum:%u", - egress_linum); - - CHECK(cli_tp.data_segs_out || - cli_tp.data_segs_in != 2 || - cli_tp.snd_cwnd != 10 || - cli_tp.total_retrans || - cli_tp.bytes_received != 2 * DATA_LEN, - "Unexpected cli_tp", "Check cli_tp output. egress_linum:%u", - egress_linum); -} - -static void check_sk_pkt_out_cnt(int accept_fd, int cli_fd) -{ - struct bpf_spinlock_cnt pkt_out_cnt = {}, pkt_out_cnt10 = {}; - int err; - - pkt_out_cnt.cnt = ~0; - pkt_out_cnt10.cnt = ~0; - err = bpf_map_lookup_elem(sk_pkt_out_cnt_fd, &accept_fd, &pkt_out_cnt); - if (!err) - err = bpf_map_lookup_elem(sk_pkt_out_cnt10_fd, &accept_fd, - &pkt_out_cnt10); - - /* The bpf prog only counts for fullsock and - * passive conneciton did not become fullsock until 3WHS - * had been finished. - * The bpf prog only counted two data packet out but we - * specially init accept_fd's pkt_out_cnt by 2 in - * init_sk_storage(). Hence, 4 here. - */ - CHECK(err || pkt_out_cnt.cnt != 4 || pkt_out_cnt10.cnt != 40, - "bpf_map_lookup_elem(sk_pkt_out_cnt, &accept_fd)", - "err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u", - err, errno, pkt_out_cnt.cnt, pkt_out_cnt10.cnt); - - pkt_out_cnt.cnt = ~0; - pkt_out_cnt10.cnt = ~0; - err = bpf_map_lookup_elem(sk_pkt_out_cnt_fd, &cli_fd, &pkt_out_cnt); - if (!err) - err = bpf_map_lookup_elem(sk_pkt_out_cnt10_fd, &cli_fd, - &pkt_out_cnt10); - /* Active connection is fullsock from the beginning. - * 1 SYN and 1 ACK during 3WHS - * 2 Acks on data packet. - * - * The bpf_prog initialized it to 0xeB9F. 
- */ - CHECK(err || pkt_out_cnt.cnt != 0xeB9F + 4 || - pkt_out_cnt10.cnt != 0xeB9F + 40, - "bpf_map_lookup_elem(sk_pkt_out_cnt, &cli_fd)", - "err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u", - err, errno, pkt_out_cnt.cnt, pkt_out_cnt10.cnt); -} - -static void init_sk_storage(int sk_fd, __u32 pkt_out_cnt) -{ - struct bpf_spinlock_cnt scnt = {}; - int err; - - scnt.cnt = pkt_out_cnt; - err = bpf_map_update_elem(sk_pkt_out_cnt_fd, &sk_fd, &scnt, - BPF_NOEXIST); - CHECK(err, "bpf_map_update_elem(sk_pkt_out_cnt_fd)", - "err:%d errno:%d", err, errno); - - scnt.cnt *= 10; - err = bpf_map_update_elem(sk_pkt_out_cnt10_fd, &sk_fd, &scnt, - BPF_NOEXIST); - CHECK(err, "bpf_map_update_elem(sk_pkt_out_cnt10_fd)", - "err:%d errno:%d", err, errno); -} - -static void test(void) -{ - int listen_fd, cli_fd, accept_fd, epfd, err; - struct epoll_event ev; - socklen_t addrlen; - int i; - - addrlen = sizeof(struct sockaddr_in6); - ev.events = EPOLLIN; - - epfd = epoll_create(1); - CHECK(epfd == -1, "epoll_create()", "epfd:%d errno:%d", epfd, errno); - - /* Prepare listen_fd */ - listen_fd = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, 0); - CHECK(listen_fd == -1, "socket()", "listen_fd:%d errno:%d", - listen_fd, errno); - - init_loopback6(&srv_sa6); - err = bind(listen_fd, (struct sockaddr *)&srv_sa6, sizeof(srv_sa6)); - CHECK(err, "bind(listen_fd)", "err:%d errno:%d", err, errno); - - err = getsockname(listen_fd, (struct sockaddr *)&srv_sa6, &addrlen); - CHECK(err, "getsockname(listen_fd)", "err:%d errno:%d", err, errno); - - err = listen(listen_fd, 1); - CHECK(err, "listen(listen_fd)", "err:%d errno:%d", err, errno); - - /* Prepare cli_fd */ - cli_fd = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, 0); - CHECK(cli_fd == -1, "socket()", "cli_fd:%d errno:%d", cli_fd, errno); - - init_loopback6(&cli_sa6); - err = bind(cli_fd, (struct sockaddr *)&cli_sa6, sizeof(cli_sa6)); - CHECK(err, "bind(cli_fd)", "err:%d errno:%d", err, errno); - - err = getsockname(cli_fd, (struct sockaddr 
*)&cli_sa6, &addrlen); - CHECK(err, "getsockname(cli_fd)", "err:%d errno:%d", - err, errno); - - /* Update addr_map with srv_sa6 and cli_sa6 */ - err = bpf_map_update_elem(addr_map_fd, &addr_srv_idx, &srv_sa6, 0); - CHECK(err, "map_update", "err:%d errno:%d", err, errno); - - err = bpf_map_update_elem(addr_map_fd, &addr_cli_idx, &cli_sa6, 0); - CHECK(err, "map_update", "err:%d errno:%d", err, errno); - - /* Connect from cli_sa6 to srv_sa6 */ - err = connect(cli_fd, (struct sockaddr *)&srv_sa6, addrlen); - printf("srv_sa6.sin6_port:%u cli_sa6.sin6_port:%u\n\n", - ntohs(srv_sa6.sin6_port), ntohs(cli_sa6.sin6_port)); - CHECK(err && errno != EINPROGRESS, - "connect(cli_fd)", "err:%d errno:%d", err, errno); - - ev.data.fd = listen_fd; - err = epoll_ctl(epfd, EPOLL_CTL_ADD, listen_fd, &ev); - CHECK(err, "epoll_ctl(EPOLL_CTL_ADD, listen_fd)", "err:%d errno:%d", - err, errno); - - /* Accept the connection */ - /* Have some timeout in accept(listen_fd). Just in case. */ - err = epoll_wait(epfd, &ev, 1, 1000); - CHECK(err != 1 || ev.data.fd != listen_fd, - "epoll_wait(listen_fd)", - "err:%d errno:%d ev.data.fd:%d listen_fd:%d", - err, errno, ev.data.fd, listen_fd); - - accept_fd = accept(listen_fd, NULL, NULL); - CHECK(accept_fd == -1, "accept(listen_fd)", "accept_fd:%d errno:%d", - accept_fd, errno); - close(listen_fd); - - ev.data.fd = cli_fd; - err = epoll_ctl(epfd, EPOLL_CTL_ADD, cli_fd, &ev); - CHECK(err, "epoll_ctl(EPOLL_CTL_ADD, cli_fd)", "err:%d errno:%d", - err, errno); - - init_sk_storage(accept_fd, 2); - - for (i = 0; i < 2; i++) { - /* Send some data from accept_fd to cli_fd */ - err = send(accept_fd, DATA, DATA_LEN, 0); - CHECK(err != DATA_LEN, "send(accept_fd)", "err:%d errno:%d", - err, errno); - - /* Have some timeout in recv(cli_fd). Just in case. 
*/ - err = epoll_wait(epfd, &ev, 1, 1000); - CHECK(err != 1 || ev.data.fd != cli_fd, - "epoll_wait(cli_fd)", "err:%d errno:%d ev.data.fd:%d cli_fd:%d", - err, errno, ev.data.fd, cli_fd); - - err = recv(cli_fd, NULL, 0, MSG_TRUNC); - CHECK(err, "recv(cli_fd)", "err:%d errno:%d", err, errno); - } - - check_sk_pkt_out_cnt(accept_fd, cli_fd); - - close(epfd); - close(accept_fd); - close(cli_fd); - - check_result(); -} - -int main(int argc, char **argv) -{ - struct bpf_prog_load_attr attr = { - .file = "test_sock_fields_kern.o", - .prog_type = BPF_PROG_TYPE_CGROUP_SKB, - .prog_flags = BPF_F_TEST_RND_HI32, - }; - int cgroup_fd, egress_fd, ingress_fd, err; - struct bpf_program *ingress_prog; - struct bpf_object *obj; - struct bpf_map *map; - - /* Create a cgroup, get fd, and join it */ - cgroup_fd = cgroup_setup_and_join(TEST_CGROUP); - CHECK(cgroup_fd < 0, "cgroup_setup_and_join()", - "cgroup_fd:%d errno:%d", cgroup_fd, errno); - atexit(cleanup_cgroup_environment); - - err = bpf_prog_load_xattr(&attr, &obj, &egress_fd); - CHECK(err, "bpf_prog_load_xattr()", "err:%d", err); - - ingress_prog = bpf_object__find_program_by_title(obj, - "cgroup_skb/ingress"); - CHECK(!ingress_prog, - "bpf_object__find_program_by_title(cgroup_skb/ingress)", - "not found"); - ingress_fd = bpf_program__fd(ingress_prog); - - err = bpf_prog_attach(egress_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0); - CHECK(err == -1, "bpf_prog_attach(CPF_CGROUP_INET_EGRESS)", - "err:%d errno%d", err, errno); - - err = bpf_prog_attach(ingress_fd, cgroup_fd, - BPF_CGROUP_INET_INGRESS, 0); - CHECK(err == -1, "bpf_prog_attach(CPF_CGROUP_INET_INGRESS)", - "err:%d errno%d", err, errno); - close(cgroup_fd); - - map = bpf_object__find_map_by_name(obj, "addr_map"); - CHECK(!map, "cannot find addr_map", "(null)"); - addr_map_fd = bpf_map__fd(map); - - map = bpf_object__find_map_by_name(obj, "sock_result_map"); - CHECK(!map, "cannot find sock_result_map", "(null)"); - sk_map_fd = bpf_map__fd(map); - - map = 
bpf_object__find_map_by_name(obj, "tcp_sock_result_map"); - CHECK(!map, "cannot find tcp_sock_result_map", "(null)"); - tp_map_fd = bpf_map__fd(map); - - map = bpf_object__find_map_by_name(obj, "linum_map"); - CHECK(!map, "cannot find linum_map", "(null)"); - linum_map_fd = bpf_map__fd(map); - - map = bpf_object__find_map_by_name(obj, "sk_pkt_out_cnt"); - CHECK(!map, "cannot find sk_pkt_out_cnt", "(null)"); - sk_pkt_out_cnt_fd = bpf_map__fd(map); - - map = bpf_object__find_map_by_name(obj, "sk_pkt_out_cnt10"); - CHECK(!map, "cannot find sk_pkt_out_cnt10", "(null)"); - sk_pkt_out_cnt10_fd = bpf_map__fd(map); - - test(); - - bpf_object__close(obj); - cleanup_cgroup_environment(); - - printf("PASS\n"); - - return 0; -} diff --git a/tools/testing/selftests/bpf/test_socket_cookie.c b/tools/testing/selftests/bpf/test_socket_cookie.c index 154a8fd2a48d..ca7ca87e91aa 100644 --- a/tools/testing/selftests/bpf/test_socket_cookie.c +++ b/tools/testing/selftests/bpf/test_socket_cookie.c @@ -151,7 +151,7 @@ static int run_test(int cgfd) } bpf_object__for_each_program(prog, pobj) { - prog_name = bpf_program__title(prog, /*needs_copy*/ false); + prog_name = bpf_program__section_name(prog); if (libbpf_attach_type_by_name(prog_name, &attach_type)) goto err; diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c index 9b6fb00dc7a0..0fa1e421c3d7 100644 --- a/tools/testing/selftests/bpf/test_sockmap.c +++ b/tools/testing/selftests/bpf/test_sockmap.c @@ -86,6 +86,7 @@ int txmsg_ktls_skb_redir; int ktls; int peek_flag; int skb_use_parser; +int txmsg_omit_skb_parser; static const struct option long_options[] = { {"help", no_argument, NULL, 'h' }, @@ -111,6 +112,7 @@ static const struct option long_options[] = { {"txmsg_redir_skb", no_argument, &txmsg_redir_skb, 1 }, {"ktls", no_argument, &ktls, 1 }, {"peek", no_argument, &peek_flag, 1 }, + {"txmsg_omit_skb_parser", no_argument, &txmsg_omit_skb_parser, 1}, {"whitelist", required_argument, NULL, 
'n' }, {"blacklist", required_argument, NULL, 'b' }, {0, 0, NULL, 0 } @@ -175,6 +177,7 @@ static void test_reset(void) txmsg_apply = txmsg_cork = 0; txmsg_ingress = txmsg_redir_skb = 0; txmsg_ktls_skb = txmsg_ktls_skb_drop = txmsg_ktls_skb_redir = 0; + txmsg_omit_skb_parser = 0; skb_use_parser = 0; } @@ -518,28 +521,13 @@ static int msg_verify_data(struct msghdr *msg, int size, int chunk_sz) if (i == 0 && txmsg_ktls_skb) { if (msg->msg_iov[i].iov_len < 4) return -EIO; - if (txmsg_ktls_skb_redir) { - if (memcmp(&d[13], "PASS", 4) != 0) { - fprintf(stderr, - "detected redirect ktls_skb data error with skb ingress update @iov[%i]:%i \"%02x %02x %02x %02x\" != \"PASS\"\n", i, 0, d[13], d[14], d[15], d[16]); - return -EIO; - } - d[13] = 0; - d[14] = 1; - d[15] = 2; - d[16] = 3; - j = 13; - } else if (txmsg_ktls_skb) { - if (memcmp(d, "PASS", 4) != 0) { - fprintf(stderr, - "detected ktls_skb data error with skb ingress update @iov[%i]:%i \"%02x %02x %02x %02x\" != \"PASS\"\n", i, 0, d[0], d[1], d[2], d[3]); - return -EIO; - } - d[0] = 0; - d[1] = 1; - d[2] = 2; - d[3] = 3; + if (memcmp(d, "PASS", 4) != 0) { + fprintf(stderr, + "detected skb data error with skb ingress update @iov[%i]:%i \"%02x %02x %02x %02x\" != \"PASS\"\n", + i, 0, d[0], d[1], d[2], d[3]); + return -EIO; } + j = 4; /* advance index past PASS header */ } for (; j < msg->msg_iov[i].iov_len && size; j++) { @@ -927,13 +915,15 @@ static int run_options(struct sockmap_options *options, int cg_fd, int test) goto run; /* Attach programs to sockmap */ - err = bpf_prog_attach(prog_fd[0], map_fd[0], - BPF_SK_SKB_STREAM_PARSER, 0); - if (err) { - fprintf(stderr, - "ERROR: bpf_prog_attach (sockmap %i->%i): %d (%s)\n", - prog_fd[0], map_fd[0], err, strerror(errno)); - return err; + if (!txmsg_omit_skb_parser) { + err = bpf_prog_attach(prog_fd[0], map_fd[0], + BPF_SK_SKB_STREAM_PARSER, 0); + if (err) { + fprintf(stderr, + "ERROR: bpf_prog_attach (sockmap %i->%i): %d (%s)\n", + prog_fd[0], map_fd[0], err, 
strerror(errno)); + return err; + } } err = bpf_prog_attach(prog_fd[1], map_fd[0], @@ -946,13 +936,15 @@ static int run_options(struct sockmap_options *options, int cg_fd, int test) /* Attach programs to TLS sockmap */ if (txmsg_ktls_skb) { - err = bpf_prog_attach(prog_fd[0], map_fd[8], - BPF_SK_SKB_STREAM_PARSER, 0); - if (err) { - fprintf(stderr, - "ERROR: bpf_prog_attach (TLS sockmap %i->%i): %d (%s)\n", - prog_fd[0], map_fd[8], err, strerror(errno)); - return err; + if (!txmsg_omit_skb_parser) { + err = bpf_prog_attach(prog_fd[0], map_fd[8], + BPF_SK_SKB_STREAM_PARSER, 0); + if (err) { + fprintf(stderr, + "ERROR: bpf_prog_attach (TLS sockmap %i->%i): %d (%s)\n", + prog_fd[0], map_fd[8], err, strerror(errno)); + return err; + } } err = bpf_prog_attach(prog_fd[2], map_fd[8], @@ -1480,12 +1472,29 @@ static void test_txmsg_skb(int cgrp, struct sockmap_options *opt) txmsg_ktls_skb_drop = 0; txmsg_ktls_skb_redir = 1; test_exec(cgrp, opt); + txmsg_ktls_skb_redir = 0; + + /* Tests that omit skb_parser */ + txmsg_omit_skb_parser = 1; + ktls = 0; + txmsg_ktls_skb = 0; + test_exec(cgrp, opt); + + txmsg_ktls_skb_drop = 1; + test_exec(cgrp, opt); + txmsg_ktls_skb_drop = 0; + + txmsg_ktls_skb_redir = 1; + test_exec(cgrp, opt); + + ktls = 1; + test_exec(cgrp, opt); + txmsg_omit_skb_parser = 0; opt->data_test = data; ktls = k; } - /* Test cork with hung data. This tests poor usage patterns where * cork can leave data on the ring if user program is buggy and * doesn't flush them somehow. They do take some time however diff --git a/tools/testing/selftests/bpf/test_tc_redirect.sh b/tools/testing/selftests/bpf/test_tc_redirect.sh new file mode 100755 index 000000000000..6d7482562140 --- /dev/null +++ b/tools/testing/selftests/bpf/test_tc_redirect.sh @@ -0,0 +1,204 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# This test sets up 3 netns (src <-> fwd <-> dst). There is no direct veth link +# between src and dst. The netns fwd has veth links to each src and dst. 
The +# client is in src and server in dst. The test installs a TC BPF program to each +# host facing veth in fwd which calls into i) bpf_redirect_neigh() to perform the +# neigh addr population and redirect or ii) bpf_redirect_peer() for namespace +# switch from ingress side; it also installs a checker prog on the egress side +# to drop unexpected traffic. + +if [[ $EUID -ne 0 ]]; then + echo "This script must be run as root" + echo "FAIL" + exit 1 +fi + +# check that needed tools are present +command -v nc >/dev/null 2>&1 || \ + { echo >&2 "nc is not available"; exit 1; } +command -v dd >/dev/null 2>&1 || \ + { echo >&2 "dd is not available"; exit 1; } +command -v timeout >/dev/null 2>&1 || \ + { echo >&2 "timeout is not available"; exit 1; } +command -v ping >/dev/null 2>&1 || \ + { echo >&2 "ping is not available"; exit 1; } +command -v ping6 >/dev/null 2>&1 || \ + { echo >&2 "ping6 is not available"; exit 1; } +command -v perl >/dev/null 2>&1 || \ + { echo >&2 "perl is not available"; exit 1; } +command -v jq >/dev/null 2>&1 || \ + { echo >&2 "jq is not available"; exit 1; } +command -v bpftool >/dev/null 2>&1 || \ + { echo >&2 "bpftool is not available"; exit 1; } + +readonly GREEN='\033[0;92m' +readonly RED='\033[0;31m' +readonly NC='\033[0m' # No Color + +readonly PING_ARG="-c 3 -w 10 -q" + +readonly TIMEOUT=10 + +readonly NS_SRC="ns-src-$(mktemp -u XXXXXX)" +readonly NS_FWD="ns-fwd-$(mktemp -u XXXXXX)" +readonly NS_DST="ns-dst-$(mktemp -u XXXXXX)" + +readonly IP4_SRC="172.16.1.100" +readonly IP4_DST="172.16.2.100" + +readonly IP6_SRC="::1:dead:beef:cafe" +readonly IP6_DST="::2:dead:beef:cafe" + +readonly IP4_SLL="169.254.0.1" +readonly IP4_DLL="169.254.0.2" +readonly IP4_NET="169.254.0.0" + +netns_cleanup() +{ + ip netns del ${NS_SRC} + ip netns del ${NS_FWD} + ip netns del ${NS_DST} +} + +netns_setup() +{ + ip netns add "${NS_SRC}" + ip netns add "${NS_FWD}" + ip netns add "${NS_DST}" + + ip link add veth_src type veth peer name veth_src_fwd + ip link add 
veth_dst type veth peer name veth_dst_fwd + + ip link set veth_src netns ${NS_SRC} + ip link set veth_src_fwd netns ${NS_FWD} + + ip link set veth_dst netns ${NS_DST} + ip link set veth_dst_fwd netns ${NS_FWD} + + ip -netns ${NS_SRC} addr add ${IP4_SRC}/32 dev veth_src + ip -netns ${NS_DST} addr add ${IP4_DST}/32 dev veth_dst + + # The fwd netns automatically get a v6 LL address / routes, but also + # needs v4 one in order to start ARP probing. IP4_NET route is added + # to the endpoints so that the ARP processing will reply. + + ip -netns ${NS_FWD} addr add ${IP4_SLL}/32 dev veth_src_fwd + ip -netns ${NS_FWD} addr add ${IP4_DLL}/32 dev veth_dst_fwd + + ip -netns ${NS_SRC} addr add ${IP6_SRC}/128 dev veth_src nodad + ip -netns ${NS_DST} addr add ${IP6_DST}/128 dev veth_dst nodad + + ip -netns ${NS_SRC} link set dev veth_src up + ip -netns ${NS_FWD} link set dev veth_src_fwd up + + ip -netns ${NS_DST} link set dev veth_dst up + ip -netns ${NS_FWD} link set dev veth_dst_fwd up + + ip -netns ${NS_SRC} route add ${IP4_DST}/32 dev veth_src scope global + ip -netns ${NS_SRC} route add ${IP4_NET}/16 dev veth_src scope global + ip -netns ${NS_FWD} route add ${IP4_SRC}/32 dev veth_src_fwd scope global + + ip -netns ${NS_SRC} route add ${IP6_DST}/128 dev veth_src scope global + ip -netns ${NS_FWD} route add ${IP6_SRC}/128 dev veth_src_fwd scope global + + ip -netns ${NS_DST} route add ${IP4_SRC}/32 dev veth_dst scope global + ip -netns ${NS_DST} route add ${IP4_NET}/16 dev veth_dst scope global + ip -netns ${NS_FWD} route add ${IP4_DST}/32 dev veth_dst_fwd scope global + + ip -netns ${NS_DST} route add ${IP6_SRC}/128 dev veth_dst scope global + ip -netns ${NS_FWD} route add ${IP6_DST}/128 dev veth_dst_fwd scope global + + fmac_src=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_src_fwd/address) + fmac_dst=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_dst_fwd/address) + + ip -netns ${NS_SRC} neigh add ${IP4_DST} dev veth_src lladdr $fmac_src + ip -netns ${NS_DST} neigh 
add ${IP4_SRC} dev veth_dst lladdr $fmac_dst + + ip -netns ${NS_SRC} neigh add ${IP6_DST} dev veth_src lladdr $fmac_src + ip -netns ${NS_DST} neigh add ${IP6_SRC} dev veth_dst lladdr $fmac_dst +} + +netns_test_connectivity() +{ + set +e + + ip netns exec ${NS_DST} bash -c "nc -4 -l -p 9004 &" + ip netns exec ${NS_DST} bash -c "nc -6 -l -p 9006 &" + + TEST="TCPv4 connectivity test" + ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero bs=1000 count=100 > /dev/tcp/${IP4_DST}/9004" + if [ $? -ne 0 ]; then + echo -e "${TEST}: ${RED}FAIL${NC}" + exit 1 + fi + echo -e "${TEST}: ${GREEN}PASS${NC}" + + TEST="TCPv6 connectivity test" + ip netns exec ${NS_SRC} bash -c "timeout ${TIMEOUT} dd if=/dev/zero bs=1000 count=100 > /dev/tcp/${IP6_DST}/9006" + if [ $? -ne 0 ]; then + echo -e "${TEST}: ${RED}FAIL${NC}" + exit 1 + fi + echo -e "${TEST}: ${GREEN}PASS${NC}" + + TEST="ICMPv4 connectivity test" + ip netns exec ${NS_SRC} ping $PING_ARG ${IP4_DST} + if [ $? -ne 0 ]; then + echo -e "${TEST}: ${RED}FAIL${NC}" + exit 1 + fi + echo -e "${TEST}: ${GREEN}PASS${NC}" + + TEST="ICMPv6 connectivity test" + ip netns exec ${NS_SRC} ping6 $PING_ARG ${IP6_DST} + if [ $? 
-ne 0 ]; then + echo -e "${TEST}: ${RED}FAIL${NC}" + exit 1 + fi + echo -e "${TEST}: ${GREEN}PASS${NC}" + + set -e +} + +hex_mem_str() +{ + perl -e 'print join(" ", unpack("(H2)8", pack("L", @ARGV)))' $1 +} + +netns_setup_bpf() +{ + local obj=$1 + + ip netns exec ${NS_FWD} tc qdisc add dev veth_src_fwd clsact + ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd ingress bpf da obj $obj sec src_ingress + ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd egress bpf da obj $obj sec chk_egress + + ip netns exec ${NS_FWD} tc qdisc add dev veth_dst_fwd clsact + ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd ingress bpf da obj $obj sec dst_ingress + ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd egress bpf da obj $obj sec chk_egress + + veth_src=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_src_fwd/ifindex) + veth_dst=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_dst_fwd/ifindex) + + progs=$(ip netns exec ${NS_FWD} bpftool net --json | jq -r '.[] | .tc | map(.id) | .[]') + for prog in $progs; do + map=$(bpftool prog show id $prog --json | jq -r '.map_ids | .? | .[]') + if [ ! 
-z "$map" ]; then + bpftool map update id $map key hex $(hex_mem_str 0) value hex $(hex_mem_str $veth_src) + bpftool map update id $map key hex $(hex_mem_str 1) value hex $(hex_mem_str $veth_dst) + fi + done +} + +trap netns_cleanup EXIT +set -e + +netns_setup +netns_setup_bpf test_tc_neigh.o +netns_test_connectivity +netns_cleanup +netns_setup +netns_setup_bpf test_tc_peer.o +netns_test_connectivity diff --git a/tools/testing/selftests/bpf/test_tcp_hdr_options.h b/tools/testing/selftests/bpf/test_tcp_hdr_options.h new file mode 100644 index 000000000000..6118e3ab61fc --- /dev/null +++ b/tools/testing/selftests/bpf/test_tcp_hdr_options.h @@ -0,0 +1,152 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (c) 2020 Facebook */ + +#ifndef _TEST_TCP_HDR_OPTIONS_H +#define _TEST_TCP_HDR_OPTIONS_H + +struct bpf_test_option { + __u8 flags; + __u8 max_delack_ms; + __u8 rand; +} __attribute__((packed)); + +enum { + OPTION_RESEND, + OPTION_MAX_DELACK_MS, + OPTION_RAND, + __NR_OPTION_FLAGS, +}; + +#define OPTION_F_RESEND (1 << OPTION_RESEND) +#define OPTION_F_MAX_DELACK_MS (1 << OPTION_MAX_DELACK_MS) +#define OPTION_F_RAND (1 << OPTION_RAND) +#define OPTION_MASK ((1 << __NR_OPTION_FLAGS) - 1) + +#define TEST_OPTION_FLAGS(flags, option) (1 & ((flags) >> (option))) +#define SET_OPTION_FLAGS(flags, option) ((flags) |= (1 << (option))) + +/* Store in bpf_sk_storage */ +struct hdr_stg { + bool active; + bool resend_syn; /* active side only */ + bool syncookie; /* passive side only */ + bool fastopen; /* passive side only */ +}; + +struct linum_err { + unsigned int linum; + int err; +}; + +#define TCPHDR_FIN 0x01 +#define TCPHDR_SYN 0x02 +#define TCPHDR_RST 0x04 +#define TCPHDR_PSH 0x08 +#define TCPHDR_ACK 0x10 +#define TCPHDR_URG 0x20 +#define TCPHDR_ECE 0x40 +#define TCPHDR_CWR 0x80 +#define TCPHDR_SYNACK (TCPHDR_SYN | TCPHDR_ACK) + +#define TCPOPT_EOL 0 +#define TCPOPT_NOP 1 +#define TCPOPT_WINDOW 3 +#define TCPOPT_EXP 254 + +#define TCP_BPF_EXPOPT_BASE_LEN 4 +#define 
MAX_TCP_HDR_LEN 60 +#define MAX_TCP_OPTION_SPACE 40 + +#ifdef BPF_PROG_TEST_TCP_HDR_OPTIONS + +#define CG_OK 1 +#define CG_ERR 0 + +#ifndef SOL_TCP +#define SOL_TCP 6 +#endif + +struct tcp_exprm_opt { + __u8 kind; + __u8 len; + __u16 magic; + union { + __u8 data[4]; + __u32 data32; + }; +} __attribute__((packed)); + +struct tcp_opt { + __u8 kind; + __u8 len; + union { + __u8 data[4]; + __u32 data32; + }; +} __attribute__((packed)); + +struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 2); + __type(key, int); + __type(value, struct linum_err); +} lport_linum_map SEC(".maps"); + +static inline unsigned int tcp_hdrlen(const struct tcphdr *th) +{ + return th->doff << 2; +} + +static inline __u8 skops_tcp_flags(const struct bpf_sock_ops *skops) +{ + return skops->skb_tcp_flags; +} + +static inline void clear_hdr_cb_flags(struct bpf_sock_ops *skops) +{ + bpf_sock_ops_cb_flags_set(skops, + skops->bpf_sock_ops_cb_flags & + ~(BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG | + BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG)); +} + +static inline void set_hdr_cb_flags(struct bpf_sock_ops *skops, __u32 extra) +{ + bpf_sock_ops_cb_flags_set(skops, + skops->bpf_sock_ops_cb_flags | + BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG | + BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG | + extra); +} +static inline void +clear_parse_all_hdr_cb_flags(struct bpf_sock_ops *skops) +{ + bpf_sock_ops_cb_flags_set(skops, + skops->bpf_sock_ops_cb_flags & + ~BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG); +} + +static inline void +set_parse_all_hdr_cb_flags(struct bpf_sock_ops *skops) +{ + bpf_sock_ops_cb_flags_set(skops, + skops->bpf_sock_ops_cb_flags | + BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG); +} + +#define RET_CG_ERR(__err) ({ \ + struct linum_err __linum_err; \ + int __lport; \ + \ + __linum_err.linum = __LINE__; \ + __linum_err.err = __err; \ + __lport = skops->local_port; \ + bpf_map_update_elem(&lport_linum_map, &__lport, &__linum_err, BPF_NOEXIST); \ + clear_hdr_cb_flags(skops); \ + 
clear_parse_all_hdr_cb_flags(skops); \ + return CG_ERR; \ +}) + +#endif /* BPF_PROG_TEST_TCP_HDR_OPTIONS */ + +#endif /* _TEST_TCP_HDR_OPTIONS_H */ diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index 78a6bae56ea6..9be395d9dc64 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -114,6 +114,7 @@ struct bpf_test { bpf_testdata_struct_t retvals[MAX_TEST_RUNS]; }; enum bpf_attach_type expected_attach_type; + const char *kfunc; }; /* Note we want this to be 64 bit aligned so that the end of our array is @@ -984,8 +985,24 @@ static void do_test_single(struct bpf_test *test, bool unpriv, attr.log_level = 4; attr.prog_flags = pflags; + if (prog_type == BPF_PROG_TYPE_TRACING && test->kfunc) { + attr.attach_btf_id = libbpf_find_vmlinux_btf_id(test->kfunc, + attr.expected_attach_type); + if (attr.attach_btf_id < 0) { + printf("FAIL\nFailed to find BTF ID for '%s'!\n", + test->kfunc); + (*errors)++; + return; + } + } + fd_prog = bpf_load_program_xattr(&attr, bpf_vlog, sizeof(bpf_vlog)); - if (fd_prog < 0 && !bpf_probe_prog_type(prog_type, 0)) { + + /* BPF_PROG_TYPE_TRACING requires more setup and + * bpf_probe_prog_type won't give correct answer + */ + if (fd_prog < 0 && prog_type != BPF_PROG_TYPE_TRACING && + !bpf_probe_prog_type(prog_type, 0)) { printf("SKIP (unsupported program type %d)\n", prog_type); skips++; goto close_fds; diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c index 4d0e913bbb22..1bbd1d9830c8 100644 --- a/tools/testing/selftests/bpf/trace_helpers.c +++ b/tools/testing/selftests/bpf/trace_helpers.c @@ -90,6 +90,33 @@ long ksym_get_addr(const char *name) return 0; } +/* open kallsyms and read symbol addresses on the fly. Without caching all symbols, + * this is faster than load + find. 
+ */ +int kallsyms_find(const char *sym, unsigned long long *addr) +{ + char type, name[500]; + unsigned long long value; + int err = 0; + FILE *f; + + f = fopen("/proc/kallsyms", "r"); + if (!f) + return -EINVAL; + + while (fscanf(f, "%llx %c %499s%*[^\n]\n", &value, &type, name) > 0) { + if (strcmp(name, sym) == 0) { + *addr = value; + goto out; + } + } + err = -ENOENT; + +out: + fclose(f); + return err; +} + void read_trace_pipe(void) { int trace_fd; diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h index 25ef597dd03f..f62fdef9e589 100644 --- a/tools/testing/selftests/bpf/trace_helpers.h +++ b/tools/testing/selftests/bpf/trace_helpers.h @@ -12,6 +12,10 @@ struct ksym { int load_kallsyms(void); struct ksym *ksym_search(long key); long ksym_get_addr(const char *name); + +/* open kallsyms and find addresses on the fly, faster than load + search. */ +int kallsyms_find(const char *sym, unsigned long long *addr); + void read_trace_pipe(void); #endif diff --git a/tools/testing/selftests/bpf/verifier/and.c b/tools/testing/selftests/bpf/verifier/and.c index d781bc86e100..ca8fdb1b3f01 100644 --- a/tools/testing/selftests/bpf/verifier/and.c +++ b/tools/testing/selftests/bpf/verifier/and.c @@ -48,3 +48,19 @@ .result = REJECT, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, +{ + "check known subreg with unknown reg", + .insns = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32), + BPF_ALU64_IMM(BPF_LSH, BPF_REG_0, 32), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1), + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xFFFF1234), + /* Upper bits are unknown but AND above masks out 1 zero'ing lower bits */ + BPF_JMP32_IMM(BPF_JLT, BPF_REG_0, 1, 1), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 512), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .retval = 0 +}, diff --git a/tools/testing/selftests/bpf/verifier/basic.c b/tools/testing/selftests/bpf/verifier/basic.c index b8d18642653a..de84f0d57082 
100644 --- a/tools/testing/selftests/bpf/verifier/basic.c +++ b/tools/testing/selftests/bpf/verifier/basic.c @@ -2,7 +2,7 @@ "empty prog", .insns = { }, - .errstr = "unknown opcode 00", + .errstr = "last insn is not an exit or jmp", .result = REJECT, }, { diff --git a/tools/testing/selftests/bpf/verifier/bounds.c b/tools/testing/selftests/bpf/verifier/bounds.c index 4d6645f2874c..dac40de3f868 100644 --- a/tools/testing/selftests/bpf/verifier/bounds.c +++ b/tools/testing/selftests/bpf/verifier/bounds.c @@ -557,3 +557,149 @@ .result = ACCEPT, .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, +{ + "bounds check for reg = 0, reg xor 1", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_1, 0), + BPF_ALU64_IMM(BPF_XOR, BPF_REG_1, 1), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .result = ACCEPT, +}, +{ + "bounds check for reg32 = 0, reg32 xor 1", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV32_IMM(BPF_REG_1, 0), + BPF_ALU32_IMM(BPF_XOR, BPF_REG_1, 1), + BPF_JMP32_IMM(BPF_JNE, BPF_REG_1, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .result = ACCEPT, +}, +{ + "bounds check for reg = 2, reg xor 3", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), 
+ BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_1, 2), + BPF_ALU64_IMM(BPF_XOR, BPF_REG_1, 3), + BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .result = ACCEPT, +}, +{ + "bounds check for reg = any, reg xor 3", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_XOR, BPF_REG_1, 3), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .result = REJECT, + .errstr = "invalid access to map value", + .errstr_unpriv = "invalid access to map value", +}, +{ + "bounds check for reg32 = any, reg32 xor 3", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), + BPF_ALU32_IMM(BPF_XOR, BPF_REG_1, 3), + BPF_JMP32_IMM(BPF_JNE, BPF_REG_1, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .result = REJECT, + .errstr = "invalid access to map value", + .errstr_unpriv = "invalid access to map value", +}, +{ + "bounds check for reg > 0, reg xor 3", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + 
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JLE, BPF_REG_1, 0, 3), + BPF_ALU64_IMM(BPF_XOR, BPF_REG_1, 3), + BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .result = ACCEPT, +}, +{ + "bounds check for reg32 > 0, reg32 xor 3", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0), + BPF_JMP32_IMM(BPF_JLE, BPF_REG_1, 0, 3), + BPF_ALU32_IMM(BPF_XOR, BPF_REG_1, 3), + BPF_JMP32_IMM(BPF_JGE, BPF_REG_1, 0, 1), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .result = ACCEPT, +}, diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index 94258c6b5235..c4f5d909e58a 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -647,13 +647,14 @@ .result = REJECT, }, { - "calls: ld_abs with changing ctx data in callee", + "calls: subprog call with ld_abs in main prog", .insns = { BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), BPF_LD_ABS(BPF_B, 0), BPF_LD_ABS(BPF_H, 0), BPF_LD_ABS(BPF_W, 0), BPF_MOV64_REG(BPF_REG_7, BPF_REG_6), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5), BPF_MOV64_REG(BPF_REG_6, BPF_REG_7), BPF_LD_ABS(BPF_B, 0), @@ -666,8 +667,7 @@ BPF_EXIT_INSN(), }, .prog_type = 
BPF_PROG_TYPE_SCHED_CLS, - .errstr = "BPF_LD_[ABS|IND] instructions cannot be mixed", - .result = REJECT, + .result = ACCEPT, }, { "calls: two calls with bad fallthrough", diff --git a/tools/testing/selftests/bpf/verifier/d_path.c b/tools/testing/selftests/bpf/verifier/d_path.c new file mode 100644 index 000000000000..b988396379a7 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/d_path.c @@ -0,0 +1,37 @@ +{ + "d_path accept", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_MOV64_IMM(BPF_REG_6, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 0), + BPF_LD_IMM64(BPF_REG_3, 8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_d_path), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACING, + .expected_attach_type = BPF_TRACE_FENTRY, + .kfunc = "dentry_open", +}, +{ + "d_path reject", + .insns = { + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_MOV64_IMM(BPF_REG_6, 0), + BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 0), + BPF_LD_IMM64(BPF_REG_3, 8), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_d_path), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .errstr = "helper call is not allowed in probe", + .result = REJECT, + .prog_type = BPF_PROG_TYPE_TRACING, + .expected_attach_type = BPF_TRACE_FENTRY, + .kfunc = "d_path", +}, diff --git a/tools/testing/selftests/bpf/verifier/direct_packet_access.c b/tools/testing/selftests/bpf/verifier/direct_packet_access.c index 2c5fbe7bcd27..ae72536603fe 100644 --- a/tools/testing/selftests/bpf/verifier/direct_packet_access.c +++ b/tools/testing/selftests/bpf/verifier/direct_packet_access.c @@ -529,7 +529,7 @@ }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, - .errstr = "invalid access to packet, off=0 size=8, R5(id=1,off=0,r=0)", + .errstr = "invalid access to 
packet, off=0 size=8, R5(id=2,off=0,r=0)", .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS, }, { diff --git a/tools/testing/selftests/bpf/verifier/ld_imm64.c b/tools/testing/selftests/bpf/verifier/ld_imm64.c index 3856dba733e9..f9297900cea6 100644 --- a/tools/testing/selftests/bpf/verifier/ld_imm64.c +++ b/tools/testing/selftests/bpf/verifier/ld_imm64.c @@ -51,14 +51,6 @@ .result = REJECT, }, { - "test5 ld_imm64", - .insns = { - BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 0), - }, - .errstr = "invalid bpf_ld_imm64 insn", - .result = REJECT, -}, -{ "test6 ld_imm64", .insns = { BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 0), diff --git a/tools/testing/selftests/bpf/verifier/map_ptr.c b/tools/testing/selftests/bpf/verifier/map_ptr.c index b52209db8250..637f9293bda8 100644 --- a/tools/testing/selftests/bpf/verifier/map_ptr.c +++ b/tools/testing/selftests/bpf/verifier/map_ptr.c @@ -60,3 +60,35 @@ .result = ACCEPT, .retval = 1, }, +{ + "bpf_map_ptr: r = 0, map_ptr = map_ptr + r", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_16b = { 4 }, + .result = ACCEPT, +}, +{ + "bpf_map_ptr: r = 0, r = r + map_ptr", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_MOV64_IMM(BPF_REG_1, 0), + BPF_LD_MAP_FD(BPF_REG_0, 0), + BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_16b = { 4 }, + .result = ACCEPT, +}, diff --git a/tools/testing/selftests/bpf/verifier/ref_tracking.c 
b/tools/testing/selftests/bpf/verifier/ref_tracking.c index 056e0273bf12..006b5bd99c08 100644 --- a/tools/testing/selftests/bpf/verifier/ref_tracking.c +++ b/tools/testing/selftests/bpf/verifier/ref_tracking.c @@ -854,3 +854,50 @@ .errstr = "Unreleased reference", .result = REJECT, }, +{ + "reference tracking: bpf_sk_release(btf_tcp_sock)", + .insns = { + BPF_SK_LOOKUP(sk_lookup_tcp), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_skc_to_tcp_sock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = ACCEPT, + .result_unpriv = REJECT, + .errstr_unpriv = "unknown func", +}, +{ + "reference tracking: use ptr from bpf_skc_to_tcp_sock() after release", + .insns = { + BPF_SK_LOOKUP(sk_lookup_tcp), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_skc_to_tcp_sock), + BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_sk_release), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_7, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "invalid mem access", + .result_unpriv = REJECT, + .errstr_unpriv = "unknown func", +}, diff --git a/tools/testing/selftests/bpf/verifier/regalloc.c b/tools/testing/selftests/bpf/verifier/regalloc.c new file mode 100644 index 000000000000..4ad7e05de706 --- /dev/null +++ b/tools/testing/selftests/bpf/verifier/regalloc.c @@ -0,0 +1,269 @@ +{ + "regalloc basic", + 
.insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 20, 4), + BPF_JMP_IMM(BPF_JSLT, BPF_REG_2, 0, 3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_2), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 4 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "regalloc negative", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 24, 4), + BPF_JMP_IMM(BPF_JSLT, BPF_REG_2, 0, 3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_2), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_7, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 4 }, + .result = REJECT, + .errstr = "invalid access to map value, value_size=48 off=48 size=1", + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "regalloc src_reg mark", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + 
BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 20, 5), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_JMP_REG(BPF_JSGE, BPF_REG_3, BPF_REG_2, 3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_2), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 4 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "regalloc src_reg negative", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 22, 5), + BPF_MOV64_IMM(BPF_REG_3, 0), + BPF_JMP_REG(BPF_JSGE, BPF_REG_3, BPF_REG_2, 3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_2), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 4 }, + .result = REJECT, + .errstr = "invalid access to map value, value_size=48 off=44 size=8", + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "regalloc and spill", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 20, 7), + /* r0 has upper bound that should propagate into r2 */ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -8), /* spill r2 */ + 
BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_2, 0), /* clear r0 and r2 */ + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_10, -8), /* fill r3 */ + BPF_JMP_REG(BPF_JSGE, BPF_REG_0, BPF_REG_3, 2), + /* r3 has lower and upper bounds */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_3), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 4 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "regalloc and spill negative", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 48, 7), + /* r0 has upper bound that should propagate into r2 */ + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -8), /* spill r2 */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_MOV64_IMM(BPF_REG_2, 0), /* clear r0 and r2 */ + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_10, -8), /* fill r3 */ + BPF_JMP_REG(BPF_JSGE, BPF_REG_0, BPF_REG_3, 2), + /* r3 has lower and upper bounds */ + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_3), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 4 }, + .result = REJECT, + .errstr = "invalid access to map value, value_size=48 off=48 size=8", + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "regalloc three regs", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + 
BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_4, BPF_REG_2), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_0, 12, 5), + BPF_JMP_IMM(BPF_JSLT, BPF_REG_2, 0, 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_0), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_2), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_4), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 4 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "regalloc after call", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_9, BPF_REG_0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 6), + BPF_JMP_IMM(BPF_JSGT, BPF_REG_8, 20, 4), + BPF_JMP_IMM(BPF_JSLT, BPF_REG_9, 0, 3), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_8), + BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_9), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 4 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "regalloc in callee", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_3, BPF_REG_7), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1), + BPF_EXIT_INSN(), + 
BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 20, 5), + BPF_JMP_IMM(BPF_JSLT, BPF_REG_2, 0, 4), + BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_1), + BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_2), + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), + BPF_EXIT_INSN(), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 4 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, +{ + "regalloc, spill, JEQ", + .insns = { + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8), /* spill r0 */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 0), + /* The verifier will walk the rest twice with r0 == 0 and r0 == map_value */ + BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_0), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 20, 0), + /* The verifier will walk the rest two more times with r0 == 20 and r0 == unknown */ + BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_10, -8), /* fill r3 with map_value */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0, 1), /* skip ldx if map_value == NULL */ + /* Buggy verifier will think that r3 == 20 here */ + BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0), /* read from map_value */ + BPF_EXIT_INSN(), + }, + .fixup_map_hash_48b = { 4 }, + .result = ACCEPT, + .prog_type = BPF_PROG_TYPE_TRACEPOINT, +}, diff --git a/tools/testing/selftests/clone3/clone3.c b/tools/testing/selftests/clone3/clone3.c index b7e6dec36173..42be3b925830 100644 --- a/tools/testing/selftests/clone3/clone3.c +++ b/tools/testing/selftests/clone3/clone3.c @@ -20,13 +20,6 @@ #include "../kselftest.h" #include "clone3_selftests.h" -/* - * Different sizes of struct clone_args - */ -#ifndef CLONE3_ARGS_SIZE_V0 -#define CLONE3_ARGS_SIZE_V0 64 -#endif - enum test_mode { CLONE3_ARGS_NO_TEST, CLONE3_ARGS_ALL_0, @@ -38,13 +31,13 @@ enum test_mode { static 
int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode) { - struct clone_args args = { + struct __clone_args args = { .flags = flags, .exit_signal = SIGCHLD, }; struct clone_args_extended { - struct clone_args args; + struct __clone_args args; __aligned_u64 excess_space[2]; } args_ext; @@ -52,11 +45,11 @@ static int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode) int status; memset(&args_ext, 0, sizeof(args_ext)); - if (size > sizeof(struct clone_args)) + if (size > sizeof(struct __clone_args)) args_ext.excess_space[1] = 1; if (size == 0) - size = sizeof(struct clone_args); + size = sizeof(struct __clone_args); switch (test_mode) { case CLONE3_ARGS_ALL_0: @@ -77,9 +70,9 @@ static int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode) break; } - memcpy(&args_ext.args, &args, sizeof(struct clone_args)); + memcpy(&args_ext.args, &args, sizeof(struct __clone_args)); - pid = sys_clone3((struct clone_args *)&args_ext, size); + pid = sys_clone3((struct __clone_args *)&args_ext, size); if (pid < 0) { ksft_print_msg("%s - Failed to create new process\n", strerror(errno)); @@ -144,14 +137,14 @@ int main(int argc, char *argv[]) else ksft_test_result_skip("Skipping clone3() with CLONE_NEWPID\n"); - /* Do a clone3() with CLONE3_ARGS_SIZE_V0. */ - test_clone3(0, CLONE3_ARGS_SIZE_V0, 0, CLONE3_ARGS_NO_TEST); + /* Do a clone3() with CLONE_ARGS_SIZE_VER0. 
*/ + test_clone3(0, CLONE_ARGS_SIZE_VER0, 0, CLONE3_ARGS_NO_TEST); - /* Do a clone3() with CLONE3_ARGS_SIZE_V0 - 8 */ - test_clone3(0, CLONE3_ARGS_SIZE_V0 - 8, -EINVAL, CLONE3_ARGS_NO_TEST); + /* Do a clone3() with CLONE_ARGS_SIZE_VER0 - 8 */ + test_clone3(0, CLONE_ARGS_SIZE_VER0 - 8, -EINVAL, CLONE3_ARGS_NO_TEST); /* Do a clone3() with sizeof(struct clone_args) + 8 */ - test_clone3(0, sizeof(struct clone_args) + 8, 0, CLONE3_ARGS_NO_TEST); + test_clone3(0, sizeof(struct __clone_args) + 8, 0, CLONE3_ARGS_NO_TEST); /* Do a clone3() with exit_signal having highest 32 bits non-zero */ test_clone3(0, 0, -EINVAL, CLONE3_ARGS_INVAL_EXIT_SIGNAL_BIG); @@ -165,31 +158,31 @@ int main(int argc, char *argv[]) /* Do a clone3() with NSIG < exit_signal < CSIG */ test_clone3(0, 0, -EINVAL, CLONE3_ARGS_INVAL_EXIT_SIGNAL_NSIG); - test_clone3(0, sizeof(struct clone_args) + 8, 0, CLONE3_ARGS_ALL_0); + test_clone3(0, sizeof(struct __clone_args) + 8, 0, CLONE3_ARGS_ALL_0); - test_clone3(0, sizeof(struct clone_args) + 16, -E2BIG, + test_clone3(0, sizeof(struct __clone_args) + 16, -E2BIG, CLONE3_ARGS_ALL_0); - test_clone3(0, sizeof(struct clone_args) * 2, -E2BIG, + test_clone3(0, sizeof(struct __clone_args) * 2, -E2BIG, CLONE3_ARGS_ALL_0); /* Do a clone3() with > page size */ test_clone3(0, getpagesize() + 8, -E2BIG, CLONE3_ARGS_NO_TEST); - /* Do a clone3() with CLONE3_ARGS_SIZE_V0 in a new PID NS. */ + /* Do a clone3() with CLONE_ARGS_SIZE_VER0 in a new PID NS. 
*/ if (uid == 0) - test_clone3(CLONE_NEWPID, CLONE3_ARGS_SIZE_V0, 0, + test_clone3(CLONE_NEWPID, CLONE_ARGS_SIZE_VER0, 0, CLONE3_ARGS_NO_TEST); else ksft_test_result_skip("Skipping clone3() with CLONE_NEWPID\n"); - /* Do a clone3() with CLONE3_ARGS_SIZE_V0 - 8 in a new PID NS */ - test_clone3(CLONE_NEWPID, CLONE3_ARGS_SIZE_V0 - 8, -EINVAL, + /* Do a clone3() with CLONE_ARGS_SIZE_VER0 - 8 in a new PID NS */ + test_clone3(CLONE_NEWPID, CLONE_ARGS_SIZE_VER0 - 8, -EINVAL, CLONE3_ARGS_NO_TEST); /* Do a clone3() with sizeof(struct clone_args) + 8 in a new PID NS */ if (uid == 0) - test_clone3(CLONE_NEWPID, sizeof(struct clone_args) + 8, 0, + test_clone3(CLONE_NEWPID, sizeof(struct __clone_args) + 8, 0, CLONE3_ARGS_NO_TEST); else ksft_test_result_skip("Skipping clone3() with CLONE_NEWPID\n"); diff --git a/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c index 9562425aa0a9..55bd387ce7ec 100644 --- a/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c +++ b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c @@ -44,13 +44,13 @@ static int call_clone3_set_tid(struct __test_metadata *_metadata, int status; pid_t pid = -1; - struct clone_args args = { + struct __clone_args args = { .exit_signal = SIGCHLD, .set_tid = ptr_to_u64(set_tid), .set_tid_size = set_tid_size, }; - pid = sys_clone3(&args, sizeof(struct clone_args)); + pid = sys_clone3(&args, sizeof(args)); if (pid < 0) { TH_LOG("%s - Failed to create new process", strerror(errno)); return -errno; diff --git a/tools/testing/selftests/clone3/clone3_clear_sighand.c b/tools/testing/selftests/clone3/clone3_clear_sighand.c index db5fc9c5edcf..47a8c0fc3676 100644 --- a/tools/testing/selftests/clone3/clone3_clear_sighand.c +++ b/tools/testing/selftests/clone3/clone3_clear_sighand.c @@ -47,7 +47,7 @@ static void test_clone3_clear_sighand(void) { int ret; pid_t pid; - struct clone_args args = {}; + struct __clone_args args = {}; 
struct sigaction act; /* diff --git a/tools/testing/selftests/clone3/clone3_selftests.h b/tools/testing/selftests/clone3/clone3_selftests.h index 91c1a78ddb39..e81ffaaee02b 100644 --- a/tools/testing/selftests/clone3/clone3_selftests.h +++ b/tools/testing/selftests/clone3/clone3_selftests.h @@ -19,13 +19,11 @@ #define CLONE_INTO_CGROUP 0x200000000ULL /* Clone into a specific cgroup given the right permissions. */ #endif -#ifndef CLONE_ARGS_SIZE_VER0 -#define CLONE_ARGS_SIZE_VER0 64 -#endif - #ifndef __NR_clone3 #define __NR_clone3 -1 -struct clone_args { +#endif + +struct __clone_args { __aligned_u64 flags; __aligned_u64 pidfd; __aligned_u64 child_tid; @@ -34,15 +32,21 @@ struct clone_args { __aligned_u64 stack; __aligned_u64 stack_size; __aligned_u64 tls; -#define CLONE_ARGS_SIZE_VER1 80 +#ifndef CLONE_ARGS_SIZE_VER0 +#define CLONE_ARGS_SIZE_VER0 64 /* sizeof first published struct */ +#endif __aligned_u64 set_tid; __aligned_u64 set_tid_size; -#define CLONE_ARGS_SIZE_VER2 88 +#ifndef CLONE_ARGS_SIZE_VER1 +#define CLONE_ARGS_SIZE_VER1 80 /* sizeof second published struct */ +#endif __aligned_u64 cgroup; +#ifndef CLONE_ARGS_SIZE_VER2 +#define CLONE_ARGS_SIZE_VER2 88 /* sizeof third published struct */ +#endif }; -#endif /* __NR_clone3 */ -static pid_t sys_clone3(struct clone_args *args, size_t size) +static pid_t sys_clone3(struct __clone_args *args, size_t size) { fflush(stdout); fflush(stderr); @@ -52,7 +56,7 @@ static pid_t sys_clone3(struct clone_args *args, size_t size) static inline void test_clone3_supported(void) { pid_t pid; - struct clone_args args = {}; + struct __clone_args args = {}; if (__NR_clone3 < 0) ksft_exit_skip("clone3() syscall is not supported\n"); diff --git a/tools/testing/selftests/clone3/clone3_set_tid.c b/tools/testing/selftests/clone3/clone3_set_tid.c index 5831c1082d6d..0229e9ebb995 100644 --- a/tools/testing/selftests/clone3/clone3_set_tid.c +++ b/tools/testing/selftests/clone3/clone3_set_tid.c @@ -46,14 +46,14 @@ static int 
call_clone3_set_tid(pid_t *set_tid, int status; pid_t pid = -1; - struct clone_args args = { + struct __clone_args args = { .flags = flags, .exit_signal = SIGCHLD, .set_tid = ptr_to_u64(set_tid), .set_tid_size = set_tid_size, }; - pid = sys_clone3(&args, sizeof(struct clone_args)); + pid = sys_clone3(&args, sizeof(args)); if (pid < 0) { ksft_print_msg("%s - Failed to create new process\n", strerror(errno)); diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh index 47edf099a17e..508a702f0021 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh @@ -207,7 +207,7 @@ __rate_test() RET=0 - devlink trap policer set $DEVLINK_DEV policer $id rate 1000 burst 16 + devlink trap policer set $DEVLINK_DEV policer $id rate 1000 burst 512 devlink trap group set $DEVLINK_DEV group l3_drops policer $id # Send packets at highest possible rate and make sure they are dropped @@ -220,8 +220,8 @@ __rate_test() rate=$(trap_rate_get) pct=$((100 * (rate - 1000) / 1000)) - ((-5 <= pct && pct <= 5)) - check_err $? "Expected rate 1000 pps, got $rate pps, which is $pct% off. Required accuracy is +-5%" + ((-10 <= pct && pct <= 10)) + check_err $? "Expected rate 1000 pps, got $rate pps, which is $pct% off. 
Required accuracy is +-10%" log_info "Expected rate 1000 pps, measured rate $rate pps" drop_rate=$(policer_drop_rate_get $id) @@ -288,35 +288,12 @@ __burst_test() RET=0 - devlink trap policer set $DEVLINK_DEV policer $id rate 1000 burst 32 + devlink trap policer set $DEVLINK_DEV policer $id rate 1000 burst 512 devlink trap group set $DEVLINK_DEV group l3_drops policer $id - # Send a burst of 64 packets and make sure that about 32 are received - # and the rest are dropped by the policer - log_info "=== Tx burst size: 64, Policer burst size: 32 pps ===" - - t0_rx=$(devlink_trap_rx_packets_get blackhole_route) - t0_drop=$(devlink_trap_policer_rx_dropped_get $id) - - start_traffic $h1 192.0.2.1 198.51.100.100 $rp1_mac -c 64 - - t1_rx=$(devlink_trap_rx_packets_get blackhole_route) - t1_drop=$(devlink_trap_policer_rx_dropped_get $id) - - rx=$((t1_rx - t0_rx)) - pct=$((100 * (rx - 32) / 32)) - ((-20 <= pct && pct <= 20)) - check_err $? "Expected burst size of 32 packets, got $rx packets, which is $pct% off. Required accuracy is +-20%" - log_info "Expected burst size of 32 packets, measured burst size of $rx packets" - - drop=$((t1_drop - t0_drop)) - (( drop > 0 )) - check_err $? 
"Expected non-zero policer drops, got 0" - log_info "Measured policer drops of $drop packets" - # Send a burst of 16 packets and make sure that 16 are received # and that none are dropped by the policer - log_info "=== Tx burst size: 16, Policer burst size: 32 pps ===" + log_info "=== Tx burst size: 16, Policer burst size: 512 ===" t0_rx=$(devlink_trap_rx_packets_get blackhole_route) t0_drop=$(devlink_trap_policer_rx_dropped_get $id) diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh index 6d1790b5de7a..e9f8718af979 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh @@ -147,17 +147,26 @@ switch_create() # Make sure that ingress quotas are smaller than egress so that there is # room for both streams of traffic to be admitted to shared buffer. + devlink_pool_size_thtype_save 0 devlink_pool_size_thtype_set 0 dynamic 10000000 + devlink_pool_size_thtype_save 4 devlink_pool_size_thtype_set 4 dynamic 10000000 + devlink_port_pool_th_save $swp1 0 devlink_port_pool_th_set $swp1 0 6 + devlink_tc_bind_pool_th_save $swp1 1 ingress devlink_tc_bind_pool_th_set $swp1 1 ingress 0 6 + devlink_port_pool_th_save $swp2 0 devlink_port_pool_th_set $swp2 0 6 + devlink_tc_bind_pool_th_save $swp2 2 ingress devlink_tc_bind_pool_th_set $swp2 2 ingress 0 6 + devlink_tc_bind_pool_th_save $swp3 1 egress devlink_tc_bind_pool_th_set $swp3 1 egress 4 7 + devlink_tc_bind_pool_th_save $swp3 2 egress devlink_tc_bind_pool_th_set $swp3 2 egress 4 7 + devlink_port_pool_th_save $swp3 4 devlink_port_pool_th_set $swp3 4 7 } diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh new file mode 100755 index 000000000000..27de3d9ed08e --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh @@ -0,0 +1,379 @@ +#!/bin/bash +# 
SPDX-License-Identifier: GPL-2.0 + +ALL_TESTS=" + test_defaults + test_dcb_ets + test_mtu + test_pfc + test_int_buf + test_tc_priomap + test_tc_mtu + test_tc_sizes + test_tc_int_buf +" + +lib_dir=$(dirname $0)/../../../net/forwarding + +NUM_NETIFS=0 +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh +source qos_lib.sh + +swp=$NETIF_NO_CABLE + +cleanup() +{ + pre_cleanup +} + +get_prio_pg() +{ + __mlnx_qos -i $swp | sed -n '/^PFC/,/^[^[:space:]]/p' | + grep buffer | sed 's/ \+/ /g' | cut -d' ' -f 2- +} + +get_prio_pfc() +{ + __mlnx_qos -i $swp | sed -n '/^PFC/,/^[^[:space:]]/p' | + grep enabled | sed 's/ \+/ /g' | cut -d' ' -f 2- +} + +get_prio_tc() +{ + __mlnx_qos -i $swp | sed -n '/^tc/,$p' | + awk '/^tc/ { TC = $2 } + /priority:/ { PRIO[$2]=TC } + END { + for (i in PRIO) + printf("%d ", PRIO[i]) + }' +} + +get_buf_size() +{ + local idx=$1; shift + + __mlnx_qos -i $swp | grep Receive | sed 's/.*: //' | cut -d, -f $((idx + 1)) +} + +get_tot_size() +{ + __mlnx_qos -i $swp | grep Receive | sed 's/.*total_size=//' +} + +check_prio_pg() +{ + local expect=$1; shift + + local current=$(get_prio_pg) + test "$current" = "$expect" + check_err $? "prio2buffer is '$current', expected '$expect'" +} + +check_prio_pfc() +{ + local expect=$1; shift + + local current=$(get_prio_pfc) + test "$current" = "$expect" + check_err $? "prio PFC is '$current', expected '$expect'" +} + +check_prio_tc() +{ + local expect=$1; shift + + local current=$(get_prio_tc) + test "$current" = "$expect" + check_err $? "prio_tc is '$current', expected '$expect'" +} + +__check_buf_size() +{ + local idx=$1; shift + local expr=$1; shift + local what=$1; shift + + local current=$(get_buf_size $idx) + ((current $expr)) + check_err $? 
"${what}buffer $idx size is '$current', expected '$expr'" + echo $current +} + +check_buf_size() +{ + __check_buf_size "$@" > /dev/null +} + +test_defaults() +{ + RET=0 + + check_prio_pg "0 0 0 0 0 0 0 0 " + check_prio_tc "0 0 0 0 0 0 0 0 " + check_prio_pfc "0 0 0 0 0 0 0 0 " + + log_test "Default headroom configuration" +} + +test_dcb_ets() +{ + RET=0 + + __mlnx_qos -i $swp --prio_tc=0,2,4,6,1,3,5,7 > /dev/null + + check_prio_pg "0 2 4 6 1 3 5 7 " + check_prio_tc "0 2 4 6 1 3 5 7 " + check_prio_pfc "0 0 0 0 0 0 0 0 " + + __mlnx_qos -i $swp --prio_tc=0,0,0,0,0,0,0,0 > /dev/null + + check_prio_pg "0 0 0 0 0 0 0 0 " + check_prio_tc "0 0 0 0 0 0 0 0 " + + __mlnx_qos -i $swp --prio2buffer=1,3,5,7,0,2,4,6 &> /dev/null + check_fail $? "prio2buffer accepted in DCB mode" + + log_test "Configuring headroom through ETS" +} + +test_mtu() +{ + local what=$1; shift + local buf0size_2 + local buf0size + + RET=0 + buf0size=$(__check_buf_size 0 "> 0") + + mtu_set $swp 3000 + buf0size_2=$(__check_buf_size 0 "> $buf0size" "MTU 3000: ") + mtu_restore $swp + + mtu_set $swp 6000 + check_buf_size 0 "> $buf0size_2" "MTU 6000: " + mtu_restore $swp + + check_buf_size 0 "== $buf0size" + + log_test "${what}MTU impacts buffer size" +} + +test_tc_mtu() +{ + # In TC mode, MTU still impacts the threshold below which a buffer is + # not permitted to go. 
+ + tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M + test_mtu "TC: " + tc qdisc delete dev $swp root +} + +test_pfc() +{ + RET=0 + + __mlnx_qos -i $swp --prio_tc=0,0,0,0,0,1,2,3 > /dev/null + + local buf0size=$(get_buf_size 0) + local buf1size=$(get_buf_size 1) + local buf2size=$(get_buf_size 2) + local buf3size=$(get_buf_size 3) + check_buf_size 0 "> 0" + check_buf_size 1 "> 0" + check_buf_size 2 "> 0" + check_buf_size 3 "> 0" + check_buf_size 4 "== 0" + check_buf_size 5 "== 0" + check_buf_size 6 "== 0" + check_buf_size 7 "== 0" + + log_test "Buffer size sans PFC" + + RET=0 + + __mlnx_qos -i $swp --pfc=0,0,0,0,0,1,1,1 --cable_len=0 > /dev/null + + check_prio_pg "0 0 0 0 0 1 2 3 " + check_prio_pfc "0 0 0 0 0 1 1 1 " + check_buf_size 0 "== $buf0size" + check_buf_size 1 "> $buf1size" + check_buf_size 2 "> $buf2size" + check_buf_size 3 "> $buf3size" + + local buf1size=$(get_buf_size 1) + check_buf_size 2 "== $buf1size" + check_buf_size 3 "== $buf1size" + + log_test "PFC: Cable length 0" + + RET=0 + + __mlnx_qos -i $swp --pfc=0,0,0,0,0,1,1,1 --cable_len=1000 > /dev/null + + check_buf_size 0 "== $buf0size" + check_buf_size 1 "> $buf1size" + check_buf_size 2 "> $buf1size" + check_buf_size 3 "> $buf1size" + + log_test "PFC: Cable length 1000" + + RET=0 + + __mlnx_qos -i $swp --pfc=0,0,0,0,0,0,0,0 --cable_len=0 > /dev/null + __mlnx_qos -i $swp --prio_tc=0,0,0,0,0,0,0,0 > /dev/null + + check_prio_pg "0 0 0 0 0 0 0 0 " + check_prio_tc "0 0 0 0 0 0 0 0 " + check_buf_size 0 "> 0" + check_buf_size 1 "== 0" + check_buf_size 2 "== 0" + check_buf_size 3 "== 0" + check_buf_size 4 "== 0" + check_buf_size 5 "== 0" + check_buf_size 6 "== 0" + check_buf_size 7 "== 0" + + log_test "PFC: Restore defaults" +} + +test_tc_priomap() +{ + RET=0 + + __mlnx_qos -i $swp --prio_tc=0,1,2,3,4,5,6,7 > /dev/null + check_prio_pg "0 1 2 3 4 5 6 7 " + + tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M + check_prio_pg "0 0 0 0 0 0 0 0 " + + __mlnx_qos -i $swp 
--prio2buffer=1,3,5,7,0,2,4,6 > /dev/null + check_prio_pg "1 3 5 7 0 2 4 6 " + + tc qdisc delete dev $swp root + check_prio_pg "0 1 2 3 4 5 6 7 " + + # Clean up. + tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M + __mlnx_qos -i $swp --prio2buffer=0,0,0,0,0,0,0,0 > /dev/null + tc qdisc delete dev $swp root + __mlnx_qos -i $swp --prio_tc=0,0,0,0,0,0,0,0 > /dev/null + + log_test "TC: priomap" +} + +test_tc_sizes() +{ + local cell_size=$(devlink_cell_size_get) + local size=$((cell_size * 1000)) + + RET=0 + + __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 &> /dev/null + check_fail $? "buffer_size should fail before qdisc is added" + + tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M + + __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null + check_err $? "buffer_size should pass after qdisc is added" + check_buf_size 0 "== $size" "set size: " + + mtu_set $swp 6000 + check_buf_size 0 "== $size" "set MTU: " + mtu_restore $swp + + __mlnx_qos -i $swp --buffer_size=0,0,0,0,0,0,0,0 > /dev/null + + # After replacing the qdisc for the same kind, buffer_size still has to + # work. + tc qdisc replace dev $swp root handle 1: bfifo limit 1M + + __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null + check_buf_size 0 "== $size" "post replace, set size: " + + __mlnx_qos -i $swp --buffer_size=0,0,0,0,0,0,0,0 > /dev/null + + # Likewise after replacing for a different kind. + tc qdisc replace dev $swp root handle 2: prio bands 8 + + __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null + check_buf_size 0 "== $size" "post replace different kind, set size: " + + tc qdisc delete dev $swp root + + __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 &> /dev/null + check_fail $? "buffer_size should fail after qdisc is deleted" + + log_test "TC: buffer size" +} + +test_int_buf() +{ + local what=$1; shift + + RET=0 + + local buf0size=$(get_buf_size 0) + local tot_size=$(get_tot_size) + + # Size of internal buffer and buffer 9. 
+ local dsize=$((tot_size - buf0size)) + + tc qdisc add dev $swp clsact + tc filter add dev $swp egress matchall skip_sw action mirred egress mirror dev $swp + + local buf0size_2=$(get_buf_size 0) + local tot_size_2=$(get_tot_size) + local dsize_2=$((tot_size_2 - buf0size_2)) + + # Egress SPAN should have added to the "invisible" buffer configuration. + ((dsize_2 > dsize)) + check_err $? "Invisible buffers account for '$dsize_2', expected '> $dsize'" + + mtu_set $swp 3000 + + local buf0size_3=$(get_buf_size 0) + local tot_size_3=$(get_tot_size) + local dsize_3=$((tot_size_3 - buf0size_3)) + + # MTU change might change buffer 0, which will show at total, but the + # hidden buffers should stay the same size. + ((dsize_3 == dsize_2)) + check_err $? "MTU change: Invisible buffers account for '$dsize_3', expected '== $dsize_2'" + + mtu_restore $swp + tc qdisc del dev $swp clsact + + # After SPAN removal, hidden buffers should be back to the original sizes. + local buf0size_4=$(get_buf_size 0) + local tot_size_4=$(get_tot_size) + local dsize_4=$((tot_size_4 - buf0size_4)) + ((dsize_4 == dsize)) + check_err $? 
"SPAN removed: Invisible buffers account for '$dsize_4', expected '== $dsize'" + + log_test "${what}internal buffer size" +} + +test_tc_int_buf() +{ + local cell_size=$(devlink_cell_size_get) + local size=$((cell_size * 1000)) + + tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M + test_int_buf "TC: " + + __mlnx_qos -i $swp --buffer_size=$size,0,0,0,0,0,0,0 > /dev/null + test_int_buf "TC+buffsize: " + + __mlnx_qos -i $swp --buffer_size=0,0,0,0,0,0,0,0 > /dev/null + tc qdisc delete dev $swp root +} + +trap cleanup EXIT + +bail_on_lldpad +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh index faa51012cdac..0bf76f13c030 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh @@ -82,3 +82,17 @@ bail_on_lldpad() fi fi } + +__mlnx_qos() +{ + local err + + mlnx_qos "$@" 2>/dev/null + err=$? + + if ((err)); then + echo "Error ($err) in mlnx_qos $@" >/dev/stderr + fi + + return $err +} diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh index b025daea062d..8f164c80e215 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh @@ -145,12 +145,17 @@ switch_create() # Make sure that ingress quotas are smaller than egress so that there is # room for both streams of traffic to be admitted to shared buffer. 
+ devlink_port_pool_th_save $swp1 0 devlink_port_pool_th_set $swp1 0 5 + devlink_tc_bind_pool_th_save $swp1 0 ingress devlink_tc_bind_pool_th_set $swp1 0 ingress 0 5 + devlink_port_pool_th_save $swp2 0 devlink_port_pool_th_set $swp2 0 5 + devlink_tc_bind_pool_th_save $swp2 1 ingress devlink_tc_bind_pool_th_set $swp2 1 ingress 0 5 + devlink_port_pool_th_save $swp3 4 devlink_port_pool_th_set $swp3 4 12 } diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh new file mode 100755 index 000000000000..4d900bc1f76c --- /dev/null +++ b/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh @@ -0,0 +1,403 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# This test injects a 10-MB burst of traffic with VLAN tag and 802.1p priority +# of 1. This stream is consistently prioritized as priority 1, is put to PG +# buffer 1, and scheduled at TC 1. +# +# - the stream first ingresses through $swp1, where it is forwarded to $swp3 +# +# - then it ingresses through $swp4. Here it is put to a lossless buffer and put +# to a small pool ("PFC pool"). The traffic is forwarded to $swp2, which is +# shaped, and thus the PFC pool eventually fills, therefore the headroom +# fills, and $swp3 is paused. +# +# - since $swp3 now can't send traffic, the traffic ingressing $swp1 is kept at +# a pool ("overflow pool"). The overflow pool needs to be large enough to +# contain the whole burst. +# +# - eventually the PFC pool gets some traffic out, headroom therefore gets some +# traffic to the pool, and $swp3 is unpaused again. This way the traffic is +# gradually forwarded from the overflow pool, through the PFC pool, out of +# $swp2, and eventually to $h2. +# +# - if PFC works, all lossless flow packets that ingress through $swp1 should +# also be seen ingressing $h2. If it doesn't, there will be drops due to +# discrepancy between the speeds of $swp1 and $h2. 
+# +# - it should all play out relatively quickly, so that SLL and HLL will not +# cause drops. +# +# +-----------------------+ +# | H1 | +# | + $h1.111 | +# | | 192.0.2.33/28 | +# | | | +# | + $h1 | +# +---|-------------------+ +--------------------+ +# | | | +# +---|----------------------|--------------------|---------------------------+ +# | + $swp1 $swp3 + + $swp4 | +# | | iPOOL1 iPOOL0 | | iPOOL2 | +# | | ePOOL4 ePOOL5 | | ePOOL4 | +# | | 1Gbps | | 1Gbps | +# | | PFC:enabled=1 | | PFC:enabled=1 | +# | +-|----------------------|-+ +-|------------------------+ | +# | | + $swp1.111 $swp3.111 + | | + $swp4.111 | | +# | | | | | | +# | | BR1 | | BR2 | | +# | | | | | | +# | | | | + $swp2.111 | | +# | +--------------------------+ +---------|----------------+ | +# | | | +# | iPOOL0: 500KB dynamic | | +# | iPOOL1: 10MB static | | +# | iPOOL2: 1MB static + $swp2 | +# | ePOOL4: 500KB dynamic | iPOOL0 | +# | ePOOL5: 10MB static | ePOOL6 | +# | ePOOL6: "infinite" static | 200Mbps shaper | +# +-------------------------------------------------------|-------------------+ +# | +# +---|-------------------+ +# | + $h2 H2 | +# | | | +# | + $h2.111 | +# | 192.0.2.34/28 | +# +-----------------------+ +# +# iPOOL0+ePOOL4 is a helper pool for control traffic etc. +# iPOOL1+ePOOL5 are overflow pools. +# iPOOL2+ePOOL6 are PFC pools. 
+ +ALL_TESTS=" + ping_ipv4 + test_qos_pfc +" + +lib_dir=$(dirname $0)/../../../net/forwarding + +NUM_NETIFS=6 +source $lib_dir/lib.sh +source $lib_dir/devlink_lib.sh +source qos_lib.sh + +_1KB=1000 +_100KB=$((100 * _1KB)) +_500KB=$((500 * _1KB)) +_1MB=$((1000 * _1KB)) +_10MB=$((10 * _1MB)) + +h1_create() +{ + simple_if_init $h1 + mtu_set $h1 10000 + + vlan_create $h1 111 v$h1 192.0.2.33/28 +} + +h1_destroy() +{ + vlan_destroy $h1 111 + + mtu_restore $h1 + simple_if_fini $h1 +} + +h2_create() +{ + simple_if_init $h2 + mtu_set $h2 10000 + + vlan_create $h2 111 v$h2 192.0.2.34/28 +} + +h2_destroy() +{ + vlan_destroy $h2 111 + + mtu_restore $h2 + simple_if_fini $h2 +} + +switch_create() +{ + # pools + # ----- + + devlink_pool_size_thtype_save 0 + devlink_pool_size_thtype_save 4 + devlink_pool_size_thtype_save 1 + devlink_pool_size_thtype_save 5 + devlink_pool_size_thtype_save 2 + devlink_pool_size_thtype_save 6 + + devlink_port_pool_th_save $swp1 1 + devlink_port_pool_th_save $swp2 6 + devlink_port_pool_th_save $swp3 5 + devlink_port_pool_th_save $swp4 2 + + devlink_tc_bind_pool_th_save $swp1 1 ingress + devlink_tc_bind_pool_th_save $swp2 1 egress + devlink_tc_bind_pool_th_save $swp3 1 egress + devlink_tc_bind_pool_th_save $swp4 1 ingress + + # Control traffic pools. Just reduce the size. Keep them dynamic so that + # we don't need to change all the uninteresting quotas. + devlink_pool_size_thtype_set 0 dynamic $_500KB + devlink_pool_size_thtype_set 4 dynamic $_500KB + + # Overflow pools. + devlink_pool_size_thtype_set 1 static $_10MB + devlink_pool_size_thtype_set 5 static $_10MB + + # PFC pools. As per the writ, the size of egress PFC pool should be + # infinite, but actually it just needs to be large enough to not matter + # in practice, so reuse the 10MB limit. 
+ devlink_pool_size_thtype_set 2 static $_1MB + devlink_pool_size_thtype_set 6 static $_10MB + + # $swp1 + # ----- + + ip link set dev $swp1 up + mtu_set $swp1 10000 + vlan_create $swp1 111 + ip link set dev $swp1.111 type vlan ingress-qos-map 0:0 1:1 + + devlink_port_pool_th_set $swp1 1 $_10MB + devlink_tc_bind_pool_th_set $swp1 1 ingress 1 $_10MB + + # Configure qdisc so that we can configure PG and therefore pool + # assignment. + tc qdisc replace dev $swp1 root handle 1: \ + ets bands 8 strict 8 priomap 7 6 + __mlnx_qos -i $swp1 --prio2buffer=0,1,0,0,0,0,0,0 >/dev/null + + # $swp2 + # ----- + + ip link set dev $swp2 up + mtu_set $swp2 10000 + vlan_create $swp2 111 + ip link set dev $swp2.111 type vlan egress-qos-map 0:0 1:1 + + devlink_port_pool_th_set $swp2 6 $_10MB + devlink_tc_bind_pool_th_set $swp2 1 egress 6 $_10MB + + # prio 0->TC0 (band 7), 1->TC1 (band 6). TC1 is shaped. + tc qdisc replace dev $swp2 root handle 1: \ + ets bands 8 strict 8 priomap 7 6 + tc qdisc replace dev $swp2 parent 1:7 handle 17: \ + tbf rate 200Mbit burst 131072 limit 1M + + # $swp3 + # ----- + + ip link set dev $swp3 up + mtu_set $swp3 10000 + vlan_create $swp3 111 + ip link set dev $swp3.111 type vlan egress-qos-map 0:0 1:1 + + devlink_port_pool_th_set $swp3 5 $_10MB + devlink_tc_bind_pool_th_set $swp3 1 egress 5 $_10MB + + # prio 0->TC0 (band 7), 1->TC1 (band 6) + tc qdisc replace dev $swp3 root handle 1: \ + ets bands 8 strict 8 priomap 7 6 + + # Need to enable PFC so that PAUSE takes effect. Therefore need to put + # the lossless prio into a buffer of its own. Don't bother with buffer + # sizes though, there is not going to be any pressure in the "backward" + # direction. 
+ __mlnx_qos -i $swp3 --prio2buffer=0,1,0,0,0,0,0,0 >/dev/null + __mlnx_qos -i $swp3 --pfc=0,1,0,0,0,0,0,0 >/dev/null + + # $swp4 + # ----- + + ip link set dev $swp4 up + mtu_set $swp4 10000 + vlan_create $swp4 111 + ip link set dev $swp4.111 type vlan ingress-qos-map 0:0 1:1 + + devlink_port_pool_th_set $swp4 2 $_1MB + devlink_tc_bind_pool_th_set $swp4 1 ingress 2 $_1MB + + # Configure qdisc so that we can hand-tune headroom. + tc qdisc replace dev $swp4 root handle 1: \ + ets bands 8 strict 8 priomap 7 6 + __mlnx_qos -i $swp4 --prio2buffer=0,1,0,0,0,0,0,0 >/dev/null + __mlnx_qos -i $swp4 --pfc=0,1,0,0,0,0,0,0 >/dev/null + # PG0 will get autoconfigured to Xoff, give PG1 arbitrarily 100K, which + # is (-2*MTU) about 80K of delay provision. + __mlnx_qos -i $swp4 --buffer_size=0,$_100KB,0,0,0,0,0,0 >/dev/null + + # bridges + # ------- + + ip link add name br1 type bridge vlan_filtering 0 + ip link set dev $swp1.111 master br1 + ip link set dev $swp3.111 master br1 + ip link set dev br1 up + + ip link add name br2 type bridge vlan_filtering 0 + ip link set dev $swp2.111 master br2 + ip link set dev $swp4.111 master br2 + ip link set dev br2 up +} + +switch_destroy() +{ + # Do this first so that we can reset the limits to values that are only + # valid for the original static / dynamic setting. 
+ devlink_pool_size_thtype_restore 6 + devlink_pool_size_thtype_restore 5 + devlink_pool_size_thtype_restore 4 + devlink_pool_size_thtype_restore 2 + devlink_pool_size_thtype_restore 1 + devlink_pool_size_thtype_restore 0 + + # bridges + # ------- + + ip link set dev br2 down + ip link set dev $swp4.111 nomaster + ip link set dev $swp2.111 nomaster + ip link del dev br2 + + ip link set dev br1 down + ip link set dev $swp3.111 nomaster + ip link set dev $swp1.111 nomaster + ip link del dev br1 + + # $swp4 + # ----- + + __mlnx_qos -i $swp4 --buffer_size=0,0,0,0,0,0,0,0 >/dev/null + __mlnx_qos -i $swp4 --pfc=0,0,0,0,0,0,0,0 >/dev/null + __mlnx_qos -i $swp4 --prio2buffer=0,0,0,0,0,0,0,0 >/dev/null + tc qdisc del dev $swp4 root + + devlink_tc_bind_pool_th_restore $swp4 1 ingress + devlink_port_pool_th_restore $swp4 2 + + vlan_destroy $swp4 111 + mtu_restore $swp4 + ip link set dev $swp4 down + + # $swp3 + # ----- + + __mlnx_qos -i $swp3 --pfc=0,0,0,0,0,0,0,0 >/dev/null + __mlnx_qos -i $swp3 --prio2buffer=0,0,0,0,0,0,0,0 >/dev/null + tc qdisc del dev $swp3 root + + devlink_tc_bind_pool_th_restore $swp3 1 egress + devlink_port_pool_th_restore $swp3 5 + + vlan_destroy $swp3 111 + mtu_restore $swp3 + ip link set dev $swp3 down + + # $swp2 + # ----- + + tc qdisc del dev $swp2 parent 1:7 + tc qdisc del dev $swp2 root + + devlink_tc_bind_pool_th_restore $swp2 1 egress + devlink_port_pool_th_restore $swp2 6 + + vlan_destroy $swp2 111 + mtu_restore $swp2 + ip link set dev $swp2 down + + # $swp1 + # ----- + + __mlnx_qos -i $swp1 --prio2buffer=0,0,0,0,0,0,0,0 >/dev/null + tc qdisc del dev $swp1 root + + devlink_tc_bind_pool_th_restore $swp1 1 ingress + devlink_port_pool_th_restore $swp1 1 + + vlan_destroy $swp1 111 + mtu_restore $swp1 + ip link set dev $swp1 down +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + swp3=${NETIFS[p5]} + swp4=${NETIFS[p6]} + + h2mac=$(mac_get $h2) + + vrf_prepare + + h1_create + h2_create + 
switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h2_destroy + h1_destroy + + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1 192.0.2.34 +} + +test_qos_pfc() +{ + RET=0 + + # 10M pool, each packet is 8K of payload + headers + local pkts=$((_10MB / 8050)) + local size=$((pkts * 8050)) + local in0=$(ethtool_stats_get $swp1 rx_octets_prio_1) + local out0=$(ethtool_stats_get $swp2 tx_octets_prio_1) + + $MZ $h1 -p 8000 -Q 1:111 -A 192.0.2.33 -B 192.0.2.34 \ + -a own -b $h2mac -c $pkts -t udp -q + sleep 2 + + local in1=$(ethtool_stats_get $swp1 rx_octets_prio_1) + local out1=$(ethtool_stats_get $swp2 tx_octets_prio_1) + + local din=$((in1 - in0)) + local dout=$((out1 - out0)) + + local pct_in=$((din * 100 / size)) + + ((pct_in > 95 && pct_in < 105)) + check_err $? "Relative ingress out of expected bounds, $pct_in% should be 100%" + + ((dout == din)) + check_err $? "$((din - dout)) bytes out of $din ingressed got lost" + + log_test "PFC" +} + +trap cleanup EXIT + +bail_on_lldpad +setup_prepare +setup_wait +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh index 94c37124a840..af64bc9ea8ab 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh @@ -27,11 +27,17 @@ switch_create() # amount of traffic that is admitted to the shared buffers. This makes # sure that there is always enough traffic of all types to select from # for the DWRR process. 
+ devlink_port_pool_th_save $swp1 0 devlink_port_pool_th_set $swp1 0 12 + devlink_tc_bind_pool_th_save $swp1 0 ingress devlink_tc_bind_pool_th_set $swp1 0 ingress 0 12 + devlink_port_pool_th_save $swp2 4 devlink_port_pool_th_set $swp2 4 12 + devlink_tc_bind_pool_th_save $swp2 7 egress devlink_tc_bind_pool_th_set $swp2 7 egress 4 5 + devlink_tc_bind_pool_th_save $swp2 6 egress devlink_tc_bind_pool_th_set $swp2 6 egress 4 5 + devlink_tc_bind_pool_th_save $swp2 5 egress devlink_tc_bind_pool_th_set $swp2 5 egress 4 5 # Note: sch_ets_core.sh uses VLAN ingress-qos-map to assign packet diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh index 517297a14ecf..b0cb1aaffdda 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh @@ -208,6 +208,7 @@ switch_create() ip link set dev br2_11 up local size=$(devlink_pool_size_thtype 0 | cut -d' ' -f 1) + devlink_port_pool_th_save $swp3 8 devlink_port_pool_th_set $swp3 8 $size } diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh index 4b96561c462f..3e3e06ea5703 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh @@ -24,6 +24,13 @@ tc_police_switch_destroy() simple_if_fini $swp1 } +tc_police_addr() +{ + local num=$1; shift + + printf "2001:db8:1::%x" $num +} + tc_police_rules_create() { local count=$1; shift @@ -34,8 +41,9 @@ tc_police_rules_create() for ((i = 0; i < count; ++i)); do cat >> $TC_POLICE_BATCH_FILE <<-EOF filter add dev $swp1 ingress \ - prot ip \ - flower skip_sw \ + prot ipv6 \ + pref 1000 \ + flower skip_sw dst_ip $(tc_police_addr $i) \ action police rate 10mbit burst 100k \ conform-exceed drop/ok EOF diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh 
b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh index de4b32fc4223..40909c254365 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh @@ -23,6 +23,27 @@ fw_flash_test() devlink dev flash $DL_HANDLE file dummy check_err $? "Failed to flash with status updates on" + devlink dev flash $DL_HANDLE file dummy component fw.mgmt + check_err $? "Failed to flash with component attribute" + + devlink dev flash $DL_HANDLE file dummy overwrite settings + check_fail $? "Flash with overwrite settings should be rejected" + + echo "1"> $DEBUGFS_DIR/fw_update_overwrite_mask + check_err $? "Failed to change allowed overwrite mask" + + devlink dev flash $DL_HANDLE file dummy overwrite settings + check_err $? "Failed to flash with settings overwrite enabled" + + devlink dev flash $DL_HANDLE file dummy overwrite identifiers + check_fail $? "Flash with overwrite settings should be identifiers" + + echo "3"> $DEBUGFS_DIR/fw_update_overwrite_mask + check_err $? "Failed to change allowed overwrite mask" + + devlink dev flash $DL_HANDLE file dummy overwrite identifiers overwrite settings + check_err $? "Failed to flash with settings and identifiers overwrite enabled" + echo "n"> $DEBUGFS_DIR/fw_update_status check_err $? 
"Failed to disable status updates" diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-pause.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-pause.sh new file mode 100755 index 000000000000..25c896b9e2eb --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-pause.sh @@ -0,0 +1,108 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-only + +NSIM_ID=$((RANDOM % 1024)) +NSIM_DEV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_ID +NSIM_DEV_DFS=/sys/kernel/debug/netdevsim/netdevsim$NSIM_ID/ports/0 +NSIM_NETDEV= +num_passes=0 +num_errors=0 + +function cleanup_nsim { + if [ -e $NSIM_DEV_SYS ]; then + echo $NSIM_ID > /sys/bus/netdevsim/del_device + fi +} + +function cleanup { + cleanup_nsim +} + +trap cleanup EXIT + +function get_netdev_name { + local -n old=$1 + + new=$(ls /sys/class/net) + + for netdev in $new; do + for check in $old; do + [ $netdev == $check ] && break + done + + if [ $netdev != $check ]; then + echo $netdev + break + fi + done +} + +function check { + local code=$1 + local str=$2 + local exp_str=$3 + + if [ $code -ne 0 ]; then + ((num_errors++)) + return + fi + + if [ "$str" != "$exp_str" ]; then + echo -e "Expected: '$exp_str', got '$str'" + ((num_errors++)) + return + fi + + ((num_passes++)) +} + +# Bail if ethtool is too old +if ! ethtool -h | grep include-stat 2>&1 >/dev/null; then + echo "SKIP: No --include-statistics support in ethtool" + exit 4 +fi + +# Make a netdevsim +old_netdevs=$(ls /sys/class/net) + +modprobe netdevsim +echo $NSIM_ID > /sys/bus/netdevsim/new_device + +NSIM_NETDEV=`get_netdev_name old_netdevs` + +set -o pipefail + +echo n > $NSIM_DEV_DFS/ethtool/pause/report_stats_tx +echo n > $NSIM_DEV_DFS/ethtool/pause/report_stats_rx + +s=$(ethtool --json -a $NSIM_NETDEV | jq '.[].statistics') +check $? "$s" "null" + +s=$(ethtool -I --json -a $NSIM_NETDEV | jq '.[].statistics') +check $? 
"$s" "{}" + +echo y > $NSIM_DEV_DFS/ethtool/pause/report_stats_tx + +s=$(ethtool -I --json -a $NSIM_NETDEV | jq '.[].statistics | length') +check $? "$s" "1" + +s=$(ethtool -I --json -a $NSIM_NETDEV | jq '.[].statistics.tx_pause_frames') +check $? "$s" "2" + +echo y > $NSIM_DEV_DFS/ethtool/pause/report_stats_rx + +s=$(ethtool -I --json -a $NSIM_NETDEV | jq '.[].statistics | length') +check $? "$s" "2" + +s=$(ethtool -I --json -a $NSIM_NETDEV | jq '.[].statistics.rx_pause_frames') +check $? "$s" "1" +s=$(ethtool -I --json -a $NSIM_NETDEV | jq '.[].statistics.tx_pause_frames') +check $? "$s" "2" + +if [ $num_errors -eq 0 ]; then + echo "PASSED all $((num_passes)) checks" + exit 0 +else + echo "FAILED $num_errors/$((num_errors+num_passes)) checks" + exit 1 +fi diff --git a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh index ba1d53b9f815..1b08e042cf94 100644..100755 --- a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh @@ -7,6 +7,7 @@ NSIM_DEV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_ID NSIM_DEV_DFS=/sys/kernel/debug/netdevsim/netdevsim$NSIM_ID NSIM_NETDEV= HAS_ETHTOOL= +STATIC_ENTRIES= EXIT_STATUS=0 num_cases=0 num_errors=0 @@ -193,6 +194,21 @@ function check_tables { sleep 0.02 ((retries--)) done + + if [ -n "$HAS_ETHTOOL" -a -n "${STATIC_ENTRIES[0]}" ]; then + fail=0 + for i in "${!STATIC_ENTRIES[@]}"; do + pp_expected=`pre_ethtool ${STATIC_ENTRIES[i]}` + cnt=$(ethtool --show-tunnels $NSIM_NETDEV | grep -c "$pp_expected") + if [ $cnt -ne 1 ]; then + err_cnt "ethtool static entry: $pfx - $msg" + echo " check_table: ethtool does not contain '$pp_expected'" + ethtool --show-tunnels $NSIM_NETDEV + fail=1 + fi + done + [ $fail == 0 ] && pass_cnt + fi } function print_table { @@ -775,6 +791,157 @@ for port in 0 1; do exp1=( 0 0 0 0 ) done +cleanup_nsim + +# shared port tables +pfx="table sharing" + 
+echo $NSIM_ID > /sys/bus/netdevsim/new_device +echo 0 > $NSIM_DEV_SYS/del_port + +echo 0 > $NSIM_DEV_DFS/udp_ports_open_only +echo 1 > $NSIM_DEV_DFS/udp_ports_sleep +echo 1 > $NSIM_DEV_DFS/udp_ports_shared + +old_netdevs=$(ls /sys/class/net) +echo 1 > $NSIM_DEV_SYS/new_port +NSIM_NETDEV=`get_netdev_name old_netdevs` +old_netdevs=$(ls /sys/class/net) +echo 2 > $NSIM_DEV_SYS/new_port +NSIM_NETDEV2=`get_netdev_name old_netdevs` + +msg="VxLAN v4 devices" +exp0=( `mke 4789 1` 0 0 0 ) +exp1=( 0 0 0 0 ) +new_vxlan vxlan0 4789 $NSIM_NETDEV +new_vxlan vxlan1 4789 $NSIM_NETDEV2 + +msg="VxLAN v4 devices go down" +exp0=( 0 0 0 0 ) +ifconfig vxlan1 down +ifconfig vxlan0 down +check_tables + +for ifc in vxlan0 vxlan1; do + ifconfig $ifc up +done + +msg="VxLAN v6 device" +exp0=( `mke 4789 1` `mke 4790 1` 0 0 ) +new_vxlan vxlanC 4790 $NSIM_NETDEV 6 + +msg="Geneve device" +exp1=( `mke 6081 2` 0 0 0 ) +new_geneve gnv0 6081 + +msg="NIC device goes down" +ifconfig $NSIM_NETDEV down +check_tables + +msg="NIC device goes up again" +ifconfig $NSIM_NETDEV up +check_tables + +for i in `seq 2`; do + msg="turn feature off - 1, rep $i" + ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload off + check_tables + + msg="turn feature off - 2, rep $i" + exp0=( 0 0 0 0 ) + exp1=( 0 0 0 0 ) + ethtool -K $NSIM_NETDEV2 rx-udp_tunnel-port-offload off + check_tables + + msg="turn feature on - 1, rep $i" + exp0=( `mke 4789 1` `mke 4790 1` 0 0 ) + exp1=( `mke 6081 2` 0 0 0 ) + ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload on + check_tables + + msg="turn feature on - 2, rep $i" + ethtool -K $NSIM_NETDEV2 rx-udp_tunnel-port-offload on + check_tables +done + +msg="tunnels destroyed 1" +cleanup_tuns +exp0=( 0 0 0 0 ) +exp1=( 0 0 0 0 ) +check_tables + +overflow_table0 "overflow NIC table" + +msg="re-add a port" + +echo 2 > $NSIM_DEV_SYS/del_port +echo 2 > $NSIM_DEV_SYS/new_port +check_tables + +msg="replace VxLAN in overflow table" +exp0=( `mke 10000 1` `mke 10004 1` `mke 10002 1` `mke 10003 1` ) +del_dev 
vxlan1 + +msg="vacate VxLAN in overflow table" +exp0=( `mke 10000 1` `mke 10004 1` 0 `mke 10003 1` ) +del_dev vxlan2 + +echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset +check_tables + +msg="tunnels destroyed 2" +cleanup_tuns +exp0=( 0 0 0 0 ) +exp1=( 0 0 0 0 ) +check_tables + +echo 1 > $NSIM_DEV_SYS/del_port +echo 2 > $NSIM_DEV_SYS/del_port + +cleanup_nsim + +# Static IANA port +pfx="static IANA vxlan" + +echo $NSIM_ID > /sys/bus/netdevsim/new_device +echo 0 > $NSIM_DEV_SYS/del_port + +echo 1 > $NSIM_DEV_DFS/udp_ports_static_iana_vxlan +STATIC_ENTRIES=( `mke 4789 1` ) + +port=1 +old_netdevs=$(ls /sys/class/net) +echo $port > $NSIM_DEV_SYS/new_port +NSIM_NETDEV=`get_netdev_name old_netdevs` + +msg="check empty" +exp0=( 0 0 0 0 ) +exp1=( 0 0 0 0 ) +check_tables + +msg="add on static port" +new_vxlan vxlan0 4789 $NSIM_NETDEV +new_vxlan vxlan1 4789 $NSIM_NETDEV + +msg="add on different port" +exp0=( `mke 4790 1` 0 0 0 ) +new_vxlan vxlan2 4790 $NSIM_NETDEV + +cleanup_tuns + +msg="tunnels destroyed" +exp0=( 0 0 0 0 ) +exp1=( 0 0 0 0 ) +check_tables + +msg="different type" +new_geneve gnv0 4789 + +cleanup_tuns +cleanup_nsim + +# END + modprobe -r netdevsim if [ $num_errors -eq 0 ]; then diff --git a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh new file mode 100755 index 000000000000..beee0d5646a6 --- /dev/null +++ b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh @@ -0,0 +1,316 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# Copyright 2020 NXP Semiconductors + +WAIT_TIME=1 +NUM_NETIFS=4 +lib_dir=$(dirname $0)/../../../net/forwarding +source $lib_dir/tc_common.sh +source $lib_dir/lib.sh + +require_command tcpdump + +# +# +---------------------------------------------+ +# | DUT ports Generator ports | +# | +--------+ +--------+ +--------+ +--------+ | +# | | | | | | | | | | +# | | eth0 | | eth1 | | eth2 | | eth3 | | +# | | | | | | | | | | +# 
+-+--------+-+--------+-+--------+-+--------+-+ +# | | | | +# | | | | +# | +-----------+ | +# | | +# +--------------------------------+ + +eth0=${NETIFS[p1]} +eth1=${NETIFS[p2]} +eth2=${NETIFS[p3]} +eth3=${NETIFS[p4]} + +eth0_mac="de:ad:be:ef:00:00" +eth1_mac="de:ad:be:ef:00:01" +eth2_mac="de:ad:be:ef:00:02" +eth3_mac="de:ad:be:ef:00:03" + +# Helpers to map a VCAP IS1 and VCAP IS2 lookup and policy to a chain number +# used by the kernel driver. The numbers are: +# VCAP IS1 lookup 0: 10000 +# VCAP IS1 lookup 1: 11000 +# VCAP IS1 lookup 2: 12000 +# VCAP IS2 lookup 0 policy 0: 20000 +# VCAP IS2 lookup 0 policy 1: 20001 +# VCAP IS2 lookup 0 policy 255: 20255 +# VCAP IS2 lookup 1 policy 0: 21000 +# VCAP IS2 lookup 1 policy 1: 21001 +# VCAP IS2 lookup 1 policy 255: 21255 +IS1() +{ + local lookup=$1 + + echo $((10000 + 1000 * lookup)) +} + +IS2() +{ + local lookup=$1 + local pag=$2 + + echo $((20000 + 1000 * lookup + pag)) +} + +ES0() +{ + echo 0 +} + +# The Ocelot switches have a fixed ingress pipeline composed of: +# +# +----------------------------------------------+ +-----------------------------------------+ +# | VCAP IS1 | | VCAP IS2 | +# | | | | +# | +----------+ +----------+ +----------+ | | +----------+ +----------+ | +# | | Lookup 0 | | Lookup 1 | | Lookup 2 | | --+------> PAG 0: | Lookup 0 | -> | Lookup 1 | | +# | +----------+ -> +----------+ -> +----------+ | | | +----------+ +----------+ | +# | |key&action| |key&action| |key&action| | | | |key&action| |key&action| | +# | |key&action| |key&action| |key&action| | | | | .. | | .. | | +# | | .. | | .. | | .. | | | | +----------+ +----------+ | +# | +----------+ +----------+ +----------+ | | | | +# | selects PAG | | | +----------+ +----------+ | +# +----------------------------------------------+ +------> PAG 1: | Lookup 0 | -> | Lookup 1 | | +# | | +----------+ +----------+ | +# | | |key&action| |key&action| | +# | | | .. | | .. | | +# | | +----------+ +----------+ | +# | | ... 
| +# | | | +# | | +----------+ +----------+ | +# +----> PAG 254: | Lookup 0 | -> | Lookup 1 | | +# | | +----------+ +----------+ | +# | | |key&action| |key&action| | +# | | | .. | | .. | | +# | | +----------+ +----------+ | +# | | | +# | | +----------+ +----------+ | +# +----> PAG 255: | Lookup 0 | -> | Lookup 1 | | +# | +----------+ +----------+ | +# | |key&action| |key&action| | +# | | .. | | .. | | +# | +----------+ +----------+ | +# +-----------------------------------------+ +# +# Both the VCAP IS1 (Ingress Stage 1) and IS2 (Ingress Stage 2) are indexed +# (looked up) multiple times: IS1 3 times, and IS2 2 times. Each filter +# (key and action pair) can be configured to only match during the first, or +# second, etc, lookup. +# +# During one TCAM lookup, the filter processing stops at the first entry that +# matches, then the pipeline jumps to the next lookup. +# The driver maps each individual lookup of each individual ingress TCAM to a +# separate chain number. For correct rule offloading, it is mandatory that each +# filter installed in one TCAM is terminated by a non-optional GOTO action to +# the next lookup from the fixed pipeline. +# +# A chain can only be used if there is a GOTO action correctly set up from the +# prior lookup in the processing pipeline. Setting up all chains is not +# mandatory. + +# NOTE: VCAP IS1 currently uses only S1_NORMAL half keys and VCAP IS2 +# dynamically chooses between MAC_ETYPE, ARP, IP4_TCP_UDP, IP4_OTHER, which are +# all half keys as well. 
+ +create_tcam_skeleton() +{ + local eth=$1 + + tc qdisc add dev $eth clsact + + # VCAP IS1 is the Ingress Classification TCAM and can offload the + # following actions: + # - skbedit priority + # - vlan pop + # - vlan modify + # - goto (only in lookup 2, the last IS1 lookup) + tc filter add dev $eth ingress chain 0 pref 49152 flower \ + skip_sw action goto chain $(IS1 0) + tc filter add dev $eth ingress chain $(IS1 0) pref 49152 \ + flower skip_sw action goto chain $(IS1 1) + tc filter add dev $eth ingress chain $(IS1 1) pref 49152 \ + flower skip_sw action goto chain $(IS1 2) + tc filter add dev $eth ingress chain $(IS1 2) pref 49152 \ + flower skip_sw action goto chain $(IS2 0 0) + + # VCAP IS2 is the Security Enforcement ingress TCAM and can offload the + # following actions: + # - trap + # - drop + # - police + # The two VCAP IS2 lookups can be segmented into up to 256 groups of + # rules, called Policies. A Policy is selected through the Policy + # Association Group (PAG) action of VCAP IS1 (which is the + # GOTO offload). 
+ tc filter add dev $eth ingress chain $(IS2 0 0) pref 49152 \ + flower skip_sw action goto chain $(IS2 1 0) +} + +setup_prepare() +{ + create_tcam_skeleton $eth0 + + ip link add br0 type bridge + ip link set $eth0 master br0 + ip link set $eth1 master br0 + ip link set br0 up + + ip link add link $eth3 name $eth3.100 type vlan id 100 + ip link set $eth3.100 up + + ip link add link $eth3 name $eth3.200 type vlan id 200 + ip link set $eth3.200 up + + tc filter add dev $eth0 ingress chain $(IS1 1) pref 1 \ + protocol 802.1Q flower skip_sw vlan_id 100 \ + action vlan pop \ + action goto chain $(IS1 2) + + tc filter add dev $eth0 egress chain $(ES0) pref 1 \ + flower skip_sw indev $eth1 \ + action vlan push protocol 802.1Q id 100 + + tc filter add dev $eth0 ingress chain $(IS1 0) pref 2 \ + protocol ipv4 flower skip_sw src_ip 10.1.1.2 \ + action skbedit priority 7 \ + action goto chain $(IS1 1) + + tc filter add dev $eth0 ingress chain $(IS2 0 0) pref 1 \ + protocol ipv4 flower skip_sw ip_proto udp dst_port 5201 \ + action police rate 50mbit burst 64k \ + action goto chain $(IS2 1 0) +} + +cleanup() +{ + ip link del $eth3.200 + ip link del $eth3.100 + tc qdisc del dev $eth0 clsact + ip link del br0 +} + +test_vlan_pop() +{ + printf "Testing VLAN pop.. " + + tcpdump_start $eth2 + + # Work around Mausezahn VLAN builder bug + # (https://github.com/netsniff-ng/netsniff-ng/issues/225) by using + # an 8021q upper + $MZ $eth3.100 -q -c 1 -p 64 -a $eth3_mac -b $eth2_mac -t ip + + sleep 1 + + tcpdump_stop + + if tcpdump_show | grep -q "$eth3_mac > $eth2_mac, ethertype IPv4"; then + echo "OK" + else + echo "FAIL" + fi + + tcpdump_cleanup +} + +test_vlan_push() +{ + printf "Testing VLAN push.. 
" + + tcpdump_start $eth3.100 + + $MZ $eth2 -q -c 1 -p 64 -a $eth2_mac -b $eth3_mac -t ip + + sleep 1 + + tcpdump_stop + + if tcpdump_show | grep -q "$eth2_mac > $eth3_mac"; then + echo "OK" + else + echo "FAIL" + fi + + tcpdump_cleanup +} + +test_vlan_modify() +{ + printf "Testing VLAN modification.. " + + ip link set br0 type bridge vlan_filtering 1 + bridge vlan add dev $eth0 vid 200 + bridge vlan add dev $eth0 vid 300 + bridge vlan add dev $eth1 vid 300 + + tc filter add dev $eth0 ingress chain $(IS1 2) pref 3 \ + protocol 802.1Q flower skip_sw vlan_id 200 \ + action vlan modify id 300 \ + action goto chain $(IS2 0 0) + + tcpdump_start $eth2 + + $MZ $eth3.200 -q -c 1 -p 64 -a $eth3_mac -b $eth2_mac -t ip + + sleep 1 + + tcpdump_stop + + if tcpdump_show | grep -q "$eth3_mac > $eth2_mac, .* vlan 300"; then + echo "OK" + else + echo "FAIL" + fi + + tcpdump_cleanup + + tc filter del dev $eth0 ingress chain $(IS1 2) pref 3 + + bridge vlan del dev $eth0 vid 200 + bridge vlan del dev $eth0 vid 300 + bridge vlan del dev $eth1 vid 300 + ip link set br0 type bridge vlan_filtering 0 +} + +test_skbedit_priority() +{ + local num_pkts=100 + + printf "Testing frame prioritization.. 
" + + before=$(ethtool_stats_get $eth0 'rx_green_prio_7') + + $MZ $eth3 -q -c $num_pkts -p 64 -a $eth3_mac -b $eth2_mac -t ip -A 10.1.1.2 + + after=$(ethtool_stats_get $eth0 'rx_green_prio_7') + + if [ $((after - before)) = $num_pkts ]; then + echo "OK" + else + echo "FAIL" + fi +} + +trap cleanup EXIT + +ALL_TESTS=" + test_vlan_pop + test_vlan_push + test_vlan_modify + test_skbedit_priority +" + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/exec/.gitignore b/tools/testing/selftests/exec/.gitignore index 344a99c6da1b..9e2f00343f15 100644 --- a/tools/testing/selftests/exec/.gitignore +++ b/tools/testing/selftests/exec/.gitignore @@ -7,6 +7,7 @@ execveat.moved execveat.path.ephemeral execveat.ephemeral execveat.denatured +/load_address_* /recursion-depth xxxxxxxx* pipe diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile index 0a13b110c1e6..cf69b2fcce59 100644 --- a/tools/testing/selftests/exec/Makefile +++ b/tools/testing/selftests/exec/Makefile @@ -4,7 +4,7 @@ CFLAGS += -Wno-nonnull CFLAGS += -D_GNU_SOURCE TEST_PROGS := binfmt_script non-regular -TEST_GEN_PROGS := execveat +TEST_GEN_PROGS := execveat load_address_4096 load_address_2097152 load_address_16777216 TEST_GEN_FILES := execveat.symlink execveat.denatured script subdir pipe # Makefile is a run-time dependency, since it's accessed by the execveat test TEST_FILES := Makefile @@ -27,4 +27,9 @@ $(OUTPUT)/execveat.symlink: $(OUTPUT)/execveat $(OUTPUT)/execveat.denatured: $(OUTPUT)/execveat cp $< $@ chmod -x $@ - +$(OUTPUT)/load_address_4096: load_address.c + $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000 -pie $< -o $@ +$(OUTPUT)/load_address_2097152: load_address.c + $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x200000 -pie $< -o $@ +$(OUTPUT)/load_address_16777216: load_address.c + $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000000 -pie $< -o $@ diff --git a/tools/testing/selftests/exec/load_address.c 
b/tools/testing/selftests/exec/load_address.c new file mode 100644 index 000000000000..d487c2f6a615 --- /dev/null +++ b/tools/testing/selftests/exec/load_address.c @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: GPL-2.0-only +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#include <link.h> +#include <stdio.h> +#include <stdlib.h> + +struct Statistics { + unsigned long long load_address; + unsigned long long alignment; +}; + +int ExtractStatistics(struct dl_phdr_info *info, size_t size, void *data) +{ + struct Statistics *stats = (struct Statistics *) data; + int i; + + if (info->dlpi_name != NULL && info->dlpi_name[0] != '\0') { + // Ignore headers from other than the executable. + return 2; + } + + stats->load_address = (unsigned long long) info->dlpi_addr; + stats->alignment = 0; + + for (i = 0; i < info->dlpi_phnum; i++) { + if (info->dlpi_phdr[i].p_type != PT_LOAD) + continue; + + if (info->dlpi_phdr[i].p_align > stats->alignment) + stats->alignment = info->dlpi_phdr[i].p_align; + } + + return 1; // Terminate dl_iterate_phdr. 
+} + +int main(int argc, char **argv) +{ + struct Statistics extracted; + unsigned long long misalign; + int ret; + + ret = dl_iterate_phdr(ExtractStatistics, &extracted); + if (ret != 1) { + fprintf(stderr, "FAILED\n"); + return 1; + } + + if (extracted.alignment == 0) { + fprintf(stderr, "No alignment found\n"); + return 1; + } else if (extracted.alignment & (extracted.alignment - 1)) { + fprintf(stderr, "Alignment is not a power of 2\n"); + return 1; + } + + misalign = extracted.load_address & (extracted.alignment - 1); + if (misalign) { + printf("alignment = %llu, load_address = %llu\n", + extracted.alignment, extracted.load_address); + fprintf(stderr, "FAILED\n"); + return 1; + } + + fprintf(stderr, "PASS\n"); + return 0; +} diff --git a/tools/testing/selftests/firmware/.gitignore b/tools/testing/selftests/firmware/.gitignore new file mode 100644 index 000000000000..62abc92a94c4 --- /dev/null +++ b/tools/testing/selftests/firmware/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +fw_namespace diff --git a/tools/testing/selftests/firmware/fw_filesystem.sh b/tools/testing/selftests/firmware/fw_filesystem.sh index fcc281373b4d..c2a2a100114b 100755 --- a/tools/testing/selftests/firmware/fw_filesystem.sh +++ b/tools/testing/selftests/firmware/fw_filesystem.sh @@ -149,6 +149,26 @@ config_unset_into_buf() echo 0 > $DIR/config_into_buf } +config_set_buf_size() +{ + echo $1 > $DIR/config_buf_size +} + +config_set_file_offset() +{ + echo $1 > $DIR/config_file_offset +} + +config_set_partial() +{ + echo 1 > $DIR/config_partial +} + +config_unset_partial() +{ + echo 0 > $DIR/config_partial +} + config_set_sync_direct() { echo 1 > $DIR/config_sync_direct @@ -207,6 +227,35 @@ read_firmwares() done } +read_partial_firmwares() +{ + if [ "$(cat $DIR/config_into_buf)" == "1" ]; then + fwfile="${FW_INTO_BUF}" + else + fwfile="${FW}" + fi + + if [ "$1" = "xzonly" ]; then + fwfile="${fwfile}-orig" + fi + + # Strip fwfile down to match partial offset and length + 
partial_data="$(cat $fwfile)" + partial_data="${partial_data:$2:$3}" + + for i in $(seq 0 3); do + config_set_read_fw_idx $i + + read_firmware="$(cat $DIR/read_firmware)" + + # Verify the contents are what we expect. + if [ $read_firmware != $partial_data ]; then + echo "request #$i: partial firmware was not loaded" >&2 + exit 1 + fi + done +} + read_firmwares_expect_nofile() { for i in $(seq 0 3); do @@ -242,6 +291,21 @@ test_batched_request_firmware_into_buf_nofile() echo "OK" } +test_request_partial_firmware_into_buf_nofile() +{ + echo -n "Test request_partial_firmware_into_buf() off=$1 size=$2 nofile: " + config_reset + config_set_name nope-test-firmware.bin + config_set_into_buf + config_set_partial + config_set_buf_size $2 + config_set_file_offset $1 + config_trigger_sync + read_firmwares_expect_nofile + release_all_firmware + echo "OK" +} + test_batched_request_firmware_direct_nofile() { echo -n "Batched request_firmware_direct() nofile try #$1: " @@ -356,6 +420,21 @@ test_request_firmware_nowait_custom() echo "OK" } +test_request_partial_firmware_into_buf() +{ + echo -n "Test request_partial_firmware_into_buf() off=$1 size=$2: " + config_reset + config_set_name $TEST_FIRMWARE_INTO_BUF_FILENAME + config_set_into_buf + config_set_partial + config_set_buf_size $2 + config_set_file_offset $1 + config_trigger_sync + read_partial_firmwares normal $1 $2 + release_all_firmware + echo "OK" +} + # Only continue if batched request triggers are present on the # test-firmware driver test_config_present @@ -383,6 +462,12 @@ for i in $(seq 1 5); do test_request_firmware_nowait_custom $i normal done +# Partial loads cannot use fallback, so do not repeat tests. +test_request_partial_firmware_into_buf 0 10 +test_request_partial_firmware_into_buf 0 5 +test_request_partial_firmware_into_buf 1 6 +test_request_partial_firmware_into_buf 2 10 + # Test for file not found, errors are expected, the failure would be # a hung task, which would require a hard reset. 
echo @@ -407,6 +492,12 @@ for i in $(seq 1 5); do test_request_firmware_nowait_custom_nofile $i done +# Partial loads cannot use fallback, so do not repeat tests. +test_request_partial_firmware_into_buf_nofile 0 10 +test_request_partial_firmware_into_buf_nofile 0 5 +test_request_partial_firmware_into_buf_nofile 1 6 +test_request_partial_firmware_into_buf_nofile 2 10 + test "$HAS_FW_LOADER_COMPRESS" != "yes" && exit 0 # test with both files present diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc index 68550f97d3c3..3bcd4c3624ee 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc @@ -6,7 +6,7 @@ echo 0 > events/enable echo > dynamic_events -PLACE=_do_fork +PLACE=kernel_clone echo "p:myevent1 $PLACE" >> dynamic_events echo "r:myevent2 $PLACE" >> dynamic_events diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc b/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc index c969be9eb7de..438961971b7e 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc @@ -6,7 +6,7 @@ echo 0 > events/enable echo > dynamic_events -PLACE=_do_fork +PLACE=kernel_clone setup_events() { echo "p:myevent1 $PLACE" >> dynamic_events diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc b/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc index 16d543eaac88..a8603bd23e0d 100644 --- a/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc +++ b/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc @@ -6,7 +6,7 @@ echo 0 > events/enable echo > dynamic_events -PLACE=_do_fork +PLACE=kernel_clone setup_events() { echo "p:myevent1 $PLACE" >> dynamic_events diff --git 
a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc index 0f41e441c203..98305d76bd04 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc @@ -4,9 +4,9 @@ # requires: set_ftrace_filter # flags: instance -echo _do_fork:stacktrace >> set_ftrace_filter +echo kernel_clone:stacktrace >> set_ftrace_filter -grep -q "_do_fork:stacktrace:unlimited" set_ftrace_filter +grep -q "kernel_clone:stacktrace:unlimited" set_ftrace_filter (echo "forked"; sleep 1) diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc b/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc index eba858c21815..9737cd0578a7 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc @@ -3,7 +3,7 @@ # description: Kprobe dynamic event - adding and removing # requires: kprobe_events -echo p:myevent _do_fork > kprobe_events +echo p:myevent kernel_clone > kprobe_events grep myevent kprobe_events test -d events/kprobes/myevent echo > kprobe_events diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc b/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc index d10bf4f05bc8..f9a40af76888 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc @@ -3,7 +3,7 @@ # description: Kprobe dynamic event - busy event check # requires: kprobe_events -echo p:myevent _do_fork > kprobe_events +echo p:myevent kernel_clone > kprobe_events test -d events/kprobes/myevent echo 1 > events/kprobes/myevent/enable echo > kprobe_events && exit_fail # this must fail diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc index 
61f2ac441aec..eb543d3cfe5f 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc @@ -3,13 +3,13 @@ # description: Kprobe dynamic event with arguments # requires: kprobe_events -echo 'p:testprobe _do_fork $stack $stack0 +0($stack)' > kprobe_events +echo 'p:testprobe kernel_clone $stack $stack0 +0($stack)' > kprobe_events grep testprobe kprobe_events | grep -q 'arg1=\$stack arg2=\$stack0 arg3=+0(\$stack)' test -d events/kprobes/testprobe echo 1 > events/kprobes/testprobe/enable ( echo "forked") -grep testprobe trace | grep '_do_fork' | \ +grep testprobe trace | grep 'kernel_clone' | \ grep -q 'arg1=0x[[:xdigit:]]* arg2=0x[[:xdigit:]]* arg3=0x[[:xdigit:]]*$' echo 0 > events/kprobes/testprobe/enable diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc index 05aaeed6987f..4e5b63be51c9 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc @@ -5,7 +5,7 @@ grep -A1 "fetcharg:" README | grep -q "\$comm" || exit_unsupported # this is too old -echo 'p:testprobe _do_fork comm=$comm ' > kprobe_events +echo 'p:testprobe kernel_clone comm=$comm ' > kprobe_events grep testprobe kprobe_events | grep -q 'comm=$comm' test -d events/kprobes/testprobe diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc index b5fa05443b39..a1d70588ab21 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc @@ -30,13 +30,13 @@ esac : "Test get argument (1)" echo "p:testprobe tracefs_create_dir arg1=+0(${ARG1}):string" > kprobe_events echo 1 > events/kprobes/testprobe/enable -echo "p:test _do_fork" >> kprobe_events +echo "p:test kernel_clone" >> 
kprobe_events grep -qe "testprobe.* arg1=\"test\"" trace echo 0 > events/kprobes/testprobe/enable : "Test get argument (2)" echo "p:testprobe tracefs_create_dir arg1=+0(${ARG1}):string arg2=+0(${ARG1}):string" > kprobe_events echo 1 > events/kprobes/testprobe/enable -echo "p:test _do_fork" >> kprobe_events +echo "p:test kernel_clone" >> kprobe_events grep -qe "testprobe.* arg1=\"test\" arg2=\"test\"" trace diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc index b8c75a3d003c..bd25dd0ba0d0 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc @@ -14,12 +14,12 @@ elif ! grep "$SYMBOL\$" /proc/kallsyms; then fi : "Test get basic types symbol argument" -echo "p:testprobe_u _do_fork arg1=@linux_proc_banner:u64 arg2=@linux_proc_banner:u32 arg3=@linux_proc_banner:u16 arg4=@linux_proc_banner:u8" > kprobe_events -echo "p:testprobe_s _do_fork arg1=@linux_proc_banner:s64 arg2=@linux_proc_banner:s32 arg3=@linux_proc_banner:s16 arg4=@linux_proc_banner:s8" >> kprobe_events +echo "p:testprobe_u kernel_clone arg1=@linux_proc_banner:u64 arg2=@linux_proc_banner:u32 arg3=@linux_proc_banner:u16 arg4=@linux_proc_banner:u8" > kprobe_events +echo "p:testprobe_s kernel_clone arg1=@linux_proc_banner:s64 arg2=@linux_proc_banner:s32 arg3=@linux_proc_banner:s16 arg4=@linux_proc_banner:s8" >> kprobe_events if grep -q "x8/16/32/64" README; then - echo "p:testprobe_x _do_fork arg1=@linux_proc_banner:x64 arg2=@linux_proc_banner:x32 arg3=@linux_proc_banner:x16 arg4=@linux_proc_banner:x8" >> kprobe_events + echo "p:testprobe_x kernel_clone arg1=@linux_proc_banner:x64 arg2=@linux_proc_banner:x32 arg3=@linux_proc_banner:x16 arg4=@linux_proc_banner:x8" >> kprobe_events fi -echo "p:testprobe_bf _do_fork arg1=@linux_proc_banner:b8@4/32" >> kprobe_events +echo "p:testprobe_bf kernel_clone 
arg1=@linux_proc_banner:b8@4/32" >> kprobe_events echo 1 > events/kprobes/enable (echo "forked") echo 0 > events/kprobes/enable @@ -27,7 +27,7 @@ grep "testprobe_[usx]:.* arg1=.* arg2=.* arg3=.* arg4=.*" trace grep "testprobe_bf:.* arg1=.*" trace : "Test get string symbol argument" -echo "p:testprobe_str _do_fork arg1=@linux_proc_banner:string" > kprobe_events +echo "p:testprobe_str kernel_clone arg1=@linux_proc_banner:string" > kprobe_events echo 1 > events/kprobes/enable (echo "forked") echo 0 > events/kprobes/enable diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc index 0610e0b5587c..91fcce1c241c 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc @@ -4,7 +4,7 @@ # requires: kprobe_events "x8/16/32/64":README gen_event() { # Bitsize - echo "p:testprobe _do_fork \$stack0:s$1 \$stack0:u$1 \$stack0:x$1 \$stack0:b4@4/$1" + echo "p:testprobe kernel_clone \$stack0:s$1 \$stack0:u$1 \$stack0:x$1 \$stack0:b4@4/$1" } check_types() { # s-type u-type x-type bf-type width diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc index 81d8b58c03bc..0d179094191f 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc @@ -5,29 +5,29 @@ # prepare echo nop > current_tracer -echo _do_fork > set_ftrace_filter -echo 'p:testprobe _do_fork' > kprobe_events +echo kernel_clone > set_ftrace_filter +echo 'p:testprobe kernel_clone' > kprobe_events # kprobe on / ftrace off echo 1 > events/kprobes/testprobe/enable echo > trace ( echo "forked") grep testprobe trace -! grep '_do_fork <-' trace +! 
grep 'kernel_clone <-' trace # kprobe on / ftrace on echo function > current_tracer echo > trace ( echo "forked") grep testprobe trace -grep '_do_fork <-' trace +grep 'kernel_clone <-' trace # kprobe off / ftrace on echo 0 > events/kprobes/testprobe/enable echo > trace ( echo "forked") ! grep testprobe trace -grep '_do_fork <-' trace +grep 'kernel_clone <-' trace # kprobe on / ftrace on echo 1 > events/kprobes/testprobe/enable @@ -35,11 +35,11 @@ echo function > current_tracer echo > trace ( echo "forked") grep testprobe trace -grep '_do_fork <-' trace +grep 'kernel_clone <-' trace # kprobe on / ftrace off echo nop > current_tracer echo > trace ( echo "forked") grep testprobe trace -! grep '_do_fork <-' trace +! grep 'kernel_clone <-' trace diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc index 366b7e1b6718..45d90b6c763d 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc @@ -4,7 +4,7 @@ # requires: kprobe_events "Create/append/":README # Choose 2 symbols for target -SYM1=_do_fork +SYM1=kernel_clone SYM2=do_exit EVENT_NAME=kprobes/testevent diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc index b4d834675e59..1b5550ef8a9b 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc @@ -86,15 +86,21 @@ esac # multiprobe errors if grep -q "Create/append/" README && grep -q "imm-value" README; then -echo 'p:kprobes/testevent _do_fork' > kprobe_events +echo 'p:kprobes/testevent kernel_clone' > kprobe_events check_error '^r:kprobes/testevent do_exit' # DIFF_PROBE_TYPE # Explicitly use printf "%s" to not interpret \1 -printf "%s" 'p:kprobes/testevent _do_fork abcd=\1' > 
kprobe_events -check_error 'p:kprobes/testevent _do_fork ^bcd=\1' # DIFF_ARG_TYPE -check_error 'p:kprobes/testevent _do_fork ^abcd=\1:u8' # DIFF_ARG_TYPE -check_error 'p:kprobes/testevent _do_fork ^abcd=\"foo"' # DIFF_ARG_TYPE -check_error '^p:kprobes/testevent _do_fork abcd=\1' # SAME_PROBE +printf "%s" 'p:kprobes/testevent kernel_clone abcd=\1' > kprobe_events +check_error 'p:kprobes/testevent kernel_clone ^bcd=\1' # DIFF_ARG_TYPE +check_error 'p:kprobes/testevent kernel_clone ^abcd=\1:u8' # DIFF_ARG_TYPE +check_error 'p:kprobes/testevent kernel_clone ^abcd=\"foo"' # DIFF_ARG_TYPE +check_error '^p:kprobes/testevent kernel_clone abcd=\1' # SAME_PROBE +fi + +# %return suffix errors +if grep -q "place (kretprobe): .*%return.*" README; then +check_error 'p vfs_read^%hoge' # BAD_ADDR_SUFFIX +check_error 'p ^vfs_read+10%return' # BAD_RETPROBE fi exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc index 523fde6d1aa5..7ae492c204a4 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc @@ -4,14 +4,14 @@ # requires: kprobe_events # Add new kretprobe event -echo 'r:testprobe2 _do_fork $retval' > kprobe_events +echo 'r:testprobe2 kernel_clone $retval' > kprobe_events grep testprobe2 kprobe_events | grep -q 'arg1=\$retval' test -d events/kprobes/testprobe2 echo 1 > events/kprobes/testprobe2/enable ( echo "forked") -cat trace | grep testprobe2 | grep -q '<- _do_fork' +cat trace | grep testprobe2 | grep -q '<- kernel_clone' echo 0 > events/kprobes/testprobe2/enable echo '-:testprobe2' >> kprobe_events diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_return_suffix.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_return_suffix.tc new file mode 100644 index 000000000000..f07bd15cc033 --- /dev/null +++ 
b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_return_suffix.tc @@ -0,0 +1,21 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: Kretprobe %%return suffix test +# requires: kprobe_events '<symbol>[+<offset>]%return':README + +# Test for kretprobe by "r" +echo 'r:myprobeaccept vfs_read' > kprobe_events +RESULT1=`cat kprobe_events` + +# Test for kretprobe by "%return" +echo 'p:myprobeaccept vfs_read%return' > kprobe_events +RESULT2=`cat kprobe_events` + +if [ "$RESULT1" != "$RESULT2" ]; then + echo "Error: %return suffix didn't make a return probe." + echo "r-command: $RESULT1" + echo "%return: $RESULT2" + exit_fail +fi + +echo > kprobe_events diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc b/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc index ff6c44adc8a0..c4093fc1a773 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc @@ -4,7 +4,7 @@ # requires: kprobe_events ! 
grep -q 'myevent' kprobe_profile -echo p:myevent _do_fork > kprobe_events +echo p:myevent kernel_clone > kprobe_events grep -q 'myevent[[:space:]]*0[[:space:]]*0$' kprobe_profile echo 1 > events/kprobes/myevent/enable ( echo "forked" ) diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc index 7b5b60c3c5a2..f5e3f9e4a01f 100644 --- a/tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc +++ b/tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc @@ -17,4 +17,10 @@ check_error 'p /bin/sh:10(10)^a' # BAD_REFCNT_SUFFIX check_error 'p /bin/sh:10 ^@+ab' # BAD_FILE_OFFS check_error 'p /bin/sh:10 ^@symbol' # SYM_ON_UPROBE +# %return suffix error +if grep -q "place (uprobe): .*%return.*" README; then +check_error 'p /bin/sh:10^%hoge' # BAD_ADDR_SUFFIX +check_error 'p /bin/sh:10(10)^%return' # BAD_REFCNT_SUFFIX +fi + exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc index 7449a4b8f1f9..9098f1e7433f 100644 --- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc @@ -25,12 +25,12 @@ echo 'wakeup_latency u64 lat pid_t pid' >> synthetic_events echo 'hist:keys=pid:ts1=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_wakeup/trigger echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts1:onmatch(sched.sched_wakeup).wakeup_latency($wakeup_lat,next_pid) if next_comm=="ping"' > events/sched/sched_switch/trigger -echo 'waking+wakeup_latency u64 lat; pid_t pid' >> synthetic_events -echo 'hist:keys=pid,lat:sort=pid,lat:ww_lat=$waking_lat+$wakeup_lat:onmatch(synthetic.wakeup_latency).waking+wakeup_latency($ww_lat,pid)' >> 
events/synthetic/wakeup_latency/trigger -echo 'hist:keys=pid,lat:sort=pid,lat' >> events/synthetic/waking+wakeup_latency/trigger +echo 'waking_plus_wakeup_latency u64 lat; pid_t pid' >> synthetic_events +echo 'hist:keys=pid,lat:sort=pid,lat:ww_lat=$waking_lat+$wakeup_lat:onmatch(synthetic.wakeup_latency).waking_plus_wakeup_latency($ww_lat,pid)' >> events/synthetic/wakeup_latency/trigger +echo 'hist:keys=pid,lat:sort=pid,lat' >> events/synthetic/waking_plus_wakeup_latency/trigger ping $LOCALHOST -c 3 -if ! grep -q "pid:" events/synthetic/waking+wakeup_latency/hist; then +if ! grep -q "pid:" events/synthetic/waking_plus_wakeup_latency/hist; then fail "Failed to create combined histogram" fi diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-dynstring.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-dynstring.tc new file mode 100644 index 000000000000..3d65c856eca3 --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-dynstring.tc @@ -0,0 +1,31 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: event trigger - test inter-event histogram trigger trace action with dynamic string param +# requires: set_event synthetic_events events/sched/sched_process_exec/hist "char name[]' >> synthetic_events":README + +fail() { #msg + echo $1 + exit_fail +} + +echo "Test create synthetic event" + +echo 'ping_test_latency u64 lat; char filename[]' > synthetic_events +if [ ! 
-d events/synthetic/ping_test_latency ]; then + fail "Failed to create ping_test_latency synthetic event" +fi + +echo "Test create histogram for synthetic event using trace action and dynamic strings" +echo "Test histogram dynamic string variables,simple expression support and trace action" + +echo 'hist:key=pid:filenamevar=filename:ts0=common_timestamp.usecs' > events/sched/sched_process_exec/trigger +echo 'hist:key=pid:lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_process_exec).ping_test_latency($lat,$filenamevar) if comm == "ping"' > events/sched/sched_process_exit/trigger +echo 'hist:keys=filename,lat:sort=filename,lat' > events/synthetic/ping_test_latency/trigger + +ping $LOCALHOST -c 5 + +if ! grep -q "ping" events/synthetic/ping_test_latency/hist; then + fail "Failed to create dynamic string trace action inter-event histogram" +fi + +exit 0 diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc new file mode 100644 index 000000000000..ada594fe16cb --- /dev/null +++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc @@ -0,0 +1,19 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# description: event trigger - test synthetic_events syntax parser errors +# requires: synthetic_events error_log + +check_error() { # command-with-error-pos-by-^ + ftrace_errlog_check 'synthetic_events' "$1" 'synthetic_events' +} + +check_error 'myevent ^chr arg' # INVALID_TYPE +check_error 'myevent ^char str[];; int v' # INVALID_TYPE +check_error 'myevent char ^str]; int v' # INVALID_NAME +check_error 'myevent char ^str;[]' # INVALID_NAME +check_error 'myevent ^char str[; int v' # INVALID_TYPE +check_error '^mye;vent char str[]' # BAD_NAME +check_error 'myevent char str[]; ^int' # INVALID_FIELD +check_error '^myevent' # INCOMPLETE_CMD + +exit 0 diff --git 
a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index 4f78e4805633..f19804df244c 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -971,6 +971,11 @@ void __run_test(struct __fixture_metadata *f, ksft_print_msg(" RUN %s%s%s.%s ...\n", f->name, variant->name[0] ? "." : "", variant->name, t->name); + + /* Make sure output buffers are flushed before fork */ + fflush(stdout); + fflush(stderr); + t->pid = fork(); if (t->pid < 0) { ksft_print_msg("ERROR SPAWNING TEST CHILD\n"); diff --git a/tools/testing/selftests/kvm/x86_64/debug_regs.c b/tools/testing/selftests/kvm/x86_64/debug_regs.c index b8d14f9db5f9..2fc6b3af81a1 100644 --- a/tools/testing/selftests/kvm/x86_64/debug_regs.c +++ b/tools/testing/selftests/kvm/x86_64/debug_regs.c @@ -73,7 +73,7 @@ int main(void) int i; /* Instruction lengths starting at ss_start */ int ss_size[4] = { - 3, /* xor */ + 2, /* xor */ 2, /* cpuid */ 5, /* mov */ 2, /* rdmsr */ diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index 7a17ea815736..30848ca36555 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -47,9 +47,9 @@ ARCH ?= $(SUBARCH) khdr: ifndef KSFT_KHDR_INSTALL_DONE ifeq (1,$(DEFAULT_INSTALL_HDR_PATH)) - make --no-builtin-rules ARCH=$(ARCH) -C $(top_srcdir) headers_install + $(MAKE) --no-builtin-rules ARCH=$(ARCH) -C $(top_srcdir) headers_install else - make --no-builtin-rules INSTALL_HDR_PATH=$$OUTPUT/usr \ + $(MAKE) --no-builtin-rules INSTALL_HDR_PATH=$$OUTPUT/usr \ ARCH=$(ARCH) -C $(top_srcdir) headers_install endif endif @@ -107,9 +107,8 @@ endif emit_tests: for TEST in $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS); do \ BASENAME_TEST=`basename $$TEST`; \ - echo " \\"; \ - echo -n " \"$$BASENAME_TEST\""; \ - done; \ + echo "$(COLLECTION):$$BASENAME_TEST"; \ + done # define if isn't already. It is undefined in make O= case. 
ifeq ($(RM),) diff --git a/tools/testing/selftests/livepatch/functions.sh b/tools/testing/selftests/livepatch/functions.sh index 1aba83c87ad3..846c7ed71556 100644 --- a/tools/testing/selftests/livepatch/functions.sh +++ b/tools/testing/selftests/livepatch/functions.sh @@ -278,7 +278,7 @@ function check_result { # help differentiate repeated testing runs. Remove them with a # post-comparison sed filter. - result=$(dmesg | comm -13 "$SAVED_DMESG" - | \ + result=$(dmesg | comm --nocheck-order -13 "$SAVED_DMESG" - | \ grep -e 'livepatch:' -e 'test_klp' | \ grep -v '\(tainting\|taints\) kernel' | \ sed 's/^\[[ 0-9.]*\] //') diff --git a/tools/testing/selftests/lkdtm/run.sh b/tools/testing/selftests/lkdtm/run.sh index 8383eb89d88a..bb7a1775307b 100755 --- a/tools/testing/selftests/lkdtm/run.sh +++ b/tools/testing/selftests/lkdtm/run.sh @@ -82,7 +82,7 @@ dmesg > "$DMESG" ($SHELL -c 'cat <(echo '"$test"') >'"$TRIGGER" 2>/dev/null) || true # Record and dump the results -dmesg | diff --changed-group-format='%>' --unchanged-group-format='' "$DMESG" - > "$LOG" || true +dmesg | comm --nocheck-order -13 "$DMESG" - > "$LOG" || true cat "$LOG" # Check for expected output diff --git a/tools/testing/selftests/lkdtm/tests.txt b/tools/testing/selftests/lkdtm/tests.txt index 9d266e79c6a2..74a8d329a72c 100644 --- a/tools/testing/selftests/lkdtm/tests.txt +++ b/tools/testing/selftests/lkdtm/tests.txt @@ -9,7 +9,6 @@ EXCEPTION #CORRUPT_STACK_STRONG Crashes entire system on success CORRUPT_LIST_ADD list_add corruption CORRUPT_LIST_DEL list_del corruption -CORRUPT_USER_DS Invalid address limit on user-mode return STACK_GUARD_PAGE_LEADING STACK_GUARD_PAGE_TRAILING UNSET_SMEP CR4 bits went missing @@ -67,6 +66,5 @@ USERCOPY_STACK_FRAME_TO USERCOPY_STACK_FRAME_FROM USERCOPY_STACK_BEYOND USERCOPY_KERNEL -USERCOPY_KERNEL_DS STACKLEAK_ERASING OK: the rest of the thread stack is properly erased CFI_FORWARD_PROTO diff --git a/tools/testing/selftests/net/.gitignore 
b/tools/testing/selftests/net/.gitignore index 742c499328b2..61ae899cfc17 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -1,4 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only +ipsec msg_zerocopy socket psock_fanout diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 9491bbaa0831..ef352477cac6 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -19,6 +19,8 @@ TEST_PROGS += txtimestamp.sh TEST_PROGS += vrf-xfrm-tests.sh TEST_PROGS += rxtimestamp.sh TEST_PROGS += devlink_port_split.py +TEST_PROGS += drop_monitor_tests.sh +TEST_PROGS += vrf_route_leaking.sh TEST_PROGS_EXTENDED := in_netns.sh TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any @@ -29,6 +31,7 @@ TEST_GEN_FILES += tcp_fastopen_backup_key TEST_GEN_FILES += fin_ack_lat TEST_GEN_FILES += reuseaddr_ports_exhausted TEST_GEN_FILES += hwtstamp_config rxtimestamp timestamping txtimestamp +TEST_GEN_FILES += ipsec TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index 3b42c06b5985..43649242adc0 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -24,10 +24,12 @@ CONFIG_IP_NF_NAT=m CONFIG_NF_TABLES=m CONFIG_NF_TABLES_IPV6=y CONFIG_NF_TABLES_IPV4=y -CONFIG_NFT_CHAIN_NAT_IPV6=m -CONFIG_NFT_CHAIN_NAT_IPV4=m +CONFIG_NFT_NAT=m CONFIG_NET_SCH_FQ=m CONFIG_NET_SCH_ETF=m CONFIG_NET_SCH_NETEM=y CONFIG_TEST_BLACKHOLE_DEV=m CONFIG_KALLSYMS=y +CONFIG_TRACEPOINTS=y +CONFIG_NET_DROP_MONITOR=m +CONFIG_NETDEVSIM=m diff --git a/tools/testing/selftests/net/drop_monitor_tests.sh b/tools/testing/selftests/net/drop_monitor_tests.sh new file mode 100755 index 000000000000..b7650e30d18b --- /dev/null +++ 
b/tools/testing/selftests/net/drop_monitor_tests.sh @@ -0,0 +1,215 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test is for checking drop monitor functionality. + +ret=0 +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +# all tests in this script. Can be overridden with -t option +TESTS=" + sw_drops + hw_drops +" + +IP="ip -netns ns1" +TC="tc -netns ns1" +DEVLINK="devlink -N ns1" +NS_EXEC="ip netns exec ns1" +NETDEVSIM_PATH=/sys/bus/netdevsim/ +DEV_ADDR=1337 +DEV=netdevsim${DEV_ADDR} +DEVLINK_DEV=netdevsim/${DEV} + +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ ${rc} -eq ${expected} ]; then + printf " TEST: %-60s [ OK ]\n" "${msg}" + nsuccess=$((nsuccess+1)) + else + ret=1 + nfail=$((nfail+1)) + printf " TEST: %-60s [FAIL]\n" "${msg}" + fi +} + +setup() +{ + modprobe netdevsim &> /dev/null + + set -e + ip netns add ns1 + $IP link add dummy10 up type dummy + + $NS_EXEC echo "$DEV_ADDR 1" > ${NETDEVSIM_PATH}/new_device + udevadm settle + local netdev=$($NS_EXEC ls ${NETDEVSIM_PATH}/devices/${DEV}/net/) + $IP link set dev $netdev up + + set +e +} + +cleanup() +{ + $NS_EXEC echo "$DEV_ADDR" > ${NETDEVSIM_PATH}/del_device + ip netns del ns1 +} + +sw_drops_test() +{ + echo + echo "Software drops test" + + setup + + local dir=$(mktemp -d) + + $TC qdisc add dev dummy10 clsact + $TC filter add dev dummy10 egress pref 1 handle 101 proto ip \ + flower dst_ip 192.0.2.10 action drop + + $NS_EXEC mausezahn dummy10 -a 00:11:22:33:44:55 -b 00:aa:bb:cc:dd:ee \ + -A 192.0.2.1 -B 192.0.2.10 -t udp sp=12345,dp=54321 -c 0 -q \ + -d 100msec & + timeout 5 dwdump -o sw -w ${dir}/packets.pcap + (( $(tshark -r ${dir}/packets.pcap \ + -Y 'ip.dst == 192.0.2.10' 2> /dev/null | wc -l) != 0)) + log_test $? 
0 "Capturing active software drops" + + rm ${dir}/packets.pcap + + { kill %% && wait %%; } 2>/dev/null + timeout 5 dwdump -o sw -w ${dir}/packets.pcap + (( $(tshark -r ${dir}/packets.pcap \ + -Y 'ip.dst == 192.0.2.10' 2> /dev/null | wc -l) == 0)) + log_test $? 0 "Capturing inactive software drops" + + rm -r $dir + + cleanup +} + +hw_drops_test() +{ + echo + echo "Hardware drops test" + + setup + + local dir=$(mktemp -d) + + $DEVLINK trap set $DEVLINK_DEV trap blackhole_route action trap + timeout 5 dwdump -o hw -w ${dir}/packets.pcap + (( $(tshark -r ${dir}/packets.pcap \ + -Y 'net_dm.hw_trap_name== blackhole_route' 2> /dev/null \ + | wc -l) != 0)) + log_test $? 0 "Capturing active hardware drops" + + rm ${dir}/packets.pcap + + $DEVLINK trap set $DEVLINK_DEV trap blackhole_route action drop + timeout 5 dwdump -o hw -w ${dir}/packets.pcap + (( $(tshark -r ${dir}/packets.pcap \ + -Y 'net_dm.hw_trap_name== blackhole_route' 2> /dev/null \ + | wc -l) == 0)) + log_test $? 0 "Capturing inactive hardware drops" + + rm -r $dir + + cleanup +} + +################################################################################ +# usage + +usage() +{ + cat <<EOF +usage: ${0##*/} OPTS + + -t <test> Test(s) to run (default: all) + (options: $TESTS) +EOF +} + +################################################################################ +# main + +while getopts ":t:h" opt; do + case $opt in + t) TESTS=$OPTARG;; + h) usage; exit 0;; + *) usage; exit 1;; + esac +done + +if [ "$(id -u)" -ne 0 ];then + echo "SKIP: Need root privileges" + exit $ksft_skip; +fi + +if [ ! -x "$(command -v ip)" ]; then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +if [ ! -x "$(command -v devlink)" ]; then + echo "SKIP: Could not run test without devlink tool" + exit $ksft_skip +fi + +if [ ! -x "$(command -v tshark)" ]; then + echo "SKIP: Could not run test without tshark tool" + exit $ksft_skip +fi + +if [ ! 
-x "$(command -v dwdump)" ]; then + echo "SKIP: Could not run test without dwdump tool" + exit $ksft_skip +fi + +if [ ! -x "$(command -v udevadm)" ]; then + echo "SKIP: Could not run test without udevadm tool" + exit $ksft_skip +fi + +if [ ! -x "$(command -v timeout)" ]; then + echo "SKIP: Could not run test without timeout tool" + exit $ksft_skip +fi + +if [ ! -x "$(command -v mausezahn)" ]; then + echo "SKIP: Could not run test without mausezahn tool" + exit $ksft_skip +fi + +tshark -G fields 2> /dev/null | grep -q net_dm +if [ $? -ne 0 ]; then + echo "SKIP: tshark too old, missing net_dm dissector" + exit $ksft_skip +fi + +# start clean +cleanup &> /dev/null + +for t in $TESTS +do + case $t in + sw_drops|sw) sw_drops_test;; + hw_drops|hw) hw_drops_test;; + + help) echo "Test names: $TESTS"; exit 0;; + esac +done + +if [ "$TESTS" != "none" ]; then + printf "\nTests passed: %3d\n" ${nsuccess} + printf "Tests failed: %3d\n" ${nfail} +fi + +exit $ret diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh index 22dc2f3d428b..eb693a3b7b4a 100755 --- a/tools/testing/selftests/net/fib_nexthops.sh +++ b/tools/testing/selftests/net/fib_nexthops.sh @@ -411,9 +411,16 @@ ipv6_fdb_grp_fcnal() run_cmd "$IP -6 ro add 2001:db8:101::1/128 nhid 103" log_test $? 2 "Route add with fdb nexthop group" + run_cmd "$IP nexthop del id 61" + run_cmd "$BRIDGE fdb get to 02:02:00:00:00:13 dev vx10 self" + log_test $? 0 "Fdb entry after deleting a single nexthop" + run_cmd "$IP nexthop del id 102" log_test $? 0 "Fdb nexthop delete" + run_cmd "$BRIDGE fdb get to 02:02:00:00:00:13 dev vx10 self" + log_test $? 254 "Fdb entry after deleting a nexthop group" + $IP link del dev vx10 } @@ -484,9 +491,16 @@ ipv4_fdb_grp_fcnal() run_cmd "$IP ro add 172.16.0.0/22 nhid 103" log_test $? 2 "Route add with fdb nexthop group" + run_cmd "$IP nexthop del id 12" + run_cmd "$BRIDGE fdb get to 02:02:00:00:00:13 dev vx10 self" + log_test $? 
0 "Fdb entry after deleting a single nexthop" + run_cmd "$IP nexthop del id 102" log_test $? 0 "Fdb nexthop delete" + run_cmd "$BRIDGE fdb get to 02:02:00:00:00:13 dev vx10 self" + log_test $? 254 "Fdb entry after deleting a nexthop group" + $IP link del dev vx10 } @@ -739,6 +753,36 @@ ipv6_fcnal_runtime() run_cmd "$IP nexthop replace id 81 via 172.16.1.1 dev veth1" log_test $? 2 "Nexthop replace of group entry - v6 route, v4 nexthop" + run_cmd "$IP nexthop add id 86 via 2001:db8:92::2 dev veth3" + run_cmd "$IP nexthop add id 87 via 172.16.1.1 dev veth1" + run_cmd "$IP nexthop add id 88 via 172.16.1.1 dev veth1" + run_cmd "$IP nexthop add id 124 group 86/87/88" + run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 124" + log_test $? 2 "IPv6 route can not have a group with v4 and v6 gateways" + + run_cmd "$IP nexthop del id 88" + run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 124" + log_test $? 2 "IPv6 route can not have a group with v4 and v6 gateways" + + run_cmd "$IP nexthop del id 87" + run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 124" + log_test $? 0 "IPv6 route using a group after removing v4 gateways" + + run_cmd "$IP ro delete 2001:db8:101::1/128" + run_cmd "$IP nexthop add id 87 via 172.16.1.1 dev veth1" + run_cmd "$IP nexthop add id 88 via 172.16.1.1 dev veth1" + run_cmd "$IP nexthop replace id 124 group 86/87/88" + run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 124" + log_test $? 2 "IPv6 route can not have a group with v4 and v6 gateways" + + run_cmd "$IP nexthop replace id 88 via 2001:db8:92::2 dev veth3" + run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 124" + log_test $? 2 "IPv6 route can not have a group with v4 and v6 gateways" + + run_cmd "$IP nexthop replace id 87 via 2001:db8:92::2 dev veth3" + run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 124" + log_test $? 
0 "IPv6 route using a group after replacing v4 gateways" + $IP nexthop flush >/dev/null 2>&1 # diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh index 75fe24bcb9cd..9c12c4fd3afc 100644 --- a/tools/testing/selftests/net/forwarding/devlink_lib.sh +++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh @@ -5,7 +5,7 @@ # Defines if [[ ! -v DEVLINK_DEV ]]; then - DEVLINK_DEV=$(devlink port show "${NETIFS[p1]}" -j \ + DEVLINK_DEV=$(devlink port show "${NETIFS[p1]:-$NETIF_NO_CABLE}" -j \ | jq -r '.port | keys[]' | cut -d/ -f-2) if [ -z "$DEVLINK_DEV" ]; then echo "SKIP: ${NETIFS[p1]} has no devlink device registered for it" @@ -117,6 +117,12 @@ devlink_reload() declare -A DEVLINK_ORIG +# Changing pool type from static to dynamic causes reinterpretation of threshold +# values. They therefore need to be saved before pool type is changed, then the +# pool type can be changed, and then the new values need to be set up. Therefore +# instead of saving the current state implicitly in the _set call, provide +# functions for all three primitives: save, set, and restore. 
+ devlink_port_pool_threshold() { local port=$1; shift @@ -126,14 +132,21 @@ devlink_port_pool_threshold() | jq '.port_pool."'"$port"'"[].threshold' } -devlink_port_pool_th_set() +devlink_port_pool_th_save() { local port=$1; shift local pool=$1; shift - local th=$1; shift local key="port_pool($port,$pool).threshold" DEVLINK_ORIG[$key]=$(devlink_port_pool_threshold $port $pool) +} + +devlink_port_pool_th_set() +{ + local port=$1; shift + local pool=$1; shift + local th=$1; shift + devlink sb port pool set $port pool $pool th $th } @@ -142,8 +155,13 @@ devlink_port_pool_th_restore() local port=$1; shift local pool=$1; shift local key="port_pool($port,$pool).threshold" + local -a orig=(${DEVLINK_ORIG[$key]}) - devlink sb port pool set $port pool $pool th ${DEVLINK_ORIG[$key]} + if [[ -z $orig ]]; then + echo "WARNING: Mismatched devlink_port_pool_th_restore" + else + devlink sb port pool set $port pool $pool th $orig + fi } devlink_pool_size_thtype() @@ -154,14 +172,20 @@ devlink_pool_size_thtype() | jq -r '.pool[][] | (.size, .thtype)' } +devlink_pool_size_thtype_save() +{ + local pool=$1; shift + local key="pool($pool).size_thtype" + + DEVLINK_ORIG[$key]=$(devlink_pool_size_thtype $pool) +} + devlink_pool_size_thtype_set() { local pool=$1; shift local thtype=$1; shift local size=$1; shift - local key="pool($pool).size_thtype" - DEVLINK_ORIG[$key]=$(devlink_pool_size_thtype $pool) devlink sb pool set "$DEVLINK_DEV" pool $pool size $size thtype $thtype } @@ -171,8 +195,12 @@ devlink_pool_size_thtype_restore() local key="pool($pool).size_thtype" local -a orig=(${DEVLINK_ORIG[$key]}) - devlink sb pool set "$DEVLINK_DEV" pool $pool \ - size ${orig[0]} thtype ${orig[1]} + if [[ -z ${orig[0]} ]]; then + echo "WARNING: Mismatched devlink_pool_size_thtype_restore" + else + devlink sb pool set "$DEVLINK_DEV" pool $pool \ + size ${orig[0]} thtype ${orig[1]} + fi } devlink_tc_bind_pool_th() @@ -185,6 +213,16 @@ devlink_tc_bind_pool_th() | jq -r '.tc_bind[][] | (.pool, 
.threshold)' } +devlink_tc_bind_pool_th_save() +{ + local port=$1; shift + local tc=$1; shift + local dir=$1; shift + local key="tc_bind($port,$dir,$tc).pool_th" + + DEVLINK_ORIG[$key]=$(devlink_tc_bind_pool_th $port $tc $dir) +} + devlink_tc_bind_pool_th_set() { local port=$1; shift @@ -192,9 +230,7 @@ devlink_tc_bind_pool_th_set() local dir=$1; shift local pool=$1; shift local th=$1; shift - local key="tc_bind($port,$dir,$tc).pool_th" - DEVLINK_ORIG[$key]=$(devlink_tc_bind_pool_th $port $tc $dir) devlink sb tc bind set $port tc $tc type $dir pool $pool th $th } @@ -206,8 +242,12 @@ devlink_tc_bind_pool_th_restore() local key="tc_bind($port,$dir,$tc).pool_th" local -a orig=(${DEVLINK_ORIG[$key]}) - devlink sb tc bind set $port tc $tc type $dir \ - pool ${orig[0]} th ${orig[1]} + if [[ -z ${orig[0]} ]]; then + echo "WARNING: Mismatched devlink_tc_bind_pool_th_restore" + else + devlink sb tc bind set $port tc $tc type $dir \ + pool ${orig[0]} th ${orig[1]} + fi } devlink_traps_num_get() @@ -509,3 +549,9 @@ devlink_cpu_port_get() echo "$DEVLINK_DEV/$cpu_dl_port_num" } + +devlink_cell_size_get() +{ + devlink sb pool show "$DEVLINK_DEV" pool 0 -j \ + | jq '.pool[][].cell_size' +} diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 977fc2b326a2..927f9ba49e08 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -1227,3 +1227,46 @@ stop_traffic() # Suppress noise from killing mausezahn. { kill %% && wait %%; } 2>/dev/null } + +tcpdump_start() +{ + local if_name=$1; shift + local ns=$1; shift + + capfile=$(mktemp) + capout=$(mktemp) + + if [ -z $ns ]; then + ns_cmd="" + else + ns_cmd="ip netns exec ${ns}" + fi + + if [ -z $SUDO_USER ] ; then + capuser="" + else + capuser="-Z $SUDO_USER" + fi + + $ns_cmd tcpdump -e -n -Q in -i $if_name \ + -s 65535 -B 32768 $capuser -w $capfile > "$capout" 2>&1 & + cappid=$! 
+ + sleep 1 +} + +tcpdump_stop() +{ + $ns_cmd kill $cappid + sleep 1 +} + +tcpdump_cleanup() +{ + rm $capfile $capout +} + +tcpdump_show() +{ + tcpdump -e -n -r $capfile 2>&1 +} diff --git a/tools/testing/selftests/net/forwarding/mirror_lib.sh b/tools/testing/selftests/net/forwarding/mirror_lib.sh index c33bfd7ba214..13db1cb50e57 100644 --- a/tools/testing/selftests/net/forwarding/mirror_lib.sh +++ b/tools/testing/selftests/net/forwarding/mirror_lib.sh @@ -31,7 +31,7 @@ mirror_test() local t0=$(tc_rule_stats_get $dev $pref) $MZ $vrf_name ${sip:+-A $sip} -B $dip -a own -b bc -q \ - -c 10 -d 100ms -t icmp type=8 + -c 10 -d 100msec -t icmp type=8 sleep 0.5 local t1=$(tc_rule_stats_get $dev $pref) local delta=$((t1 - t0)) diff --git a/tools/testing/selftests/net/ipsec.c b/tools/testing/selftests/net/ipsec.c new file mode 100644 index 000000000000..17ced7d6ce25 --- /dev/null +++ b/tools/testing/selftests/net/ipsec.c @@ -0,0 +1,2195 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * ipsec.c - Check xfrm on veth inside a net-ns. + * Copyright (c) 2018 Dmitry Safonov + */ + +#define _GNU_SOURCE + +#include <arpa/inet.h> +#include <asm/types.h> +#include <errno.h> +#include <fcntl.h> +#include <limits.h> +#include <linux/limits.h> +#include <linux/netlink.h> +#include <linux/random.h> +#include <linux/rtnetlink.h> +#include <linux/veth.h> +#include <linux/xfrm.h> +#include <netinet/in.h> +#include <net/if.h> +#include <sched.h> +#include <stdbool.h> +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mman.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <time.h> +#include <unistd.h> + +#include "../kselftest.h" + +#define printk(fmt, ...) \ + ksft_print_msg("%d[%u] " fmt "\n", getpid(), __LINE__, ##__VA_ARGS__) + +#define pr_err(fmt, ...) 
printk(fmt ": %m", ##__VA_ARGS__) + +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) +#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) + +#define IPV4_STR_SZ 16 /* xxx.xxx.xxx.xxx is longest + \0 */ +#define MAX_PAYLOAD 2048 +#define XFRM_ALGO_KEY_BUF_SIZE 512 +#define MAX_PROCESSES (1 << 14) /* /16 mask divided by /30 subnets */ +#define INADDR_A ((in_addr_t) 0x0a000000) /* 10.0.0.0 */ +#define INADDR_B ((in_addr_t) 0xc0a80000) /* 192.168.0.0 */ + +/* /30 mask for one veth connection */ +#define PREFIX_LEN 30 +#define child_ip(nr) (4*nr + 1) +#define grchild_ip(nr) (4*nr + 2) + +#define VETH_FMT "ktst-%d" +#define VETH_LEN 12 + +static int nsfd_parent = -1; +static int nsfd_childa = -1; +static int nsfd_childb = -1; +static long page_size; + +/* + * ksft_cnt is static in kselftest, so isn't shared with children. + * We have to send a test result back to parent and count there. + * results_fd is a pipe with test feedback from children. + */ +static int results_fd[2]; + +const unsigned int ping_delay_nsec = 50 * 1000 * 1000; +const unsigned int ping_timeout = 300; +const unsigned int ping_count = 100; +const unsigned int ping_success = 80; + +static void randomize_buffer(void *buf, size_t buflen) +{ + int *p = (int *)buf; + size_t words = buflen / sizeof(int); + size_t leftover = buflen % sizeof(int); + + if (!buflen) + return; + + while (words--) + *p++ = rand(); + + if (leftover) { + int tmp = rand(); + + memcpy(buf + buflen - leftover, &tmp, leftover); + } + + return; +} + +static int unshare_open(void) +{ + const char *netns_path = "/proc/self/ns/net"; + int fd; + + if (unshare(CLONE_NEWNET) != 0) { + pr_err("unshare()"); + return -1; + } + + fd = open(netns_path, O_RDONLY); + if (fd <= 0) { + pr_err("open(%s)", netns_path); + return -1; + } + + return fd; +} + +static int switch_ns(int fd) +{ + if (setns(fd, CLONE_NEWNET)) { + pr_err("setns()"); + return -1; + } + return 0; +} + +/* + * Running the test inside a new parent net 
namespace to bother less + * about cleanup on error-path. + */ +static int init_namespaces(void) +{ + nsfd_parent = unshare_open(); + if (nsfd_parent <= 0) + return -1; + + nsfd_childa = unshare_open(); + if (nsfd_childa <= 0) + return -1; + + if (switch_ns(nsfd_parent)) + return -1; + + nsfd_childb = unshare_open(); + if (nsfd_childb <= 0) + return -1; + + if (switch_ns(nsfd_parent)) + return -1; + return 0; +} + +static int netlink_sock(int *sock, uint32_t *seq_nr, int proto) +{ + if (*sock > 0) { + seq_nr++; + return 0; + } + + *sock = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, proto); + if (*sock <= 0) { + pr_err("socket(AF_NETLINK)"); + return -1; + } + + randomize_buffer(seq_nr, sizeof(*seq_nr)); + + return 0; +} + +static inline struct rtattr *rtattr_hdr(struct nlmsghdr *nh) +{ + return (struct rtattr *)((char *)(nh) + RTA_ALIGN((nh)->nlmsg_len)); +} + +static int rtattr_pack(struct nlmsghdr *nh, size_t req_sz, + unsigned short rta_type, const void *payload, size_t size) +{ + /* NLMSG_ALIGNTO == RTA_ALIGNTO, nlmsg_len already aligned */ + struct rtattr *attr = rtattr_hdr(nh); + size_t nl_size = RTA_ALIGN(nh->nlmsg_len) + RTA_LENGTH(size); + + if (req_sz < nl_size) { + printk("req buf is too small: %zu < %zu", req_sz, nl_size); + return -1; + } + nh->nlmsg_len = nl_size; + + attr->rta_len = RTA_LENGTH(size); + attr->rta_type = rta_type; + memcpy(RTA_DATA(attr), payload, size); + + return 0; +} + +static struct rtattr *_rtattr_begin(struct nlmsghdr *nh, size_t req_sz, + unsigned short rta_type, const void *payload, size_t size) +{ + struct rtattr *ret = rtattr_hdr(nh); + + if (rtattr_pack(nh, req_sz, rta_type, payload, size)) + return 0; + + return ret; +} + +static inline struct rtattr *rtattr_begin(struct nlmsghdr *nh, size_t req_sz, + unsigned short rta_type) +{ + return _rtattr_begin(nh, req_sz, rta_type, 0, 0); +} + +static inline void rtattr_end(struct nlmsghdr *nh, struct rtattr *attr) +{ + char *nlmsg_end = (char *)nh + nh->nlmsg_len; + + 
attr->rta_len = nlmsg_end - (char *)attr; +} + +static int veth_pack_peerb(struct nlmsghdr *nh, size_t req_sz, + const char *peer, int ns) +{ + struct ifinfomsg pi; + struct rtattr *peer_attr; + + memset(&pi, 0, sizeof(pi)); + pi.ifi_family = AF_UNSPEC; + pi.ifi_change = 0xFFFFFFFF; + + peer_attr = _rtattr_begin(nh, req_sz, VETH_INFO_PEER, &pi, sizeof(pi)); + if (!peer_attr) + return -1; + + if (rtattr_pack(nh, req_sz, IFLA_IFNAME, peer, strlen(peer))) + return -1; + + if (rtattr_pack(nh, req_sz, IFLA_NET_NS_FD, &ns, sizeof(ns))) + return -1; + + rtattr_end(nh, peer_attr); + + return 0; +} + +static int netlink_check_answer(int sock) +{ + struct nlmsgerror { + struct nlmsghdr hdr; + int error; + struct nlmsghdr orig_msg; + } answer; + + if (recv(sock, &answer, sizeof(answer), 0) < 0) { + pr_err("recv()"); + return -1; + } else if (answer.hdr.nlmsg_type != NLMSG_ERROR) { + printk("expected NLMSG_ERROR, got %d", (int)answer.hdr.nlmsg_type); + return -1; + } else if (answer.error) { + printk("NLMSG_ERROR: %d: %s", + answer.error, strerror(-answer.error)); + return answer.error; + } + + return 0; +} + +static int veth_add(int sock, uint32_t seq, const char *peera, int ns_a, + const char *peerb, int ns_b) +{ + uint16_t flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE; + struct { + struct nlmsghdr nh; + struct ifinfomsg info; + char attrbuf[MAX_PAYLOAD]; + } req; + const char veth_type[] = "veth"; + struct rtattr *link_info, *info_data; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info)); + req.nh.nlmsg_type = RTM_NEWLINK; + req.nh.nlmsg_flags = flags; + req.nh.nlmsg_seq = seq; + req.info.ifi_family = AF_UNSPEC; + req.info.ifi_change = 0xFFFFFFFF; + + if (rtattr_pack(&req.nh, sizeof(req), IFLA_IFNAME, peera, strlen(peera))) + return -1; + + if (rtattr_pack(&req.nh, sizeof(req), IFLA_NET_NS_FD, &ns_a, sizeof(ns_a))) + return -1; + + link_info = rtattr_begin(&req.nh, sizeof(req), IFLA_LINKINFO); + if (!link_info) + return -1; 
+ + if (rtattr_pack(&req.nh, sizeof(req), IFLA_INFO_KIND, veth_type, sizeof(veth_type))) + return -1; + + info_data = rtattr_begin(&req.nh, sizeof(req), IFLA_INFO_DATA); + if (!info_data) + return -1; + + if (veth_pack_peerb(&req.nh, sizeof(req), peerb, ns_b)) + return -1; + + rtattr_end(&req.nh, info_data); + rtattr_end(&req.nh, link_info); + + if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + return -1; + } + return netlink_check_answer(sock); +} + +static int ip4_addr_set(int sock, uint32_t seq, const char *intf, + struct in_addr addr, uint8_t prefix) +{ + uint16_t flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE; + struct { + struct nlmsghdr nh; + struct ifaddrmsg info; + char attrbuf[MAX_PAYLOAD]; + } req; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info)); + req.nh.nlmsg_type = RTM_NEWADDR; + req.nh.nlmsg_flags = flags; + req.nh.nlmsg_seq = seq; + req.info.ifa_family = AF_INET; + req.info.ifa_prefixlen = prefix; + req.info.ifa_index = if_nametoindex(intf); + +#ifdef DEBUG + { + char addr_str[IPV4_STR_SZ] = {}; + + strncpy(addr_str, inet_ntoa(addr), IPV4_STR_SZ - 1); + + printk("ip addr set %s", addr_str); + } +#endif + + if (rtattr_pack(&req.nh, sizeof(req), IFA_LOCAL, &addr, sizeof(addr))) + return -1; + + if (rtattr_pack(&req.nh, sizeof(req), IFA_ADDRESS, &addr, sizeof(addr))) + return -1; + + if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + return -1; + } + return netlink_check_answer(sock); +} + +static int link_set_up(int sock, uint32_t seq, const char *intf) +{ + struct { + struct nlmsghdr nh; + struct ifinfomsg info; + char attrbuf[MAX_PAYLOAD]; + } req; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info)); + req.nh.nlmsg_type = RTM_NEWLINK; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_seq = seq; + req.info.ifi_family = AF_UNSPEC; + req.info.ifi_change = 0xFFFFFFFF; + req.info.ifi_index = if_nametoindex(intf); 
+ req.info.ifi_flags = IFF_UP; + req.info.ifi_change = IFF_UP; + + if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + return -1; + } + return netlink_check_answer(sock); +} + +static int ip4_route_set(int sock, uint32_t seq, const char *intf, + struct in_addr src, struct in_addr dst) +{ + struct { + struct nlmsghdr nh; + struct rtmsg rt; + char attrbuf[MAX_PAYLOAD]; + } req; + unsigned int index = if_nametoindex(intf); + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.rt)); + req.nh.nlmsg_type = RTM_NEWROUTE; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE; + req.nh.nlmsg_seq = seq; + req.rt.rtm_family = AF_INET; + req.rt.rtm_dst_len = 32; + req.rt.rtm_table = RT_TABLE_MAIN; + req.rt.rtm_protocol = RTPROT_BOOT; + req.rt.rtm_scope = RT_SCOPE_LINK; + req.rt.rtm_type = RTN_UNICAST; + + if (rtattr_pack(&req.nh, sizeof(req), RTA_DST, &dst, sizeof(dst))) + return -1; + + if (rtattr_pack(&req.nh, sizeof(req), RTA_PREFSRC, &src, sizeof(src))) + return -1; + + if (rtattr_pack(&req.nh, sizeof(req), RTA_OIF, &index, sizeof(index))) + return -1; + + if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + return -1; + } + + return netlink_check_answer(sock); +} + +static int tunnel_set_route(int route_sock, uint32_t *route_seq, char *veth, + struct in_addr tunsrc, struct in_addr tundst) +{ + if (ip4_addr_set(route_sock, (*route_seq)++, "lo", + tunsrc, PREFIX_LEN)) { + printk("Failed to set ipv4 addr"); + return -1; + } + + if (ip4_route_set(route_sock, (*route_seq)++, veth, tunsrc, tundst)) { + printk("Failed to set ipv4 route"); + return -1; + } + + return 0; +} + +static int init_child(int nsfd, char *veth, unsigned int src, unsigned int dst) +{ + struct in_addr intsrc = inet_makeaddr(INADDR_B, src); + struct in_addr tunsrc = inet_makeaddr(INADDR_A, src); + struct in_addr tundst = inet_makeaddr(INADDR_A, dst); + int route_sock = -1, ret = -1; + uint32_t route_seq; + + if (switch_ns(nsfd)) + return 
-1; + + if (netlink_sock(&route_sock, &route_seq, NETLINK_ROUTE)) { + printk("Failed to open netlink route socket in child"); + return -1; + } + + if (ip4_addr_set(route_sock, route_seq++, veth, intsrc, PREFIX_LEN)) { + printk("Failed to set ipv4 addr"); + goto err; + } + + if (link_set_up(route_sock, route_seq++, veth)) { + printk("Failed to bring up %s", veth); + goto err; + } + + if (tunnel_set_route(route_sock, &route_seq, veth, tunsrc, tundst)) { + printk("Failed to add tunnel route on %s", veth); + goto err; + } + ret = 0; + +err: + close(route_sock); + return ret; +} + +#define ALGO_LEN 64 +enum desc_type { + CREATE_TUNNEL = 0, + ALLOCATE_SPI, + MONITOR_ACQUIRE, + EXPIRE_STATE, + EXPIRE_POLICY, +}; +const char *desc_name[] = { + "create tunnel", + "alloc spi", + "monitor acquire", + "expire state", + "expire policy" +}; +struct xfrm_desc { + enum desc_type type; + uint8_t proto; + char a_algo[ALGO_LEN]; + char e_algo[ALGO_LEN]; + char c_algo[ALGO_LEN]; + char ae_algo[ALGO_LEN]; + unsigned int icv_len; + /* unsigned key_len; */ +}; + +enum msg_type { + MSG_ACK = 0, + MSG_EXIT, + MSG_PING, + MSG_XFRM_PREPARE, + MSG_XFRM_ADD, + MSG_XFRM_DEL, + MSG_XFRM_CLEANUP, +}; + +struct test_desc { + enum msg_type type; + union { + struct { + in_addr_t reply_ip; + unsigned int port; + } ping; + struct xfrm_desc xfrm_desc; + } body; +}; + +struct test_result { + struct xfrm_desc desc; + unsigned int res; +}; + +static void write_test_result(unsigned int res, struct xfrm_desc *d) +{ + struct test_result tr = {}; + ssize_t ret; + + tr.desc = *d; + tr.res = res; + + ret = write(results_fd[1], &tr, sizeof(tr)); + if (ret != sizeof(tr)) + pr_err("Failed to write the result in pipe %zd", ret); +} + +static void write_msg(int fd, struct test_desc *msg, bool exit_of_fail) +{ + ssize_t bytes = write(fd, msg, sizeof(*msg)); + + /* Make sure that write/read is atomic to a pipe */ + BUILD_BUG_ON(sizeof(struct test_desc) > PIPE_BUF); + + if (bytes < 0) { + pr_err("write()"); + if 
(exit_of_fail) + exit(KSFT_FAIL); + } + if (bytes != sizeof(*msg)) { + pr_err("sent part of the message %zd/%zu", bytes, sizeof(*msg)); + if (exit_of_fail) + exit(KSFT_FAIL); + } +} + +static void read_msg(int fd, struct test_desc *msg, bool exit_of_fail) +{ + ssize_t bytes = read(fd, msg, sizeof(*msg)); + + if (bytes < 0) { + pr_err("read()"); + if (exit_of_fail) + exit(KSFT_FAIL); + } + if (bytes != sizeof(*msg)) { + pr_err("got incomplete message %zd/%zu", bytes, sizeof(*msg)); + if (exit_of_fail) + exit(KSFT_FAIL); + } +} + +static int udp_ping_init(struct in_addr listen_ip, unsigned int u_timeout, + unsigned int *server_port, int sock[2]) +{ + struct sockaddr_in server; + struct timeval t = { .tv_sec = 0, .tv_usec = u_timeout }; + socklen_t s_len = sizeof(server); + + sock[0] = socket(AF_INET, SOCK_DGRAM, 0); + if (sock[0] < 0) { + pr_err("socket()"); + return -1; + } + + server.sin_family = AF_INET; + server.sin_port = 0; + memcpy(&server.sin_addr.s_addr, &listen_ip, sizeof(struct in_addr)); + + if (bind(sock[0], (struct sockaddr *)&server, s_len)) { + pr_err("bind()"); + goto err_close_server; + } + + if (getsockname(sock[0], (struct sockaddr *)&server, &s_len)) { + pr_err("getsockname()"); + goto err_close_server; + } + + *server_port = ntohs(server.sin_port); + + if (setsockopt(sock[0], SOL_SOCKET, SO_RCVTIMEO, (const char *)&t, sizeof t)) { + pr_err("setsockopt()"); + goto err_close_server; + } + + sock[1] = socket(AF_INET, SOCK_DGRAM, 0); + if (sock[1] < 0) { + pr_err("socket()"); + goto err_close_server; + } + + return 0; + +err_close_server: + close(sock[0]); + return -1; +} + +static int udp_ping_send(int sock[2], in_addr_t dest_ip, unsigned int port, + char *buf, size_t buf_len) +{ + struct sockaddr_in server; + const struct sockaddr *dest_addr = (struct sockaddr *)&server; + char *sock_buf[buf_len]; + ssize_t r_bytes, s_bytes; + + server.sin_family = AF_INET; + server.sin_port = htons(port); + server.sin_addr.s_addr = dest_ip; + + s_bytes = 
sendto(sock[1], buf, buf_len, 0, dest_addr, sizeof(server)); + if (s_bytes < 0) { + pr_err("sendto()"); + return -1; + } else if (s_bytes != buf_len) { + printk("send part of the message: %zd/%zu", s_bytes, sizeof(server)); + return -1; + } + + r_bytes = recv(sock[0], sock_buf, buf_len, 0); + if (r_bytes < 0) { + if (errno != EAGAIN) + pr_err("recv()"); + return -1; + } else if (r_bytes == 0) { /* EOF */ + printk("EOF on reply to ping"); + return -1; + } else if (r_bytes != buf_len || memcmp(buf, sock_buf, buf_len)) { + printk("ping reply packet is corrupted %zd/%zu", r_bytes, buf_len); + return -1; + } + + return 0; +} + +static int udp_ping_reply(int sock[2], in_addr_t dest_ip, unsigned int port, + char *buf, size_t buf_len) +{ + struct sockaddr_in server; + const struct sockaddr *dest_addr = (struct sockaddr *)&server; + char *sock_buf[buf_len]; + ssize_t r_bytes, s_bytes; + + server.sin_family = AF_INET; + server.sin_port = htons(port); + server.sin_addr.s_addr = dest_ip; + + r_bytes = recv(sock[0], sock_buf, buf_len, 0); + if (r_bytes < 0) { + if (errno != EAGAIN) + pr_err("recv()"); + return -1; + } + if (r_bytes == 0) { /* EOF */ + printk("EOF on reply to ping"); + return -1; + } + if (r_bytes != buf_len || memcmp(buf, sock_buf, buf_len)) { + printk("ping reply packet is corrupted %zd/%zu", r_bytes, buf_len); + return -1; + } + + s_bytes = sendto(sock[1], buf, buf_len, 0, dest_addr, sizeof(server)); + if (s_bytes < 0) { + pr_err("sendto()"); + return -1; + } else if (s_bytes != buf_len) { + printk("send part of the message: %zd/%zu", s_bytes, sizeof(server)); + return -1; + } + + return 0; +} + +typedef int (*ping_f)(int sock[2], in_addr_t dest_ip, unsigned int port, + char *buf, size_t buf_len); +static int do_ping(int cmd_fd, char *buf, size_t buf_len, struct in_addr from, + bool init_side, int d_port, in_addr_t to, ping_f func) +{ + struct test_desc msg; + unsigned int s_port, i, ping_succeeded = 0; + int ping_sock[2]; + char to_str[IPV4_STR_SZ] = {}, 
from_str[IPV4_STR_SZ] = {}; + + if (udp_ping_init(from, ping_timeout, &s_port, ping_sock)) { + printk("Failed to init ping"); + return -1; + } + + memset(&msg, 0, sizeof(msg)); + msg.type = MSG_PING; + msg.body.ping.port = s_port; + memcpy(&msg.body.ping.reply_ip, &from, sizeof(from)); + + write_msg(cmd_fd, &msg, 0); + if (init_side) { + /* The other end sends ip to ping */ + read_msg(cmd_fd, &msg, 0); + if (msg.type != MSG_PING) + return -1; + to = msg.body.ping.reply_ip; + d_port = msg.body.ping.port; + } + + for (i = 0; i < ping_count ; i++) { + struct timespec sleep_time = { + .tv_sec = 0, + .tv_nsec = ping_delay_nsec, + }; + + ping_succeeded += !func(ping_sock, to, d_port, buf, page_size); + nanosleep(&sleep_time, 0); + } + + close(ping_sock[0]); + close(ping_sock[1]); + + strncpy(to_str, inet_ntoa(*(struct in_addr *)&to), IPV4_STR_SZ - 1); + strncpy(from_str, inet_ntoa(from), IPV4_STR_SZ - 1); + + if (ping_succeeded < ping_success) { + printk("ping (%s) %s->%s failed %u/%u times", + init_side ? "send" : "reply", from_str, to_str, + ping_count - ping_succeeded, ping_count); + return -1; + } + +#ifdef DEBUG + printk("ping (%s) %s->%s succeeded %u/%u times", + init_side ? 
"send" : "reply", from_str, to_str, + ping_succeeded, ping_count); +#endif + + return 0; +} + +static int xfrm_fill_key(char *name, char *buf, + size_t buf_len, unsigned int *key_len) +{ + /* TODO: use set/map instead */ + if (strncmp(name, "digest_null", ALGO_LEN) == 0) + *key_len = 0; + else if (strncmp(name, "ecb(cipher_null)", ALGO_LEN) == 0) + *key_len = 0; + else if (strncmp(name, "cbc(des)", ALGO_LEN) == 0) + *key_len = 64; + else if (strncmp(name, "hmac(md5)", ALGO_LEN) == 0) + *key_len = 128; + else if (strncmp(name, "cmac(aes)", ALGO_LEN) == 0) + *key_len = 128; + else if (strncmp(name, "xcbc(aes)", ALGO_LEN) == 0) + *key_len = 128; + else if (strncmp(name, "cbc(cast5)", ALGO_LEN) == 0) + *key_len = 128; + else if (strncmp(name, "cbc(serpent)", ALGO_LEN) == 0) + *key_len = 128; + else if (strncmp(name, "hmac(sha1)", ALGO_LEN) == 0) + *key_len = 160; + else if (strncmp(name, "hmac(rmd160)", ALGO_LEN) == 0) + *key_len = 160; + else if (strncmp(name, "cbc(des3_ede)", ALGO_LEN) == 0) + *key_len = 192; + else if (strncmp(name, "hmac(sha256)", ALGO_LEN) == 0) + *key_len = 256; + else if (strncmp(name, "cbc(aes)", ALGO_LEN) == 0) + *key_len = 256; + else if (strncmp(name, "cbc(camellia)", ALGO_LEN) == 0) + *key_len = 256; + else if (strncmp(name, "cbc(twofish)", ALGO_LEN) == 0) + *key_len = 256; + else if (strncmp(name, "rfc3686(ctr(aes))", ALGO_LEN) == 0) + *key_len = 288; + else if (strncmp(name, "hmac(sha384)", ALGO_LEN) == 0) + *key_len = 384; + else if (strncmp(name, "cbc(blowfish)", ALGO_LEN) == 0) + *key_len = 448; + else if (strncmp(name, "hmac(sha512)", ALGO_LEN) == 0) + *key_len = 512; + else if (strncmp(name, "rfc4106(gcm(aes))-128", ALGO_LEN) == 0) + *key_len = 160; + else if (strncmp(name, "rfc4543(gcm(aes))-128", ALGO_LEN) == 0) + *key_len = 160; + else if (strncmp(name, "rfc4309(ccm(aes))-128", ALGO_LEN) == 0) + *key_len = 152; + else if (strncmp(name, "rfc4106(gcm(aes))-192", ALGO_LEN) == 0) + *key_len = 224; + else if (strncmp(name, 
"rfc4543(gcm(aes))-192", ALGO_LEN) == 0) + *key_len = 224; + else if (strncmp(name, "rfc4309(ccm(aes))-192", ALGO_LEN) == 0) + *key_len = 216; + else if (strncmp(name, "rfc4106(gcm(aes))-256", ALGO_LEN) == 0) + *key_len = 288; + else if (strncmp(name, "rfc4543(gcm(aes))-256", ALGO_LEN) == 0) + *key_len = 288; + else if (strncmp(name, "rfc4309(ccm(aes))-256", ALGO_LEN) == 0) + *key_len = 280; + else if (strncmp(name, "rfc7539(chacha20,poly1305)-128", ALGO_LEN) == 0) + *key_len = 0; + + if (*key_len > buf_len) { + printk("Can't pack a key - too big for buffer"); + return -1; + } + + randomize_buffer(buf, *key_len); + + return 0; +} + +static int xfrm_state_pack_algo(struct nlmsghdr *nh, size_t req_sz, + struct xfrm_desc *desc) +{ + struct { + union { + struct xfrm_algo alg; + struct xfrm_algo_aead aead; + struct xfrm_algo_auth auth; + } u; + char buf[XFRM_ALGO_KEY_BUF_SIZE]; + } alg = {}; + size_t alen, elen, clen, aelen; + unsigned short type; + + alen = strlen(desc->a_algo); + elen = strlen(desc->e_algo); + clen = strlen(desc->c_algo); + aelen = strlen(desc->ae_algo); + + /* Verify desc */ + switch (desc->proto) { + case IPPROTO_AH: + if (!alen || elen || clen || aelen) { + printk("BUG: buggy ah desc"); + return -1; + } + strncpy(alg.u.alg.alg_name, desc->a_algo, ALGO_LEN - 1); + if (xfrm_fill_key(desc->a_algo, alg.u.alg.alg_key, + sizeof(alg.buf), &alg.u.alg.alg_key_len)) + return -1; + type = XFRMA_ALG_AUTH; + break; + case IPPROTO_COMP: + if (!clen || elen || alen || aelen) { + printk("BUG: buggy comp desc"); + return -1; + } + strncpy(alg.u.alg.alg_name, desc->c_algo, ALGO_LEN - 1); + if (xfrm_fill_key(desc->c_algo, alg.u.alg.alg_key, + sizeof(alg.buf), &alg.u.alg.alg_key_len)) + return -1; + type = XFRMA_ALG_COMP; + break; + case IPPROTO_ESP: + if (!((alen && elen) ^ aelen) || clen) { + printk("BUG: buggy esp desc"); + return -1; + } + if (aelen) { + alg.u.aead.alg_icv_len = desc->icv_len; + strncpy(alg.u.aead.alg_name, desc->ae_algo, ALGO_LEN - 1); + if 
(xfrm_fill_key(desc->ae_algo, alg.u.aead.alg_key, + sizeof(alg.buf), &alg.u.aead.alg_key_len)) + return -1; + type = XFRMA_ALG_AEAD; + } else { + + strncpy(alg.u.alg.alg_name, desc->e_algo, ALGO_LEN - 1); + type = XFRMA_ALG_CRYPT; + if (xfrm_fill_key(desc->e_algo, alg.u.alg.alg_key, + sizeof(alg.buf), &alg.u.alg.alg_key_len)) + return -1; + if (rtattr_pack(nh, req_sz, type, &alg, sizeof(alg))) + return -1; + + strncpy(alg.u.alg.alg_name, desc->a_algo, ALGO_LEN); + type = XFRMA_ALG_AUTH; + if (xfrm_fill_key(desc->a_algo, alg.u.alg.alg_key, + sizeof(alg.buf), &alg.u.alg.alg_key_len)) + return -1; + } + break; + default: + printk("BUG: unknown proto in desc"); + return -1; + } + + if (rtattr_pack(nh, req_sz, type, &alg, sizeof(alg))) + return -1; + + return 0; +} + +static inline uint32_t gen_spi(struct in_addr src) +{ + return htonl(inet_lnaof(src)); +} + +static int xfrm_state_add(int xfrm_sock, uint32_t seq, uint32_t spi, + struct in_addr src, struct in_addr dst, + struct xfrm_desc *desc) +{ + struct { + struct nlmsghdr nh; + struct xfrm_usersa_info info; + char attrbuf[MAX_PAYLOAD]; + } req; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info)); + req.nh.nlmsg_type = XFRM_MSG_NEWSA; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_seq = seq; + + /* Fill selector. 
*/ + memcpy(&req.info.sel.daddr, &dst, sizeof(dst)); + memcpy(&req.info.sel.saddr, &src, sizeof(src)); + req.info.sel.family = AF_INET; + req.info.sel.prefixlen_d = PREFIX_LEN; + req.info.sel.prefixlen_s = PREFIX_LEN; + + /* Fill id */ + memcpy(&req.info.id.daddr, &dst, sizeof(dst)); + /* Note: zero-spi cannot be deleted */ + req.info.id.spi = spi; + req.info.id.proto = desc->proto; + + memcpy(&req.info.saddr, &src, sizeof(src)); + + /* Fill lifteme_cfg */ + req.info.lft.soft_byte_limit = XFRM_INF; + req.info.lft.hard_byte_limit = XFRM_INF; + req.info.lft.soft_packet_limit = XFRM_INF; + req.info.lft.hard_packet_limit = XFRM_INF; + + req.info.family = AF_INET; + req.info.mode = XFRM_MODE_TUNNEL; + + if (xfrm_state_pack_algo(&req.nh, sizeof(req), desc)) + return -1; + + if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + return -1; + } + + return netlink_check_answer(xfrm_sock); +} + +static bool xfrm_usersa_found(struct xfrm_usersa_info *info, uint32_t spi, + struct in_addr src, struct in_addr dst, + struct xfrm_desc *desc) +{ + if (memcmp(&info->sel.daddr, &dst, sizeof(dst))) + return false; + + if (memcmp(&info->sel.saddr, &src, sizeof(src))) + return false; + + if (info->sel.family != AF_INET || + info->sel.prefixlen_d != PREFIX_LEN || + info->sel.prefixlen_s != PREFIX_LEN) + return false; + + if (info->id.spi != spi || info->id.proto != desc->proto) + return false; + + if (memcmp(&info->id.daddr, &dst, sizeof(dst))) + return false; + + if (memcmp(&info->saddr, &src, sizeof(src))) + return false; + + if (info->lft.soft_byte_limit != XFRM_INF || + info->lft.hard_byte_limit != XFRM_INF || + info->lft.soft_packet_limit != XFRM_INF || + info->lft.hard_packet_limit != XFRM_INF) + return false; + + if (info->family != AF_INET || info->mode != XFRM_MODE_TUNNEL) + return false; + + /* XXX: check xfrm algo, see xfrm_state_pack_algo(). 
*/ + + return true; +} + +static int xfrm_state_check(int xfrm_sock, uint32_t seq, uint32_t spi, + struct in_addr src, struct in_addr dst, + struct xfrm_desc *desc) +{ + struct { + struct nlmsghdr nh; + char attrbuf[MAX_PAYLOAD]; + } req; + struct { + struct nlmsghdr nh; + union { + struct xfrm_usersa_info info; + int error; + }; + char attrbuf[MAX_PAYLOAD]; + } answer; + struct xfrm_address_filter filter = {}; + bool found = false; + + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(0); + req.nh.nlmsg_type = XFRM_MSG_GETSA; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP; + req.nh.nlmsg_seq = seq; + + /* + * Add dump filter by source address as there may be other tunnels + * in this netns (if tests run in parallel). + */ + filter.family = AF_INET; + filter.splen = 0x1f; /* 0xffffffff mask see addr_match() */ + memcpy(&filter.saddr, &src, sizeof(src)); + if (rtattr_pack(&req.nh, sizeof(req), XFRMA_ADDRESS_FILTER, + &filter, sizeof(filter))) + return -1; + + if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + return -1; + } + + while (1) { + if (recv(xfrm_sock, &answer, sizeof(answer), 0) < 0) { + pr_err("recv()"); + return -1; + } + if (answer.nh.nlmsg_type == NLMSG_ERROR) { + printk("NLMSG_ERROR: %d: %s", + answer.error, strerror(-answer.error)); + return -1; + } else if (answer.nh.nlmsg_type == NLMSG_DONE) { + if (found) + return 0; + printk("didn't find allocated xfrm state in dump"); + return -1; + } else if (answer.nh.nlmsg_type == XFRM_MSG_NEWSA) { + if (xfrm_usersa_found(&answer.info, spi, src, dst, desc)) + found = true; + } + } +} + +static int xfrm_set(int xfrm_sock, uint32_t *seq, + struct in_addr src, struct in_addr dst, + struct in_addr tunsrc, struct in_addr tundst, + struct xfrm_desc *desc) +{ + int err; + + err = xfrm_state_add(xfrm_sock, (*seq)++, gen_spi(src), src, dst, desc); + if (err) { + printk("Failed to add xfrm state"); + return -1; + } + + err = xfrm_state_add(xfrm_sock, (*seq)++, gen_spi(src), 
dst, src, desc); + if (err) { + printk("Failed to add xfrm state"); + return -1; + } + + /* Check dumps for XFRM_MSG_GETSA */ + err = xfrm_state_check(xfrm_sock, (*seq)++, gen_spi(src), src, dst, desc); + err |= xfrm_state_check(xfrm_sock, (*seq)++, gen_spi(src), dst, src, desc); + if (err) { + printk("Failed to check xfrm state"); + return -1; + } + + return 0; +} + +static int xfrm_policy_add(int xfrm_sock, uint32_t seq, uint32_t spi, + struct in_addr src, struct in_addr dst, uint8_t dir, + struct in_addr tunsrc, struct in_addr tundst, uint8_t proto) +{ + struct { + struct nlmsghdr nh; + struct xfrm_userpolicy_info info; + char attrbuf[MAX_PAYLOAD]; + } req; + struct xfrm_user_tmpl tmpl; + + memset(&req, 0, sizeof(req)); + memset(&tmpl, 0, sizeof(tmpl)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info)); + req.nh.nlmsg_type = XFRM_MSG_NEWPOLICY; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_seq = seq; + + /* Fill selector. */ + memcpy(&req.info.sel.daddr, &dst, sizeof(tundst)); + memcpy(&req.info.sel.saddr, &src, sizeof(tunsrc)); + req.info.sel.family = AF_INET; + req.info.sel.prefixlen_d = PREFIX_LEN; + req.info.sel.prefixlen_s = PREFIX_LEN; + + /* Fill lifteme_cfg */ + req.info.lft.soft_byte_limit = XFRM_INF; + req.info.lft.hard_byte_limit = XFRM_INF; + req.info.lft.soft_packet_limit = XFRM_INF; + req.info.lft.hard_packet_limit = XFRM_INF; + + req.info.dir = dir; + + /* Fill tmpl */ + memcpy(&tmpl.id.daddr, &dst, sizeof(dst)); + /* Note: zero-spi cannot be deleted */ + tmpl.id.spi = spi; + tmpl.id.proto = proto; + tmpl.family = AF_INET; + memcpy(&tmpl.saddr, &src, sizeof(src)); + tmpl.mode = XFRM_MODE_TUNNEL; + tmpl.aalgos = (~(uint32_t)0); + tmpl.ealgos = (~(uint32_t)0); + tmpl.calgos = (~(uint32_t)0); + + if (rtattr_pack(&req.nh, sizeof(req), XFRMA_TMPL, &tmpl, sizeof(tmpl))) + return -1; + + if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + return -1; + } + + return netlink_check_answer(xfrm_sock); +} + +static 
int xfrm_prepare(int xfrm_sock, uint32_t *seq, + struct in_addr src, struct in_addr dst, + struct in_addr tunsrc, struct in_addr tundst, uint8_t proto) +{ + if (xfrm_policy_add(xfrm_sock, (*seq)++, gen_spi(src), src, dst, + XFRM_POLICY_OUT, tunsrc, tundst, proto)) { + printk("Failed to add xfrm policy"); + return -1; + } + + if (xfrm_policy_add(xfrm_sock, (*seq)++, gen_spi(src), dst, src, + XFRM_POLICY_IN, tunsrc, tundst, proto)) { + printk("Failed to add xfrm policy"); + return -1; + } + + return 0; +} + +static int xfrm_policy_del(int xfrm_sock, uint32_t seq, + struct in_addr src, struct in_addr dst, uint8_t dir, + struct in_addr tunsrc, struct in_addr tundst) +{ + struct { + struct nlmsghdr nh; + struct xfrm_userpolicy_id id; + char attrbuf[MAX_PAYLOAD]; + } req; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.id)); + req.nh.nlmsg_type = XFRM_MSG_DELPOLICY; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_seq = seq; + + /* Fill id */ + memcpy(&req.id.sel.daddr, &dst, sizeof(tundst)); + memcpy(&req.id.sel.saddr, &src, sizeof(tunsrc)); + req.id.sel.family = AF_INET; + req.id.sel.prefixlen_d = PREFIX_LEN; + req.id.sel.prefixlen_s = PREFIX_LEN; + req.id.dir = dir; + + if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + return -1; + } + + return netlink_check_answer(xfrm_sock); +} + +static int xfrm_cleanup(int xfrm_sock, uint32_t *seq, + struct in_addr src, struct in_addr dst, + struct in_addr tunsrc, struct in_addr tundst) +{ + if (xfrm_policy_del(xfrm_sock, (*seq)++, src, dst, + XFRM_POLICY_OUT, tunsrc, tundst)) { + printk("Failed to add xfrm policy"); + return -1; + } + + if (xfrm_policy_del(xfrm_sock, (*seq)++, dst, src, + XFRM_POLICY_IN, tunsrc, tundst)) { + printk("Failed to add xfrm policy"); + return -1; + } + + return 0; +} + +static int xfrm_state_del(int xfrm_sock, uint32_t seq, uint32_t spi, + struct in_addr src, struct in_addr dst, uint8_t proto) +{ + struct { + struct nlmsghdr nh; + 
struct xfrm_usersa_id id; + char attrbuf[MAX_PAYLOAD]; + } req; + xfrm_address_t saddr = {}; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.id)); + req.nh.nlmsg_type = XFRM_MSG_DELSA; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_seq = seq; + + memcpy(&req.id.daddr, &dst, sizeof(dst)); + req.id.family = AF_INET; + req.id.proto = proto; + /* Note: zero-spi cannot be deleted */ + req.id.spi = spi; + + memcpy(&saddr, &src, sizeof(src)); + if (rtattr_pack(&req.nh, sizeof(req), XFRMA_SRCADDR, &saddr, sizeof(saddr))) + return -1; + + if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + return -1; + } + + return netlink_check_answer(xfrm_sock); +} + +static int xfrm_delete(int xfrm_sock, uint32_t *seq, + struct in_addr src, struct in_addr dst, + struct in_addr tunsrc, struct in_addr tundst, uint8_t proto) +{ + if (xfrm_state_del(xfrm_sock, (*seq)++, gen_spi(src), src, dst, proto)) { + printk("Failed to remove xfrm state"); + return -1; + } + + if (xfrm_state_del(xfrm_sock, (*seq)++, gen_spi(src), dst, src, proto)) { + printk("Failed to remove xfrm state"); + return -1; + } + + return 0; +} + +static int xfrm_state_allocspi(int xfrm_sock, uint32_t *seq, + uint32_t spi, uint8_t proto) +{ + struct { + struct nlmsghdr nh; + struct xfrm_userspi_info spi; + } req; + struct { + struct nlmsghdr nh; + union { + struct xfrm_usersa_info info; + int error; + }; + } answer; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.spi)); + req.nh.nlmsg_type = XFRM_MSG_ALLOCSPI; + req.nh.nlmsg_flags = NLM_F_REQUEST; + req.nh.nlmsg_seq = (*seq)++; + + req.spi.info.family = AF_INET; + req.spi.min = spi; + req.spi.max = spi; + req.spi.info.id.proto = proto; + + if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + return KSFT_FAIL; + } + + if (recv(xfrm_sock, &answer, sizeof(answer), 0) < 0) { + pr_err("recv()"); + return KSFT_FAIL; + } else if (answer.nh.nlmsg_type == 
XFRM_MSG_NEWSA) { + uint32_t new_spi = htonl(answer.info.id.spi); + + if (new_spi != spi) { + printk("allocated spi is different from requested: %#x != %#x", + new_spi, spi); + return KSFT_FAIL; + } + return KSFT_PASS; + } else if (answer.nh.nlmsg_type != NLMSG_ERROR) { + printk("expected NLMSG_ERROR, got %d", (int)answer.nh.nlmsg_type); + return KSFT_FAIL; + } + + printk("NLMSG_ERROR: %d: %s", answer.error, strerror(-answer.error)); + return (answer.error) ? KSFT_FAIL : KSFT_PASS; +} + +static int netlink_sock_bind(int *sock, uint32_t *seq, int proto, uint32_t groups) +{ + struct sockaddr_nl snl = {}; + socklen_t addr_len; + int ret = -1; + + snl.nl_family = AF_NETLINK; + snl.nl_groups = groups; + + if (netlink_sock(sock, seq, proto)) { + printk("Failed to open xfrm netlink socket"); + return -1; + } + + if (bind(*sock, (struct sockaddr *)&snl, sizeof(snl)) < 0) { + pr_err("bind()"); + goto out_close; + } + + addr_len = sizeof(snl); + if (getsockname(*sock, (struct sockaddr *)&snl, &addr_len) < 0) { + pr_err("getsockname()"); + goto out_close; + } + if (addr_len != sizeof(snl)) { + printk("Wrong address length %d", addr_len); + goto out_close; + } + if (snl.nl_family != AF_NETLINK) { + printk("Wrong address family %d", snl.nl_family); + goto out_close; + } + return 0; + +out_close: + close(*sock); + return ret; +} + +static int xfrm_monitor_acquire(int xfrm_sock, uint32_t *seq, unsigned int nr) +{ + struct { + struct nlmsghdr nh; + union { + struct xfrm_user_acquire acq; + int error; + }; + char attrbuf[MAX_PAYLOAD]; + } req; + struct xfrm_user_tmpl xfrm_tmpl = {}; + int xfrm_listen = -1, ret = KSFT_FAIL; + uint32_t seq_listen; + + if (netlink_sock_bind(&xfrm_listen, &seq_listen, NETLINK_XFRM, XFRMNLGRP_ACQUIRE)) + return KSFT_FAIL; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.acq)); + req.nh.nlmsg_type = XFRM_MSG_ACQUIRE; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_seq = (*seq)++; + + 
req.acq.policy.sel.family = AF_INET; + req.acq.aalgos = 0xfeed; + req.acq.ealgos = 0xbaad; + req.acq.calgos = 0xbabe; + + xfrm_tmpl.family = AF_INET; + xfrm_tmpl.id.proto = IPPROTO_ESP; + if (rtattr_pack(&req.nh, sizeof(req), XFRMA_TMPL, &xfrm_tmpl, sizeof(xfrm_tmpl))) + goto out_close; + + if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + goto out_close; + } + + if (recv(xfrm_sock, &req, sizeof(req), 0) < 0) { + pr_err("recv()"); + goto out_close; + } else if (req.nh.nlmsg_type != NLMSG_ERROR) { + printk("expected NLMSG_ERROR, got %d", (int)req.nh.nlmsg_type); + goto out_close; + } + + if (req.error) { + printk("NLMSG_ERROR: %d: %s", req.error, strerror(-req.error)); + ret = req.error; + goto out_close; + } + + if (recv(xfrm_listen, &req, sizeof(req), 0) < 0) { + pr_err("recv()"); + goto out_close; + } + + if (req.acq.aalgos != 0xfeed || req.acq.ealgos != 0xbaad + || req.acq.calgos != 0xbabe) { + printk("xfrm_user_acquire has changed %x %x %x", + req.acq.aalgos, req.acq.ealgos, req.acq.calgos); + goto out_close; + } + + ret = KSFT_PASS; +out_close: + close(xfrm_listen); + return ret; +} + +static int xfrm_expire_state(int xfrm_sock, uint32_t *seq, + unsigned int nr, struct xfrm_desc *desc) +{ + struct { + struct nlmsghdr nh; + union { + struct xfrm_user_expire expire; + int error; + }; + } req; + struct in_addr src, dst; + int xfrm_listen = -1, ret = KSFT_FAIL; + uint32_t seq_listen; + + src = inet_makeaddr(INADDR_B, child_ip(nr)); + dst = inet_makeaddr(INADDR_B, grchild_ip(nr)); + + if (xfrm_state_add(xfrm_sock, (*seq)++, gen_spi(src), src, dst, desc)) { + printk("Failed to add xfrm state"); + return KSFT_FAIL; + } + + if (netlink_sock_bind(&xfrm_listen, &seq_listen, NETLINK_XFRM, XFRMNLGRP_EXPIRE)) + return KSFT_FAIL; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.expire)); + req.nh.nlmsg_type = XFRM_MSG_EXPIRE; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_seq = (*seq)++; + + 
memcpy(&req.expire.state.id.daddr, &dst, sizeof(dst)); + req.expire.state.id.spi = gen_spi(src); + req.expire.state.id.proto = desc->proto; + req.expire.state.family = AF_INET; + req.expire.hard = 0xff; + + if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + goto out_close; + } + + if (recv(xfrm_sock, &req, sizeof(req), 0) < 0) { + pr_err("recv()"); + goto out_close; + } else if (req.nh.nlmsg_type != NLMSG_ERROR) { + printk("expected NLMSG_ERROR, got %d", (int)req.nh.nlmsg_type); + goto out_close; + } + + if (req.error) { + printk("NLMSG_ERROR: %d: %s", req.error, strerror(-req.error)); + ret = req.error; + goto out_close; + } + + if (recv(xfrm_listen, &req, sizeof(req), 0) < 0) { + pr_err("recv()"); + goto out_close; + } + + if (req.expire.hard != 0x1) { + printk("expire.hard is not set: %x", req.expire.hard); + goto out_close; + } + + ret = KSFT_PASS; +out_close: + close(xfrm_listen); + return ret; +} + +static int xfrm_expire_policy(int xfrm_sock, uint32_t *seq, + unsigned int nr, struct xfrm_desc *desc) +{ + struct { + struct nlmsghdr nh; + union { + struct xfrm_user_polexpire expire; + int error; + }; + } req; + struct in_addr src, dst, tunsrc, tundst; + int xfrm_listen = -1, ret = KSFT_FAIL; + uint32_t seq_listen; + + src = inet_makeaddr(INADDR_B, child_ip(nr)); + dst = inet_makeaddr(INADDR_B, grchild_ip(nr)); + tunsrc = inet_makeaddr(INADDR_A, child_ip(nr)); + tundst = inet_makeaddr(INADDR_A, grchild_ip(nr)); + + if (xfrm_policy_add(xfrm_sock, (*seq)++, gen_spi(src), src, dst, + XFRM_POLICY_OUT, tunsrc, tundst, desc->proto)) { + printk("Failed to add xfrm policy"); + return KSFT_FAIL; + } + + if (netlink_sock_bind(&xfrm_listen, &seq_listen, NETLINK_XFRM, XFRMNLGRP_EXPIRE)) + return KSFT_FAIL; + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.expire)); + req.nh.nlmsg_type = XFRM_MSG_POLEXPIRE; + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_seq = (*seq)++; + + /* Fill selector. 
*/ + memcpy(&req.expire.pol.sel.daddr, &dst, sizeof(tundst)); + memcpy(&req.expire.pol.sel.saddr, &src, sizeof(tunsrc)); + req.expire.pol.sel.family = AF_INET; + req.expire.pol.sel.prefixlen_d = PREFIX_LEN; + req.expire.pol.sel.prefixlen_s = PREFIX_LEN; + req.expire.pol.dir = XFRM_POLICY_OUT; + req.expire.hard = 0xff; + + if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) { + pr_err("send()"); + goto out_close; + } + + if (recv(xfrm_sock, &req, sizeof(req), 0) < 0) { + pr_err("recv()"); + goto out_close; + } else if (req.nh.nlmsg_type != NLMSG_ERROR) { + printk("expected NLMSG_ERROR, got %d", (int)req.nh.nlmsg_type); + goto out_close; + } + + if (req.error) { + printk("NLMSG_ERROR: %d: %s", req.error, strerror(-req.error)); + ret = req.error; + goto out_close; + } + + if (recv(xfrm_listen, &req, sizeof(req), 0) < 0) { + pr_err("recv()"); + goto out_close; + } + + if (req.expire.hard != 0x1) { + printk("expire.hard is not set: %x", req.expire.hard); + goto out_close; + } + + ret = KSFT_PASS; +out_close: + close(xfrm_listen); + return ret; +} + +static int child_serv(int xfrm_sock, uint32_t *seq, + unsigned int nr, int cmd_fd, void *buf, struct xfrm_desc *desc) +{ + struct in_addr src, dst, tunsrc, tundst; + struct test_desc msg; + int ret = KSFT_FAIL; + + src = inet_makeaddr(INADDR_B, child_ip(nr)); + dst = inet_makeaddr(INADDR_B, grchild_ip(nr)); + tunsrc = inet_makeaddr(INADDR_A, child_ip(nr)); + tundst = inet_makeaddr(INADDR_A, grchild_ip(nr)); + + /* UDP pinging without xfrm */ + if (do_ping(cmd_fd, buf, page_size, src, true, 0, 0, udp_ping_send)) { + printk("ping failed before setting xfrm"); + return KSFT_FAIL; + } + + memset(&msg, 0, sizeof(msg)); + msg.type = MSG_XFRM_PREPARE; + memcpy(&msg.body.xfrm_desc, desc, sizeof(*desc)); + write_msg(cmd_fd, &msg, 1); + + if (xfrm_prepare(xfrm_sock, seq, src, dst, tunsrc, tundst, desc->proto)) { + printk("failed to prepare xfrm"); + goto cleanup; + } + + memset(&msg, 0, sizeof(msg)); + msg.type = MSG_XFRM_ADD; + 
memcpy(&msg.body.xfrm_desc, desc, sizeof(*desc)); + write_msg(cmd_fd, &msg, 1); + if (xfrm_set(xfrm_sock, seq, src, dst, tunsrc, tundst, desc)) { + printk("failed to set xfrm"); + goto delete; + } + + /* UDP pinging with xfrm tunnel */ + if (do_ping(cmd_fd, buf, page_size, tunsrc, + true, 0, 0, udp_ping_send)) { + printk("ping failed for xfrm"); + goto delete; + } + + ret = KSFT_PASS; +delete: + /* xfrm delete */ + memset(&msg, 0, sizeof(msg)); + msg.type = MSG_XFRM_DEL; + memcpy(&msg.body.xfrm_desc, desc, sizeof(*desc)); + write_msg(cmd_fd, &msg, 1); + + if (xfrm_delete(xfrm_sock, seq, src, dst, tunsrc, tundst, desc->proto)) { + printk("failed ping to remove xfrm"); + ret = KSFT_FAIL; + } + +cleanup: + memset(&msg, 0, sizeof(msg)); + msg.type = MSG_XFRM_CLEANUP; + memcpy(&msg.body.xfrm_desc, desc, sizeof(*desc)); + write_msg(cmd_fd, &msg, 1); + if (xfrm_cleanup(xfrm_sock, seq, src, dst, tunsrc, tundst)) { + printk("failed ping to cleanup xfrm"); + ret = KSFT_FAIL; + } + return ret; +} + +static int child_f(unsigned int nr, int test_desc_fd, int cmd_fd, void *buf) +{ + struct xfrm_desc desc; + struct test_desc msg; + int xfrm_sock = -1; + uint32_t seq; + + if (switch_ns(nsfd_childa)) + exit(KSFT_FAIL); + + if (netlink_sock(&xfrm_sock, &seq, NETLINK_XFRM)) { + printk("Failed to open xfrm netlink socket"); + exit(KSFT_FAIL); + } + + /* Check that seq sock is ready, just for sure. 
*/ + memset(&msg, 0, sizeof(msg)); + msg.type = MSG_ACK; + write_msg(cmd_fd, &msg, 1); + read_msg(cmd_fd, &msg, 1); + if (msg.type != MSG_ACK) { + printk("Ack failed"); + exit(KSFT_FAIL); + } + + for (;;) { + ssize_t received = read(test_desc_fd, &desc, sizeof(desc)); + int ret; + + if (received == 0) /* EOF */ + break; + + if (received != sizeof(desc)) { + pr_err("read() returned %zd", received); + exit(KSFT_FAIL); + } + + switch (desc.type) { + case CREATE_TUNNEL: + ret = child_serv(xfrm_sock, &seq, nr, + cmd_fd, buf, &desc); + break; + case ALLOCATE_SPI: + ret = xfrm_state_allocspi(xfrm_sock, &seq, + -1, desc.proto); + break; + case MONITOR_ACQUIRE: + ret = xfrm_monitor_acquire(xfrm_sock, &seq, nr); + break; + case EXPIRE_STATE: + ret = xfrm_expire_state(xfrm_sock, &seq, nr, &desc); + break; + case EXPIRE_POLICY: + ret = xfrm_expire_policy(xfrm_sock, &seq, nr, &desc); + break; + default: + printk("Unknown desc type %d", desc.type); + exit(KSFT_FAIL); + } + write_test_result(ret, &desc); + } + + close(xfrm_sock); + + msg.type = MSG_EXIT; + write_msg(cmd_fd, &msg, 1); + exit(KSFT_PASS); +} + +static void grand_child_serv(unsigned int nr, int cmd_fd, void *buf, + struct test_desc *msg, int xfrm_sock, uint32_t *seq) +{ + struct in_addr src, dst, tunsrc, tundst; + bool tun_reply; + struct xfrm_desc *desc = &msg->body.xfrm_desc; + + src = inet_makeaddr(INADDR_B, grchild_ip(nr)); + dst = inet_makeaddr(INADDR_B, child_ip(nr)); + tunsrc = inet_makeaddr(INADDR_A, grchild_ip(nr)); + tundst = inet_makeaddr(INADDR_A, child_ip(nr)); + + switch (msg->type) { + case MSG_EXIT: + exit(KSFT_PASS); + case MSG_ACK: + write_msg(cmd_fd, msg, 1); + break; + case MSG_PING: + tun_reply = memcmp(&dst, &msg->body.ping.reply_ip, sizeof(in_addr_t)); + /* UDP pinging without xfrm */ + if (do_ping(cmd_fd, buf, page_size, tun_reply ? 
tunsrc : src, + false, msg->body.ping.port, + msg->body.ping.reply_ip, udp_ping_reply)) { + printk("ping failed before setting xfrm"); + } + break; + case MSG_XFRM_PREPARE: + if (xfrm_prepare(xfrm_sock, seq, src, dst, tunsrc, tundst, + desc->proto)) { + xfrm_cleanup(xfrm_sock, seq, src, dst, tunsrc, tundst); + printk("failed to prepare xfrm"); + } + break; + case MSG_XFRM_ADD: + if (xfrm_set(xfrm_sock, seq, src, dst, tunsrc, tundst, desc)) { + xfrm_cleanup(xfrm_sock, seq, src, dst, tunsrc, tundst); + printk("failed to set xfrm"); + } + break; + case MSG_XFRM_DEL: + if (xfrm_delete(xfrm_sock, seq, src, dst, tunsrc, tundst, + desc->proto)) { + xfrm_cleanup(xfrm_sock, seq, src, dst, tunsrc, tundst); + printk("failed to remove xfrm"); + } + break; + case MSG_XFRM_CLEANUP: + if (xfrm_cleanup(xfrm_sock, seq, src, dst, tunsrc, tundst)) { + printk("failed to cleanup xfrm"); + } + break; + default: + printk("got unknown msg type %d", msg->type); + }; +} + +static int grand_child_f(unsigned int nr, int cmd_fd, void *buf) +{ + struct test_desc msg; + int xfrm_sock = -1; + uint32_t seq; + + if (switch_ns(nsfd_childb)) + exit(KSFT_FAIL); + + if (netlink_sock(&xfrm_sock, &seq, NETLINK_XFRM)) { + printk("Failed to open xfrm netlink socket"); + exit(KSFT_FAIL); + } + + do { + read_msg(cmd_fd, &msg, 1); + grand_child_serv(nr, cmd_fd, buf, &msg, xfrm_sock, &seq); + } while (1); + + close(xfrm_sock); + exit(KSFT_FAIL); +} + +static int start_child(unsigned int nr, char *veth, int test_desc_fd[2]) +{ + int cmd_sock[2]; + void *data_map; + pid_t child; + + if (init_child(nsfd_childa, veth, child_ip(nr), grchild_ip(nr))) + return -1; + + if (init_child(nsfd_childb, veth, grchild_ip(nr), child_ip(nr))) + return -1; + + child = fork(); + if (child < 0) { + pr_err("fork()"); + return -1; + } else if (child) { + /* in parent - selftest */ + return switch_ns(nsfd_parent); + } + + if (close(test_desc_fd[1])) { + pr_err("close()"); + return -1; + } + + /* child */ + data_map = mmap(0, 
page_size, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + if (data_map == MAP_FAILED) { + pr_err("mmap()"); + return -1; + } + + randomize_buffer(data_map, page_size); + + if (socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, cmd_sock)) { + pr_err("socketpair()"); + return -1; + } + + child = fork(); + if (child < 0) { + pr_err("fork()"); + return -1; + } else if (child) { + if (close(cmd_sock[0])) { + pr_err("close()"); + return -1; + } + return child_f(nr, test_desc_fd[0], cmd_sock[1], data_map); + } + if (close(cmd_sock[1])) { + pr_err("close()"); + return -1; + } + return grand_child_f(nr, cmd_sock[0], data_map); +} + +static void exit_usage(char **argv) +{ + printk("Usage: %s [nr_process]", argv[0]); + exit(KSFT_FAIL); +} + +static int __write_desc(int test_desc_fd, struct xfrm_desc *desc) +{ + ssize_t ret; + + ret = write(test_desc_fd, desc, sizeof(*desc)); + + if (ret == sizeof(*desc)) + return 0; + + pr_err("Writing test's desc failed %ld", ret); + + return -1; +} + +static int write_desc(int proto, int test_desc_fd, + char *a, char *e, char *c, char *ae) +{ + struct xfrm_desc desc = {}; + + desc.type = CREATE_TUNNEL; + desc.proto = proto; + + if (a) + strncpy(desc.a_algo, a, ALGO_LEN - 1); + if (e) + strncpy(desc.e_algo, e, ALGO_LEN - 1); + if (c) + strncpy(desc.c_algo, c, ALGO_LEN - 1); + if (ae) + strncpy(desc.ae_algo, ae, ALGO_LEN - 1); + + return __write_desc(test_desc_fd, &desc); +} + +int proto_list[] = { IPPROTO_AH, IPPROTO_COMP, IPPROTO_ESP }; +char *ah_list[] = { + "digest_null", "hmac(md5)", "hmac(sha1)", "hmac(sha256)", + "hmac(sha384)", "hmac(sha512)", "hmac(rmd160)", + "xcbc(aes)", "cmac(aes)" +}; +char *comp_list[] = { + "deflate", +#if 0 + /* No compression backend realization */ + "lzs", "lzjh" +#endif +}; +char *e_list[] = { + "ecb(cipher_null)", "cbc(des)", "cbc(des3_ede)", "cbc(cast5)", + "cbc(blowfish)", "cbc(aes)", "cbc(serpent)", "cbc(camellia)", + "cbc(twofish)", "rfc3686(ctr(aes))" +}; +char *ae_list[] = { +#if 0 + /* not 
implemented */ + "rfc4106(gcm(aes))", "rfc4309(ccm(aes))", "rfc4543(gcm(aes))", + "rfc7539esp(chacha20,poly1305)" +#endif +}; + +const unsigned int proto_plan = ARRAY_SIZE(ah_list) + ARRAY_SIZE(comp_list) \ + + (ARRAY_SIZE(ah_list) * ARRAY_SIZE(e_list)) \ + + ARRAY_SIZE(ae_list); + +static int write_proto_plan(int fd, int proto) +{ + unsigned int i; + + switch (proto) { + case IPPROTO_AH: + for (i = 0; i < ARRAY_SIZE(ah_list); i++) { + if (write_desc(proto, fd, ah_list[i], 0, 0, 0)) + return -1; + } + break; + case IPPROTO_COMP: + for (i = 0; i < ARRAY_SIZE(comp_list); i++) { + if (write_desc(proto, fd, 0, 0, comp_list[i], 0)) + return -1; + } + break; + case IPPROTO_ESP: + for (i = 0; i < ARRAY_SIZE(ah_list); i++) { + int j; + + for (j = 0; j < ARRAY_SIZE(e_list); j++) { + if (write_desc(proto, fd, ah_list[i], + e_list[j], 0, 0)) + return -1; + } + } + for (i = 0; i < ARRAY_SIZE(ae_list); i++) { + if (write_desc(proto, fd, 0, 0, 0, ae_list[i])) + return -1; + } + break; + default: + printk("BUG: Specified unknown proto %d", proto); + return -1; + } + + return 0; +} + +/* + * Some structures in xfrm uapi header differ in size between + * 64-bit and 32-bit ABI: + * + * 32-bit UABI | 64-bit UABI + * -------------------------------------|------------------------------------- + * sizeof(xfrm_usersa_info) = 220 | sizeof(xfrm_usersa_info) = 224 + * sizeof(xfrm_userpolicy_info) = 164 | sizeof(xfrm_userpolicy_info) = 168 + * sizeof(xfrm_userspi_info) = 228 | sizeof(xfrm_userspi_info) = 232 + * sizeof(xfrm_user_acquire) = 276 | sizeof(xfrm_user_acquire) = 280 + * sizeof(xfrm_user_expire) = 224 | sizeof(xfrm_user_expire) = 232 + * sizeof(xfrm_user_polexpire) = 168 | sizeof(xfrm_user_polexpire) = 176 + * + * Check the affected by the UABI difference structures. 
+ */ +const unsigned int compat_plan = 4; +static int write_compat_struct_tests(int test_desc_fd) +{ + struct xfrm_desc desc = {}; + + desc.type = ALLOCATE_SPI; + desc.proto = IPPROTO_AH; + strncpy(desc.a_algo, ah_list[0], ALGO_LEN - 1); + + if (__write_desc(test_desc_fd, &desc)) + return -1; + + desc.type = MONITOR_ACQUIRE; + if (__write_desc(test_desc_fd, &desc)) + return -1; + + desc.type = EXPIRE_STATE; + if (__write_desc(test_desc_fd, &desc)) + return -1; + + desc.type = EXPIRE_POLICY; + if (__write_desc(test_desc_fd, &desc)) + return -1; + + return 0; +} + +static int write_test_plan(int test_desc_fd) +{ + unsigned int i; + pid_t child; + + child = fork(); + if (child < 0) { + pr_err("fork()"); + return -1; + } + if (child) { + if (close(test_desc_fd)) + printk("close(): %m"); + return 0; + } + + if (write_compat_struct_tests(test_desc_fd)) + exit(KSFT_FAIL); + + for (i = 0; i < ARRAY_SIZE(proto_list); i++) { + if (write_proto_plan(test_desc_fd, proto_list[i])) + exit(KSFT_FAIL); + } + + exit(KSFT_PASS); +} + +static int children_cleanup(void) +{ + unsigned ret = KSFT_PASS; + + while (1) { + int status; + pid_t p = wait(&status); + + if ((p < 0) && errno == ECHILD) + break; + + if (p < 0) { + pr_err("wait()"); + return KSFT_FAIL; + } + + if (!WIFEXITED(status)) { + ret = KSFT_FAIL; + continue; + } + + if (WEXITSTATUS(status) == KSFT_FAIL) + ret = KSFT_FAIL; + } + + return ret; +} + +typedef void (*print_res)(const char *, ...); + +static int check_results(void) +{ + struct test_result tr = {}; + struct xfrm_desc *d = &tr.desc; + int ret = KSFT_PASS; + + while (1) { + ssize_t received = read(results_fd[0], &tr, sizeof(tr)); + print_res result; + + if (received == 0) /* EOF */ + break; + + if (received != sizeof(tr)) { + pr_err("read() returned %zd", received); + return KSFT_FAIL; + } + + switch (tr.res) { + case KSFT_PASS: + result = ksft_test_result_pass; + break; + case KSFT_FAIL: + default: + result = ksft_test_result_fail; + ret = KSFT_FAIL; + } + + 
result(" %s: [%u, '%s', '%s', '%s', '%s', %u]\n", + desc_name[d->type], (unsigned int)d->proto, d->a_algo, + d->e_algo, d->c_algo, d->ae_algo, d->icv_len); + } + + return ret; +} + +int main(int argc, char **argv) +{ + unsigned int nr_process = 1; + int route_sock = -1, ret = KSFT_SKIP; + int test_desc_fd[2]; + uint32_t route_seq; + unsigned int i; + + if (argc > 2) + exit_usage(argv); + + if (argc > 1) { + char *endptr; + + errno = 0; + nr_process = strtol(argv[1], &endptr, 10); + if ((errno == ERANGE && (nr_process == LONG_MAX || nr_process == LONG_MIN)) + || (errno != 0 && nr_process == 0) + || (endptr == argv[1]) || (*endptr != '\0')) { + printk("Failed to parse [nr_process]"); + exit_usage(argv); + } + + if (nr_process > MAX_PROCESSES || !nr_process) { + printk("nr_process should be between [1; %u]", + MAX_PROCESSES); + exit_usage(argv); + } + } + + srand(time(NULL)); + page_size = sysconf(_SC_PAGESIZE); + if (page_size < 1) + ksft_exit_skip("sysconf(): %m\n"); + + if (pipe2(test_desc_fd, O_DIRECT) < 0) + ksft_exit_skip("pipe(): %m\n"); + + if (pipe2(results_fd, O_DIRECT) < 0) + ksft_exit_skip("pipe(): %m\n"); + + if (init_namespaces()) + ksft_exit_skip("Failed to create namespaces\n"); + + if (netlink_sock(&route_sock, &route_seq, NETLINK_ROUTE)) + ksft_exit_skip("Failed to open netlink route socket\n"); + + for (i = 0; i < nr_process; i++) { + char veth[VETH_LEN]; + + snprintf(veth, VETH_LEN, VETH_FMT, i); + + if (veth_add(route_sock, route_seq++, veth, nsfd_childa, veth, nsfd_childb)) { + close(route_sock); + ksft_exit_fail_msg("Failed to create veth device"); + } + + if (start_child(i, veth, test_desc_fd)) { + close(route_sock); + ksft_exit_fail_msg("Child %u failed to start", i); + } + } + + if (close(route_sock) || close(test_desc_fd[0]) || close(results_fd[1])) + ksft_exit_fail_msg("close(): %m"); + + ksft_set_plan(proto_plan + compat_plan); + + if (write_test_plan(test_desc_fd[1])) + ksft_exit_fail_msg("Failed to write test plan to pipe"); + + ret = 
check_results(); + + if (children_cleanup() == KSFT_FAIL) + exit(KSFT_FAIL); + + exit(ret); +} diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile index aa254aefc2c3..00bb158b4a5d 100644 --- a/tools/testing/selftests/net/mptcp/Makefile +++ b/tools/testing/selftests/net/mptcp/Makefile @@ -5,7 +5,8 @@ KSFT_KHDR_INSTALL := 1 CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include -TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh +TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh \ + simult_flows.sh TEST_GEN_FILES = mptcp_connect pm_nl_ctl diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index 090620c3e10c..77bb62feb872 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -54,6 +54,7 @@ static int pf = AF_INET; static int cfg_sndbuf; static int cfg_rcvbuf; static bool cfg_join; +static bool cfg_remove; static int cfg_wait; static void die_usage(void) @@ -65,8 +66,8 @@ static void die_usage(void) fprintf(stderr, "\t-S num -- set SO_SNDBUF to num\n"); fprintf(stderr, "\t-R num -- set SO_RCVBUF to num\n"); fprintf(stderr, "\t-p num -- use port num\n"); - fprintf(stderr, "\t-m [MPTCP|TCP] -- use tcp or mptcp sockets\n"); - fprintf(stderr, "\t-s [mmap|poll] -- use poll (default) or mmap\n"); + fprintf(stderr, "\t-s [MPTCP|TCP] -- use mptcp(default) or tcp sockets\n"); + fprintf(stderr, "\t-m [poll|mmap|sendfile] -- use poll(default)/mmap+write/sendfile\n"); fprintf(stderr, "\t-u -- check mptcp ulp\n"); fprintf(stderr, "\t-w num -- wait num sec before closing the socket\n"); exit(1); @@ -271,6 +272,9 @@ static size_t do_rnd_write(const int fd, char *buf, const size_t len) if (cfg_join && first && do_w > 100) do_w = 100; + if (cfg_remove && do_w > 50) + do_w = 50; + bw = write(fd, buf, do_w); if (bw < 0) perror("write"); @@ -281,6 +285,9 @@ static 
size_t do_rnd_write(const int fd, char *buf, const size_t len) first = false; } + if (cfg_remove) + usleep(200000); + return bw; } @@ -428,7 +435,7 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd) } /* leave some time for late join/announce */ - if (cfg_join) + if (cfg_join || cfg_remove) usleep(cfg_wait); close(peerfd); @@ -686,7 +693,7 @@ static void maybe_close(int fd) { unsigned int r = rand(); - if (!cfg_join && (r & 1)) + if (!(cfg_join || cfg_remove) && (r & 1)) close(fd); } @@ -822,13 +829,18 @@ static void parse_opts(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "6jlp:s:hut:m:S:R:w:")) != -1) { + while ((c = getopt(argc, argv, "6jrlp:s:hut:m:S:R:w:")) != -1) { switch (c) { case 'j': cfg_join = true; cfg_mode = CFG_MODE_POLL; cfg_wait = 400000; break; + case 'r': + cfg_remove = true; + cfg_mode = CFG_MODE_POLL; + cfg_wait = 400000; + break; case 'l': listen_mode = true; break; diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 57d75b7f6220..2cfd87d94db8 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -14,9 +14,8 @@ capture=false timeout=30 ipv6=true ethtool_random_on=true -tc_delay="$((RANDOM%400))" +tc_delay="$((RANDOM%50))" tc_loss=$((RANDOM%101)) -tc_reorder="" testmode="" sndbuf=0 rcvbuf=0 @@ -444,9 +443,9 @@ do_transfer() duration=$(printf "(duration %05sms)" $duration) if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then echo "$duration [ FAIL ] client exit code $retc, server $rets" 1>&2 - echo "\nnetns ${listener_ns} socket stat for $port:" 1>&2 + echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2 ip netns exec ${listener_ns} ss -nita 1>&2 -o "sport = :$port" - echo "\nnetns ${connector_ns} socket stat for $port:" 1>&2 + echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2 ip netns exec ${connector_ns} ss -nita 1>&2 -o "dport = :$port" cat "$capout" @@ 
-628,30 +627,32 @@ for sender in "$ns1" "$ns2" "$ns3" "$ns4";do do_ping "$ns4" $sender dead:beef:3::1 done -[ -n "$tc_loss" ] && tc -net "$ns2" qdisc add dev ns2eth3 root netem loss random $tc_loss +[ -n "$tc_loss" ] && tc -net "$ns2" qdisc add dev ns2eth3 root netem loss random $tc_loss delay ${tc_delay}ms echo -n "INFO: Using loss of $tc_loss " test "$tc_delay" -gt 0 && echo -n "delay $tc_delay ms " +reorder_delay=$(($tc_delay / 4)) + if [ -z "${tc_reorder}" ]; then reorder1=$((RANDOM%10)) reorder1=$((100 - reorder1)) reorder2=$((RANDOM%100)) - if [ $tc_delay -gt 0 ] && [ $reorder1 -lt 100 ] && [ $reorder2 -gt 0 ]; then + if [ $reorder_delay -gt 0 ] && [ $reorder1 -lt 100 ] && [ $reorder2 -gt 0 ]; then tc_reorder="reorder ${reorder1}% ${reorder2}%" - echo -n "$tc_reorder " + echo -n "$tc_reorder with delay ${reorder_delay}ms " fi elif [ "$tc_reorder" = "0" ];then tc_reorder="" -elif [ "$tc_delay" -gt 0 ];then +elif [ "$reorder_delay" -gt 0 ];then # reordering requires some delay tc_reorder="reorder $tc_reorder" - echo -n "$tc_reorder " + echo -n "$tc_reorder with delay ${reorder_delay}ms " fi echo "on ns3eth4" -tc -net "$ns3" qdisc add dev ns3eth4 root netem delay ${tc_delay}ms $tc_reorder +tc -net "$ns3" qdisc add dev ns3eth4 root netem delay ${reorder_delay}ms $tc_reorder for sender in $ns1 $ns2 $ns3 $ns4;do run_tests_lo "$ns1" "$sender" 10.0.1.1 1 diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index f39c1129ce5f..08f53d86dedc 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -8,6 +8,7 @@ cin="" cout="" ksft_skip=4 timeout=30 +mptcp_connect="" capture=0 TEST_COUNT=0 @@ -132,6 +133,8 @@ do_transfer() cl_proto="$3" srv_proto="$4" connect_addr="$5" + rm_nr_ns1="$6" + rm_nr_ns2="$7" port=$((10000+$TEST_COUNT)) TEST_COUNT=$((TEST_COUNT+1)) @@ -156,14 +159,44 @@ do_transfer() sleep 1 fi - ip netns exec ${listener_ns} ./mptcp_connect -j 
-t $timeout -l -p $port -s ${srv_proto} 0.0.0.0 < "$sin" > "$sout" & + if [[ $rm_nr_ns1 -eq 0 && $rm_nr_ns2 -eq 0 ]]; then + mptcp_connect="./mptcp_connect -j" + else + mptcp_connect="./mptcp_connect -r" + fi + + ip netns exec ${listener_ns} $mptcp_connect -t $timeout -l -p $port -s ${srv_proto} 0.0.0.0 < "$sin" > "$sout" & spid=$! sleep 1 - ip netns exec ${connector_ns} ./mptcp_connect -j -t $timeout -p $port -s ${cl_proto} $connect_addr < "$cin" > "$cout" & + ip netns exec ${connector_ns} $mptcp_connect -t $timeout -p $port -s ${cl_proto} $connect_addr < "$cin" > "$cout" & cpid=$! + if [ $rm_nr_ns1 -gt 0 ]; then + counter=1 + sleep 1 + + while [ $counter -le $rm_nr_ns1 ] + do + ip netns exec ${listener_ns} ./pm_nl_ctl del $counter + sleep 1 + let counter+=1 + done + fi + + if [ $rm_nr_ns2 -gt 0 ]; then + counter=1 + sleep 1 + + while [ $counter -le $rm_nr_ns2 ] + do + ip netns exec ${connector_ns} ./pm_nl_ctl del $counter + sleep 1 + let counter+=1 + done + fi + wait $cpid retc=$? wait $spid @@ -176,9 +209,9 @@ do_transfer() if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then echo " client exit code $retc, server $rets" 1>&2 - echo "\nnetns ${listener_ns} socket stat for $port:" 1>&2 + echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2 ip netns exec ${listener_ns} ss -nita 1>&2 -o "sport = :$port" - echo "\nnetns ${connector_ns} socket stat for $port:" 1>&2 + echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2 ip netns exec ${connector_ns} ss -nita 1>&2 -o "dport = :$port" cat "$capout" @@ -219,7 +252,24 @@ run_tests() connect_addr="$3" lret=0 - do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} + do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} 0 0 + lret=$? 
+ if [ $lret -ne 0 ]; then + ret=$lret + return + fi +} + +run_remove_tests() +{ + listener_ns="$1" + connector_ns="$2" + connect_addr="$3" + rm_nr_ns1="$4" + rm_nr_ns2="$5" + lret=0 + + do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} ${rm_nr_ns1} ${rm_nr_ns2} lret=$? if [ $lret -ne 0 ]; then ret=$lret @@ -276,6 +326,80 @@ chk_join_nr() fi } +chk_add_nr() +{ + local add_nr=$1 + local echo_nr=$2 + local count + local dump_stats + + printf "%-39s %s" " " "add" + count=`ip netns exec $ns2 nstat -as | grep MPTcpExtAddAddr | awk '{print $2}'` + [ -z "$count" ] && count=0 + if [ "$count" != "$add_nr" ]; then + echo "[fail] got $count ADD_ADDR[s] expected $add_nr" + ret=1 + dump_stats=1 + else + echo -n "[ ok ]" + fi + + echo -n " - echo " + count=`ip netns exec $ns1 nstat -as | grep MPTcpExtEchoAdd | awk '{print $2}'` + [ -z "$count" ] && count=0 + if [ "$count" != "$echo_nr" ]; then + echo "[fail] got $count ADD_ADDR echo[s] expected $echo_nr" + ret=1 + dump_stats=1 + else + echo "[ ok ]" + fi + + if [ "${dump_stats}" = 1 ]; then + echo Server ns stats + ip netns exec $ns1 nstat -as | grep MPTcp + echo Client ns stats + ip netns exec $ns2 nstat -as | grep MPTcp + fi +} + +chk_rm_nr() +{ + local rm_addr_nr=$1 + local rm_subflow_nr=$2 + local count + local dump_stats + + printf "%-39s %s" " " "rm " + count=`ip netns exec $ns1 nstat -as | grep MPTcpExtRmAddr | awk '{print $2}'` + [ -z "$count" ] && count=0 + if [ "$count" != "$rm_addr_nr" ]; then + echo "[fail] got $count RM_ADDR[s] expected $rm_addr_nr" + ret=1 + dump_stats=1 + else + echo -n "[ ok ]" + fi + + echo -n " - sf " + count=`ip netns exec $ns2 nstat -as | grep MPTcpExtRmSubflow | awk '{print $2}'` + [ -z "$count" ] && count=0 + if [ "$count" != "$rm_subflow_nr" ]; then + echo "[fail] got $count RM_SUBFLOW[s] expected $rm_subflow_nr" + ret=1 + dump_stats=1 + else + echo "[ ok ]" + fi + + if [ "${dump_stats}" = 1 ]; then + echo Server ns stats + ip netns exec $ns1 nstat -as | grep MPTcp + 
echo Client ns stats + ip netns exec $ns2 nstat -as | grep MPTcp + fi +} + sin=$(mktemp) sout=$(mktemp) cin=$(mktemp) @@ -332,6 +456,7 @@ reset ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "unused signal address" 0 0 0 +chk_add_nr 1 1 # accept and use add_addr reset @@ -340,6 +465,7 @@ ip netns exec $ns2 ./pm_nl_ctl limits 1 1 ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "signal address" 1 1 1 +chk_add_nr 1 1 # accept and use add_addr with an additional subflow # note: signal address in server ns and local addresses in client ns must @@ -352,6 +478,7 @@ ip netns exec $ns2 ./pm_nl_ctl limits 1 2 ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "subflow and signal" 2 2 2 +chk_add_nr 1 1 # accept and use add_addr with additional subflows reset @@ -362,6 +489,59 @@ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "multiple subflows and signal" 3 3 3 +chk_add_nr 1 1 + +# single subflow, remove +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow +run_remove_tests $ns1 $ns2 10.0.1.1 0 1 +chk_join_nr "remove single subflow" 1 1 1 +chk_rm_nr 1 1 + +# multiple subflows, remove +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 2 +ip netns exec $ns2 ./pm_nl_ctl limits 0 2 +ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow +ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow +run_remove_tests $ns1 $ns2 10.0.1.1 0 2 +chk_join_nr "remove multiple subflows" 2 2 2 +chk_rm_nr 2 2 + +# single address, remove +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal +ip netns exec $ns2 ./pm_nl_ctl limits 1 1 +run_remove_tests $ns1 $ns2 10.0.1.1 1 0 +chk_join_nr 
"remove single address" 1 1 1 +chk_add_nr 1 1 +chk_rm_nr 0 0 + +# subflow and signal, remove +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 2 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal +ip netns exec $ns2 ./pm_nl_ctl limits 1 2 +ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow +run_remove_tests $ns1 $ns2 10.0.1.1 1 1 +chk_join_nr "remove subflow and signal" 2 2 2 +chk_add_nr 1 1 +chk_rm_nr 1 1 + +# subflows and signal, remove +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 3 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal +ip netns exec $ns2 ./pm_nl_ctl limits 1 3 +ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow +ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow +run_remove_tests $ns1 $ns2 10.0.1.1 1 2 +chk_join_nr "remove subflows and signal" 3 3 3 +chk_add_nr 1 1 +chk_rm_nr 2 2 # single subflow, syncookies reset_with_cookies @@ -396,6 +576,7 @@ ip netns exec $ns2 ./pm_nl_ctl limits 1 1 ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "signal address with syn cookies" 1 1 1 +chk_add_nr 1 1 # test cookie with subflow and signal reset_with_cookies @@ -405,6 +586,7 @@ ip netns exec $ns2 ./pm_nl_ctl limits 1 2 ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "subflow and signal w cookies" 2 2 2 +chk_add_nr 1 1 # accept and use add_addr with additional subflows reset_with_cookies @@ -415,5 +597,6 @@ ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow run_tests $ns1 $ns2 10.0.1.1 chk_join_nr "subflows and signal w. 
cookies" 3 3 3 +chk_add_nr 1 1 exit $ret diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh new file mode 100755 index 000000000000..2f649b431456 --- /dev/null +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -0,0 +1,293 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) +ns1="ns1-$rndh" +ns2="ns2-$rndh" +ns3="ns3-$rndh" +capture=false +ksft_skip=4 +timeout=30 +test_cnt=1 +ret=0 +bail=0 + +usage() { + echo "Usage: $0 [ -b ] [ -c ] [ -d ]" + echo -e "\t-b: bail out after first error, otherwise runs al testcases" + echo -e "\t-c: capture packets for each test using tcpdump (default: no capture)" + echo -e "\t-d: debug this script" +} + +cleanup() +{ + rm -f "$cin" "$cout" + rm -f "$sin" "$sout" + rm -f "$capout" + + local netns + for netns in "$ns1" "$ns2" "$ns3";do + ip netns del $netns + done +} + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +# "$ns1" ns2 ns3 +# ns1eth1 ns2eth1 ns2eth3 ns3eth1 +# netem +# ns1eth2 ns2eth2 +# netem + +setup() +{ + large=$(mktemp) + small=$(mktemp) + sout=$(mktemp) + cout=$(mktemp) + capout=$(mktemp) + size=$((2048 * 4096)) + dd if=/dev/zero of=$small bs=4096 count=20 >/dev/null 2>&1 + dd if=/dev/zero of=$large bs=4096 count=$((size / 4096)) >/dev/null 2>&1 + + trap cleanup EXIT + + for i in "$ns1" "$ns2" "$ns3";do + ip netns add $i || exit $ksft_skip + ip -net $i link set lo up + done + + ip link add ns1eth1 netns "$ns1" type veth peer name ns2eth1 netns "$ns2" + ip link add ns1eth2 netns "$ns1" type veth peer name ns2eth2 netns "$ns2" + ip link add ns2eth3 netns "$ns2" type veth peer name ns3eth1 netns "$ns3" + + ip -net "$ns1" addr add 10.0.1.1/24 dev ns1eth1 + ip -net "$ns1" addr add dead:beef:1::1/64 dev ns1eth1 nodad + ip -net "$ns1" link set ns1eth1 up mtu 1500 + ip -net "$ns1" route add default via 10.0.1.2 + ip -net "$ns1" 
route add default via dead:beef:1::2 + + ip -net "$ns1" addr add 10.0.2.1/24 dev ns1eth2 + ip -net "$ns1" addr add dead:beef:2::1/64 dev ns1eth2 nodad + ip -net "$ns1" link set ns1eth2 up mtu 1500 + ip -net "$ns1" route add default via 10.0.2.2 metric 101 + ip -net "$ns1" route add default via dead:beef:2::2 metric 101 + + ip netns exec "$ns1" ./pm_nl_ctl limits 1 1 + ip netns exec "$ns1" ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags subflow + ip netns exec "$ns1" sysctl -q net.ipv4.conf.all.rp_filter=0 + + ip -net "$ns2" addr add 10.0.1.2/24 dev ns2eth1 + ip -net "$ns2" addr add dead:beef:1::2/64 dev ns2eth1 nodad + ip -net "$ns2" link set ns2eth1 up mtu 1500 + + ip -net "$ns2" addr add 10.0.2.2/24 dev ns2eth2 + ip -net "$ns2" addr add dead:beef:2::2/64 dev ns2eth2 nodad + ip -net "$ns2" link set ns2eth2 up mtu 1500 + + ip -net "$ns2" addr add 10.0.3.2/24 dev ns2eth3 + ip -net "$ns2" addr add dead:beef:3::2/64 dev ns2eth3 nodad + ip -net "$ns2" link set ns2eth3 up mtu 1500 + ip netns exec "$ns2" sysctl -q net.ipv4.ip_forward=1 + ip netns exec "$ns2" sysctl -q net.ipv6.conf.all.forwarding=1 + + ip -net "$ns3" addr add 10.0.3.3/24 dev ns3eth1 + ip -net "$ns3" addr add dead:beef:3::3/64 dev ns3eth1 nodad + ip -net "$ns3" link set ns3eth1 up mtu 1500 + ip -net "$ns3" route add default via 10.0.3.2 + ip -net "$ns3" route add default via dead:beef:3::2 + + ip netns exec "$ns3" ./pm_nl_ctl limits 1 1 +} + +# $1: ns, $2: port +wait_local_port_listen() +{ + local listener_ns="${1}" + local port="${2}" + + local port_hex i + + port_hex="$(printf "%04X" "${port}")" + for i in $(seq 10); do + ip netns exec "${listener_ns}" cat /proc/net/tcp* | \ + awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) {rc=0; exit}} END {exit rc}" && + break + sleep 0.1 + done +} + +do_transfer() +{ + local cin=$1 + local sin=$2 + local max_time=$3 + local port + port=$((10000+$test_cnt)) + test_cnt=$((test_cnt+1)) + + :> "$cout" + :> "$sout" + :> "$capout" + + local addr_port + 
addr_port=$(printf "%s:%d" ${connect_addr} ${port}) + + if $capture; then + local capuser + if [ -z $SUDO_USER ] ; then + capuser="" + else + capuser="-Z $SUDO_USER" + fi + + local capfile="${rndh}-${port}" + local capopt="-i any -s 65535 -B 32768 ${capuser}" + + ip netns exec ${ns3} tcpdump ${capopt} -w "${capfile}-listener.pcap" >> "${capout}" 2>&1 & + local cappid_listener=$! + + ip netns exec ${ns1} tcpdump ${capopt} -w "${capfile}-connector.pcap" >> "${capout}" 2>&1 & + local cappid_connector=$! + + sleep 1 + fi + + ip netns exec ${ns3} ./mptcp_connect -jt $timeout -l -p $port 0.0.0.0 < "$sin" > "$sout" & + local spid=$! + + wait_local_port_listen "${ns3}" "${port}" + + local start + start=$(date +%s%3N) + ip netns exec ${ns1} ./mptcp_connect -jt $timeout -p $port 10.0.3.3 < "$cin" > "$cout" & + local cpid=$! + + wait $cpid + local retc=$? + wait $spid + local rets=$? + + local stop + stop=$(date +%s%3N) + + if $capture; then + sleep 1 + kill ${cappid_listener} + kill ${cappid_connector} + fi + + local duration + duration=$((stop-start)) + + cmp $sin $cout > /dev/null 2>&1 + local cmps=$? + cmp $cin $sout > /dev/null 2>&1 + local cmpc=$? 
+ + printf "%16s" "$duration max $max_time " + if [ $retc -eq 0 ] && [ $rets -eq 0 ] && \ + [ $cmpc -eq 0 ] && [ $cmps -eq 0 ] && \ + [ $duration -lt $max_time ]; then + echo "[ OK ]" + cat "$capout" + return 0 + fi + + echo " [ fail ]" + echo "client exit code $retc, server $rets" 1>&2 + echo -e "\nnetns ${ns3} socket stat for $port:" 1>&2 + ip netns exec ${ns3} ss -nita 1>&2 -o "sport = :$port" + echo -e "\nnetns ${ns1} socket stat for $port:" 1>&2 + ip netns exec ${ns1} ss -nita 1>&2 -o "dport = :$port" + ls -l $sin $cout + ls -l $cin $sout + + cat "$capout" + return 1 +} + +run_test() +{ + local rate1=$1 + local rate2=$2 + local delay1=$3 + local delay2=$4 + local lret + local dev + shift 4 + local msg=$* + + [ $delay1 -gt 0 ] && delay1="delay $delay1" || delay1="" + [ $delay2 -gt 0 ] && delay2="delay $delay2" || delay2="" + + for dev in ns1eth1 ns1eth2; do + tc -n $ns1 qdisc del dev $dev root >/dev/null 2>&1 + done + for dev in ns2eth1 ns2eth2; do + tc -n $ns2 qdisc del dev $dev root >/dev/null 2>&1 + done + tc -n $ns1 qdisc add dev ns1eth1 root netem rate ${rate1}mbit $delay1 + tc -n $ns1 qdisc add dev ns1eth2 root netem rate ${rate2}mbit $delay2 + tc -n $ns2 qdisc add dev ns2eth1 root netem rate ${rate1}mbit $delay1 + tc -n $ns2 qdisc add dev ns2eth2 root netem rate ${rate2}mbit $delay2 + + # time is measure in ms + local time=$((size * 8 * 1000 / (( $rate1 + $rate2) * 1024 *1024) )) + + # mptcp_connect will do some sleeps to allow the mp_join handshake + # completion + time=$((time + 1350)) + + printf "%-50s" "$msg" + do_transfer $small $large $((time * 11 / 10)) + lret=$? + if [ $lret -ne 0 ]; then + ret=$lret + [ $bail -eq 0 ] || exit $ret + fi + + printf "%-50s" "$msg - reverse direction" + do_transfer $large $small $((time * 11 / 10)) + lret=$? 
+ if [ $lret -ne 0 ]; then + ret=$lret + [ $bail -eq 0 ] || exit $ret + fi +} + +while getopts "bcdh" option;do + case "$option" in + "h") + usage $0 + exit 0 + ;; + "b") + bail=1 + ;; + "c") + capture=true + ;; + "d") + set -x + ;; + "?") + usage $0 + exit 1 + ;; + esac +done + +setup +run_test 10 10 0 0 "balanced bwidth" +run_test 10 10 1 50 "balanced bwidth with unbalanced delay" + +# we still need some additional infrastructure to pass the following test-cases +# run_test 30 10 0 0 "unbalanced bwidth" +# run_test 30 10 1 50 "unbalanced bwidth with unbalanced delay" +# run_test 30 10 50 1 "unbalanced bwidth with opposed, unbalanced delay" +exit $ret diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c index 93208caacbe6..f75c53ce0a2d 100644 --- a/tools/testing/selftests/net/nettest.c +++ b/tools/testing/selftests/net/nettest.c @@ -1667,6 +1667,8 @@ int main(int argc, char *argv[]) case 'R': args.type = SOCK_RAW; args.port = 0; + if (!args.protocol) + args.protocol = IPPROTO_RAW; break; case 'P': pe = getprotobyname(optarg); diff --git a/tools/testing/selftests/net/psock_snd.sh b/tools/testing/selftests/net/psock_snd.sh index 6331d91b86a6..170be65e0816 100755 --- a/tools/testing/selftests/net/psock_snd.sh +++ b/tools/testing/selftests/net/psock_snd.sh @@ -45,7 +45,7 @@ echo "raw vnet hdr" echo "raw csum_off" ./in_netns.sh ./psock_snd -v -c -echo "raw csum_off with bad offset (fails)" +echo "raw csum_off with bad offset (expected to fail)" (! ./in_netns.sh ./psock_snd -v -c -C) @@ -57,7 +57,7 @@ echo "raw min size" echo "raw mtu size" ./in_netns.sh ./psock_snd -l "${mss}" -echo "raw mtu size + 1 (fails)" +echo "raw mtu size + 1 (expected to fail)" (! 
./in_netns.sh ./psock_snd -l "${mss_exceeds}") # fails due to ARPHRD_ETHER check in packet_extra_vlan_len_allowed @@ -65,19 +65,19 @@ echo "raw mtu size + 1 (fails)" # echo "raw vlan mtu size" # ./in_netns.sh ./psock_snd -V -l "${mss}" -echo "raw vlan mtu size + 1 (fails)" +echo "raw vlan mtu size + 1 (expected to fail)" (! ./in_netns.sh ./psock_snd -V -l "${mss_exceeds}") echo "dgram mtu size" ./in_netns.sh ./psock_snd -d -l "${mss}" -echo "dgram mtu size + 1 (fails)" +echo "dgram mtu size + 1 (expected to fail)" (! ./in_netns.sh ./psock_snd -d -l "${mss_exceeds}") -echo "raw truncate hlen (fails: does not arrive)" +echo "raw truncate hlen (expected to fail: does not arrive)" (! ./in_netns.sh ./psock_snd -t "$((${vnet_hlen} + ${eth_hlen}))") -echo "raw truncate hlen - 1 (fails: EINVAL)" +echo "raw truncate hlen - 1 (expected to fail: EINVAL)" (! ./in_netns.sh ./psock_snd -t "$((${vnet_hlen} + ${eth_hlen} - 1))") @@ -86,13 +86,13 @@ echo "raw truncate hlen - 1 (fails: EINVAL)" echo "raw gso min size" ./in_netns.sh ./psock_snd -v -c -g -l "${mss_exceeds}" -echo "raw gso min size - 1 (fails)" +echo "raw gso min size - 1 (expected to fail)" (! ./in_netns.sh ./psock_snd -v -c -g -l "${mss}") echo "raw gso max size" ./in_netns.sh ./psock_snd -v -c -g -l "${max_mss}" -echo "raw gso max size + 1 (fails)" +echo "raw gso max size + 1 (expected to fail)" (! ./in_netns.sh ./psock_snd -v -c -g -l "${max_mss_exceeds}") echo "OK. All tests passed" diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index 7c38a909f8b8..8a2fe6d64bf2 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -1175,6 +1175,51 @@ kci_test_neigh_get() echo "PASS: neigh get" } +kci_test_bridge_parent_id() +{ + local ret=0 + sysfsnet=/sys/bus/netdevsim/devices/netdevsim + probed=false + + if [ ! -w /sys/bus/netdevsim/new_device ] ; then + modprobe -q netdevsim + check_err $? 
+ if [ $ret -ne 0 ]; then + echo "SKIP: bridge_parent_id can't load netdevsim" + return $ksft_skip + fi + probed=true + fi + + echo "10 1" > /sys/bus/netdevsim/new_device + while [ ! -d ${sysfsnet}10 ] ; do :; done + echo "20 1" > /sys/bus/netdevsim/new_device + while [ ! -d ${sysfsnet}20 ] ; do :; done + udevadm settle + dev10=`ls ${sysfsnet}10/net/` + dev20=`ls ${sysfsnet}20/net/` + + ip link add name test-bond0 type bond mode 802.3ad + ip link set dev $dev10 master test-bond0 + ip link set dev $dev20 master test-bond0 + ip link add name test-br0 type bridge + ip link set dev test-bond0 master test-br0 + check_err $? + + # clean up any leftovers + ip link del dev test-br0 + ip link del dev test-bond0 + echo 20 > /sys/bus/netdevsim/del_device + echo 10 > /sys/bus/netdevsim/del_device + $probed && rmmod netdevsim + + if [ $ret -ne 0 ]; then + echo "FAIL: bridge_parent_id" + return 1 + fi + echo "PASS: bridge_parent_id" +} + kci_test_rtnl() { local ret=0 @@ -1224,6 +1269,8 @@ kci_test_rtnl() check_err $? kci_test_neigh_get check_err $? + kci_test_bridge_parent_id + check_err $? kci_del_dummy return $ret diff --git a/tools/testing/selftests/net/tcp_mmap.c b/tools/testing/selftests/net/tcp_mmap.c index a61b7b3da549..00f837c9bc6c 100644 --- a/tools/testing/selftests/net/tcp_mmap.c +++ b/tools/testing/selftests/net/tcp_mmap.c @@ -123,6 +123,28 @@ void hash_zone(void *zone, unsigned int length) #define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1)) #define ALIGN_PTR_UP(p, ptr_align_to) ((typeof(p))ALIGN_UP((unsigned long)(p), ptr_align_to)) + +static void *mmap_large_buffer(size_t need, size_t *allocated) +{ + void *buffer; + size_t sz; + + /* Attempt to use huge pages if possible. 
*/ + sz = ALIGN_UP(need, map_align); + buffer = mmap(NULL, sz, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0); + + if (buffer == (void *)-1) { + sz = need; + buffer = mmap(NULL, sz, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (buffer != (void *)-1) + fprintf(stderr, "MAP_HUGETLB attempt failed, look at /sys/kernel/mm/hugepages for optimal performance\n"); + } + *allocated = sz; + return buffer; +} + void *child_thread(void *arg) { unsigned long total_mmap = 0, total = 0; @@ -135,6 +157,7 @@ void *child_thread(void *arg) void *addr = NULL; double throughput; struct rusage ru; + size_t buffer_sz; int lu, fd; fd = (int)(unsigned long)arg; @@ -142,9 +165,9 @@ void *child_thread(void *arg) gettimeofday(&t0, NULL); fcntl(fd, F_SETFL, O_NDELAY); - buffer = malloc(chunk_size); - if (!buffer) { - perror("malloc"); + buffer = mmap_large_buffer(chunk_size, &buffer_sz); + if (buffer == (void *)-1) { + perror("mmap"); goto error; } if (zflg) { @@ -179,6 +202,10 @@ void *child_thread(void *arg) total_mmap += zc.length; if (xflg) hash_zone(addr, zc.length); + /* It is more efficient to unmap the pages right now, + * instead of doing this in next TCP_ZEROCOPY_RECEIVE. 
+ */ + madvise(addr, zc.length, MADV_DONTNEED); total += zc.length; } if (zc.recv_skip_hint) { @@ -230,7 +257,7 @@ end: ru.ru_nvcsw); } error: - free(buffer); + munmap(buffer, buffer_sz); close(fd); if (zflg) munmap(raddr, chunk_size + map_align); @@ -347,6 +374,7 @@ int main(int argc, char *argv[]) uint64_t total = 0; char *host = NULL; int fd, c, on = 1; + size_t buffer_sz; char *buffer; int sflg = 0; int mss = 0; @@ -437,8 +465,8 @@ int main(int argc, char *argv[]) } do_accept(fdlisten); } - buffer = mmap(NULL, chunk_size, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + + buffer = mmap_large_buffer(chunk_size, &buffer_sz); if (buffer == (char *)-1) { perror("mmap"); exit(1); @@ -484,6 +512,6 @@ int main(int argc, char *argv[]) total += wr; } close(fd); - munmap(buffer, chunk_size); + munmap(buffer, buffer_sz); return 0; } diff --git a/tools/testing/selftests/net/vrf_route_leaking.sh b/tools/testing/selftests/net/vrf_route_leaking.sh new file mode 100755 index 000000000000..23cf924754a5 --- /dev/null +++ b/tools/testing/selftests/net/vrf_route_leaking.sh @@ -0,0 +1,626 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (c) 2019 David Ahern <dsahern@gmail.com>. All rights reserved. +# Copyright (c) 2020 Michael Jeanson <mjeanson@efficios.com>. All rights reserved. +# +# Requires CONFIG_NET_VRF, CONFIG_VETH, CONFIG_BRIDGE and CONFIG_NET_NS. +# +# +# Symmetric routing topology +# +# blue red +# +----+ .253 +----+ .253 +----+ +# | h1 |-------------------| r1 |-------------------| h2 | +# +----+ .1 +----+ .2 +----+ +# 172.16.1/24 172.16.2/24 +# 2001:db8:16:1/64 2001:db8:16:2/64 +# +# +# Route from h1 to h2 and back goes through r1, incoming vrf blue has a route +# to the outgoing vrf red for the n2 network and red has a route back to n1. +# The red VRF interface has a MTU of 1400. +# +# The first test sends a ping with a ttl of 1 from h1 to h2 and parses the +# output of the command to check that a ttl expired error is received. 
+# +# The second test runs traceroute from h1 to h2 and parses the output to check +# for a hop on r1. +# +# The third test sends a ping with a packet size of 1450 from h1 to h2 and +# parses the output of the command to check that a fragmentation error is +# received. +# +# +# Asymmetric routing topology +# +# This topology represents a customer setup where the issue with icmp errors +# and VRF route leaking was initially reported. The MTU test isn't done here +# because of the lack of a return route in the red VRF. +# +# blue red +# .253 +----+ .253 +# +----| r1 |----+ +# | +----+ | +# +----+ | | +----+ +# | h1 |--------------+ +--------------| h2 | +# +----+ .1 | | .2 +----+ +# 172.16.1/24 | +----+ | 172.16.2/24 +# 2001:db8:16:1/64 +----| r2 |----+ 2001:db8:16:2/64 +# .254 +----+ .254 +# +# +# Route from h1 to h2 goes through r1, incoming vrf blue has a route to the +# outgoing vrf red for the n2 network but red doesn't have a route back to n1. +# Route from h2 to h1 goes through r2. +# +# The objective is to check that the incoming vrf routing table is selected +# to send an ICMP error back to the source when the ttl of a packet reaches 1 +# while it is forwarded between different vrfs.
+ +VERBOSE=0 +PAUSE_ON_FAIL=no +DEFAULT_TTYPE=sym + +H1_N1=172.16.1.0/24 +H1_N1_6=2001:db8:16:1::/64 + +H1_N1_IP=172.16.1.1 +R1_N1_IP=172.16.1.253 +R2_N1_IP=172.16.1.254 + +H1_N1_IP6=2001:db8:16:1::1 +R1_N1_IP6=2001:db8:16:1::253 +R2_N1_IP6=2001:db8:16:1::254 + +H2_N2=172.16.2.0/24 +H2_N2_6=2001:db8:16:2::/64 + +H2_N2_IP=172.16.2.2 +R1_N2_IP=172.16.2.253 +R2_N2_IP=172.16.2.254 + +H2_N2_IP6=2001:db8:16:2::2 +R1_N2_IP6=2001:db8:16:2::253 +R2_N2_IP6=2001:db8:16:2::254 + +################################################################################ +# helpers + +log_section() +{ + echo + echo "###########################################################################" + echo "$*" + echo "###########################################################################" + echo +} + +log_test() +{ + local rc=$1 + local expected=$2 + local msg="$3" + + if [ "${rc}" -eq "${expected}" ]; then + printf "TEST: %-60s [ OK ]\n" "${msg}" + nsuccess=$((nsuccess+1)) + else + ret=1 + nfail=$((nfail+1)) + printf "TEST: %-60s [FAIL]\n" "${msg}" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue, 'q' to quit" + read -r a + [ "$a" = "q" ] && exit 1 + fi + fi +} + +run_cmd() +{ + local cmd="$*" + local out + local rc + + if [ "$VERBOSE" = "1" ]; then + echo "COMMAND: $cmd" + fi + + # shellcheck disable=SC2086 + out=$(eval $cmd 2>&1) + rc=$? + if [ "$VERBOSE" = "1" ] && [ -n "$out" ]; then + echo "$out" + fi + + [ "$VERBOSE" = "1" ] && echo + + return $rc +} + +run_cmd_grep() +{ + local grep_pattern="$1" + shift + local cmd="$*" + local out + local rc + + if [ "$VERBOSE" = "1" ]; then + echo "COMMAND: $cmd" + fi + + # shellcheck disable=SC2086 + out=$(eval $cmd 2>&1) + if [ "$VERBOSE" = "1" ] && [ -n "$out" ]; then + echo "$out" + fi + + echo "$out" | grep -q "$grep_pattern" + rc=$? 
+ + [ "$VERBOSE" = "1" ] && echo + + return $rc +} + +################################################################################ +# setup and teardown + +cleanup() +{ + local ns + + for ns in h1 h2 r1 r2; do + ip netns del $ns 2>/dev/null + done +} + +setup_vrf() +{ + local ns=$1 + + ip -netns "${ns}" rule del pref 0 + ip -netns "${ns}" rule add pref 32765 from all lookup local + ip -netns "${ns}" -6 rule del pref 0 + ip -netns "${ns}" -6 rule add pref 32765 from all lookup local +} + +create_vrf() +{ + local ns=$1 + local vrf=$2 + local table=$3 + + ip -netns "${ns}" link add "${vrf}" type vrf table "${table}" + ip -netns "${ns}" link set "${vrf}" up + ip -netns "${ns}" route add vrf "${vrf}" unreachable default metric 8192 + ip -netns "${ns}" -6 route add vrf "${vrf}" unreachable default metric 8192 + + ip -netns "${ns}" addr add 127.0.0.1/8 dev "${vrf}" + ip -netns "${ns}" -6 addr add ::1 dev "${vrf}" nodad +} + +setup_sym() +{ + local ns + + # make sure we are starting with a clean slate + cleanup + + # + # create nodes as namespaces + # + for ns in h1 h2 r1; do + ip netns add $ns + ip -netns $ns link set lo up + + case "${ns}" in + h[12]) ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0 + ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1 + ;; + r1) ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1 + ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1 + esac + done + + # + # create interconnects + # + ip -netns h1 link add eth0 type veth peer name r1h1 + ip -netns h1 link set r1h1 netns r1 name eth0 up + + ip -netns h2 link add eth0 type veth peer name r1h2 + ip -netns h2 link set r1h2 netns r1 name eth1 up + + # + # h1 + # + ip -netns h1 addr add dev eth0 ${H1_N1_IP}/24 + ip -netns h1 -6 addr add dev eth0 ${H1_N1_IP6}/64 nodad + ip -netns h1 link set eth0 up + + # h1 to h2 via r1 + ip -netns h1 route add ${H2_N2} via ${R1_N1_IP} dev eth0 + ip -netns h1 -6 route add ${H2_N2_6} via "${R1_N1_IP6}" dev eth0 + + # 
+ # h2 + # + ip -netns h2 addr add dev eth0 ${H2_N2_IP}/24 + ip -netns h2 -6 addr add dev eth0 ${H2_N2_IP6}/64 nodad + ip -netns h2 link set eth0 up + + # h2 to h1 via r1 + ip -netns h2 route add default via ${R1_N2_IP} dev eth0 + ip -netns h2 -6 route add default via ${R1_N2_IP6} dev eth0 + + # + # r1 + # + setup_vrf r1 + create_vrf r1 blue 1101 + create_vrf r1 red 1102 + ip -netns r1 link set mtu 1400 dev eth1 + ip -netns r1 link set eth0 vrf blue up + ip -netns r1 link set eth1 vrf red up + ip -netns r1 addr add dev eth0 ${R1_N1_IP}/24 + ip -netns r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad + ip -netns r1 addr add dev eth1 ${R1_N2_IP}/24 + ip -netns r1 -6 addr add dev eth1 ${R1_N2_IP6}/64 nodad + + # Route leak from blue to red + ip -netns r1 route add vrf blue ${H2_N2} dev red + ip -netns r1 -6 route add vrf blue ${H2_N2_6} dev red + + # Route leak from red to blue + ip -netns r1 route add vrf red ${H1_N1} dev blue + ip -netns r1 -6 route add vrf red ${H1_N1_6} dev blue + + + # Wait for ip config to settle + sleep 2 +} + +setup_asym() +{ + local ns + + # make sure we are starting with a clean slate + cleanup + + # + # create nodes as namespaces + # + for ns in h1 h2 r1 r2; do + ip netns add $ns + ip -netns $ns link set lo up + + case "${ns}" in + h[12]) ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0 + ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1 + ;; + r[12]) ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1 + ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1 + esac + done + + # + # create interconnects + # + ip -netns h1 link add eth0 type veth peer name r1h1 + ip -netns h1 link set r1h1 netns r1 name eth0 up + + ip -netns h1 link add eth1 type veth peer name r2h1 + ip -netns h1 link set r2h1 netns r2 name eth0 up + + ip -netns h2 link add eth0 type veth peer name r1h2 + ip -netns h2 link set r1h2 netns r1 name eth1 up + + ip -netns h2 link add eth1 type veth peer name r2h2 + ip -netns h2 link set r2h2 
netns r2 name eth1 up + + # + # h1 + # + ip -netns h1 link add br0 type bridge + ip -netns h1 link set br0 up + ip -netns h1 addr add dev br0 ${H1_N1_IP}/24 + ip -netns h1 -6 addr add dev br0 ${H1_N1_IP6}/64 nodad + ip -netns h1 link set eth0 master br0 up + ip -netns h1 link set eth1 master br0 up + + # h1 to h2 via r1 + ip -netns h1 route add ${H2_N2} via ${R1_N1_IP} dev br0 + ip -netns h1 -6 route add ${H2_N2_6} via "${R1_N1_IP6}" dev br0 + + # + # h2 + # + ip -netns h2 link add br0 type bridge + ip -netns h2 link set br0 up + ip -netns h2 addr add dev br0 ${H2_N2_IP}/24 + ip -netns h2 -6 addr add dev br0 ${H2_N2_IP6}/64 nodad + ip -netns h2 link set eth0 master br0 up + ip -netns h2 link set eth1 master br0 up + + # h2 to h1 via r2 + ip -netns h2 route add default via ${R2_N2_IP} dev br0 + ip -netns h2 -6 route add default via ${R2_N2_IP6} dev br0 + + # + # r1 + # + setup_vrf r1 + create_vrf r1 blue 1101 + create_vrf r1 red 1102 + ip -netns r1 link set mtu 1400 dev eth1 + ip -netns r1 link set eth0 vrf blue up + ip -netns r1 link set eth1 vrf red up + ip -netns r1 addr add dev eth0 ${R1_N1_IP}/24 + ip -netns r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad + ip -netns r1 addr add dev eth1 ${R1_N2_IP}/24 + ip -netns r1 -6 addr add dev eth1 ${R1_N2_IP6}/64 nodad + + # Route leak from blue to red + ip -netns r1 route add vrf blue ${H2_N2} dev red + ip -netns r1 -6 route add vrf blue ${H2_N2_6} dev red + + # No route leak from red to blue + + # + # r2 + # + ip -netns r2 addr add dev eth0 ${R2_N1_IP}/24 + ip -netns r2 -6 addr add dev eth0 ${R2_N1_IP6}/64 nodad + ip -netns r2 addr add dev eth1 ${R2_N2_IP}/24 + ip -netns r2 -6 addr add dev eth1 ${R2_N2_IP6}/64 nodad + + # Wait for ip config to settle + sleep 2 +} + +check_connectivity() +{ + ip netns exec h1 ping -c1 -w1 ${H2_N2_IP} >/dev/null 2>&1 + log_test $? 0 "Basic IPv4 connectivity" + return $? +} + +check_connectivity6() +{ + ip netns exec h1 "${ping6}" -c1 -w1 ${H2_N2_IP6} >/dev/null 2>&1 + log_test $? 
0 "Basic IPv6 connectivity" + return $? +} + +check_traceroute() +{ + if [ ! -x "$(command -v traceroute)" ]; then + echo "SKIP: Could not run IPV4 test without traceroute" + return 1 + fi +} + +check_traceroute6() +{ + if [ ! -x "$(command -v traceroute6)" ]; then + echo "SKIP: Could not run IPV6 test without traceroute6" + return 1 + fi +} + +ipv4_traceroute() +{ + local ttype="$1" + + [ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE" + + log_section "IPv4 ($ttype route): VRF ICMP error route lookup traceroute" + + check_traceroute || return + + setup_"$ttype" + + check_connectivity || return + + run_cmd_grep "${R1_N1_IP}" ip netns exec h1 traceroute ${H2_N2_IP} + log_test $? 0 "Traceroute reports a hop on r1" +} + +ipv4_traceroute_asym() +{ + ipv4_traceroute asym +} + +ipv6_traceroute() +{ + local ttype="$1" + + [ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE" + + log_section "IPv6 ($ttype route): VRF ICMP error route lookup traceroute" + + check_traceroute6 || return + + setup_"$ttype" + + check_connectivity6 || return + + run_cmd_grep "${R1_N1_IP6}" ip netns exec h1 traceroute6 ${H2_N2_IP6} + log_test $? 0 "Traceroute6 reports a hop on r1" +} + +ipv6_traceroute_asym() +{ + ipv6_traceroute asym +} + +ipv4_ping_ttl() +{ + local ttype="$1" + + [ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE" + + log_section "IPv4 ($ttype route): VRF ICMP ttl error route lookup ping" + + setup_"$ttype" + + check_connectivity || return + + run_cmd_grep "Time to live exceeded" ip netns exec h1 ping -t1 -c1 -W2 ${H2_N2_IP} + log_test $? 0 "Ping received ICMP ttl exceeded" +} + +ipv4_ping_ttl_asym() +{ + ipv4_ping_ttl asym +} + +ipv4_ping_frag() +{ + local ttype="$1" + + [ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE" + + log_section "IPv4 ($ttype route): VRF ICMP fragmentation error route lookup ping" + + setup_"$ttype" + + check_connectivity || return + + run_cmd_grep "Frag needed" ip netns exec h1 ping -s 1450 -Mdo -c1 -W2 ${H2_N2_IP} + log_test $? 
0 "Ping received ICMP Frag needed" +} + +ipv4_ping_frag_asym() +{ + ipv4_ping_frag asym +} + +ipv6_ping_ttl() +{ + local ttype="$1" + + [ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE" + + log_section "IPv6 ($ttype route): VRF ICMP ttl error route lookup ping" + + setup_"$ttype" + + check_connectivity6 || return + + run_cmd_grep "Time exceeded: Hop limit" ip netns exec h1 "${ping6}" -t1 -c1 -W2 ${H2_N2_IP6} + log_test $? 0 "Ping received ICMP Hop limit" +} + +ipv6_ping_ttl_asym() +{ + ipv6_ping_ttl asym +} + +ipv6_ping_frag() +{ + local ttype="$1" + + [ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE" + + log_section "IPv6 ($ttype route): VRF ICMP fragmentation error route lookup ping" + + setup_"$ttype" + + check_connectivity6 || return + + run_cmd_grep "Packet too big" ip netns exec h1 "${ping6}" -s 1450 -Mdo -c1 -W2 ${H2_N2_IP6} + log_test $? 0 "Ping received ICMP Packet too big" +} + +ipv6_ping_frag_asym() +{ + ipv6_ping_frag asym +} + +################################################################################ +# usage + +usage() +{ + cat <<EOF +usage: ${0##*/} OPTS + + -4 Run IPv4 tests only + -6 Run IPv6 tests only + -t TEST Run only TEST + -p Pause on fail + -v verbose mode (show commands and output) +EOF +} + +################################################################################ +# main + +# Some systems don't have a ping6 binary anymore +command -v ping6 > /dev/null 2>&1 && ping6=$(command -v ping6) || ping6=$(command -v ping) + +TESTS_IPV4="ipv4_ping_ttl ipv4_traceroute ipv4_ping_frag ipv4_ping_ttl_asym ipv4_traceroute_asym" +TESTS_IPV6="ipv6_ping_ttl ipv6_traceroute ipv6_ping_frag ipv6_ping_ttl_asym ipv6_traceroute_asym" + +ret=0 +nsuccess=0 +nfail=0 + +while getopts :46t:pvh o +do + case $o in + 4) TESTS=ipv4;; + 6) TESTS=ipv6;; + t) TESTS=$OPTARG;; + p) PAUSE_ON_FAIL=yes;; + v) VERBOSE=1;; + h) usage; exit 0;; + *) usage; exit 1;; + esac +done + +# +# show user test config +# +if [ -z "$TESTS" ]; then + TESTS="$TESTS_IPV4 $TESTS_IPV6" +elif [ 
"$TESTS" = "ipv4" ]; then + TESTS="$TESTS_IPV4" +elif [ "$TESTS" = "ipv6" ]; then + TESTS="$TESTS_IPV6" +fi + +for t in $TESTS +do + case $t in + ipv4_ping_ttl|ping) ipv4_ping_ttl;;& + ipv4_ping_ttl_asym|ping) ipv4_ping_ttl_asym;;& + ipv4_traceroute|traceroute) ipv4_traceroute;;& + ipv4_traceroute_asym|traceroute) ipv4_traceroute_asym;;& + ipv4_ping_frag|ping) ipv4_ping_frag;;& + + ipv6_ping_ttl|ping) ipv6_ping_ttl;;& + ipv6_ping_ttl_asym|ping) ipv6_ping_ttl_asym;;& + ipv6_traceroute|traceroute) ipv6_traceroute;;& + ipv6_traceroute_asym|traceroute) ipv6_traceroute_asym;;& + ipv6_ping_frag|ping) ipv6_ping_frag;;& + + # setup namespaces and config, but do not run any tests + setup_sym|setup) setup_sym; exit 0;; + setup_asym) setup_asym; exit 0;; + + help) echo "Test names: $TESTS"; exit 0;; + esac +done + +cleanup + +printf "\nTests passed: %3d\n" ${nsuccess} +printf "Tests failed: %3d\n" ${nfail} + +exit $ret diff --git a/tools/testing/selftests/netfilter/.gitignore b/tools/testing/selftests/netfilter/.gitignore new file mode 100644 index 000000000000..8448f74adfec --- /dev/null +++ b/tools/testing/selftests/netfilter/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +nf-queue diff --git a/tools/testing/selftests/netfilter/nf-queue.c b/tools/testing/selftests/netfilter/nf-queue.c index 29c73bce38fa..9e56b9d47037 100644 --- a/tools/testing/selftests/netfilter/nf-queue.c +++ b/tools/testing/selftests/netfilter/nf-queue.c @@ -17,9 +17,12 @@ struct options { bool count_packets; + bool gso_enabled; int verbose; unsigned int queue_num; unsigned int timeout; + uint32_t verdict; + uint32_t delay_ms; }; static unsigned int queue_stats[5]; @@ -27,7 +30,7 @@ static struct options opts; static void help(const char *p) { - printf("Usage: %s [-c|-v [-vv] ] [-t timeout] [-q queue_num]\n", p); + printf("Usage: %s [-c|-v [-vv] ] [-t timeout] [-q queue_num] [-Qdst_queue ] [ -d ms_delay ] [-G]\n", p); } static int parse_attr_cb(const struct nlattr *attr, void *data) 
@@ -162,7 +165,7 @@ nfq_build_cfg_params(char *buf, uint8_t mode, int range, int queue_num) } static struct nlmsghdr * -nfq_build_verdict(char *buf, int id, int queue_num, int verd) +nfq_build_verdict(char *buf, int id, int queue_num, uint32_t verd) { struct nfqnl_msg_verdict_hdr vh = { .verdict = htonl(verd), @@ -189,9 +192,6 @@ static void print_stats(void) unsigned int last, total; int i; - if (!opts.count_packets) - return; - total = 0; last = queue_stats[0]; @@ -234,7 +234,8 @@ struct mnl_socket *open_queue(void) nlh = nfq_build_cfg_params(buf, NFQNL_COPY_PACKET, 0xFFFF, queue_num); - flags = NFQA_CFG_F_GSO | NFQA_CFG_F_UID_GID; + flags = opts.gso_enabled ? NFQA_CFG_F_GSO : 0; + flags |= NFQA_CFG_F_UID_GID; mnl_attr_put_u32(nlh, NFQA_CFG_FLAGS, htonl(flags)); mnl_attr_put_u32(nlh, NFQA_CFG_MASK, htonl(flags)); @@ -255,6 +256,17 @@ struct mnl_socket *open_queue(void) return nl; } +static void sleep_ms(uint32_t delay) +{ + struct timespec ts = { .tv_sec = delay / 1000 }; + + delay %= 1000; + + ts.tv_nsec = delay * 1000llu * 1000llu; + + nanosleep(&ts, NULL); +} + static int mainloop(void) { unsigned int buflen = 64 * 1024 + MNL_SOCKET_BUFFER_SIZE; @@ -278,7 +290,7 @@ static int mainloop(void) ret = mnl_socket_recvfrom(nl, buf, buflen); if (ret == -1) { - if (errno == ENOBUFS) + if (errno == ENOBUFS || errno == EINTR) continue; if (errno == EAGAIN) { @@ -298,7 +310,10 @@ static int mainloop(void) } id = ret - MNL_CB_OK; - nlh = nfq_build_verdict(buf, id, opts.queue_num, NF_ACCEPT); + if (opts.delay_ms) + sleep_ms(opts.delay_ms); + + nlh = nfq_build_verdict(buf, id, opts.queue_num, opts.verdict); if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) { perror("mnl_socket_sendto"); exit(EXIT_FAILURE); @@ -314,7 +329,7 @@ static void parse_opts(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "chvt:q:")) != -1) { + while ((c = getopt(argc, argv, "chvt:q:Q:d:G")) != -1) { switch (c) { case 'c': opts.count_packets = true; @@ -328,20 +343,48 @@ static void 
parse_opts(int argc, char **argv) if (opts.queue_num > 0xffff) opts.queue_num = 0; break; + case 'Q': + opts.verdict = atoi(optarg); + if (opts.verdict > 0xffff) { + fprintf(stderr, "Expected destination queue number\n"); + exit(1); + } + + opts.verdict <<= 16; + opts.verdict |= NF_QUEUE; + break; + case 'd': + opts.delay_ms = atoi(optarg); + if (opts.delay_ms == 0) { + fprintf(stderr, "Expected nonzero delay (in milliseconds)\n"); + exit(1); + } + break; case 't': opts.timeout = atoi(optarg); break; + case 'G': + opts.gso_enabled = false; + break; case 'v': opts.verbose++; break; } } + + if (opts.verdict != NF_ACCEPT && (opts.verdict >> 16 == opts.queue_num)) { + fprintf(stderr, "Cannot use same destination and source queue\n"); + exit(1); + } } int main(int argc, char *argv[]) { int ret; + opts.verdict = NF_ACCEPT; + opts.gso_enabled = true; + parse_opts(argc, argv); ret = mainloop(); diff --git a/tools/testing/selftests/netfilter/nft_flowtable.sh b/tools/testing/selftests/netfilter/nft_flowtable.sh index a47d1d832210..431296c0f91c 100755 --- a/tools/testing/selftests/netfilter/nft_flowtable.sh +++ b/tools/testing/selftests/netfilter/nft_flowtable.sh @@ -11,7 +11,7 @@ # result in fragmentation and/or PMTU discovery. # # You can check with different Orgininator/Link/Responder MTU eg: -# sh nft_flowtable.sh -o1000 -l500 -r100 +# nft_flowtable.sh -o8000 -l1500 -r2000 # @@ -27,8 +27,7 @@ ns2out="" log_netns=$(sysctl -n net.netfilter.nf_log_all_netns) checktool (){ - $1 > /dev/null 2>&1 - if [ $? -ne 0 ];then + if ! $1 > /dev/null 2>&1; then echo "SKIP: Could not $2" exit $ksft_skip fi @@ -87,19 +86,36 @@ omtu=9000 lmtu=1500 rmtu=2000 +usage(){ + echo "nft_flowtable.sh [OPTIONS]" + echo + echo "MTU options" + echo " -o originator" + echo " -l link" + echo " -r responder" + exit 1 +} + while getopts "o:l:r:" o do case $o in o) omtu=$OPTARG;; l) lmtu=$OPTARG;; r) rmtu=$OPTARG;; + *) usage;; esac done -ip -net nsr1 link set veth0 mtu $omtu +if ! 
ip -net nsr1 link set veth0 mtu $omtu; then + exit 1 +fi + ip -net ns1 link set eth0 mtu $omtu -ip -net nsr2 link set veth1 mtu $rmtu +if ! ip -net nsr2 link set veth1 mtu $rmtu; then + exit 1 +fi + ip -net ns2 link set eth0 mtu $rmtu # transfer-net between nsr1 and nsr2. @@ -120,7 +136,10 @@ for i in 1 2; do ip -net ns$i route add default via 10.0.$i.1 ip -net ns$i addr add dead:$i::99/64 dev eth0 ip -net ns$i route add default via dead:$i::1 - ip netns exec ns$i sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null + if ! ip netns exec ns$i sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null; then + echo "ERROR: Check Originator/Responder values (problem during address addition)" + exit 1 + fi # don't set ip DF bit for first two tests ip netns exec ns$i sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null @@ -178,15 +197,13 @@ if [ $? -ne 0 ]; then fi # test basic connectivity -ip netns exec ns1 ping -c 1 -q 10.0.2.99 > /dev/null -if [ $? -ne 0 ];then +if ! ip netns exec ns1 ping -c 1 -q 10.0.2.99 > /dev/null; then echo "ERROR: ns1 cannot reach ns2" 1>&2 bash exit 1 fi -ip netns exec ns2 ping -c 1 -q 10.0.1.99 > /dev/null -if [ $? -ne 0 ];then +if ! ip netns exec ns2 ping -c 1 -q 10.0.1.99 > /dev/null; then echo "ERROR: ns2 cannot reach ns1" 1>&2 exit 1 fi @@ -203,7 +220,6 @@ ns2out=$(mktemp) make_file() { name=$1 - who=$2 SIZE=$((RANDOM % (1024 * 8))) TSIZE=$((SIZE * 1024)) @@ -222,8 +238,7 @@ check_transfer() out=$2 what=$3 - cmp "$in" "$out" > /dev/null 2>&1 - if [ $? -ne 0 ] ;then + if ! cmp "$in" "$out" > /dev/null 2>&1; then echo "FAIL: file mismatch for $what" 1>&2 ls -l "$in" ls -l "$out" @@ -260,13 +275,11 @@ test_tcp_forwarding_ip() wait - check_transfer "$ns1in" "$ns2out" "ns1 -> ns2" - if [ $? -ne 0 ];then + if ! check_transfer "$ns1in" "$ns2out" "ns1 -> ns2"; then lret=1 fi - check_transfer "$ns2in" "$ns1out" "ns1 <- ns2" - if [ $? -ne 0 ];then + if ! 
check_transfer "$ns2in" "$ns1out" "ns1 <- ns2"; then lret=1 fi @@ -295,13 +308,12 @@ test_tcp_forwarding_nat() return $lret } -make_file "$ns1in" "ns1" -make_file "$ns2in" "ns2" +make_file "$ns1in" +make_file "$ns2in" # First test: # No PMTU discovery, nsr1 is expected to fragment packets from ns1 to ns2 as needed. -test_tcp_forwarding ns1 ns2 -if [ $? -eq 0 ] ;then +if test_tcp_forwarding ns1 ns2; then echo "PASS: flow offloaded for ns1/ns2" else echo "FAIL: flow offload for ns1/ns2:" 1>&2 @@ -332,9 +344,7 @@ table ip nat { } EOF -test_tcp_forwarding_nat ns1 ns2 - -if [ $? -eq 0 ] ;then +if test_tcp_forwarding_nat ns1 ns2; then echo "PASS: flow offloaded for ns1/ns2 with NAT" else echo "FAIL: flow offload for ns1/ns2 with NAT" 1>&2 @@ -346,8 +356,7 @@ fi # Same as second test, but with PMTU discovery enabled. handle=$(ip netns exec nsr1 nft -a list table inet filter | grep something-to-grep-for | cut -d \# -f 2) -ip netns exec nsr1 nft delete rule inet filter forward $handle -if [ $? -ne 0 ] ;then +if ! ip netns exec nsr1 nft delete rule inet filter forward $handle; then echo "FAIL: Could not delete large-packet accept rule" exit 1 fi @@ -355,8 +364,7 @@ fi ip netns exec ns1 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null ip netns exec ns2 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null -test_tcp_forwarding_nat ns1 ns2 -if [ $? -eq 0 ] ;then +if test_tcp_forwarding_nat ns1 ns2; then echo "PASS: flow offloaded for ns1/ns2 with NAT and pmtu discovery" else echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2 @@ -402,8 +410,7 @@ ip -net ns2 route del 192.168.10.1 via 10.0.2.1 ip -net ns2 route add default via 10.0.2.1 ip -net ns2 route add default via dead:2::1 -test_tcp_forwarding ns1 ns2 -if [ $? 
-eq 0 ] ;then +if test_tcp_forwarding ns1 ns2; then echo "PASS: ipsec tunnel mode for ns1/ns2" else echo "FAIL: ipsec tunnel mode for ns1/ns2" diff --git a/tools/testing/selftests/netfilter/nft_meta.sh b/tools/testing/selftests/netfilter/nft_meta.sh index d250b84dd5bc..087f0e6e71ce 100755 --- a/tools/testing/selftests/netfilter/nft_meta.sh +++ b/tools/testing/selftests/netfilter/nft_meta.sh @@ -7,8 +7,7 @@ ksft_skip=4 sfx=$(mktemp -u "XXXXXXXX") ns0="ns0-$sfx" -nft --version > /dev/null 2>&1 -if [ $? -ne 0 ];then +if ! nft --version > /dev/null 2>&1; then echo "SKIP: Could not run test without nft tool" exit $ksft_skip fi @@ -24,6 +23,8 @@ ip -net "$ns0" addr add 127.0.0.1 dev lo trap cleanup EXIT +currentyear=$(date +%G) +lastyear=$((currentyear-1)) ip netns exec "$ns0" nft -f /dev/stdin <<EOF table inet filter { counter iifcount {} @@ -33,6 +34,9 @@ table inet filter { counter infproto4count {} counter il4protocounter {} counter imarkcounter {} + counter icpu0counter {} + counter ilastyearcounter {} + counter icurrentyearcounter {} counter oifcount {} counter oifnamecount {} @@ -54,6 +58,9 @@ table inet filter { meta nfproto ipv4 counter name "infproto4count" meta l4proto icmp counter name "il4protocounter" meta mark 42 counter name "imarkcounter" + meta cpu 0 counter name "icpu0counter" + meta time "$lastyear-01-01" - "$lastyear-12-31" counter name ilastyearcounter + meta time "$currentyear-01-01" - "$currentyear-12-31" counter name icurrentyearcounter } chain output { @@ -84,11 +91,10 @@ check_one_counter() local want="packets $2" local verbose="$3" - cnt=$(ip netns exec "$ns0" nft list counter inet filter $cname | grep -q "$want") - if [ $? -ne 0 ];then + if ! 
ip netns exec "$ns0" nft list counter inet filter $cname | grep -q "$want"; then echo "FAIL: $cname, want \"$want\", got" ret=1 - ip netns exec "$ns0" nft list counter inet filter $counter + ip netns exec "$ns0" nft list counter inet filter $cname fi } @@ -100,8 +106,7 @@ check_lo_counters() for counter in iifcount iifnamecount iifgroupcount iiftypecount infproto4count \ oifcount oifnamecount oifgroupcount oiftypecount onfproto4count \ - il4protocounter \ - ol4protocounter \ + il4protocounter icurrentyearcounter ol4protocounter \ ; do check_one_counter "$counter" "$want" "$verbose" done @@ -116,9 +121,22 @@ check_one_counter oskuidcounter "1" true check_one_counter oskgidcounter "1" true check_one_counter imarkcounter "1" true check_one_counter omarkcounter "1" true +check_one_counter ilastyearcounter "0" true if [ $ret -eq 0 ];then echo "OK: nftables meta iif/oif counters at expected values" +else + exit $ret +fi + +#First CPU execution and counter +taskset -p 01 $$ > /dev/null +ip netns exec "$ns0" nft reset counters > /dev/null +ip netns exec "$ns0" ping -q -c 1 127.0.0.1 > /dev/null +check_one_counter icpu0counter "2" true + +if [ $ret -eq 0 ];then + echo "OK: nftables meta cpu counter at expected values" fi exit $ret diff --git a/tools/testing/selftests/netfilter/nft_queue.sh b/tools/testing/selftests/netfilter/nft_queue.sh index 6898448b4266..3d202b90b33d 100755 --- a/tools/testing/selftests/netfilter/nft_queue.sh +++ b/tools/testing/selftests/netfilter/nft_queue.sh @@ -12,6 +12,7 @@ sfx=$(mktemp -u "XXXXXXXX") ns1="ns1-$sfx" ns2="ns2-$sfx" nsrouter="nsrouter-$sfx" +timeout=4 cleanup() { @@ -20,6 +21,7 @@ cleanup() ip netns del ${nsrouter} rm -f "$TMPFILE0" rm -f "$TMPFILE1" + rm -f "$TMPFILE2" "$TMPFILE3" } nft --version > /dev/null 2>&1 @@ -42,6 +44,8 @@ fi TMPFILE0=$(mktemp) TMPFILE1=$(mktemp) +TMPFILE2=$(mktemp) +TMPFILE3=$(mktemp) trap cleanup EXIT ip netns add ${ns1} @@ -83,7 +87,7 @@ load_ruleset() { local name=$1 local prio=$2 -ip netns exec 
${nsrouter} nft -f - <<EOF +ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF table inet $name { chain nfq { ip protocol icmp queue bypass @@ -118,7 +122,7 @@ EOF load_counter_ruleset() { local prio=$1 -ip netns exec ${nsrouter} nft -f - <<EOF +ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF table inet countrules { chain pre { type filter hook prerouting priority $prio; policy accept; @@ -175,7 +179,7 @@ test_ping_router() { test_queue_blackhole() { local proto=$1 -ip netns exec ${nsrouter} nft -f - <<EOF +ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF table $proto blackh { chain forward { type filter hook forward priority 0; policy accept; @@ -184,10 +188,10 @@ table $proto blackh { } EOF if [ $proto = "ip" ] ;then - ip netns exec ${ns1} ping -c 1 -q 10.0.2.99 > /dev/null + ip netns exec ${ns1} ping -W 2 -c 1 -q 10.0.2.99 > /dev/null lret=$? elif [ $proto = "ip6" ]; then - ip netns exec ${ns1} ping -c 1 -q dead:2::99 > /dev/null + ip netns exec ${ns1} ping -W 2 -c 1 -q dead:2::99 > /dev/null lret=$? else lret=111 @@ -214,8 +218,8 @@ test_queue() local last="" # spawn nf-queue listeners - ip netns exec ${nsrouter} ./nf-queue -c -q 0 -t 3 > "$TMPFILE0" & - ip netns exec ${nsrouter} ./nf-queue -c -q 1 -t 3 > "$TMPFILE1" & + ip netns exec ${nsrouter} ./nf-queue -c -q 0 -t $timeout > "$TMPFILE0" & + ip netns exec ${nsrouter} ./nf-queue -c -q 1 -t $timeout > "$TMPFILE1" & sleep 1 test_ping ret=$? @@ -250,11 +254,11 @@ test_queue() test_tcp_forward() { - ip netns exec ${nsrouter} ./nf-queue -q 2 -t 10 & + ip netns exec ${nsrouter} ./nf-queue -q 2 -t $timeout & local nfqpid=$! tmpfile=$(mktemp) || exit 1 - dd conv=sparse status=none if=/dev/zero bs=1M count=100 of=$tmpfile + dd conv=sparse status=none if=/dev/zero bs=1M count=200 of=$tmpfile ip netns exec ${ns2} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null & local rpid=$! 
@@ -270,15 +274,13 @@ test_tcp_forward() test_tcp_localhost() { - tc -net "${nsrouter}" qdisc add dev lo root netem loss random 1% - tmpfile=$(mktemp) || exit 1 - dd conv=sparse status=none if=/dev/zero bs=1M count=900 of=$tmpfile + dd conv=sparse status=none if=/dev/zero bs=1M count=200 of=$tmpfile ip netns exec ${nsrouter} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null & local rpid=$! - ip netns exec ${nsrouter} ./nf-queue -q 3 -t 30 & + ip netns exec ${nsrouter} ./nf-queue -q 3 -t $timeout & local nfqpid=$! sleep 1 @@ -287,6 +289,47 @@ test_tcp_localhost() wait $rpid [ $? -eq 0 ] && echo "PASS: tcp via loopback" + wait 2>/dev/null +} + +test_tcp_localhost_requeue() +{ +ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF +flush ruleset +table inet filter { + chain output { + type filter hook output priority 0; policy accept; + tcp dport 12345 limit rate 1/second burst 1 packets counter queue num 0 + } + chain post { + type filter hook postrouting priority 0; policy accept; + tcp dport 12345 limit rate 1/second burst 1 packets counter queue num 0 + } +} +EOF + tmpfile=$(mktemp) || exit 1 + dd conv=sparse status=none if=/dev/zero bs=1M count=200 of=$tmpfile + ip netns exec ${nsrouter} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null & + local rpid=$! + + ip netns exec ${nsrouter} ./nf-queue -c -q 1 -t $timeout > "$TMPFILE2" & + + # nfqueue 1 will be called via output hook. But this time, + # re-queue the packet to nfqueue program on queue 2. + ip netns exec ${nsrouter} ./nf-queue -G -d 150 -c -q 0 -Q 1 -t $timeout > "$TMPFILE3" & + + sleep 1 + ip netns exec ${nsrouter} nc -w 5 127.0.0.1 12345 <"$tmpfile" > /dev/null + rm -f "$tmpfile" + + wait + + if ! diff -u "$TMPFILE2" "$TMPFILE3" ; then + echo "FAIL: lost packets during requeue?!" 
1>&2 + return + fi + + echo "PASS: tcp via loopback and re-queueing" } ip netns exec ${nsrouter} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null @@ -328,5 +371,6 @@ test_queue 20 test_tcp_forward test_tcp_localhost +test_tcp_localhost_requeue exit $ret diff --git a/tools/testing/selftests/pidfd/pidfd.h b/tools/testing/selftests/pidfd/pidfd.h index a2c80914e3dc..01f8d3c0cf2c 100644 --- a/tools/testing/selftests/pidfd/pidfd.h +++ b/tools/testing/selftests/pidfd/pidfd.h @@ -46,6 +46,10 @@ #define __NR_pidfd_getfd -1 #endif +#ifndef PIDFD_NONBLOCK +#define PIDFD_NONBLOCK O_NONBLOCK +#endif + /* * The kernel reserves 300 pids via RESERVED_PIDS in kernel/pid.c * That means, when it wraps around any pid < 300 will be skipped. diff --git a/tools/testing/selftests/pidfd/pidfd_setns_test.c b/tools/testing/selftests/pidfd/pidfd_setns_test.c index 7dca1aa4672d..1f085b922c6e 100644 --- a/tools/testing/selftests/pidfd/pidfd_setns_test.c +++ b/tools/testing/selftests/pidfd/pidfd_setns_test.c @@ -75,7 +75,7 @@ static int sys_waitid(int which, pid_t pid, int options) pid_t create_child(int *pidfd, unsigned flags) { - struct clone_args args = { + struct __clone_args args = { .flags = CLONE_PIDFD | flags, .exit_signal = SIGCHLD, .pidfd = ptr_to_u64(pidfd), diff --git a/tools/testing/selftests/pidfd/pidfd_wait.c b/tools/testing/selftests/pidfd/pidfd_wait.c index 7079f8eef792..be2943f072f6 100644 --- a/tools/testing/selftests/pidfd/pidfd_wait.c +++ b/tools/testing/selftests/pidfd/pidfd_wait.c @@ -17,10 +17,15 @@ #include <unistd.h> #include "pidfd.h" -#include "../kselftest.h" +#include "../kselftest_harness.h" #define ptr_to_u64(ptr) ((__u64)((uintptr_t)(ptr))) +/* Attempt to de-conflict with the selftests tree. */ +#ifndef SKIP +#define SKIP(s, ...) 
XFAIL(s, ##__VA_ARGS__) +#endif + static pid_t sys_clone3(struct clone_args *args) { return syscall(__NR_clone3, args, sizeof(struct clone_args)); @@ -32,9 +37,8 @@ static int sys_waitid(int which, pid_t pid, siginfo_t *info, int options, return syscall(__NR_waitid, which, pid, info, options, ru); } -static int test_pidfd_wait_simple(void) +TEST(wait_simple) { - const char *test_name = "pidfd wait simple"; int pidfd = -1, status = 0; pid_t parent_tid = -1; struct clone_args args = { @@ -50,76 +54,40 @@ static int test_pidfd_wait_simple(void) }; pidfd = open("/proc/self", O_DIRECTORY | O_RDONLY | O_CLOEXEC); - if (pidfd < 0) - ksft_exit_fail_msg("%s test: failed to open /proc/self %s\n", - test_name, strerror(errno)); + ASSERT_GE(pidfd, 0); pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL); - if (pid == 0) - ksft_exit_fail_msg( - "%s test: succeeded to wait on invalid pidfd %s\n", - test_name, strerror(errno)); - close(pidfd); + ASSERT_NE(pid, 0); + EXPECT_EQ(close(pidfd), 0); pidfd = -1; pidfd = open("/dev/null", O_RDONLY | O_CLOEXEC); - if (pidfd == 0) - ksft_exit_fail_msg("%s test: failed to open /dev/null %s\n", - test_name, strerror(errno)); + ASSERT_GE(pidfd, 0); pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL); - if (pid == 0) - ksft_exit_fail_msg( - "%s test: succeeded to wait on invalid pidfd %s\n", - test_name, strerror(errno)); - close(pidfd); + ASSERT_NE(pid, 0); + EXPECT_EQ(close(pidfd), 0); pidfd = -1; pid = sys_clone3(&args); - if (pid < 0) - ksft_exit_fail_msg("%s test: failed to create new process %s\n", - test_name, strerror(errno)); + ASSERT_GE(pid, 0); if (pid == 0) exit(EXIT_SUCCESS); pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL); - if (pid < 0) - ksft_exit_fail_msg( - "%s test: failed to wait on process with pid %d and pidfd %d: %s\n", - test_name, parent_tid, pidfd, strerror(errno)); - - if (!WIFEXITED(info.si_status) || WEXITSTATUS(info.si_status)) - ksft_exit_fail_msg( - "%s test: unexpected status received after waiting 
on process with pid %d and pidfd %d: %s\n", - test_name, parent_tid, pidfd, strerror(errno)); - close(pidfd); - - if (info.si_signo != SIGCHLD) - ksft_exit_fail_msg( - "%s test: unexpected si_signo value %d received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, info.si_signo, parent_tid, pidfd, - strerror(errno)); - - if (info.si_code != CLD_EXITED) - ksft_exit_fail_msg( - "%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, info.si_code, parent_tid, pidfd, - strerror(errno)); - - if (info.si_pid != parent_tid) - ksft_exit_fail_msg( - "%s test: unexpected si_pid value %d received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, info.si_pid, parent_tid, pidfd, - strerror(errno)); - - ksft_test_result_pass("%s test: Passed\n", test_name); - return 0; + ASSERT_GE(pid, 0); + ASSERT_EQ(WIFEXITED(info.si_status), true); + ASSERT_EQ(WEXITSTATUS(info.si_status), 0); + EXPECT_EQ(close(pidfd), 0); + + ASSERT_EQ(info.si_signo, SIGCHLD); + ASSERT_EQ(info.si_code, CLD_EXITED); + ASSERT_EQ(info.si_pid, parent_tid); } -static int test_pidfd_wait_states(void) +TEST(wait_states) { - const char *test_name = "pidfd wait states"; int pidfd = -1, status = 0; pid_t parent_tid = -1; struct clone_args args = { @@ -135,9 +103,7 @@ static int test_pidfd_wait_states(void) }; pid = sys_clone3(&args); - if (pid < 0) - ksft_exit_fail_msg("%s test: failed to create new process %s\n", - test_name, strerror(errno)); + ASSERT_GE(pid, 0); if (pid == 0) { kill(getpid(), SIGSTOP); @@ -145,127 +111,115 @@ static int test_pidfd_wait_states(void) exit(EXIT_SUCCESS); } - ret = sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED, NULL); - if (ret < 0) - ksft_exit_fail_msg( - "%s test: failed to wait on WSTOPPED process with pid %d and pidfd %d: %s\n", - test_name, parent_tid, pidfd, strerror(errno)); - - if (info.si_signo != SIGCHLD) - ksft_exit_fail_msg( - "%s test: unexpected si_signo value %d received 
after waiting on process with pid %d and pidfd %d: %s\n", - test_name, info.si_signo, parent_tid, pidfd, - strerror(errno)); - - if (info.si_code != CLD_STOPPED) - ksft_exit_fail_msg( - "%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, info.si_code, parent_tid, pidfd, - strerror(errno)); - - if (info.si_pid != parent_tid) - ksft_exit_fail_msg( - "%s test: unexpected si_pid value %d received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, info.si_pid, parent_tid, pidfd, - strerror(errno)); - - ret = sys_pidfd_send_signal(pidfd, SIGCONT, NULL, 0); - if (ret < 0) - ksft_exit_fail_msg( - "%s test: failed to send signal to process with pid %d and pidfd %d: %s\n", - test_name, parent_tid, pidfd, strerror(errno)); - - ret = sys_waitid(P_PIDFD, pidfd, &info, WCONTINUED, NULL); - if (ret < 0) - ksft_exit_fail_msg( - "%s test: failed to wait WCONTINUED on process with pid %d and pidfd %d: %s\n", - test_name, parent_tid, pidfd, strerror(errno)); - - if (info.si_signo != SIGCHLD) - ksft_exit_fail_msg( - "%s test: unexpected si_signo value %d received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, info.si_signo, parent_tid, pidfd, - strerror(errno)); - - if (info.si_code != CLD_CONTINUED) - ksft_exit_fail_msg( - "%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, info.si_code, parent_tid, pidfd, - strerror(errno)); - - if (info.si_pid != parent_tid) - ksft_exit_fail_msg( - "%s test: unexpected si_pid value %d received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, info.si_pid, parent_tid, pidfd, - strerror(errno)); - - ret = sys_waitid(P_PIDFD, pidfd, &info, WUNTRACED, NULL); - if (ret < 0) - ksft_exit_fail_msg( - "%s test: failed to wait on WUNTRACED process with pid %d and pidfd %d: %s\n", - test_name, parent_tid, pidfd, strerror(errno)); - - if (info.si_signo != SIGCHLD) - 
ksft_exit_fail_msg( - "%s test: unexpected si_signo value %d received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, info.si_signo, parent_tid, pidfd, - strerror(errno)); - - if (info.si_code != CLD_STOPPED) - ksft_exit_fail_msg( - "%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, info.si_code, parent_tid, pidfd, - strerror(errno)); - - if (info.si_pid != parent_tid) - ksft_exit_fail_msg( - "%s test: unexpected si_pid value %d received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, info.si_pid, parent_tid, pidfd, - strerror(errno)); - - ret = sys_pidfd_send_signal(pidfd, SIGKILL, NULL, 0); - if (ret < 0) - ksft_exit_fail_msg( - "%s test: failed to send SIGKILL to process with pid %d and pidfd %d: %s\n", - test_name, parent_tid, pidfd, strerror(errno)); + ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED, NULL), 0); + ASSERT_EQ(info.si_signo, SIGCHLD); + ASSERT_EQ(info.si_code, CLD_STOPPED); + ASSERT_EQ(info.si_pid, parent_tid); - ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL); - if (ret < 0) - ksft_exit_fail_msg( - "%s test: failed to wait on WEXITED process with pid %d and pidfd %d: %s\n", - test_name, parent_tid, pidfd, strerror(errno)); - - if (info.si_signo != SIGCHLD) - ksft_exit_fail_msg( - "%s test: unexpected si_signo value %d received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, info.si_signo, parent_tid, pidfd, - strerror(errno)); - - if (info.si_code != CLD_KILLED) - ksft_exit_fail_msg( - "%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, info.si_code, parent_tid, pidfd, - strerror(errno)); - - if (info.si_pid != parent_tid) - ksft_exit_fail_msg( - "%s test: unexpected si_pid value %d received after waiting on process with pid %d and pidfd %d: %s\n", - test_name, info.si_pid, parent_tid, pidfd, - strerror(errno)); - - close(pidfd); - - 
ksft_test_result_pass("%s test: Passed\n", test_name); - return 0; + ASSERT_EQ(sys_pidfd_send_signal(pidfd, SIGCONT, NULL, 0), 0); + + ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WCONTINUED, NULL), 0); + ASSERT_EQ(info.si_signo, SIGCHLD); + ASSERT_EQ(info.si_code, CLD_CONTINUED); + ASSERT_EQ(info.si_pid, parent_tid); + + ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WUNTRACED, NULL), 0); + ASSERT_EQ(info.si_signo, SIGCHLD); + ASSERT_EQ(info.si_code, CLD_STOPPED); + ASSERT_EQ(info.si_pid, parent_tid); + + ASSERT_EQ(sys_pidfd_send_signal(pidfd, SIGKILL, NULL, 0), 0); + + ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL), 0); + ASSERT_EQ(info.si_signo, SIGCHLD); + ASSERT_EQ(info.si_code, CLD_KILLED); + ASSERT_EQ(info.si_pid, parent_tid); + + EXPECT_EQ(close(pidfd), 0); } -int main(int argc, char **argv) +TEST(wait_nonblock) { - ksft_print_header(); - ksft_set_plan(2); + int pidfd, status = 0; + unsigned int flags = 0; + pid_t parent_tid = -1; + struct clone_args args = { + .parent_tid = ptr_to_u64(&parent_tid), + .flags = CLONE_PARENT_SETTID, + .exit_signal = SIGCHLD, + }; + int ret; + pid_t pid; + siginfo_t info = { + .si_signo = 0, + }; + + /* + * Callers need to see ECHILD with non-blocking pidfds when no child + * processes exists. + */ + pidfd = sys_pidfd_open(getpid(), PIDFD_NONBLOCK); + EXPECT_GE(pidfd, 0) { + /* pidfd_open() doesn't support PIDFD_NONBLOCK. */ + ASSERT_EQ(errno, EINVAL); + SKIP(return, "Skipping PIDFD_NONBLOCK test"); + } + + ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL); + ASSERT_LT(ret, 0); + ASSERT_EQ(errno, ECHILD); + EXPECT_EQ(close(pidfd), 0); + + pid = sys_clone3(&args); + ASSERT_GE(pid, 0); + + if (pid == 0) { + kill(getpid(), SIGSTOP); + exit(EXIT_SUCCESS); + } - test_pidfd_wait_simple(); - test_pidfd_wait_states(); + pidfd = sys_pidfd_open(pid, PIDFD_NONBLOCK); + EXPECT_GE(pidfd, 0) { + /* pidfd_open() doesn't support PIDFD_NONBLOCK. 
*/ + ASSERT_EQ(errno, EINVAL); + SKIP(return, "Skipping PIDFD_NONBLOCK test"); + } + + flags = fcntl(pidfd, F_GETFL, 0); + ASSERT_GT(flags, 0); + ASSERT_GT((flags & O_NONBLOCK), 0); + + /* + * Callers need to see EAGAIN/EWOULDBLOCK with non-blocking pidfd when + * child processes exist but none have exited. + */ + ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL); + ASSERT_LT(ret, 0); + ASSERT_EQ(errno, EAGAIN); + + /* + * Callers need to continue seeing 0 with non-blocking pidfd and + * WNOHANG raised explicitly when child processes exist but none have + * exited. + */ + ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED | WNOHANG, NULL); + ASSERT_EQ(ret, 0); - return ksft_exit_pass(); + ASSERT_EQ(fcntl(pidfd, F_SETFL, (flags & ~O_NONBLOCK)), 0); + + ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED, NULL), 0); + ASSERT_EQ(info.si_signo, SIGCHLD); + ASSERT_EQ(info.si_code, CLD_STOPPED); + ASSERT_EQ(info.si_pid, parent_tid); + + ASSERT_EQ(sys_pidfd_send_signal(pidfd, SIGCONT, NULL, 0), 0); + + ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL), 0); + ASSERT_EQ(info.si_signo, SIGCHLD); + ASSERT_EQ(info.si_code, CLD_EXITED); + ASSERT_EQ(info.si_pid, parent_tid); + + EXPECT_EQ(close(pidfd), 0); } + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/powerpc/alignment/alignment_handler.c b/tools/testing/selftests/powerpc/alignment/alignment_handler.c index 55ef15184057..2a0503bc7e49 100644 --- a/tools/testing/selftests/powerpc/alignment/alignment_handler.c +++ b/tools/testing/selftests/powerpc/alignment/alignment_handler.c @@ -55,8 +55,6 @@ #include <setjmp.h> #include <signal.h> -#include <asm/cputable.h> - #include "utils.h" #include "instructions.h" @@ -64,6 +62,7 @@ int bufsize; int debug; int testing; volatile int gotsig; +bool prefixes_enabled; char *cipath = "/dev/fb0"; long cioffset; @@ -77,7 +76,12 @@ void sighandler(int sig, siginfo_t *info, void *ctx) } gotsig = sig; #ifdef __powerpc64__ - ucp->uc_mcontext.gp_regs[PT_NIP] += 4; + if 
(prefixes_enabled) { + u32 inst = *(u32 *)ucp->uc_mcontext.gp_regs[PT_NIP]; + ucp->uc_mcontext.gp_regs[PT_NIP] += ((inst >> 26 == 1) ? 8 : 4); + } else { + ucp->uc_mcontext.gp_regs[PT_NIP] += 4; + } #else ucp->uc_mcontext.uc_regs->gregs[PT_NIP] += 4; #endif @@ -648,6 +652,8 @@ int main(int argc, char *argv[]) exit(1); } + prefixes_enabled = have_hwcap2(PPC_FEATURE2_ARCH_3_1); + rc |= test_harness(test_alignment_handler_vsx_206, "test_alignment_handler_vsx_206"); rc |= test_harness(test_alignment_handler_vsx_207, diff --git a/tools/testing/selftests/powerpc/benchmarks/context_switch.c b/tools/testing/selftests/powerpc/benchmarks/context_switch.c index d50cc05df495..96554e2794d1 100644 --- a/tools/testing/selftests/powerpc/benchmarks/context_switch.c +++ b/tools/testing/selftests/powerpc/benchmarks/context_switch.c @@ -481,6 +481,12 @@ int main(int argc, char *argv[]) else printf("futex"); + if (!have_hwcap(PPC_FEATURE_HAS_ALTIVEC)) + touch_altivec = 0; + + if (!have_hwcap(PPC_FEATURE_HAS_VSX)) + touch_vector = 0; + printf(" on cpus %d/%d touching FP:%s altivec:%s vector:%s vdso:%s\n", cpu1, cpu2, touch_fp ? "yes" : "no", touch_altivec ? "yes" : "no", touch_vector ? "yes" : "no", touch_vdso ? 
"yes" : "no"); diff --git a/tools/testing/selftests/powerpc/copyloops/.gitignore b/tools/testing/selftests/powerpc/copyloops/.gitignore index ddaf140b8255..994b11af765c 100644 --- a/tools/testing/selftests/powerpc/copyloops/.gitignore +++ b/tools/testing/selftests/powerpc/copyloops/.gitignore @@ -12,4 +12,4 @@ memcpy_p7_t1 copyuser_64_exc_t0 copyuser_64_exc_t1 copyuser_64_exc_t2 -memcpy_mcsafe_64 +copy_mc_64 diff --git a/tools/testing/selftests/powerpc/copyloops/Makefile b/tools/testing/selftests/powerpc/copyloops/Makefile index 0917983a1c78..3095b1f1c02b 100644 --- a/tools/testing/selftests/powerpc/copyloops/Makefile +++ b/tools/testing/selftests/powerpc/copyloops/Makefile @@ -12,7 +12,7 @@ ASFLAGS = $(CFLAGS) -Wa,-mpower4 TEST_GEN_PROGS := copyuser_64_t0 copyuser_64_t1 copyuser_64_t2 \ copyuser_p7_t0 copyuser_p7_t1 \ memcpy_64_t0 memcpy_64_t1 memcpy_64_t2 \ - memcpy_p7_t0 memcpy_p7_t1 memcpy_mcsafe_64 \ + memcpy_p7_t0 memcpy_p7_t1 copy_mc_64 \ copyuser_64_exc_t0 copyuser_64_exc_t1 copyuser_64_exc_t2 EXTRA_SOURCES := validate.c ../harness.c stubs.S @@ -45,9 +45,9 @@ $(OUTPUT)/memcpy_p7_t%: memcpy_power7.S $(EXTRA_SOURCES) -D SELFTEST_CASE=$(subst memcpy_p7_t,,$(notdir $@)) \ -o $@ $^ -$(OUTPUT)/memcpy_mcsafe_64: memcpy_mcsafe_64.S $(EXTRA_SOURCES) +$(OUTPUT)/copy_mc_64: copy_mc_64.S $(EXTRA_SOURCES) $(CC) $(CPPFLAGS) $(CFLAGS) \ - -D COPY_LOOP=test_memcpy_mcsafe \ + -D COPY_LOOP=test_copy_mc_generic \ -o $@ $^ $(OUTPUT)/copyuser_64_exc_t%: copyuser_64.S exc_validate.c ../harness.c \ diff --git a/tools/testing/selftests/powerpc/copyloops/copy_mc_64.S b/tools/testing/selftests/powerpc/copyloops/copy_mc_64.S new file mode 120000 index 000000000000..dcbe06d500fb --- /dev/null +++ b/tools/testing/selftests/powerpc/copyloops/copy_mc_64.S @@ -0,0 +1 @@ +../../../../../arch/powerpc/lib/copy_mc_64.S
\ No newline at end of file diff --git a/tools/testing/selftests/powerpc/copyloops/memcpy_mcsafe_64.S b/tools/testing/selftests/powerpc/copyloops/memcpy_mcsafe_64.S deleted file mode 120000 index f0feef3062f6..000000000000 --- a/tools/testing/selftests/powerpc/copyloops/memcpy_mcsafe_64.S +++ /dev/null @@ -1 +0,0 @@ -../../../../../arch/powerpc/lib/memcpy_mcsafe_64.S
\ No newline at end of file diff --git a/tools/testing/selftests/powerpc/dscr/Makefile b/tools/testing/selftests/powerpc/dscr/Makefile index cfa6eedcb66c..845db6273a1b 100644 --- a/tools/testing/selftests/powerpc/dscr/Makefile +++ b/tools/testing/selftests/powerpc/dscr/Makefile @@ -10,4 +10,4 @@ include ../../lib.mk $(OUTPUT)/dscr_default_test: LDLIBS += -lpthread -$(TEST_GEN_PROGS): ../harness.c +$(TEST_GEN_PROGS): ../harness.c ../utils.c diff --git a/tools/testing/selftests/powerpc/dscr/dscr_default_test.c b/tools/testing/selftests/powerpc/dscr/dscr_default_test.c index 288a4e2ad156..e76611e608af 100644 --- a/tools/testing/selftests/powerpc/dscr/dscr_default_test.c +++ b/tools/testing/selftests/powerpc/dscr/dscr_default_test.c @@ -63,6 +63,8 @@ int dscr_default(void) unsigned long i, *status[THREADS]; unsigned long orig_dscr_default; + SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR)); + orig_dscr_default = get_default_dscr(); /* Initial DSCR default */ diff --git a/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c b/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c index aefcd8d8759b..32fcf2b324b1 100644 --- a/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c +++ b/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c @@ -21,6 +21,8 @@ int dscr_explicit(void) { unsigned long i, dscr = 0; + SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR)); + srand(getpid()); set_dscr(dscr); diff --git a/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c b/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c index 7c1cb46397c6..c6a81b2d6b91 100644 --- a/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c +++ b/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c @@ -44,6 +44,8 @@ int dscr_inherit_exec(void) unsigned long i, dscr = 0; pid_t pid; + SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR)); + for (i = 0; i < COUNT; i++) { dscr++; if (dscr > DSCR_MAX) diff --git a/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c 
b/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c index 04297a69ab59..f9dfd3d3c2d5 100644 --- a/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c +++ b/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c @@ -22,6 +22,8 @@ int dscr_inherit(void) unsigned long i, dscr = 0; pid_t pid; + SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR)); + srand(getpid()); set_dscr(dscr); diff --git a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c index 02f6b4efde14..fbbdffdb2e5d 100644 --- a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c +++ b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c @@ -77,6 +77,8 @@ int dscr_sysfs(void) unsigned long orig_dscr_default; int i, j; + SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR)); + orig_dscr_default = get_default_dscr(); for (i = 0; i < COUNT; i++) { for (j = 0; j < DSCR_MAX; j++) { diff --git a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c index 37be2c25f277..191ed126f118 100644 --- a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c +++ b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c @@ -56,6 +56,8 @@ int dscr_sysfs_thread(void) unsigned long orig_dscr_default; int i, j; + SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR)); + orig_dscr_default = get_default_dscr(); for (i = 0; i < COUNT; i++) { for (j = 0; j < DSCR_MAX; j++) { diff --git a/tools/testing/selftests/powerpc/dscr/dscr_user_test.c b/tools/testing/selftests/powerpc/dscr/dscr_user_test.c index eaf785d11eed..e09072446dd3 100644 --- a/tools/testing/selftests/powerpc/dscr/dscr_user_test.c +++ b/tools/testing/selftests/powerpc/dscr/dscr_user_test.c @@ -36,6 +36,8 @@ int dscr_user(void) { int i; + SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR)); + check_dscr(""); for (i = 0; i < COUNT; i++) { diff --git a/tools/testing/selftests/powerpc/eeh/eeh-basic.sh b/tools/testing/selftests/powerpc/eeh/eeh-basic.sh index 
8a8d0f456946..0d783e1065c8 100755 --- a/tools/testing/selftests/powerpc/eeh/eeh-basic.sh +++ b/tools/testing/selftests/powerpc/eeh/eeh-basic.sh @@ -1,17 +1,19 @@ #!/bin/sh # SPDX-License-Identifier: GPL-2.0-only +KSELFTESTS_SKIP=4 + . ./eeh-functions.sh if ! eeh_supported ; then echo "EEH not supported on this system, skipping" - exit 0; + exit $KSELFTESTS_SKIP; fi if [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_check" ] && \ [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_break" ] ; then echo "debugfs EEH testing files are missing. Is debugfs mounted?" - exit 1; + exit $KSELFTESTS_SKIP; fi pre_lspci=`mktemp` @@ -84,4 +86,5 @@ echo "$failed devices failed to recover ($dev_count tested)" lspci | diff -u $pre_lspci - rm -f $pre_lspci -exit $failed +test "$failed" == 0 +exit $? diff --git a/tools/testing/selftests/powerpc/include/utils.h b/tools/testing/selftests/powerpc/include/utils.h index 71d2924f5b8b..052b5a775dc2 100644 --- a/tools/testing/selftests/powerpc/include/utils.h +++ b/tools/testing/selftests/powerpc/include/utils.h @@ -12,6 +12,7 @@ #include <stdbool.h> #include <linux/auxvec.h> #include <linux/perf_event.h> +#include <asm/cputable.h> #include "reg.h" /* Avoid headaches with PRI?64 - just use %ll? 
always */ @@ -35,7 +36,6 @@ int pick_online_cpu(void); int read_debugfs_file(char *debugfs_file, int *result); int write_debugfs_file(char *debugfs_file, int result); int read_sysfs_file(char *debugfs_file, char *result, size_t result_size); -void set_dscr(unsigned long val); int perf_event_open_counter(unsigned int type, unsigned long config, int group_fd); int perf_event_enable(int fd); diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore index 91c775c23c66..aac4a59f9e28 100644 --- a/tools/testing/selftests/powerpc/mm/.gitignore +++ b/tools/testing/selftests/powerpc/mm/.gitignore @@ -2,6 +2,7 @@ hugetlb_vs_thp_test subpage_prot tempfile +prot_sao segv_errors wild_bctr large_vm_fork_separation diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile index 250ce172e0da..defe488d6bf1 100644 --- a/tools/testing/selftests/powerpc/mm/Makefile +++ b/tools/testing/selftests/powerpc/mm/Makefile @@ -2,7 +2,7 @@ noarg: $(MAKE) -C ../ -TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot segv_errors wild_bctr \ +TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr \ large_vm_fork_separation bad_accesses pkey_exec_prot \ pkey_siginfo stack_expansion_signal stack_expansion_ldst @@ -14,6 +14,8 @@ include ../../lib.mk $(TEST_GEN_PROGS): ../harness.c ../utils.c +$(OUTPUT)/prot_sao: ../utils.c + $(OUTPUT)/wild_bctr: CFLAGS += -m64 $(OUTPUT)/large_vm_fork_separation: CFLAGS += -m64 $(OUTPUT)/bad_accesses: CFLAGS += -m64 diff --git a/tools/testing/selftests/powerpc/mm/bad_accesses.c b/tools/testing/selftests/powerpc/mm/bad_accesses.c index a864ed7e2008..fd747b2ffcfc 100644 --- a/tools/testing/selftests/powerpc/mm/bad_accesses.c +++ b/tools/testing/selftests/powerpc/mm/bad_accesses.c @@ -139,5 +139,6 @@ static int test(void) int main(void) { + test_harness_set_timeout(300); return test_harness(test, "bad_accesses"); } diff --git 
a/tools/testing/selftests/powerpc/mm/prot_sao.c b/tools/testing/selftests/powerpc/mm/prot_sao.c new file mode 100644 index 000000000000..30b71b1d78d5 --- /dev/null +++ b/tools/testing/selftests/powerpc/mm/prot_sao.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2016, Michael Ellerman, IBM Corp. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mman.h> +#include <unistd.h> + +#include <asm/cputable.h> + +#include "utils.h" + +#define SIZE (64 * 1024) + +int test_prot_sao(void) +{ + char *p; + + /* + * SAO was introduced in 2.06 and removed in 3.1. It's disabled in + * guests/LPARs by default, so also skip if we are running in a guest. + */ + SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06) || + have_hwcap2(PPC_FEATURE2_ARCH_3_1) || + access("/proc/device-tree/rtas/ibm,hypertas-functions", F_OK) == 0); + + /* + * Ensure we can ask for PROT_SAO. + * We can't really verify that it does the right thing, but at least we + * confirm the kernel will accept it. 
+ */ + p = mmap(NULL, SIZE, PROT_READ | PROT_WRITE | PROT_SAO, + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); + FAIL_IF(p == MAP_FAILED); + + /* Write to the mapping, to at least cause a fault */ + memset(p, 0xaa, SIZE); + + return 0; +} + +int main(void) +{ + return test_harness(test_prot_sao, "prot-sao"); +} diff --git a/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c b/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c index 2980abca31e0..2070a1e2b3a5 100644 --- a/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c +++ b/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c @@ -9,7 +9,6 @@ #include <stdbool.h> #include <string.h> #include <sys/prctl.h> -#include <asm/cputable.h> #include "event.h" #include "utils.h" diff --git a/tools/testing/selftests/powerpc/pmu/l3_bank_test.c b/tools/testing/selftests/powerpc/pmu/l3_bank_test.c index a96d512a18c4..a5dfa9bf3b9f 100644 --- a/tools/testing/selftests/powerpc/pmu/l3_bank_test.c +++ b/tools/testing/selftests/powerpc/pmu/l3_bank_test.c @@ -20,6 +20,9 @@ static int l3_bank_test(void) char *p; int i; + // The L3 bank logic is only used on Power8 or later + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07)); + p = malloc(MALLOC_SIZE); FAIL_IF(!p); diff --git a/tools/testing/selftests/powerpc/pmu/per_event_excludes.c b/tools/testing/selftests/powerpc/pmu/per_event_excludes.c index 2d37942bf72b..ad32a09a6540 100644 --- a/tools/testing/selftests/powerpc/pmu/per_event_excludes.c +++ b/tools/testing/selftests/powerpc/pmu/per_event_excludes.c @@ -12,8 +12,6 @@ #include <string.h> #include <sys/prctl.h> -#include <asm/cputable.h> - #include "event.h" #include "lib.h" #include "utils.h" diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c index fc477dfe86a2..2e0d86e0687e 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c @@ -20,6 +20,8 @@ #include <signal.h> #include 
<sys/types.h> #include <sys/wait.h> +#include <sys/syscall.h> +#include <linux/limits.h> #include "ptrace.h" #define SPRN_PVR 0x11F @@ -44,6 +46,7 @@ struct gstruct { }; static volatile struct gstruct gstruct __attribute__((aligned(512))); +static volatile char cwd[PATH_MAX] __attribute__((aligned(8))); static void get_dbginfo(pid_t child_pid, struct ppc_debug_info *dbginfo) { @@ -138,6 +141,9 @@ static void test_workload(void) write_var(len); } + /* PTRACE_SET_DEBUGREG, Kernel Access Userspace test */ + syscall(__NR_getcwd, &cwd, PATH_MAX); + /* PPC_PTRACE_SETHWDEBUG, MODE_EXACT, WO test */ write_var(1); @@ -150,6 +156,9 @@ static void test_workload(void) else read_var(1); + /* PPC_PTRACE_SETHWDEBUG, MODE_EXACT, Kernel Access Userspace test */ + syscall(__NR_getcwd, &cwd, PATH_MAX); + /* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW ALIGNED, WO test */ gstruct.a[rand() % A_LEN] = 'a'; @@ -293,6 +302,24 @@ static int test_set_debugreg(pid_t child_pid) return 0; } +static int test_set_debugreg_kernel_userspace(pid_t child_pid) +{ + unsigned long wp_addr = (unsigned long)cwd; + char *name = "PTRACE_SET_DEBUGREG"; + + /* PTRACE_SET_DEBUGREG, Kernel Access Userspace test */ + wp_addr &= ~0x7UL; + wp_addr |= (1Ul << DABR_READ_SHIFT); + wp_addr |= (1UL << DABR_WRITE_SHIFT); + wp_addr |= (1UL << DABR_TRANSLATION_SHIFT); + ptrace_set_debugreg(child_pid, wp_addr); + ptrace(PTRACE_CONT, child_pid, NULL, 0); + check_success(child_pid, name, "Kernel Access Userspace", wp_addr, 8); + + ptrace_set_debugreg(child_pid, 0); + return 0; +} + static void get_ppc_hw_breakpoint(struct ppc_hw_breakpoint *info, int type, unsigned long addr, int len) { @@ -338,6 +365,22 @@ static void test_sethwdebug_exact(pid_t child_pid) ptrace_delhwdebug(child_pid, wh); } +static void test_sethwdebug_exact_kernel_userspace(pid_t child_pid) +{ + struct ppc_hw_breakpoint info; + unsigned long wp_addr = (unsigned long)&cwd; + char *name = "PPC_PTRACE_SETHWDEBUG, MODE_EXACT"; + int len = 1; /* hardcoded in kernel 
*/ + int wh; + + /* PPC_PTRACE_SETHWDEBUG, MODE_EXACT, Kernel Access Userspace test */ + get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_WRITE, wp_addr, 0); + wh = ptrace_sethwdebug(child_pid, &info); + ptrace(PTRACE_CONT, child_pid, NULL, 0); + check_success(child_pid, name, "Kernel Access Userspace", wp_addr, len); + ptrace_delhwdebug(child_pid, wh); +} + static void test_sethwdebug_range_aligned(pid_t child_pid) { struct ppc_hw_breakpoint info; @@ -452,9 +495,10 @@ static void run_tests(pid_t child_pid, struct ppc_debug_info *dbginfo, bool dawr) { test_set_debugreg(child_pid); + test_set_debugreg_kernel_userspace(child_pid); + test_sethwdebug_exact(child_pid); + test_sethwdebug_exact_kernel_userspace(child_pid); if (dbginfo->features & PPC_DEBUG_FEATURE_DATA_BP_RANGE) { - test_sethwdebug_exact(child_pid); - test_sethwdebug_range_aligned(child_pid); if (dawr || is_8xx) { test_sethwdebug_range_unaligned(child_pid); diff --git a/tools/testing/selftests/powerpc/security/rfi_flush.c b/tools/testing/selftests/powerpc/security/rfi_flush.c index 0a7d0afb26b8..93a65bd1f231 100644 --- a/tools/testing/selftests/powerpc/security/rfi_flush.c +++ b/tools/testing/selftests/powerpc/security/rfi_flush.c @@ -10,6 +10,7 @@ #include <stdint.h> #include <malloc.h> #include <unistd.h> +#include <signal.h> #include <stdlib.h> #include <string.h> #include <stdio.h> @@ -41,6 +42,40 @@ static void syscall_loop(char *p, unsigned long iterations, } } +static void sigill_handler(int signr, siginfo_t *info, void *unused) +{ + static int warned = 0; + ucontext_t *ctx = (ucontext_t *)unused; + unsigned long *pc = &UCONTEXT_NIA(ctx); + + /* mtspr 3,RS to check for move to DSCR below */ + if ((*((unsigned int *)*pc) & 0xfc1fffff) == 0x7c0303a6) { + if (!warned++) + printf("WARNING: Skipping over dscr setup. 
Consider running 'ppc64_cpu --dscr=1' manually.\n"); + *pc += 4; + } else { + printf("SIGILL at %p\n", pc); + abort(); + } +} + +static void set_dscr(unsigned long val) +{ + static int init = 0; + struct sigaction sa; + + if (!init) { + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = sigill_handler; + sa.sa_flags = SA_SIGINFO; + if (sigaction(SIGILL, &sa, NULL)) + perror("sigill_handler"); + init = 1; + } + + asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR)); +} + int rfi_flush_test(void) { char *p; @@ -54,6 +89,9 @@ int rfi_flush_test(void) SKIP_IF(geteuid() != 0); + // The PMU event we use only works on Power7 or later + SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06)); + if (read_debugfs_file("powerpc/rfi_flush", &rfi_flush_org)) { perror("Unable to read powerpc/rfi_flush debugfs file"); SKIP_IF(1); diff --git a/tools/testing/selftests/powerpc/security/spectre_v2.c b/tools/testing/selftests/powerpc/security/spectre_v2.c index c8d82b784102..adc2b7294e5f 100644 --- a/tools/testing/selftests/powerpc/security/spectre_v2.c +++ b/tools/testing/selftests/powerpc/security/spectre_v2.c @@ -134,6 +134,9 @@ int spectre_v2_test(void) s64 miss_percent; bool is_p9; + // The PMU events we use only work on Power8 or later + SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07)); + state = get_sysfs_state(); if (state == UNKNOWN) { printf("Error: couldn't determine spectre_v2 mitigation state?\n"); diff --git a/tools/testing/selftests/powerpc/stringloops/memcmp.c b/tools/testing/selftests/powerpc/stringloops/memcmp.c index 979df3d98368..cb2f18855c8d 100644 --- a/tools/testing/selftests/powerpc/stringloops/memcmp.c +++ b/tools/testing/selftests/powerpc/stringloops/memcmp.c @@ -4,7 +4,7 @@ #include <string.h> #include <sys/mman.h> #include <time.h> -#include <asm/cputable.h> + #include "utils.h" #define SIZE 256 diff --git a/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S b/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S index 
cc4930467235..7887f78cf072 100644 --- a/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S +++ b/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S @@ -3,9 +3,13 @@ .data .balign 8 -message: +success_message: .ascii "success: switch_endian_test\n\0" + .balign 8 +failure_message: + .ascii "failure: switch_endian_test\n\0" + .section ".toc" .balign 8 pattern: @@ -64,6 +68,9 @@ FUNC_START(_start) li r0, __NR_switch_endian sc + tdi 0, 0, 0x48 // b +8 if the endian was switched + b .Lfail // exit if endian didn't switch + #include "check-reversed.S" /* Flip back, r0 already has the switch syscall number */ @@ -71,12 +78,20 @@ FUNC_START(_start) #include "check.S" + ld r4, success_message@got(%r2) + li r5, 28 // strlen(success_message) + li r14, 0 // exit status +.Lout: li r0, __NR_write li r3, 1 /* stdout */ - ld r4, message@got(%r2) - li r5, 28 /* strlen(message3) */ sc li r0, __NR_exit - li r3, 0 + mr r3, r14 sc b . + +.Lfail: + ld r4, failure_message@got(%r2) + li r5, 28 // strlen(failure_message) + li r14, 1 + b .Lout diff --git a/tools/testing/selftests/powerpc/syscalls/Makefile b/tools/testing/selftests/powerpc/syscalls/Makefile index 01b22775ca87..b63f8459c704 100644 --- a/tools/testing/selftests/powerpc/syscalls/Makefile +++ b/tools/testing/selftests/powerpc/syscalls/Makefile @@ -1,5 +1,5 @@ # SPDX-License-Identifier: GPL-2.0-only -TEST_GEN_PROGS := ipc_unmuxed +TEST_GEN_PROGS := ipc_unmuxed rtas_filter CFLAGS += -I../../../../../usr/include diff --git a/tools/testing/selftests/powerpc/syscalls/rtas_filter.c b/tools/testing/selftests/powerpc/syscalls/rtas_filter.c new file mode 100644 index 000000000000..03b487f18d00 --- /dev/null +++ b/tools/testing/selftests/powerpc/syscalls/rtas_filter.c @@ -0,0 +1,285 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Copyright 2005-2020 IBM Corporation. 
+ * + * Includes code from librtas (https://github.com/ibm-power-utilities/librtas/) + */ + +#include <byteswap.h> +#include <stdint.h> +#include <inttypes.h> +#include <stdio.h> +#include <string.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <unistd.h> +#include <stdarg.h> +#include <stdlib.h> +#include <fcntl.h> +#include <errno.h> +#include "utils.h" + +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#define cpu_to_be32(x) bswap_32(x) +#define be32_to_cpu(x) bswap_32(x) +#else +#define cpu_to_be32(x) (x) +#define be32_to_cpu(x) (x) +#endif + +#define RTAS_IO_ASSERT -1098 /* Unexpected I/O Error */ +#define RTAS_UNKNOWN_OP -1099 /* No Firmware Implementation of Function */ +#define BLOCK_SIZE 4096 +#define PAGE_SIZE 4096 +#define MAX_PAGES 64 + +static const char *ofdt_rtas_path = "/proc/device-tree/rtas"; + +typedef __be32 uint32_t; +struct rtas_args { + __be32 token; + __be32 nargs; + __be32 nret; + __be32 args[16]; + __be32 *rets; /* Pointer to return values in args[]. 
*/ +}; + +struct region { + uint64_t addr; + uint32_t size; + struct region *next; +}; + +int read_entire_file(int fd, char **buf, size_t *len) +{ + size_t buf_size = 0; + size_t off = 0; + int rc; + + *buf = NULL; + do { + buf_size += BLOCK_SIZE; + if (*buf == NULL) + *buf = malloc(buf_size); + else + *buf = realloc(*buf, buf_size); + + if (*buf == NULL) + return -ENOMEM; + + rc = read(fd, *buf + off, BLOCK_SIZE); + if (rc < 0) + return -EIO; + + off += rc; + } while (rc == BLOCK_SIZE); + + if (len) + *len = off; + + return 0; +} + +static int open_prop_file(const char *prop_path, const char *prop_name, int *fd) +{ + char *path; + int len; + + /* allocate enough for two string, a slash and trailing NULL */ + len = strlen(prop_path) + strlen(prop_name) + 1 + 1; + path = malloc(len); + if (path == NULL) + return -ENOMEM; + + snprintf(path, len, "%s/%s", prop_path, prop_name); + + *fd = open(path, O_RDONLY); + free(path); + if (*fd < 0) + return -errno; + + return 0; +} + +static int get_property(const char *prop_path, const char *prop_name, + char **prop_val, size_t *prop_len) +{ + int rc, fd; + + rc = open_prop_file(prop_path, prop_name, &fd); + if (rc) + return rc; + + rc = read_entire_file(fd, prop_val, prop_len); + close(fd); + + return rc; +} + +int rtas_token(const char *call_name) +{ + char *prop_buf = NULL; + size_t len; + int rc; + + rc = get_property(ofdt_rtas_path, call_name, &prop_buf, &len); + if (rc < 0) { + rc = RTAS_UNKNOWN_OP; + goto err; + } + + rc = be32_to_cpu(*(int *)prop_buf); + +err: + free(prop_buf); + return rc; +} + +static int read_kregion_bounds(struct region *kregion) +{ + char *buf; + int fd; + int rc; + + fd = open("/proc/ppc64/rtas/rmo_buffer", O_RDONLY); + if (fd < 0) { + printf("Could not open rmo_buffer file\n"); + return RTAS_IO_ASSERT; + } + + rc = read_entire_file(fd, &buf, NULL); + close(fd); + if (rc) { + free(buf); + return rc; + } + + sscanf(buf, "%" SCNx64 " %x", &kregion->addr, &kregion->size); + free(buf); + + if 
(!(kregion->size && kregion->addr) || + (kregion->size > (PAGE_SIZE * MAX_PAGES))) { + printf("Unexpected kregion bounds\n"); + return RTAS_IO_ASSERT; + } + + return 0; +} + +static int rtas_call(const char *name, int nargs, + int nrets, ...) +{ + struct rtas_args args; + __be32 *rets[16]; + int i, rc, token; + va_list ap; + + va_start(ap, nrets); + + token = rtas_token(name); + if (token == RTAS_UNKNOWN_OP) { + // We don't care if the call doesn't exist + printf("call '%s' not available, skipping...", name); + rc = RTAS_UNKNOWN_OP; + goto err; + } + + args.token = cpu_to_be32(token); + args.nargs = cpu_to_be32(nargs); + args.nret = cpu_to_be32(nrets); + + for (i = 0; i < nargs; i++) + args.args[i] = (__be32) va_arg(ap, unsigned long); + + for (i = 0; i < nrets; i++) + rets[i] = (__be32 *) va_arg(ap, unsigned long); + + rc = syscall(__NR_rtas, &args); + if (rc) { + rc = -errno; + goto err; + } + + if (nrets) { + *(rets[0]) = be32_to_cpu(args.args[nargs]); + + for (i = 1; i < nrets; i++) { + *(rets[i]) = args.args[nargs + i]; + } + } + +err: + va_end(ap); + return rc; +} + +static int test(void) +{ + struct region rmo_region; + uint32_t rmo_start; + uint32_t rmo_end; + __be32 rets[1]; + int rc; + + // Test a legitimate harmless call + // Expected: call succeeds + printf("Test a permitted call, no parameters... "); + rc = rtas_call("get-time-of-day", 0, 1, rets); + printf("rc: %d\n", rc); + FAIL_IF(rc != 0 && rc != RTAS_UNKNOWN_OP); + + // Test a prohibited call + // Expected: call returns -EINVAL + printf("Test a prohibited call... 
"); + rc = rtas_call("nvram-fetch", 0, 1, rets); + printf("rc: %d\n", rc); + FAIL_IF(rc != -EINVAL && rc != RTAS_UNKNOWN_OP); + + // Get RMO + rc = read_kregion_bounds(&rmo_region); + if (rc) { + printf("Couldn't read RMO region bounds, skipping remaining cases\n"); + return 0; + } + rmo_start = rmo_region.addr; + rmo_end = rmo_start + rmo_region.size - 1; + printf("RMO range: %08x - %08x\n", rmo_start, rmo_end); + + // Test a permitted call, user-supplied size, buffer inside RMO + // Expected: call succeeds + printf("Test a permitted call, user-supplied size, buffer inside RMO... "); + rc = rtas_call("ibm,get-system-parameter", 3, 1, 0, cpu_to_be32(rmo_start), + cpu_to_be32(rmo_end - rmo_start + 1), rets); + printf("rc: %d\n", rc); + FAIL_IF(rc != 0 && rc != RTAS_UNKNOWN_OP); + + // Test a permitted call, user-supplied size, buffer start outside RMO + // Expected: call returns -EINVAL + printf("Test a permitted call, user-supplied size, buffer start outside RMO... "); + rc = rtas_call("ibm,get-system-parameter", 3, 1, 0, cpu_to_be32(rmo_end + 1), + cpu_to_be32(4000), rets); + printf("rc: %d\n", rc); + FAIL_IF(rc != -EINVAL && rc != RTAS_UNKNOWN_OP); + + // Test a permitted call, user-supplied size, buffer end outside RMO + // Expected: call returns -EINVAL + printf("Test a permitted call, user-supplied size, buffer end outside RMO... "); + rc = rtas_call("ibm,get-system-parameter", 3, 1, 0, cpu_to_be32(rmo_start), + cpu_to_be32(rmo_end - rmo_start + 2), rets); + printf("rc: %d\n", rc); + FAIL_IF(rc != -EINVAL && rc != RTAS_UNKNOWN_OP); + + // Test a permitted call, fixed size, buffer end outside RMO + // Expected: call returns -EINVAL + printf("Test a permitted call, fixed size, buffer end outside RMO... 
"); + rc = rtas_call("ibm,configure-connector", 2, 1, cpu_to_be32(rmo_end - 4000), 0, rets); + printf("rc: %d\n", rc); + FAIL_IF(rc != -EINVAL && rc != RTAS_UNKNOWN_OP); + + return 0; +} + +int main(int argc, char *argv[]) +{ + return test_harness(test, "rtas_filter"); +} diff --git a/tools/testing/selftests/powerpc/tm/tm-poison.c b/tools/testing/selftests/powerpc/tm/tm-poison.c index 977558497c16..29e5f26af7b9 100644 --- a/tools/testing/selftests/powerpc/tm/tm-poison.c +++ b/tools/testing/selftests/powerpc/tm/tm-poison.c @@ -26,7 +26,7 @@ int tm_poison_test(void) { - int pid; + int cpu, pid; cpu_set_t cpuset; uint64_t poison = 0xdeadbeefc0dec0fe; uint64_t unknown = 0; @@ -35,10 +35,13 @@ int tm_poison_test(void) SKIP_IF(!have_htm()); - /* Attach both Child and Parent to CPU 0 */ + cpu = pick_online_cpu(); + FAIL_IF(cpu < 0); + + // Attach both Child and Parent to the same CPU CPU_ZERO(&cpuset); - CPU_SET(0, &cpuset); - sched_setaffinity(0, sizeof(cpuset), &cpuset); + CPU_SET(cpu, &cpuset); + FAIL_IF(sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0); pid = fork(); if (!pid) { diff --git a/tools/testing/selftests/powerpc/tm/tm-tmspr.c b/tools/testing/selftests/powerpc/tm/tm-tmspr.c index 17becf3dcee4..794d574db784 100644 --- a/tools/testing/selftests/powerpc/tm/tm-tmspr.c +++ b/tools/testing/selftests/powerpc/tm/tm-tmspr.c @@ -33,19 +33,13 @@ #include "utils.h" #include "tm.h" -int num_loops = 10000; +int num_loops = 1000000; int passed = 1; void tfiar_tfhar(void *in) { - int i, cpu; unsigned long tfhar, tfhar_rd, tfiar, tfiar_rd; - cpu_set_t cpuset; - - CPU_ZERO(&cpuset); - cpu = (unsigned long)in >> 1; - CPU_SET(cpu, &cpuset); - sched_setaffinity(0, sizeof(cpuset), &cpuset); + int i; /* TFIAR: Last bit has to be high so userspace can read register */ tfiar = ((unsigned long)in) + 1; diff --git a/tools/testing/selftests/powerpc/tm/tm-trap.c b/tools/testing/selftests/powerpc/tm/tm-trap.c index 601f0c1d450d..c75960af8018 100644 --- 
a/tools/testing/selftests/powerpc/tm/tm-trap.c +++ b/tools/testing/selftests/powerpc/tm/tm-trap.c @@ -247,8 +247,7 @@ void *pong(void *not_used) int tm_trap_test(void) { uint16_t k = 1; - - int rc; + int cpu, rc; pthread_attr_t attr; cpu_set_t cpuset; @@ -267,9 +266,12 @@ int tm_trap_test(void) usr1_sa.sa_sigaction = usr1_signal_handler; sigaction(SIGUSR1, &usr1_sa, NULL); - /* Set only CPU 0 in the mask. Both threads will be bound to cpu 0. */ + cpu = pick_online_cpu(); + FAIL_IF(cpu < 0); + + // Set only one CPU in the mask. Both threads will be bound to that CPU. CPU_ZERO(&cpuset); - CPU_SET(0, &cpuset); + CPU_SET(cpu, &cpuset); /* Init pthread attribute */ rc = pthread_attr_init(&attr); diff --git a/tools/testing/selftests/powerpc/tm/tm-unavailable.c b/tools/testing/selftests/powerpc/tm/tm-unavailable.c index 2ca2fccb0a3e..a1348a5f721a 100644 --- a/tools/testing/selftests/powerpc/tm/tm-unavailable.c +++ b/tools/testing/selftests/powerpc/tm/tm-unavailable.c @@ -338,16 +338,19 @@ void test_fp_vec(int fp, int vec, pthread_attr_t *attr) int tm_unavailable_test(void) { - int rc, exception; /* FP = 0, VEC = 1, VSX = 2 */ + int cpu, rc, exception; /* FP = 0, VEC = 1, VSX = 2 */ pthread_t t1; pthread_attr_t attr; cpu_set_t cpuset; SKIP_IF(!have_htm()); - /* Set only CPU 0 in the mask. Both threads will be bound to CPU 0. */ + cpu = pick_online_cpu(); + FAIL_IF(cpu < 0); + + // Set only one CPU in the mask. Both threads will be bound to that CPU. CPU_ZERO(&cpuset); - CPU_SET(0, &cpuset); + CPU_SET(cpu, &cpuset); /* Init pthread attribute. 
*/ rc = pthread_attr_init(&attr); diff --git a/tools/testing/selftests/powerpc/tm/tm.h b/tools/testing/selftests/powerpc/tm/tm.h index c402464b038f..c5a1e5c163fc 100644 --- a/tools/testing/selftests/powerpc/tm/tm.h +++ b/tools/testing/selftests/powerpc/tm/tm.h @@ -6,9 +6,8 @@ #ifndef _SELFTESTS_POWERPC_TM_TM_H #define _SELFTESTS_POWERPC_TM_TM_H -#include <asm/tm.h> -#include <asm/cputable.h> #include <stdbool.h> +#include <asm/tm.h> #include "utils.h" diff --git a/tools/testing/selftests/powerpc/utils.c b/tools/testing/selftests/powerpc/utils.c index 18b6a773d5c7..1f36ee1a909a 100644 --- a/tools/testing/selftests/powerpc/utils.c +++ b/tools/testing/selftests/powerpc/utils.c @@ -10,7 +10,6 @@ #include <fcntl.h> #include <link.h> #include <sched.h> -#include <signal.h> #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -273,40 +272,6 @@ int perf_event_reset(int fd) return 0; } -static void sigill_handler(int signr, siginfo_t *info, void *unused) -{ - static int warned = 0; - ucontext_t *ctx = (ucontext_t *)unused; - unsigned long *pc = &UCONTEXT_NIA(ctx); - - /* mtspr 3,RS to check for move to DSCR below */ - if ((*((unsigned int *)*pc) & 0xfc1fffff) == 0x7c0303a6) { - if (!warned++) - printf("WARNING: Skipping over dscr setup. 
Consider running 'ppc64_cpu --dscr=1' manually.\n"); - *pc += 4; - } else { - printf("SIGILL at %p\n", pc); - abort(); - } -} - -void set_dscr(unsigned long val) -{ - static int init = 0; - struct sigaction sa; - - if (!init) { - memset(&sa, 0, sizeof(sa)); - sa.sa_sigaction = sigill_handler; - sa.sa_flags = SA_SIGINFO; - if (sigaction(SIGILL, &sa, NULL)) - perror("sigill_handler"); - init = 1; - } - - asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR)); -} - int using_hash_mmu(bool *using_hash) { char line[128]; @@ -318,7 +283,9 @@ int using_hash_mmu(bool *using_hash) rc = 0; while (fgets(line, sizeof(line), f) != NULL) { - if (strcmp(line, "MMU : Hash\n") == 0) { + if (!strcmp(line, "MMU : Hash\n") || + !strcmp(line, "platform : Cell\n") || + !strcmp(line, "platform : PowerMac\n")) { *using_hash = true; goto out; } diff --git a/tools/testing/selftests/ptrace/.gitignore b/tools/testing/selftests/ptrace/.gitignore index 7bebf9534a86..792318aaa30c 100644 --- a/tools/testing/selftests/ptrace/.gitignore +++ b/tools/testing/selftests/ptrace/.gitignore @@ -1,3 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only get_syscall_info peeksiginfo +vmaccess diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale-ftrace.sh index 7d3c2be66c64..d4bec538086d 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale-ftrace.sh @@ -1,12 +1,12 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0+ # -# Analyze a given results directory for rcuperf performance measurements, +# Analyze a given results directory for rcuscale performance measurements, # looking for ftrace data. Exits with 0 if data was found, analyzed, and -# printed. Intended to be invoked from kvm-recheck-rcuperf.sh after +# printed. Intended to be invoked from kvm-recheck-rcuscale.sh after # argument checking. 
# -# Usage: kvm-recheck-rcuperf-ftrace.sh resdir +# Usage: kvm-recheck-rcuscale-ftrace.sh resdir # # Copyright (C) IBM Corporation, 2016 # diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale.sh index db0375a57f28..aa745152a525 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale.sh @@ -1,9 +1,9 @@ #!/bin/bash # SPDX-License-Identifier: GPL-2.0+ # -# Analyze a given results directory for rcuperf performance measurements. +# Analyze a given results directory for rcuscale scalability measurements. # -# Usage: kvm-recheck-rcuperf.sh resdir +# Usage: kvm-recheck-rcuscale.sh resdir # # Copyright (C) IBM Corporation, 2016 # @@ -20,7 +20,7 @@ fi PATH=`pwd`/tools/testing/selftests/rcutorture/bin:$PATH; export PATH . functions.sh -if kvm-recheck-rcuperf-ftrace.sh $i +if kvm-recheck-rcuscale-ftrace.sh $i then # ftrace data was successfully analyzed, call it good! exit 0 @@ -30,12 +30,12 @@ configfile=`echo $i | sed -e 's/^.*\///'` sed -e 's/^\[[^]]*]//' < $i/console.log | awk ' -/-perf: .* gps: .* batches:/ { +/-scale: .* gps: .* batches:/ { ngps = $9; nbatches = $11; } -/-perf: .*writer-duration/ { +/-scale: .*writer-duration/ { gptimes[++n] = $5 / 1000.; sum += $5 / 1000.; } @@ -43,7 +43,7 @@ awk ' END { newNR = asort(gptimes); if (newNR <= 0) { - print "No rcuperf records found???" + print "No rcuscale records found???" 
exit; } pct50 = int(newNR * 50 / 100); @@ -79,5 +79,5 @@ END { print "99th percentile grace-period duration: " gptimes[pct99]; print "Maximum grace-period duration: " gptimes[newNR]; print "Grace periods: " ngps + 0 " Batches: " nbatches + 0 " Ratio: " ngps / nbatches; - print "Computed from rcuperf printk output."; + print "Computed from rcuscale printk output."; }' diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-scf.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-scf.sh new file mode 100755 index 000000000000..671bfee4fcef --- /dev/null +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-scf.sh @@ -0,0 +1,38 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ +# +# Analyze a given results directory for rcutorture progress. +# +# Usage: kvm-recheck-rcu.sh resdir +# +# Copyright (C) Facebook, 2020 +# +# Authors: Paul E. McKenney <paulmck@kernel.org> + +i="$1" +if test -d "$i" -a -r "$i" +then + : +else + echo Unreadable results directory: $i + exit 1 +fi +. 
functions.sh + +configfile=`echo $i | sed -e 's/^.*\///'` +nscfs="`grep 'scf_invoked_count ver:' $i/console.log 2> /dev/null | tail -1 | sed -e 's/^.* scf_invoked_count ver: //' -e 's/ .*$//' | tr -d '\015'`" +if test -z "$nscfs" +then + echo "$configfile ------- " +else + dur="`sed -e 's/^.* scftorture.shutdown_secs=//' -e 's/ .*$//' < $i/qemu-cmd 2> /dev/null`" + if test -z "$dur" + then + rate="" + else + nscfss=`awk -v nscfs=$nscfs -v dur=$dur ' + BEGIN { print nscfs / dur }' < /dev/null` + rate=" ($nscfss/s)" + fi + echo "${configfile} ------- ${nscfs} SCF handler invocations$rate" +fi diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh index e07779a62634..6dc2b49b85ea 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh @@ -66,6 +66,7 @@ config_override_param () { echo > $T/KcList config_override_param "$config_dir/CFcommon" KcList "`cat $config_dir/CFcommon 2> /dev/null`" config_override_param "$config_template" KcList "`cat $config_template 2> /dev/null`" +config_override_param "--gdb options" KcList "$TORTURE_KCONFIG_GDB_ARG" config_override_param "--kasan options" KcList "$TORTURE_KCONFIG_KASAN_ARG" config_override_param "--kcsan options" KcList "$TORTURE_KCONFIG_KCSAN_ARG" config_override_param "--kconfig argument" KcList "$TORTURE_KCONFIG_ARG" @@ -152,7 +153,11 @@ qemu_append="`identify_qemu_append "$QEMU"`" boot_args="`configfrag_boot_params "$boot_args" "$config_template"`" # Generate kernel-version-specific boot parameters boot_args="`per_version_boot_params "$boot_args" $resdir/.config $seconds`" -echo $QEMU $qemu_args -m $TORTURE_QEMU_MEM -kernel $KERNEL -append \"$qemu_append $boot_args\" > $resdir/qemu-cmd +if test -n "$TORTURE_BOOT_GDB_ARG" +then + boot_args="$boot_args $TORTURE_BOOT_GDB_ARG" +fi +echo $QEMU $qemu_args -m $TORTURE_QEMU_MEM -kernel $KERNEL -append \"$qemu_append $boot_args\" 
$TORTURE_QEMU_GDB_ARG > $resdir/qemu-cmd if test -n "$TORTURE_BUILDONLY" then @@ -171,14 +176,26 @@ echo "NOTE: $QEMU either did not run or was interactive" > $resdir/console.log # Attempt to run qemu ( . $T/qemu-cmd; wait `cat $resdir/qemu_pid`; echo $? > $resdir/qemu-retval ) & commandcompleted=0 -sleep 10 # Give qemu's pid a chance to reach the file -if test -s "$resdir/qemu_pid" +if test -z "$TORTURE_KCONFIG_GDB_ARG" then - qemu_pid=`cat "$resdir/qemu_pid"` - echo Monitoring qemu job at pid $qemu_pid -else - qemu_pid="" - echo Monitoring qemu job at yet-as-unknown pid + sleep 10 # Give qemu's pid a chance to reach the file + if test -s "$resdir/qemu_pid" + then + qemu_pid=`cat "$resdir/qemu_pid"` + echo Monitoring qemu job at pid $qemu_pid + else + qemu_pid="" + echo Monitoring qemu job at yet-as-unknown pid + fi +fi +if test -n "$TORTURE_KCONFIG_GDB_ARG" +then + echo Waiting for you to attach a debug session, for example: > /dev/tty + echo " gdb $base_resdir/vmlinux" > /dev/tty + echo 'After symbols load and the "(gdb)" prompt appears:' > /dev/tty + echo " target remote :1234" > /dev/tty + echo " continue" > /dev/tty + kstarttime=`gawk 'BEGIN { print systime() }' < /dev/null` fi while : do diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index e655983b7429..6eb1d3f6524d 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -31,6 +31,9 @@ TORTURE_DEFCONFIG=defconfig TORTURE_BOOT_IMAGE="" TORTURE_INITRD="$KVM/initrd"; export TORTURE_INITRD TORTURE_KCONFIG_ARG="" +TORTURE_KCONFIG_GDB_ARG="" +TORTURE_BOOT_GDB_ARG="" +TORTURE_QEMU_GDB_ARG="" TORTURE_KCONFIG_KASAN_ARG="" TORTURE_KCONFIG_KCSAN_ARG="" TORTURE_KMAKE_ARG="" @@ -46,6 +49,7 @@ jitter="-1" usage () { echo "Usage: $scriptname optional arguments:" + echo " --allcpus" echo " --bootargs kernel-boot-arguments" echo " --bootimage relative-path-to-kernel-boot-image" echo " --buildonly" @@ -55,17 
+59,19 @@ usage () { echo " --defconfig string" echo " --dryrun sched|script" echo " --duration minutes" + echo " --gdb" + echo " --help" echo " --interactive" echo " --jitter N [ maxsleep (us) [ maxspin (us) ] ]" echo " --kconfig Kconfig-options" echo " --kmake-arg kernel-make-arguments" echo " --mac nn:nn:nn:nn:nn:nn" - echo " --memory megabytes | nnnG" + echo " --memory megabytes|nnnG" echo " --no-initrd" echo " --qemu-args qemu-arguments" echo " --qemu-cmd qemu-system-..." echo " --results absolute-pathname" - echo " --torture rcu" + echo " --torture lock|rcu|rcuscale|refscale|scf" echo " --trust-make" exit 1 } @@ -126,6 +132,14 @@ do dur=$(($2*60)) shift ;; + --gdb) + TORTURE_KCONFIG_GDB_ARG="CONFIG_DEBUG_INFO=y"; export TORTURE_KCONFIG_GDB_ARG + TORTURE_BOOT_GDB_ARG="nokaslr"; export TORTURE_BOOT_GDB_ARG + TORTURE_QEMU_GDB_ARG="-s -S"; export TORTURE_QEMU_GDB_ARG + ;; + --help|-h) + usage + ;; --interactive) TORTURE_QEMU_INTERACTIVE=1; export TORTURE_QEMU_INTERACTIVE ;; @@ -184,13 +198,13 @@ do shift ;; --torture) - checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\|rcuperf\|refscale\)$' '^--' + checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\|rcuscale\|refscale\|scf\)$' '^--' TORTURE_SUITE=$2 shift - if test "$TORTURE_SUITE" = rcuperf || test "$TORTURE_SUITE" = refscale + if test "$TORTURE_SUITE" = rcuscale || test "$TORTURE_SUITE" = refscale then # If you really want jitter for refscale or - # rcuperf, specify it after specifying the rcuperf + # rcuscale, specify it after specifying the rcuscale # or the refscale. (But why jitter in these cases?) jitter=0 fi @@ -248,6 +262,15 @@ do done touch $T/cfgcpu configs_derep="`echo $configs_derep | sed -e "s/\<CFLIST\>/$defaultconfigs/g"`" +if test -n "$TORTURE_KCONFIG_GDB_ARG" +then + if test "`echo $configs_derep | wc -w`" -gt 1 + then + echo "The --config list is: $configs_derep." + echo "Only one --config permitted with --gdb, terminating." 
+ exit 1 + fi +fi for CF1 in $configs_derep do if test -f "$CONFIGFRAG/$CF1" @@ -323,6 +346,9 @@ TORTURE_BUILDONLY="$TORTURE_BUILDONLY"; export TORTURE_BUILDONLY TORTURE_DEFCONFIG="$TORTURE_DEFCONFIG"; export TORTURE_DEFCONFIG TORTURE_INITRD="$TORTURE_INITRD"; export TORTURE_INITRD TORTURE_KCONFIG_ARG="$TORTURE_KCONFIG_ARG"; export TORTURE_KCONFIG_ARG +TORTURE_KCONFIG_GDB_ARG="$TORTURE_KCONFIG_GDB_ARG"; export TORTURE_KCONFIG_GDB_ARG +TORTURE_BOOT_GDB_ARG="$TORTURE_BOOT_GDB_ARG"; export TORTURE_BOOT_GDB_ARG +TORTURE_QEMU_GDB_ARG="$TORTURE_QEMU_GDB_ARG"; export TORTURE_QEMU_GDB_ARG TORTURE_KCONFIG_KASAN_ARG="$TORTURE_KCONFIG_KASAN_ARG"; export TORTURE_KCONFIG_KASAN_ARG TORTURE_KCONFIG_KCSAN_ARG="$TORTURE_KCONFIG_KCSAN_ARG"; export TORTURE_KCONFIG_KCSAN_ARG TORTURE_KMAKE_ARG="$TORTURE_KMAKE_ARG"; export TORTURE_KMAKE_ARG diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh index 71a9f43a3918..e03338091a06 100755 --- a/tools/testing/selftests/rcutorture/bin/parse-console.sh +++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh @@ -33,8 +33,8 @@ then fi cat /dev/null > $file.diags -# Check for proper termination, except for rcuperf and refscale. -if test "$TORTURE_SUITE" != rcuperf && test "$TORTURE_SUITE" != refscale +# Check for proper termination, except for rcuscale and refscale. 
+if test "$TORTURE_SUITE" != rcuscale && test "$TORTURE_SUITE" != refscale then # check for abject failure @@ -67,6 +67,7 @@ then grep --binary-files=text 'torture:.*ver:' $file | egrep --binary-files=text -v '\(null\)|rtc: 000000000* ' | sed -e 's/^(initramfs)[^]]*] //' -e 's/^\[[^]]*] //' | + sed -e 's/^.*ver: //' | awk ' BEGIN { ver = 0; @@ -74,13 +75,13 @@ then } { - if (!badseq && ($5 + 0 != $5 || $5 <= ver)) { + if (!badseq && ($1 + 0 != $1 || $1 <= ver)) { badseqno1 = ver; - badseqno2 = $5; + badseqno2 = $1; badseqnr = NR; badseq = 1; } - ver = $5 + ver = $1 } END { diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE05 b/tools/testing/selftests/rcutorture/configs/rcu/TREE05 index 2dde0d9964e3..4f95f8544f3f 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE05 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE05 @@ -16,5 +16,6 @@ CONFIG_RCU_NOCB_CPU=y CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y #CHECK#CONFIG_PROVE_RCU=y +CONFIG_PROVE_RCU_LIST=y CONFIG_DEBUG_OBJECTS_RCU_HEAD=n CONFIG_RCU_EXPERT=y diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/CFcommon b/tools/testing/selftests/rcutorture/configs/rcuperf/CFcommon deleted file mode 100644 index a09816b8c0f3..000000000000 --- a/tools/testing/selftests/rcutorture/configs/rcuperf/CFcommon +++ /dev/null @@ -1,2 +0,0 @@ -CONFIG_RCU_PERF_TEST=y -CONFIG_PRINTK_TIME=y diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/CFLIST b/tools/testing/selftests/rcutorture/configs/rcuscale/CFLIST index c9f56cf20775..c9f56cf20775 100644 --- a/tools/testing/selftests/rcutorture/configs/rcuperf/CFLIST +++ b/tools/testing/selftests/rcutorture/configs/rcuscale/CFLIST diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon b/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon new file mode 100644 index 000000000000..87caa0e932c7 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon @@ -0,0 +1,2 @@ 
+CONFIG_RCU_SCALE_TEST=y +CONFIG_PRINTK_TIME=y diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/TINY b/tools/testing/selftests/rcutorture/configs/rcuscale/TINY index fb05ef5279b4..fb05ef5279b4 100644 --- a/tools/testing/selftests/rcutorture/configs/rcuperf/TINY +++ b/tools/testing/selftests/rcutorture/configs/rcuscale/TINY diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE b/tools/testing/selftests/rcutorture/configs/rcuscale/TREE index 721cfda76ab2..721cfda76ab2 100644 --- a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE +++ b/tools/testing/selftests/rcutorture/configs/rcuscale/TREE diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE54 b/tools/testing/selftests/rcutorture/configs/rcuscale/TREE54 index 7629f5dd73b2..7629f5dd73b2 100644 --- a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE54 +++ b/tools/testing/selftests/rcutorture/configs/rcuscale/TREE54 diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/rcuscale/ver_functions.sh index 777d5b0c190f..0333e9b18522 100644 --- a/tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh +++ b/tools/testing/selftests/rcutorture/configs/rcuscale/ver_functions.sh @@ -11,6 +11,6 @@ # # Adds per-version torture-module parameters to kernels supporting them. 
per_version_boot_params () { - echo $1 rcuperf.shutdown=1 \ - rcuperf.verbose=1 + echo $1 rcuscale.shutdown=1 \ + rcuscale.verbose=1 } diff --git a/tools/testing/selftests/rcutorture/configs/scf/CFLIST b/tools/testing/selftests/rcutorture/configs/scf/CFLIST new file mode 100644 index 000000000000..4d62eb4a39f9 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/scf/CFLIST @@ -0,0 +1,2 @@ +NOPREEMPT +PREEMPT diff --git a/tools/testing/selftests/rcutorture/configs/scf/CFcommon b/tools/testing/selftests/rcutorture/configs/scf/CFcommon new file mode 100644 index 000000000000..c11ab91f49f5 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/scf/CFcommon @@ -0,0 +1,2 @@ +CONFIG_SCF_TORTURE_TEST=y +CONFIG_PRINTK_TIME=y diff --git a/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT b/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT new file mode 100644 index 000000000000..b8429d6c6ebc --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT @@ -0,0 +1,9 @@ +CONFIG_SMP=y +CONFIG_PREEMPT_NONE=y +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=n +CONFIG_HZ_PERIODIC=n +CONFIG_NO_HZ_IDLE=n +CONFIG_NO_HZ_FULL=y +CONFIG_DEBUG_LOCK_ALLOC=n +CONFIG_PROVE_LOCKING=n diff --git a/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT.boot b/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT.boot new file mode 100644 index 000000000000..d6a7fa097c2e --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT.boot @@ -0,0 +1 @@ +nohz_full=1 diff --git a/tools/testing/selftests/rcutorture/configs/scf/PREEMPT b/tools/testing/selftests/rcutorture/configs/scf/PREEMPT new file mode 100644 index 000000000000..ae4992b141b0 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/scf/PREEMPT @@ -0,0 +1,9 @@ +CONFIG_SMP=y +CONFIG_PREEMPT_NONE=n +CONFIG_PREEMPT_VOLUNTARY=n +CONFIG_PREEMPT=y +CONFIG_HZ_PERIODIC=n +CONFIG_NO_HZ_IDLE=y +CONFIG_NO_HZ_FULL=n +CONFIG_DEBUG_LOCK_ALLOC=y +CONFIG_PROVE_LOCKING=y diff --git 
a/tools/testing/selftests/rcutorture/configs/scf/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/scf/ver_functions.sh new file mode 100644 index 000000000000..d3d9e35d3d55 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/scf/ver_functions.sh @@ -0,0 +1,30 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0+ +# +# Torture-suite-dependent shell functions for the rest of the scripts. +# +# Copyright (C) Facebook, 2020 +# +# Authors: Paul E. McKenney <paulmck@kernel.org> + +# scftorture_param_onoff bootparam-string config-file +# +# Adds onoff scftorture module parameters to kernels having it. +scftorture_param_onoff () { + if ! bootparam_hotplug_cpu "$1" && configfrag_hotplug_cpu "$2" + then + echo CPU-hotplug kernel, adding scftorture onoff. 1>&2 + echo scftorture.onoff_interval=1000 scftorture.onoff_holdoff=30 + fi +} + +# per_version_boot_params bootparam-string config-file seconds +# +# Adds per-version torture-module parameters to kernels supporting them. +per_version_boot_params () { + echo $1 `scftorture_param_onoff "$1" "$2"` \ + scftorture.stat_interval=15 \ + scftorture.shutdown_secs=$3 \ + scftorture.verbose=1 \ + scf +} diff --git a/tools/testing/selftests/rcutorture/doc/initrd.txt b/tools/testing/selftests/rcutorture/doc/initrd.txt index 933b4fd12327..41a4255865d4 100644 --- a/tools/testing/selftests/rcutorture/doc/initrd.txt +++ b/tools/testing/selftests/rcutorture/doc/initrd.txt @@ -1,12 +1,11 @@ -The rcutorture scripting tools automatically create the needed initrd -directory using dracut. Failing that, this tool will create an initrd -containing a single statically linked binary named "init" that loops -over a very long sleep() call. In both cases, this creation is done -by tools/testing/selftests/rcutorture/bin/mkinitrd.sh. +The rcutorture scripting tools automatically create an initrd containing +a single statically linked binary named "init" that loops over a +very long sleep() call. 
In both cases, this creation is done by +tools/testing/selftests/rcutorture/bin/mkinitrd.sh. -However, if you are attempting to run rcutorture on a system that does -not have dracut installed, and if you don't like the notion of static -linking, you might wish to press an existing initrd into service: +However, if you don't like the notion of statically linked bare-bones +userspace environments, you might wish to press an existing initrd +into service: ------------------------------------------------------------------------ cd tools/testing/selftests/rcutorture @@ -15,24 +14,3 @@ mkdir initrd cd initrd cpio -id < /tmp/initrd.img.zcat # Manually verify that initrd contains needed binaries and libraries. ------------------------------------------------------------------------- - -Interestingly enough, if you are running rcutorture, you don't really -need userspace in many cases. Running without userspace has the -advantage of allowing you to test your kernel independently of the -distro in place, the root-filesystem layout, and so on. To make this -happen, put the following script in the initrd's tree's "/init" file, -with 0755 mode. - ------------------------------------------------------------------------- -#!/bin/sh - -while : -do - sleep 10 -done ------------------------------------------------------------------------- - -This approach also allows most of the binaries and libraries in the -initrd filesystem to be dispensed with, which can save significant -space in rcutorture's "res" directory. diff --git a/tools/testing/selftests/rcutorture/doc/rcu-test-image.txt b/tools/testing/selftests/rcutorture/doc/rcu-test-image.txt index 449cf579d6f9..b2fc247976b1 100644 --- a/tools/testing/selftests/rcutorture/doc/rcu-test-image.txt +++ b/tools/testing/selftests/rcutorture/doc/rcu-test-image.txt @@ -1,8 +1,33 @@ -This document describes one way to create the rcu-test-image file -that contains the filesystem used by the guest-OS kernel. 
There are -probably much better ways of doing this, and this filesystem could no -doubt be smaller. It is probably also possible to simply download -an appropriate image from any number of places. +Normally, a minimal initrd is created automatically by the rcutorture +scripting. But minimal really does mean "minimal", namely just a single +root directory with a single statically linked executable named "init": + +$ size tools/testing/selftests/rcutorture/initrd/init + text data bss dec hex filename + 328 0 8 336 150 tools/testing/selftests/rcutorture/initrd/init + +Suppose you need to run some scripts, perhaps to monitor or control +some aspect of the rcutorture testing. This will require a more fully +filled-out userspace, perhaps containing libraries, executables for +the shell and other utilities, and soforth. In that case, place your +desired filesystem here: + + tools/testing/selftests/rcutorture/initrd + +For example, your tools/testing/selftests/rcutorture/initrd/init might +be a script that does any needed mount operations and starts whatever +scripts need starting to properly monitor or control your testing. +The next rcutorture build will then incorporate this filesystem into +the kernel image that is passed to qemu. + +Or maybe you need a real root filesystem for some reason, in which case +please read on! + +The remainder of this document describes one way to create the +rcu-test-image file that contains the filesystem used by the guest-OS +kernel. There are probably much better ways of doing this, and this +filesystem could no doubt be smaller. It is probably also possible to +simply download an appropriate image from any number of places. 
That said, here are the commands: @@ -36,7 +61,7 @@ References: https://help.ubuntu.com/community/JeOSVMBuilder http://wiki.libvirt.org/page/UbuntuKVMWalkthrough http://www.moe.co.uk/2011/01/07/pci_add_option_rom-failed-to-find-romfile-pxe-rtl8139-bin/ -- "apt-get install kvm-pxe" - http://www.landley.net/writing/rootfs-howto.html - http://en.wikipedia.org/wiki/Initrd - http://en.wikipedia.org/wiki/Cpio + https://www.landley.net/writing/rootfs-howto.html + https://en.wikipedia.org/wiki/Initrd + https://en.wikipedia.org/wiki/Cpio http://wiki.libvirt.org/page/UbuntuKVMWalkthrough diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c index e8a657a5f48a..384589095864 100644 --- a/tools/testing/selftests/rseq/param_test.c +++ b/tools/testing/selftests/rseq/param_test.c @@ -1,8 +1,10 @@ // SPDX-License-Identifier: LGPL-2.1 #define _GNU_SOURCE #include <assert.h> +#include <linux/membarrier.h> #include <pthread.h> #include <sched.h> +#include <stdatomic.h> #include <stdint.h> #include <stdio.h> #include <stdlib.h> @@ -1131,6 +1133,220 @@ static int set_signal_handler(void) return ret; } +struct test_membarrier_thread_args { + int stop; + intptr_t percpu_list_ptr; +}; + +/* Worker threads modify data in their "active" percpu lists. */ +void *test_membarrier_worker_thread(void *arg) +{ + struct test_membarrier_thread_args *args = + (struct test_membarrier_thread_args *)arg; + const int iters = opt_reps; + int i; + + if (rseq_register_current_thread()) { + fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n", + errno, strerror(errno)); + abort(); + } + + /* Wait for initialization. 
*/ + while (!atomic_load(&args->percpu_list_ptr)) {} + + for (i = 0; i < iters; ++i) { + int ret; + + do { + int cpu = rseq_cpu_start(); + + ret = rseq_offset_deref_addv(&args->percpu_list_ptr, + sizeof(struct percpu_list_entry) * cpu, 1, cpu); + } while (rseq_unlikely(ret)); + } + + if (rseq_unregister_current_thread()) { + fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n", + errno, strerror(errno)); + abort(); + } + return NULL; +} + +void test_membarrier_init_percpu_list(struct percpu_list *list) +{ + int i; + + memset(list, 0, sizeof(*list)); + for (i = 0; i < CPU_SETSIZE; i++) { + struct percpu_list_node *node; + + node = malloc(sizeof(*node)); + assert(node); + node->data = 0; + node->next = NULL; + list->c[i].head = node; + } +} + +void test_membarrier_free_percpu_list(struct percpu_list *list) +{ + int i; + + for (i = 0; i < CPU_SETSIZE; i++) + free(list->c[i].head); +} + +static int sys_membarrier(int cmd, int flags, int cpu_id) +{ + return syscall(__NR_membarrier, cmd, flags, cpu_id); +} + +/* + * The manager thread swaps per-cpu lists that worker threads see, + * and validates that there are no unexpected modifications. + */ +void *test_membarrier_manager_thread(void *arg) +{ + struct test_membarrier_thread_args *args = + (struct test_membarrier_thread_args *)arg; + struct percpu_list list_a, list_b; + intptr_t expect_a = 0, expect_b = 0; + int cpu_a = 0, cpu_b = 0; + + if (rseq_register_current_thread()) { + fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n", + errno, strerror(errno)); + abort(); + } + + /* Init lists. */ + test_membarrier_init_percpu_list(&list_a); + test_membarrier_init_percpu_list(&list_b); + + atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a); + + while (!atomic_load(&args->stop)) { + /* list_a is "active". */ + cpu_a = rand() % CPU_SETSIZE; + /* + * As list_b is "inactive", we should never see changes + * to list_b. 
+ */ + if (expect_b != atomic_load(&list_b.c[cpu_b].head->data)) { + fprintf(stderr, "Membarrier test failed\n"); + abort(); + } + + /* Make list_b "active". */ + atomic_store(&args->percpu_list_ptr, (intptr_t)&list_b); + if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ, + MEMBARRIER_CMD_FLAG_CPU, cpu_a) && + errno != ENXIO /* missing CPU */) { + perror("sys_membarrier"); + abort(); + } + /* + * Cpu A should now only modify list_b, so the values + * in list_a should be stable. + */ + expect_a = atomic_load(&list_a.c[cpu_a].head->data); + + cpu_b = rand() % CPU_SETSIZE; + /* + * As list_a is "inactive", we should never see changes + * to list_a. + */ + if (expect_a != atomic_load(&list_a.c[cpu_a].head->data)) { + fprintf(stderr, "Membarrier test failed\n"); + abort(); + } + + /* Make list_a "active". */ + atomic_store(&args->percpu_list_ptr, (intptr_t)&list_a); + if (sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ, + MEMBARRIER_CMD_FLAG_CPU, cpu_b) && + errno != ENXIO /* missing CPU*/) { + perror("sys_membarrier"); + abort(); + } + /* Remember a value from list_b. */ + expect_b = atomic_load(&list_b.c[cpu_b].head->data); + } + + test_membarrier_free_percpu_list(&list_a); + test_membarrier_free_percpu_list(&list_b); + + if (rseq_unregister_current_thread()) { + fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n", + errno, strerror(errno)); + abort(); + } + return NULL; +} + +/* Test MEMBARRIER_CMD_PRIVATE_RESTART_RSEQ_ON_CPU membarrier command. 
*/ +#ifdef RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV +void test_membarrier(void) +{ + const int num_threads = opt_threads; + struct test_membarrier_thread_args thread_args; + pthread_t worker_threads[num_threads]; + pthread_t manager_thread; + int i, ret; + + if (sys_membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0)) { + perror("sys_membarrier"); + abort(); + } + + thread_args.stop = 0; + thread_args.percpu_list_ptr = 0; + ret = pthread_create(&manager_thread, NULL, + test_membarrier_manager_thread, &thread_args); + if (ret) { + errno = ret; + perror("pthread_create"); + abort(); + } + + for (i = 0; i < num_threads; i++) { + ret = pthread_create(&worker_threads[i], NULL, + test_membarrier_worker_thread, &thread_args); + if (ret) { + errno = ret; + perror("pthread_create"); + abort(); + } + } + + + for (i = 0; i < num_threads; i++) { + ret = pthread_join(worker_threads[i], NULL); + if (ret) { + errno = ret; + perror("pthread_join"); + abort(); + } + } + + atomic_store(&thread_args.stop, 1); + ret = pthread_join(manager_thread, NULL); + if (ret) { + errno = ret; + perror("pthread_join"); + abort(); + } +} +#else /* RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV */ +void test_membarrier(void) +{ + fprintf(stderr, "rseq_offset_deref_addv is not implemented on this architecture. 
" + "Skipping membarrier test.\n"); +} +#endif + static void show_usage(int argc, char **argv) { printf("Usage : %s <OPTIONS>\n", @@ -1153,7 +1369,7 @@ static void show_usage(int argc, char **argv) printf(" [-r N] Number of repetitions per thread (default 5000)\n"); printf(" [-d] Disable rseq system call (no initialization)\n"); printf(" [-D M] Disable rseq for each M threads\n"); - printf(" [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement\n"); + printf(" [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement, membarrie(r)\n"); printf(" [-M] Push into buffer and memcpy buffer with memory barriers.\n"); printf(" [-v] Verbose output.\n"); printf(" [-h] Show this help.\n"); @@ -1268,6 +1484,7 @@ int main(int argc, char **argv) case 'i': case 'b': case 'm': + case 'r': break; default: show_usage(argc, argv); @@ -1320,6 +1537,10 @@ int main(int argc, char **argv) printf_verbose("counter increment\n"); test_percpu_inc(); break; + case 'r': + printf_verbose("membarrier\n"); + test_membarrier(); + break; } if (!opt_disable_rseq && rseq_unregister_current_thread()) abort(); diff --git a/tools/testing/selftests/rseq/rseq-x86.h b/tools/testing/selftests/rseq/rseq-x86.h index b2da6004fe30..640411518e46 100644 --- a/tools/testing/selftests/rseq/rseq-x86.h +++ b/tools/testing/selftests/rseq/rseq-x86.h @@ -279,6 +279,63 @@ error1: #endif } +#define RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV + +/* + * pval = *(ptr+off) + * *pval += inc; + */ +static inline __attribute__((always_inline)) +int rseq_offset_deref_addv(intptr_t *ptr, off_t off, intptr_t inc, int cpu) +{ + RSEQ_INJECT_C(9) + + __asm__ __volatile__ goto ( + RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */ +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1]) +#endif + /* Start rseq by storing table entry pointer into rseq_cs. 
*/ + RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi])) + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f) + RSEQ_INJECT_ASM(3) +#ifdef RSEQ_COMPARE_TWICE + RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1]) +#endif + /* get p+v */ + "movq %[ptr], %%rbx\n\t" + "addq %[off], %%rbx\n\t" + /* get pv */ + "movq (%%rbx), %%rcx\n\t" + /* *pv += inc */ + "addq %[inc], (%%rcx)\n\t" + "2:\n\t" + RSEQ_INJECT_ASM(4) + RSEQ_ASM_DEFINE_ABORT(4, "", abort) + : /* gcc asm goto does not allow outputs */ + : [cpu_id] "r" (cpu), + [rseq_abi] "r" (&__rseq_abi), + /* final store input */ + [ptr] "m" (*ptr), + [off] "er" (off), + [inc] "er" (inc) + : "memory", "cc", "rax", "rbx", "rcx" + RSEQ_INJECT_CLOBBER + : abort +#ifdef RSEQ_COMPARE_TWICE + , error1 +#endif + ); + return 0; +abort: + RSEQ_INJECT_FAILED + return -1; +#ifdef RSEQ_COMPARE_TWICE +error1: + rseq_bug("cpu_id comparison failed"); +#endif +} + static inline __attribute__((always_inline)) int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect, intptr_t *v2, intptr_t newv2, diff --git a/tools/testing/selftests/rseq/run_param_test.sh b/tools/testing/selftests/rseq/run_param_test.sh index e426304fd4a0..f51bc83c9e41 100755 --- a/tools/testing/selftests/rseq/run_param_test.sh +++ b/tools/testing/selftests/rseq/run_param_test.sh @@ -15,6 +15,7 @@ TEST_LIST=( "-T m" "-T m -M" "-T i" + "-T r" ) TEST_NAME=( @@ -25,6 +26,7 @@ TEST_NAME=( "memcpy" "memcpy with barrier" "increment" + "membarrier" ) IFS="$OLDIFS" diff --git a/tools/testing/selftests/run_kselftest.sh b/tools/testing/selftests/run_kselftest.sh new file mode 100755 index 000000000000..609a4ef9300e --- /dev/null +++ b/tools/testing/selftests/run_kselftest.sh @@ -0,0 +1,93 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Run installed kselftest tests. +# +BASE_DIR=$(realpath $(dirname $0)) +cd $BASE_DIR +TESTS="$BASE_DIR"/kselftest-list.txt +if [ ! 
-r "$TESTS" ] ; then + echo "$0: Could not find list of tests to run ($TESTS)" >&2 + available="" +else + available="$(cat "$TESTS")" +fi + +. ./kselftest/runner.sh +ROOT=$PWD + +usage() +{ + cat <<EOF +Usage: $0 [OPTIONS] + -s | --summary Print summary with detailed log in output.log + -t | --test COLLECTION:TEST Run TEST from COLLECTION + -c | --collection COLLECTION Run all tests from COLLECTION + -l | --list List the available collection:test entries + -d | --dry-run Don't actually run any tests + -h | --help Show this usage info +EOF + exit $1 +} + +COLLECTIONS="" +TESTS="" +dryrun="" +while true; do + case "$1" in + -s | --summary) + logfile="$BASE_DIR"/output.log + cat /dev/null > $logfile + shift ;; + -t | --test) + TESTS="$TESTS $2" + shift 2 ;; + -c | --collection) + COLLECTIONS="$COLLECTIONS $2" + shift 2 ;; + -l | --list) + echo "$available" + exit 0 ;; + -n | --dry-run) + dryrun="echo" + shift ;; + -h | --help) + usage 0 ;; + "") + break ;; + *) + usage 1 ;; + esac +done + +# Add all selected collections to the explicit test list. +if [ -n "$COLLECTIONS" ]; then + for collection in $COLLECTIONS ; do + found="$(echo "$available" | grep "^$collection:")" + if [ -z "$found" ] ; then + echo "No such collection '$collection'" >&2 + exit 1 + fi + TESTS="$TESTS $found" + done +fi +# Replace available test list with explicitly selected tests. 
+if [ -n "$TESTS" ]; then + valid="" + for test in $TESTS ; do + found="$(echo "$available" | grep "^${test}$")" + if [ -z "$found" ] ; then + echo "No such test '$test'" >&2 + exit 1 + fi + valid="$valid $found" + done + available="$(echo "$valid" | sed -e 's/ /\n/g')" +fi + +collections=$(echo "$available" | cut -d: -f1 | uniq) +for collection in $collections ; do + [ -w /dev/kmsg ] && echo "kselftest: Running tests in $collection" >> /dev/kmsg + tests=$(echo "$available" | grep "^$collection:" | cut -d: -f2) + ($dryrun cd "$collection" && $dryrun run_many $tests) +done diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 7a6d40286a42..4a180439ee9e 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -774,8 +774,15 @@ void *kill_thread(void *data) return (void *)SIBLING_EXIT_UNKILLED; } +enum kill_t { + KILL_THREAD, + KILL_PROCESS, + RET_UNKNOWN +}; + /* Prepare a thread that will kill itself or both of us. */ -void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process) +void kill_thread_or_group(struct __test_metadata *_metadata, + enum kill_t kill_how) { pthread_t thread; void *status; @@ -791,11 +798,12 @@ void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process) .len = (unsigned short)ARRAY_SIZE(filter_thread), .filter = filter_thread, }; + int kill = kill_how == KILL_PROCESS ? 
SECCOMP_RET_KILL_PROCESS : 0xAAAAAAAAA; struct sock_filter filter_process[] = { BPF_STMT(BPF_LD|BPF_W|BPF_ABS, offsetof(struct seccomp_data, nr)), BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1), - BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_PROCESS), + BPF_STMT(BPF_RET|BPF_K, kill), BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), }; struct sock_fprog prog_process = { @@ -808,13 +816,15 @@ void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process) } ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, - kill_process ? &prog_process : &prog_thread)); + kill_how == KILL_THREAD ? &prog_thread + : &prog_process)); /* * Add the KILL_THREAD rule again to make sure that the KILL_PROCESS * flag cannot be downgraded by a new filter. */ - ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread)); + if (kill_how == KILL_PROCESS) + ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread)); /* Start a thread that will exit immediately. */ ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)false)); @@ -842,7 +852,7 @@ TEST(KILL_thread) child_pid = fork(); ASSERT_LE(0, child_pid); if (child_pid == 0) { - kill_thread_or_group(_metadata, false); + kill_thread_or_group(_metadata, KILL_THREAD); _exit(38); } @@ -861,7 +871,7 @@ TEST(KILL_process) child_pid = fork(); ASSERT_LE(0, child_pid); if (child_pid == 0) { - kill_thread_or_group(_metadata, true); + kill_thread_or_group(_metadata, KILL_PROCESS); _exit(38); } @@ -872,6 +882,27 @@ TEST(KILL_process) ASSERT_EQ(SIGSYS, WTERMSIG(status)); } +TEST(KILL_unknown) +{ + int status; + pid_t child_pid; + + child_pid = fork(); + ASSERT_LE(0, child_pid); + if (child_pid == 0) { + kill_thread_or_group(_metadata, RET_UNKNOWN); + _exit(38); + } + + ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); + + /* If the entire process was killed, we'll see SIGSYS. 
*/ + EXPECT_TRUE(WIFSIGNALED(status)) { + TH_LOG("Unknown SECCOMP_RET is only killing the thread?"); + } + ASSERT_EQ(SIGSYS, WTERMSIG(status)); +} + /* TODO(wad) add 64-bit versus 32-bit arg tests. */ TEST(arg_out_of_range) { @@ -1667,70 +1698,148 @@ TEST_F(TRACE_poke, getpid_runs_normally) } #if defined(__x86_64__) -# define ARCH_REGS struct user_regs_struct -# define SYSCALL_NUM orig_rax -# define SYSCALL_RET rax +# define ARCH_REGS struct user_regs_struct +# define SYSCALL_NUM(_regs) (_regs).orig_rax +# define SYSCALL_RET(_regs) (_regs).rax #elif defined(__i386__) -# define ARCH_REGS struct user_regs_struct -# define SYSCALL_NUM orig_eax -# define SYSCALL_RET eax +# define ARCH_REGS struct user_regs_struct +# define SYSCALL_NUM(_regs) (_regs).orig_eax +# define SYSCALL_RET(_regs) (_regs).eax #elif defined(__arm__) -# define ARCH_REGS struct pt_regs -# define SYSCALL_NUM ARM_r7 -# define SYSCALL_RET ARM_r0 +# define ARCH_REGS struct pt_regs +# define SYSCALL_NUM(_regs) (_regs).ARM_r7 +# ifndef PTRACE_SET_SYSCALL +# define PTRACE_SET_SYSCALL 23 +# endif +# define SYSCALL_NUM_SET(_regs, _nr) \ + EXPECT_EQ(0, ptrace(PTRACE_SET_SYSCALL, tracee, NULL, _nr)) +# define SYSCALL_RET(_regs) (_regs).ARM_r0 #elif defined(__aarch64__) -# define ARCH_REGS struct user_pt_regs -# define SYSCALL_NUM regs[8] -# define SYSCALL_RET regs[0] +# define ARCH_REGS struct user_pt_regs +# define SYSCALL_NUM(_regs) (_regs).regs[8] +# ifndef NT_ARM_SYSTEM_CALL +# define NT_ARM_SYSTEM_CALL 0x404 +# endif +# define SYSCALL_NUM_SET(_regs, _nr) \ + do { \ + struct iovec __v; \ + typeof(_nr) __nr = (_nr); \ + __v.iov_base = &__nr; \ + __v.iov_len = sizeof(__nr); \ + EXPECT_EQ(0, ptrace(PTRACE_SETREGSET, tracee, \ + NT_ARM_SYSTEM_CALL, &__v)); \ + } while (0) +# define SYSCALL_RET(_regs) (_regs).regs[0] #elif defined(__riscv) && __riscv_xlen == 64 -# define ARCH_REGS struct user_regs_struct -# define SYSCALL_NUM a7 -# define SYSCALL_RET a0 +# define ARCH_REGS struct user_regs_struct +# define 
SYSCALL_NUM(_regs) (_regs).a7 +# define SYSCALL_RET(_regs) (_regs).a0 #elif defined(__csky__) -# define ARCH_REGS struct pt_regs -#if defined(__CSKYABIV2__) -# define SYSCALL_NUM regs[3] -#else -# define SYSCALL_NUM regs[9] -#endif -# define SYSCALL_RET a0 +# define ARCH_REGS struct pt_regs +# if defined(__CSKYABIV2__) +# define SYSCALL_NUM(_regs) (_regs).regs[3] +# else +# define SYSCALL_NUM(_regs) (_regs).regs[9] +# endif +# define SYSCALL_RET(_regs) (_regs).a0 #elif defined(__hppa__) -# define ARCH_REGS struct user_regs_struct -# define SYSCALL_NUM gr[20] -# define SYSCALL_RET gr[28] +# define ARCH_REGS struct user_regs_struct +# define SYSCALL_NUM(_regs) (_regs).gr[20] +# define SYSCALL_RET(_regs) (_regs).gr[28] #elif defined(__powerpc__) -# define ARCH_REGS struct pt_regs -# define SYSCALL_NUM gpr[0] -# define SYSCALL_RET gpr[3] +# define ARCH_REGS struct pt_regs +# define SYSCALL_NUM(_regs) (_regs).gpr[0] +# define SYSCALL_RET(_regs) (_regs).gpr[3] +# define SYSCALL_RET_SET(_regs, _val) \ + do { \ + typeof(_val) _result = (_val); \ + /* \ + * A syscall error is signaled by CR0 SO bit \ + * and the code is stored as a positive value. 
\ + */ \ + if (_result < 0) { \ + SYSCALL_RET(_regs) = -_result; \ + (_regs).ccr |= 0x10000000; \ + } else { \ + SYSCALL_RET(_regs) = _result; \ + (_regs).ccr &= ~0x10000000; \ + } \ + } while (0) +# define SYSCALL_RET_SET_ON_PTRACE_EXIT #elif defined(__s390__) -# define ARCH_REGS s390_regs -# define SYSCALL_NUM gprs[2] -# define SYSCALL_RET gprs[2] -# define SYSCALL_NUM_RET_SHARE_REG +# define ARCH_REGS s390_regs +# define SYSCALL_NUM(_regs) (_regs).gprs[2] +# define SYSCALL_RET_SET(_regs, _val) \ + TH_LOG("Can't modify syscall return on this architecture") #elif defined(__mips__) -# define ARCH_REGS struct pt_regs -# define SYSCALL_NUM regs[2] -# define SYSCALL_SYSCALL_NUM regs[4] -# define SYSCALL_RET regs[2] -# define SYSCALL_NUM_RET_SHARE_REG +# include <asm/unistd_nr_n32.h> +# include <asm/unistd_nr_n64.h> +# include <asm/unistd_nr_o32.h> +# define ARCH_REGS struct pt_regs +# define SYSCALL_NUM(_regs) \ + ({ \ + typeof((_regs).regs[2]) _nr; \ + if ((_regs).regs[2] == __NR_O32_Linux) \ + _nr = (_regs).regs[4]; \ + else \ + _nr = (_regs).regs[2]; \ + _nr; \ + }) +# define SYSCALL_NUM_SET(_regs, _nr) \ + do { \ + if ((_regs).regs[2] == __NR_O32_Linux) \ + (_regs).regs[4] = _nr; \ + else \ + (_regs).regs[2] = _nr; \ + } while (0) +# define SYSCALL_RET_SET(_regs, _val) \ + TH_LOG("Can't modify syscall return on this architecture") #elif defined(__xtensa__) -# define ARCH_REGS struct user_pt_regs -# define SYSCALL_NUM syscall +# define ARCH_REGS struct user_pt_regs +# define SYSCALL_NUM(_regs) (_regs).syscall /* * On xtensa syscall return value is in the register * a2 of the current window which is not fixed.
*/ -#define SYSCALL_RET(reg) a[(reg).windowbase * 4 + 2] +#define SYSCALL_RET(_regs) (_regs).a[(_regs).windowbase * 4 + 2] #elif defined(__sh__) -# define ARCH_REGS struct pt_regs -# define SYSCALL_NUM gpr[3] -# define SYSCALL_RET gpr[0] +# define ARCH_REGS struct pt_regs +# define SYSCALL_NUM(_regs) (_regs).gpr[3] +# define SYSCALL_RET(_regs) (_regs).gpr[0] #else # error "Do not know how to find your architecture's registers and syscalls" #endif +/* + * Most architectures can change the syscall by just updating the + * associated register. This is the default if not defined above. + */ +#ifndef SYSCALL_NUM_SET +# define SYSCALL_NUM_SET(_regs, _nr) \ + do { \ + SYSCALL_NUM(_regs) = (_nr); \ + } while (0) +#endif +/* + * Most architectures can change the syscall return value by just + * writing to the SYSCALL_RET register. This is the default if not + * defined above. If an architecture cannot set the return value + * (for example when the syscall and return value register is + * shared), report it with TH_LOG() in an arch-specific definition + * of SYSCALL_RET_SET() above, and leave SYSCALL_RET undefined. + */ +#if !defined(SYSCALL_RET) && !defined(SYSCALL_RET_SET) +# error "One of SYSCALL_RET or SYSCALL_RET_SET is needed for this arch" +#endif +#ifndef SYSCALL_RET_SET +# define SYSCALL_RET_SET(_regs, _val) \ + do { \ + SYSCALL_RET(_regs) = (_val); \ + } while (0) +#endif + /* When the syscall return can't be changed, stub out the tests for it. */ -#ifdef SYSCALL_NUM_RET_SHARE_REG +#ifndef SYSCALL_RET # define EXPECT_SYSCALL_RETURN(val, action) EXPECT_EQ(-1, action) #else # define EXPECT_SYSCALL_RETURN(val, action) \ @@ -1745,116 +1854,92 @@ TEST_F(TRACE_poke, getpid_runs_normally) } while (0) #endif -/* Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for +/* + * Some architectures (e.g. powerpc) can only set syscall + * return values on syscall exit during ptrace. 
+ */ +const bool ptrace_entry_set_syscall_nr = true; +const bool ptrace_entry_set_syscall_ret = +#ifndef SYSCALL_RET_SET_ON_PTRACE_EXIT + true; +#else + false; +#endif + +/* + * Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for * architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux). */ #if defined(__x86_64__) || defined(__i386__) || defined(__mips__) -#define HAVE_GETREGS +# define ARCH_GETREGS(_regs) ptrace(PTRACE_GETREGS, tracee, 0, &(_regs)) +# define ARCH_SETREGS(_regs) ptrace(PTRACE_SETREGS, tracee, 0, &(_regs)) +#else +# define ARCH_GETREGS(_regs) ({ \ + struct iovec __v; \ + __v.iov_base = &(_regs); \ + __v.iov_len = sizeof(_regs); \ + ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &__v); \ + }) +# define ARCH_SETREGS(_regs) ({ \ + struct iovec __v; \ + __v.iov_base = &(_regs); \ + __v.iov_len = sizeof(_regs); \ + ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &__v); \ + }) #endif /* Architecture-specific syscall fetching routine. */ int get_syscall(struct __test_metadata *_metadata, pid_t tracee) { ARCH_REGS regs; -#ifdef HAVE_GETREGS - EXPECT_EQ(0, ptrace(PTRACE_GETREGS, tracee, 0, &regs)) { - TH_LOG("PTRACE_GETREGS failed"); - return -1; - } -#else - struct iovec iov; - iov.iov_base = &regs; - iov.iov_len = sizeof(regs); - EXPECT_EQ(0, ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov)) { - TH_LOG("PTRACE_GETREGSET failed"); + EXPECT_EQ(0, ARCH_GETREGS(regs)) { return -1; } -#endif -#if defined(__mips__) - if (regs.SYSCALL_NUM == __NR_O32_Linux) - return regs.SYSCALL_SYSCALL_NUM; -#endif - return regs.SYSCALL_NUM; + return SYSCALL_NUM(regs); } /* Architecture-specific syscall changing routine.
*/ -void change_syscall(struct __test_metadata *_metadata, - pid_t tracee, int syscall, int result) +void __change_syscall(struct __test_metadata *_metadata, + pid_t tracee, long *syscall, long *ret) { - int ret; - ARCH_REGS regs; -#ifdef HAVE_GETREGS - ret = ptrace(PTRACE_GETREGS, tracee, 0, &regs); -#else - struct iovec iov; - iov.iov_base = &regs; - iov.iov_len = sizeof(regs); - ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov); -#endif - EXPECT_EQ(0, ret) {} + ARCH_REGS orig, regs; -#if defined(__x86_64__) || defined(__i386__) || defined(__powerpc__) || \ - defined(__s390__) || defined(__hppa__) || defined(__riscv) || \ - defined(__xtensa__) || defined(__csky__) || defined(__sh__) - { - regs.SYSCALL_NUM = syscall; - } -#elif defined(__mips__) - { - if (regs.SYSCALL_NUM == __NR_O32_Linux) - regs.SYSCALL_SYSCALL_NUM = syscall; - else - regs.SYSCALL_NUM = syscall; - } + /* Do not get/set registers if we have nothing to do. */ + if (!syscall && !ret) + return; -#elif defined(__arm__) -# ifndef PTRACE_SET_SYSCALL -# define PTRACE_SET_SYSCALL 23 -# endif - { - ret = ptrace(PTRACE_SET_SYSCALL, tracee, NULL, syscall); - EXPECT_EQ(0, ret); + EXPECT_EQ(0, ARCH_GETREGS(regs)) { + return; } + orig = regs; -#elif defined(__aarch64__) -# ifndef NT_ARM_SYSTEM_CALL -# define NT_ARM_SYSTEM_CALL 0x404 -# endif - { - iov.iov_base = &syscall; - iov.iov_len = sizeof(syscall); - ret = ptrace(PTRACE_SETREGSET, tracee, NT_ARM_SYSTEM_CALL, - &iov); - EXPECT_EQ(0, ret); - } + if (syscall) + SYSCALL_NUM_SET(regs, *syscall); -#else - ASSERT_EQ(1, 0) { - TH_LOG("How is the syscall changed on this architecture?"); - } -#endif + if (ret) + SYSCALL_RET_SET(regs, *ret); - /* If syscall is skipped, change return value. */ - if (syscall == -1) -#ifdef SYSCALL_NUM_RET_SHARE_REG - TH_LOG("Can't modify syscall return on this architecture"); + /* Flush any register changes made.
*/ + if (memcmp(&orig, &regs, sizeof(orig)) != 0) + EXPECT_EQ(0, ARCH_SETREGS(regs)); +} -#elif defined(__xtensa__) - regs.SYSCALL_RET(regs) = result; -#else - regs.SYSCALL_RET = result; -#endif +/* Change only syscall number. */ +void change_syscall_nr(struct __test_metadata *_metadata, + pid_t tracee, long syscall) +{ + __change_syscall(_metadata, tracee, &syscall, NULL); +} -#ifdef HAVE_GETREGS - ret = ptrace(PTRACE_SETREGS, tracee, 0, &regs); -#else - iov.iov_base = &regs; - iov.iov_len = sizeof(regs); - ret = ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &iov); -#endif - EXPECT_EQ(0, ret); +/* Change syscall return value (and set syscall number to -1). */ +void change_syscall_ret(struct __test_metadata *_metadata, + pid_t tracee, long ret) +{ + long syscall = -1; + + __change_syscall(_metadata, tracee, &syscall, &ret); } void tracer_seccomp(struct __test_metadata *_metadata, pid_t tracee, @@ -1872,17 +1957,17 @@ void tracer_seccomp(struct __test_metadata *_metadata, pid_t tracee, case 0x1002: /* change getpid to getppid. */ EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee)); - change_syscall(_metadata, tracee, __NR_getppid, 0); + change_syscall_nr(_metadata, tracee, __NR_getppid); break; case 0x1003: /* skip gettid with valid return code. */ EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee)); - change_syscall(_metadata, tracee, -1, 45000); + change_syscall_ret(_metadata, tracee, 45000); break; case 0x1004: /* skip openat with error.
*/ EXPECT_EQ(__NR_openat, get_syscall(_metadata, tracee)); - change_syscall(_metadata, tracee, -1, -ESRCH); + change_syscall_ret(_metadata, tracee, -ESRCH); break; case 0x1005: /* do nothing (allow getppid) */ @@ -1897,12 +1982,21 @@ void tracer_seccomp(struct __test_metadata *_metadata, pid_t tracee, } +FIXTURE(TRACE_syscall) { + struct sock_fprog prog; + pid_t tracer, mytid, mypid, parent; + long syscall_nr; +}; + void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee, int status, void *args) { - int ret, nr; + int ret; unsigned long msg; static bool entry; + long syscall_nr_val, syscall_ret_val; + long *syscall_nr = NULL, *syscall_ret = NULL; + FIXTURE_DATA(TRACE_syscall) *self = args; /* * The traditional way to tell PTRACE_SYSCALL entry/exit @@ -1916,24 +2010,48 @@ void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee, EXPECT_EQ(entry ? PTRACE_EVENTMSG_SYSCALL_ENTRY : PTRACE_EVENTMSG_SYSCALL_EXIT, msg); - if (!entry) - return; + /* + * Some architectures only support setting return values during + * syscall exit under ptrace, and on exit the syscall number may + * no longer be available. Therefore, save the initial syscall + * number here, so it can be examined during both entry and exit + * phases. + */ + if (entry) + self->syscall_nr = get_syscall(_metadata, tracee); - nr = get_syscall(_metadata, tracee); + /* + * Depending on the architecture's syscall setting abilities, we + * pick which things to set during this phase (entry or exit). + */ + if (entry == ptrace_entry_set_syscall_nr) + syscall_nr = &syscall_nr_val; + if (entry == ptrace_entry_set_syscall_ret) + syscall_ret = &syscall_ret_val; + + /* Now handle the actual rewriting cases. */ + switch (self->syscall_nr) { + case __NR_getpid: + syscall_nr_val = __NR_getppid; + /* Never change syscall return for this case.
*/ + syscall_ret = NULL; + break; + case __NR_gettid: + syscall_nr_val = -1; + syscall_ret_val = 45000; + break; + case __NR_openat: + syscall_nr_val = -1; + syscall_ret_val = -ESRCH; + break; + default: + /* Unhandled, do nothing. */ + return; + } - if (nr == __NR_getpid) - change_syscall(_metadata, tracee, __NR_getppid, 0); - if (nr == __NR_gettid) - change_syscall(_metadata, tracee, -1, 45000); - if (nr == __NR_openat) - change_syscall(_metadata, tracee, -1, -ESRCH); + __change_syscall(_metadata, tracee, syscall_nr, syscall_ret); } -FIXTURE(TRACE_syscall) { - struct sock_fprog prog; - pid_t tracer, mytid, mypid, parent; -}; - FIXTURE_VARIANT(TRACE_syscall) { /* * All of the SECCOMP_RET_TRACE behaviors can be tested with either @@ -1992,7 +2110,7 @@ FIXTURE_SETUP(TRACE_syscall) self->tracer = setup_trace_fixture(_metadata, variant->use_ptrace ? tracer_ptrace : tracer_seccomp, - NULL, variant->use_ptrace); + self, variant->use_ptrace); ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); ASSERT_EQ(0, ret); @@ -3142,11 +3260,11 @@ skip: static int user_notif_syscall(int nr, unsigned int flags) { struct sock_filter filter[] = { - BPF_STMT(BPF_LD+BPF_W+BPF_ABS, + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, offsetof(struct seccomp_data, nr)), - BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, nr, 0, 1), - BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_USER_NOTIF), - BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, nr, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_USER_NOTIF), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), }; struct sock_fprog prog = { @@ -3699,7 +3817,7 @@ TEST(user_notification_filter_empty) long ret; int status; struct pollfd pollfd; - struct clone_args args = { + struct __clone_args args = { .flags = CLONE_FILES, .exit_signal = SIGCHLD, }; @@ -3715,7 +3833,7 @@ TEST(user_notification_filter_empty) if (pid == 0) { int listener; - listener = user_notif_syscall(__NR_mknod, SECCOMP_FILTER_FLAG_NEW_LISTENER); + listener = user_notif_syscall(__NR_mknodat, 
SECCOMP_FILTER_FLAG_NEW_LISTENER); if (listener < 0) _exit(EXIT_FAILURE); @@ -3753,7 +3871,7 @@ TEST(user_notification_filter_empty_threaded) long ret; int status; struct pollfd pollfd; - struct clone_args args = { + struct __clone_args args = { .flags = CLONE_FILES, .exit_signal = SIGCHLD, }; diff --git a/tools/testing/selftests/timers/Makefile b/tools/testing/selftests/timers/Makefile index 7656c7ce79d9..0e73a16874c4 100644 --- a/tools/testing/selftests/timers/Makefile +++ b/tools/testing/selftests/timers/Makefile @@ -13,6 +13,7 @@ DESTRUCTIVE_TESTS = alarmtimer-suspend valid-adjtimex adjtick change_skew \ TEST_GEN_PROGS_EXTENDED = $(DESTRUCTIVE_TESTS) +TEST_FILES := settings include ../lib.mk diff --git a/tools/testing/selftests/timers/settings b/tools/testing/selftests/timers/settings new file mode 100644 index 000000000000..e7b9417537fb --- /dev/null +++ b/tools/testing/selftests/timers/settings @@ -0,0 +1 @@ +timeout=0 diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile index a9026706d597..30873b19d04b 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -3,6 +3,23 @@ uname_M := $(shell uname -m 2>/dev/null || echo not) MACHINE ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/') +# Without this, failed build products remain, with up-to-date timestamps, +# thus tricking Make (and you!) into believing that All Is Well, in subsequent +# make invocations: +.DELETE_ON_ERROR: + +# Avoid accidental wrong builds, due to built-in rules working just a little +# bit too well--but not quite as well as required for our situation here. +# +# In other words, "make userfaultfd" is supposed to fail to build at all, +# because this Makefile only supports either "make" (all), or "make /full/path". +# However, the built-in rules, if not suppressed, will pick up CFLAGS and the +# initial LDLIBS (but not the target-specific LDLIBS, because those are only +# set for the full path target!). 
This causes it to get pretty far into building +# things despite using incorrect values such as an *occasionally* incomplete +# LDLIBS. +MAKEFLAGS += --no-builtin-rules + CFLAGS = -Wall -I ../../../../usr/include $(EXTRA_CFLAGS) LDLIBS = -lrt TEST_GEN_FILES = compaction_test diff --git a/tools/testing/selftests/vm/compaction_test.c b/tools/testing/selftests/vm/compaction_test.c index bcec71250873..9b420140ba2b 100644 --- a/tools/testing/selftests/vm/compaction_test.c +++ b/tools/testing/selftests/vm/compaction_test.c @@ -18,7 +18,8 @@ #include "../kselftest.h" -#define MAP_SIZE 1048576 +#define MAP_SIZE_MB 100 +#define MAP_SIZE (MAP_SIZE_MB * 1024 * 1024) struct map_list { void *map; @@ -165,7 +166,7 @@ int main(int argc, char **argv) void *map = NULL; unsigned long mem_free = 0; unsigned long hugepage_size = 0; - unsigned long mem_fragmentable = 0; + long mem_fragmentable_MB = 0; if (prereq() != 0) { printf("Either the sysctl compact_unevictable_allowed is not\n" @@ -190,9 +191,9 @@ int main(int argc, char **argv) return -1; } - mem_fragmentable = mem_free * 0.8 / 1024; + mem_fragmentable_MB = mem_free * 0.8 / 1024; - while (mem_fragmentable > 0) { + while (mem_fragmentable_MB > 0) { map = mmap(NULL, MAP_SIZE, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE | MAP_LOCKED, -1, 0); if (map == MAP_FAILED) @@ -213,7 +214,7 @@ int main(int argc, char **argv) for (i = 0; i < MAP_SIZE; i += page_size) *(unsigned long *)(map + i) = (unsigned long)map + i; - mem_fragmentable--; + mem_fragmentable_MB -= MAP_SIZE_MB; } for (entry = list; entry != NULL; entry = entry->next) { diff --git a/tools/testing/selftests/vm/config b/tools/testing/selftests/vm/config index 3ba674b64fa9..69dd0d1aa30b 100644 --- a/tools/testing/selftests/vm/config +++ b/tools/testing/selftests/vm/config @@ -3,3 +3,4 @@ CONFIG_USERFAULTFD=y CONFIG_TEST_VMALLOC=m CONFIG_DEVICE_PRIVATE=y CONFIG_TEST_HMM=m +CONFIG_GUP_BENCHMARK=y diff --git a/tools/testing/selftests/vm/gup_benchmark.c 
b/tools/testing/selftests/vm/gup_benchmark.c index 43b4dfe161a2..1d4359341e44 100644 --- a/tools/testing/selftests/vm/gup_benchmark.c +++ b/tools/testing/selftests/vm/gup_benchmark.c @@ -15,12 +15,12 @@ #define PAGE_SIZE sysconf(_SC_PAGESIZE) #define GUP_FAST_BENCHMARK _IOWR('g', 1, struct gup_benchmark) -#define GUP_LONGTERM_BENCHMARK _IOWR('g', 2, struct gup_benchmark) -#define GUP_BENCHMARK _IOWR('g', 3, struct gup_benchmark) +#define GUP_BENCHMARK _IOWR('g', 2, struct gup_benchmark) /* Similar to above, but use FOLL_PIN instead of FOLL_GET. */ -#define PIN_FAST_BENCHMARK _IOWR('g', 4, struct gup_benchmark) -#define PIN_BENCHMARK _IOWR('g', 5, struct gup_benchmark) +#define PIN_FAST_BENCHMARK _IOWR('g', 3, struct gup_benchmark) +#define PIN_BENCHMARK _IOWR('g', 4, struct gup_benchmark) +#define PIN_LONGTERM_BENCHMARK _IOWR('g', 5, struct gup_benchmark) /* Just the flags we need, copied from mm.h: */ #define FOLL_WRITE 0x01 /* check pte is writable */ @@ -52,6 +52,9 @@ int main(int argc, char **argv) case 'b': cmd = PIN_BENCHMARK; break; + case 'L': + cmd = PIN_LONGTERM_BENCHMARK; + break; case 'm': size = atoi(optarg) * MB; break; @@ -67,9 +70,6 @@ int main(int argc, char **argv) case 'T': thp = 0; break; - case 'L': - cmd = GUP_LONGTERM_BENCHMARK; - break; case 'U': cmd = GUP_BENCHMARK; break; @@ -105,12 +105,16 @@ int main(int argc, char **argv) gup.flags |= FOLL_WRITE; fd = open("/sys/kernel/debug/gup_benchmark", O_RDWR); - if (fd == -1) - perror("open"), exit(1); + if (fd == -1) { + perror("open"); + exit(1); + } p = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, filed, 0); - if (p == MAP_FAILED) - perror("mmap"), exit(1); + if (p == MAP_FAILED) { + perror("mmap"); + exit(1); + } gup.addr = (unsigned long)p; if (thp == 1) @@ -123,8 +127,10 @@ int main(int argc, char **argv) for (i = 0; i < repeats; i++) { gup.size = size; - if (ioctl(fd, cmd, &gup)) - perror("ioctl"), exit(1); + if (ioctl(fd, cmd, &gup)) { + perror("ioctl"); + exit(1); + } printf("Time: 
get:%lld put:%lld us", gup.get_delta_usec, gup.put_delta_usec); diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/vm/hmm-tests.c index 93fc5cadce61..c9404ef9698e 100644 --- a/tools/testing/selftests/vm/hmm-tests.c +++ b/tools/testing/selftests/vm/hmm-tests.c @@ -45,7 +45,7 @@ struct hmm_buffer { #define TWOMEG (1 << 21) #define HMM_BUFFER_SIZE (1024 << 12) #define HMM_PATH_MAX 64 -#define NTIMES 256 +#define NTIMES 10 #define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1))) @@ -680,7 +680,7 @@ TEST_F(hmm, anon_write_hugetlbfs) n = gethugepagesizes(pagesizes, 4); if (n <= 0) - return; + SKIP(return, "Huge page size could not be determined"); for (idx = 0; --n > 0; ) { if (pagesizes[n] < pagesizes[idx]) idx = n; @@ -694,7 +694,7 @@ TEST_F(hmm, anon_write_hugetlbfs) buffer->ptr = get_hugepage_region(size, GHR_STRICT); if (buffer->ptr == NULL) { free(buffer); - return; + SKIP(return, "Huge page could not be allocated"); } buffer->fd = -1; diff --git a/tools/testing/selftests/vm/map_hugetlb.c b/tools/testing/selftests/vm/map_hugetlb.c index 6af951900aa3..312889edb84a 100644 --- a/tools/testing/selftests/vm/map_hugetlb.c +++ b/tools/testing/selftests/vm/map_hugetlb.c @@ -83,7 +83,7 @@ int main(int argc, char **argv) } if (shift) - printf("%u kB hugepages\n", 1 << shift); + printf("%u kB hugepages\n", 1 << (shift - 10)); else printf("Default size hugepages\n"); printf("Mapping %lu Mbytes\n", (unsigned long)length >> 20); diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c index 61e5cfeb1350..9b0912a01777 100644 --- a/tools/testing/selftests/vm/userfaultfd.c +++ b/tools/testing/selftests/vm/userfaultfd.c @@ -227,8 +227,10 @@ static void hugetlb_allocate_area(void **alloc_area) huge_fd, *alloc_area == area_src ? 
0 : nr_pages * page_size); if (area_alias == MAP_FAILED) { - if (munmap(*alloc_area, nr_pages * page_size) < 0) - perror("hugetlb munmap"), exit(1); + if (munmap(*alloc_area, nr_pages * page_size) < 0) { + perror("hugetlb munmap"); + exit(1); + } *alloc_area = NULL; return; } @@ -337,9 +339,10 @@ static void wp_range(int ufd, __u64 start, __u64 len, bool wp) /* Undo write-protect, do wakeup after that */ prms.mode = wp ? UFFDIO_WRITEPROTECT_MODE_WP : 0; - if (ioctl(ufd, UFFDIO_WRITEPROTECT, &prms)) - fprintf(stderr, "clear WP failed for address 0x%Lx\n", - start), exit(1); + if (ioctl(ufd, UFFDIO_WRITEPROTECT, &prms)) { + fprintf(stderr, "clear WP failed for address 0x%Lx\n", start); + exit(1); + } } static void *locking_thread(void *arg) @@ -359,8 +362,10 @@ static void *locking_thread(void *arg) seed += cpu; bzero(&rand, sizeof(rand)); bzero(&randstate, sizeof(randstate)); - if (initstate_r(seed, randstate, sizeof(randstate), &rand)) - fprintf(stderr, "srandom_r error\n"), exit(1); + if (initstate_r(seed, randstate, sizeof(randstate), &rand)) { + fprintf(stderr, "srandom_r error\n"); + exit(1); + } } else { page_nr = -bounces; if (!(bounces & BOUNCE_RACINGFAULTS)) @@ -369,12 +374,16 @@ static void *locking_thread(void *arg) while (!finished) { if (bounces & BOUNCE_RANDOM) { - if (random_r(&rand, &rand_nr)) - fprintf(stderr, "random_r 1 error\n"), exit(1); + if (random_r(&rand, &rand_nr)) { + fprintf(stderr, "random_r 1 error\n"); + exit(1); + } page_nr = rand_nr; if (sizeof(page_nr) > sizeof(rand_nr)) { - if (random_r(&rand, &rand_nr)) - fprintf(stderr, "random_r 2 error\n"), exit(1); + if (random_r(&rand, &rand_nr)) { + fprintf(stderr, "random_r 2 error\n"); + exit(1); + } page_nr |= (((unsigned long) rand_nr) << 16) << 16; } @@ -385,11 +394,13 @@ static void *locking_thread(void *arg) start = time(NULL); if (bounces & BOUNCE_VERIFY) { count = *area_count(area_dst, page_nr); - if (!count) + if (!count) { fprintf(stderr, "page_nr %lu wrong count %Lu %Lu\n", 
page_nr, count, - count_verify[page_nr]), exit(1); + count_verify[page_nr]); + exit(1); + } /* @@ -401,11 +412,12 @@ static void *locking_thread(void *arg) */ #if 1 if (!my_bcmp(area_dst + page_nr * page_size, zeropage, - page_size)) + page_size)) { fprintf(stderr, "my_bcmp page_nr %lu wrong count %Lu %Lu\n", - page_nr, count, - count_verify[page_nr]), exit(1); + page_nr, count, count_verify[page_nr]); + exit(1); + } #else unsigned long loops; @@ -437,7 +449,7 @@ static void *locking_thread(void *arg) fprintf(stderr, "page_nr %lu memory corruption %Lu %Lu\n", page_nr, count, - count_verify[page_nr]), exit(1); + count_verify[page_nr]); exit(1); } count++; *area_count(area_dst, page_nr) = count_verify[page_nr] = count; @@ -461,12 +473,14 @@ static void retry_copy_page(int ufd, struct uffdio_copy *uffdio_copy, offset); if (ioctl(ufd, UFFDIO_COPY, uffdio_copy)) { /* real retval in ufdio_copy.copy */ - if (uffdio_copy->copy != -EEXIST) + if (uffdio_copy->copy != -EEXIST) { fprintf(stderr, "UFFDIO_COPY retry error %Ld\n", - uffdio_copy->copy), exit(1); + uffdio_copy->copy); + exit(1); + } } else { fprintf(stderr, "UFFDIO_COPY retry unexpected %Ld\n", - uffdio_copy->copy), exit(1); + uffdio_copy->copy); exit(1); } } @@ -474,9 +488,10 @@ static int __copy_page(int ufd, unsigned long offset, bool retry) { struct uffdio_copy uffdio_copy; - if (offset >= nr_pages * page_size) - fprintf(stderr, "unexpected offset %lu\n", - offset), exit(1); + if (offset >= nr_pages * page_size) { + fprintf(stderr, "unexpected offset %lu\n", offset); + exit(1); + } uffdio_copy.dst = (unsigned long) area_dst + offset; uffdio_copy.src = (unsigned long) area_src + offset; uffdio_copy.len = page_size; @@ -487,12 +502,14 @@ static int __copy_page(int ufd, unsigned long offset, bool retry) uffdio_copy.copy = 0; if (ioctl(ufd, UFFDIO_COPY, &uffdio_copy)) { /* real retval in ufdio_copy.copy */ - if (uffdio_copy.copy != -EEXIST) + if (uffdio_copy.copy != -EEXIST) { fprintf(stderr, "UFFDIO_COPY error 
%Ld\n", - uffdio_copy.copy), exit(1); + uffdio_copy.copy); + exit(1); + } } else if (uffdio_copy.copy != page_size) { fprintf(stderr, "UFFDIO_COPY unexpected copy %Ld\n", - uffdio_copy.copy), exit(1); + uffdio_copy.copy); exit(1); } else { if (test_uffdio_copy_eexist && retry) { test_uffdio_copy_eexist = false; @@ -521,11 +538,11 @@ static int uffd_read_msg(int ufd, struct uffd_msg *msg) if (ret < 0) { if (errno == EAGAIN) return 1; - else - perror("blocking read error"), exit(1); + perror("blocking read error"); } else { - fprintf(stderr, "short read\n"), exit(1); + fprintf(stderr, "short read\n"); } + exit(1); } return 0; @@ -536,9 +553,10 @@ static void uffd_handle_page_fault(struct uffd_msg *msg, { unsigned long offset; - if (msg->event != UFFD_EVENT_PAGEFAULT) - fprintf(stderr, "unexpected msg event %u\n", - msg->event), exit(1); + if (msg->event != UFFD_EVENT_PAGEFAULT) { + fprintf(stderr, "unexpected msg event %u\n", msg->event); + exit(1); + } if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WP) { wp_range(uffd, msg->arg.pagefault.address, page_size, false); @@ -546,8 +564,10 @@ static void uffd_handle_page_fault(struct uffd_msg *msg, } else { /* Missing page faults */ if (bounces & BOUNCE_VERIFY && - msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE) - fprintf(stderr, "unexpected write fault\n"), exit(1); + msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE) { + fprintf(stderr, "unexpected write fault\n"); + exit(1); + } offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst; offset &= ~(page_size-1); @@ -574,25 +594,32 @@ static void *uffd_poll_thread(void *arg) for (;;) { ret = poll(pollfd, 2, -1); - if (!ret) - fprintf(stderr, "poll error %d\n", ret), exit(1); - if (ret < 0) - perror("poll"), exit(1); + if (!ret) { + fprintf(stderr, "poll error %d\n", ret); + exit(1); + } + if (ret < 0) { + perror("poll"); + exit(1); + } if (pollfd[1].revents & POLLIN) { - if (read(pollfd[1].fd, &tmp_chr, 1) != 1) - fprintf(stderr, "read pipefd 
error\n"), - exit(1); + if (read(pollfd[1].fd, &tmp_chr, 1) != 1) { + fprintf(stderr, "read pipefd error\n"); + exit(1); + } break; } - if (!(pollfd[0].revents & POLLIN)) + if (!(pollfd[0].revents & POLLIN)) { fprintf(stderr, "pollfd[0].revents %d\n", - pollfd[0].revents), exit(1); + pollfd[0].revents); + exit(1); + } if (uffd_read_msg(uffd, &msg)) continue; switch (msg.event) { default: fprintf(stderr, "unexpected msg event %u\n", - msg.event), exit(1); + msg.event); exit(1); break; case UFFD_EVENT_PAGEFAULT: uffd_handle_page_fault(&msg, stats); @@ -606,8 +633,10 @@ static void *uffd_poll_thread(void *arg) uffd_reg.range.start = msg.arg.remove.start; uffd_reg.range.len = msg.arg.remove.end - msg.arg.remove.start; - if (ioctl(uffd, UFFDIO_UNREGISTER, &uffd_reg.range)) - fprintf(stderr, "remove failure\n"), exit(1); + if (ioctl(uffd, UFFDIO_UNREGISTER, &uffd_reg.range)) { + fprintf(stderr, "remove failure\n"); + exit(1); + } break; case UFFD_EVENT_REMAP: area_dst = (char *)(unsigned long)msg.arg.remap.to; @@ -879,8 +908,10 @@ static int faulting_process(int signal_test) area_dst = mremap(area_dst, nr_pages * page_size, nr_pages * page_size, MREMAP_MAYMOVE | MREMAP_FIXED, area_src); - if (area_dst == MAP_FAILED) - perror("mremap"), exit(1); + if (area_dst == MAP_FAILED) { + perror("mremap"); + exit(1); + } for (; nr < nr_pages; nr++) { count = *area_count(area_dst, nr); @@ -888,7 +919,7 @@ static int faulting_process(int signal_test) fprintf(stderr, "nr %lu memory corruption %Lu %Lu\n", nr, count, - count_verify[nr]), exit(1); + count_verify[nr]); exit(1); } /* * Trigger write protection if there is by writting @@ -901,8 +932,10 @@ static int faulting_process(int signal_test) return 1; for (nr = 0; nr < nr_pages; nr++) { - if (my_bcmp(area_dst + nr * page_size, zeropage, page_size)) - fprintf(stderr, "nr %lu is not zero\n", nr), exit(1); + if (my_bcmp(area_dst + nr * page_size, zeropage, page_size)) { + fprintf(stderr, "nr %lu is not zero\n", nr); + exit(1); + } } 
return 0; @@ -916,12 +949,14 @@ static void retry_uffdio_zeropage(int ufd, uffdio_zeropage->range.len, offset); if (ioctl(ufd, UFFDIO_ZEROPAGE, uffdio_zeropage)) { - if (uffdio_zeropage->zeropage != -EEXIST) + if (uffdio_zeropage->zeropage != -EEXIST) { fprintf(stderr, "UFFDIO_ZEROPAGE retry error %Ld\n", - uffdio_zeropage->zeropage), exit(1); + uffdio_zeropage->zeropage); + exit(1); + } } else { fprintf(stderr, "UFFDIO_ZEROPAGE retry unexpected %Ld\n", - uffdio_zeropage->zeropage), exit(1); + uffdio_zeropage->zeropage); exit(1); } } @@ -933,9 +968,10 @@ static int __uffdio_zeropage(int ufd, unsigned long offset, bool retry) has_zeropage = uffd_test_ops->expected_ioctls & (1 << _UFFDIO_ZEROPAGE); - if (offset >= nr_pages * page_size) - fprintf(stderr, "unexpected offset %lu\n", - offset), exit(1); + if (offset >= nr_pages * page_size) { + fprintf(stderr, "unexpected offset %lu\n", offset); + exit(1); + } uffdio_zeropage.range.start = (unsigned long) area_dst + offset; uffdio_zeropage.range.len = page_size; uffdio_zeropage.mode = 0; @@ -943,22 +979,26 @@ static int __uffdio_zeropage(int ufd, unsigned long offset, bool retry) if (ret) { /* real retval in ufdio_zeropage.zeropage */ if (has_zeropage) { - if (uffdio_zeropage.zeropage == -EEXIST) - fprintf(stderr, "UFFDIO_ZEROPAGE -EEXIST\n"), - exit(1); - else + if (uffdio_zeropage.zeropage == -EEXIST) { + fprintf(stderr, "UFFDIO_ZEROPAGE -EEXIST\n"); + exit(1); + } else { fprintf(stderr, "UFFDIO_ZEROPAGE error %Ld\n", - uffdio_zeropage.zeropage), exit(1); + uffdio_zeropage.zeropage); + exit(1); + } } else { - if (uffdio_zeropage.zeropage != -EINVAL) + if (uffdio_zeropage.zeropage != -EINVAL) { fprintf(stderr, "UFFDIO_ZEROPAGE not -EINVAL %Ld\n", - uffdio_zeropage.zeropage), exit(1); + uffdio_zeropage.zeropage); + exit(1); + } } } else if (has_zeropage) { if (uffdio_zeropage.zeropage != page_size) { fprintf(stderr, "UFFDIO_ZEROPAGE unexpected %Ld\n", - uffdio_zeropage.zeropage), exit(1); + uffdio_zeropage.zeropage); 
exit(1); } else { if (test_uffdio_zeropage_eexist && retry) { test_uffdio_zeropage_eexist = false; @@ -970,7 +1010,7 @@ static int __uffdio_zeropage(int ufd, unsigned long offset, bool retry) } else { fprintf(stderr, "UFFDIO_ZEROPAGE succeeded %Ld\n", - uffdio_zeropage.zeropage), exit(1); + uffdio_zeropage.zeropage); exit(1); } return 0; @@ -1000,19 +1040,24 @@ static int userfaultfd_zeropage_test(void) uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING; if (test_uffdio_wp) uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP; - if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) - fprintf(stderr, "register failure\n"), exit(1); + if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) { + fprintf(stderr, "register failure\n"); + exit(1); + } expected_ioctls = uffd_test_ops->expected_ioctls; if ((uffdio_register.ioctls & expected_ioctls) != - expected_ioctls) + expected_ioctls) { fprintf(stderr, - "unexpected missing ioctl for anon memory\n"), - exit(1); + "unexpected missing ioctl for anon memory\n"); + exit(1); + } if (uffdio_zeropage(uffd, 0)) { - if (my_bcmp(area_dst, zeropage, page_size)) - fprintf(stderr, "zeropage is not zero\n"), exit(1); + if (my_bcmp(area_dst, zeropage, page_size)) { + fprintf(stderr, "zeropage is not zero\n"); + exit(1); + } } close(uffd); @@ -1047,32 +1092,41 @@ static int userfaultfd_events_test(void) uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING; if (test_uffdio_wp) uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP; - if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) - fprintf(stderr, "register failure\n"), exit(1); + if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) { + fprintf(stderr, "register failure\n"); + exit(1); + } expected_ioctls = uffd_test_ops->expected_ioctls; - if ((uffdio_register.ioctls & expected_ioctls) != - expected_ioctls) - fprintf(stderr, - "unexpected missing ioctl for anon memory\n"), - exit(1); + if ((uffdio_register.ioctls & expected_ioctls) != expected_ioctls) { + fprintf(stderr, "unexpected missing ioctl 
for anon memory\n"); + exit(1); + } - if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats)) - perror("uffd_poll_thread create"), exit(1); + if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats)) { + perror("uffd_poll_thread create"); + exit(1); + } pid = fork(); - if (pid < 0) - perror("fork"), exit(1); + if (pid < 0) { + perror("fork"); + exit(1); + } if (!pid) return faulting_process(0); waitpid(pid, &err, 0); - if (err) - fprintf(stderr, "faulting process failed\n"), exit(1); + if (err) { + fprintf(stderr, "faulting process failed\n"); + exit(1); + } - if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) - perror("pipe write"), exit(1); + if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) { + perror("pipe write"); + exit(1); + } if (pthread_join(uffd_mon, NULL)) return 1; @@ -1110,38 +1164,49 @@ static int userfaultfd_sig_test(void) uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING; if (test_uffdio_wp) uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP; - if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) - fprintf(stderr, "register failure\n"), exit(1); + if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) { + fprintf(stderr, "register failure\n"); + exit(1); + } expected_ioctls = uffd_test_ops->expected_ioctls; - if ((uffdio_register.ioctls & expected_ioctls) != - expected_ioctls) - fprintf(stderr, - "unexpected missing ioctl for anon memory\n"), - exit(1); + if ((uffdio_register.ioctls & expected_ioctls) != expected_ioctls) { + fprintf(stderr, "unexpected missing ioctl for anon memory\n"); + exit(1); + } - if (faulting_process(1)) - fprintf(stderr, "faulting process failed\n"), exit(1); + if (faulting_process(1)) { + fprintf(stderr, "faulting process failed\n"); + exit(1); + } if (uffd_test_ops->release_pages(area_dst)) return 1; - if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats)) - perror("uffd_poll_thread create"), exit(1); + if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats)) { + perror("uffd_poll_thread create"); + 
exit(1); + } pid = fork(); - if (pid < 0) - perror("fork"), exit(1); + if (pid < 0) { + perror("fork"); + exit(1); + } if (!pid) exit(faulting_process(2)); waitpid(pid, &err, 0); - if (err) - fprintf(stderr, "faulting process failed\n"), exit(1); + if (err) { + fprintf(stderr, "faulting process failed\n"); + exit(1); + } - if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) - perror("pipe write"), exit(1); + if (write(pipefd[1], &c, sizeof(c)) != sizeof(c)) { + perror("pipe write"); + exit(1); + } if (pthread_join(uffd_mon, (void **)&userfaults)) return 1; @@ -1395,7 +1460,7 @@ static void set_test_type(const char *type) test_type = TEST_SHMEM; uffd_test_ops = &shmem_uffd_test_ops; } else { - fprintf(stderr, "Unknown test type: %s\n", type), exit(1); + fprintf(stderr, "Unknown test type: %s\n", type); exit(1); } if (test_type == TEST_HUGETLB) @@ -1403,12 +1468,15 @@ static void set_test_type(const char *type) else page_size = sysconf(_SC_PAGE_SIZE); - if (!page_size) - fprintf(stderr, "Unable to determine page size\n"), - exit(2); + if (!page_size) { + fprintf(stderr, "Unable to determine page size\n"); + exit(2); + } if ((unsigned long) area_count(NULL, 0) + sizeof(unsigned long long) * 2 - > page_size) - fprintf(stderr, "Impossible to run this test\n"), exit(2); + > page_size) { + fprintf(stderr, "Impossible to run this test\n"); + exit(2); + } } static void sigalrm(int sig) @@ -1425,8 +1493,10 @@ int main(int argc, char **argv) if (argc < 4) usage(); - if (signal(SIGALRM, sigalrm) == SIG_ERR) - fprintf(stderr, "failed to arm SIGALRM"), exit(1); + if (signal(SIGALRM, sigalrm) == SIG_ERR) { + fprintf(stderr, "failed to arm SIGALRM"); + exit(1); + } alarm(ALARM_INTERVAL_SECS); set_test_type(argv[1]); diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c index 998319553523..7161cfc2e60b 100644 --- a/tools/testing/selftests/x86/fsgsbase.c +++ b/tools/testing/selftests/x86/fsgsbase.c @@ -443,6 +443,68 @@ static void 
test_unexpected_base(void) #define USER_REGS_OFFSET(r) offsetof(struct user_regs_struct, r) +static void test_ptrace_write_gs_read_base(void) +{ + int status; + pid_t child = fork(); + + if (child < 0) + err(1, "fork"); + + if (child == 0) { + printf("[RUN]\tPTRACE_POKE GS, read GSBASE back\n"); + + printf("[RUN]\tARCH_SET_GS to 1\n"); + if (syscall(SYS_arch_prctl, ARCH_SET_GS, 1) != 0) + err(1, "ARCH_SET_GS"); + + if (ptrace(PTRACE_TRACEME, 0, NULL, NULL) != 0) + err(1, "PTRACE_TRACEME"); + + raise(SIGTRAP); + _exit(0); + } + + wait(&status); + + if (WSTOPSIG(status) == SIGTRAP) { + unsigned long base; + unsigned long gs_offset = USER_REGS_OFFSET(gs); + unsigned long base_offset = USER_REGS_OFFSET(gs_base); + + /* Read the initial base. It should be 1. */ + base = ptrace(PTRACE_PEEKUSER, child, base_offset, NULL); + if (base == 1) { + printf("[OK]\tGSBASE started at 1\n"); + } else { + nerrs++; + printf("[FAIL]\tGSBASE started at 0x%lx\n", base); + } + + printf("[RUN]\tSet GS = 0x7, read GSBASE\n"); + + /* Poke an LDT selector into GS. */ + if (ptrace(PTRACE_POKEUSER, child, gs_offset, 0x7) != 0) + err(1, "PTRACE_POKEUSER"); + + /* And read the base. 
*/ + base = ptrace(PTRACE_PEEKUSER, child, base_offset, NULL); + + if (base == 0 || base == 1) { + printf("[OK]\tGSBASE reads as 0x%lx with invalid GS\n", base); + } else { + nerrs++; + printf("[FAIL]\tGSBASE=0x%lx (should be 0 or 1)\n", base); + } + } + + ptrace(PTRACE_CONT, child, NULL, NULL); + + wait(&status); + if (!WIFEXITED(status)) + printf("[WARN]\tChild didn't exit cleanly.\n"); +} + static void test_ptrace_write_gsbase(void) { int status; @@ -517,6 +579,9 @@ static void test_ptrace_write_gsbase(void) END: ptrace(PTRACE_CONT, child, NULL, NULL); + wait(&status); + if (!WIFEXITED(status)) + printf("[WARN]\tChild didn't exit cleanly.\n"); } int main() @@ -526,6 +591,9 @@ int main() shared_scratch = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0); + /* Do these tests before we have an LDT. */ + test_ptrace_write_gs_read_base(); + /* Probe FSGSBASE */ sethandler(SIGILL, sigill, 0); if (sigsetjmp(jmpbuf, 1) == 0) { diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c index c41f24b517f4..65c141ebfbbd 100644 --- a/tools/testing/selftests/x86/test_vsyscall.c +++ b/tools/testing/selftests/x86/test_vsyscall.c @@ -462,6 +462,17 @@ static int test_vsys_x(void) return 0; } +/* + * Debuggers expect ptrace() to be able to peek at the vsyscall page. + * Use process_vm_readv() as a proxy for ptrace() to test this. We + * want it to work in the vsyscall=emulate case and to fail in the + * vsyscall=xonly case. + * + * It's worth noting that this ABI is a bit nutty. write(2) can't + * read from the vsyscall page on any kernel version or mode. The + * fact that ptrace() ever worked was a nice courtesy of old kernels, + * but the code to support it is fairly gross. 
+ */ static int test_process_vm_readv(void) { #ifdef __x86_64__ @@ -477,8 +488,12 @@ static int test_process_vm_readv(void) remote.iov_len = 4096; ret = process_vm_readv(getpid(), &local, 1, &remote, 1, 0); if (ret != 4096) { - printf("[OK]\tprocess_vm_readv() failed (ret = %d, errno = %d)\n", ret, errno); - return 0; + /* + * We expect process_vm_readv() to work if and only if the + * vsyscall page is readable. + */ + printf("[%s]\tprocess_vm_readv() failed (ret = %d, errno = %d)\n", vsyscall_map_r ? "FAIL" : "OK", ret, errno); + return vsyscall_map_r ? 1 : 0; } if (vsyscall_map_r) { @@ -488,6 +503,9 @@ static int test_process_vm_readv(void) printf("[FAIL]\tIt worked but returned incorrect data\n"); return 1; } + } else { + printf("[FAIL]\tprocess_rm_readv() succeeded, but it should have failed in this configuration\n"); + return 1; } #endif |