From 8b1bc88c3c67bf95de248c35ed2855e0f4a4db90 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 10 Jul 2020 09:24:36 -0700 Subject: selftests/seccomp: Rename XFAIL to SKIP The kselftests will be renaming XFAIL to SKIP in the test harness, and to avoid painful conflicts, rename XFAIL to SKIP now in a future-proofed way. Signed-off-by: Kees Cook --- tools/testing/selftests/seccomp/seccomp_bpf.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index c0aa46ce14f6..d4c8858fde8e 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -52,6 +52,11 @@ #include "../kselftest_harness.h" +/* Attempt to de-conflict with the selftests tree. */ +#ifndef SKIP +#define SKIP(s, ...) XFAIL(s, ##__VA_ARGS__) +#endif + #ifndef PR_SET_PTRACER # define PR_SET_PTRACER 0x59616d61 #endif @@ -3068,7 +3073,7 @@ TEST(get_metadata) /* Only real root can get metadata. */ if (geteuid()) { - XFAIL(return, "get_metadata requires real root"); + SKIP(return, "get_metadata requires real root"); return; } @@ -3111,7 +3116,7 @@ TEST(get_metadata) ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md); EXPECT_EQ(sizeof(md), ret) { if (errno == EINVAL) - XFAIL(goto skip, "Kernel does not support PTRACE_SECCOMP_GET_METADATA (missing CONFIG_CHECKPOINT_RESTORE?)"); + SKIP(goto skip, "Kernel does not support PTRACE_SECCOMP_GET_METADATA (missing CONFIG_CHECKPOINT_RESTORE?)"); } EXPECT_EQ(md.flags, SECCOMP_FILTER_FLAG_LOG); @@ -3672,7 +3677,7 @@ TEST(user_notification_continue) resp.val = 0; EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0) { if (errno == EINVAL) - XFAIL(goto skip, "Kernel does not support SECCOMP_USER_NOTIF_FLAG_CONTINUE"); + SKIP(goto skip, "Kernel does not support SECCOMP_USER_NOTIF_FLAG_CONTINUE"); } skip: @@ -3680,7 +3685,7 @@ skip: EXPECT_EQ(true, WIFEXITED(status)); EXPECT_EQ(0, WEXITSTATUS(status)) { if (WEXITSTATUS(status) == 2) { - XFAIL(return, "Kernel does not support kcmp() syscall"); + SKIP(return, "Kernel does not support kcmp() syscall"); return; } } -- cgit v1.2.3 From d7d2e5bb9f361dc05c7dedd38d4d9269ad8c05e2 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 10 Jul 2020 10:26:11 -0700 Subject: selftests/seccomp: Add SKIPs for failed unshare() Running the seccomp tests as a regular user shouldn't just fail tests that require CAP_SYS_ADMIN (for getting a PID namespace). Instead, detect those cases and SKIP them. Additionally, gracefully SKIP missing CONFIG_USER_NS (and add to "config" since we'd prefer to actually test this case). Signed-off-by: Kees Cook --- tools/testing/selftests/seccomp/config | 1 + tools/testing/selftests/seccomp/seccomp_bpf.c | 10 ++++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/seccomp/config b/tools/testing/selftests/seccomp/config index db1e11b08c8a..64c19d8eba79 100644 --- a/tools/testing/selftests/seccomp/config +++ b/tools/testing/selftests/seccomp/config @@ -1,2 +1,3 @@ CONFIG_SECCOMP=y CONFIG_SECCOMP_FILTER=y +CONFIG_USER_NS=y diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index d4c8858fde8e..80223113f3e3 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -3444,7 +3444,10 @@ TEST(user_notification_child_pid_ns) struct seccomp_notif req = {}; struct seccomp_notif_resp resp = {}; - ASSERT_EQ(unshare(CLONE_NEWUSER | CLONE_NEWPID), 0); + ASSERT_EQ(unshare(CLONE_NEWUSER | CLONE_NEWPID), 0) { + if (errno == EINVAL) + SKIP(return, "kernel missing CLONE_NEWUSER support"); + }; listener = user_trap_syscall(__NR_getppid, SECCOMP_FILTER_FLAG_NEW_LISTENER); @@ -3509,7 +3512,10 @@ TEST(user_notification_sibling_pid_ns) } /* Create the sibling ns, and sibling in it. */ - ASSERT_EQ(unshare(CLONE_NEWPID), 0); + ASSERT_EQ(unshare(CLONE_NEWPID), 0) { + if (errno == EPERM) + SKIP(return, "CLONE_NEWPID requires CAP_SYS_ADMIN"); + } ASSERT_EQ(errno, 0); pid2 = fork(); -- cgit v1.2.3 From e4d05028a07f505a08802a6d1b11674c149df2b3 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 10 Jul 2020 10:29:41 -0700 Subject: selftests/seccomp: Set NNP for TSYNC ESRCH flag test The TSYNC ESRCH flag test will fail for regular users because NNP was not set yet. Add NNP setting. Fixes: 51891498f2da ("seccomp: allow TSYNC and USER_NOTIF together") Cc: stable@vger.kernel.org Reviewed-by: Tycho Andersen Signed-off-by: Kees Cook --- tools/testing/selftests/seccomp/seccomp_bpf.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 80223113f3e3..c8f93edc52cc 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -3262,6 +3262,11 @@ TEST(user_notification_with_tsync) int ret; unsigned int flags; + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + /* these were exclusive */ flags = SECCOMP_FILTER_FLAG_NEW_LISTENER | SECCOMP_FILTER_FLAG_TSYNC; -- cgit v1.2.3 From ad5682184a811d62e56f07d25d75eeee9dffe3d9 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Sun, 31 May 2020 13:50:31 +0200 Subject: selftests/seccomp: Check for EPOLLHUP for user_notif This verifies we're correctly notified when a seccomp filter becomes unused when a notifier is in use. Signed-off-by: Christian Brauner Link: https://lore.kernel.org/r/20200531115031.391515-4-christian.brauner@ubuntu.com Signed-off-by: Kees Cook --- tools/testing/selftests/seccomp/seccomp_bpf.c | 136 ++++++++++++++++++++++++++ 1 file changed, 136 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index c8f93edc52cc..fc899cfbca99 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -51,6 +51,7 @@ #include #include "../kselftest_harness.h" +#include "../clone3/clone3_selftests.h" /* Attempt to de-conflict with the selftests tree. */ #ifndef SKIP @@ -3702,6 +3703,141 @@ skip: } } +TEST(user_notification_filter_empty) +{ + pid_t pid; + long ret; + int status; + struct pollfd pollfd; + struct clone_args args = { + .flags = CLONE_FILES, + .exit_signal = SIGCHLD, + }; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + pid = sys_clone3(&args, sizeof(args)); + ASSERT_GE(pid, 0); + + if (pid == 0) { + int listener; + + listener = user_trap_syscall(__NR_mknod, SECCOMP_FILTER_FLAG_NEW_LISTENER); + if (listener < 0) + _exit(EXIT_FAILURE); + + if (dup2(listener, 200) != 200) + _exit(EXIT_FAILURE); + + close(listener); + + _exit(EXIT_SUCCESS); + } + + EXPECT_EQ(waitpid(pid, &status, 0), pid); + EXPECT_EQ(true, WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); + + /* + * The seccomp filter has become unused so we should be notified once + * the kernel gets around to cleaning up task struct. + */ + pollfd.fd = 200; + pollfd.events = POLLHUP; + + EXPECT_GT(poll(&pollfd, 1, 2000), 0); + EXPECT_GT((pollfd.revents & POLLHUP) ?: 0, 0); +} + +static void *do_thread(void *data) +{ + return NULL; +} + +TEST(user_notification_filter_empty_threaded) +{ + pid_t pid; + long ret; + int status; + struct pollfd pollfd; + struct clone_args args = { + .flags = CLONE_FILES, + .exit_signal = SIGCHLD, + }; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + pid = sys_clone3(&args, sizeof(args)); + ASSERT_GE(pid, 0); + + if (pid == 0) { + pid_t pid1, pid2; + int listener, status; + pthread_t thread; + + listener = user_trap_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER); + if (listener < 0) + _exit(EXIT_FAILURE); + + if (dup2(listener, 200) != 200) + _exit(EXIT_FAILURE); + + close(listener); + + pid1 = fork(); + if (pid1 < 0) + _exit(EXIT_FAILURE); + + if (pid1 == 0) + _exit(EXIT_SUCCESS); + + pid2 = fork(); + if (pid2 < 0) + _exit(EXIT_FAILURE); + + if (pid2 == 0) + _exit(EXIT_SUCCESS); + + if (pthread_create(&thread, NULL, do_thread, NULL) || + pthread_join(thread, NULL)) + _exit(EXIT_FAILURE); + + if (pthread_create(&thread, NULL, do_thread, NULL) || + pthread_join(thread, NULL)) + _exit(EXIT_FAILURE); + + if (waitpid(pid1, &status, 0) != pid1 || !WIFEXITED(status) || + WEXITSTATUS(status)) + _exit(EXIT_FAILURE); + + if (waitpid(pid2, &status, 0) != pid2 || !WIFEXITED(status) || + WEXITSTATUS(status)) + _exit(EXIT_FAILURE); + + exit(EXIT_SUCCESS); + } + + EXPECT_EQ(waitpid(pid, &status, 0), pid); + EXPECT_EQ(true, WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); + + /* + * The seccomp filter has become unused so we should be notified once + * the kernel gets around to cleaning up task struct. + */ + pollfd.fd = 200; + pollfd.events = POLLHUP; + + EXPECT_GT(poll(&pollfd, 1, 2000), 0); + EXPECT_GT((pollfd.revents & POLLHUP) ?: 0, 0); +} + /* * TODO: * - add microbenchmarks -- cgit v1.2.3 From d3a37ea9f6e548388b83fe895c7a037bc2ec3f7f Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 1 Jun 2020 12:34:44 -0700 Subject: selftests/seccomp: Expand benchmark to per-filter measurements It's useful to see how much (at a minimum) each filter adds to the syscall overhead. Add additional calculations. Signed-off-by: Kees Cook --- .../testing/selftests/seccomp/seccomp_benchmark.c | 36 +++++++++++++++++----- tools/testing/selftests/seccomp/seccomp_bpf.c | 2 -- 2 files changed, 29 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/seccomp/seccomp_benchmark.c b/tools/testing/selftests/seccomp/seccomp_benchmark.c index 5838c8697ec3..eca13fe1fba9 100644 --- a/tools/testing/selftests/seccomp/seccomp_benchmark.c +++ b/tools/testing/selftests/seccomp/seccomp_benchmark.c @@ -68,32 +68,54 @@ int main(int argc, char *argv[]) }; long ret; unsigned long long samples; - unsigned long long native, filtered; + unsigned long long native, filter1, filter2; if (argc > 1) samples = strtoull(argv[1], NULL, 0); else samples = calibrate(); + printf("Current BPF sysctl settings:\n"); + system("sysctl net.core.bpf_jit_enable"); + system("sysctl net.core.bpf_jit_harden"); printf("Benchmarking %llu samples...\n", samples); + /* Native call */ native = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples; printf("getpid native: %llu ns\n", native); ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); assert(ret == 0); + /* One filter */ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); assert(ret == 0); - filtered = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples; - printf("getpid RET_ALLOW: %llu ns\n", filtered); + filter1 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples; + printf("getpid RET_ALLOW 1 filter: %llu ns\n", filter1); - printf("Estimated seccomp overhead per syscall: %llu ns\n", - filtered - native); + if (filter1 == native) + printf("No overhead measured!? Try running again with more samples.\n"); - if (filtered == native) - printf("Trying running again with more samples.\n"); + /* Two filters */ + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + assert(ret == 0); + + filter2 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples; + printf("getpid RET_ALLOW 2 filters: %llu ns\n", filter2); + + /* Calculations */ + printf("Estimated total seccomp overhead for 1 filter: %llu ns\n", + filter1 - native); + + printf("Estimated total seccomp overhead for 2 filters: %llu ns\n", + filter2 - native); + + printf("Estimated seccomp per-filter overhead: %llu ns\n", + filter2 - filter1); + + printf("Estimated seccomp entry overhead: %llu ns\n", + filter1 - native - (filter2 - filter1)); return 0; } diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index fc899cfbca99..d76ae98a34a2 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -3840,7 +3840,6 @@ TEST(user_notification_filter_empty_threaded) /* * TODO: - * - add microbenchmarks * - expand NNP testing * - better arch-specific TRACE and TRAP handlers. * - endianness checking when appropriate @@ -3848,7 +3847,6 @@ TEST(user_notification_filter_empty_threaded) * - arch value testing (x86 modes especially) * - verify that FILTER_FLAG_LOG filters generate log messages * - verify that RET_LOG generates log messages - * - ... */ TEST_HARNESS_MAIN -- cgit v1.2.3 From bc32c9c86581abf7baacf71342df3b0affe367db Mon Sep 17 00:00:00 2001 From: Thadeu Lima de Souza Cascardo Date: Mon, 1 Jun 2020 12:50:12 -0700 Subject: selftests/seccomp: use 90s as timeout As seccomp_benchmark tries to calibrate how many samples will take more than 5 seconds to execute, it may end up picking up a number of samples that take 10 (but up to 12) seconds. As the calibration will take double that time, it takes around 20 seconds. Then, it executes the whole thing again, and then once more, with some added overhead. So, the thing might take more than 40 seconds, which is too close to the 45s timeout. That is very dependent on the system where it's executed, so may not be observed always, but it has been observed on x86 VMs. Using a 90s timeout seems safe enough. Signed-off-by: Thadeu Lima de Souza Cascardo Link: https://lore.kernel.org/r/20200601123202.1183526-1-cascardo@canonical.com Signed-off-by: Kees Cook --- tools/testing/selftests/seccomp/settings | 1 + 1 file changed, 1 insertion(+) create mode 100644 tools/testing/selftests/seccomp/settings (limited to 'tools') diff --git a/tools/testing/selftests/seccomp/settings b/tools/testing/selftests/seccomp/settings new file mode 100644 index 000000000000..ba4d85f74cd6 --- /dev/null +++ b/tools/testing/selftests/seccomp/settings @@ -0,0 +1 @@ +timeout=90 -- cgit v1.2.3 From 81a0c8bc82be7c15dbf3e54832334552e6b76e2b Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 6 Jun 2020 09:37:17 -0700 Subject: selftests/seccomp: Improve calibration loop The seccomp benchmark calibration loop did not need to take so long. Instead, use a simple 1 second timeout and multiply up to target. It does not need to be accurate. Signed-off-by: Kees Cook --- .../testing/selftests/seccomp/seccomp_benchmark.c | 50 ++++++++++++++-------- 1 file changed, 32 insertions(+), 18 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/seccomp/seccomp_benchmark.c b/tools/testing/selftests/seccomp/seccomp_benchmark.c index eca13fe1fba9..91f5a89cadac 100644 --- a/tools/testing/selftests/seccomp/seccomp_benchmark.c +++ b/tools/testing/selftests/seccomp/seccomp_benchmark.c @@ -18,9 +18,9 @@ unsigned long long timing(clockid_t clk_id, unsigned long long samples) { - pid_t pid, ret; - unsigned long long i; struct timespec start, finish; + unsigned long long i; + pid_t pid, ret; pid = getpid(); assert(clock_gettime(clk_id, &start) == 0); @@ -31,30 +31,43 @@ unsigned long long timing(clockid_t clk_id, unsigned long long samples) assert(clock_gettime(clk_id, &finish) == 0); i = finish.tv_sec - start.tv_sec; - i *= 1000000000; + i *= 1000000000ULL; i += finish.tv_nsec - start.tv_nsec; - printf("%lu.%09lu - %lu.%09lu = %llu\n", + printf("%lu.%09lu - %lu.%09lu = %llu (%.1fs)\n", finish.tv_sec, finish.tv_nsec, start.tv_sec, start.tv_nsec, - i); + i, (double)i / 1000000000.0); return i; } unsigned long long calibrate(void) { - unsigned long long i; - - printf("Calibrating reasonable sample size...\n"); + struct timespec start, finish; + unsigned long long i, samples, step = 9973; + pid_t pid, ret; + int seconds = 15; - for (i = 5; ; i++) { - unsigned long long samples = 1 << i; + printf("Calibrating sample size for %d seconds worth of syscalls ...\n", seconds); - /* Find something that takes more than 5 seconds to run. */ - if (timing(CLOCK_REALTIME, samples) / 1000000000ULL > 5) - return samples; - } + samples = 0; + pid = getpid(); + assert(clock_gettime(CLOCK_MONOTONIC, &start) == 0); + do { + for (i = 0; i < step; i++) { + ret = syscall(__NR_getpid); + assert(pid == ret); + } + assert(clock_gettime(CLOCK_MONOTONIC, &finish) == 0); + + samples += step; + i = finish.tv_sec - start.tv_sec; + i *= 1000000000ULL; + i += finish.tv_nsec - start.tv_nsec; + } while (i < 1000000000ULL); + + return samples * seconds; } int main(int argc, char *argv[]) @@ -70,15 +83,16 @@ int main(int argc, char *argv[]) unsigned long long samples; unsigned long long native, filter1, filter2; + printf("Current BPF sysctl settings:\n"); + system("sysctl net.core.bpf_jit_enable"); + system("sysctl net.core.bpf_jit_harden"); + if (argc > 1) samples = strtoull(argv[1], NULL, 0); else samples = calibrate(); - printf("Current BPF sysctl settings:\n"); - system("sysctl net.core.bpf_jit_enable"); - system("sysctl net.core.bpf_jit_harden"); - printf("Benchmarking %llu samples...\n", samples); + printf("Benchmarking %llu syscalls...\n", samples); /* Native call */ native = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples; -- cgit v1.2.3 From cf8918dba2de7315a5878a56818a89cb899132da Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 11 Jun 2020 11:02:21 -0700 Subject: selftests/seccomp: Make kcmp() less required The seccomp tests are a bit noisy without CONFIG_CHECKPOINT_RESTORE (due to missing the kcmp() syscall). The seccomp tests are more accurate with kcmp(), but it's not strictly required. Refactor the tests to use alternatives (comparing fd numbers), and provide a central test for kcmp() so there is a single SKIP instead of many. Continue to produce warnings for the other tests, though. Additionally adds some more bad flag EINVAL tests to the addfd selftest. Cc: Andy Lutomirski Cc: Will Drewry Cc: Shuah Khan Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Martin KaFai Lau Cc: Song Liu Cc: Yonghong Song Cc: Andrii Nakryiko Cc: John Fastabend Cc: KP Singh Cc: linux-kselftest@vger.kernel.org Cc: netdev@vger.kernel.org Cc: bpf@vger.kernel.org Signed-off-by: Kees Cook --- tools/testing/selftests/seccomp/seccomp_bpf.c | 58 ++++++++++++++++++--------- 1 file changed, 38 insertions(+), 20 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index d76ae98a34a2..c589a2d495a3 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -242,6 +242,40 @@ int seccomp(unsigned int op, unsigned int flags, void *args) #define SIBLING_EXIT_FAILURE 0xbadface #define SIBLING_EXIT_NEWPRIVS 0xbadfeed +static int __filecmp(pid_t pid1, pid_t pid2, int fd1, int fd2) +{ +#ifdef __NR_kcmp + errno = 0; + return syscall(__NR_kcmp, pid1, pid2, KCMP_FILE, fd1, fd2); +#else + errno = ENOSYS; + return -1; +#endif +} + +/* Have TH_LOG report actual location filecmp() is used. */ +#define filecmp(pid1, pid2, fd1, fd2) ({ \ + int _ret; \ + \ + _ret = __filecmp(pid1, pid2, fd1, fd2); \ + if (_ret != 0) { \ + if (_ret < 0 && errno == ENOSYS) { \ + TH_LOG("kcmp() syscall missing (test is less accurate)");\ + _ret = 0; \ + } \ + } \ + _ret; }) + +TEST(kcmp) +{ + int ret; + + ret = __filecmp(getpid(), getpid(), 1, 1); + EXPECT_EQ(ret, 0); + if (ret != 0 && errno == ENOSYS) + SKIP(return, "Kernel does not support kcmp() (missing CONFIG_CHECKPOINT_RESTORE?)"); +} + TEST(mode_strict_support) { long ret; @@ -3601,16 +3635,6 @@ TEST(seccomp_get_notif_sizes) EXPECT_EQ(sizes.seccomp_notif_resp, sizeof(struct seccomp_notif_resp)); } -static int filecmp(pid_t pid1, pid_t pid2, int fd1, int fd2) -{ -#ifdef __NR_kcmp - return syscall(__NR_kcmp, pid1, pid2, KCMP_FILE, fd1, fd2); -#else - errno = ENOSYS; - return -1; -#endif -} - TEST(user_notification_continue) { pid_t pid; @@ -3635,20 +3659,14 @@ TEST(user_notification_continue) int dup_fd, pipe_fds[2]; pid_t self; - ret = pipe(pipe_fds); - if (ret < 0) - exit(1); + ASSERT_GE(pipe(pipe_fds), 0); dup_fd = dup(pipe_fds[0]); - if (dup_fd < 0) - exit(1); + ASSERT_GE(dup_fd, 0); + EXPECT_NE(pipe_fds[0], dup_fd); self = getpid(); - - ret = filecmp(self, self, pipe_fds[0], dup_fd); - if (ret) - exit(2); - + ASSERT_EQ(filecmp(self, self, pipe_fds[0], dup_fd), 0); exit(0); } -- cgit v1.2.3 From 279ed8900079831ba436007dbc5e24530f026ce2 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 11 Jun 2020 11:08:48 -0700 Subject: selftests/seccomp: Rename user_trap_syscall() to user_notif_syscall() The user_trap_syscall() helper creates a filter with SECCOMP_RET_USER_NOTIF. To avoid confusion with SECCOMP_RET_TRAP, rename the helper to user_notif_syscall(). Cc: Andy Lutomirski Cc: Will Drewry Cc: Shuah Khan Cc: Alexei Starovoitov Cc: Daniel Borkmann Cc: Martin KaFai Lau Cc: Song Liu Cc: Yonghong Song Cc: Andrii Nakryiko Cc: John Fastabend Cc: KP Singh Cc: linux-kselftest@vger.kernel.org Cc: netdev@vger.kernel.org Cc: bpf@vger.kernel.org Signed-off-by: Kees Cook --- tools/testing/selftests/seccomp/seccomp_bpf.c | 46 +++++++++++++-------------- 1 file changed, 23 insertions(+), 23 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index c589a2d495a3..43884b6625fc 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -3167,7 +3167,7 @@ skip: ASSERT_EQ(0, kill(pid, SIGKILL)); } -static int user_trap_syscall(int nr, unsigned int flags) +static int user_notif_syscall(int nr, unsigned int flags) { struct sock_filter filter[] = { BPF_STMT(BPF_LD+BPF_W+BPF_ABS, @@ -3213,7 +3213,7 @@ TEST(user_notification_basic) /* Check that we get -ENOSYS with no listener attached */ if (pid == 0) { - if (user_trap_syscall(__NR_getppid, 0) < 0) + if (user_notif_syscall(__NR_getppid, 0) < 0) exit(1); ret = syscall(__NR_getppid); exit(ret >= 0 || errno != ENOSYS); @@ -3230,13 +3230,13 @@ TEST(user_notification_basic) EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0); /* Check that the basic notification machinery works */ - listener = user_trap_syscall(__NR_getppid, - SECCOMP_FILTER_FLAG_NEW_LISTENER); + listener = user_notif_syscall(__NR_getppid, + SECCOMP_FILTER_FLAG_NEW_LISTENER); ASSERT_GE(listener, 0); /* Installing a second listener in the chain should EBUSY */ - EXPECT_EQ(user_trap_syscall(__NR_getppid, - SECCOMP_FILTER_FLAG_NEW_LISTENER), + EXPECT_EQ(user_notif_syscall(__NR_getppid, + SECCOMP_FILTER_FLAG_NEW_LISTENER), -1); EXPECT_EQ(errno, EBUSY); @@ -3305,12 +3305,12 @@ TEST(user_notification_with_tsync) /* these were exclusive */ flags = SECCOMP_FILTER_FLAG_NEW_LISTENER | SECCOMP_FILTER_FLAG_TSYNC; - ASSERT_EQ(-1, user_trap_syscall(__NR_getppid, flags)); + ASSERT_EQ(-1, user_notif_syscall(__NR_getppid, flags)); ASSERT_EQ(EINVAL, errno); /* but now they're not */ flags |= SECCOMP_FILTER_FLAG_TSYNC_ESRCH; - ret = user_trap_syscall(__NR_getppid, flags); + ret = user_notif_syscall(__NR_getppid, flags); close(ret); ASSERT_LE(0, ret); } @@ -3328,8 +3328,8 @@ TEST(user_notification_kill_in_middle) TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); } - listener = user_trap_syscall(__NR_getppid, - SECCOMP_FILTER_FLAG_NEW_LISTENER); + listener = user_notif_syscall(__NR_getppid, + SECCOMP_FILTER_FLAG_NEW_LISTENER); ASSERT_GE(listener, 0); /* @@ -3382,8 +3382,8 @@ TEST(user_notification_signal) ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0); - listener = user_trap_syscall(__NR_gettid, - SECCOMP_FILTER_FLAG_NEW_LISTENER); + listener = user_notif_syscall(__NR_gettid, + SECCOMP_FILTER_FLAG_NEW_LISTENER); ASSERT_GE(listener, 0); pid = fork(); @@ -3452,8 +3452,8 @@ TEST(user_notification_closed_listener) TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); } - listener = user_trap_syscall(__NR_getppid, - SECCOMP_FILTER_FLAG_NEW_LISTENER); + listener = user_notif_syscall(__NR_getppid, + SECCOMP_FILTER_FLAG_NEW_LISTENER); ASSERT_GE(listener, 0); /* @@ -3489,8 +3489,8 @@ TEST(user_notification_child_pid_ns) SKIP(return, "kernel missing CLONE_NEWUSER support"); }; - listener = user_trap_syscall(__NR_getppid, - SECCOMP_FILTER_FLAG_NEW_LISTENER); + listener = user_notif_syscall(__NR_getppid, + SECCOMP_FILTER_FLAG_NEW_LISTENER); ASSERT_GE(listener, 0); pid = fork(); @@ -3529,8 +3529,8 @@ TEST(user_notification_sibling_pid_ns) TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); } - listener = user_trap_syscall(__NR_getppid, - SECCOMP_FILTER_FLAG_NEW_LISTENER); + listener = user_notif_syscall(__NR_getppid, + SECCOMP_FILTER_FLAG_NEW_LISTENER); ASSERT_GE(listener, 0); pid = fork(); @@ -3597,8 +3597,8 @@ TEST(user_notification_fault_recv) ASSERT_EQ(unshare(CLONE_NEWUSER), 0); - listener = user_trap_syscall(__NR_getppid, - SECCOMP_FILTER_FLAG_NEW_LISTENER); + listener = user_notif_syscall(__NR_getppid, + SECCOMP_FILTER_FLAG_NEW_LISTENER); ASSERT_GE(listener, 0); pid = fork(); @@ -3649,7 +3649,7 @@ TEST(user_notification_continue) TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); } - listener = user_trap_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER); + listener = user_notif_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER); ASSERT_GE(listener, 0); pid = fork(); @@ -3743,7 +3743,7 @@ TEST(user_notification_filter_empty) if (pid == 0) { int listener; - listener = user_trap_syscall(__NR_mknod, SECCOMP_FILTER_FLAG_NEW_LISTENER); + listener = user_notif_syscall(__NR_mknod, SECCOMP_FILTER_FLAG_NEW_LISTENER); if (listener < 0) _exit(EXIT_FAILURE); @@ -3799,7 +3799,7 @@ TEST(user_notification_filter_empty_threaded) int listener, status; pthread_t thread; - listener = user_trap_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER); + listener = user_notif_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER); if (listener < 0) _exit(EXIT_FAILURE); -- cgit v1.2.3 From 47e33c05f9f07cac3de833e531bcac9ae052c7ca Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 15 Jun 2020 15:42:46 -0700 Subject: seccomp: Fix ioctl number for SECCOMP_IOCTL_NOTIF_ID_VALID When SECCOMP_IOCTL_NOTIF_ID_VALID was first introduced it had the wrong direction flag set. While this isn't a big deal as nothing currently enforces these bits in the kernel, it should be defined correctly. Fix the define and provide support for the old command until it is no longer needed for backward compatibility. Fixes: 6a21cc50f0c7 ("seccomp: add a return code to trap to userspace") Signed-off-by: Kees Cook --- include/uapi/linux/seccomp.h | 3 ++- kernel/seccomp.c | 9 +++++++++ tools/testing/selftests/seccomp/seccomp_bpf.c | 2 +- 3 files changed, 12 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h index c1735455bc53..965290f7dcc2 100644 --- a/include/uapi/linux/seccomp.h +++ b/include/uapi/linux/seccomp.h @@ -123,5 +123,6 @@ struct seccomp_notif_resp { #define SECCOMP_IOCTL_NOTIF_RECV SECCOMP_IOWR(0, struct seccomp_notif) #define SECCOMP_IOCTL_NOTIF_SEND SECCOMP_IOWR(1, \ struct seccomp_notif_resp) -#define SECCOMP_IOCTL_NOTIF_ID_VALID SECCOMP_IOR(2, __u64) +#define SECCOMP_IOCTL_NOTIF_ID_VALID SECCOMP_IOW(2, __u64) + #endif /* _UAPI_LINUX_SECCOMP_H */ diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 5f0e3f3a7a5d..0ed57e8c49d0 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -44,6 +44,14 @@ #include #include +/* + * When SECCOMP_IOCTL_NOTIF_ID_VALID was first introduced, it had the + * wrong direction flag in the ioctl number. This is the broken one, + * which the kernel needs to keep supporting until all userspaces stop + * using the wrong command number. + */ +#define SECCOMP_IOCTL_NOTIF_ID_VALID_WRONG_DIR SECCOMP_IOR(2, __u64) + enum notify_state { SECCOMP_NOTIFY_INIT, SECCOMP_NOTIFY_SENT, @@ -1236,6 +1244,7 @@ static long seccomp_notify_ioctl(struct file *file, unsigned int cmd, return seccomp_notify_recv(filter, buf); case SECCOMP_IOCTL_NOTIF_SEND: return seccomp_notify_send(filter, buf); + case SECCOMP_IOCTL_NOTIF_ID_VALID_WRONG_DIR: case SECCOMP_IOCTL_NOTIF_ID_VALID: return seccomp_notify_id_valid(filter, buf); default: diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 43884b6625fc..61f9ac200001 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -186,7 +186,7 @@ struct seccomp_metadata { #define SECCOMP_IOCTL_NOTIF_RECV SECCOMP_IOWR(0, struct seccomp_notif) #define SECCOMP_IOCTL_NOTIF_SEND SECCOMP_IOWR(1, \ struct seccomp_notif_resp) -#define SECCOMP_IOCTL_NOTIF_ID_VALID SECCOMP_IOR(2, __u64) +#define SECCOMP_IOCTL_NOTIF_ID_VALID SECCOMP_IOW(2, __u64) struct seccomp_notif { __u64 id; -- cgit v1.2.3 From 9d1587adcc356c1570c9830364e7797a9c19696c Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 4 Jul 2020 22:05:43 -0700 Subject: selftests/harness: Clean up kern-doc for fixtures The FIXTURE*() macro kern-doc examples had the wrong names for the C code examples associated with them. Fix those and clarify that FIXTURE_DATA() usage should be avoided. Cc: Shuah Khan Fixes: 74bc7c97fa88 ("kselftest: add fixture variants") Acked-by: Jakub Kicinski Signed-off-by: Kees Cook --- tools/testing/selftests/kselftest_harness.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index c9f03ef93338..7f32a7099a81 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -195,8 +195,9 @@ * * .. code-block:: c * - * FIXTURE_DATA(datatype name) + * FIXTURE_DATA(datatype_name) * + * Almost always, you want just FIXTURE() instead (see below). * This call may be used when the type of the fixture data * is needed. In general, this should not be needed unless * the *self* is being passed to a helper directly. @@ -211,7 +212,7 @@ * * .. code-block:: c * - * FIXTURE(datatype name) { + * FIXTURE(fixture_name) { * type property1; * ... * }; @@ -238,7 +239,7 @@ * * .. code-block:: c * - * FIXTURE_SETUP(fixture name) { implementation } + * FIXTURE_SETUP(fixture_name) { implementation } * * Populates the required "setup" function for a fixture. An instance of the * datatype defined with FIXTURE_DATA() will be exposed as *self* for the @@ -264,7 +265,7 @@ * * .. code-block:: c * - * FIXTURE_TEARDOWN(fixture name) { implementation } + * FIXTURE_TEARDOWN(fixture_name) { implementation } * * Populates the required "teardown" function for a fixture. An instance of the * datatype defined with FIXTURE_DATA() will be exposed as *self* for the @@ -285,7 +286,7 @@ * * .. code-block:: c * - * FIXTURE_VARIANT(datatype name) { + * FIXTURE_VARIANT(fixture_name) { * type property1; * ... * }; @@ -305,8 +306,8 @@ * * .. code-block:: c * - * FIXTURE_ADD(datatype name) { - * .property1 = val1; + * FIXTURE_VARIANT_ADD(fixture_name, variant_name) { + * .property1 = val1, * ... * }; * -- cgit v1.2.3 From adeeec8472397001901c06aa41d1b2fd1068e222 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 4 Jul 2020 22:34:04 -0700 Subject: selftests/seccomp: Refactor to use fixture variants Now that the selftest harness has variants, use them to eliminate a bunch of copy/paste duplication. Reviewed-by: Jakub Kicinski Tested-by: Will Deacon Signed-off-by: Kees Cook --- tools/testing/selftests/seccomp/seccomp_bpf.c | 199 ++++++-------------------- 1 file changed, 42 insertions(+), 157 deletions(-) (limited to 'tools') diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 61f9ac200001..462a30e81ca9 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1510,6 +1510,7 @@ pid_t setup_trace_fixture(struct __test_metadata *_metadata, return tracer_pid; } + void teardown_trace_fixture(struct __test_metadata *_metadata, pid_t tracer) { @@ -1789,7 +1790,7 @@ void change_syscall(struct __test_metadata *_metadata, EXPECT_EQ(0, ret); } -void tracer_syscall(struct __test_metadata *_metadata, pid_t tracee, +void tracer_seccomp(struct __test_metadata *_metadata, pid_t tracee, int status, void *args) { int ret; @@ -1866,6 +1867,24 @@ FIXTURE(TRACE_syscall) { pid_t tracer, mytid, mypid, parent; }; +FIXTURE_VARIANT(TRACE_syscall) { + /* + * All of the SECCOMP_RET_TRACE behaviors can be tested with either + * SECCOMP_RET_TRACE+PTRACE_CONT or plain ptrace()+PTRACE_SYSCALL. + * This indicates if we should use SECCOMP_RET_TRACE (false), or + * ptrace (true). + */ + bool use_ptrace; +}; + +FIXTURE_VARIANT_ADD(TRACE_syscall, ptrace) { + .use_ptrace = true, +}; + +FIXTURE_VARIANT_ADD(TRACE_syscall, seccomp) { + .use_ptrace = false, +}; + FIXTURE_SETUP(TRACE_syscall) { struct sock_filter filter[] = { @@ -1881,12 +1900,11 @@ FIXTURE_SETUP(TRACE_syscall) BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1005), BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), }; - - memset(&self->prog, 0, sizeof(self->prog)); - self->prog.filter = malloc(sizeof(filter)); - ASSERT_NE(NULL, self->prog.filter); - memcpy(self->prog.filter, filter, sizeof(filter)); - self->prog.len = (unsigned short)ARRAY_SIZE(filter); + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; /* Prepare some testable syscall results. */ self->mytid = syscall(__NR_gettid); @@ -1904,60 +1922,28 @@ FIXTURE_SETUP(TRACE_syscall) ASSERT_NE(self->parent, self->mypid); /* Launch tracer. */ - self->tracer = setup_trace_fixture(_metadata, tracer_syscall, NULL, - false); -} - -FIXTURE_TEARDOWN(TRACE_syscall) -{ - teardown_trace_fixture(_metadata, self->tracer); - if (self->prog.filter) - free(self->prog.filter); -} + self->tracer = setup_trace_fixture(_metadata, + variant->use_ptrace ? tracer_ptrace + : tracer_seccomp, + NULL, variant->use_ptrace); -TEST_F(TRACE_syscall, ptrace_syscall_redirected) -{ - /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */ - teardown_trace_fixture(_metadata, self->tracer); - self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL, - true); - - /* Tracer will redirect getpid to getppid. */ - EXPECT_NE(self->mypid, syscall(__NR_getpid)); -} + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); -TEST_F(TRACE_syscall, ptrace_syscall_errno) -{ - /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */ - teardown_trace_fixture(_metadata, self->tracer); - self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL, - true); + if (variant->use_ptrace) + return; - /* Tracer should skip the open syscall, resulting in ESRCH. */ - EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat)); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); + ASSERT_EQ(0, ret); } -TEST_F(TRACE_syscall, ptrace_syscall_faked) +FIXTURE_TEARDOWN(TRACE_syscall) { - /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */ teardown_trace_fixture(_metadata, self->tracer); - self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL, - true); - - /* Tracer should skip the gettid syscall, resulting fake pid. */ - EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid)); } TEST_F(TRACE_syscall, syscall_allowed) { - long ret; - - ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); - ASSERT_EQ(0, ret); - - ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); - ASSERT_EQ(0, ret); - /* getppid works as expected (no changes). */ EXPECT_EQ(self->parent, syscall(__NR_getppid)); EXPECT_NE(self->mypid, syscall(__NR_getppid)); @@ -1965,14 +1951,6 @@ TEST_F(TRACE_syscall, syscall_allowed) TEST_F(TRACE_syscall, syscall_redirected) { - long ret; - - ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); - ASSERT_EQ(0, ret); - - ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); - ASSERT_EQ(0, ret); - /* getpid has been redirected to getppid as expected. */ EXPECT_EQ(self->parent, syscall(__NR_getpid)); EXPECT_NE(self->mypid, syscall(__NR_getpid)); @@ -1980,33 +1958,17 @@ TEST_F(TRACE_syscall, syscall_redirected) TEST_F(TRACE_syscall, syscall_errno) { - long ret; - - ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); - ASSERT_EQ(0, ret); - - ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); - ASSERT_EQ(0, ret); - - /* openat has been skipped and an errno return. */ + /* Tracer should skip the open syscall, resulting in ESRCH. */ EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat)); } TEST_F(TRACE_syscall, syscall_faked) { - long ret; - - ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); - ASSERT_EQ(0, ret); - - ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); - ASSERT_EQ(0, ret); - - /* gettid has been skipped and an altered return value stored. */ + /* Tracer skips the gettid syscall and store altered return value. */ EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid)); } -TEST_F(TRACE_syscall, skip_after_RET_TRACE) +TEST_F(TRACE_syscall, skip_after) { struct sock_filter filter[] = { BPF_STMT(BPF_LD|BPF_W|BPF_ABS, @@ -2021,14 +1983,7 @@ TEST_F(TRACE_syscall, skip_after_RET_TRACE) }; long ret; - ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); - ASSERT_EQ(0, ret); - - /* Install fixture filter. */ - ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); - ASSERT_EQ(0, ret); - - /* Install "errno on getppid" filter. */ + /* Install additional "errno on getppid" filter. */ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); ASSERT_EQ(0, ret); @@ -2038,7 +1993,7 @@ TEST_F(TRACE_syscall, skip_after_RET_TRACE) EXPECT_EQ(EPERM, errno); } -TEST_F_SIGNAL(TRACE_syscall, kill_after_RET_TRACE, SIGSYS) +TEST_F_SIGNAL(TRACE_syscall, kill_after, SIGSYS) { struct sock_filter filter[] = { BPF_STMT(BPF_LD|BPF_W|BPF_ABS, @@ -2053,77 +2008,7 @@ TEST_F_SIGNAL(TRACE_syscall, kill_after_RET_TRACE, SIGSYS) }; long ret; - ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); - ASSERT_EQ(0, ret); - - /* Install fixture filter. */ - ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); - ASSERT_EQ(0, ret); - - /* Install "death on getppid" filter. */ - ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); - ASSERT_EQ(0, ret); - - /* Tracer will redirect getpid to getppid, and we should die. */ - EXPECT_NE(self->mypid, syscall(__NR_getpid)); -} - -TEST_F(TRACE_syscall, skip_after_ptrace) -{ - struct sock_filter filter[] = { - BPF_STMT(BPF_LD|BPF_W|BPF_ABS, - offsetof(struct seccomp_data, nr)), - BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1), - BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM), - BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), - }; - struct sock_fprog prog = { - .len = (unsigned short)ARRAY_SIZE(filter), - .filter = filter, - }; - long ret; - - /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */ - teardown_trace_fixture(_metadata, self->tracer); - self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL, - true); - - ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); - ASSERT_EQ(0, ret); - - /* Install "errno on getppid" filter. */ - ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); - ASSERT_EQ(0, ret); - - /* Tracer will redirect getpid to getppid, and we should see EPERM. */ - EXPECT_EQ(-1, syscall(__NR_getpid)); - EXPECT_EQ(EPERM, errno); -} - -TEST_F_SIGNAL(TRACE_syscall, kill_after_ptrace, SIGSYS) -{ - struct sock_filter filter[] = { - BPF_STMT(BPF_LD|BPF_W|BPF_ABS, - offsetof(struct seccomp_data, nr)), - BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1), - BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), - BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), - }; - struct sock_fprog prog = { - .len = (unsigned short)ARRAY_SIZE(filter), - .filter = filter, - }; - long ret; - - /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */ - teardown_trace_fixture(_metadata, self->tracer); - self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL, - true); - - ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); - ASSERT_EQ(0, ret); - - /* Install "death on getppid" filter. */ + /* Install additional "death on getppid" filter. */ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); ASSERT_EQ(0, ret); -- cgit v1.2.3 From 11eb004ef7ead0c43586adea0340b51da095bdee Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 4 Jul 2020 22:57:33 -0700 Subject: selftests/seccomp: Check ENOSYS under tracing There should be no difference between -1 and other negative syscalls while tracing. Cc: Keno Fischer Tested-by: Will Deacon Signed-off-by: Kees Cook --- tools/testing/selftests/seccomp/seccomp_bpf.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 462a30e81ca9..13b88d578db6 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -1942,6 +1942,26 @@ FIXTURE_TEARDOWN(TRACE_syscall) teardown_trace_fixture(_metadata, self->tracer); } +TEST(negative_ENOSYS) +{ + /* + * There should be no difference between an "internal" skip + * and userspace asking for syscall "-1". + */ + errno = 0; + EXPECT_EQ(-1, syscall(-1)); + EXPECT_EQ(errno, ENOSYS); + /* And no difference for "still not valid but not -1". */ + errno = 0; + EXPECT_EQ(-1, syscall(-101)); + EXPECT_EQ(errno, ENOSYS); +} + +TEST_F(TRACE_syscall, negative_ENOSYS) +{ + negative_ENOSYS(_metadata); +} + TEST_F(TRACE_syscall, syscall_allowed) { /* getppid works as expected (no changes). */ -- cgit v1.2.3 From c97aedc52dce4c87d4c44de4e6af941cd102600c Mon Sep 17 00:00:00 2001 From: Sargun Dhillon Date: Tue, 2 Jun 2020 18:10:44 -0700 Subject: selftests/seccomp: Test SECCOMP_IOCTL_NOTIF_ADDFD Test whether we can add file descriptors in response to notifications. This injects the file descriptors via notifications, and then uses kcmp to determine whether or not it has been successful. It also includes some basic sanity checking for arguments. Signed-off-by: Sargun Dhillon Cc: Al Viro Cc: Chris Palmer Cc: Christian Brauner Cc: Jann Horn Cc: Kees Cook Cc: Robert Sesek Cc: Tycho Andersen Cc: Matt Denton Cc: linux-fsdevel@vger.kernel.org Cc: linux-kernel@vger.kernel.org Link: https://lore.kernel.org/r/20200603011044.7972-5-sargun@sargun.me Co-developed-by: Kees Cook Signed-off-by: Kees Cook --- tools/testing/selftests/seccomp/seccomp_bpf.c | 229 ++++++++++++++++++++++++++ 1 file changed, 229 insertions(+) (limited to 'tools') diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 13b88d578db6..41ee7a73ee07 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include @@ -173,7 +174,9 @@ struct seccomp_metadata { #ifndef SECCOMP_FILTER_FLAG_NEW_LISTENER #define SECCOMP_FILTER_FLAG_NEW_LISTENER (1UL << 3) +#endif +#ifndef SECCOMP_RET_USER_NOTIF #define SECCOMP_RET_USER_NOTIF 0x7fc00000U #define SECCOMP_IOC_MAGIC '!' @@ -209,6 +212,39 @@ struct seccomp_notif_sizes { }; #endif +#ifndef SECCOMP_IOCTL_NOTIF_ADDFD +/* On success, the return value is the remote process's added fd number */ +#define SECCOMP_IOCTL_NOTIF_ADDFD SECCOMP_IOW(3, \ + struct seccomp_notif_addfd) + +/* valid flags for seccomp_notif_addfd */ +#define SECCOMP_ADDFD_FLAG_SETFD (1UL << 0) /* Specify remote fd */ + +struct seccomp_notif_addfd { + __u64 id; + __u32 flags; + __u32 srcfd; + __u32 newfd; + __u32 newfd_flags; +}; +#endif + +struct seccomp_notif_addfd_small { + __u64 id; + char weird[4]; +}; +#define SECCOMP_IOCTL_NOTIF_ADDFD_SMALL \ + SECCOMP_IOW(3, struct seccomp_notif_addfd_small) + +struct seccomp_notif_addfd_big { + union { + struct seccomp_notif_addfd addfd; + char buf[sizeof(struct seccomp_notif_addfd) + 8]; + }; +}; +#define SECCOMP_IOCTL_NOTIF_ADDFD_BIG \ + SECCOMP_IOWR(3, struct seccomp_notif_addfd_big) + #ifndef PTRACE_EVENTMSG_SYSCALL_ENTRY #define PTRACE_EVENTMSG_SYSCALL_ENTRY 1 #define PTRACE_EVENTMSG_SYSCALL_EXIT 2 @@ -3761,6 +3797,199 @@ TEST(user_notification_filter_empty_threaded) EXPECT_GT((pollfd.revents & POLLHUP) ?: 0, 0); } +TEST(user_notification_addfd) +{ + pid_t pid; + long ret; + int status, listener, memfd, fd; + struct seccomp_notif_addfd addfd = {}; + struct seccomp_notif_addfd_small small = {}; + struct seccomp_notif_addfd_big big = {}; + struct seccomp_notif req = {}; + struct seccomp_notif_resp resp = {}; + /* 100 ms */ + struct timespec delay = { .tv_nsec = 100000000 }; + + memfd = memfd_create("test", 0); + ASSERT_GE(memfd, 0); + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + /* Check that the basic notification machinery works */ + listener = user_notif_syscall(__NR_getppid, + SECCOMP_FILTER_FLAG_NEW_LISTENER); + ASSERT_GE(listener, 0); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + if (syscall(__NR_getppid) != USER_NOTIF_MAGIC) + exit(1); + exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); + } + + ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); + + addfd.srcfd = memfd; + addfd.newfd = 0; + addfd.id = req.id; + addfd.flags = 0x0; + + /* Verify bad newfd_flags cannot be set */ + addfd.newfd_flags = ~O_CLOEXEC; + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); + EXPECT_EQ(errno, EINVAL); + addfd.newfd_flags = O_CLOEXEC; + + /* Verify bad flags cannot be set */ + addfd.flags = 0xff; + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); + EXPECT_EQ(errno, EINVAL); + addfd.flags = 0; + + /* Verify that remote_fd cannot be set without setting flags */ + addfd.newfd = 1; + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); + EXPECT_EQ(errno, EINVAL); + addfd.newfd = 0; + + /* Verify small size cannot be set */ + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_SMALL, &small), -1); + EXPECT_EQ(errno, EINVAL); + + /* Verify we can't send bits filled in unknown buffer area */ + memset(&big, 0xAA, sizeof(big)); + big.addfd = addfd; + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big), -1); + EXPECT_EQ(errno, E2BIG); + + + /* Verify we can set an arbitrary remote fd */ + fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd); + /* + * The child has fds 0(stdin), 1(stdout), 2(stderr), 3(memfd), + * 4(listener), so the newly allocated fd should be 5. + */ + EXPECT_EQ(fd, 5); + EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0); + + /* Verify we can set an arbitrary remote fd with large size */ + memset(&big, 0x0, sizeof(big)); + big.addfd = addfd; + fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big); + EXPECT_EQ(fd, 6); + + /* Verify we can set a specific remote fd */ + addfd.newfd = 42; + addfd.flags = SECCOMP_ADDFD_FLAG_SETFD; + fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd); + EXPECT_EQ(fd, 42); + EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0); + + /* Resume syscall */ + resp.id = req.id; + resp.error = 0; + resp.val = USER_NOTIF_MAGIC; + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); + + /* + * This sets the ID of the ADD FD to the last request plus 1. The + * notification ID increments 1 per notification. + */ + addfd.id = req.id + 1; + + /* This spins until the underlying notification is generated */ + while (ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd) != -1 && + errno != -EINPROGRESS) + nanosleep(&delay, NULL); + + memset(&req, 0, sizeof(req)); + ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); + ASSERT_EQ(addfd.id, req.id); + + resp.id = req.id; + resp.error = 0; + resp.val = USER_NOTIF_MAGIC; + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); + + /* Wait for child to finish. */ + EXPECT_EQ(waitpid(pid, &status, 0), pid); + EXPECT_EQ(true, WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); + + close(memfd); +} + +TEST(user_notification_addfd_rlimit) +{ + pid_t pid; + long ret; + int status, listener, memfd; + struct seccomp_notif_addfd addfd = {}; + struct seccomp_notif req = {}; + struct seccomp_notif_resp resp = {}; + const struct rlimit lim = { + .rlim_cur = 0, + .rlim_max = 0, + }; + + memfd = memfd_create("test", 0); + ASSERT_GE(memfd, 0); + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + /* Check that the basic notification machinery works */ + listener = user_notif_syscall(__NR_getppid, + SECCOMP_FILTER_FLAG_NEW_LISTENER); + ASSERT_GE(listener, 0); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) + exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC); + + + ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0); + + ASSERT_EQ(prlimit(pid, RLIMIT_NOFILE, &lim, NULL), 0); + + addfd.srcfd = memfd; + addfd.newfd_flags = O_CLOEXEC; + addfd.newfd = 0; + addfd.id = req.id; + addfd.flags = 0; + + /* Should probably spot check /proc/sys/fs/file-nr */ + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); + EXPECT_EQ(errno, EMFILE); + + addfd.newfd = 100; + addfd.flags = SECCOMP_ADDFD_FLAG_SETFD; + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1); + EXPECT_EQ(errno, EBADF); + + resp.id = req.id; + resp.error = 0; + resp.val = USER_NOTIF_MAGIC; + + EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0); + + /* Wait for child to finish. */ + EXPECT_EQ(waitpid(pid, &status, 0), pid); + EXPECT_EQ(true, WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); + + close(memfd); +} + /* * TODO: * - expand NNP testing -- cgit v1.2.3