summaryrefslogtreecommitdiffstats
path: root/tools/testing/selftests
diff options
context:
space:
mode:
Diffstat (limited to 'tools/testing/selftests')
-rw-r--r--tools/testing/selftests/arm64/signal/test_signals_utils.c7
-rw-r--r--tools/testing/selftests/bpf/Makefile3
-rw-r--r--tools/testing/selftests/bpf/cgroup_helpers.c137
-rw-r--r--tools/testing/selftests/bpf/cgroup_helpers.h16
-rw-r--r--tools/testing/selftests/bpf/network_helpers.c27
-rw-r--r--tools/testing/selftests/bpf/network_helpers.h1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c79
-rw-r--r--tools/testing/selftests/bpf/prog_tests/task_pt_regs.c1
-rw-r--r--tools/testing/selftests/bpf/progs/connect4_dropper.c26
-rw-r--r--tools/testing/selftests/bpf/progs/test_task_pt_regs.c19
-rwxr-xr-xtools/testing/selftests/bpf/test_lwt_ip_encap.sh13
-rwxr-xr-xtools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh2
-rw-r--r--tools/testing/selftests/kvm/.gitignore2
-rw-r--r--tools/testing/selftests/kvm/Makefile4
-rw-r--r--tools/testing/selftests/kvm/access_tracking_perf_test.c6
-rw-r--r--tools/testing/selftests/kvm/demand_paging_test.c15
-rw-r--r--tools/testing/selftests/kvm/dirty_log_perf_test.c62
-rw-r--r--tools/testing/selftests/kvm/include/test_util.h4
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/processor.h34
-rw-r--r--tools/testing/selftests/kvm/kvm_page_table_test.c7
-rw-r--r--tools/testing/selftests/kvm/lib/test_util.c17
-rw-r--r--tools/testing/selftests/kvm/rseq_test.c286
-rw-r--r--tools/testing/selftests/kvm/steal_time.c4
-rw-r--r--tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c128
-rw-r--r--tools/testing/selftests/nci/nci_dev.c2
-rw-r--r--tools/testing/selftests/net/af_unix/Makefile5
-rw-r--r--tools/testing/selftests/net/af_unix/test_unix_oob.c5
-rwxr-xr-xtools/testing/selftests/net/altnames.sh2
-rwxr-xr-xtools/testing/selftests/netfilter/nft_nat_zones.sh309
-rwxr-xr-xtools/testing/selftests/netfilter/nft_zones_many.sh156
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-syscall-asm.S37
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-syscall.c36
32 files changed, 1341 insertions, 111 deletions
diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.c b/tools/testing/selftests/arm64/signal/test_signals_utils.c
index 6836510a522f..22722abc9dfa 100644
--- a/tools/testing/selftests/arm64/signal/test_signals_utils.c
+++ b/tools/testing/selftests/arm64/signal/test_signals_utils.c
@@ -266,16 +266,19 @@ int test_init(struct tdescr *td)
td->feats_supported |= FEAT_SSBS;
if (getauxval(AT_HWCAP) & HWCAP_SVE)
td->feats_supported |= FEAT_SVE;
- if (feats_ok(td))
+ if (feats_ok(td)) {
fprintf(stderr,
"Required Features: [%s] supported\n",
feats_to_string(td->feats_required &
td->feats_supported));
- else
+ } else {
fprintf(stderr,
"Required Features: [%s] NOT supported\n",
feats_to_string(td->feats_required &
~td->feats_supported));
+ td->result = KSFT_SKIP;
+ return 0;
+ }
}
/* Perform test specific additional initialization */
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 866531c08e4f..799b88152e9e 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -375,7 +375,8 @@ $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.o: \
$(TRUNNER_BPF_PROGS_DIR)/%.c \
$(TRUNNER_BPF_PROGS_DIR)/*.h \
$$(INCLUDE_DIR)/vmlinux.h \
- $(wildcard $(BPFDIR)/bpf_*.h) | $(TRUNNER_OUTPUT)
+ $(wildcard $(BPFDIR)/bpf_*.h) \
+ | $(TRUNNER_OUTPUT) $$(BPFOBJ)
$$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@, \
$(TRUNNER_BPF_CFLAGS))
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c
index 033051717ba5..f3daa44a8266 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.c
+++ b/tools/testing/selftests/bpf/cgroup_helpers.c
@@ -12,27 +12,36 @@
#include <unistd.h>
#include <ftw.h>
-
#include "cgroup_helpers.h"
/*
* To avoid relying on the system setup, when setup_cgroup_env is called
- * we create a new mount namespace, and cgroup namespace. The cgroup2
- * root is mounted at CGROUP_MOUNT_PATH
- *
- * Unfortunately, most people don't have cgroupv2 enabled at this point in time.
- * It's easier to create our own mount namespace and manage it ourselves.
+ * we create a new mount namespace, and cgroup namespace. The cgroupv2
+ * root is mounted at CGROUP_MOUNT_PATH. Unfortunately, most people don't
+ * have cgroupv2 enabled at this point in time. It's easier to create our
+ * own mount namespace and manage it ourselves. We assume /mnt exists.
*
- * We assume /mnt exists.
+ * Related cgroupv1 helpers are named *classid*(), since we only use the
+ * net_cls controller for tagging net_cls.classid. We assume the default
+ * mount under /sys/fs/cgroup/net_cls, which should be the case for the
+ * vast majority of users.
*/
#define WALK_FD_LIMIT 16
+
#define CGROUP_MOUNT_PATH "/mnt"
+#define CGROUP_MOUNT_DFLT "/sys/fs/cgroup"
+#define NETCLS_MOUNT_PATH CGROUP_MOUNT_DFLT "/net_cls"
#define CGROUP_WORK_DIR "/cgroup-test-work-dir"
+
#define format_cgroup_path(buf, path) \
snprintf(buf, sizeof(buf), "%s%s%s", CGROUP_MOUNT_PATH, \
CGROUP_WORK_DIR, path)
+#define format_classid_path(buf) \
+ snprintf(buf, sizeof(buf), "%s%s", NETCLS_MOUNT_PATH, \
+ CGROUP_WORK_DIR)
+
/**
* enable_all_controllers() - Enable all available cgroup v2 controllers
*
@@ -139,8 +148,7 @@ static int nftwfunc(const char *filename, const struct stat *statptr,
return 0;
}
-
-static int join_cgroup_from_top(char *cgroup_path)
+static int join_cgroup_from_top(const char *cgroup_path)
{
char cgroup_procs_path[PATH_MAX + 1];
pid_t pid = getpid();
@@ -313,3 +321,114 @@ int cgroup_setup_and_join(const char *path) {
}
return cg_fd;
}
+
+/**
+ * setup_classid_environment() - Setup the cgroupv1 net_cls environment
+ *
+ * After calling this function, cleanup_classid_environment should be called
+ * once testing is complete.
+ *
+ * This function will print an error to stderr and return 1 if it is unable
+ * to setup the cgroup environment. If setup is successful, 0 is returned.
+ */
+int setup_classid_environment(void)
+{
+ char cgroup_workdir[PATH_MAX + 1];
+
+ format_classid_path(cgroup_workdir);
+
+ if (mount("tmpfs", CGROUP_MOUNT_DFLT, "tmpfs", 0, NULL) &&
+ errno != EBUSY) {
+ log_err("mount cgroup base");
+ return 1;
+ }
+
+ if (mkdir(NETCLS_MOUNT_PATH, 0777) && errno != EEXIST) {
+ log_err("mkdir cgroup net_cls");
+ return 1;
+ }
+
+ if (mount("net_cls", NETCLS_MOUNT_PATH, "cgroup", 0, "net_cls") &&
+ errno != EBUSY) {
+ log_err("mount cgroup net_cls");
+ return 1;
+ }
+
+ cleanup_classid_environment();
+
+ if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) {
+ log_err("mkdir cgroup work dir");
+ return 1;
+ }
+
+ return 0;
+}
+
+/**
+ * set_classid() - Set a cgroupv1 net_cls classid
+ * @id: the numeric classid
+ *
+ * Writes the passed classid into the cgroup work dir's net_cls.classid
+ * file in order to later on trigger socket tagging.
+ *
+ * On success, it returns 0, otherwise on failure it returns 1. If there
+ * is a failure, it prints the error to stderr.
+ */
+int set_classid(unsigned int id)
+{
+ char cgroup_workdir[PATH_MAX - 42];
+ char cgroup_classid_path[PATH_MAX + 1];
+ int fd, rc = 0;
+
+ format_classid_path(cgroup_workdir);
+ snprintf(cgroup_classid_path, sizeof(cgroup_classid_path),
+ "%s/net_cls.classid", cgroup_workdir);
+
+ fd = open(cgroup_classid_path, O_WRONLY);
+ if (fd < 0) {
+ log_err("Opening cgroup classid: %s", cgroup_classid_path);
+ return 1;
+ }
+
+ if (dprintf(fd, "%u\n", id) < 0) {
+ log_err("Setting cgroup classid");
+ rc = 1;
+ }
+
+ close(fd);
+ return rc;
+}
+
+/**
+ * join_classid() - Join a cgroupv1 net_cls classid
+ *
+ * This function expects the cgroup work dir to be already created, as we
+ * join it here. This causes the process sockets to be tagged with the given
+ * net_cls classid.
+ *
+ * On success, it returns 0, otherwise on failure it returns 1.
+ */
+int join_classid(void)
+{
+ char cgroup_workdir[PATH_MAX + 1];
+
+ format_classid_path(cgroup_workdir);
+ return join_cgroup_from_top(cgroup_workdir);
+}
+
+/**
+ * cleanup_classid_environment() - Cleanup the cgroupv1 net_cls environment
+ *
+ * At call time, it moves the calling process to the root cgroup, and then
+ * runs the deletion process.
+ *
+ * On failure, it will print an error to stderr, and try to continue.
+ */
+void cleanup_classid_environment(void)
+{
+ char cgroup_workdir[PATH_MAX + 1];
+
+ format_classid_path(cgroup_workdir);
+ join_cgroup_from_top(NETCLS_MOUNT_PATH);
+ nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT);
+}
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h
index 5fe3d88e4f0d..629da3854b3e 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.h
+++ b/tools/testing/selftests/bpf/cgroup_helpers.h
@@ -1,6 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __CGROUP_HELPERS_H
#define __CGROUP_HELPERS_H
+
#include <errno.h>
#include <string.h>
@@ -8,12 +9,21 @@
#define log_err(MSG, ...) fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
__FILE__, __LINE__, clean_errno(), ##__VA_ARGS__)
-
+/* cgroupv2 related */
int cgroup_setup_and_join(const char *path);
int create_and_get_cgroup(const char *path);
+unsigned long long get_cgroup_id(const char *path);
+
int join_cgroup(const char *path);
+
int setup_cgroup_environment(void);
void cleanup_cgroup_environment(void);
-unsigned long long get_cgroup_id(const char *path);
-#endif
+/* cgroupv1 related */
+int set_classid(unsigned int id);
+int join_classid(void);
+
+int setup_classid_environment(void);
+void cleanup_classid_environment(void);
+
+#endif /* __CGROUP_HELPERS_H */
diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
index 7e9f6375757a..6db1af8fdee7 100644
--- a/tools/testing/selftests/bpf/network_helpers.c
+++ b/tools/testing/selftests/bpf/network_helpers.c
@@ -208,11 +208,26 @@ error_close:
static int connect_fd_to_addr(int fd,
const struct sockaddr_storage *addr,
- socklen_t addrlen)
+ socklen_t addrlen, const bool must_fail)
{
- if (connect(fd, (const struct sockaddr *)addr, addrlen)) {
- log_err("Failed to connect to server");
- return -1;
+ int ret;
+
+ errno = 0;
+ ret = connect(fd, (const struct sockaddr *)addr, addrlen);
+ if (must_fail) {
+ if (!ret) {
+ log_err("Unexpected success to connect to server");
+ return -1;
+ }
+ if (errno != EPERM) {
+ log_err("Unexpected error from connect to server");
+ return -1;
+ }
+ } else {
+ if (ret) {
+ log_err("Failed to connect to server");
+ return -1;
+ }
}
return 0;
@@ -257,7 +272,7 @@ int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts)
strlen(opts->cc) + 1))
goto error_close;
- if (connect_fd_to_addr(fd, &addr, addrlen))
+ if (connect_fd_to_addr(fd, &addr, addrlen, opts->must_fail))
goto error_close;
return fd;
@@ -289,7 +304,7 @@ int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms)
return -1;
}
- if (connect_fd_to_addr(client_fd, &addr, len))
+ if (connect_fd_to_addr(client_fd, &addr, len, false))
return -1;
return 0;
diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h
index da7e132657d5..d198181a5648 100644
--- a/tools/testing/selftests/bpf/network_helpers.h
+++ b/tools/testing/selftests/bpf/network_helpers.h
@@ -20,6 +20,7 @@ typedef __u16 __sum16;
struct network_helper_opts {
const char *cc;
int timeout_ms;
+ bool must_fail;
};
/* ipv4 test vector */
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c
new file mode 100644
index 000000000000..ab3b9bc5e6d1
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+
+#include "connect4_dropper.skel.h"
+
+#include "cgroup_helpers.h"
+#include "network_helpers.h"
+
+static int run_test(int cgroup_fd, int server_fd, bool classid)
+{
+ struct network_helper_opts opts = {
+ .must_fail = true,
+ };
+ struct connect4_dropper *skel;
+ int fd, err = 0;
+
+ skel = connect4_dropper__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return -1;
+
+ skel->links.connect_v4_dropper =
+ bpf_program__attach_cgroup(skel->progs.connect_v4_dropper,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links.connect_v4_dropper, "prog_attach")) {
+ err = -1;
+ goto out;
+ }
+
+ if (classid && !ASSERT_OK(join_classid(), "join_classid")) {
+ err = -1;
+ goto out;
+ }
+
+ fd = connect_to_fd_opts(server_fd, &opts);
+ if (fd < 0)
+ err = -1;
+ else
+ close(fd);
+out:
+ connect4_dropper__destroy(skel);
+ return err;
+}
+
+void test_cgroup_v1v2(void)
+{
+ struct network_helper_opts opts = {};
+ int server_fd, client_fd, cgroup_fd;
+ static const int port = 60123;
+
+ /* Step 1: Check base connectivity works without any BPF. */
+ server_fd = start_server(AF_INET, SOCK_STREAM, NULL, port, 0);
+ if (!ASSERT_GE(server_fd, 0, "server_fd"))
+ return;
+ client_fd = connect_to_fd_opts(server_fd, &opts);
+ if (!ASSERT_GE(client_fd, 0, "client_fd")) {
+ close(server_fd);
+ return;
+ }
+ close(client_fd);
+ close(server_fd);
+
+ /* Step 2: Check BPF policy prog attached to cgroups drops connectivity. */
+ cgroup_fd = test__join_cgroup("/connect_dropper");
+ if (!ASSERT_GE(cgroup_fd, 0, "cgroup_fd"))
+ return;
+ server_fd = start_server(AF_INET, SOCK_STREAM, NULL, port, 0);
+ if (!ASSERT_GE(server_fd, 0, "server_fd")) {
+ close(cgroup_fd);
+ return;
+ }
+ ASSERT_OK(run_test(cgroup_fd, server_fd, false), "cgroup-v2-only");
+ setup_classid_environment();
+ set_classid(42);
+ ASSERT_OK(run_test(cgroup_fd, server_fd, true), "cgroup-v1v2");
+ cleanup_classid_environment();
+ close(server_fd);
+ close(cgroup_fd);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c b/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c
index 53f0e0fa1a53..37c20b5ffa70 100644
--- a/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c
+++ b/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c
@@ -1,7 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
#define _GNU_SOURCE
#include <test_progs.h>
-#include <linux/ptrace.h>
#include "test_task_pt_regs.skel.h"
void test_task_pt_regs(void)
diff --git a/tools/testing/selftests/bpf/progs/connect4_dropper.c b/tools/testing/selftests/bpf/progs/connect4_dropper.c
new file mode 100644
index 000000000000..b565d997810a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/connect4_dropper.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <string.h>
+
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+
+#include <sys/socket.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#define VERDICT_REJECT 0
+#define VERDICT_PROCEED 1
+
+SEC("cgroup/connect4")
+int connect_v4_dropper(struct bpf_sock_addr *ctx)
+{
+ if (ctx->type != SOCK_STREAM)
+ return VERDICT_PROCEED;
+ if (ctx->user_port == bpf_htons(60123))
+ return VERDICT_REJECT;
+ return VERDICT_PROCEED;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_task_pt_regs.c b/tools/testing/selftests/bpf/progs/test_task_pt_regs.c
index 6c059f1cfa1b..e6cb09259408 100644
--- a/tools/testing/selftests/bpf/progs/test_task_pt_regs.c
+++ b/tools/testing/selftests/bpf/progs/test_task_pt_regs.c
@@ -1,12 +1,17 @@
// SPDX-License-Identifier: GPL-2.0
-#include <linux/ptrace.h>
-#include <linux/bpf.h>
+#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
-struct pt_regs current_regs = {};
-struct pt_regs ctx_regs = {};
+#define PT_REGS_SIZE sizeof(struct pt_regs)
+
+/*
+ * The kernel struct pt_regs isn't exported in its entirety to userspace.
+ * Pass it as an array to task_pt_regs.c
+ */
+char current_regs[PT_REGS_SIZE] = {};
+char ctx_regs[PT_REGS_SIZE] = {};
int uprobe_res = 0;
SEC("uprobe/trigger_func")
@@ -17,8 +22,10 @@ int handle_uprobe(struct pt_regs *ctx)
current = bpf_get_current_task_btf();
regs = (struct pt_regs *) bpf_task_pt_regs(current);
- __builtin_memcpy(&current_regs, regs, sizeof(*regs));
- __builtin_memcpy(&ctx_regs, ctx, sizeof(*ctx));
+ if (bpf_probe_read_kernel(current_regs, PT_REGS_SIZE, regs))
+ return 0;
+ if (bpf_probe_read_kernel(ctx_regs, PT_REGS_SIZE, ctx))
+ return 0;
/* Prove that uprobe was run */
uprobe_res = 1;
diff --git a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh
index 59ea56945e6c..b497bb85b667 100755
--- a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh
+++ b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh
@@ -112,6 +112,14 @@ setup()
ip netns add "${NS2}"
ip netns add "${NS3}"
+ # rp_filter gets confused by what these tests are doing, so disable it
+ ip netns exec ${NS1} sysctl -wq net.ipv4.conf.all.rp_filter=0
+ ip netns exec ${NS2} sysctl -wq net.ipv4.conf.all.rp_filter=0
+ ip netns exec ${NS3} sysctl -wq net.ipv4.conf.all.rp_filter=0
+ ip netns exec ${NS1} sysctl -wq net.ipv4.conf.default.rp_filter=0
+ ip netns exec ${NS2} sysctl -wq net.ipv4.conf.default.rp_filter=0
+ ip netns exec ${NS3} sysctl -wq net.ipv4.conf.default.rp_filter=0
+
ip link add veth1 type veth peer name veth2
ip link add veth3 type veth peer name veth4
ip link add veth5 type veth peer name veth6
@@ -236,11 +244,6 @@ setup()
ip -netns ${NS1} -6 route add ${IPv6_GRE}/128 dev veth5 via ${IPv6_6} ${VRF}
ip -netns ${NS2} -6 route add ${IPv6_GRE}/128 dev veth7 via ${IPv6_8} ${VRF}
- # rp_filter gets confused by what these tests are doing, so disable it
- ip netns exec ${NS1} sysctl -wq net.ipv4.conf.all.rp_filter=0
- ip netns exec ${NS2} sysctl -wq net.ipv4.conf.all.rp_filter=0
- ip netns exec ${NS3} sysctl -wq net.ipv4.conf.all.rp_filter=0
-
TMPFILE=$(mktemp /tmp/test_lwt_ip_encap.XXXXXX)
sleep 1 # reduce flakiness
diff --git a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
index beee0d5646a6..f7d84549cc3e 100755
--- a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
+++ b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
@@ -1,6 +1,6 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-# Copyright 2020 NXP Semiconductors
+# Copyright 2020 NXP
WAIT_TIME=1
NUM_NETIFS=4
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index 98053d3afbda..b8dbabe24ac2 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -24,6 +24,7 @@
/x86_64/smm_test
/x86_64/state_test
/x86_64/svm_vmcall_test
+/x86_64/svm_int_ctl_test
/x86_64/sync_regs_test
/x86_64/tsc_msrs_test
/x86_64/userspace_msr_exit_test
@@ -48,6 +49,7 @@
/kvm_page_table_test
/memslot_modification_stress_test
/memslot_perf_test
+/rseq_test
/set_memory_region_test
/steal_time
/kvm_binary_stats_test
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 5d05801ab816..d1774f461393 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -56,6 +56,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/smm_test
TEST_GEN_PROGS_x86_64 += x86_64/state_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test
TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test
+TEST_GEN_PROGS_x86_64 += x86_64/svm_int_ctl_test
TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test
TEST_GEN_PROGS_x86_64 += x86_64/userspace_msr_exit_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_apic_access_test
@@ -80,6 +81,7 @@ TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus
TEST_GEN_PROGS_x86_64 += kvm_page_table_test
TEST_GEN_PROGS_x86_64 += memslot_modification_stress_test
TEST_GEN_PROGS_x86_64 += memslot_perf_test
+TEST_GEN_PROGS_x86_64 += rseq_test
TEST_GEN_PROGS_x86_64 += set_memory_region_test
TEST_GEN_PROGS_x86_64 += steal_time
TEST_GEN_PROGS_x86_64 += kvm_binary_stats_test
@@ -93,6 +95,7 @@ TEST_GEN_PROGS_aarch64 += dirty_log_test
TEST_GEN_PROGS_aarch64 += dirty_log_perf_test
TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus
TEST_GEN_PROGS_aarch64 += kvm_page_table_test
+TEST_GEN_PROGS_aarch64 += rseq_test
TEST_GEN_PROGS_aarch64 += set_memory_region_test
TEST_GEN_PROGS_aarch64 += steal_time
TEST_GEN_PROGS_aarch64 += kvm_binary_stats_test
@@ -104,6 +107,7 @@ TEST_GEN_PROGS_s390x += demand_paging_test
TEST_GEN_PROGS_s390x += dirty_log_test
TEST_GEN_PROGS_s390x += kvm_create_max_vcpus
TEST_GEN_PROGS_s390x += kvm_page_table_test
+TEST_GEN_PROGS_s390x += rseq_test
TEST_GEN_PROGS_s390x += set_memory_region_test
TEST_GEN_PROGS_s390x += kvm_binary_stats_test
diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c
index 71e277c7c3f3..5d95113c7b7c 100644
--- a/tools/testing/selftests/kvm/access_tracking_perf_test.c
+++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c
@@ -371,9 +371,7 @@ static void help(char *name)
printf(" -v: specify the number of vCPUs to run.\n");
printf(" -o: Overlap guest memory accesses instead of partitioning\n"
" them into a separate region of memory for each vCPU.\n");
- printf(" -s: specify the type of memory that should be used to\n"
- " back the guest data region.\n\n");
- backing_src_help();
+ backing_src_help("-s");
puts("");
exit(0);
}
@@ -381,7 +379,7 @@ static void help(char *name)
int main(int argc, char *argv[])
{
struct test_params params = {
- .backing_src = VM_MEM_SRC_ANONYMOUS,
+ .backing_src = DEFAULT_VM_MEM_SRC,
.vcpu_memory_bytes = DEFAULT_PER_VCPU_MEM_SIZE,
.vcpus = 1,
};
diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c
index e79c1b64977f..1510b21e6306 100644
--- a/tools/testing/selftests/kvm/demand_paging_test.c
+++ b/tools/testing/selftests/kvm/demand_paging_test.c
@@ -179,7 +179,7 @@ static void *uffd_handler_thread_fn(void *arg)
return NULL;
}
- if (!pollfd[0].revents & POLLIN)
+ if (!(pollfd[0].revents & POLLIN))
continue;
r = read(uffd, &msg, sizeof(msg));
@@ -416,7 +416,7 @@ static void help(char *name)
{
puts("");
printf("usage: %s [-h] [-m vm_mode] [-u uffd_mode] [-d uffd_delay_usec]\n"
- " [-b memory] [-t type] [-v vcpus] [-o]\n", name);
+ " [-b memory] [-s type] [-v vcpus] [-o]\n", name);
guest_modes_help();
printf(" -u: use userfaultfd to handle vCPU page faults. Mode is a\n"
" UFFD registration mode: 'MISSING' or 'MINOR'.\n");
@@ -426,8 +426,7 @@ static void help(char *name)
printf(" -b: specify the size of the memory region which should be\n"
" demand paged by each vCPU. e.g. 10M or 3G.\n"
" Default: 1G\n");
- printf(" -t: The type of backing memory to use. Default: anonymous\n");
- backing_src_help();
+ backing_src_help("-s");
printf(" -v: specify the number of vCPUs to run.\n");
printf(" -o: Overlap guest memory accesses instead of partitioning\n"
" them into a separate region of memory for each vCPU.\n");
@@ -439,14 +438,14 @@ int main(int argc, char *argv[])
{
int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
struct test_params p = {
- .src_type = VM_MEM_SRC_ANONYMOUS,
+ .src_type = DEFAULT_VM_MEM_SRC,
.partition_vcpu_memory_access = true,
};
int opt;
guest_modes_append_default();
- while ((opt = getopt(argc, argv, "hm:u:d:b:t:v:o")) != -1) {
+ while ((opt = getopt(argc, argv, "hm:u:d:b:s:v:o")) != -1) {
switch (opt) {
case 'm':
guest_modes_cmdline(optarg);
@@ -465,7 +464,7 @@ int main(int argc, char *argv[])
case 'b':
guest_percpu_mem_size = parse_size(optarg);
break;
- case 't':
+ case 's':
p.src_type = parse_backing_src_type(optarg);
break;
case 'v':
@@ -485,7 +484,7 @@ int main(int argc, char *argv[])
if (p.uffd_mode == UFFDIO_REGISTER_MODE_MINOR &&
!backing_src_is_shared(p.src_type)) {
- TEST_FAIL("userfaultfd MINOR mode requires shared memory; pick a different -t");
+ TEST_FAIL("userfaultfd MINOR mode requires shared memory; pick a different -s");
}
for_each_guest_mode(run_test, &p);
diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
index 479868570d59..7ffab5bd5ce5 100644
--- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -118,42 +118,64 @@ static inline void disable_dirty_logging(struct kvm_vm *vm, int slots)
toggle_dirty_logging(vm, slots, false);
}
-static void get_dirty_log(struct kvm_vm *vm, int slots, unsigned long *bitmap,
- uint64_t nr_pages)
+static void get_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[], int slots)
{
- uint64_t slot_pages = nr_pages / slots;
int i;
for (i = 0; i < slots; i++) {
int slot = PERF_TEST_MEM_SLOT_INDEX + i;
- unsigned long *slot_bitmap = bitmap + i * slot_pages;
- kvm_vm_get_dirty_log(vm, slot, slot_bitmap);
+ kvm_vm_get_dirty_log(vm, slot, bitmaps[i]);
}
}
-static void clear_dirty_log(struct kvm_vm *vm, int slots, unsigned long *bitmap,
- uint64_t nr_pages)
+static void clear_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[],
+ int slots, uint64_t pages_per_slot)
{
- uint64_t slot_pages = nr_pages / slots;
int i;
for (i = 0; i < slots; i++) {
int slot = PERF_TEST_MEM_SLOT_INDEX + i;
- unsigned long *slot_bitmap = bitmap + i * slot_pages;
- kvm_vm_clear_dirty_log(vm, slot, slot_bitmap, 0, slot_pages);
+ kvm_vm_clear_dirty_log(vm, slot, bitmaps[i], 0, pages_per_slot);
}
}
+static unsigned long **alloc_bitmaps(int slots, uint64_t pages_per_slot)
+{
+ unsigned long **bitmaps;
+ int i;
+
+ bitmaps = malloc(slots * sizeof(bitmaps[0]));
+ TEST_ASSERT(bitmaps, "Failed to allocate bitmaps array.");
+
+ for (i = 0; i < slots; i++) {
+ bitmaps[i] = bitmap_zalloc(pages_per_slot);
+ TEST_ASSERT(bitmaps[i], "Failed to allocate slot bitmap.");
+ }
+
+ return bitmaps;
+}
+
+static void free_bitmaps(unsigned long *bitmaps[], int slots)
+{
+ int i;
+
+ for (i = 0; i < slots; i++)
+ free(bitmaps[i]);
+
+ free(bitmaps);
+}
+
static void run_test(enum vm_guest_mode mode, void *arg)
{
struct test_params *p = arg;
pthread_t *vcpu_threads;
struct kvm_vm *vm;
- unsigned long *bmap;
+ unsigned long **bitmaps;
uint64_t guest_num_pages;
uint64_t host_num_pages;
+ uint64_t pages_per_slot;
int vcpu_id;
struct timespec start;
struct timespec ts_diff;
@@ -171,7 +193,9 @@ static void run_test(enum vm_guest_mode mode, void *arg)
guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm_get_page_shift(vm);
guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
host_num_pages = vm_num_host_pages(mode, guest_num_pages);
- bmap = bitmap_zalloc(host_num_pages);
+ pages_per_slot = host_num_pages / p->slots;
+
+ bitmaps = alloc_bitmaps(p->slots, pages_per_slot);
if (dirty_log_manual_caps) {
cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2;
@@ -239,7 +263,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
iteration, ts_diff.tv_sec, ts_diff.tv_nsec);
clock_gettime(CLOCK_MONOTONIC, &start);
- get_dirty_log(vm, p->slots, bmap, host_num_pages);
+ get_dirty_log(vm, bitmaps, p->slots);
ts_diff = timespec_elapsed(start);
get_dirty_log_total = timespec_add(get_dirty_log_total,
ts_diff);
@@ -248,7 +272,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
if (dirty_log_manual_caps) {
clock_gettime(CLOCK_MONOTONIC, &start);
- clear_dirty_log(vm, p->slots, bmap, host_num_pages);
+ clear_dirty_log(vm, bitmaps, p->slots, pages_per_slot);
ts_diff = timespec_elapsed(start);
clear_dirty_log_total = timespec_add(clear_dirty_log_total,
ts_diff);
@@ -281,7 +305,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
clear_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec);
}
- free(bmap);
+ free_bitmaps(bitmaps, p->slots);
free(vcpu_threads);
perf_test_destroy_vm(vm);
}
@@ -308,11 +332,9 @@ static void help(char *name)
printf(" -v: specify the number of vCPUs to run.\n");
printf(" -o: Overlap guest memory accesses instead of partitioning\n"
" them into a separate region of memory for each vCPU.\n");
- printf(" -s: specify the type of memory that should be used to\n"
- " back the guest data region.\n\n");
+ backing_src_help("-s");
printf(" -x: Split the memory region into this number of memslots.\n"
- " (default: 1)");
- backing_src_help();
+ " (default: 1)\n");
puts("");
exit(0);
}
@@ -324,7 +346,7 @@ int main(int argc, char *argv[])
.iterations = TEST_HOST_LOOP_N,
.wr_fract = 1,
.partition_vcpu_memory_access = true,
- .backing_src = VM_MEM_SRC_ANONYMOUS,
+ .backing_src = DEFAULT_VM_MEM_SRC,
.slots = 1,
};
int opt;
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
index 451fed5ce8e7..f8fddc84c0d3 100644
--- a/tools/testing/selftests/kvm/include/test_util.h
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -90,6 +90,8 @@ enum vm_mem_backing_src_type {
NUM_SRC_TYPES,
};
+#define DEFAULT_VM_MEM_SRC VM_MEM_SRC_ANONYMOUS
+
struct vm_mem_backing_src_alias {
const char *name;
uint32_t flag;
@@ -102,7 +104,7 @@ size_t get_trans_hugepagesz(void);
size_t get_def_hugetlb_pagesz(void);
const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(uint32_t i);
size_t get_backing_src_pagesz(uint32_t i);
-void backing_src_help(void);
+void backing_src_help(const char *flag);
enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name);
long get_run_delay(void);
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 242ae8e09a65..05e65ca1c30c 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -312,37 +312,37 @@ static inline void set_xmm(int n, unsigned long val)
}
}
-typedef unsigned long v1di __attribute__ ((vector_size (8)));
+#define GET_XMM(__xmm) \
+({ \
+ unsigned long __val; \
+ asm volatile("movq %%"#__xmm", %0" : "=r"(__val)); \
+ __val; \
+})
+
static inline unsigned long get_xmm(int n)
{
assert(n >= 0 && n <= 7);
- register v1di xmm0 __asm__("%xmm0");
- register v1di xmm1 __asm__("%xmm1");
- register v1di xmm2 __asm__("%xmm2");
- register v1di xmm3 __asm__("%xmm3");
- register v1di xmm4 __asm__("%xmm4");
- register v1di xmm5 __asm__("%xmm5");
- register v1di xmm6 __asm__("%xmm6");
- register v1di xmm7 __asm__("%xmm7");
switch (n) {
case 0:
- return (unsigned long)xmm0;
+ return GET_XMM(xmm0);
case 1:
- return (unsigned long)xmm1;
+ return GET_XMM(xmm1);
case 2:
- return (unsigned long)xmm2;
+ return GET_XMM(xmm2);
case 3:
- return (unsigned long)xmm3;
+ return GET_XMM(xmm3);
case 4:
- return (unsigned long)xmm4;
+ return GET_XMM(xmm4);
case 5:
- return (unsigned long)xmm5;
+ return GET_XMM(xmm5);
case 6:
- return (unsigned long)xmm6;
+ return GET_XMM(xmm6);
case 7:
- return (unsigned long)xmm7;
+ return GET_XMM(xmm7);
}
+
+ /* never reached */
return 0;
}
diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c
index 0d04a7db7f24..36407cb0ec85 100644
--- a/tools/testing/selftests/kvm/kvm_page_table_test.c
+++ b/tools/testing/selftests/kvm/kvm_page_table_test.c
@@ -456,10 +456,7 @@ static void help(char *name)
" (default: 1G)\n");
printf(" -v: specify the number of vCPUs to run\n"
" (default: 1)\n");
- printf(" -s: specify the type of memory that should be used to\n"
- " back the guest data region.\n"
- " (default: anonymous)\n\n");
- backing_src_help();
+ backing_src_help("-s");
puts("");
}
@@ -468,7 +465,7 @@ int main(int argc, char *argv[])
int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
struct test_params p = {
.test_mem_size = DEFAULT_TEST_MEM_SIZE,
- .src_type = VM_MEM_SRC_ANONYMOUS,
+ .src_type = DEFAULT_VM_MEM_SRC,
};
int opt;
diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c
index a9107bfae402..b72429108993 100644
--- a/tools/testing/selftests/kvm/lib/test_util.c
+++ b/tools/testing/selftests/kvm/lib/test_util.c
@@ -283,13 +283,22 @@ size_t get_backing_src_pagesz(uint32_t i)
}
}
-void backing_src_help(void)
+static void print_available_backing_src_types(const char *prefix)
{
int i;
- printf("Available backing src types:\n");
+ printf("%sAvailable backing src types:\n", prefix);
+
for (i = 0; i < NUM_SRC_TYPES; i++)
- printf("\t%s\n", vm_mem_backing_src_alias(i)->name);
+ printf("%s %s\n", prefix, vm_mem_backing_src_alias(i)->name);
+}
+
+void backing_src_help(const char *flag)
+{
+ printf(" %s: specify the type of memory that should be used to\n"
+ " back the guest data region. (default: %s)\n",
+ flag, vm_mem_backing_src_alias(DEFAULT_VM_MEM_SRC)->name);
+ print_available_backing_src_types(" ");
}
enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name)
@@ -300,7 +309,7 @@ enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name)
if (!strcmp(type_name, vm_mem_backing_src_alias(i)->name))
return i;
- backing_src_help();
+ print_available_backing_src_types("");
TEST_FAIL("Unknown backing src type: %s", type_name);
return -1;
}
diff --git a/tools/testing/selftests/kvm/rseq_test.c b/tools/testing/selftests/kvm/rseq_test.c
new file mode 100644
index 000000000000..4158da0da2bb
--- /dev/null
+++ b/tools/testing/selftests/kvm/rseq_test.c
@@ -0,0 +1,286 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <errno.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+#include <syscall.h>
+#include <sys/ioctl.h>
+#include <sys/sysinfo.h>
+#include <asm/barrier.h>
+#include <linux/atomic.h>
+#include <linux/rseq.h>
+#include <linux/unistd.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+
+#define VCPU_ID 0
+
+static __thread volatile struct rseq __rseq = {
+ .cpu_id = RSEQ_CPU_ID_UNINITIALIZED,
+};
+
+/*
+ * Use an arbitrary, bogus signature for configuring rseq, this test does not
+ * actually enter an rseq critical section.
+ */
+#define RSEQ_SIG 0xdeadbeef
+
+/*
+ * Any bug related to task migration is likely to be timing-dependent; perform
+ * a large number of migrations to reduce the odds of a false negative.
+ */
+#define NR_TASK_MIGRATIONS 100000
+
+static pthread_t migration_thread;
+static cpu_set_t possible_mask;
+static int min_cpu, max_cpu;
+static bool done;
+
+static atomic_t seq_cnt;
+
+static void guest_code(void)
+{
+ for (;;)
+ GUEST_SYNC(0);
+}
+
+static void sys_rseq(int flags)
+{
+ int r;
+
+ r = syscall(__NR_rseq, &__rseq, sizeof(__rseq), flags, RSEQ_SIG);
+ TEST_ASSERT(!r, "rseq failed, errno = %d (%s)", errno, strerror(errno));
+}
+
+static int next_cpu(int cpu)
+{
+ /*
+ * Advance to the next CPU, skipping those that weren't in the original
+ * affinity set. Sadly, there is no CPU_SET_FOR_EACH, and cpu_set_t's
+ * data storage is considered as opaque. Note, if this task is pinned
+ * to a small set of discontigous CPUs, e.g. 2 and 1023, this loop will
+ * burn a lot cycles and the test will take longer than normal to
+ * complete.
+ */
+ do {
+ cpu++;
+ if (cpu > max_cpu) {
+ cpu = min_cpu;
+ TEST_ASSERT(CPU_ISSET(cpu, &possible_mask),
+ "Min CPU = %d must always be usable", cpu);
+ break;
+ }
+ } while (!CPU_ISSET(cpu, &possible_mask));
+
+ return cpu;
+}
+
+static void *migration_worker(void *ign)
+{
+ cpu_set_t allowed_mask;
+ int r, i, cpu;
+
+ CPU_ZERO(&allowed_mask);
+
+ for (i = 0, cpu = min_cpu; i < NR_TASK_MIGRATIONS; i++, cpu = next_cpu(cpu)) {
+ CPU_SET(cpu, &allowed_mask);
+
+ /*
+ * Bump the sequence count twice to allow the reader to detect
+ * that a migration may have occurred in between rseq and sched
+ * CPU ID reads. An odd sequence count indicates a migration
+ * is in-progress, while a completely different count indicates
+ * a migration occurred since the count was last read.
+ */
+ atomic_inc(&seq_cnt);
+
+ /*
+ * Ensure the odd count is visible while sched_getcpu() isn't
+ * stable, i.e. while changing affinity is in-progress.
+ */
+ smp_wmb();
+ r = sched_setaffinity(0, sizeof(allowed_mask), &allowed_mask);
+ TEST_ASSERT(!r, "sched_setaffinity failed, errno = %d (%s)",
+ errno, strerror(errno));
+ smp_wmb();
+ atomic_inc(&seq_cnt);
+
+ CPU_CLR(cpu, &allowed_mask);
+
+ /*
+ * Wait 1-10us before proceeding to the next iteration and more
+ * specifically, before bumping seq_cnt again. A delay is
+ * needed on three fronts:
+ *
+ * 1. To allow sched_setaffinity() to prompt migration before
+ * ioctl(KVM_RUN) enters the guest so that TIF_NOTIFY_RESUME
+ * (or TIF_NEED_RESCHED, which indirectly leads to handling
+ * NOTIFY_RESUME) is handled in KVM context.
+ *
+ * If NOTIFY_RESUME/NEED_RESCHED is set after KVM enters
+ * the guest, the guest will trigger a IO/MMIO exit all the
+ * way to userspace and the TIF flags will be handled by
+ * the generic "exit to userspace" logic, not by KVM. The
+ * exit to userspace is necessary to give the test a chance
+ * to check the rseq CPU ID (see #2).
+ *
+ * Alternatively, guest_code() could include an instruction
+ * to trigger an exit that is handled by KVM, but any such
+ * exit requires architecture specific code.
+ *
+ * 2. To let ioctl(KVM_RUN) make its way back to the test
+ * before the next round of migration. The test's check on
+ * the rseq CPU ID must wait for migration to complete in
+ * order to avoid false positive, thus any kernel rseq bug
+ * will be missed if the next migration starts before the
+ * check completes.
+ *
+ * 3. To ensure the read-side makes efficient forward progress,
+ * e.g. if sched_getcpu() involves a syscall. Stalling the
+ * read-side means the test will spend more time waiting for
+ * sched_getcpu() to stabilize and less time trying to hit
+ * the timing-dependent bug.
+ *
+ * Because any bug in this area is likely to be timing-dependent,
+ * run with a range of delays at 1us intervals from 1us to 10us
+ * as a best effort to avoid tuning the test to the point where
+ * it can hit _only_ the original bug and not detect future
+ * regressions.
+ *
+ * The original bug can reproduce with a delay up to ~500us on
+ * x86-64, but starts to require more iterations to reproduce
+ * as the delay creeps above ~10us, and the average runtime of
+ * each iteration obviously increases as well. Cap the delay
+ * at 10us to keep test runtime reasonable while minimizing
+ * potential coverage loss.
+ *
+ * The lower bound for reproducing the bug is likely below 1us,
+ * e.g. failures occur on x86-64 with nanosleep(0), but at that
+ * point the overhead of the syscall likely dominates the delay.
+ * Use usleep() for simplicity and to avoid unnecessary kernel
+ * dependencies.
+ */
+ usleep((i % 10) + 1);
+ }
+ done = true;
+ return NULL;
+}
+
+static int calc_min_max_cpu(void)
+{
+ int i, cnt, nproc;
+
+ if (CPU_COUNT(&possible_mask) < 2)
+ return -EINVAL;
+
+ /*
+ * CPU_SET doesn't provide a FOR_EACH helper, get the min/max CPU that
+ * this task is affined to in order to reduce the time spent querying
+ * unusable CPUs, e.g. if this task is pinned to a small percentage of
+ * total CPUs.
+ */
+ nproc = get_nprocs_conf();
+ min_cpu = -1;
+ max_cpu = -1;
+ cnt = 0;
+
+ for (i = 0; i < nproc; i++) {
+ if (!CPU_ISSET(i, &possible_mask))
+ continue;
+ if (min_cpu == -1)
+ min_cpu = i;
+ max_cpu = i;
+ cnt++;
+ }
+
+ return (cnt < 2) ? -EINVAL : 0;
+}
+
+int main(int argc, char *argv[])
+{
+ int r, i, snapshot;
+ struct kvm_vm *vm;
+ u32 cpu, rseq_cpu;
+
+ /* Tell stdout not to buffer its content */
+ setbuf(stdout, NULL);
+
+ r = sched_getaffinity(0, sizeof(possible_mask), &possible_mask);
+ TEST_ASSERT(!r, "sched_getaffinity failed, errno = %d (%s)", errno,
+ strerror(errno));
+
+ if (calc_min_max_cpu()) {
+ print_skip("Only one usable CPU, task migration not possible");
+ exit(KSFT_SKIP);
+ }
+
+ sys_rseq(0);
+
+ /*
+ * Create and run a dummy VM that immediately exits to userspace via
+ * GUEST_SYNC, while concurrently migrating the process by setting its
+ * CPU affinity.
+ */
+ vm = vm_create_default(VCPU_ID, 0, guest_code);
+ ucall_init(vm, NULL);
+
+ pthread_create(&migration_thread, NULL, migration_worker, 0);
+
+ for (i = 0; !done; i++) {
+ vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC,
+ "Guest failed?");
+
+ /*
+ * Verify rseq's CPU matches sched's CPU. Ensure migration
+ * doesn't occur between sched_getcpu() and reading the rseq
+ * cpu_id by rereading both if the sequence count changes, or
+ * if the count is odd (migration in-progress).
+ */
+ do {
+ /*
+ * Drop bit 0 to force a mismatch if the count is odd,
+ * i.e. if a migration is in-progress.
+ */
+ snapshot = atomic_read(&seq_cnt) & ~1;
+
+ /*
+ * Ensure reading sched_getcpu() and rseq.cpu_id
+ * complete in a single "no migration" window, i.e. are
+ * not reordered across the seq_cnt reads.
+ */
+ smp_rmb();
+ cpu = sched_getcpu();
+ rseq_cpu = READ_ONCE(__rseq.cpu_id);
+ smp_rmb();
+ } while (snapshot != atomic_read(&seq_cnt));
+
+ TEST_ASSERT(rseq_cpu == cpu,
+ "rseq CPU = %d, sched CPU = %d\n", rseq_cpu, cpu);
+ }
+
+ /*
+ * Sanity check that the test was able to enter the guest a reasonable
+ * number of times, e.g. didn't get stalled too often/long waiting for
+ * sched_getcpu() to stabilize. A 2:1 migration:KVM_RUN ratio is a
+ * fairly conservative ratio on x86-64, which can do _more_ KVM_RUNs
+ * than migrations given the 1us+ delay in the migration task.
+ */
+ TEST_ASSERT(i > (NR_TASK_MIGRATIONS / 2),
+ "Only performed %d KVM_RUNs, task stalled too much?\n", i);
+
+ pthread_join(migration_thread, NULL);
+
+ kvm_vm_free(vm);
+
+ sys_rseq(RSEQ_FLAG_UNREGISTER);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c
index 2172d65b85e4..62f2eb9ee3d5 100644
--- a/tools/testing/selftests/kvm/steal_time.c
+++ b/tools/testing/selftests/kvm/steal_time.c
@@ -116,12 +116,12 @@ struct st_time {
uint64_t st_time;
};
-static int64_t smccc(uint32_t func, uint32_t arg)
+static int64_t smccc(uint32_t func, uint64_t arg)
{
unsigned long ret;
asm volatile(
- "mov x0, %1\n"
+ "mov w0, %w1\n"
"mov x1, %2\n"
"hvc #0\n"
"mov %0, x0\n"
diff --git a/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c b/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c
new file mode 100644
index 000000000000..df04f56ce859
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * svm_int_ctl_test
+ *
+ * Copyright (C) 2021, Red Hat, Inc.
+ *
+ * Nested SVM testing: test simultaneous use of V_IRQ from L1 and L0.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "apic.h"
+
+#define VCPU_ID 0
+
+static struct kvm_vm *vm;
+
+bool vintr_irq_called;
+bool intr_irq_called;
+
+#define VINTR_IRQ_NUMBER 0x20
+#define INTR_IRQ_NUMBER 0x30
+
+static void vintr_irq_handler(struct ex_regs *regs)
+{
+ vintr_irq_called = true;
+}
+
+static void intr_irq_handler(struct ex_regs *regs)
+{
+ x2apic_write_reg(APIC_EOI, 0x00);
+ intr_irq_called = true;
+}
+
+static void l2_guest_code(struct svm_test_data *svm)
+{
+ /* This code raises interrupt INTR_IRQ_NUMBER in the L1's LAPIC,
+ * and since L1 didn't enable virtual interrupt masking,
+ * L2 should receive it and not L1.
+ *
+ * L2 also has virtual interrupt 'VINTR_IRQ_NUMBER' pending in V_IRQ
+ * so it should also receive it after the following 'sti'.
+ */
+ x2apic_write_reg(APIC_ICR,
+ APIC_DEST_SELF | APIC_INT_ASSERT | INTR_IRQ_NUMBER);
+
+ __asm__ __volatile__(
+ "sti\n"
+ "nop\n"
+ );
+
+ GUEST_ASSERT(vintr_irq_called);
+ GUEST_ASSERT(intr_irq_called);
+
+ __asm__ __volatile__(
+ "vmcall\n"
+ );
+}
+
+static void l1_guest_code(struct svm_test_data *svm)
+{
+ #define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ struct vmcb *vmcb = svm->vmcb;
+
+ x2apic_enable();
+
+ /* Prepare for L2 execution. */
+ generic_svm_setup(svm, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /* No virtual interrupt masking */
+ vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
+
+ /* No intercepts for real and virtual interrupts */
+ vmcb->control.intercept &= ~(1ULL << INTERCEPT_INTR | INTERCEPT_VINTR);
+
+ /* Make a virtual interrupt VINTR_IRQ_NUMBER pending */
+ vmcb->control.int_ctl |= V_IRQ_MASK | (0x1 << V_INTR_PRIO_SHIFT);
+ vmcb->control.int_vector = VINTR_IRQ_NUMBER;
+
+ run_guest(vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ vm_vaddr_t svm_gva;
+
+ nested_svm_check_supported();
+
+ vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vm, VCPU_ID);
+
+ vm_install_exception_handler(vm, VINTR_IRQ_NUMBER, vintr_irq_handler);
+ vm_install_exception_handler(vm, INTR_IRQ_NUMBER, intr_irq_handler);
+
+ vcpu_alloc_svm(vm, &svm_gva);
+ vcpu_args_set(vm, VCPU_ID, 1, svm_gva);
+
+ struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+ struct ucall uc;
+
+ vcpu_run(vm, VCPU_ID);
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+
+ switch (get_ucall(vm, VCPU_ID, &uc)) {
+ case UCALL_ABORT:
+ TEST_FAIL("%s", (const char *)uc.args[0]);
+ break;
+ /* NOT REACHED */
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
+ }
+done:
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/nci/nci_dev.c b/tools/testing/selftests/nci/nci_dev.c
index e1bf55dabdf6..162c41e9bcae 100644
--- a/tools/testing/selftests/nci/nci_dev.c
+++ b/tools/testing/selftests/nci/nci_dev.c
@@ -746,7 +746,7 @@ int read_write_nci_cmd(int nfc_sock, int virtual_fd, const __u8 *cmd, __u32 cmd_
const __u8 *rsp, __u32 rsp_len)
{
char buf[256];
- unsigned int len;
+ int len;
send(nfc_sock, &cmd[3], cmd_len - 3, 0);
len = read(virtual_fd, buf, cmd_len);
diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile
index cfc7f4f97fd1..df341648f818 100644
--- a/tools/testing/selftests/net/af_unix/Makefile
+++ b/tools/testing/selftests/net/af_unix/Makefile
@@ -1,5 +1,2 @@
-##TEST_GEN_FILES := test_unix_oob
-TEST_PROGS := test_unix_oob
+TEST_GEN_PROGS := test_unix_oob
include ../../lib.mk
-
-all: $(TEST_PROGS)
diff --git a/tools/testing/selftests/net/af_unix/test_unix_oob.c b/tools/testing/selftests/net/af_unix/test_unix_oob.c
index 0f3e3763f4f8..3dece8b29253 100644
--- a/tools/testing/selftests/net/af_unix/test_unix_oob.c
+++ b/tools/testing/selftests/net/af_unix/test_unix_oob.c
@@ -271,8 +271,9 @@ main(int argc, char **argv)
read_oob(pfd, &oob);
if (!signal_recvd || len != 127 || oob != '%' || atmark != 1) {
- fprintf(stderr, "Test 3 failed, sigurg %d len %d OOB %c ",
- "atmark %d\n", signal_recvd, len, oob, atmark);
+ fprintf(stderr,
+ "Test 3 failed, sigurg %d len %d OOB %c atmark %d\n",
+ signal_recvd, len, oob, atmark);
die(1);
}
diff --git a/tools/testing/selftests/net/altnames.sh b/tools/testing/selftests/net/altnames.sh
index 4254ddc3f70b..1ef9e4159bba 100755
--- a/tools/testing/selftests/net/altnames.sh
+++ b/tools/testing/selftests/net/altnames.sh
@@ -45,7 +45,7 @@ altnames_test()
check_err $? "Got unexpected long alternative name from link show JSON"
ip link property del $DUMMY_DEV altname $SHORT_NAME
- check_err $? "Failed to add short alternative name"
+ check_err $? "Failed to delete short alternative name"
ip -j -p link show $SHORT_NAME &>/dev/null
check_fail $? "Unexpected success while trying to do link show with deleted short alternative name"
diff --git a/tools/testing/selftests/netfilter/nft_nat_zones.sh b/tools/testing/selftests/netfilter/nft_nat_zones.sh
new file mode 100755
index 000000000000..b9ab37380f33
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nft_nat_zones.sh
@@ -0,0 +1,309 @@
+#!/bin/bash
+#
+# Test connection tracking zone and NAT source port reallocation support.
+#
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+# Don't increase too much, 2000 clients should work
+# just fine but script can then take several minutes with
+# KASAN/debug builds.
+maxclients=100
+
+have_iperf=1
+ret=0
+
+# client1---.
+# veth1-.
+# |
+# NAT Gateway --veth0--> Server
+# | |
+# veth2-' |
+# client2---' |
+# .... |
+# clientX----vethX---'
+
+# All clients share identical IP address.
+# NAT Gateway uses policy routing and conntrack zones to isolate client
+# namespaces. Each client connects to Server, each with colliding tuples:
+# clientsaddr:10000 -> serveraddr:dport
+# NAT Gateway is supposed to do port reallocation for each of the
+# connections.
+
+sfx=$(mktemp -u "XXXXXXXX")
+gw="ns-gw-$sfx"
+cl1="ns-cl1-$sfx"
+cl2="ns-cl2-$sfx"
+srv="ns-srv-$sfx"
+
+v4gc1=$(sysctl -n net.ipv4.neigh.default.gc_thresh1 2>/dev/null)
+v4gc2=$(sysctl -n net.ipv4.neigh.default.gc_thresh2 2>/dev/null)
+v4gc3=$(sysctl -n net.ipv4.neigh.default.gc_thresh3 2>/dev/null)
+v6gc1=$(sysctl -n net.ipv6.neigh.default.gc_thresh1 2>/dev/null)
+v6gc2=$(sysctl -n net.ipv6.neigh.default.gc_thresh2 2>/dev/null)
+v6gc3=$(sysctl -n net.ipv6.neigh.default.gc_thresh3 2>/dev/null)
+
+cleanup()
+{
+ ip netns del $gw
+ ip netns del $srv
+ for i in $(seq 1 $maxclients); do
+ ip netns del ns-cl$i-$sfx 2>/dev/null
+ done
+
+ sysctl -q net.ipv4.neigh.default.gc_thresh1=$v4gc1 2>/dev/null
+ sysctl -q net.ipv4.neigh.default.gc_thresh2=$v4gc2 2>/dev/null
+ sysctl -q net.ipv4.neigh.default.gc_thresh3=$v4gc3 2>/dev/null
+ sysctl -q net.ipv6.neigh.default.gc_thresh1=$v6gc1 2>/dev/null
+ sysctl -q net.ipv6.neigh.default.gc_thresh2=$v6gc2 2>/dev/null
+ sysctl -q net.ipv6.neigh.default.gc_thresh3=$v6gc3 2>/dev/null
+}
+
+nft --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without nft tool"
+ exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+conntrack -V > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without conntrack tool"
+ exit $ksft_skip
+fi
+
+iperf3 -v >/dev/null 2>&1
+if [ $? -ne 0 ];then
+ have_iperf=0
+fi
+
+ip netns add "$gw"
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not create net namespace $gw"
+ exit $ksft_skip
+fi
+ip -net "$gw" link set lo up
+
+trap cleanup EXIT
+
+ip netns add "$srv"
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not create server netns $srv"
+ exit $ksft_skip
+fi
+
+ip link add veth0 netns "$gw" type veth peer name eth0 netns "$srv"
+ip -net "$gw" link set veth0 up
+ip -net "$srv" link set lo up
+ip -net "$srv" link set eth0 up
+
+sysctl -q net.ipv6.neigh.default.gc_thresh1=512 2>/dev/null
+sysctl -q net.ipv6.neigh.default.gc_thresh2=1024 2>/dev/null
+sysctl -q net.ipv6.neigh.default.gc_thresh3=4096 2>/dev/null
+sysctl -q net.ipv4.neigh.default.gc_thresh1=512 2>/dev/null
+sysctl -q net.ipv4.neigh.default.gc_thresh2=1024 2>/dev/null
+sysctl -q net.ipv4.neigh.default.gc_thresh3=4096 2>/dev/null
+
+for i in $(seq 1 $maxclients);do
+ cl="ns-cl$i-$sfx"
+
+ ip netns add "$cl"
+ if [ $? -ne 0 ];then
+ echo "SKIP: Could not create client netns $cl"
+ exit $ksft_skip
+ fi
+ ip link add veth$i netns "$gw" type veth peer name eth0 netns "$cl" > /dev/null 2>&1
+ if [ $? -ne 0 ];then
+ echo "SKIP: No virtual ethernet pair device support in kernel"
+ exit $ksft_skip
+ fi
+done
+
+for i in $(seq 1 $maxclients);do
+ cl="ns-cl$i-$sfx"
+ echo netns exec "$cl" ip link set lo up
+ echo netns exec "$cl" ip link set eth0 up
+ echo netns exec "$cl" sysctl -q net.ipv4.tcp_syn_retries=2
+ echo netns exec "$gw" ip link set veth$i up
+ echo netns exec "$gw" sysctl -q net.ipv4.conf.veth$i.arp_ignore=2
+ echo netns exec "$gw" sysctl -q net.ipv4.conf.veth$i.rp_filter=0
+
+ # clients have same IP addresses.
+ echo netns exec "$cl" ip addr add 10.1.0.3/24 dev eth0
+ echo netns exec "$cl" ip addr add dead:1::3/64 dev eth0
+ echo netns exec "$cl" ip route add default via 10.1.0.2 dev eth0
+ echo netns exec "$cl" ip route add default via dead:1::2 dev eth0
+
+ # NB: same addresses on client-facing interfaces.
+ echo netns exec "$gw" ip addr add 10.1.0.2/24 dev veth$i
+ echo netns exec "$gw" ip addr add dead:1::2/64 dev veth$i
+
+ # gw: policy routing
+ echo netns exec "$gw" ip route add 10.1.0.0/24 dev veth$i table $((1000+i))
+ echo netns exec "$gw" ip route add dead:1::0/64 dev veth$i table $((1000+i))
+ echo netns exec "$gw" ip route add 10.3.0.0/24 dev veth0 table $((1000+i))
+ echo netns exec "$gw" ip route add dead:3::0/64 dev veth0 table $((1000+i))
+ echo netns exec "$gw" ip rule add fwmark $i lookup $((1000+i))
+done | ip -batch /dev/stdin
+
+ip -net "$gw" addr add 10.3.0.1/24 dev veth0
+ip -net "$gw" addr add dead:3::1/64 dev veth0
+
+ip -net "$srv" addr add 10.3.0.99/24 dev eth0
+ip -net "$srv" addr add dead:3::99/64 dev eth0
+
+ip netns exec $gw nft -f /dev/stdin<<EOF
+table inet raw {
+ map iiftomark {
+ type ifname : mark
+ }
+
+ map iiftozone {
+ typeof iifname : ct zone
+ }
+
+ set inicmp {
+ flags dynamic
+ type ipv4_addr . ifname . ipv4_addr
+ }
+ set inflows {
+ flags dynamic
+ type ipv4_addr . inet_service . ifname . ipv4_addr . inet_service
+ }
+
+ set inflows6 {
+ flags dynamic
+ type ipv6_addr . inet_service . ifname . ipv6_addr . inet_service
+ }
+
+ chain prerouting {
+ type filter hook prerouting priority -64000; policy accept;
+ ct original zone set meta iifname map @iiftozone
+ meta mark set meta iifname map @iiftomark
+
+ tcp flags & (syn|ack) == ack add @inflows { ip saddr . tcp sport . meta iifname . ip daddr . tcp dport counter }
+ add @inflows6 { ip6 saddr . tcp sport . meta iifname . ip6 daddr . tcp dport counter }
+ ip protocol icmp add @inicmp { ip saddr . meta iifname . ip daddr counter }
+ }
+
+ chain nat_postrouting {
+ type nat hook postrouting priority 0; policy accept;
+ ct mark set meta mark meta oifname veth0 masquerade
+ }
+
+ chain mangle_prerouting {
+ type filter hook prerouting priority -100; policy accept;
+ ct direction reply meta mark set ct mark
+ }
+}
+EOF
+
+( echo add element inet raw iiftomark \{
+ for i in $(seq 1 $((maxclients-1))); do
+ echo \"veth$i\" : $i,
+ done
+ echo \"veth$maxclients\" : $maxclients \}
+ echo add element inet raw iiftozone \{
+ for i in $(seq 1 $((maxclients-1))); do
+ echo \"veth$i\" : $i,
+ done
+ echo \"veth$maxclients\" : $maxclients \}
+) | ip netns exec $gw nft -f /dev/stdin
+
+ip netns exec "$gw" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
+ip netns exec "$gw" sysctl -q net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec "$gw" sysctl -q net.ipv4.conf.all.rp_filter=0 >/dev/null
+
+# useful for debugging: allows to use 'ping' from clients to gateway.
+ip netns exec "$gw" sysctl -q net.ipv4.fwmark_reflect=1 > /dev/null
+ip netns exec "$gw" sysctl -q net.ipv6.fwmark_reflect=1 > /dev/null
+
+for i in $(seq 1 $maxclients); do
+ cl="ns-cl$i-$sfx"
+ ip netns exec $cl ping -i 0.5 -q -c 3 10.3.0.99 > /dev/null 2>&1 &
+ if [ $? -ne 0 ]; then
+ echo FAIL: Ping failure from $cl 1>&2
+ ret=1
+ break
+ fi
+done
+
+wait
+
+for i in $(seq 1 $maxclients); do
+ ip netns exec $gw nft get element inet raw inicmp "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 }" | grep -q "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 counter packets 3 bytes 252 }"
+ if [ $? -ne 0 ];then
+ ret=1
+ echo "FAIL: counter icmp mismatch for veth$i" 1>&2
+ ip netns exec $gw nft get element inet raw inicmp "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 }" 1>&2
+ break
+ fi
+done
+
+ip netns exec $gw nft get element inet raw inicmp "{ 10.3.0.99 . \"veth0\" . 10.3.0.1 }" | grep -q "{ 10.3.0.99 . \"veth0\" . 10.3.0.1 counter packets $((3 * $maxclients)) bytes $((252 * $maxclients)) }"
+if [ $? -ne 0 ];then
+ ret=1
+ echo "FAIL: counter icmp mismatch for veth0: { 10.3.0.99 . \"veth0\" . 10.3.0.1 counter packets $((3 * $maxclients)) bytes $((252 * $maxclients)) }"
+ ip netns exec $gw nft get element inet raw inicmp "{ 10.3.99 . \"veth0\" . 10.3.0.1 }" 1>&2
+fi
+
+if [ $ret -eq 0 ]; then
+ echo "PASS: ping test from all $maxclients namespaces"
+fi
+
+if [ $have_iperf -eq 0 ];then
+ echo "SKIP: iperf3 not installed"
+ if [ $ret -ne 0 ];then
+ exit $ret
+ fi
+ exit $ksft_skip
+fi
+
+ip netns exec $srv iperf3 -s > /dev/null 2>&1 &
+iperfpid=$!
+sleep 1
+
+for i in $(seq 1 $maxclients); do
+ if [ $ret -ne 0 ]; then
+ break
+ fi
+ cl="ns-cl$i-$sfx"
+ ip netns exec $cl iperf3 -c 10.3.0.99 --cport 10000 -n 1 > /dev/null
+ if [ $? -ne 0 ]; then
+ echo FAIL: Failure to connect for $cl 1>&2
+ ip netns exec $gw conntrack -S 1>&2
+ ret=1
+ fi
+done
+if [ $ret -eq 0 ];then
+ echo "PASS: iperf3 connections for all $maxclients net namespaces"
+fi
+
+kill $iperfpid
+wait
+
+for i in $(seq 1 $maxclients); do
+ ip netns exec $gw nft get element inet raw inflows "{ 10.1.0.3 . 10000 . \"veth$i\" . 10.3.0.99 . 5201 }" > /dev/null
+ if [ $? -ne 0 ];then
+ ret=1
+ echo "FAIL: can't find expected tcp entry for veth$i" 1>&2
+ break
+ fi
+done
+if [ $ret -eq 0 ];then
+ echo "PASS: Found client connection for all $maxclients net namespaces"
+fi
+
+ip netns exec $gw nft get element inet raw inflows "{ 10.3.0.99 . 5201 . \"veth0\" . 10.3.0.1 . 10000 }" > /dev/null
+if [ $? -ne 0 ];then
+ ret=1
+ echo "FAIL: cannot find return entry on veth0" 1>&2
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/netfilter/nft_zones_many.sh b/tools/testing/selftests/netfilter/nft_zones_many.sh
new file mode 100755
index 000000000000..ac646376eb01
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nft_zones_many.sh
@@ -0,0 +1,156 @@
+#!/bin/bash
+
+# Test insertion speed for packets with identical addresses/ports
+# that are all placed in distinct conntrack zones.
+
+sfx=$(mktemp -u "XXXXXXXX")
+ns="ns-$sfx"
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+zones=20000
+have_ct_tool=0
+ret=0
+
+cleanup()
+{
+ ip netns del $ns
+}
+
+ip netns add $ns
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not create net namespace $gw"
+ exit $ksft_skip
+fi
+
+trap cleanup EXIT
+
+conntrack -V > /dev/null 2>&1
+if [ $? -eq 0 ];then
+ have_ct_tool=1
+fi
+
+ip -net "$ns" link set lo up
+
+test_zones() {
+ local max_zones=$1
+
+ip netns exec $ns sysctl -q net.netfilter.nf_conntrack_udp_timeout=3600
+ip netns exec $ns nft -f /dev/stdin<<EOF
+flush ruleset
+table inet raw {
+ map rndzone {
+ typeof numgen inc mod $max_zones : ct zone
+ }
+
+ chain output {
+ type filter hook output priority -64000; policy accept;
+ udp dport 12345 ct zone set numgen inc mod 65536 map @rndzone
+ }
+}
+EOF
+ (
+ echo "add element inet raw rndzone {"
+ for i in $(seq 1 $max_zones);do
+ echo -n "$i : $i"
+ if [ $i -lt $max_zones ]; then
+ echo ","
+ else
+ echo "}"
+ fi
+ done
+ ) | ip netns exec $ns nft -f /dev/stdin
+
+ local i=0
+ local j=0
+ local outerstart=$(date +%s%3N)
+ local stop=$outerstart
+
+ while [ $i -lt $max_zones ]; do
+ local start=$(date +%s%3N)
+ i=$((i + 10000))
+ j=$((j + 1))
+ dd if=/dev/zero of=/dev/stdout bs=8k count=10000 2>/dev/null | ip netns exec "$ns" nc -w 1 -q 1 -u -p 12345 127.0.0.1 12345 > /dev/null
+ if [ $? -ne 0 ] ;then
+ ret=1
+ break
+ fi
+
+ stop=$(date +%s%3N)
+ local duration=$((stop-start))
+ echo "PASS: added 10000 entries in $duration ms (now $i total, loop $j)"
+ done
+
+ if [ $have_ct_tool -eq 1 ]; then
+ local count=$(ip netns exec "$ns" conntrack -C)
+ local duration=$((stop-outerstart))
+
+ if [ $count -eq $max_zones ]; then
+ echo "PASS: inserted $count entries from packet path in $duration ms total"
+ else
+ ip netns exec $ns conntrack -S 1>&2
+ echo "FAIL: inserted $count entries from packet path in $duration ms total, expected $max_zones entries"
+ ret=1
+ fi
+ fi
+
+ if [ $ret -ne 0 ];then
+ echo "FAIL: insert $max_zones entries from packet path" 1>&2
+ fi
+}
+
+test_conntrack_tool() {
+ local max_zones=$1
+
+ ip netns exec $ns conntrack -F >/dev/null 2>/dev/null
+
+ local outerstart=$(date +%s%3N)
+ local start=$(date +%s%3N)
+ local stop=$start
+ local i=0
+ while [ $i -lt $max_zones ]; do
+ i=$((i + 1))
+ ip netns exec "$ns" conntrack -I -s 1.1.1.1 -d 2.2.2.2 --protonum 6 \
+ --timeout 3600 --state ESTABLISHED --sport 12345 --dport 1000 --zone $i >/dev/null 2>&1
+ if [ $? -ne 0 ];then
+ ip netns exec "$ns" conntrack -I -s 1.1.1.1 -d 2.2.2.2 --protonum 6 \
+ --timeout 3600 --state ESTABLISHED --sport 12345 --dport 1000 --zone $i > /dev/null
+ echo "FAIL: conntrack -I returned an error"
+ ret=1
+ break
+ fi
+
+ if [ $((i%10000)) -eq 0 ];then
+ stop=$(date +%s%3N)
+
+ local duration=$((stop-start))
+ echo "PASS: added 10000 entries in $duration ms (now $i total)"
+ start=$stop
+ fi
+ done
+
+ local count=$(ip netns exec "$ns" conntrack -C)
+ local duration=$((stop-outerstart))
+
+ if [ $count -eq $max_zones ]; then
+ echo "PASS: inserted $count entries via ctnetlink in $duration ms"
+ else
+ ip netns exec $ns conntrack -S 1>&2
+ echo "FAIL: inserted $count entries via ctnetlink in $duration ms, expected $max_zones entries ($duration ms)"
+ ret=1
+ fi
+}
+
+test_zones $zones
+
+if [ $have_ct_tool -eq 1 ];then
+ test_conntrack_tool $zones
+else
+ echo "SKIP: Could not run ctnetlink insertion test without conntrack tool"
+ if [ $ret -eq 0 ];then
+ exit $ksft_skip
+ fi
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/powerpc/tm/tm-syscall-asm.S b/tools/testing/selftests/powerpc/tm/tm-syscall-asm.S
index bd1ca25febe4..aed632d29fff 100644
--- a/tools/testing/selftests/powerpc/tm/tm-syscall-asm.S
+++ b/tools/testing/selftests/powerpc/tm/tm-syscall-asm.S
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-#include <ppc-asm.h>
+#include <basic_asm.h>
#include <asm/unistd.h>
.text
@@ -26,3 +26,38 @@ FUNC_START(getppid_tm_suspended)
1:
li r3, -1
blr
+
+
+.macro scv level
+ .long (0x44000001 | (\level) << 5)
+.endm
+
+FUNC_START(getppid_scv_tm_active)
+ PUSH_BASIC_STACK(0)
+ tbegin.
+ beq 1f
+ li r0, __NR_getppid
+ scv 0
+ tend.
+ POP_BASIC_STACK(0)
+ blr
+1:
+ li r3, -1
+ POP_BASIC_STACK(0)
+ blr
+
+FUNC_START(getppid_scv_tm_suspended)
+ PUSH_BASIC_STACK(0)
+ tbegin.
+ beq 1f
+ li r0, __NR_getppid
+ tsuspend.
+ scv 0
+ tresume.
+ tend.
+ POP_BASIC_STACK(0)
+ blr
+1:
+ li r3, -1
+ POP_BASIC_STACK(0)
+ blr
diff --git a/tools/testing/selftests/powerpc/tm/tm-syscall.c b/tools/testing/selftests/powerpc/tm/tm-syscall.c
index 467a6b3134b2..b763354c2eb4 100644
--- a/tools/testing/selftests/powerpc/tm/tm-syscall.c
+++ b/tools/testing/selftests/powerpc/tm/tm-syscall.c
@@ -19,23 +19,36 @@
#include "utils.h"
#include "tm.h"
+#ifndef PPC_FEATURE2_SCV
+#define PPC_FEATURE2_SCV 0x00100000 /* scv syscall */
+#endif
+
extern int getppid_tm_active(void);
extern int getppid_tm_suspended(void);
+extern int getppid_scv_tm_active(void);
+extern int getppid_scv_tm_suspended(void);
unsigned retries = 0;
#define TEST_DURATION 10 /* seconds */
-pid_t getppid_tm(bool suspend)
+pid_t getppid_tm(bool scv, bool suspend)
{
int i;
pid_t pid;
for (i = 0; i < TM_RETRIES; i++) {
- if (suspend)
- pid = getppid_tm_suspended();
- else
- pid = getppid_tm_active();
+ if (suspend) {
+ if (scv)
+ pid = getppid_scv_tm_suspended();
+ else
+ pid = getppid_tm_suspended();
+ } else {
+ if (scv)
+ pid = getppid_scv_tm_active();
+ else
+ pid = getppid_tm_active();
+ }
if (pid >= 0)
return pid;
@@ -82,15 +95,24 @@ int tm_syscall(void)
* Test a syscall within a suspended transaction and verify
* that it succeeds.
*/
- FAIL_IF(getppid_tm(true) == -1); /* Should succeed. */
+ FAIL_IF(getppid_tm(false, true) == -1); /* Should succeed. */
/*
* Test a syscall within an active transaction and verify that
* it fails with the correct failure code.
*/
- FAIL_IF(getppid_tm(false) != -1); /* Should fail... */
+ FAIL_IF(getppid_tm(false, false) != -1); /* Should fail... */
FAIL_IF(!failure_is_persistent()); /* ...persistently... */
FAIL_IF(!failure_is_syscall()); /* ...with code syscall. */
+
+ /* Now do it all again with scv if it is available. */
+ if (have_hwcap2(PPC_FEATURE2_SCV)) {
+ FAIL_IF(getppid_tm(true, true) == -1); /* Should succeed. */
+ FAIL_IF(getppid_tm(true, false) != -1); /* Should fail... */
+ FAIL_IF(!failure_is_persistent()); /* ...persistently... */
+ FAIL_IF(!failure_is_syscall()); /* ...with code syscall. */
+ }
+
gettimeofday(&now, 0);
}