32 files changed, 1341 insertions, 111 deletions
diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.c b/tools/testing/selftests/arm64/signal/test_signals_utils.c
index 6836510a522f..22722abc9dfa 100644
--- a/tools/testing/selftests/arm64/signal/test_signals_utils.c
+++ b/tools/testing/selftests/arm64/signal/test_signals_utils.c
@@ -266,16 +266,19 @@ int test_init(struct tdescr *td)
 			td->feats_supported |= FEAT_SSBS;
 		if (getauxval(AT_HWCAP) & HWCAP_SVE)
 			td->feats_supported |= FEAT_SVE;
-		if (feats_ok(td))
+		if (feats_ok(td)) {
 			fprintf(stderr,
 				"Required Features: [%s] supported\n",
 				feats_to_string(td->feats_required &
 						td->feats_supported));
-		else
+		} else {
 			fprintf(stderr,
 				"Required Features: [%s] NOT supported\n",
 				feats_to_string(td->feats_required &
 						~td->feats_supported));
+			td->result = KSFT_SKIP;
+			return 0;
+		}
 	}
 
 	/* Perform test specific additional initialization */
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 866531c08e4f..799b88152e9e 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -375,7 +375,8 @@ $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.o:				\
 		     $(TRUNNER_BPF_PROGS_DIR)/%.c			\
 		     $(TRUNNER_BPF_PROGS_DIR)/*.h			\
 		     $$(INCLUDE_DIR)/vmlinux.h				\
-		     $(wildcard $(BPFDIR)/bpf_*.h) | $(TRUNNER_OUTPUT)
+		     $(wildcard $(BPFDIR)/bpf_*.h)			\
+		     | $(TRUNNER_OUTPUT) $$(BPFOBJ)
 	$$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@,			\
 					  $(TRUNNER_BPF_CFLAGS))
 
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c
index 033051717ba5..f3daa44a8266 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.c
+++ b/tools/testing/selftests/bpf/cgroup_helpers.c
@@ -12,27 +12,36 @@
 #include <unistd.h>
 #include <ftw.h>
 
-
 #include "cgroup_helpers.h"
 
 /*
  * To avoid relying on the system setup, when setup_cgroup_env is called
- * we create a new mount namespace, and cgroup namespace. The cgroup2
- * root is mounted at CGROUP_MOUNT_PATH
- *
- * Unfortunately, most people don't have cgroupv2 enabled at this point in time.
- * It's easier to create our own mount namespace and manage it ourselves.
+ * we create a new mount namespace, and cgroup namespace. The cgroupv2
+ * root is mounted at CGROUP_MOUNT_PATH. Unfortunately, most people don't
+ * have cgroupv2 enabled at this point in time. It's easier to create our
+ * own mount namespace and manage it ourselves. We assume /mnt exists.
  *
- * We assume /mnt exists.
+ * Related cgroupv1 helpers are named *classid*(), since we only use the
+ * net_cls controller for tagging net_cls.classid. We assume the default
+ * mount under /sys/fs/cgroup/net_cls, which should be the case for the
+ * vast majority of users.
  */
 
 #define WALK_FD_LIMIT			16
+
 #define CGROUP_MOUNT_PATH		"/mnt"
+#define CGROUP_MOUNT_DFLT		"/sys/fs/cgroup"
+#define NETCLS_MOUNT_PATH		CGROUP_MOUNT_DFLT "/net_cls"
 #define CGROUP_WORK_DIR			"/cgroup-test-work-dir"
+
 #define format_cgroup_path(buf, path) \
 	snprintf(buf, sizeof(buf), "%s%s%s", CGROUP_MOUNT_PATH, \
 		 CGROUP_WORK_DIR, path)
 
+#define format_classid_path(buf)				\
+	snprintf(buf, sizeof(buf), "%s%s", NETCLS_MOUNT_PATH,	\
+		 CGROUP_WORK_DIR)
+
 /**
  * enable_all_controllers() - Enable all available cgroup v2 controllers
  *
@@ -139,8 +148,7 @@ static int nftwfunc(const char *filename, const struct stat *statptr,
 	return 0;
 }
 
-
-static int join_cgroup_from_top(char *cgroup_path)
+static int join_cgroup_from_top(const char *cgroup_path)
 {
 	char cgroup_procs_path[PATH_MAX + 1];
 	pid_t pid = getpid();
@@ -313,3 +321,114 @@ int cgroup_setup_and_join(const char *path) {
 	}
 	return cg_fd;
 }
+
+/**
+ * setup_classid_environment() - Setup the cgroupv1 net_cls environment
+ *
+ * After calling this function, cleanup_classid_environment should be called
+ * once testing is complete.
+ *
+ * This function will print an error to stderr and return 1 if it is unable
+ * to setup the cgroup environment. If setup is successful, 0 is returned.
+ */
+int setup_classid_environment(void)
+{
+	char cgroup_workdir[PATH_MAX + 1];
+
+	format_classid_path(cgroup_workdir);
+
+	if (mount("tmpfs", CGROUP_MOUNT_DFLT, "tmpfs", 0, NULL) &&
+	    errno != EBUSY) {
+		log_err("mount cgroup base");
+		return 1;
+	}
+
+	if (mkdir(NETCLS_MOUNT_PATH, 0777) && errno != EEXIST) {
+		log_err("mkdir cgroup net_cls");
+		return 1;
+	}
+
+	if (mount("net_cls", NETCLS_MOUNT_PATH, "cgroup", 0, "net_cls") &&
+	    errno != EBUSY) {
+		log_err("mount cgroup net_cls");
+		return 1;
+	}
+
+	cleanup_classid_environment();
+
+	if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) {
+		log_err("mkdir cgroup work dir");
+		return 1;
+	}
+
+	return 0;
+}
+
+/**
+ * set_classid() - Set a cgroupv1 net_cls classid
+ * @id: the numeric classid
+ *
+ * Writes the passed classid into the cgroup work dir's net_cls.classid
+ * file in order to later on trigger socket tagging.
+ *
+ * On success, it returns 0, otherwise on failure it returns 1. If there
+ * is a failure, it prints the error to stderr.
+ */
+int set_classid(unsigned int id)
+{
+	char cgroup_workdir[PATH_MAX - 42];
+	char cgroup_classid_path[PATH_MAX + 1];
+	int fd, rc = 0;
+
+	format_classid_path(cgroup_workdir);
+	snprintf(cgroup_classid_path, sizeof(cgroup_classid_path),
+		 "%s/net_cls.classid", cgroup_workdir);
+
+	fd = open(cgroup_classid_path, O_WRONLY);
+	if (fd < 0) {
+		log_err("Opening cgroup classid: %s", cgroup_classid_path);
+		return 1;
+	}
+
+	if (dprintf(fd, "%u\n", id) < 0) {
+		log_err("Setting cgroup classid");
+		rc = 1;
+	}
+
+	close(fd);
+	return rc;
+}
+
+/**
+ * join_classid() - Join a cgroupv1 net_cls classid
+ *
+ * This function expects the cgroup work dir to be already created, as we
+ * join it here. This causes the process sockets to be tagged with the given
+ * net_cls classid.
+ *
+ * On success, it returns 0, otherwise on failure it returns 1.
+ */
+int join_classid(void)
+{
+	char cgroup_workdir[PATH_MAX + 1];
+
+	format_classid_path(cgroup_workdir);
+	return join_cgroup_from_top(cgroup_workdir);
+}
+
+/**
+ * cleanup_classid_environment() - Cleanup the cgroupv1 net_cls environment
+ *
+ * At call time, it moves the calling process to the root cgroup, and then
+ * runs the deletion process.
+ *
+ * On failure, it will print an error to stderr, and try to continue.
+ */
+void cleanup_classid_environment(void)
+{
+	char cgroup_workdir[PATH_MAX + 1];
+
+	format_classid_path(cgroup_workdir);
+	join_cgroup_from_top(NETCLS_MOUNT_PATH);
+	nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT);
+}
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h
index 5fe3d88e4f0d..629da3854b3e 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.h
+++ b/tools/testing/selftests/bpf/cgroup_helpers.h
@@ -1,6 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #ifndef __CGROUP_HELPERS_H
 #define __CGROUP_HELPERS_H
+
 #include <errno.h>
 #include <string.h>
 
@@ -8,12 +9,21 @@
 #define log_err(MSG, ...) fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
 	__FILE__, __LINE__, clean_errno(), ##__VA_ARGS__)
 
-
+/* cgroupv2 related */
 int cgroup_setup_and_join(const char *path);
 int create_and_get_cgroup(const char *path);
+unsigned long long get_cgroup_id(const char *path);
+
 int join_cgroup(const char *path);
+
 int setup_cgroup_environment(void);
 void cleanup_cgroup_environment(void);
-unsigned long long get_cgroup_id(const char *path);
 
-#endif
+/* cgroupv1 related */
+int set_classid(unsigned int id);
+int join_classid(void);
+
+int setup_classid_environment(void);
+void cleanup_classid_environment(void);
+
+#endif /* __CGROUP_HELPERS_H */
diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
index 7e9f6375757a..6db1af8fdee7 100644
--- a/tools/testing/selftests/bpf/network_helpers.c
+++ b/tools/testing/selftests/bpf/network_helpers.c
@@ -208,11 +208,26 @@ error_close:
 
 static int connect_fd_to_addr(int fd,
 			      const struct sockaddr_storage *addr,
-			      socklen_t addrlen)
+			      socklen_t addrlen, const bool must_fail)
 {
-	if (connect(fd, (const struct sockaddr *)addr, addrlen)) {
-		log_err("Failed to connect to server");
-		return -1;
+	int ret;
+
+	errno = 0;
+	ret = connect(fd, (const struct sockaddr *)addr, addrlen);
+	if (must_fail) {
+		if (!ret) {
+			log_err("Unexpected success to connect to server");
+			return -1;
+		}
+		if (errno != EPERM) {
+			log_err("Unexpected error from connect to server");
+			return -1;
+		}
+	} else {
+		if (ret) {
+			log_err("Failed to connect to server");
+			return -1;
+		}
 	}
 
 	return 0;
@@ -257,7 +272,7 @@ int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts)
 		       strlen(opts->cc) + 1))
 		goto error_close;
 
-	if (connect_fd_to_addr(fd, &addr, addrlen))
+	if (connect_fd_to_addr(fd, &addr, addrlen, opts->must_fail))
 		goto error_close;
 
 	return fd;
@@ -289,7 +304,7 @@ int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms)
 		return -1;
 	}
 
-	if (connect_fd_to_addr(client_fd, &addr, len))
+	if (connect_fd_to_addr(client_fd, &addr, len, false))
 		return -1;
 
 	return 0;
diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h
index da7e132657d5..d198181a5648 100644
--- a/tools/testing/selftests/bpf/network_helpers.h
+++ b/tools/testing/selftests/bpf/network_helpers.h
@@ -20,6 +20,7 @@ typedef __u16 __sum16;
 struct network_helper_opts {
 	const char *cc;
 	int timeout_ms;
+	bool must_fail;
 };
 
 /* ipv4 test vector */
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c
new file mode 100644
index 000000000000..ab3b9bc5e6d1
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+
+#include "connect4_dropper.skel.h"
+
+#include "cgroup_helpers.h"
+#include "network_helpers.h"
+
+static int run_test(int cgroup_fd, int server_fd, bool classid)
+{
+	struct network_helper_opts opts = {
+		.must_fail = true,
+	};
+	struct connect4_dropper *skel;
+	int fd, err = 0;
+
+	skel = connect4_dropper__open_and_load();
+	if (!ASSERT_OK_PTR(skel, "skel_open"))
+		return -1;
+
+	skel->links.connect_v4_dropper =
+		bpf_program__attach_cgroup(skel->progs.connect_v4_dropper,
+					   cgroup_fd);
+	if (!ASSERT_OK_PTR(skel->links.connect_v4_dropper, "prog_attach")) {
+		err = -1;
+		goto out;
+	}
+
+	if (classid && !ASSERT_OK(join_classid(), "join_classid")) {
+		err = -1;
+		goto out;
+	}
+
+	fd = connect_to_fd_opts(server_fd, &opts);
+	if (fd < 0)
+		err = -1;
+	else
+		close(fd);
+out:
+	connect4_dropper__destroy(skel);
+	return err;
+}
+
+void test_cgroup_v1v2(void)
+{
+	struct network_helper_opts opts = {};
+	int server_fd, client_fd, cgroup_fd;
+	static const int port = 60123;
+
+	/* Step 1: Check base connectivity works without any BPF. */
+	server_fd = start_server(AF_INET, SOCK_STREAM, NULL, port, 0);
+	if (!ASSERT_GE(server_fd, 0, "server_fd"))
+		return;
+	client_fd = connect_to_fd_opts(server_fd, &opts);
+	if (!ASSERT_GE(client_fd, 0, "client_fd")) {
+		close(server_fd);
+		return;
+	}
+	close(client_fd);
+	close(server_fd);
+
+	/* Step 2: Check BPF policy prog attached to cgroups drops connectivity. */
+	cgroup_fd = test__join_cgroup("/connect_dropper");
+	if (!ASSERT_GE(cgroup_fd, 0, "cgroup_fd"))
+		return;
+	server_fd = start_server(AF_INET, SOCK_STREAM, NULL, port, 0);
+	if (!ASSERT_GE(server_fd, 0, "server_fd")) {
+		close(cgroup_fd);
+		return;
+	}
+	ASSERT_OK(run_test(cgroup_fd, server_fd, false), "cgroup-v2-only");
+	setup_classid_environment();
+	set_classid(42);
+	ASSERT_OK(run_test(cgroup_fd, server_fd, true), "cgroup-v1v2");
+	cleanup_classid_environment();
+	close(server_fd);
+	close(cgroup_fd);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c b/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c
index 53f0e0fa1a53..37c20b5ffa70 100644
--- a/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c
+++ b/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c
@@ -1,7 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #define _GNU_SOURCE
 #include <test_progs.h>
-#include <linux/ptrace.h>
 #include "test_task_pt_regs.skel.h"
 
 void test_task_pt_regs(void)
diff --git a/tools/testing/selftests/bpf/progs/connect4_dropper.c b/tools/testing/selftests/bpf/progs/connect4_dropper.c
new file mode 100644
index 000000000000..b565d997810a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/connect4_dropper.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <string.h>
+
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+
+#include <sys/socket.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#define VERDICT_REJECT	0
+#define VERDICT_PROCEED	1
+
+SEC("cgroup/connect4")
+int connect_v4_dropper(struct bpf_sock_addr *ctx)
+{
+	if (ctx->type != SOCK_STREAM)
+		return VERDICT_PROCEED;
+	if (ctx->user_port == bpf_htons(60123))
+		return VERDICT_REJECT;
+	return VERDICT_PROCEED;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_task_pt_regs.c b/tools/testing/selftests/bpf/progs/test_task_pt_regs.c
index 6c059f1cfa1b..e6cb09259408 100644
--- a/tools/testing/selftests/bpf/progs/test_task_pt_regs.c
+++ b/tools/testing/selftests/bpf/progs/test_task_pt_regs.c
@@ -1,12 +1,17 @@
 // SPDX-License-Identifier: GPL-2.0
 
-#include <linux/ptrace.h>
-#include <linux/bpf.h>
+#include "vmlinux.h"
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
 
-struct pt_regs current_regs = {};
-struct pt_regs ctx_regs = {};
+#define PT_REGS_SIZE sizeof(struct pt_regs)
+
+/*
+ * The kernel struct pt_regs isn't exported in its entirety to userspace.
+ * Pass it as an array to task_pt_regs.c
+ */
+char current_regs[PT_REGS_SIZE] = {};
+char ctx_regs[PT_REGS_SIZE] = {};
 int uprobe_res = 0;
 
 SEC("uprobe/trigger_func")
@@ -17,8 +22,10 @@ int handle_uprobe(struct pt_regs *ctx)
 
 	current = bpf_get_current_task_btf();
 	regs = (struct pt_regs *) bpf_task_pt_regs(current);
-	__builtin_memcpy(&current_regs, regs, sizeof(*regs));
-	__builtin_memcpy(&ctx_regs, ctx, sizeof(*ctx));
+	if (bpf_probe_read_kernel(current_regs, PT_REGS_SIZE, regs))
+		return 0;
+	if (bpf_probe_read_kernel(ctx_regs, PT_REGS_SIZE, ctx))
+		return 0;
 
 	/* Prove that uprobe was run */
 	uprobe_res = 1;
diff --git a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh
index 59ea56945e6c..b497bb85b667 100755
--- a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh
+++ b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh
@@ -112,6 +112,14 @@ setup()
 	ip netns add "${NS2}"
 	ip netns add "${NS3}"
 
+	# rp_filter gets confused by what these tests are doing, so disable it
+	ip netns exec ${NS1} sysctl -wq net.ipv4.conf.all.rp_filter=0
+	ip netns exec ${NS2} sysctl -wq net.ipv4.conf.all.rp_filter=0
+	ip netns exec ${NS3} sysctl -wq net.ipv4.conf.all.rp_filter=0
+	ip netns exec ${NS1} sysctl -wq net.ipv4.conf.default.rp_filter=0
+	ip netns exec ${NS2} sysctl -wq net.ipv4.conf.default.rp_filter=0
+	ip netns exec ${NS3} sysctl -wq net.ipv4.conf.default.rp_filter=0
+
 	ip link add veth1 type veth peer name veth2
 	ip link add veth3 type veth peer name veth4
 	ip link add veth5 type veth peer name veth6
@@ -236,11 +244,6 @@ setup()
 	ip -netns ${NS1} -6 route add ${IPv6_GRE}/128 dev veth5 via ${IPv6_6} ${VRF}
 	ip -netns ${NS2} -6 route add ${IPv6_GRE}/128 dev veth7 via ${IPv6_8} ${VRF}
 
-	# rp_filter gets confused by what these tests are doing, so disable it
-	ip netns exec ${NS1} sysctl -wq net.ipv4.conf.all.rp_filter=0
-	ip netns exec ${NS2} sysctl -wq net.ipv4.conf.all.rp_filter=0
-	ip netns exec ${NS3} sysctl -wq net.ipv4.conf.all.rp_filter=0
-
 	TMPFILE=$(mktemp /tmp/test_lwt_ip_encap.XXXXXX)
 
 	sleep 1  # reduce flakiness
diff --git a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
index beee0d5646a6..f7d84549cc3e 100755
--- a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
+++ b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
@@ -1,6 +1,6 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
-# Copyright 2020 NXP Semiconductors
+# Copyright 2020 NXP
 
 WAIT_TIME=1
 NUM_NETIFS=4
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index 98053d3afbda..b8dbabe24ac2 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -24,6 +24,7 @@
 /x86_64/smm_test
 /x86_64/state_test
 /x86_64/svm_vmcall_test
+/x86_64/svm_int_ctl_test
 /x86_64/sync_regs_test
 /x86_64/tsc_msrs_test
 /x86_64/userspace_msr_exit_test
@@ -48,6 +49,7 @@
 /kvm_page_table_test
 /memslot_modification_stress_test
 /memslot_perf_test
+/rseq_test
 /set_memory_region_test
 /steal_time
 /kvm_binary_stats_test
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 5d05801ab816..d1774f461393 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -56,6 +56,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/smm_test
 TEST_GEN_PROGS_x86_64 += x86_64/state_test
 TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test
 TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test
+TEST_GEN_PROGS_x86_64 += x86_64/svm_int_ctl_test
 TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test
 TEST_GEN_PROGS_x86_64 += x86_64/userspace_msr_exit_test
 TEST_GEN_PROGS_x86_64 += x86_64/vmx_apic_access_test
@@ -80,6 +81,7 @@ TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus
 TEST_GEN_PROGS_x86_64 += kvm_page_table_test
 TEST_GEN_PROGS_x86_64 += memslot_modification_stress_test
 TEST_GEN_PROGS_x86_64 += memslot_perf_test
+TEST_GEN_PROGS_x86_64 += rseq_test
 TEST_GEN_PROGS_x86_64 += set_memory_region_test
 TEST_GEN_PROGS_x86_64 += steal_time
 TEST_GEN_PROGS_x86_64 += kvm_binary_stats_test
@@ -93,6 +95,7 @@ TEST_GEN_PROGS_aarch64 += dirty_log_test
 TEST_GEN_PROGS_aarch64 += dirty_log_perf_test
 TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus
 TEST_GEN_PROGS_aarch64 += kvm_page_table_test
+TEST_GEN_PROGS_aarch64 += rseq_test
 TEST_GEN_PROGS_aarch64 += set_memory_region_test
 TEST_GEN_PROGS_aarch64 += steal_time
 TEST_GEN_PROGS_aarch64 += kvm_binary_stats_test
@@ -104,6 +107,7 @@ TEST_GEN_PROGS_s390x += demand_paging_test
 TEST_GEN_PROGS_s390x += dirty_log_test
 TEST_GEN_PROGS_s390x += kvm_create_max_vcpus
 TEST_GEN_PROGS_s390x += kvm_page_table_test
+TEST_GEN_PROGS_s390x += rseq_test
 TEST_GEN_PROGS_s390x += set_memory_region_test
 TEST_GEN_PROGS_s390x += kvm_binary_stats_test
 
diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c
index 71e277c7c3f3..5d95113c7b7c 100644
--- a/tools/testing/selftests/kvm/access_tracking_perf_test.c
+++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c
@@ -371,9 +371,7 @@ static void help(char *name)
 	printf(" -v: specify the number of vCPUs to run.\n");
 	printf(" -o: Overlap guest memory accesses instead of partitioning\n"
 	       "     them into a separate region of memory for each vCPU.\n");
-	printf(" -s: specify the type of memory that should be used to\n"
-	       "     back the guest data region.\n\n");
-	backing_src_help();
+	backing_src_help("-s");
 	puts("");
 	exit(0);
 }
@@ -381,7 +379,7 @@ static void help(char *name)
 int main(int argc, char *argv[])
 {
 	struct test_params params = {
-		.backing_src = VM_MEM_SRC_ANONYMOUS,
+		.backing_src = DEFAULT_VM_MEM_SRC,
 		.vcpu_memory_bytes = DEFAULT_PER_VCPU_MEM_SIZE,
 		.vcpus = 1,
 	};
diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c
index e79c1b64977f..1510b21e6306 100644
--- a/tools/testing/selftests/kvm/demand_paging_test.c
+++ b/tools/testing/selftests/kvm/demand_paging_test.c
@@ -179,7 +179,7 @@ static void *uffd_handler_thread_fn(void *arg)
 			return NULL;
 		}
 
-		if (!pollfd[0].revents & POLLIN)
+		if (!(pollfd[0].revents & POLLIN))
 			continue;
 
 		r = read(uffd, &msg, sizeof(msg));
@@ -416,7 +416,7 @@ static void help(char *name)
 {
 	puts("");
 	printf("usage: %s [-h] [-m vm_mode] [-u uffd_mode] [-d uffd_delay_usec]\n"
-	       "          [-b memory] [-t type] [-v vcpus] [-o]\n", name);
+	       "          [-b memory] [-s type] [-v vcpus] [-o]\n", name);
 	guest_modes_help();
 	printf(" -u: use userfaultfd to handle vCPU page faults. Mode is a\n"
 	       "     UFFD registration mode: 'MISSING' or 'MINOR'.\n");
@@ -426,8 +426,7 @@ static void help(char *name)
 	printf(" -b: specify the size of the memory region which should be\n"
 	       "     demand paged by each vCPU. e.g. 10M or 3G.\n"
 	       "     Default: 1G\n");
-	printf(" -t: The type of backing memory to use. Default: anonymous\n");
-	backing_src_help();
+	backing_src_help("-s");
 	printf(" -v: specify the number of vCPUs to run.\n");
 	printf(" -o: Overlap guest memory accesses instead of partitioning\n"
 	       "     them into a separate region of memory for each vCPU.\n");
@@ -439,14 +438,14 @@ int main(int argc, char *argv[])
 {
 	int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
 	struct test_params p = {
-		.src_type = VM_MEM_SRC_ANONYMOUS,
+		.src_type = DEFAULT_VM_MEM_SRC,
 		.partition_vcpu_memory_access = true,
 	};
 	int opt;
 
 	guest_modes_append_default();
 
-	while ((opt = getopt(argc, argv, "hm:u:d:b:t:v:o")) != -1) {
+	while ((opt = getopt(argc, argv, "hm:u:d:b:s:v:o")) != -1) {
 		switch (opt) {
 		case 'm':
 			guest_modes_cmdline(optarg);
@@ -465,7 +464,7 @@ int main(int argc, char *argv[])
 		case 'b':
 			guest_percpu_mem_size = parse_size(optarg);
 			break;
-		case 't':
+		case 's':
 			p.src_type = parse_backing_src_type(optarg);
 			break;
 		case 'v':
@@ -485,7 +484,7 @@ int main(int argc, char *argv[])
 
 	if (p.uffd_mode == UFFDIO_REGISTER_MODE_MINOR &&
 	    !backing_src_is_shared(p.src_type)) {
-		TEST_FAIL("userfaultfd MINOR mode requires shared memory; pick a different -t");
+		TEST_FAIL("userfaultfd MINOR mode requires shared memory; pick a different -s");
 	}
 
 	for_each_guest_mode(run_test, &p);
diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
index 479868570d59..7ffab5bd5ce5 100644
--- a/tools/testing/selftests/kvm/dirty_log_perf_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -118,42 +118,64 @@ static inline void disable_dirty_logging(struct kvm_vm *vm, int slots)
 	toggle_dirty_logging(vm, slots, false);
 }
 
-static void get_dirty_log(struct kvm_vm *vm, int slots, unsigned long *bitmap,
-			  uint64_t nr_pages)
+static void get_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[], int slots)
 {
-	uint64_t slot_pages = nr_pages / slots;
 	int i;
 
 	for (i = 0; i < slots; i++) {
 		int slot = PERF_TEST_MEM_SLOT_INDEX + i;
-		unsigned long *slot_bitmap = bitmap + i * slot_pages;
 
-		kvm_vm_get_dirty_log(vm, slot, slot_bitmap);
+		kvm_vm_get_dirty_log(vm, slot, bitmaps[i]);
 	}
 }
 
-static void clear_dirty_log(struct kvm_vm *vm, int slots, unsigned long *bitmap,
-			    uint64_t nr_pages)
+static void clear_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[],
+			    int slots, uint64_t pages_per_slot)
 {
-	uint64_t slot_pages = nr_pages / slots;
 	int i;
 
 	for (i = 0; i < slots; i++) {
 		int slot = PERF_TEST_MEM_SLOT_INDEX + i;
-		unsigned long *slot_bitmap = bitmap + i * slot_pages;
 
-		kvm_vm_clear_dirty_log(vm, slot, slot_bitmap, 0, slot_pages);
+		kvm_vm_clear_dirty_log(vm, slot, bitmaps[i], 0, pages_per_slot);
 	}
 }
 
+static unsigned long **alloc_bitmaps(int slots, uint64_t pages_per_slot)
+{
+	unsigned long **bitmaps;
+	int i;
+
+	bitmaps = malloc(slots * sizeof(bitmaps[0]));
+	TEST_ASSERT(bitmaps, "Failed to allocate bitmaps array.");
+
+	for (i = 0; i < slots; i++) {
+		bitmaps[i] = bitmap_zalloc(pages_per_slot);
+		TEST_ASSERT(bitmaps[i], "Failed to allocate slot bitmap.");
+	}
+
+	return bitmaps;
+}
+
+static void free_bitmaps(unsigned long *bitmaps[], int slots)
+{
+	int i;
+
+	for (i = 0; i < slots; i++)
+		free(bitmaps[i]);
+
+	free(bitmaps);
+}
+
 static void run_test(enum vm_guest_mode mode, void *arg)
 {
 	struct test_params *p = arg;
 	pthread_t *vcpu_threads;
 	struct kvm_vm *vm;
-	unsigned long *bmap;
+	unsigned long **bitmaps;
 	uint64_t guest_num_pages;
 	uint64_t host_num_pages;
+	uint64_t pages_per_slot;
 	int vcpu_id;
 	struct timespec start;
 	struct timespec ts_diff;
@@ -171,7 +193,9 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 	guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm_get_page_shift(vm);
 	guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
 	host_num_pages = vm_num_host_pages(mode, guest_num_pages);
-	bmap = bitmap_zalloc(host_num_pages);
+	pages_per_slot = host_num_pages / p->slots;
+
+	bitmaps = alloc_bitmaps(p->slots, pages_per_slot);
 
 	if (dirty_log_manual_caps) {
 		cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2;
@@ -239,7 +263,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 			iteration, ts_diff.tv_sec, ts_diff.tv_nsec);
 
 		clock_gettime(CLOCK_MONOTONIC, &start);
-		get_dirty_log(vm, p->slots, bmap, host_num_pages);
+		get_dirty_log(vm, bitmaps, p->slots);
 		ts_diff = timespec_elapsed(start);
 		get_dirty_log_total = timespec_add(get_dirty_log_total,
 						   ts_diff);
@@ -248,7 +272,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 
 		if (dirty_log_manual_caps) {
 			clock_gettime(CLOCK_MONOTONIC, &start);
-			clear_dirty_log(vm, p->slots, bmap, host_num_pages);
+			clear_dirty_log(vm, bitmaps, p->slots, pages_per_slot);
 			ts_diff = timespec_elapsed(start);
 			clear_dirty_log_total = timespec_add(clear_dirty_log_total,
 							     ts_diff);
@@ -281,7 +305,7 @@ static void run_test(enum vm_guest_mode mode, void *arg)
 			clear_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec);
 	}
 
-	free(bmap);
+	free_bitmaps(bitmaps, p->slots);
 	free(vcpu_threads);
 	perf_test_destroy_vm(vm);
 }
@@ -308,11 +332,9 @@ static void help(char *name)
 	printf(" -v: specify the number of vCPUs to run.\n");
 	printf(" -o: Overlap guest memory accesses instead of partitioning\n"
 	       "     them into a separate region of memory for each vCPU.\n");
-	printf(" -s: specify the type of memory that should be used to\n"
-	       "     back the guest data region.\n\n");
+	backing_src_help("-s");
 	printf(" -x: Split the memory region into this number of memslots.\n"
-	       "     (default: 1)");
-	backing_src_help();
+	       "     (default: 1)\n");
 	puts("");
 	exit(0);
 }
@@ -324,7 +346,7 @@ int main(int argc, char *argv[])
 		.iterations = TEST_HOST_LOOP_N,
 		.wr_fract = 1,
 		.partition_vcpu_memory_access = true,
-		.backing_src = VM_MEM_SRC_ANONYMOUS,
+		.backing_src = DEFAULT_VM_MEM_SRC,
 		.slots = 1,
 	};
 	int opt;
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
index 451fed5ce8e7..f8fddc84c0d3 100644
--- a/tools/testing/selftests/kvm/include/test_util.h
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -90,6 +90,8 @@ enum vm_mem_backing_src_type {
 	NUM_SRC_TYPES,
 };
 
+#define DEFAULT_VM_MEM_SRC VM_MEM_SRC_ANONYMOUS
+
 struct vm_mem_backing_src_alias {
 	const char *name;
 	uint32_t flag;
@@ -102,7 +104,7 @@ size_t get_trans_hugepagesz(void);
 size_t get_def_hugetlb_pagesz(void);
 const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(uint32_t i);
 size_t get_backing_src_pagesz(uint32_t i);
-void backing_src_help(void);
+void backing_src_help(const char *flag);
 enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name);
 long get_run_delay(void);
 
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 242ae8e09a65..05e65ca1c30c 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -312,37 +312,37 @@ static inline void set_xmm(int n, unsigned long val)
 	}
 }
 
-typedef unsigned long v1di __attribute__ ((vector_size (8)));
+#define GET_XMM(__xmm)							\
+({									\
+	unsigned long __val;						\
+	asm volatile("movq %%"#__xmm", %0" : "=r"(__val));		\
+	__val;								\
+})
+
 static inline unsigned long get_xmm(int n)
 {
 	assert(n >= 0 && n <= 7);
 
-	register v1di xmm0 __asm__("%xmm0");
-	register v1di xmm1 __asm__("%xmm1");
-	register v1di xmm2 __asm__("%xmm2");
-	register v1di xmm3 __asm__("%xmm3");
-	register v1di xmm4 __asm__("%xmm4");
-	register v1di xmm5 __asm__("%xmm5");
-	register v1di xmm6 __asm__("%xmm6");
-	register v1di xmm7 __asm__("%xmm7");
 	switch (n) {
 	case 0:
-		return (unsigned long)xmm0;
+		return GET_XMM(xmm0);
 	case 1:
-		return (unsigned long)xmm1;
+		return GET_XMM(xmm1);
 	case 2:
-		return (unsigned long)xmm2;
+		return GET_XMM(xmm2);
 	case 3:
-		return (unsigned long)xmm3;
+		return GET_XMM(xmm3);
 	case 4:
-		return (unsigned long)xmm4;
+		return GET_XMM(xmm4);
 	case 5:
-		return (unsigned long)xmm5;
+		return GET_XMM(xmm5);
 	case 6:
-		return (unsigned long)xmm6;
+		return GET_XMM(xmm6);
 	case 7:
-		return (unsigned long)xmm7;
+		return GET_XMM(xmm7);
 	}
+
+	/* never reached */
 	return 0;
 }
 
diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c
index 0d04a7db7f24..36407cb0ec85 100644
--- a/tools/testing/selftests/kvm/kvm_page_table_test.c
+++ b/tools/testing/selftests/kvm/kvm_page_table_test.c
@@ -456,10 +456,7 @@ static void help(char *name)
 	       "     (default: 1G)\n");
 	printf(" -v: specify the number of vCPUs to run\n"
 	       "     (default: 1)\n");
-	printf(" -s: specify the type of memory that should be used to\n"
-	       "     back the guest data region.\n"
-	       "     (default: anonymous)\n\n");
-	backing_src_help();
+	backing_src_help("-s");
 	puts("");
 }
 
@@ -468,7 +465,7 @@ int main(int argc, char *argv[])
 	int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
 	struct test_params p = {
 		.test_mem_size = DEFAULT_TEST_MEM_SIZE,
-		.src_type = VM_MEM_SRC_ANONYMOUS,
+		.src_type = DEFAULT_VM_MEM_SRC,
 	};
 	int opt;
 
diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c
index a9107bfae402..b72429108993 100644
--- a/tools/testing/selftests/kvm/lib/test_util.c
+++ b/tools/testing/selftests/kvm/lib/test_util.c
@@ -283,13 +283,22 @@ size_t get_backing_src_pagesz(uint32_t i)
 	}
 }
 
-void backing_src_help(void)
+static void print_available_backing_src_types(const char *prefix)
 {
 	int i;
 
-	printf("Available backing src types:\n");
+	printf("%sAvailable backing src types:\n", prefix);
+
 	for (i = 0; i < NUM_SRC_TYPES; i++)
-		printf("\t%s\n", vm_mem_backing_src_alias(i)->name);
+		printf("%s    %s\n", prefix, vm_mem_backing_src_alias(i)->name);
+}
+
+void backing_src_help(const char *flag)
+{
+	printf(" %s: specify the type of memory that should be used to\n"
+	       "     back the guest data region. (default: %s)\n",
+	       flag, vm_mem_backing_src_alias(DEFAULT_VM_MEM_SRC)->name);
+	print_available_backing_src_types("     ");
 }
 
 enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name)
@@ -300,7 +309,7 @@ enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name)
 		if (!strcmp(type_name, vm_mem_backing_src_alias(i)->name))
 			return i;
 
-	backing_src_help();
+	print_available_backing_src_types("");
 	TEST_FAIL("Unknown backing src type: %s", type_name);
 	return -1;
 }
diff --git a/tools/testing/selftests/kvm/rseq_test.c b/tools/testing/selftests/kvm/rseq_test.c
new file mode 100644
index 000000000000..4158da0da2bb
--- /dev/null
+++ b/tools/testing/selftests/kvm/rseq_test.c
@@ -0,0 +1,286 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <errno.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+#include <syscall.h>
+#include <sys/ioctl.h>
+#include <sys/sysinfo.h>
+#include <asm/barrier.h>
+#include <linux/atomic.h>
+#include <linux/rseq.h>
+#include <linux/unistd.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+
+#define VCPU_ID 0
+
+static __thread volatile struct rseq __rseq = {
+	.cpu_id = RSEQ_CPU_ID_UNINITIALIZED,
+};
+
+/*
+ * Use an arbitrary, bogus signature for configuring rseq, this test does not
+ * actually enter an rseq critical section.
+ */
+#define RSEQ_SIG 0xdeadbeef
+
+/*
+ * Any bug related to task migration is likely to be timing-dependent; perform
+ * a large number of migrations to reduce the odds of a false negative.
+ */
+#define NR_TASK_MIGRATIONS 100000
+
+static pthread_t migration_thread;
+static cpu_set_t possible_mask;
+static int min_cpu, max_cpu;
+static bool done;
+
+static atomic_t seq_cnt;
+
+static void guest_code(void)
+{
+	for (;;)
+		GUEST_SYNC(0);
+}
+
+static void sys_rseq(int flags)
+{
+	int r;
+
+	r = syscall(__NR_rseq, &__rseq, sizeof(__rseq), flags, RSEQ_SIG);
+	TEST_ASSERT(!r, "rseq failed, errno = %d (%s)", errno, strerror(errno));
+}
+
+static int next_cpu(int cpu)
+{
+	/*
+	 * Advance to the next CPU, skipping those that weren't in the original
+	 * affinity set.  Sadly, there is no CPU_SET_FOR_EACH, and cpu_set_t's
+	 * data storage is considered as opaque.  Note, if this task is pinned
+	 * to a small set of discontigous CPUs, e.g. 2 and 1023, this loop will
+	 * burn a lot cycles and the test will take longer than normal to
+	 * complete.
+	 */
+	do {
+		cpu++;
+		if (cpu > max_cpu) {
+			cpu = min_cpu;
+			TEST_ASSERT(CPU_ISSET(cpu, &possible_mask),
+				    "Min CPU = %d must always be usable", cpu);
+			break;
+		}
+	} while (!CPU_ISSET(cpu, &possible_mask));
+
+	return cpu;
+}
+
+static void *migration_worker(void *ign)
+{
+	cpu_set_t allowed_mask;
+	int r, i, cpu;
+
+	CPU_ZERO(&allowed_mask);
+
+	for (i = 0, cpu = min_cpu; i < NR_TASK_MIGRATIONS; i++, cpu = next_cpu(cpu)) {
+		CPU_SET(cpu, &allowed_mask);
+
+		/*
+		 * Bump the sequence count twice to allow the reader to detect
+		 * that a migration may have occurred in between rseq and sched
+		 * CPU ID reads.  An odd sequence count indicates a migration
+		 * is in-progress, while a completely different count indicates
+		 * a migration occurred since the count was last read.
+		 */
+		atomic_inc(&seq_cnt);
+
+		/*
+		 * Ensure the odd count is visible while sched_getcpu() isn't
+		 * stable, i.e. while changing affinity is in-progress.
+		 */
+		smp_wmb();
+		r = sched_setaffinity(0, sizeof(allowed_mask), &allowed_mask);
+		TEST_ASSERT(!r, "sched_setaffinity failed, errno = %d (%s)",
+			    errno, strerror(errno));
+		smp_wmb();
+		atomic_inc(&seq_cnt);
+
+		CPU_CLR(cpu, &allowed_mask);
+
+		/*
+		 * Wait 1-10us before proceeding to the next iteration and more
+		 * specifically, before bumping seq_cnt again.  A delay is
+		 * needed on three fronts:
+		 *
+		 *  1. To allow sched_setaffinity() to prompt migration before
+		 *     ioctl(KVM_RUN) enters the guest so that TIF_NOTIFY_RESUME
+		 *     (or TIF_NEED_RESCHED, which indirectly leads to handling
+		 *     NOTIFY_RESUME) is handled in KVM context.
+		 *
+		 *     If NOTIFY_RESUME/NEED_RESCHED is set after KVM enters
+		 *     the guest, the guest will trigger a IO/MMIO exit all the
+		 *     way to userspace and the TIF flags will be handled by
+		 *     the generic "exit to userspace" logic, not by KVM.  The
+		 *     exit to userspace is necessary to give the test a chance
+		 *     to check the rseq CPU ID (see #2).
+		 *
+		 *     Alternatively, guest_code() could include an instruction
+		 *     to trigger an exit that is handled by KVM, but any such
+		 *     exit requires architecture specific code.
+		 *
+		 *  2. To let ioctl(KVM_RUN) make its way back to the test
+		 *     before the next round of migration.  The test's check on
+		 *     the rseq CPU ID must wait for migration to complete in
+		 *     order to avoid false positive, thus any kernel rseq bug
+		 *     will be missed if the next migration starts before the
+		 *     check completes.
+		 *
+		 *  3. To ensure the read-side makes efficient forward progress,
+		 *     e.g. if sched_getcpu() involves a syscall.  Stalling the
+		 *     read-side means the test will spend more time waiting for
+		 *     sched_getcpu() to stabilize and less time trying to hit
+		 *     the timing-dependent bug.
+		 *
+		 * Because any bug in this area is likely to be timing-dependent,
+		 * run with a range of delays at 1us intervals from 1us to 10us
+		 * as a best effort to avoid tuning the test to the point where
+		 * it can hit _only_ the original bug and not detect future
+		 * regressions.
+		 *
+		 * The original bug can reproduce with a delay up to ~500us on
+		 * x86-64, but starts to require more iterations to reproduce
+		 * as the delay creeps above ~10us, and the average runtime of
+		 * each iteration obviously increases as well.  Cap the delay
+		 * at 10us to keep test runtime reasonable while minimizing
+		 * potential coverage loss.
+		 *
+		 * The lower bound for reproducing the bug is likely below 1us,
+		 * e.g. failures occur on x86-64 with nanosleep(0), but at that
+		 * point the overhead of the syscall likely dominates the delay.
+		 * Use usleep() for simplicity and to avoid unnecessary kernel
+		 * dependencies.
+		 */
+		usleep((i % 10) + 1);
+	}
+	done = true;
+	return NULL;
+}
+
+static int calc_min_max_cpu(void)
+{
+	int i, cnt, nproc;
+
+	if (CPU_COUNT(&possible_mask) < 2)
+		return -EINVAL;
+
+	/*
+	 * CPU_SET doesn't provide a FOR_EACH helper, get the min/max CPU that
+	 * this task is affined to in order to reduce the time spent querying
+	 * unusable CPUs, e.g. if this task is pinned to a small percentage of
+	 * total CPUs.
+	 */
+	nproc = get_nprocs_conf();
+	min_cpu = -1;
+	max_cpu = -1;
+	cnt = 0;
+
+	for (i = 0; i < nproc; i++) {
+		if (!CPU_ISSET(i, &possible_mask))
+			continue;
+		if (min_cpu == -1)
+			min_cpu = i;
+		max_cpu = i;
+		cnt++;
+	}
+
+	return (cnt < 2) ? -EINVAL : 0;
+}
+
+int main(int argc, char *argv[])
+{
+	int r, i, snapshot;
+	struct kvm_vm *vm;
+	u32 cpu, rseq_cpu;
+
+	/* Tell stdout not to buffer its content */
+	setbuf(stdout, NULL);
+
+	r = sched_getaffinity(0, sizeof(possible_mask), &possible_mask);
+	TEST_ASSERT(!r, "sched_getaffinity failed, errno = %d (%s)", errno,
+		    strerror(errno));
+
+	if (calc_min_max_cpu()) {
+		print_skip("Only one usable CPU, task migration not possible");
+		exit(KSFT_SKIP);
+	}
+
+	sys_rseq(0);
+
+	/*
+	 * Create and run a dummy VM that immediately exits to userspace via
+	 * GUEST_SYNC, while concurrently migrating the process by setting its
+	 * CPU affinity.
+	 */
+	vm = vm_create_default(VCPU_ID, 0, guest_code);
+	ucall_init(vm, NULL);
+
+	pthread_create(&migration_thread, NULL, migration_worker, 0);
+
+	for (i = 0; !done; i++) {
+		vcpu_run(vm, VCPU_ID);
+		TEST_ASSERT(get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC,
+			    "Guest failed?");
+
+		/*
+		 * Verify rseq's CPU matches sched's CPU.  Ensure migration
+		 * doesn't occur between sched_getcpu() and reading the rseq
+		 * cpu_id by rereading both if the sequence count changes, or
+		 * if the count is odd (migration in-progress).
+		 */
+		do {
+			/*
+			 * Drop bit 0 to force a mismatch if the count is odd,
+			 * i.e. if a migration is in-progress.
+			 */
+			snapshot = atomic_read(&seq_cnt) & ~1;
+
+			/*
+			 * Ensure reading sched_getcpu() and rseq.cpu_id
+			 * complete in a single "no migration" window, i.e. are
+			 * not reordered across the seq_cnt reads.
+			 */
+			smp_rmb();
+			cpu = sched_getcpu();
+			rseq_cpu = READ_ONCE(__rseq.cpu_id);
+			smp_rmb();
+		} while (snapshot != atomic_read(&seq_cnt));
+
+		TEST_ASSERT(rseq_cpu == cpu,
+			    "rseq CPU = %d, sched CPU = %d\n", rseq_cpu, cpu);
+	}
+
+	/*
+	 * Sanity check that the test was able to enter the guest a reasonable
+	 * number of times, e.g. didn't get stalled too often/long waiting for
+	 * sched_getcpu() to stabilize.  A 2:1 migration:KVM_RUN ratio is a
+	 * fairly conservative ratio on x86-64, which can do _more_ KVM_RUNs
+	 * than migrations given the 1us+ delay in the migration task.
+	 */
+	TEST_ASSERT(i > (NR_TASK_MIGRATIONS / 2),
+		    "Only performed %d KVM_RUNs, task stalled too much?\n", i);
+
+	pthread_join(migration_thread, NULL);
+
+	kvm_vm_free(vm);
+
+	sys_rseq(RSEQ_FLAG_UNREGISTER);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c
index 2172d65b85e4..62f2eb9ee3d5 100644
--- a/tools/testing/selftests/kvm/steal_time.c
+++ b/tools/testing/selftests/kvm/steal_time.c
@@ -116,12 +116,12 @@ struct st_time {
 	uint64_t st_time;
 };
 
-static int64_t smccc(uint32_t func, uint32_t arg)
+static int64_t smccc(uint32_t func, uint64_t arg)
 {
 	unsigned long ret;
 
 	asm volatile(
-		"mov	x0, %1\n"
+		"mov	w0, %w1\n"
 		"mov	x1, %2\n"
 		"hvc	#0\n"
 		"mov	%0, x0\n"
diff --git a/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c b/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c
new file mode 100644
index 000000000000..df04f56ce859
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * svm_int_ctl_test
+ *
+ * Copyright (C) 2021, Red Hat, Inc.
+ *
+ * Nested SVM testing: test simultaneous use of V_IRQ from L1 and L0.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "apic.h"
+
+#define VCPU_ID		0
+
+static struct kvm_vm *vm;
+
+bool vintr_irq_called;
+bool intr_irq_called;
+
+#define VINTR_IRQ_NUMBER 0x20
+#define INTR_IRQ_NUMBER 0x30
+
+static void vintr_irq_handler(struct ex_regs *regs)
+{
+	vintr_irq_called = true;
+}
+
+static void intr_irq_handler(struct ex_regs *regs)
+{
+	x2apic_write_reg(APIC_EOI, 0x00);
+	intr_irq_called = true;
+}
+
+static void l2_guest_code(struct svm_test_data *svm)
+{
+	/* This code raises interrupt INTR_IRQ_NUMBER in the L1's LAPIC,
+	 * and since L1 didn't enable virtual interrupt masking,
+	 * L2 should receive it and not L1.
+	 *
+	 * L2 also has virtual interrupt 'VINTR_IRQ_NUMBER' pending in V_IRQ
+	 * so it should also receive it after the following 'sti'.
+	 */
+	x2apic_write_reg(APIC_ICR,
+		APIC_DEST_SELF | APIC_INT_ASSERT | INTR_IRQ_NUMBER);
+
+	__asm__ __volatile__(
+		"sti\n"
+		"nop\n"
+	);
+
+	GUEST_ASSERT(vintr_irq_called);
+	GUEST_ASSERT(intr_irq_called);
+
+	__asm__ __volatile__(
+		"vmcall\n"
+	);
+}
+
+static void l1_guest_code(struct svm_test_data *svm)
+{
+	#define L2_GUEST_STACK_SIZE 64
+	unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+	struct vmcb *vmcb = svm->vmcb;
+
+	x2apic_enable();
+
+	/* Prepare for L2 execution. */
+	generic_svm_setup(svm, l2_guest_code,
+			  &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+	/* No virtual interrupt masking */
+	vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
+
+	/* No intercepts for real and virtual interrupts */
+	vmcb->control.intercept &= ~(1ULL << INTERCEPT_INTR | INTERCEPT_VINTR);
+
+	/* Make a virtual interrupt VINTR_IRQ_NUMBER pending */
+	vmcb->control.int_ctl |= V_IRQ_MASK | (0x1 << V_INTR_PRIO_SHIFT);
+	vmcb->control.int_vector = VINTR_IRQ_NUMBER;
+
+	run_guest(vmcb, svm->vmcb_gpa);
+	GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+	GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+	vm_vaddr_t svm_gva;
+
+	nested_svm_check_supported();
+
+	vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
+
+	vm_init_descriptor_tables(vm);
+	vcpu_init_descriptor_tables(vm, VCPU_ID);
+
+	vm_install_exception_handler(vm, VINTR_IRQ_NUMBER, vintr_irq_handler);
+	vm_install_exception_handler(vm, INTR_IRQ_NUMBER, intr_irq_handler);
+
+	vcpu_alloc_svm(vm, &svm_gva);
+	vcpu_args_set(vm, VCPU_ID, 1, svm_gva);
+
+	struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+	struct ucall uc;
+
+	vcpu_run(vm, VCPU_ID);
+	TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+		    "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
+		    run->exit_reason,
+		    exit_reason_str(run->exit_reason));
+
+	switch (get_ucall(vm, VCPU_ID, &uc)) {
+	case UCALL_ABORT:
+		TEST_FAIL("%s", (const char *)uc.args[0]);
+		break;
+		/* NOT REACHED */
+	case UCALL_DONE:
+		goto done;
+	default:
+		TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
+	}
+done:
+	kvm_vm_free(vm);
+	return 0;
+}
diff --git a/tools/testing/selftests/nci/nci_dev.c b/tools/testing/selftests/nci/nci_dev.c
index e1bf55dabdf6..162c41e9bcae 100644
--- a/tools/testing/selftests/nci/nci_dev.c
+++ b/tools/testing/selftests/nci/nci_dev.c
@@ -746,7 +746,7 @@ int read_write_nci_cmd(int nfc_sock, int virtual_fd, const __u8 *cmd, __u32 cmd_
 		       const __u8 *rsp, __u32 rsp_len)
 {
 	char buf[256];
-	unsigned int len;
+	int len;
 
 	send(nfc_sock, &cmd[3], cmd_len - 3, 0);
 	len = read(virtual_fd, buf, cmd_len);
diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile
index cfc7f4f97fd1..df341648f818 100644
--- a/tools/testing/selftests/net/af_unix/Makefile
+++ b/tools/testing/selftests/net/af_unix/Makefile
@@ -1,5 +1,2 @@
-##TEST_GEN_FILES := test_unix_oob
-TEST_PROGS := test_unix_oob
+TEST_GEN_PROGS := test_unix_oob
 include ../../lib.mk
-
-all: $(TEST_PROGS)
diff --git a/tools/testing/selftests/net/af_unix/test_unix_oob.c b/tools/testing/selftests/net/af_unix/test_unix_oob.c
index 0f3e3763f4f8..3dece8b29253 100644
--- a/tools/testing/selftests/net/af_unix/test_unix_oob.c
+++ b/tools/testing/selftests/net/af_unix/test_unix_oob.c
@@ -271,8 +271,9 @@ main(int argc, char **argv)
 	read_oob(pfd, &oob);
 
 	if (!signal_recvd || len != 127 || oob != '%' || atmark != 1) {
-		fprintf(stderr, "Test 3 failed, sigurg %d len %d OOB %c ",
-		"atmark %d\n", signal_recvd, len, oob, atmark);
+		fprintf(stderr,
+			"Test 3 failed, sigurg %d len %d OOB %c atmark %d\n",
+			signal_recvd, len, oob, atmark);
 		die(1);
 	}
 
diff --git a/tools/testing/selftests/net/altnames.sh b/tools/testing/selftests/net/altnames.sh
index 4254ddc3f70b..1ef9e4159bba 100755
--- a/tools/testing/selftests/net/altnames.sh
+++ b/tools/testing/selftests/net/altnames.sh
@@ -45,7 +45,7 @@ altnames_test()
 	check_err $? "Got unexpected long alternative name from link show JSON"
 
 	ip link property del $DUMMY_DEV altname $SHORT_NAME
-	check_err $? "Failed to add short alternative name"
+	check_err $? "Failed to delete short alternative name"
 
 	ip -j -p link show $SHORT_NAME &>/dev/null
 	check_fail $? "Unexpected success while trying to do link show with deleted short alternative name"
diff --git a/tools/testing/selftests/netfilter/nft_nat_zones.sh b/tools/testing/selftests/netfilter/nft_nat_zones.sh
new file mode 100755
index 000000000000..b9ab37380f33
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nft_nat_zones.sh
@@ -0,0 +1,309 @@
+#!/bin/bash
+#
+# Test connection tracking zone and NAT source port reallocation support.
+#
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+# Don't increase too much, 2000 clients should work
+# just fine but script can then take several minutes with
+# KASAN/debug builds.
+maxclients=100
+
+have_iperf=1
+ret=0
+
+# client1---.
+#            veth1-.
+#                  |
+#               NAT Gateway --veth0--> Server
+#                  | |
+#            veth2-' |
+# client2---'        |
+#  ....              |
+# clientX----vethX---'
+
+# All clients share identical IP address.
+# NAT Gateway uses policy routing and conntrack zones to isolate client
+# namespaces.  Each client connects to Server, each with colliding tuples:
+#   clientsaddr:10000 -> serveraddr:dport
+#   NAT Gateway is supposed to do port reallocation for each of the
+#   connections.
+
+sfx=$(mktemp -u "XXXXXXXX")
+gw="ns-gw-$sfx"
+cl1="ns-cl1-$sfx"
+cl2="ns-cl2-$sfx"
+srv="ns-srv-$sfx"
+
+v4gc1=$(sysctl -n net.ipv4.neigh.default.gc_thresh1 2>/dev/null)
+v4gc2=$(sysctl -n net.ipv4.neigh.default.gc_thresh2 2>/dev/null)
+v4gc3=$(sysctl -n net.ipv4.neigh.default.gc_thresh3 2>/dev/null)
+v6gc1=$(sysctl -n net.ipv6.neigh.default.gc_thresh1 2>/dev/null)
+v6gc2=$(sysctl -n net.ipv6.neigh.default.gc_thresh2 2>/dev/null)
+v6gc3=$(sysctl -n net.ipv6.neigh.default.gc_thresh3 2>/dev/null)
+
+cleanup()
+{
+	ip netns del $gw
+	ip netns del $srv
+	for i in $(seq 1 $maxclients); do
+		ip netns del ns-cl$i-$sfx 2>/dev/null
+	done
+
+	sysctl -q net.ipv4.neigh.default.gc_thresh1=$v4gc1 2>/dev/null
+	sysctl -q net.ipv4.neigh.default.gc_thresh2=$v4gc2 2>/dev/null
+	sysctl -q net.ipv4.neigh.default.gc_thresh3=$v4gc3 2>/dev/null
+	sysctl -q net.ipv6.neigh.default.gc_thresh1=$v6gc1 2>/dev/null
+	sysctl -q net.ipv6.neigh.default.gc_thresh2=$v6gc2 2>/dev/null
+	sysctl -q net.ipv6.neigh.default.gc_thresh3=$v6gc3 2>/dev/null
+}
+
+nft --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without nft tool"
+	exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without ip tool"
+	exit $ksft_skip
+fi
+
+conntrack -V > /dev/null 2>&1
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not run test without conntrack tool"
+	exit $ksft_skip
+fi
+
+iperf3 -v >/dev/null 2>&1
+if [ $? -ne 0 ];then
+	have_iperf=0
+fi
+
+ip netns add "$gw"
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not create net namespace $gw"
+	exit $ksft_skip
+fi
+ip -net "$gw" link set lo up
+
+trap cleanup EXIT
+
+ip netns add "$srv"
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not create server netns $srv"
+	exit $ksft_skip
+fi
+
+ip link add veth0 netns "$gw" type veth peer name eth0 netns "$srv"
+ip -net "$gw" link set veth0 up
+ip -net "$srv" link set lo up
+ip -net "$srv" link set eth0 up
+
+sysctl -q net.ipv6.neigh.default.gc_thresh1=512  2>/dev/null
+sysctl -q net.ipv6.neigh.default.gc_thresh2=1024 2>/dev/null
+sysctl -q net.ipv6.neigh.default.gc_thresh3=4096 2>/dev/null
+sysctl -q net.ipv4.neigh.default.gc_thresh1=512  2>/dev/null
+sysctl -q net.ipv4.neigh.default.gc_thresh2=1024 2>/dev/null
+sysctl -q net.ipv4.neigh.default.gc_thresh3=4096 2>/dev/null
+
+for i in $(seq 1 $maxclients);do
+  cl="ns-cl$i-$sfx"
+
+  ip netns add "$cl"
+  if [ $? -ne 0 ];then
+     echo "SKIP: Could not create client netns $cl"
+     exit $ksft_skip
+  fi
+  ip link add veth$i netns "$gw" type veth peer name eth0 netns "$cl" > /dev/null 2>&1
+  if [ $? -ne 0 ];then
+    echo "SKIP: No virtual ethernet pair device support in kernel"
+    exit $ksft_skip
+  fi
+done
+
+for i in $(seq 1 $maxclients);do
+  cl="ns-cl$i-$sfx"
+  echo netns exec "$cl" ip link set lo up
+  echo netns exec "$cl" ip link set eth0 up
+  echo netns exec "$cl" sysctl -q net.ipv4.tcp_syn_retries=2
+  echo netns exec "$gw" ip link set veth$i up
+  echo netns exec "$gw" sysctl -q net.ipv4.conf.veth$i.arp_ignore=2
+  echo netns exec "$gw" sysctl -q net.ipv4.conf.veth$i.rp_filter=0
+
+  # clients have same IP addresses.
+  echo netns exec "$cl" ip addr add 10.1.0.3/24 dev eth0
+  echo netns exec "$cl" ip addr add dead:1::3/64 dev eth0
+  echo netns exec "$cl" ip route add default via 10.1.0.2 dev eth0
+  echo netns exec "$cl" ip route add default via dead:1::2 dev eth0
+
+  # NB: same addresses on client-facing interfaces.
+  echo netns exec "$gw" ip addr add 10.1.0.2/24 dev veth$i
+  echo netns exec "$gw" ip addr add dead:1::2/64 dev veth$i
+
+  # gw: policy routing
+  echo netns exec "$gw" ip route add 10.1.0.0/24 dev veth$i table $((1000+i))
+  echo netns exec "$gw" ip route add dead:1::0/64 dev veth$i table $((1000+i))
+  echo netns exec "$gw" ip route add 10.3.0.0/24 dev veth0 table $((1000+i))
+  echo netns exec "$gw" ip route add dead:3::0/64 dev veth0 table $((1000+i))
+  echo netns exec "$gw" ip rule add fwmark $i lookup $((1000+i))
+done | ip -batch /dev/stdin
+
+ip -net "$gw" addr add 10.3.0.1/24 dev veth0
+ip -net "$gw" addr add dead:3::1/64 dev veth0
+
+ip -net "$srv" addr add 10.3.0.99/24 dev eth0
+ip -net "$srv" addr add dead:3::99/64 dev eth0
+
+ip netns exec $gw nft -f /dev/stdin<<EOF
+table inet raw {
+	map iiftomark {
+		type ifname : mark
+	}
+
+	map iiftozone {
+		typeof iifname : ct zone
+	}
+
+	set inicmp {
+		flags dynamic
+		type ipv4_addr . ifname . ipv4_addr
+	}
+	set inflows {
+		flags dynamic
+		type ipv4_addr . inet_service . ifname . ipv4_addr . inet_service
+	}
+
+	set inflows6 {
+		flags dynamic
+		type ipv6_addr . inet_service . ifname . ipv6_addr . inet_service
+	}
+
+	chain prerouting {
+		type filter hook prerouting priority -64000; policy accept;
+		ct original zone set meta iifname map @iiftozone
+		meta mark set meta iifname map @iiftomark
+
+		tcp flags & (syn|ack) == ack add @inflows { ip saddr . tcp sport . meta iifname . ip daddr . tcp dport counter }
+		add @inflows6 { ip6 saddr . tcp sport . meta iifname . ip6 daddr . tcp dport counter }
+		ip protocol icmp add @inicmp { ip saddr . meta iifname . ip daddr counter }
+	}
+
+	chain nat_postrouting {
+		type nat hook postrouting priority 0; policy accept;
+                ct mark set meta mark meta oifname veth0 masquerade
+	}
+
+	chain mangle_prerouting {
+		type filter hook prerouting priority -100; policy accept;
+		ct direction reply meta mark set ct mark
+	}
+}
+EOF
+
+( echo add element inet raw iiftomark \{
+	for i in $(seq 1 $((maxclients-1))); do
+		echo \"veth$i\" : $i,
+	done
+	echo \"veth$maxclients\" : $maxclients \}
+	echo add element inet raw iiftozone \{
+	for i in $(seq 1 $((maxclients-1))); do
+		echo \"veth$i\" : $i,
+	done
+	echo \"veth$maxclients\" : $maxclients \}
+) | ip netns exec $gw nft -f /dev/stdin
+
+ip netns exec "$gw" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
+ip netns exec "$gw" sysctl -q net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec "$gw" sysctl -q net.ipv4.conf.all.rp_filter=0 >/dev/null
+
+# useful for debugging: allows to use 'ping' from clients to gateway.
+ip netns exec "$gw" sysctl -q net.ipv4.fwmark_reflect=1 > /dev/null
+ip netns exec "$gw" sysctl -q net.ipv6.fwmark_reflect=1 > /dev/null
+
+for i in $(seq 1 $maxclients); do
+  cl="ns-cl$i-$sfx"
+  ip netns exec $cl ping -i 0.5 -q -c 3 10.3.0.99 > /dev/null 2>&1 &
+  if [ $? -ne 0 ]; then
+     echo FAIL: Ping failure from $cl 1>&2
+     ret=1
+     break
+  fi
+done
+
+wait
+
+for i in $(seq 1 $maxclients); do
+   ip netns exec $gw nft get element inet raw inicmp "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 }" | grep -q "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 counter packets 3 bytes 252 }"
+   if [ $? -ne 0 ];then
+      ret=1
+      echo "FAIL: counter icmp mismatch for veth$i" 1>&2
+      ip netns exec $gw nft get element inet raw inicmp "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 }" 1>&2
+      break
+   fi
+done
+
+ip netns exec $gw nft get element inet raw inicmp "{ 10.3.0.99 . \"veth0\" . 10.3.0.1 }" | grep -q "{ 10.3.0.99 . \"veth0\" . 10.3.0.1 counter packets $((3 * $maxclients)) bytes $((252 * $maxclients)) }"
+if [ $? -ne 0 ];then
+    ret=1
+    echo "FAIL: counter icmp mismatch for veth0: { 10.3.0.99 . \"veth0\" . 10.3.0.1 counter packets $((3 * $maxclients)) bytes $((252 * $maxclients)) }"
+    ip netns exec $gw nft get element inet raw inicmp "{ 10.3.99 . \"veth0\" . 10.3.0.1 }" 1>&2
+fi
+
+if  [ $ret -eq 0 ]; then
+	echo "PASS: ping test from all $maxclients namespaces"
+fi
+
+if [ $have_iperf -eq 0 ];then
+	echo "SKIP: iperf3 not installed"
+	if [ $ret -ne 0 ];then
+	    exit $ret
+	fi
+	exit $ksft_skip
+fi
+
+ip netns exec $srv iperf3 -s > /dev/null 2>&1 &
+iperfpid=$!
+sleep 1
+
+for i in $(seq 1 $maxclients); do
+  if [ $ret -ne 0 ]; then
+     break
+  fi
+  cl="ns-cl$i-$sfx"
+  ip netns exec $cl iperf3 -c 10.3.0.99 --cport 10000 -n 1 > /dev/null
+  if [ $? -ne 0 ]; then
+     echo FAIL: Failure to connect for $cl 1>&2
+     ip netns exec $gw conntrack -S 1>&2
+     ret=1
+  fi
+done
+if [ $ret -eq 0 ];then
+	echo "PASS: iperf3 connections for all $maxclients net namespaces"
+fi
+
+kill $iperfpid
+wait
+
+for i in $(seq 1 $maxclients); do
+   ip netns exec $gw nft get element inet raw inflows "{ 10.1.0.3 . 10000 . \"veth$i\" . 10.3.0.99 . 5201 }" > /dev/null
+   if [ $? -ne 0 ];then
+      ret=1
+      echo "FAIL: can't find expected tcp entry for veth$i" 1>&2
+      break
+   fi
+done
+if [ $ret -eq 0 ];then
+	echo "PASS: Found client connection for all $maxclients net namespaces"
+fi
+
+ip netns exec $gw nft get element inet raw inflows "{ 10.3.0.99 . 5201 . \"veth0\" . 10.3.0.1 . 10000 }" > /dev/null
+if [ $? -ne 0 ];then
+    ret=1
+    echo "FAIL: cannot find return entry on veth0" 1>&2
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/netfilter/nft_zones_many.sh b/tools/testing/selftests/netfilter/nft_zones_many.sh
new file mode 100755
index 000000000000..ac646376eb01
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nft_zones_many.sh
@@ -0,0 +1,156 @@
+#!/bin/bash
+
+# Test insertion speed for packets with identical addresses/ports
+# that are all placed in distinct conntrack zones.
+
+sfx=$(mktemp -u "XXXXXXXX")
+ns="ns-$sfx"
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+zones=20000
+have_ct_tool=0
+ret=0
+
+cleanup()
+{
+	ip netns del $ns
+}
+
+ip netns add $ns
+if [ $? -ne 0 ];then
+	echo "SKIP: Could not create net namespace $gw"
+	exit $ksft_skip
+fi
+
+trap cleanup EXIT
+
+conntrack -V > /dev/null 2>&1
+if [ $? -eq 0 ];then
+	have_ct_tool=1
+fi
+
+ip -net "$ns" link set lo up
+
+test_zones() {
+	local max_zones=$1
+
+ip netns exec $ns sysctl -q net.netfilter.nf_conntrack_udp_timeout=3600
+ip netns exec $ns nft -f /dev/stdin<<EOF
+flush ruleset
+table inet raw {
+	map rndzone {
+		typeof numgen inc mod $max_zones : ct zone
+	}
+
+	chain output {
+		type filter hook output priority -64000; policy accept;
+		udp dport 12345  ct zone set numgen inc mod 65536 map @rndzone
+	}
+}
+EOF
+	(
+		echo "add element inet raw rndzone {"
+	for i in $(seq 1 $max_zones);do
+		echo -n "$i : $i"
+		if [ $i -lt $max_zones ]; then
+			echo ","
+		else
+			echo "}"
+		fi
+	done
+	) | ip netns exec $ns nft -f /dev/stdin
+
+	local i=0
+	local j=0
+	local outerstart=$(date +%s%3N)
+	local stop=$outerstart
+
+	while [ $i -lt $max_zones ]; do
+		local start=$(date +%s%3N)
+		i=$((i + 10000))
+		j=$((j + 1))
+		dd if=/dev/zero of=/dev/stdout bs=8k count=10000 2>/dev/null | ip netns exec "$ns" nc -w 1 -q 1 -u -p 12345 127.0.0.1 12345 > /dev/null
+		if [ $? -ne 0 ] ;then
+			ret=1
+			break
+		fi
+
+		stop=$(date +%s%3N)
+		local duration=$((stop-start))
+		echo "PASS: added 10000 entries in $duration ms (now $i total, loop $j)"
+	done
+
+	if [ $have_ct_tool -eq 1 ]; then
+		local count=$(ip netns exec "$ns" conntrack -C)
+		local duration=$((stop-outerstart))
+
+		if [ $count -eq $max_zones ]; then
+			echo "PASS: inserted $count entries from packet path in $duration ms total"
+		else
+			ip netns exec $ns conntrack -S 1>&2
+			echo "FAIL: inserted $count entries from packet path in $duration ms total, expected $max_zones entries"
+			ret=1
+		fi
+	fi
+
+	if [ $ret -ne 0 ];then
+		echo "FAIL: insert $max_zones entries from packet path" 1>&2
+	fi
+}
+
+test_conntrack_tool() {
+	local max_zones=$1
+
+	ip netns exec $ns conntrack -F >/dev/null 2>/dev/null
+
+	local outerstart=$(date +%s%3N)
+	local start=$(date +%s%3N)
+	local stop=$start
+	local i=0
+	while [ $i -lt $max_zones ]; do
+		i=$((i + 1))
+		ip netns exec "$ns" conntrack -I -s 1.1.1.1 -d 2.2.2.2 --protonum 6 \
+	                 --timeout 3600 --state ESTABLISHED --sport 12345 --dport 1000 --zone $i >/dev/null 2>&1
+		if [ $? -ne 0 ];then
+			ip netns exec "$ns" conntrack -I -s 1.1.1.1 -d 2.2.2.2 --protonum 6 \
+	                 --timeout 3600 --state ESTABLISHED --sport 12345 --dport 1000 --zone $i > /dev/null
+			echo "FAIL: conntrack -I returned an error"
+			ret=1
+			break
+		fi
+
+		if [ $((i%10000)) -eq 0 ];then
+			stop=$(date +%s%3N)
+
+			local duration=$((stop-start))
+			echo "PASS: added 10000 entries in $duration ms (now $i total)"
+			start=$stop
+		fi
+	done
+
+	local count=$(ip netns exec "$ns" conntrack -C)
+	local duration=$((stop-outerstart))
+
+	if [ $count -eq $max_zones ]; then
+		echo "PASS: inserted $count entries via ctnetlink in $duration ms"
+	else
+		ip netns exec $ns conntrack -S 1>&2
+		echo "FAIL: inserted $count entries via ctnetlink in $duration ms, expected $max_zones entries ($duration ms)"
+		ret=1
+	fi
+}
+
+test_zones $zones
+
+if [ $have_ct_tool -eq 1 ];then
+	test_conntrack_tool $zones
+else
+	echo "SKIP: Could not run ctnetlink insertion test without conntrack tool"
+	if [ $ret -eq 0 ];then
+		exit $ksft_skip
+	fi
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/powerpc/tm/tm-syscall-asm.S b/tools/testing/selftests/powerpc/tm/tm-syscall-asm.S
index bd1ca25febe4..aed632d29fff 100644
--- a/tools/testing/selftests/powerpc/tm/tm-syscall-asm.S
+++ b/tools/testing/selftests/powerpc/tm/tm-syscall-asm.S
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-#include <ppc-asm.h>
+#include <basic_asm.h>
 #include <asm/unistd.h>
 
 	.text
@@ -26,3 +26,38 @@ FUNC_START(getppid_tm_suspended)
 1:
 	li	r3, -1
 	blr
+
+
+.macro scv level
+	.long (0x44000001 | (\level) << 5)
+.endm
+
+FUNC_START(getppid_scv_tm_active)
+	PUSH_BASIC_STACK(0)
+	tbegin.
+	beq 1f
+	li	r0, __NR_getppid
+	scv	0
+	tend.
+	POP_BASIC_STACK(0)
+	blr
+1:
+	li	r3, -1
+	POP_BASIC_STACK(0)
+	blr
+
+FUNC_START(getppid_scv_tm_suspended)
+	PUSH_BASIC_STACK(0)
+	tbegin.
+	beq 1f
+	li	r0, __NR_getppid
+	tsuspend.
+	scv	0
+	tresume.
+	tend.
+	POP_BASIC_STACK(0)
+	blr
+1:
+	li	r3, -1
+	POP_BASIC_STACK(0)
+	blr
diff --git a/tools/testing/selftests/powerpc/tm/tm-syscall.c b/tools/testing/selftests/powerpc/tm/tm-syscall.c
index 467a6b3134b2..b763354c2eb4 100644
--- a/tools/testing/selftests/powerpc/tm/tm-syscall.c
+++ b/tools/testing/selftests/powerpc/tm/tm-syscall.c
@@ -19,23 +19,36 @@
 #include "utils.h"
 #include "tm.h"
 
+#ifndef PPC_FEATURE2_SCV
+#define PPC_FEATURE2_SCV               0x00100000 /* scv syscall */
+#endif
+
 extern int getppid_tm_active(void);
 extern int getppid_tm_suspended(void);
+extern int getppid_scv_tm_active(void);
+extern int getppid_scv_tm_suspended(void);
 
 unsigned retries = 0;
 
 #define TEST_DURATION 10 /* seconds */
 
-pid_t getppid_tm(bool suspend)
+pid_t getppid_tm(bool scv, bool suspend)
 {
 	int i;
 	pid_t pid;
 
 	for (i = 0; i < TM_RETRIES; i++) {
-		if (suspend)
-			pid = getppid_tm_suspended();
-		else
-			pid = getppid_tm_active();
+		if (suspend) {
+			if (scv)
+				pid = getppid_scv_tm_suspended();
+			else
+				pid = getppid_tm_suspended();
+		} else {
+			if (scv)
+				pid = getppid_scv_tm_active();
+			else
+				pid = getppid_tm_active();
+		}
 
 		if (pid >= 0)
 			return pid;
@@ -82,15 +95,24 @@ int tm_syscall(void)
 		 * Test a syscall within a suspended transaction and verify
 		 * that it succeeds.
 		 */
-		FAIL_IF(getppid_tm(true) == -1); /* Should succeed. */
+		FAIL_IF(getppid_tm(false, true) == -1); /* Should succeed. */
 
 		/*
 		 * Test a syscall within an active transaction and verify that
 		 * it fails with the correct failure code.
 		 */
-		FAIL_IF(getppid_tm(false) != -1);  /* Should fail... */
+		FAIL_IF(getppid_tm(false, false) != -1);  /* Should fail... */
 		FAIL_IF(!failure_is_persistent()); /* ...persistently... */
 		FAIL_IF(!failure_is_syscall());    /* ...with code syscall. */
+
+		/* Now do it all again with scv if it is available. */
+		if (have_hwcap2(PPC_FEATURE2_SCV)) {
+			FAIL_IF(getppid_tm(true, true) == -1); /* Should succeed. */
+			FAIL_IF(getppid_tm(true, false) != -1);  /* Should fail... */
+			FAIL_IF(!failure_is_persistent()); /* ...persistently... */
+			FAIL_IF(!failure_is_syscall());    /* ...with code syscall. */
+		}
+
 		gettimeofday(&now, 0);
 	}