32 files changed, 4874 insertions, 13 deletions
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 95abddcd7839..24ae9e829e9a 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -4,6 +4,7 @@ TARGETS += efivarfs
 TARGETS += exec
 TARGETS += firmware
 TARGETS += ftrace
+TARGETS += futex
 TARGETS += kcmp
 TARGETS += memfd
 TARGETS += memory-hotplug
@@ -12,13 +13,18 @@ TARGETS += mqueue
 TARGETS += net
 TARGETS += powerpc
 TARGETS += ptrace
+TARGETS += seccomp
 TARGETS += size
 TARGETS += sysctl
+ifneq (1, $(quicktest))
 TARGETS += timers
+endif
 TARGETS += user
 TARGETS += vm
 TARGETS += x86
 #Please keep the TARGETS list alphabetically sorted
+# Run "make quicktest=1 run_tests" or
+# "make quicktest=1 kselftest from top level Makefile
 
 TARGETS_HOTPLUG = cpu-hotplug
 TARGETS_HOTPLUG += memory-hotplug
@@ -27,7 +33,7 @@ TARGETS_HOTPLUG += memory-hotplug
 # Makefile to avoid test build failures when test
 # Makefile doesn't have explicit build rules.
 ifeq (1,$(MAKELEVEL))
-undefine LDFLAGS
+override LDFLAGS =
 override MAKEFLAGS =
 endif
 
diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile
index 4edb7d0da29b..6b76bfdc847e 100644
--- a/tools/testing/selftests/exec/Makefile
+++ b/tools/testing/selftests/exec/Makefile
@@ -1,6 +1,6 @@
 CFLAGS = -Wall
 BINARIES = execveat
-DEPS = execveat.symlink execveat.denatured script subdir
+DEPS = execveat.symlink execveat.denatured script
 all: $(BINARIES) $(DEPS)
 
 subdir:
diff --git a/tools/testing/selftests/ftrace/Makefile b/tools/testing/selftests/ftrace/Makefile
index 346720639d1d..0acbeca47225 100644
--- a/tools/testing/selftests/ftrace/Makefile
+++ b/tools/testing/selftests/ftrace/Makefile
@@ -1,6 +1,7 @@
 all:
 
 TEST_PROGS := ftracetest
+TEST_DIRS := test.d/
 
 include ../lib.mk
 
diff --git a/tools/testing/selftests/futex/Makefile b/tools/testing/selftests/futex/Makefile
new file mode 100644
index 000000000000..6a1752956283
--- /dev/null
+++ b/tools/testing/selftests/futex/Makefile
@@ -0,0 +1,29 @@
+SUBDIRS := functional
+
+TEST_PROGS := run.sh
+
+.PHONY: all clean
+all:
+	for DIR in $(SUBDIRS); do $(MAKE) -C $$DIR $@ ; done
+
+include ../lib.mk
+
+override define RUN_TESTS
+	./run.sh
+endef
+
+override define INSTALL_RULE
+	mkdir -p $(INSTALL_PATH)
+	install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES)
+
+	@for SUBDIR in $(SUBDIRS); do \
+		$(MAKE) -C $$SUBDIR INSTALL_PATH=$(INSTALL_PATH)/$$SUBDIR install; \
+	done;
+endef
+
+override define EMIT_TESTS
+	echo "./run.sh"
+endef
+
+clean:
+	for DIR in $(SUBDIRS); do $(MAKE) -C $$DIR $@ ; done
diff --git a/tools/testing/selftests/futex/README b/tools/testing/selftests/futex/README
new file mode 100644
index 000000000000..3224a049b196
--- /dev/null
+++ b/tools/testing/selftests/futex/README
@@ -0,0 +1,62 @@
+Futex Test
+==========
+Futex Test is intended to thoroughly test the Linux kernel futex system call
+API.
+
+Functional tests shall test the documented behavior of the futex operation
+code under test. This includes checking for proper behavior under normal use,
+odd corner cases, regression tests, and abject abuse and misuse.
+
+Futextest will also provide example implementation of mutual exclusion
+primitives. These can be used as is in user applications or can serve as
+examples for system libraries. These will likely be added to either a new lib/
+directory or purely as header files under include/, I'm leaning toward the
+latter.
+
+Quick Start
+-----------
+# make
+# ./run.sh
+
+Design and Implementation Goals
+-------------------------------
+o Tests should be as self contained as is practical so as to facilitate sharing
+  the individual tests on mailing list discussions and bug reports.
+o The build system shall remain as simple as possible, avoiding any archive or
+  shared object building and linking.
+o Where possible, any helper functions or other package-wide code shall be
+  implemented in header files, avoiding the need to compile intermediate object
+  files.
+o External dependendencies shall remain as minimal as possible. Currently gcc
+  and glibc are the only dependencies.
+o Tests return 0 for success and < 0 for failure.
+
+Output Formatting
+-----------------
+Test output shall be easily parsable by both human and machine. Title and
+results are printed to stdout, while intermediate ERROR or FAIL messages are
+sent to stderr. Tests shall support the -c option to print PASS, FAIL, and
+ERROR strings in color for easy visual parsing. Output shall conform to the
+following format:
+
+test_name: Description of the test
+	Arguments: arg1=val1 #units specified for clarity where appropriate
+	ERROR: Description of unexpected error
+	 FAIL: Reason for test failure
+	# FIXME: Perhaps an " INFO: informational message" option would be
+	#        useful here. Using -v to toggle it them on and off, as with -c.
+	# there may be multiple ERROR or FAIL messages
+Result: (PASS|FAIL|ERROR)
+
+Naming
+------
+o FIXME: decide on a sane test naming scheme.  Currently the tests are named
+  based on the primary futex operation they test. Eventually this will become a
+  problem as we intend to write multiple tests which collide in this namespace.
+  Perhaps something like "wait-wake-1" "wait-wake-2" is adequate, leaving the
+  detailed description in the test source and the output.
+
+Coding Style
+------------
+o The Futex Test project adheres to the coding standards set forth by Linux
+  kernel as defined in the Linux source Documentation/CodingStyle.
diff --git a/tools/testing/selftests/futex/functional/.gitignore b/tools/testing/selftests/futex/functional/.gitignore
new file mode 100644
index 000000000000..a09f57061902
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/.gitignore
@@ -0,0 +1,7 @@
+futex_requeue_pi
+futex_requeue_pi_mismatched_ops
+futex_requeue_pi_signal_restart
+futex_wait_private_mapped_file
+futex_wait_timeout
+futex_wait_uninitialized_heap
+futex_wait_wouldblock
diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile
new file mode 100644
index 000000000000..9d6b75ef7b5d
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/Makefile
@@ -0,0 +1,25 @@
+INCLUDES := -I../include -I../../
+CFLAGS := $(CFLAGS) -g -O2 -Wall -D_GNU_SOURCE -pthread $(INCLUDES)
+LDFLAGS := $(LDFLAGS) -pthread -lrt
+
+HEADERS := ../include/futextest.h
+TARGETS := \
+	futex_wait_timeout \
+	futex_wait_wouldblock \
+	futex_requeue_pi \
+	futex_requeue_pi_signal_restart \
+	futex_requeue_pi_mismatched_ops \
+	futex_wait_uninitialized_heap \
+	futex_wait_private_mapped_file
+
+TEST_PROGS := $(TARGETS) run.sh
+
+.PHONY: all clean
+all: $(TARGETS)
+
+$(TARGETS): $(HEADERS)
+
+include ../../lib.mk
+
+clean:
+	rm -f $(TARGETS)
diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi.c b/tools/testing/selftests/futex/functional/futex_requeue_pi.c
new file mode 100644
index 000000000000..3da06ad23996
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_requeue_pi.c
@@ -0,0 +1,409 @@
+/******************************************************************************
+ *
+ *   Copyright © International Business Machines  Corp., 2006-2008
+ *
+ *   This program is free software;  you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ * DESCRIPTION
+ *      This test excercises the futex syscall op codes needed for requeuing
+ *      priority inheritance aware POSIX condition variables and mutexes.
+ *
+ * AUTHORS
+ *      Sripathi Kodi <sripathik@in.ibm.com>
+ *      Darren Hart <dvhart@linux.intel.com>
+ *
+ * HISTORY
+ *      2008-Jan-13: Initial version by Sripathi Kodi <sripathik@in.ibm.com>
+ *      2009-Nov-6: futex test adaptation by Darren Hart <dvhart@linux.intel.com>
+ *
+ *****************************************************************************/
+
+#include <errno.h>
+#include <limits.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <string.h>
+#include "atomic.h"
+#include "futextest.h"
+#include "logging.h"
+
+#define MAX_WAKE_ITERS 1000
+#define THREAD_MAX 10
+#define SIGNAL_PERIOD_US 100
+
+atomic_t waiters_blocked = ATOMIC_INITIALIZER;
+atomic_t waiters_woken = ATOMIC_INITIALIZER;
+
+futex_t f1 = FUTEX_INITIALIZER;
+futex_t f2 = FUTEX_INITIALIZER;
+futex_t wake_complete = FUTEX_INITIALIZER;
+
+/* Test option defaults */
+static long timeout_ns;
+static int broadcast;
+static int owner;
+static int locked;
+
+struct thread_arg {
+	long id;
+	struct timespec *timeout;
+	int lock;
+	int ret;
+};
+#define THREAD_ARG_INITIALIZER { 0, NULL, 0, 0 }
+
+void usage(char *prog)
+{
+	printf("Usage: %s\n", prog);
+	printf("  -b	Broadcast wakeup (all waiters)\n");
+	printf("  -c	Use color\n");
+	printf("  -h	Display this help message\n");
+	printf("  -l	Lock the pi futex across requeue\n");
+	printf("  -o	Use a third party pi futex owner during requeue (cancels -l)\n");
+	printf("  -t N	Timeout in nanoseconds (default: 0)\n");
+	printf("  -v L	Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
+	       VQUIET, VCRITICAL, VINFO);
+}
+
+int create_rt_thread(pthread_t *pth, void*(*func)(void *), void *arg,
+		     int policy, int prio)
+{
+	int ret;
+	struct sched_param schedp;
+	pthread_attr_t attr;
+
+	pthread_attr_init(&attr);
+	memset(&schedp, 0, sizeof(schedp));
+
+	ret = pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED);
+	if (ret) {
+		error("pthread_attr_setinheritsched\n", ret);
+		return -1;
+	}
+
+	ret = pthread_attr_setschedpolicy(&attr, policy);
+	if (ret) {
+		error("pthread_attr_setschedpolicy\n", ret);
+		return -1;
+	}
+
+	schedp.sched_priority = prio;
+	ret = pthread_attr_setschedparam(&attr, &schedp);
+	if (ret) {
+		error("pthread_attr_setschedparam\n", ret);
+		return -1;
+	}
+
+	ret = pthread_create(pth, &attr, func, arg);
+	if (ret) {
+		error("pthread_create\n", ret);
+		return -1;
+	}
+	return 0;
+}
+
+
+void *waiterfn(void *arg)
+{
+	struct thread_arg *args = (struct thread_arg *)arg;
+	futex_t old_val;
+
+	info("Waiter %ld: running\n", args->id);
+	/* Each thread sleeps for a different amount of time
+	 * This is to avoid races, because we don't lock the
+	 * external mutex here */
+	usleep(1000 * (long)args->id);
+
+	old_val = f1;
+	atomic_inc(&waiters_blocked);
+	info("Calling futex_wait_requeue_pi: %p (%u) -> %p\n",
+	     &f1, f1, &f2);
+	args->ret = futex_wait_requeue_pi(&f1, old_val, &f2, args->timeout,
+					  FUTEX_PRIVATE_FLAG);
+
+	info("waiter %ld woke with %d %s\n", args->id, args->ret,
+	     args->ret < 0 ? strerror(errno) : "");
+	atomic_inc(&waiters_woken);
+	if (args->ret < 0) {
+		if (args->timeout && errno == ETIMEDOUT)
+			args->ret = 0;
+		else {
+			args->ret = RET_ERROR;
+			error("futex_wait_requeue_pi\n", errno);
+		}
+		futex_lock_pi(&f2, NULL, 0, FUTEX_PRIVATE_FLAG);
+	}
+	futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG);
+
+	info("Waiter %ld: exiting with %d\n", args->id, args->ret);
+	pthread_exit((void *)&args->ret);
+}
+
+void *broadcast_wakerfn(void *arg)
+{
+	struct thread_arg *args = (struct thread_arg *)arg;
+	int nr_requeue = INT_MAX;
+	int task_count = 0;
+	futex_t old_val;
+	int nr_wake = 1;
+	int i = 0;
+
+	info("Waker: waiting for waiters to block\n");
+	while (waiters_blocked.val < THREAD_MAX)
+		usleep(1000);
+	usleep(1000);
+
+	info("Waker: Calling broadcast\n");
+	if (args->lock) {
+		info("Calling FUTEX_LOCK_PI on mutex=%x @ %p\n", f2, &f2);
+		futex_lock_pi(&f2, NULL, 0, FUTEX_PRIVATE_FLAG);
+	}
+ continue_requeue:
+	old_val = f1;
+	args->ret = futex_cmp_requeue_pi(&f1, old_val, &f2, nr_wake, nr_requeue,
+				   FUTEX_PRIVATE_FLAG);
+	if (args->ret < 0) {
+		args->ret = RET_ERROR;
+		error("FUTEX_CMP_REQUEUE_PI failed\n", errno);
+	} else if (++i < MAX_WAKE_ITERS) {
+		task_count += args->ret;
+		if (task_count < THREAD_MAX - waiters_woken.val)
+			goto continue_requeue;
+	} else {
+		error("max broadcast iterations (%d) reached with %d/%d tasks woken or requeued\n",
+		       0, MAX_WAKE_ITERS, task_count, THREAD_MAX);
+		args->ret = RET_ERROR;
+	}
+
+	futex_wake(&wake_complete, 1, FUTEX_PRIVATE_FLAG);
+
+	if (args->lock)
+		futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG);
+
+	if (args->ret > 0)
+		args->ret = task_count;
+
+	info("Waker: exiting with %d\n", args->ret);
+	pthread_exit((void *)&args->ret);
+}
+
+void *signal_wakerfn(void *arg)
+{
+	struct thread_arg *args = (struct thread_arg *)arg;
+	unsigned int old_val;
+	int nr_requeue = 0;
+	int task_count = 0;
+	int nr_wake = 1;
+	int i = 0;
+
+	info("Waker: waiting for waiters to block\n");
+	while (waiters_blocked.val < THREAD_MAX)
+		usleep(1000);
+	usleep(1000);
+
+	while (task_count < THREAD_MAX && waiters_woken.val < THREAD_MAX) {
+		info("task_count: %d, waiters_woken: %d\n",
+		     task_count, waiters_woken.val);
+		if (args->lock) {
+			info("Calling FUTEX_LOCK_PI on mutex=%x @ %p\n",
+			     f2, &f2);
+			futex_lock_pi(&f2, NULL, 0, FUTEX_PRIVATE_FLAG);
+		}
+		info("Waker: Calling signal\n");
+		/* cond_signal */
+		old_val = f1;
+		args->ret = futex_cmp_requeue_pi(&f1, old_val, &f2,
+						 nr_wake, nr_requeue,
+						 FUTEX_PRIVATE_FLAG);
+		if (args->ret < 0)
+			args->ret = -errno;
+		info("futex: %x\n", f2);
+		if (args->lock) {
+			info("Calling FUTEX_UNLOCK_PI on mutex=%x @ %p\n",
+			     f2, &f2);
+			futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG);
+		}
+		info("futex: %x\n", f2);
+		if (args->ret < 0) {
+			error("FUTEX_CMP_REQUEUE_PI failed\n", errno);
+			args->ret = RET_ERROR;
+			break;
+		}
+
+		task_count += args->ret;
+		usleep(SIGNAL_PERIOD_US);
+		i++;
+		/* we have to loop at least THREAD_MAX times */
+		if (i > MAX_WAKE_ITERS + THREAD_MAX) {
+			error("max signaling iterations (%d) reached, giving up on pending waiters.\n",
+			      0, MAX_WAKE_ITERS + THREAD_MAX);
+			args->ret = RET_ERROR;
+			break;
+		}
+	}
+
+	futex_wake(&wake_complete, 1, FUTEX_PRIVATE_FLAG);
+
+	if (args->ret >= 0)
+		args->ret = task_count;
+
+	info("Waker: exiting with %d\n", args->ret);
+	info("Waker: waiters_woken: %d\n", waiters_woken.val);
+	pthread_exit((void *)&args->ret);
+}
+
+void *third_party_blocker(void *arg)
+{
+	struct thread_arg *args = (struct thread_arg *)arg;
+	int ret2 = 0;
+
+	args->ret = futex_lock_pi(&f2, NULL, 0, FUTEX_PRIVATE_FLAG);
+	if (args->ret)
+		goto out;
+	args->ret = futex_wait(&wake_complete, wake_complete, NULL,
+			       FUTEX_PRIVATE_FLAG);
+	ret2 = futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG);
+
+ out:
+	if (args->ret || ret2) {
+		error("third_party_blocker() futex error", 0);
+		args->ret = RET_ERROR;
+	}
+
+	pthread_exit((void *)&args->ret);
+}
+
+int unit_test(int broadcast, long lock, int third_party_owner, long timeout_ns)
+{
+	void *(*wakerfn)(void *) = signal_wakerfn;
+	struct thread_arg blocker_arg = THREAD_ARG_INITIALIZER;
+	struct thread_arg waker_arg = THREAD_ARG_INITIALIZER;
+	pthread_t waiter[THREAD_MAX], waker, blocker;
+	struct timespec ts, *tsp = NULL;
+	struct thread_arg args[THREAD_MAX];
+	int *waiter_ret;
+	int i, ret = RET_PASS;
+
+	if (timeout_ns) {
+		time_t secs;
+
+		info("timeout_ns = %ld\n", timeout_ns);
+		ret = clock_gettime(CLOCK_MONOTONIC, &ts);
+		secs = (ts.tv_nsec + timeout_ns) / 1000000000;
+		ts.tv_nsec = ((int64_t)ts.tv_nsec + timeout_ns) % 1000000000;
+		ts.tv_sec += secs;
+		info("ts.tv_sec  = %ld\n", ts.tv_sec);
+		info("ts.tv_nsec = %ld\n", ts.tv_nsec);
+		tsp = &ts;
+	}
+
+	if (broadcast)
+		wakerfn = broadcast_wakerfn;
+
+	if (third_party_owner) {
+		if (create_rt_thread(&blocker, third_party_blocker,
+				     (void *)&blocker_arg, SCHED_FIFO, 1)) {
+			error("Creating third party blocker thread failed\n",
+			      errno);
+			ret = RET_ERROR;
+			goto out;
+		}
+	}
+
+	atomic_set(&waiters_woken, 0);
+	for (i = 0; i < THREAD_MAX; i++) {
+		args[i].id = i;
+		args[i].timeout = tsp;
+		info("Starting thread %d\n", i);
+		if (create_rt_thread(&waiter[i], waiterfn, (void *)&args[i],
+				     SCHED_FIFO, 1)) {
+			error("Creating waiting thread failed\n", errno);
+			ret = RET_ERROR;
+			goto out;
+		}
+	}
+	waker_arg.lock = lock;
+	if (create_rt_thread(&waker, wakerfn, (void *)&waker_arg,
+			     SCHED_FIFO, 1)) {
+		error("Creating waker thread failed\n", errno);
+		ret = RET_ERROR;
+		goto out;
+	}
+
+	/* Wait for threads to finish */
+	/* Store the first error or failure encountered in waiter_ret */
+	waiter_ret = &args[0].ret;
+	for (i = 0; i < THREAD_MAX; i++)
+		pthread_join(waiter[i],
+			     *waiter_ret ? NULL : (void **)&waiter_ret);
+
+	if (third_party_owner)
+		pthread_join(blocker, NULL);
+	pthread_join(waker, NULL);
+
+out:
+	if (!ret) {
+		if (*waiter_ret)
+			ret = *waiter_ret;
+		else if (waker_arg.ret < 0)
+			ret = waker_arg.ret;
+		else if (blocker_arg.ret)
+			ret = blocker_arg.ret;
+	}
+
+	return ret;
+}
+
+int main(int argc, char *argv[])
+{
+	int c, ret;
+
+	while ((c = getopt(argc, argv, "bchlot:v:")) != -1) {
+		switch (c) {
+		case 'b':
+			broadcast = 1;
+			break;
+		case 'c':
+			log_color(1);
+			break;
+		case 'h':
+			usage(basename(argv[0]));
+			exit(0);
+		case 'l':
+			locked = 1;
+			break;
+		case 'o':
+			owner = 1;
+			locked = 0;
+			break;
+		case 't':
+			timeout_ns = atoi(optarg);
+			break;
+		case 'v':
+			log_verbosity(atoi(optarg));
+			break;
+		default:
+			usage(basename(argv[0]));
+			exit(1);
+		}
+	}
+
+	printf("%s: Test requeue functionality\n", basename(argv[0]));
+	printf("\tArguments: broadcast=%d locked=%d owner=%d timeout=%ldns\n",
+	       broadcast, locked, owner, timeout_ns);
+
+	/*
+	 * FIXME: unit_test is obsolete now that we parse options and the
+	 * various style of runs are done by run.sh - simplify the code and move
+	 * unit_test into main()
+	 */
+	ret = unit_test(broadcast, locked, owner, timeout_ns);
+
+	print_result(ret);
+	return ret;
+}
diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c b/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c
new file mode 100644
index 000000000000..d5e4f2c4da2a
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c
@@ -0,0 +1,135 @@
+/******************************************************************************
+ *
+ *   Copyright © International Business Machines  Corp., 2009
+ *
+ *   This program is free software;  you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ * DESCRIPTION
+ *      1. Block a thread using FUTEX_WAIT
+ *      2. Attempt to use FUTEX_CMP_REQUEUE_PI on the futex from 1.
+ *      3. The kernel must detect the mismatch and return -EINVAL.
+ *
+ * AUTHOR
+ *      Darren Hart <dvhart@linux.intel.com>
+ *
+ * HISTORY
+ *      2009-Nov-9: Initial version by Darren Hart <dvhart@linux.intel.com>
+ *
+ *****************************************************************************/
+
+#include <errno.h>
+#include <getopt.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include "futextest.h"
+#include "logging.h"
+
+futex_t f1 = FUTEX_INITIALIZER;
+futex_t f2 = FUTEX_INITIALIZER;
+int child_ret = 0;
+
+void usage(char *prog)
+{
+	printf("Usage: %s\n", prog);
+	printf("  -c	Use color\n");
+	printf("  -h	Display this help message\n");
+	printf("  -v L	Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
+	       VQUIET, VCRITICAL, VINFO);
+}
+
+void *blocking_child(void *arg)
+{
+	child_ret = futex_wait(&f1, f1, NULL, FUTEX_PRIVATE_FLAG);
+	if (child_ret < 0) {
+		child_ret = -errno;
+		error("futex_wait\n", errno);
+	}
+	return (void *)&child_ret;
+}
+
+int main(int argc, char *argv[])
+{
+	int ret = RET_PASS;
+	pthread_t child;
+	int c;
+
+	while ((c = getopt(argc, argv, "chv:")) != -1) {
+		switch (c) {
+		case 'c':
+			log_color(1);
+			break;
+		case 'h':
+			usage(basename(argv[0]));
+			exit(0);
+		case 'v':
+			log_verbosity(atoi(optarg));
+			break;
+		default:
+			usage(basename(argv[0]));
+			exit(1);
+		}
+	}
+
+	printf("%s: Detect mismatched requeue_pi operations\n",
+	       basename(argv[0]));
+
+	if (pthread_create(&child, NULL, blocking_child, NULL)) {
+		error("pthread_create\n", errno);
+		ret = RET_ERROR;
+		goto out;
+	}
+	/* Allow the child to block in the kernel. */
+	sleep(1);
+
+	/*
+	 * The kernel should detect the waiter did not setup the
+	 * q->requeue_pi_key and return -EINVAL. If it does not,
+	 * it likely gave the lock to the child, which is now hung
+	 * in the kernel.
+	 */
+	ret = futex_cmp_requeue_pi(&f1, f1, &f2, 1, 0, FUTEX_PRIVATE_FLAG);
+	if (ret < 0) {
+		if (errno == EINVAL) {
+			/*
+			 * The kernel correctly detected the mismatched
+			 * requeue_pi target and aborted. Wake the child with
+			 * FUTEX_WAKE.
+			 */
+			ret = futex_wake(&f1, 1, FUTEX_PRIVATE_FLAG);
+			if (ret == 1) {
+				ret = RET_PASS;
+			} else if (ret < 0) {
+				error("futex_wake\n", errno);
+				ret = RET_ERROR;
+			} else {
+				error("futex_wake did not wake the child\n", 0);
+				ret = RET_ERROR;
+			}
+		} else {
+			error("futex_cmp_requeue_pi\n", errno);
+			ret = RET_ERROR;
+		}
+	} else if (ret > 0) {
+		fail("futex_cmp_requeue_pi failed to detect the mismatch\n");
+		ret = RET_FAIL;
+	} else {
+		error("futex_cmp_requeue_pi found no waiters\n", 0);
+		ret = RET_ERROR;
+	}
+
+	pthread_join(child, NULL);
+
+	if (!ret)
+		ret = child_ret;
+
+ out:
+	/* If the kernel crashes, we shouldn't return at all. */
+	print_result(ret);
+	return ret;
+}
diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c b/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c
new file mode 100644
index 000000000000..7f0c756993af
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c
@@ -0,0 +1,223 @@
+/******************************************************************************
+ *
+ *   Copyright © International Business Machines  Corp., 2006-2008
+ *
+ *   This program is free software;  you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ * DESCRIPTION
+ *      This test exercises the futex_wait_requeue_pi() signal handling both
+ *      before and after the requeue. The first should be restarted by the
+ *      kernel. The latter should return EWOULDBLOCK to the waiter.
+ *
+ * AUTHORS
+ *      Darren Hart <dvhart@linux.intel.com>
+ *
+ * HISTORY
+ *      2008-May-5: Initial version by Darren Hart <dvhart@linux.intel.com>
+ *
+ *****************************************************************************/
+
+#include <errno.h>
+#include <getopt.h>
+#include <limits.h>
+#include <pthread.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "atomic.h"
+#include "futextest.h"
+#include "logging.h"
+
+#define DELAY_US 100
+
+futex_t f1 = FUTEX_INITIALIZER;
+futex_t f2 = FUTEX_INITIALIZER;
+atomic_t requeued = ATOMIC_INITIALIZER;
+
+int waiter_ret = 0;
+
+void usage(char *prog)
+{
+	printf("Usage: %s\n", prog);
+	printf("  -c	Use color\n");
+	printf("  -h	Display this help message\n");
+	printf("  -v L	Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
+	       VQUIET, VCRITICAL, VINFO);
+}
+
+int create_rt_thread(pthread_t *pth, void*(*func)(void *), void *arg,
+		     int policy, int prio)
+{
+	struct sched_param schedp;
+	pthread_attr_t attr;
+	int ret;
+
+	pthread_attr_init(&attr);
+	memset(&schedp, 0, sizeof(schedp));
+
+	ret = pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED);
+	if (ret) {
+		error("pthread_attr_setinheritsched\n", ret);
+		return -1;
+	}
+
+	ret = pthread_attr_setschedpolicy(&attr, policy);
+	if (ret) {
+		error("pthread_attr_setschedpolicy\n", ret);
+		return -1;
+	}
+
+	schedp.sched_priority = prio;
+	ret = pthread_attr_setschedparam(&attr, &schedp);
+	if (ret) {
+		error("pthread_attr_setschedparam\n", ret);
+		return -1;
+	}
+
+	ret = pthread_create(pth, &attr, func, arg);
+	if (ret) {
+		error("pthread_create\n", ret);
+		return -1;
+	}
+	return 0;
+}
+
+void handle_signal(int signo)
+{
+	info("signal received %s requeue\n",
+	     requeued.val ? "after" : "prior to");
+}
+
+void *waiterfn(void *arg)
+{
+	unsigned int old_val;
+	int res;
+
+	waiter_ret = RET_PASS;
+
+	info("Waiter running\n");
+	info("Calling FUTEX_LOCK_PI on f2=%x @ %p\n", f2, &f2);
+	old_val = f1;
+	res = futex_wait_requeue_pi(&f1, old_val, &(f2), NULL,
+				    FUTEX_PRIVATE_FLAG);
+	if (!requeued.val || errno != EWOULDBLOCK) {
+		fail("unexpected return from futex_wait_requeue_pi: %d (%s)\n",
+		     res, strerror(errno));
+		info("w2:futex: %x\n", f2);
+		if (!res)
+			futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG);
+		waiter_ret = RET_FAIL;
+	}
+
+	info("Waiter exiting with %d\n", waiter_ret);
+	pthread_exit(NULL);
+}
+
+
+int main(int argc, char *argv[])
+{
+	unsigned int old_val;
+	struct sigaction sa;
+	pthread_t waiter;
+	int c, res, ret = RET_PASS;
+
+	while ((c = getopt(argc, argv, "chv:")) != -1) {
+		switch (c) {
+		case 'c':
+			log_color(1);
+			break;
+		case 'h':
+			usage(basename(argv[0]));
+			exit(0);
+		case 'v':
+			log_verbosity(atoi(optarg));
+			break;
+		default:
+			usage(basename(argv[0]));
+			exit(1);
+		}
+	}
+
+	printf("%s: Test signal handling during requeue_pi\n",
+	       basename(argv[0]));
+	printf("\tArguments: <none>\n");
+
+	sa.sa_handler = handle_signal;
+	sigemptyset(&sa.sa_mask);
+	sa.sa_flags = 0;
+	if (sigaction(SIGUSR1, &sa, NULL)) {
+		error("sigaction\n", errno);
+		exit(1);
+	}
+
+	info("m1:f2: %x\n", f2);
+	info("Creating waiter\n");
+	res = create_rt_thread(&waiter, waiterfn, NULL, SCHED_FIFO, 1);
+	if (res) {
+		error("Creating waiting thread failed", res);
+		ret = RET_ERROR;
+		goto out;
+	}
+
+	info("Calling FUTEX_LOCK_PI on f2=%x @ %p\n", f2, &f2);
+	info("m2:f2: %x\n", f2);
+	futex_lock_pi(&f2, 0, 0, FUTEX_PRIVATE_FLAG);
+	info("m3:f2: %x\n", f2);
+
+	while (1) {
+		/*
+		 * signal the waiter before requeue, waiter should automatically
+		 * restart futex_wait_requeue_pi() in the kernel. Wait for the
+		 * waiter to block on f1 again.
+		 */
+		info("Issuing SIGUSR1 to waiter\n");
+		pthread_kill(waiter, SIGUSR1);
+		usleep(DELAY_US);
+
+		info("Requeueing waiter via FUTEX_CMP_REQUEUE_PI\n");
+		old_val = f1;
+		res = futex_cmp_requeue_pi(&f1, old_val, &(f2), 1, 0,
+					   FUTEX_PRIVATE_FLAG);
+		/*
+		 * If res is non-zero, we either requeued the waiter or hit an
+		 * error, break out and handle it. If it is zero, then the
+		 * signal may have hit before the the waiter was blocked on f1.
+		 * Try again.
+		 */
+		if (res > 0) {
+			atomic_set(&requeued, 1);
+			break;
+		} else if (res > 0) {
+			error("FUTEX_CMP_REQUEUE_PI failed\n", errno);
+			ret = RET_ERROR;
+			break;
+		}
+	}
+	info("m4:f2: %x\n", f2);
+
+	/*
+	 * Signal the waiter after requeue, waiter should return from
+	 * futex_wait_requeue_pi() with EWOULDBLOCK. Join the thread here so the
+	 * futex_unlock_pi() can't happen before the signal wakeup is detected
+	 * in the kernel.
+	 */
+	info("Issuing SIGUSR1 to waiter\n");
+	pthread_kill(waiter, SIGUSR1);
+	info("Waiting for waiter to return\n");
+	pthread_join(waiter, NULL);
+
+	info("Calling FUTEX_UNLOCK_PI on mutex=%x @ %p\n", f2, &f2);
+	futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG);
+	info("m5:f2: %x\n", f2);
+
+ out:
+	if (ret == RET_PASS && waiter_ret)
+		ret = waiter_ret;
+
+	print_result(ret);
+	return ret;
+}
diff --git a/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c b/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c
new file mode 100644
index 000000000000..5f687f247454
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c
@@ -0,0 +1,125 @@
+/******************************************************************************
+ *
+ * Copyright FUJITSU LIMITED 2010
+ * Copyright KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+ *
+ *   This program is free software;  you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ * DESCRIPTION
+ *      Internally, Futex has two handling mode, anon and file. The private file
+ *      mapping is special. At first it behave as file, but after write anything
+ *      it behave as anon. This test is intent to test such case.
+ *
+ * AUTHOR
+ *      KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+ *
+ * HISTORY
+ *      2010-Jan-6: Initial version by KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+ *
+ *****************************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <syscall.h>
+#include <unistd.h>
+#include <errno.h>
+#include <linux/futex.h>
+#include <pthread.h>
+#include <libgen.h>
+#include <signal.h>
+
+#include "logging.h"
+#include "futextest.h"
+
+#define PAGE_SZ 4096
+
+char pad[PAGE_SZ] = {1};
+futex_t val = 1;
+char pad2[PAGE_SZ] = {1};
+
+#define WAKE_WAIT_US 3000000
+struct timespec wait_timeout = { .tv_sec = 5, .tv_nsec = 0};
+
+void usage(char *prog)
+{
+	printf("Usage: %s\n", prog);
+	printf("  -c	Use color\n");
+	printf("  -h	Display this help message\n");
+	printf("  -v L	Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
+	       VQUIET, VCRITICAL, VINFO);
+}
+
+void *thr_futex_wait(void *arg)
+{
+	int ret;
+
+	info("futex wait\n");
+	ret = futex_wait(&val, 1, &wait_timeout, 0);
+	if (ret && errno != EWOULDBLOCK && errno != ETIMEDOUT) {
+		error("futex error.\n", errno);
+		print_result(RET_ERROR);
+		exit(RET_ERROR);
+	}
+
+	if (ret && errno == ETIMEDOUT)
+		fail("waiter timedout\n");
+
+	info("futex_wait: ret = %d, errno = %d\n", ret, errno);
+
+	return NULL;
+}
+
+int main(int argc, char **argv)
+{
+	pthread_t thr;
+	int ret = RET_PASS;
+	int res;
+	int c;
+
+	while ((c = getopt(argc, argv, "chv:")) != -1) {
+		switch (c) {
+		case 'c':
+			log_color(1);
+			break;
+		case 'h':
+			usage(basename(argv[0]));
+			exit(0);
+		case 'v':
+			log_verbosity(atoi(optarg));
+			break;
+		default:
+			usage(basename(argv[0]));
+			exit(1);
+		}
+	}
+
+	printf("%s: Test the futex value of private file mappings in FUTEX_WAIT\n",
+	       basename(argv[0]));
+
+	ret = pthread_create(&thr, NULL, thr_futex_wait, NULL);
+	if (ret < 0) {
+		fprintf(stderr, "pthread_create error\n");
+		ret = RET_ERROR;
+		goto out;
+	}
+
+	info("wait a while\n");
+	usleep(WAKE_WAIT_US);
+	val = 2;
+	res = futex_wake(&val, 1, 0);
+	info("futex_wake %d\n", res);
+	if (res != 1) {
+		fail("FUTEX_WAKE didn't find the waiting thread.\n");
+		ret = RET_FAIL;
+	}
+
+	info("join\n");
+	pthread_join(thr, NULL);
+
+ out:
+	print_result(ret);
+	return ret;
+}
diff --git a/tools/testing/selftests/futex/functional/futex_wait_timeout.c b/tools/testing/selftests/futex/functional/futex_wait_timeout.c
new file mode 100644
index 000000000000..ab428ca894de
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_wait_timeout.c
@@ -0,0 +1,86 @@
+/******************************************************************************
+ *
+ *   Copyright © International Business Machines  Corp., 2009
+ *
+ *   This program is free software;  you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ * DESCRIPTION
+ *      Block on a futex and wait for timeout.
+ *
+ * AUTHOR
+ *      Darren Hart <dvhart@linux.intel.com>
+ *
+ * HISTORY
+ *      2009-Nov-6: Initial version by Darren Hart <dvhart@linux.intel.com>
+ *
+ *****************************************************************************/
+
+#include <errno.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include "futextest.h"
+#include "logging.h"
+
+static long timeout_ns = 100000;	/* 100us default timeout */
+
+void usage(char *prog)
+{
+	printf("Usage: %s\n", prog);
+	printf("  -c	Use color\n");
+	printf("  -h	Display this help message\n");
+	printf("  -t N	Timeout in nanoseconds (default: 100,000)\n");
+	printf("  -v L	Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
+	       VQUIET, VCRITICAL, VINFO);
+}
+
+int main(int argc, char *argv[])
+{
+	futex_t f1 = FUTEX_INITIALIZER;
+	struct timespec to;
+	int res, ret = RET_PASS;
+	int c;
+
+	while ((c = getopt(argc, argv, "cht:v:")) != -1) {
+		switch (c) {
+		case 'c':
+			log_color(1);
+			break;
+		case 'h':
+			usage(basename(argv[0]));
+			exit(0);
+		case 't':
+			timeout_ns = atoi(optarg);
+			break;
+		case 'v':
+			log_verbosity(atoi(optarg));
+			break;
+		default:
+			usage(basename(argv[0]));
+			exit(1);
+		}
+	}
+
+	printf("%s: Block on a futex and wait for timeout\n",
+	       basename(argv[0]));
+	printf("\tArguments: timeout=%ldns\n", timeout_ns);
+
+	/* initialize timeout */
+	to.tv_sec = 0;
+	to.tv_nsec = timeout_ns;
+
+	info("Calling futex_wait on f1: %u @ %p\n", f1, &f1);
+	res = futex_wait(&f1, f1, &to, FUTEX_PRIVATE_FLAG);
+	if (!res || errno != ETIMEDOUT) {
+		fail("futex_wait returned %d\n", ret < 0 ? errno : ret);
+		ret = RET_FAIL;
+	}
+
+	print_result(ret);
+	return ret;
+}
diff --git a/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c b/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c
new file mode 100644
index 000000000000..fe7aee96844b
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c
@@ -0,0 +1,124 @@
+/******************************************************************************
+ *
+ * Copyright FUJITSU LIMITED 2010
+ * Copyright KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+ *
+ *   This program is free software;  you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ * DESCRIPTION
+ *      Wait on uninitialized heap. It shold be zero and FUTEX_WAIT should
+ *      return immediately. This test is intent to test zero page handling in
+ *      futex.
+ *
+ * AUTHOR
+ *      KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+ *
+ * HISTORY
+ *      2010-Jan-6: Initial version by KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+ *
+ *****************************************************************************/
+
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <syscall.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <errno.h>
+#include <linux/futex.h>
+#include <libgen.h>
+
+#include "logging.h"
+#include "futextest.h"
+
+#define WAIT_US 5000000
+
+static int child_blocked = 1;
+static int child_ret;
+void *buf;
+
+void usage(char *prog)
+{
+	printf("Usage: %s\n", prog);
+	printf("  -c	Use color\n");
+	printf("  -h	Display this help message\n");
+	printf("  -v L	Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
+	       VQUIET, VCRITICAL, VINFO);
+}
+
+void *wait_thread(void *arg)
+{
+	int res;
+
+	child_ret = RET_PASS;
+	res = futex_wait(buf, 1, NULL, 0);
+	child_blocked = 0;
+
+	if (res != 0 && errno != EWOULDBLOCK) {
+		error("futex failure\n", errno);
+		child_ret = RET_ERROR;
+	}
+	pthread_exit(NULL);
+}
+
+int main(int argc, char **argv)
+{
+	int c, ret = RET_PASS;
+	long page_size;
+	pthread_t thr;
+
+	while ((c = getopt(argc, argv, "chv:")) != -1) {
+		switch (c) {
+		case 'c':
+			log_color(1);
+			break;
+		case 'h':
+			usage(basename(argv[0]));
+			exit(0);
+		case 'v':
+			log_verbosity(atoi(optarg));
+			break;
+		default:
+			usage(basename(argv[0]));
+			exit(1);
+		}
+	}
+
+	page_size = sysconf(_SC_PAGESIZE);
+
+	buf = mmap(NULL, page_size, PROT_READ|PROT_WRITE,
+		   MAP_PRIVATE|MAP_ANONYMOUS, 0, 0);
+	if (buf == (void *)-1) {
+		error("mmap\n", errno);
+		exit(1);
+	}
+
+	printf("%s: Test the uninitialized futex value in FUTEX_WAIT\n",
+	       basename(argv[0]));
+
+
+	ret = pthread_create(&thr, NULL, wait_thread, NULL);
+	if (ret) {
+		error("pthread_create\n", errno);
+		ret = RET_ERROR;
+		goto out;
+	}
+
+	info("waiting %dus for child to return\n", WAIT_US);
+	usleep(WAIT_US);
+
+	ret = child_ret;
+	if (child_blocked) {
+		fail("child blocked in kernel\n");
+		ret = RET_FAIL;
+	}
+
+ out:
+	print_result(ret);
+	return ret;
+}
diff --git a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c
new file mode 100644
index 000000000000..b6b027448825
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c
@@ -0,0 +1,79 @@
+/******************************************************************************
+ *
+ *   Copyright © International Business Machines  Corp., 2009
+ *
+ *   This program is free software;  you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ * DESCRIPTION
+ *      Test if FUTEX_WAIT op returns -EWOULDBLOCK if the futex value differs
+ *      from the expected one.
+ *
+ * AUTHOR
+ *      Gowrishankar <gowrishankar.m@in.ibm.com>
+ *
+ * HISTORY
+ *      2009-Nov-14: Initial version by Gowrishankar <gowrishankar.m@in.ibm.com>
+ *
+ *****************************************************************************/
+
+#include <errno.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include "futextest.h"
+#include "logging.h"
+
+#define timeout_ns 100000
+
+void usage(char *prog)
+{
+	printf("Usage: %s\n", prog);
+	printf("  -c	Use color\n");
+	printf("  -h	Display this help message\n");
+	printf("  -v L	Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
+	       VQUIET, VCRITICAL, VINFO);
+}
+
+int main(int argc, char *argv[])
+{
+	struct timespec to = {.tv_sec = 0, .tv_nsec = timeout_ns};
+	futex_t f1 = FUTEX_INITIALIZER;
+	int res, ret = RET_PASS;
+	int c;
+
+	while ((c = getopt(argc, argv, "cht:v:")) != -1) {
+		switch (c) {
+		case 'c':
+			log_color(1);
+			break;
+		case 'h':
+			usage(basename(argv[0]));
+			exit(0);
+		case 'v':
+			log_verbosity(atoi(optarg));
+			break;
+		default:
+			usage(basename(argv[0]));
+			exit(1);
+		}
+	}
+
+	printf("%s: Test the unexpected futex value in FUTEX_WAIT\n",
+	       basename(argv[0]));
+
+	info("Calling futex_wait on f1: %u @ %p with val=%u\n", f1, &f1, f1+1);
+	res = futex_wait(&f1, f1+1, &to, FUTEX_PRIVATE_FLAG);
+	if (!res || errno != EWOULDBLOCK) {
+		fail("futex_wait returned: %d %s\n",
+		     res ? errno : res, res ? strerror(errno) : "");
+		ret = RET_FAIL;
+	}
+
+	print_result(ret);
+	return ret;
+}
diff --git a/tools/testing/selftests/futex/functional/run.sh b/tools/testing/selftests/futex/functional/run.sh
new file mode 100755
index 000000000000..e87dbe2a0b0d
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/run.sh
@@ -0,0 +1,79 @@
+#!/bin/sh
+
+###############################################################################
+#
+#   Copyright © International Business Machines  Corp., 2009
+#
+#   This program is free software;  you can redistribute it and/or modify
+#   it under the terms of the GNU General Public License as published by
+#   the Free Software Foundation; either version 2 of the License, or
+#   (at your option) any later version.
+#
+# DESCRIPTION
+#      Run tests in the current directory.
+#
+# AUTHOR
+#      Darren Hart <dvhart@linux.intel.com>
+#
+# HISTORY
+#      2009-Nov-9: Initial version by Darren Hart <dvhart@linux.intel.com>
+#      2010-Jan-6: Add futex_wait_uninitialized_heap and futex_wait_private_mapped_file
+#                  by KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com>
+#
+###############################################################################
+
+# Test for a color capable console
+if [ -z "$USE_COLOR" ]; then
+    tput setf 7
+    if [ $? -eq 0 ]; then
+        USE_COLOR=1
+        tput sgr0
+    fi
+fi
+if [ "$USE_COLOR" -eq 1 ]; then
+    COLOR="-c"
+fi
+
+
+echo
+# requeue pi testing
+# without timeouts
+./futex_requeue_pi $COLOR
+./futex_requeue_pi $COLOR -b
+./futex_requeue_pi $COLOR -b -l
+./futex_requeue_pi $COLOR -b -o
+./futex_requeue_pi $COLOR -l
+./futex_requeue_pi $COLOR -o
+# with timeouts
+./futex_requeue_pi $COLOR -b -l -t 5000
+./futex_requeue_pi $COLOR -l -t 5000
+./futex_requeue_pi $COLOR -b -l -t 500000
+./futex_requeue_pi $COLOR -l -t 500000
+./futex_requeue_pi $COLOR -b -t 5000
+./futex_requeue_pi $COLOR -t 5000
+./futex_requeue_pi $COLOR -b -t 500000
+./futex_requeue_pi $COLOR -t 500000
+./futex_requeue_pi $COLOR -b -o -t 5000
+./futex_requeue_pi $COLOR -l -t 5000
+./futex_requeue_pi $COLOR -b -o -t 500000
+./futex_requeue_pi $COLOR -l -t 500000
+# with long timeout
+./futex_requeue_pi $COLOR -b -l -t 2000000000
+./futex_requeue_pi $COLOR -l -t 2000000000
+
+
+echo
+./futex_requeue_pi_mismatched_ops $COLOR
+
+echo
+./futex_requeue_pi_signal_restart $COLOR
+
+echo
+./futex_wait_timeout $COLOR
+
+echo
+./futex_wait_wouldblock $COLOR
+
+echo
+./futex_wait_uninitialized_heap $COLOR
+./futex_wait_private_mapped_file $COLOR
diff --git a/tools/testing/selftests/futex/include/atomic.h b/tools/testing/selftests/futex/include/atomic.h
new file mode 100644
index 000000000000..f861da3e31ab
--- /dev/null
+++ b/tools/testing/selftests/futex/include/atomic.h
@@ -0,0 +1,83 @@
+/******************************************************************************
+ *
+ *   Copyright © International Business Machines  Corp., 2009
+ *
+ *   This program is free software;  you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ * DESCRIPTION
+ *      GCC atomic builtin wrappers
+ *      http://gcc.gnu.org/onlinedocs/gcc-4.1.0/gcc/Atomic-Builtins.html
+ *
+ * AUTHOR
+ *      Darren Hart <dvhart@linux.intel.com>
+ *
+ * HISTORY
+ *      2009-Nov-17: Initial version by Darren Hart <dvhart@linux.intel.com>
+ *
+ *****************************************************************************/
+
+#ifndef _ATOMIC_H
+#define _ATOMIC_H
+
+typedef struct {
+	volatile int val;
+} atomic_t;
+
+#define ATOMIC_INITIALIZER { 0 }
+
+/**
+ * atomic_cmpxchg() - Atomic compare and exchange
+ * @uaddr:	The address of the futex to be modified
+ * @oldval:	The expected value of the futex
+ * @newval:	The new value to try and assign the futex
+ *
+ * Return the old value of addr->val.
+ */
+static inline int
+atomic_cmpxchg(atomic_t *addr, int oldval, int newval)
+{
+	return __sync_val_compare_and_swap(&addr->val, oldval, newval);
+}
+
+/**
+ * atomic_inc() - Atomic incrememnt
+ * @addr:	Address of the variable to increment
+ *
+ * Return the new value of addr->val.
+ */
+static inline int
+atomic_inc(atomic_t *addr)
+{
+	return __sync_add_and_fetch(&addr->val, 1);
+}
+
+/**
+ * atomic_dec() - Atomic decrement
+ * @addr:	Address of the variable to decrement
+ *
+ * Return the new value of addr-val.
+ */
+static inline int
+atomic_dec(atomic_t *addr)
+{
+	return __sync_sub_and_fetch(&addr->val, 1);
+}
+
+/**
+ * atomic_set() - Atomic set
+ * @addr:	Address of the variable to set
+ * @newval:	New value for the atomic_t
+ *
+ * Return the new value of addr->val.
+ */
+static inline int
+atomic_set(atomic_t *addr, int newval)
+{
+	addr->val = newval;
+	return newval;
+}
+
+#endif
diff --git a/tools/testing/selftests/futex/include/futextest.h b/tools/testing/selftests/futex/include/futextest.h
new file mode 100644
index 000000000000..b98c3aba7102
--- /dev/null
+++ b/tools/testing/selftests/futex/include/futextest.h
@@ -0,0 +1,266 @@
+/******************************************************************************
+ *
+ *   Copyright © International Business Machines  Corp., 2009
+ *
+ *   This program is free software;  you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ * DESCRIPTION
+ *      Glibc independent futex library for testing kernel functionality.
+ *
+ * AUTHOR
+ *      Darren Hart <dvhart@linux.intel.com>
+ *
+ * HISTORY
+ *      2009-Nov-6: Initial version by Darren Hart <dvhart@linux.intel.com>
+ *
+ *****************************************************************************/
+
+#ifndef _FUTEXTEST_H
+#define _FUTEXTEST_H
+
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <linux/futex.h>
+
+typedef volatile u_int32_t futex_t;
+#define FUTEX_INITIALIZER 0
+
+/* Define the newer op codes if the system header file is not up to date. */
+#ifndef FUTEX_WAIT_BITSET
+#define FUTEX_WAIT_BITSET		9
+#endif
+#ifndef FUTEX_WAKE_BITSET
+#define FUTEX_WAKE_BITSET		10
+#endif
+#ifndef FUTEX_WAIT_REQUEUE_PI
+#define FUTEX_WAIT_REQUEUE_PI		11
+#endif
+#ifndef FUTEX_CMP_REQUEUE_PI
+#define FUTEX_CMP_REQUEUE_PI		12
+#endif
+#ifndef FUTEX_WAIT_REQUEUE_PI_PRIVATE
+#define FUTEX_WAIT_REQUEUE_PI_PRIVATE	(FUTEX_WAIT_REQUEUE_PI | \
+					 FUTEX_PRIVATE_FLAG)
+#endif
+#ifndef FUTEX_REQUEUE_PI_PRIVATE
+#define FUTEX_CMP_REQUEUE_PI_PRIVATE	(FUTEX_CMP_REQUEUE_PI | \
+					 FUTEX_PRIVATE_FLAG)
+#endif
+
+/**
+ * futex() - SYS_futex syscall wrapper
+ * @uaddr:	address of first futex
+ * @op:		futex op code
+ * @val:	typically expected value of uaddr, but varies by op
+ * @timeout:	typically an absolute struct timespec (except where noted
+ *              otherwise). Overloaded by some ops
+ * @uaddr2:	address of second futex for some ops\
+ * @val3:	varies by op
+ * @opflags:	flags to be bitwise OR'd with op, such as FUTEX_PRIVATE_FLAG
+ *
+ * futex() is used by all the following futex op wrappers. It can also be
+ * used for misuse and abuse testing. Generally, the specific op wrappers
+ * should be used instead. It is a macro instead of an static inline function as
+ * some of the types over overloaded (timeout is used for nr_requeue for
+ * example).
+ *
+ * These argument descriptions are the defaults for all
+ * like-named arguments in the following wrappers except where noted below.
+ */
+#define futex(uaddr, op, val, timeout, uaddr2, val3, opflags) \
+	syscall(SYS_futex, uaddr, op | opflags, val, timeout, uaddr2, val3)
+
+/**
+ * futex_wait() - block on uaddr with optional timeout
+ * @timeout:	relative timeout
+ */
+static inline int
+futex_wait(futex_t *uaddr, futex_t val, struct timespec *timeout, int opflags)
+{
+	return futex(uaddr, FUTEX_WAIT, val, timeout, NULL, 0, opflags);
+}
+
+/**
+ * futex_wake() - wake one or more tasks blocked on uaddr
+ * @nr_wake:	wake up to this many tasks
+ */
+static inline int
+futex_wake(futex_t *uaddr, int nr_wake, int opflags)
+{
+	return futex(uaddr, FUTEX_WAKE, nr_wake, NULL, NULL, 0, opflags);
+}
+
+/**
+ * futex_wait_bitset() - block on uaddr with bitset
+ * @bitset:	bitset to be used with futex_wake_bitset
+ */
+static inline int
+futex_wait_bitset(futex_t *uaddr, futex_t val, struct timespec *timeout,
+		  u_int32_t bitset, int opflags)
+{
+	return futex(uaddr, FUTEX_WAIT_BITSET, val, timeout, NULL, bitset,
+		     opflags);
+}
+
+/**
+ * futex_wake_bitset() - wake one or more tasks blocked on uaddr with bitset
+ * @bitset:	bitset to compare with that used in futex_wait_bitset
+ */
+static inline int
+futex_wake_bitset(futex_t *uaddr, int nr_wake, u_int32_t bitset, int opflags)
+{
+	return futex(uaddr, FUTEX_WAKE_BITSET, nr_wake, NULL, NULL, bitset,
+		     opflags);
+}
+
+/**
+ * futex_lock_pi() - block on uaddr as a PI mutex
+ * @detect:	whether (1) or not (0) to perform deadlock detection
+ */
+static inline int
+futex_lock_pi(futex_t *uaddr, struct timespec *timeout, int detect,
+	      int opflags)
+{
+	return futex(uaddr, FUTEX_LOCK_PI, detect, timeout, NULL, 0, opflags);
+}
+
+/**
+ * futex_unlock_pi() - release uaddr as a PI mutex, waking the top waiter
+ */
+static inline int
+futex_unlock_pi(futex_t *uaddr, int opflags)
+{
+	return futex(uaddr, FUTEX_UNLOCK_PI, 0, NULL, NULL, 0, opflags);
+}
+
+/**
+ * futex_wake_op() - FIXME: COME UP WITH A GOOD ONE LINE DESCRIPTION
+ */
+static inline int
+futex_wake_op(futex_t *uaddr, futex_t *uaddr2, int nr_wake, int nr_wake2,
+	      int wake_op, int opflags)
+{
+	return futex(uaddr, FUTEX_WAKE_OP, nr_wake, nr_wake2, uaddr2, wake_op,
+		     opflags);
+}
+
+/**
+ * futex_requeue() - requeue without expected value comparison, deprecated
+ * @nr_wake:	wake up to this many tasks
+ * @nr_requeue:	requeue up to this many tasks
+ *
+ * Due to its inherently racy implementation, futex_requeue() is deprecated in
+ * favor of futex_cmp_requeue().
+ */
+static inline int
+futex_requeue(futex_t *uaddr, futex_t *uaddr2, int nr_wake, int nr_requeue,
+	      int opflags)
+{
+	return futex(uaddr, FUTEX_REQUEUE, nr_wake, nr_requeue, uaddr2, 0,
+		     opflags);
+}
+
+/**
+ * futex_cmp_requeue() - requeue tasks from uaddr to uaddr2
+ * @nr_wake:	wake up to this many tasks
+ * @nr_requeue:	requeue up to this many tasks
+ */
+static inline int
+futex_cmp_requeue(futex_t *uaddr, futex_t val, futex_t *uaddr2, int nr_wake,
+		  int nr_requeue, int opflags)
+{
+	return futex(uaddr, FUTEX_CMP_REQUEUE, nr_wake, nr_requeue, uaddr2,
+		     val, opflags);
+}
+
+/**
+ * futex_wait_requeue_pi() - block on uaddr and prepare to requeue to uaddr2
+ * @uaddr:	non-PI futex source
+ * @uaddr2:	PI futex target
+ *
+ * This is the first half of the requeue_pi mechanism. It shall always be
+ * paired with futex_cmp_requeue_pi().
+ */
+static inline int
+futex_wait_requeue_pi(futex_t *uaddr, futex_t val, futex_t *uaddr2,
+		      struct timespec *timeout, int opflags)
+{
+	return futex(uaddr, FUTEX_WAIT_REQUEUE_PI, val, timeout, uaddr2, 0,
+		     opflags);
+}
+
+/**
+ * futex_cmp_requeue_pi() - requeue tasks from uaddr to uaddr2 (PI aware)
+ * @uaddr:	non-PI futex source
+ * @uaddr2:	PI futex target
+ * @nr_wake:	wake up to this many tasks
+ * @nr_requeue:	requeue up to this many tasks
+ */
+static inline int
+futex_cmp_requeue_pi(futex_t *uaddr, futex_t val, futex_t *uaddr2, int nr_wake,
+		     int nr_requeue, int opflags)
+{
+	return futex(uaddr, FUTEX_CMP_REQUEUE_PI, nr_wake, nr_requeue, uaddr2,
+		     val, opflags);
+}
+
+/**
+ * futex_cmpxchg() - atomic compare and exchange
+ * @uaddr:	The address of the futex to be modified
+ * @oldval:	The expected value of the futex
+ * @newval:	The new value to try and assign the futex
+ *
+ * Implement cmpxchg using gcc atomic builtins.
+ * http://gcc.gnu.org/onlinedocs/gcc-4.1.0/gcc/Atomic-Builtins.html
+ *
+ * Return the old futex value.
+ */
+static inline u_int32_t
+futex_cmpxchg(futex_t *uaddr, u_int32_t oldval, u_int32_t newval)
+{
+	return __sync_val_compare_and_swap(uaddr, oldval, newval);
+}
+
+/**
+ * futex_dec() - atomic decrement of the futex value
+ * @uaddr:	The address of the futex to be modified
+ *
+ * Return the new futex value.
+ */
+static inline u_int32_t
+futex_dec(futex_t *uaddr)
+{
+	return __sync_sub_and_fetch(uaddr, 1);
+}
+
+/**
+ * futex_inc() - atomic increment of the futex value
+ * @uaddr:	the address of the futex to be modified
+ *
+ * Return the new futex value.
+ */
+static inline u_int32_t
+futex_inc(futex_t *uaddr)
+{
+	return __sync_add_and_fetch(uaddr, 1);
+}
+
+/**
+ * futex_set() - atomic decrement of the futex value
+ * @uaddr:	the address of the futex to be modified
+ * @newval:	New value for the atomic_t
+ *
+ * Return the new futex value.
+ */
+static inline u_int32_t
+futex_set(futex_t *uaddr, u_int32_t newval)
+{
+	*uaddr = newval;
+	return newval;
+}
+
+#endif
diff --git a/tools/testing/selftests/futex/include/logging.h b/tools/testing/selftests/futex/include/logging.h
new file mode 100644
index 000000000000..014aa01197af
--- /dev/null
+++ b/tools/testing/selftests/futex/include/logging.h
@@ -0,0 +1,153 @@
+/******************************************************************************
+ *
+ *   Copyright © International Business Machines  Corp., 2009
+ *
+ *   This program is free software;  you can redistribute it and/or modify
+ *   it under the terms of the GNU General Public License as published by
+ *   the Free Software Foundation; either version 2 of the License, or
+ *   (at your option) any later version.
+ *
+ * DESCRIPTION
+ *      Glibc independent futex library for testing kernel functionality.
+ *
+ * AUTHOR
+ *      Darren Hart <dvhart@linux.intel.com>
+ *
+ * HISTORY
+ *      2009-Nov-6: Initial version by Darren Hart <dvhart@linux.intel.com>
+ *
+ *****************************************************************************/
+
+#ifndef _LOGGING_H
+#define _LOGGING_H
+
+#include <string.h>
+#include <unistd.h>
+#include <linux/futex.h>
+#include "kselftest.h"
+
+/*
+ * Define PASS, ERROR, and FAIL strings with and without color escape
+ * sequences, default to no color.
+ */
+#define ESC 0x1B, '['
+#define BRIGHT '1'
+#define GREEN '3', '2'
+#define YELLOW '3', '3'
+#define RED '3', '1'
+#define ESCEND 'm'
+#define BRIGHT_GREEN ESC, BRIGHT, ';', GREEN, ESCEND
+#define BRIGHT_YELLOW ESC, BRIGHT, ';', YELLOW, ESCEND
+#define BRIGHT_RED ESC, BRIGHT, ';', RED, ESCEND
+#define RESET_COLOR ESC, '0', 'm'
+static const char PASS_COLOR[] = {BRIGHT_GREEN, ' ', 'P', 'A', 'S', 'S',
+				  RESET_COLOR, 0};
+static const char ERROR_COLOR[] = {BRIGHT_YELLOW, 'E', 'R', 'R', 'O', 'R',
+				   RESET_COLOR, 0};
+static const char FAIL_COLOR[] = {BRIGHT_RED, ' ', 'F', 'A', 'I', 'L',
+				  RESET_COLOR, 0};
+static const char INFO_NORMAL[] = " INFO";
+static const char PASS_NORMAL[] = " PASS";
+static const char ERROR_NORMAL[] = "ERROR";
+static const char FAIL_NORMAL[] = " FAIL";
+const char *INFO = INFO_NORMAL;
+const char *PASS = PASS_NORMAL;
+const char *ERROR = ERROR_NORMAL;
+const char *FAIL = FAIL_NORMAL;
+
+/* Verbosity setting for INFO messages */
+#define VQUIET    0
+#define VCRITICAL 1
+#define VINFO     2
+#define VMAX      VINFO
+int _verbose = VCRITICAL;
+
+/* Functional test return codes */
+#define RET_PASS   0
+#define RET_ERROR -1
+#define RET_FAIL  -2
+
+/**
+ * log_color() - Use colored output for PASS, ERROR, and FAIL strings
+ * @use_color:	use color (1) or not (0)
+ */
+void log_color(int use_color)
+{
+	if (use_color) {
+		PASS = PASS_COLOR;
+		ERROR = ERROR_COLOR;
+		FAIL = FAIL_COLOR;
+	} else {
+		PASS = PASS_NORMAL;
+		ERROR = ERROR_NORMAL;
+		FAIL = FAIL_NORMAL;
+	}
+}
+
+/**
+ * log_verbosity() - Set verbosity of test output
+ * @verbose:	Enable (1) verbose output or not (0)
+ *
+ * Currently setting verbose=1 will enable INFO messages and 0 will disable
+ * them. FAIL and ERROR messages are always displayed.
+ */
+void log_verbosity(int level)
+{
+	if (level > VMAX)
+		level = VMAX;
+	else if (level < 0)
+		level = 0;
+	_verbose = level;
+}
+
+/**
+ * print_result() - Print standard PASS | ERROR | FAIL results
+ * @ret:	the return value to be considered: 0 | RET_ERROR | RET_FAIL
+ *
+ * print_result() is primarily intended for functional tests.
+ */
+void print_result(int ret)
+{
+	const char *result = "Unknown return code";
+
+	switch (ret) {
+	case RET_PASS:
+		ksft_inc_pass_cnt();
+		result = PASS;
+		break;
+	case RET_ERROR:
+		result = ERROR;
+		break;
+	case RET_FAIL:
+		ksft_inc_fail_cnt();
+		result = FAIL;
+		break;
+	}
+	printf("Result: %s\n", result);
+}
+
+/* log level macros */
+#define info(message, vargs...) \
+do { \
+	if (_verbose >= VINFO) \
+		fprintf(stderr, "\t%s: "message, INFO, ##vargs); \
+} while (0)
+
+#define error(message, err, args...) \
+do { \
+	if (_verbose >= VCRITICAL) {\
+		if (err) \
+			fprintf(stderr, "\t%s: %s: "message, \
+				ERROR, strerror(err), ##args); \
+		else \
+			fprintf(stderr, "\t%s: "message, ERROR, ##args); \
+	} \
+} while (0)
+
+#define fail(message, args...) \
+do { \
+	if (_verbose >= VCRITICAL) \
+		fprintf(stderr, "\t%s: "message, FAIL, ##args); \
+} while (0)
+
+#endif
diff --git a/tools/testing/selftests/futex/run.sh b/tools/testing/selftests/futex/run.sh
new file mode 100755
index 000000000000..4126312ad64e
--- /dev/null
+++ b/tools/testing/selftests/futex/run.sh
@@ -0,0 +1,33 @@
+#!/bin/sh
+
+###############################################################################
+#
+#   Copyright © International Business Machines  Corp., 2009
+#
+#   This program is free software;  you can redistribute it and/or modify
+#   it under the terms of the GNU General Public License as published by
+#   the Free Software Foundation; either version 2 of the License, or
+#   (at your option) any later version.
+#
+# DESCRIPTION
+#      Run all tests under the functional, performance, and stress directories.
+#      Format and summarize the results.
+#
+# AUTHOR
+#      Darren Hart <dvhart@linux.intel.com>
+#
+# HISTORY
+#      2009-Nov-9: Initial version by Darren Hart <dvhart@linux.intel.com>
+#
+###############################################################################
+
+# Test for a color capable shell and pass the result to the subdir scripts
+USE_COLOR=0
+tput setf 7
+if [ $? -eq 0 ]; then
+    USE_COLOR=1
+    tput sgr0
+fi
+export USE_COLOR
+
+(cd functional; ./run.sh)
diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h
index 572c8888167a..ef1c80d67ac7 100644
--- a/tools/testing/selftests/kselftest.h
+++ b/tools/testing/selftests/kselftest.h
@@ -13,6 +13,13 @@
 #include <stdlib.h>
 #include <unistd.h>
 
+/* define kselftest exit codes */
+#define KSFT_PASS  0
+#define KSFT_FAIL  1
+#define KSFT_XFAIL 2
+#define KSFT_XPASS 3
+#define KSFT_SKIP  4
+
 /* counters */
 struct ksft_count {
 	unsigned int ksft_pass;
@@ -40,23 +47,23 @@ static inline void ksft_print_cnts(void)
 
 static inline int ksft_exit_pass(void)
 {
-	exit(0);
+	exit(KSFT_PASS);
 }
 static inline int ksft_exit_fail(void)
 {
-	exit(1);
+	exit(KSFT_FAIL);
 }
 static inline int ksft_exit_xfail(void)
 {
-	exit(2);
+	exit(KSFT_XFAIL);
 }
 static inline int ksft_exit_xpass(void)
 {
-	exit(3);
+	exit(KSFT_XPASS);
 }
 static inline int ksft_exit_skip(void)
 {
-	exit(4);
+	exit(KSFT_SKIP);
 }
 
 #endif /* __KSELFTEST_H */
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index 2194155ae62a..ee412bab7ed4 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -13,6 +13,9 @@ run_tests: all
 
 define INSTALL_RULE
 	mkdir -p $(INSTALL_PATH)
+	@for TEST_DIR in $(TEST_DIRS); do\
+		cp -r $$TEST_DIR $(INSTALL_PATH); \
+	done;
 	install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES)
 endef
 
diff --git a/tools/testing/selftests/mount/Makefile b/tools/testing/selftests/mount/Makefile
index 95580a97326e..5e35c9c50b72 100644
--- a/tools/testing/selftests/mount/Makefile
+++ b/tools/testing/selftests/mount/Makefile
@@ -9,7 +9,12 @@ unprivileged-remount-test: unprivileged-remount-test.c
 include ../lib.mk
 
 TEST_PROGS := unprivileged-remount-test
-override RUN_TESTS := if [ -f /proc/self/uid_map ] ; then ./unprivileged-remount-test ; fi
+override RUN_TESTS := if [ -f /proc/self/uid_map ] ; \
+		      then	\
+				./unprivileged-remount-test ; \
+		      else	\
+				echo "WARN: No /proc/self/uid_map exist, test skipped." ; \
+		      fi
 override EMIT_TESTS := echo "$(RUN_TESTS)"
 
 clean:
diff --git a/tools/testing/selftests/seccomp/.gitignore b/tools/testing/selftests/seccomp/.gitignore
new file mode 100644
index 000000000000..346d83ca8069
--- /dev/null
+++ b/tools/testing/selftests/seccomp/.gitignore
@@ -0,0 +1 @@
+seccomp_bpf
diff --git a/tools/testing/selftests/seccomp/Makefile b/tools/testing/selftests/seccomp/Makefile
new file mode 100644
index 000000000000..8401e87e34e1
--- /dev/null
+++ b/tools/testing/selftests/seccomp/Makefile
@@ -0,0 +1,10 @@
+TEST_PROGS := seccomp_bpf
+CFLAGS += -Wl,-no-as-needed -Wall
+LDFLAGS += -lpthread
+
+all: $(TEST_PROGS)
+
+include ../lib.mk
+
+clean:
+	$(RM) $(TEST_PROGS)
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
new file mode 100644
index 000000000000..c5abe7fd7590
--- /dev/null
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -0,0 +1,2109 @@
+/*
+ * Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
+ * Use of this source code is governed by the GPLv2 license.
+ *
+ * Test code for seccomp bpf.
+ */
+
+#include <asm/siginfo.h>
+#define __have_siginfo_t 1
+#define __have_sigval_t 1
+#define __have_sigevent_t 1
+
+#include <errno.h>
+#include <linux/filter.h>
+#include <sys/prctl.h>
+#include <sys/ptrace.h>
+#include <sys/user.h>
+#include <linux/prctl.h>
+#include <linux/ptrace.h>
+#include <linux/seccomp.h>
+#include <poll.h>
+#include <pthread.h>
+#include <semaphore.h>
+#include <signal.h>
+#include <stddef.h>
+#include <stdbool.h>
+#include <string.h>
+#include <linux/elf.h>
+#include <sys/uio.h>
+
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <sys/syscall.h>
+
+#include "test_harness.h"
+
+#ifndef PR_SET_PTRACER
+# define PR_SET_PTRACER 0x59616d61
+#endif
+
+#ifndef PR_SET_NO_NEW_PRIVS
+#define PR_SET_NO_NEW_PRIVS 38
+#define PR_GET_NO_NEW_PRIVS 39
+#endif
+
+#ifndef PR_SECCOMP_EXT
+#define PR_SECCOMP_EXT 43
+#endif
+
+#ifndef SECCOMP_EXT_ACT
+#define SECCOMP_EXT_ACT 1
+#endif
+
+#ifndef SECCOMP_EXT_ACT_TSYNC
+#define SECCOMP_EXT_ACT_TSYNC 1
+#endif
+
+#ifndef SECCOMP_MODE_STRICT
+#define SECCOMP_MODE_STRICT 1
+#endif
+
+#ifndef SECCOMP_MODE_FILTER
+#define SECCOMP_MODE_FILTER 2
+#endif
+
+#ifndef SECCOMP_RET_KILL
+#define SECCOMP_RET_KILL        0x00000000U /* kill the task immediately */
+#define SECCOMP_RET_TRAP        0x00030000U /* disallow and force a SIGSYS */
+#define SECCOMP_RET_ERRNO       0x00050000U /* returns an errno */
+#define SECCOMP_RET_TRACE       0x7ff00000U /* pass to a tracer or disallow */
+#define SECCOMP_RET_ALLOW       0x7fff0000U /* allow */
+
+/* Masks for the return value sections. */
+#define SECCOMP_RET_ACTION      0x7fff0000U
+#define SECCOMP_RET_DATA        0x0000ffffU
+
+struct seccomp_data {
+	int nr;
+	__u32 arch;
+	__u64 instruction_pointer;
+	__u64 args[6];
+};
+#endif
+
+#define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]))
+
+#define SIBLING_EXIT_UNKILLED	0xbadbeef
+#define SIBLING_EXIT_FAILURE	0xbadface
+#define SIBLING_EXIT_NEWPRIVS	0xbadfeed
+
+TEST(mode_strict_support)
+{
+	long ret;
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
+	ASSERT_EQ(0, ret) {
+		TH_LOG("Kernel does not support CONFIG_SECCOMP");
+	}
+	syscall(__NR_exit, 1);
+}
+
+TEST_SIGNAL(mode_strict_cannot_call_prctl, SIGKILL)
+{
+	long ret;
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL);
+	ASSERT_EQ(0, ret) {
+		TH_LOG("Kernel does not support CONFIG_SECCOMP");
+	}
+	syscall(__NR_prctl, PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
+		NULL, NULL, NULL);
+	EXPECT_FALSE(true) {
+		TH_LOG("Unreachable!");
+	}
+}
+
+/* Note! This doesn't test no new privs behavior */
+TEST(no_new_privs_support)
+{
+	long ret;
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	EXPECT_EQ(0, ret) {
+		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+	}
+}
+
+/* Tests kernel support by checking for a copy_from_user() fault on * NULL. */
+TEST(mode_filter_support)
+{
+	long ret;
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
+	ASSERT_EQ(0, ret) {
+		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+	}
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, NULL, NULL);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(EFAULT, errno) {
+		TH_LOG("Kernel does not support CONFIG_SECCOMP_FILTER!");
+	}
+}
+
+TEST(mode_filter_without_nnp)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+	long ret;
+
+	ret = prctl(PR_GET_NO_NEW_PRIVS, 0, NULL, 0, 0);
+	ASSERT_LE(0, ret) {
+		TH_LOG("Expected 0 or unsupported for NO_NEW_PRIVS");
+	}
+	errno = 0;
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+	/* Succeeds with CAP_SYS_ADMIN, fails without */
+	/* TODO(wad) check caps not euid */
+	if (geteuid()) {
+		EXPECT_EQ(-1, ret);
+		EXPECT_EQ(EACCES, errno);
+	} else {
+		EXPECT_EQ(0, ret);
+	}
+}
+
+#define MAX_INSNS_PER_PATH 32768
+
+TEST(filter_size_limits)
+{
+	int i;
+	int count = BPF_MAXINSNS + 1;
+	struct sock_filter allow[] = {
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_filter *filter;
+	struct sock_fprog prog = { };
+	long ret;
+
+	filter = calloc(count, sizeof(*filter));
+	ASSERT_NE(NULL, filter);
+
+	for (i = 0; i < count; i++)
+		filter[i] = allow[0];
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	prog.filter = filter;
+	prog.len = count;
+
+	/* Too many filter instructions in a single filter. */
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+	ASSERT_NE(0, ret) {
+		TH_LOG("Installing %d insn filter was allowed", prog.len);
+	}
+
+	/* One less is okay, though. */
+	prog.len -= 1;
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+	ASSERT_EQ(0, ret) {
+		TH_LOG("Installing %d insn filter wasn't allowed", prog.len);
+	}
+}
+
+TEST(filter_chain_limits)
+{
+	int i;
+	int count = BPF_MAXINSNS;
+	struct sock_filter allow[] = {
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_filter *filter;
+	struct sock_fprog prog = { };
+	long ret;
+
+	filter = calloc(count, sizeof(*filter));
+	ASSERT_NE(NULL, filter);
+
+	for (i = 0; i < count; i++)
+		filter[i] = allow[0];
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	prog.filter = filter;
+	prog.len = 1;
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	prog.len = count;
+
+	/* Too many total filter instructions. */
+	for (i = 0; i < MAX_INSNS_PER_PATH; i++) {
+		ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+		if (ret != 0)
+			break;
+	}
+	ASSERT_NE(0, ret) {
+		TH_LOG("Allowed %d %d-insn filters (total with penalties:%d)",
+		       i, count, i * (count + 4));
+	}
+}
+
+TEST(mode_filter_cannot_move_to_strict)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+	long ret;
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, 0, 0);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(EINVAL, errno);
+}
+
+
+TEST(mode_filter_get_seccomp)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+	long ret;
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
+	EXPECT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
+	EXPECT_EQ(2, ret);
+}
+
+
+TEST(ALLOW_all)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+	long ret;
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+	ASSERT_EQ(0, ret);
+}
+
+TEST(empty_prog)
+{
+	struct sock_filter filter[] = {
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+	long ret;
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(EINVAL, errno);
+}
+
+TEST_SIGNAL(unknown_ret_is_kill_inside, SIGSYS)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_RET|BPF_K, 0x10000000U),
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+	long ret;
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+	ASSERT_EQ(0, ret);
+	EXPECT_EQ(0, syscall(__NR_getpid)) {
+		TH_LOG("getpid() shouldn't ever return");
+	}
+}
+
+/* return code >= 0x80000000 is unused. */
+TEST_SIGNAL(unknown_ret_is_kill_above_allow, SIGSYS)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_RET|BPF_K, 0x90000000U),
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+	long ret;
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+	ASSERT_EQ(0, ret);
+	EXPECT_EQ(0, syscall(__NR_getpid)) {
+		TH_LOG("getpid() shouldn't ever return");
+	}
+}
+
+TEST_SIGNAL(KILL_all, SIGSYS)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+	long ret;
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+	ASSERT_EQ(0, ret);
+}
+
+TEST_SIGNAL(KILL_one, SIGSYS)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+	long ret;
+	pid_t parent = getppid();
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+	ASSERT_EQ(0, ret);
+
+	EXPECT_EQ(parent, syscall(__NR_getppid));
+	/* getpid() should never return. */
+	EXPECT_EQ(0, syscall(__NR_getpid));
+}
+
+TEST_SIGNAL(KILL_one_arg_one, SIGSYS)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+		/* Only both with lower 32-bit for now. */
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(0)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0C0FFEE, 0, 1),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+	long ret;
+	pid_t parent = getppid();
+	pid_t pid = getpid();
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+	ASSERT_EQ(0, ret);
+
+	EXPECT_EQ(parent, syscall(__NR_getppid));
+	EXPECT_EQ(pid, syscall(__NR_getpid));
+	/* getpid() should never return. */
+	EXPECT_EQ(0, syscall(__NR_getpid, 0x0C0FFEE));
+}
+
+TEST_SIGNAL(KILL_one_arg_six, SIGSYS)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+		/* Only both with lower 32-bit for now. */
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(5)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0C0FFEE, 0, 1),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+	long ret;
+	pid_t parent = getppid();
+	pid_t pid = getpid();
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+	ASSERT_EQ(0, ret);
+
+	EXPECT_EQ(parent, syscall(__NR_getppid));
+	EXPECT_EQ(pid, syscall(__NR_getpid));
+	/* getpid() should never return. */
+	EXPECT_EQ(0, syscall(__NR_getpid, 1, 2, 3, 4, 5, 0x0C0FFEE));
+}
+
+/* TODO(wad) add 64-bit versus 32-bit arg tests. */
+TEST(arg_out_of_range)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(6)),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+	long ret;
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(EINVAL, errno);
+}
+
+TEST(ERRNO_valid)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | E2BIG),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+	long ret;
+	pid_t parent = getppid();
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+	ASSERT_EQ(0, ret);
+
+	EXPECT_EQ(parent, syscall(__NR_getppid));
+	EXPECT_EQ(-1, read(0, NULL, 0));
+	EXPECT_EQ(E2BIG, errno);
+}
+
+TEST(ERRNO_zero)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | 0),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+	long ret;
+	pid_t parent = getppid();
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+	ASSERT_EQ(0, ret);
+
+	EXPECT_EQ(parent, syscall(__NR_getppid));
+	/* "errno" of 0 is ok. */
+	EXPECT_EQ(0, read(0, NULL, 0));
+}
+
+TEST(ERRNO_capped)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | 4096),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+	long ret;
+	pid_t parent = getppid();
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+	ASSERT_EQ(0, ret);
+
+	EXPECT_EQ(parent, syscall(__NR_getppid));
+	EXPECT_EQ(-1, read(0, NULL, 0));
+	EXPECT_EQ(4095, errno);
+}
+
+FIXTURE_DATA(TRAP) {
+	struct sock_fprog prog;
+};
+
+FIXTURE_SETUP(TRAP)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+
+	memset(&self->prog, 0, sizeof(self->prog));
+	self->prog.filter = malloc(sizeof(filter));
+	ASSERT_NE(NULL, self->prog.filter);
+	memcpy(self->prog.filter, filter, sizeof(filter));
+	self->prog.len = (unsigned short)ARRAY_SIZE(filter);
+}
+
+FIXTURE_TEARDOWN(TRAP)
+{
+	if (self->prog.filter)
+		free(self->prog.filter);
+}
+
+TEST_F_SIGNAL(TRAP, dfl, SIGSYS)
+{
+	long ret;
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
+	ASSERT_EQ(0, ret);
+	syscall(__NR_getpid);
+}
+
+/* Ensure that SIGSYS overrides SIG_IGN */
+TEST_F_SIGNAL(TRAP, ign, SIGSYS)
+{
+	long ret;
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	signal(SIGSYS, SIG_IGN);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
+	ASSERT_EQ(0, ret);
+	syscall(__NR_getpid);
+}
+
+static struct siginfo TRAP_info;
+static volatile int TRAP_nr;
+static void TRAP_action(int nr, siginfo_t *info, void *void_context)
+{
+	memcpy(&TRAP_info, info, sizeof(TRAP_info));
+	TRAP_nr = nr;
+}
+
+TEST_F(TRAP, handler)
+{
+	int ret, test;
+	struct sigaction act;
+	sigset_t mask;
+
+	memset(&act, 0, sizeof(act));
+	sigemptyset(&mask);
+	sigaddset(&mask, SIGSYS);
+
+	act.sa_sigaction = &TRAP_action;
+	act.sa_flags = SA_SIGINFO;
+	ret = sigaction(SIGSYS, &act, NULL);
+	ASSERT_EQ(0, ret) {
+		TH_LOG("sigaction failed");
+	}
+	ret = sigprocmask(SIG_UNBLOCK, &mask, NULL);
+	ASSERT_EQ(0, ret) {
+		TH_LOG("sigprocmask failed");
+	}
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog);
+	ASSERT_EQ(0, ret);
+	TRAP_nr = 0;
+	memset(&TRAP_info, 0, sizeof(TRAP_info));
+	/* Expect the registers to be rolled back. (nr = error) may vary
+	 * based on arch. */
+	ret = syscall(__NR_getpid);
+	/* Silence gcc warning about volatile. */
+	test = TRAP_nr;
+	EXPECT_EQ(SIGSYS, test);
+	struct local_sigsys {
+		void *_call_addr;	/* calling user insn */
+		int _syscall;		/* triggering system call number */
+		unsigned int _arch;	/* AUDIT_ARCH_* of syscall */
+	} *sigsys = (struct local_sigsys *)
+#ifdef si_syscall
+		&(TRAP_info.si_call_addr);
+#else
+		&TRAP_info.si_pid;
+#endif
+	EXPECT_EQ(__NR_getpid, sigsys->_syscall);
+	/* Make sure arch is non-zero. */
+	EXPECT_NE(0, sigsys->_arch);
+	EXPECT_NE(0, (unsigned long)sigsys->_call_addr);
+}
+
+FIXTURE_DATA(precedence) {
+	struct sock_fprog allow;
+	struct sock_fprog trace;
+	struct sock_fprog error;
+	struct sock_fprog trap;
+	struct sock_fprog kill;
+};
+
+FIXTURE_SETUP(precedence)
+{
+	struct sock_filter allow_insns[] = {
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_filter trace_insns[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE),
+	};
+	struct sock_filter error_insns[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO),
+	};
+	struct sock_filter trap_insns[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP),
+	};
+	struct sock_filter kill_insns[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+	};
+
+	memset(self, 0, sizeof(*self));
+#define FILTER_ALLOC(_x) \
+	self->_x.filter = malloc(sizeof(_x##_insns)); \
+	ASSERT_NE(NULL, self->_x.filter); \
+	memcpy(self->_x.filter, &_x##_insns, sizeof(_x##_insns)); \
+	self->_x.len = (unsigned short)ARRAY_SIZE(_x##_insns)
+	FILTER_ALLOC(allow);
+	FILTER_ALLOC(trace);
+	FILTER_ALLOC(error);
+	FILTER_ALLOC(trap);
+	FILTER_ALLOC(kill);
+}
+
+FIXTURE_TEARDOWN(precedence)
+{
+#define FILTER_FREE(_x) if (self->_x.filter) free(self->_x.filter)
+	FILTER_FREE(allow);
+	FILTER_FREE(trace);
+	FILTER_FREE(error);
+	FILTER_FREE(trap);
+	FILTER_FREE(kill);
+}
+
+TEST_F(precedence, allow_ok)
+{
+	pid_t parent, res = 0;
+	long ret;
+
+	parent = getppid();
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
+	ASSERT_EQ(0, ret);
+	/* Should work just fine. */
+	res = syscall(__NR_getppid);
+	EXPECT_EQ(parent, res);
+}
+
+TEST_F_SIGNAL(precedence, kill_is_highest, SIGSYS)
+{
+	pid_t parent, res = 0;
+	long ret;
+
+	parent = getppid();
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
+	ASSERT_EQ(0, ret);
+	/* Should work just fine. */
+	res = syscall(__NR_getppid);
+	EXPECT_EQ(parent, res);
+	/* getpid() should never return. */
+	res = syscall(__NR_getpid);
+	EXPECT_EQ(0, res);
+}
+
+TEST_F_SIGNAL(precedence, kill_is_highest_in_any_order, SIGSYS)
+{
+	pid_t parent;
+	long ret;
+
+	parent = getppid();
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
+	ASSERT_EQ(0, ret);
+	/* Should work just fine. */
+	EXPECT_EQ(parent, syscall(__NR_getppid));
+	/* getpid() should never return. */
+	EXPECT_EQ(0, syscall(__NR_getpid));
+}
+
+TEST_F_SIGNAL(precedence, trap_is_second, SIGSYS)
+{
+	pid_t parent;
+	long ret;
+
+	parent = getppid();
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
+	ASSERT_EQ(0, ret);
+	/* Should work just fine. */
+	EXPECT_EQ(parent, syscall(__NR_getppid));
+	/* getpid() should never return. */
+	EXPECT_EQ(0, syscall(__NR_getpid));
+}
+
+TEST_F_SIGNAL(precedence, trap_is_second_in_any_order, SIGSYS)
+{
+	pid_t parent;
+	long ret;
+
+	parent = getppid();
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
+	ASSERT_EQ(0, ret);
+	/* Should work just fine. */
+	EXPECT_EQ(parent, syscall(__NR_getppid));
+	/* getpid() should never return. */
+	EXPECT_EQ(0, syscall(__NR_getpid));
+}
+
+TEST_F(precedence, errno_is_third)
+{
+	pid_t parent;
+	long ret;
+
+	parent = getppid();
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
+	ASSERT_EQ(0, ret);
+	/* Should work just fine. */
+	EXPECT_EQ(parent, syscall(__NR_getppid));
+	EXPECT_EQ(0, syscall(__NR_getpid));
+}
+
+TEST_F(precedence, errno_is_third_in_any_order)
+{
+	pid_t parent;
+	long ret;
+
+	parent = getppid();
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+	ASSERT_EQ(0, ret);
+	/* Should work just fine. */
+	EXPECT_EQ(parent, syscall(__NR_getppid));
+	EXPECT_EQ(0, syscall(__NR_getpid));
+}
+
+TEST_F(precedence, trace_is_fourth)
+{
+	pid_t parent;
+	long ret;
+
+	parent = getppid();
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
+	ASSERT_EQ(0, ret);
+	/* Should work just fine. */
+	EXPECT_EQ(parent, syscall(__NR_getppid));
+	/* No ptracer */
+	EXPECT_EQ(-1, syscall(__NR_getpid));
+}
+
+TEST_F(precedence, trace_is_fourth_in_any_order)
+{
+	pid_t parent;
+	long ret;
+
+	parent = getppid();
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+	ASSERT_EQ(0, ret);
+	/* Should work just fine. */
+	EXPECT_EQ(parent, syscall(__NR_getppid));
+	/* No ptracer */
+	EXPECT_EQ(-1, syscall(__NR_getpid));
+}
+
+#ifndef PTRACE_O_TRACESECCOMP
+#define PTRACE_O_TRACESECCOMP	0x00000080
+#endif
+
+/* Catch the Ubuntu 12.04 value error. */
+#if PTRACE_EVENT_SECCOMP != 7
+#undef PTRACE_EVENT_SECCOMP
+#endif
+
+#ifndef PTRACE_EVENT_SECCOMP
+#define PTRACE_EVENT_SECCOMP 7
+#endif
+
+#define IS_SECCOMP_EVENT(status) ((status >> 16) == PTRACE_EVENT_SECCOMP)
+bool tracer_running;
+void tracer_stop(int sig)
+{
+	tracer_running = false;
+}
+
+typedef void tracer_func_t(struct __test_metadata *_metadata,
+			   pid_t tracee, int status, void *args);
+
+void tracer(struct __test_metadata *_metadata, int fd, pid_t tracee,
+	    tracer_func_t tracer_func, void *args)
+{
+	int ret = -1;
+	struct sigaction action = {
+		.sa_handler = tracer_stop,
+	};
+
+	/* Allow external shutdown. */
+	tracer_running = true;
+	ASSERT_EQ(0, sigaction(SIGUSR1, &action, NULL));
+
+	errno = 0;
+	while (ret == -1 && errno != EINVAL)
+		ret = ptrace(PTRACE_ATTACH, tracee, NULL, 0);
+	ASSERT_EQ(0, ret) {
+		kill(tracee, SIGKILL);
+	}
+	/* Wait for attach stop */
+	wait(NULL);
+
+	ret = ptrace(PTRACE_SETOPTIONS, tracee, NULL, PTRACE_O_TRACESECCOMP);
+	ASSERT_EQ(0, ret) {
+		TH_LOG("Failed to set PTRACE_O_TRACESECCOMP");
+		kill(tracee, SIGKILL);
+	}
+	ptrace(PTRACE_CONT, tracee, NULL, 0);
+
+	/* Unblock the tracee */
+	ASSERT_EQ(1, write(fd, "A", 1));
+	ASSERT_EQ(0, close(fd));
+
+	/* Run until we're shut down. Must assert to stop execution. */
+	while (tracer_running) {
+		int status;
+
+		if (wait(&status) != tracee)
+			continue;
+		if (WIFSIGNALED(status) || WIFEXITED(status))
+			/* Child is dead. Time to go. */
+			return;
+
+		/* Make sure this is a seccomp event. */
+		ASSERT_EQ(true, IS_SECCOMP_EVENT(status));
+
+		tracer_func(_metadata, tracee, status, args);
+
+		ret = ptrace(PTRACE_CONT, tracee, NULL, NULL);
+		ASSERT_EQ(0, ret);
+	}
+	/* Directly report the status of our test harness results. */
+	syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE);
+}
+
+/* Common tracer setup/teardown functions. */
+void cont_handler(int num)
+{ }
+pid_t setup_trace_fixture(struct __test_metadata *_metadata,
+			  tracer_func_t func, void *args)
+{
+	char sync;
+	int pipefd[2];
+	pid_t tracer_pid;
+	pid_t tracee = getpid();
+
+	/* Setup a pipe for clean synchronization. */
+	ASSERT_EQ(0, pipe(pipefd));
+
+	/* Fork a child which we'll promote to tracer */
+	tracer_pid = fork();
+	ASSERT_LE(0, tracer_pid);
+	signal(SIGALRM, cont_handler);
+	if (tracer_pid == 0) {
+		close(pipefd[0]);
+		tracer(_metadata, pipefd[1], tracee, func, args);
+		syscall(__NR_exit, 0);
+	}
+	close(pipefd[1]);
+	prctl(PR_SET_PTRACER, tracer_pid, 0, 0, 0);
+	read(pipefd[0], &sync, 1);
+	close(pipefd[0]);
+
+	return tracer_pid;
+}
+void teardown_trace_fixture(struct __test_metadata *_metadata,
+			    pid_t tracer)
+{
+	if (tracer) {
+		int status;
+		/*
+		 * Extract the exit code from the other process and
+		 * adopt it for ourselves in case its asserts failed.
+		 */
+		ASSERT_EQ(0, kill(tracer, SIGUSR1));
+		ASSERT_EQ(tracer, waitpid(tracer, &status, 0));
+		if (WEXITSTATUS(status))
+			_metadata->passed = 0;
+	}
+}
+
+/* "poke" tracer arguments and function. */
+struct tracer_args_poke_t {
+	unsigned long poke_addr;
+};
+
+void tracer_poke(struct __test_metadata *_metadata, pid_t tracee, int status,
+		 void *args)
+{
+	int ret;
+	unsigned long msg;
+	struct tracer_args_poke_t *info = (struct tracer_args_poke_t *)args;
+
+	ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
+	EXPECT_EQ(0, ret);
+	/* If this fails, don't try to recover. */
+	ASSERT_EQ(0x1001, msg) {
+		kill(tracee, SIGKILL);
+	}
+	/*
+	 * Poke in the message.
+	 * Registers are not touched to try to keep this relatively arch
+	 * agnostic.
+	 */
+	ret = ptrace(PTRACE_POKEDATA, tracee, info->poke_addr, 0x1001);
+	EXPECT_EQ(0, ret);
+}
+
+FIXTURE_DATA(TRACE_poke) {
+	struct sock_fprog prog;
+	pid_t tracer;
+	long poked;
+	struct tracer_args_poke_t tracer_args;
+};
+
+FIXTURE_SETUP(TRACE_poke)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1001),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+
+	self->poked = 0;
+	memset(&self->prog, 0, sizeof(self->prog));
+	self->prog.filter = malloc(sizeof(filter));
+	ASSERT_NE(NULL, self->prog.filter);
+	memcpy(self->prog.filter, filter, sizeof(filter));
+	self->prog.len = (unsigned short)ARRAY_SIZE(filter);
+
+	/* Set up tracer args. */
+	self->tracer_args.poke_addr = (unsigned long)&self->poked;
+
+	/* Launch tracer. */
+	self->tracer = setup_trace_fixture(_metadata, tracer_poke,
+					   &self->tracer_args);
+}
+
+FIXTURE_TEARDOWN(TRACE_poke)
+{
+	teardown_trace_fixture(_metadata, self->tracer);
+	if (self->prog.filter)
+		free(self->prog.filter);
+}
+
+TEST_F(TRACE_poke, read_has_side_effects)
+{
+	ssize_t ret;
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	EXPECT_EQ(0, self->poked);
+	ret = read(-1, NULL, 0);
+	EXPECT_EQ(-1, ret);
+	EXPECT_EQ(0x1001, self->poked);
+}
+
+TEST_F(TRACE_poke, getpid_runs_normally)
+{
+	long ret;
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	EXPECT_EQ(0, self->poked);
+	EXPECT_NE(0, syscall(__NR_getpid));
+	EXPECT_EQ(0, self->poked);
+}
+
+#if defined(__x86_64__)
+# define ARCH_REGS	struct user_regs_struct
+# define SYSCALL_NUM	orig_rax
+# define SYSCALL_RET	rax
+#elif defined(__i386__)
+# define ARCH_REGS	struct user_regs_struct
+# define SYSCALL_NUM	orig_eax
+# define SYSCALL_RET	eax
+#elif defined(__arm__)
+# define ARCH_REGS	struct pt_regs
+# define SYSCALL_NUM	ARM_r7
+# define SYSCALL_RET	ARM_r0
+#elif defined(__aarch64__)
+# define ARCH_REGS	struct user_pt_regs
+# define SYSCALL_NUM	regs[8]
+# define SYSCALL_RET	regs[0]
+#else
+# error "Do not know how to find your architecture's registers and syscalls"
+#endif
+
+/* Architecture-specific syscall fetching routine. */
+int get_syscall(struct __test_metadata *_metadata, pid_t tracee)
+{
+	struct iovec iov;
+	ARCH_REGS regs;
+
+	iov.iov_base = &regs;
+	iov.iov_len = sizeof(regs);
+	EXPECT_EQ(0, ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov)) {
+		TH_LOG("PTRACE_GETREGSET failed");
+		return -1;
+	}
+
+	return regs.SYSCALL_NUM;
+}
+
+/* Architecture-specific syscall changing routine. */
+void change_syscall(struct __test_metadata *_metadata,
+		    pid_t tracee, int syscall)
+{
+	struct iovec iov;
+	int ret;
+	ARCH_REGS regs;
+
+	iov.iov_base = &regs;
+	iov.iov_len = sizeof(regs);
+	ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov);
+	EXPECT_EQ(0, ret);
+
+#if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__)
+	{
+		regs.SYSCALL_NUM = syscall;
+	}
+
+#elif defined(__arm__)
+# ifndef PTRACE_SET_SYSCALL
+#  define PTRACE_SET_SYSCALL   23
+# endif
+	{
+		ret = ptrace(PTRACE_SET_SYSCALL, tracee, NULL, syscall);
+		EXPECT_EQ(0, ret);
+	}
+
+#else
+	ASSERT_EQ(1, 0) {
+		TH_LOG("How is the syscall changed on this architecture?");
+	}
+#endif
+
+	/* If syscall is skipped, change return value. */
+	if (syscall == -1)
+		regs.SYSCALL_RET = 1;
+
+	ret = ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &iov);
+	EXPECT_EQ(0, ret);
+}
+
+void tracer_syscall(struct __test_metadata *_metadata, pid_t tracee,
+		    int status, void *args)
+{
+	int ret;
+	unsigned long msg;
+
+	/* Make sure we got the right message. */
+	ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
+	EXPECT_EQ(0, ret);
+
+	switch (msg) {
+	case 0x1002:
+		/* change getpid to getppid. */
+		change_syscall(_metadata, tracee, __NR_getppid);
+		break;
+	case 0x1003:
+		/* skip gettid. */
+		change_syscall(_metadata, tracee, -1);
+		break;
+	case 0x1004:
+		/* do nothing (allow getppid) */
+		break;
+	default:
+		EXPECT_EQ(0, msg) {
+			TH_LOG("Unknown PTRACE_GETEVENTMSG: 0x%lx", msg);
+			kill(tracee, SIGKILL);
+		}
+	}
+
+}
+
+FIXTURE_DATA(TRACE_syscall) {
+	struct sock_fprog prog;
+	pid_t tracer, mytid, mypid, parent;
+};
+
+FIXTURE_SETUP(TRACE_syscall)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1002),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_gettid, 0, 1),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1003),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1004),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+
+	memset(&self->prog, 0, sizeof(self->prog));
+	self->prog.filter = malloc(sizeof(filter));
+	ASSERT_NE(NULL, self->prog.filter);
+	memcpy(self->prog.filter, filter, sizeof(filter));
+	self->prog.len = (unsigned short)ARRAY_SIZE(filter);
+
+	/* Prepare some testable syscall results. */
+	self->mytid = syscall(__NR_gettid);
+	ASSERT_GT(self->mytid, 0);
+	ASSERT_NE(self->mytid, 1) {
+		TH_LOG("Running this test as init is not supported. :)");
+	}
+
+	self->mypid = getpid();
+	ASSERT_GT(self->mypid, 0);
+	ASSERT_EQ(self->mytid, self->mypid);
+
+	self->parent = getppid();
+	ASSERT_GT(self->parent, 0);
+	ASSERT_NE(self->parent, self->mypid);
+
+	/* Launch tracer. */
+	self->tracer = setup_trace_fixture(_metadata, tracer_syscall, NULL);
+}
+
+FIXTURE_TEARDOWN(TRACE_syscall)
+{
+	teardown_trace_fixture(_metadata, self->tracer);
+	if (self->prog.filter)
+		free(self->prog.filter);
+}
+
+TEST_F(TRACE_syscall, syscall_allowed)
+{
+	long ret;
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	/* getppid works as expected (no changes). */
+	EXPECT_EQ(self->parent, syscall(__NR_getppid));
+	EXPECT_NE(self->mypid, syscall(__NR_getppid));
+}
+
+TEST_F(TRACE_syscall, syscall_redirected)
+{
+	long ret;
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	/* getpid has been redirected to getppid as expected. */
+	EXPECT_EQ(self->parent, syscall(__NR_getpid));
+	EXPECT_NE(self->mypid, syscall(__NR_getpid));
+}
+
+TEST_F(TRACE_syscall, syscall_dropped)
+{
+	long ret;
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	/* gettid has been skipped and an altered return value stored. */
+	EXPECT_EQ(1, syscall(__NR_gettid));
+	EXPECT_NE(self->mytid, syscall(__NR_gettid));
+}
+
+#ifndef __NR_seccomp
+# if defined(__i386__)
+#  define __NR_seccomp 354
+# elif defined(__x86_64__)
+#  define __NR_seccomp 317
+# elif defined(__arm__)
+#  define __NR_seccomp 383
+# elif defined(__aarch64__)
+#  define __NR_seccomp 277
+# else
+#  warning "seccomp syscall number unknown for this architecture"
+#  define __NR_seccomp 0xffff
+# endif
+#endif
+
+#ifndef SECCOMP_SET_MODE_STRICT
+#define SECCOMP_SET_MODE_STRICT 0
+#endif
+
+#ifndef SECCOMP_SET_MODE_FILTER
+#define SECCOMP_SET_MODE_FILTER 1
+#endif
+
+#ifndef SECCOMP_FLAG_FILTER_TSYNC
+#define SECCOMP_FLAG_FILTER_TSYNC 1
+#endif
+
+#ifndef seccomp
+int seccomp(unsigned int op, unsigned int flags, struct sock_fprog *filter)
+{
+	errno = 0;
+	return syscall(__NR_seccomp, op, flags, filter);
+}
+#endif
+
+TEST(seccomp_syscall)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+	long ret;
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret) {
+		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+	}
+
+	/* Reject insane operation. */
+	ret = seccomp(-1, 0, &prog);
+	EXPECT_EQ(EINVAL, errno) {
+		TH_LOG("Did not reject crazy op value!");
+	}
+
+	/* Reject strict with flags or pointer. */
+	ret = seccomp(SECCOMP_SET_MODE_STRICT, -1, NULL);
+	EXPECT_EQ(EINVAL, errno) {
+		TH_LOG("Did not reject mode strict with flags!");
+	}
+	ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, &prog);
+	EXPECT_EQ(EINVAL, errno) {
+		TH_LOG("Did not reject mode strict with uargs!");
+	}
+
+	/* Reject insane args for filter. */
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, -1, &prog);
+	EXPECT_EQ(EINVAL, errno) {
+		TH_LOG("Did not reject crazy filter flags!");
+	}
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, NULL);
+	EXPECT_EQ(EFAULT, errno) {
+		TH_LOG("Did not reject NULL filter!");
+	}
+
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
+	EXPECT_EQ(0, errno) {
+		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER: %s",
+			strerror(errno));
+	}
+}
+
+TEST(seccomp_syscall_mode_lock)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+	long ret;
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
+	ASSERT_EQ(0, ret) {
+		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+	}
+
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
+	EXPECT_EQ(0, ret) {
+		TH_LOG("Could not install filter!");
+	}
+
+	/* Make sure neither entry point will switch to strict. */
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0);
+	EXPECT_EQ(EINVAL, errno) {
+		TH_LOG("Switched to mode strict!");
+	}
+
+	ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, NULL);
+	EXPECT_EQ(EINVAL, errno) {
+		TH_LOG("Switched to mode strict!");
+	}
+}
+
+TEST(TSYNC_first)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+	long ret;
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0);
+	ASSERT_EQ(0, ret) {
+		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+	}
+
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
+		      &prog);
+	EXPECT_EQ(0, ret) {
+		TH_LOG("Could not install initial filter with TSYNC!");
+	}
+}
+
+#define TSYNC_SIBLINGS 2
+struct tsync_sibling {
+	pthread_t tid;
+	pid_t system_tid;
+	sem_t *started;
+	pthread_cond_t *cond;
+	pthread_mutex_t *mutex;
+	int diverge;
+	int num_waits;
+	struct sock_fprog *prog;
+	struct __test_metadata *metadata;
+};
+
+FIXTURE_DATA(TSYNC) {
+	struct sock_fprog root_prog, apply_prog;
+	struct tsync_sibling sibling[TSYNC_SIBLINGS];
+	sem_t started;
+	pthread_cond_t cond;
+	pthread_mutex_t mutex;
+	int sibling_count;
+};
+
+FIXTURE_SETUP(TSYNC)
+{
+	struct sock_filter root_filter[] = {
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_filter apply_filter[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+
+	memset(&self->root_prog, 0, sizeof(self->root_prog));
+	memset(&self->apply_prog, 0, sizeof(self->apply_prog));
+	memset(&self->sibling, 0, sizeof(self->sibling));
+	self->root_prog.filter = malloc(sizeof(root_filter));
+	ASSERT_NE(NULL, self->root_prog.filter);
+	memcpy(self->root_prog.filter, &root_filter, sizeof(root_filter));
+	self->root_prog.len = (unsigned short)ARRAY_SIZE(root_filter);
+
+	self->apply_prog.filter = malloc(sizeof(apply_filter));
+	ASSERT_NE(NULL, self->apply_prog.filter);
+	memcpy(self->apply_prog.filter, &apply_filter, sizeof(apply_filter));
+	self->apply_prog.len = (unsigned short)ARRAY_SIZE(apply_filter);
+
+	self->sibling_count = 0;
+	pthread_mutex_init(&self->mutex, NULL);
+	pthread_cond_init(&self->cond, NULL);
+	sem_init(&self->started, 0, 0);
+	self->sibling[0].tid = 0;
+	self->sibling[0].cond = &self->cond;
+	self->sibling[0].started = &self->started;
+	self->sibling[0].mutex = &self->mutex;
+	self->sibling[0].diverge = 0;
+	self->sibling[0].num_waits = 1;
+	self->sibling[0].prog = &self->root_prog;
+	self->sibling[0].metadata = _metadata;
+	self->sibling[1].tid = 0;
+	self->sibling[1].cond = &self->cond;
+	self->sibling[1].started = &self->started;
+	self->sibling[1].mutex = &self->mutex;
+	self->sibling[1].diverge = 0;
+	self->sibling[1].prog = &self->root_prog;
+	self->sibling[1].num_waits = 1;
+	self->sibling[1].metadata = _metadata;
+}
+
+FIXTURE_TEARDOWN(TSYNC)
+{
+	int sib = 0;
+
+	if (self->root_prog.filter)
+		free(self->root_prog.filter);
+	if (self->apply_prog.filter)
+		free(self->apply_prog.filter);
+
+	for ( ; sib < self->sibling_count; ++sib) {
+		struct tsync_sibling *s = &self->sibling[sib];
+		void *status;
+
+		if (!s->tid)
+			continue;
+		if (pthread_kill(s->tid, 0)) {
+			pthread_cancel(s->tid);
+			pthread_join(s->tid, &status);
+		}
+	}
+	pthread_mutex_destroy(&self->mutex);
+	pthread_cond_destroy(&self->cond);
+	sem_destroy(&self->started);
+}
+
+void *tsync_sibling(void *data)
+{
+	long ret = 0;
+	struct tsync_sibling *me = data;
+
+	me->system_tid = syscall(__NR_gettid);
+
+	pthread_mutex_lock(me->mutex);
+	if (me->diverge) {
+		/* Just re-apply the root prog to fork the tree */
+		ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER,
+				me->prog, 0, 0);
+	}
+	sem_post(me->started);
+	/* Return outside of started so parent notices failures. */
+	if (ret) {
+		pthread_mutex_unlock(me->mutex);
+		return (void *)SIBLING_EXIT_FAILURE;
+	}
+	do {
+		pthread_cond_wait(me->cond, me->mutex);
+		me->num_waits = me->num_waits - 1;
+	} while (me->num_waits);
+	pthread_mutex_unlock(me->mutex);
+
+	ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
+	if (!ret)
+		return (void *)SIBLING_EXIT_NEWPRIVS;
+	read(0, NULL, 0);
+	return (void *)SIBLING_EXIT_UNKILLED;
+}
+
+void tsync_start_sibling(struct tsync_sibling *sibling)
+{
+	pthread_create(&sibling->tid, NULL, tsync_sibling, (void *)sibling);
+}
+
+TEST_F(TSYNC, siblings_fail_prctl)
+{
+	long ret;
+	void *status;
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EINVAL),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+
+	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+	}
+
+	/* Check prctl failure detection by requesting sib 0 diverge. */
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog);
+	ASSERT_EQ(0, ret) {
+		TH_LOG("setting filter failed");
+	}
+
+	self->sibling[0].diverge = 1;
+	tsync_start_sibling(&self->sibling[0]);
+	tsync_start_sibling(&self->sibling[1]);
+
+	while (self->sibling_count < TSYNC_SIBLINGS) {
+		sem_wait(&self->started);
+		self->sibling_count++;
+	}
+
+	/* Signal the threads to clean up*/
+	pthread_mutex_lock(&self->mutex);
+	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
+		TH_LOG("cond broadcast non-zero");
+	}
+	pthread_mutex_unlock(&self->mutex);
+
+	/* Ensure diverging sibling failed to call prctl. */
+	pthread_join(self->sibling[0].tid, &status);
+	EXPECT_EQ(SIBLING_EXIT_FAILURE, (long)status);
+	pthread_join(self->sibling[1].tid, &status);
+	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
+}
+
+TEST_F(TSYNC, two_siblings_with_ancestor)
+{
+	long ret;
+	void *status;
+
+	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+	}
+
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
+	ASSERT_EQ(0, ret) {
+		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
+	}
+	tsync_start_sibling(&self->sibling[0]);
+	tsync_start_sibling(&self->sibling[1]);
+
+	while (self->sibling_count < TSYNC_SIBLINGS) {
+		sem_wait(&self->started);
+		self->sibling_count++;
+	}
+
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
+		      &self->apply_prog);
+	ASSERT_EQ(0, ret) {
+		TH_LOG("Could install filter on all threads!");
+	}
+	/* Tell the siblings to test the policy */
+	pthread_mutex_lock(&self->mutex);
+	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
+		TH_LOG("cond broadcast non-zero");
+	}
+	pthread_mutex_unlock(&self->mutex);
+	/* Ensure they are both killed and don't exit cleanly. */
+	pthread_join(self->sibling[0].tid, &status);
+	EXPECT_EQ(0x0, (long)status);
+	pthread_join(self->sibling[1].tid, &status);
+	EXPECT_EQ(0x0, (long)status);
+}
+
+TEST_F(TSYNC, two_sibling_want_nnp)
+{
+	void *status;
+
+	/* start siblings before any prctl() operations */
+	tsync_start_sibling(&self->sibling[0]);
+	tsync_start_sibling(&self->sibling[1]);
+	while (self->sibling_count < TSYNC_SIBLINGS) {
+		sem_wait(&self->started);
+		self->sibling_count++;
+	}
+
+	/* Tell the siblings to test no policy */
+	pthread_mutex_lock(&self->mutex);
+	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
+		TH_LOG("cond broadcast non-zero");
+	}
+	pthread_mutex_unlock(&self->mutex);
+
+	/* Ensure they are both upset about lacking nnp. */
+	pthread_join(self->sibling[0].tid, &status);
+	EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
+	pthread_join(self->sibling[1].tid, &status);
+	EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status);
+}
+
+TEST_F(TSYNC, two_siblings_with_no_filter)
+{
+	long ret;
+	void *status;
+
+	/* start siblings before any prctl() operations */
+	tsync_start_sibling(&self->sibling[0]);
+	tsync_start_sibling(&self->sibling[1]);
+	while (self->sibling_count < TSYNC_SIBLINGS) {
+		sem_wait(&self->started);
+		self->sibling_count++;
+	}
+
+	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+	}
+
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
+		      &self->apply_prog);
+	ASSERT_EQ(0, ret) {
+		TH_LOG("Could install filter on all threads!");
+	}
+
+	/* Tell the siblings to test the policy */
+	pthread_mutex_lock(&self->mutex);
+	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
+		TH_LOG("cond broadcast non-zero");
+	}
+	pthread_mutex_unlock(&self->mutex);
+
+	/* Ensure they are both killed and don't exit cleanly. */
+	pthread_join(self->sibling[0].tid, &status);
+	EXPECT_EQ(0x0, (long)status);
+	pthread_join(self->sibling[1].tid, &status);
+	EXPECT_EQ(0x0, (long)status);
+}
+
+TEST_F(TSYNC, two_siblings_with_one_divergence)
+{
+	long ret;
+	void *status;
+
+	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+	}
+
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
+	ASSERT_EQ(0, ret) {
+		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
+	}
+	self->sibling[0].diverge = 1;
+	tsync_start_sibling(&self->sibling[0]);
+	tsync_start_sibling(&self->sibling[1]);
+
+	while (self->sibling_count < TSYNC_SIBLINGS) {
+		sem_wait(&self->started);
+		self->sibling_count++;
+	}
+
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
+		      &self->apply_prog);
+	ASSERT_EQ(self->sibling[0].system_tid, ret) {
+		TH_LOG("Did not fail on diverged sibling.");
+	}
+
+	/* Wake the threads */
+	pthread_mutex_lock(&self->mutex);
+	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
+		TH_LOG("cond broadcast non-zero");
+	}
+	pthread_mutex_unlock(&self->mutex);
+
+	/* Ensure they are both unkilled. */
+	pthread_join(self->sibling[0].tid, &status);
+	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
+	pthread_join(self->sibling[1].tid, &status);
+	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
+}
+
+TEST_F(TSYNC, two_siblings_not_under_filter)
+{
+	long ret, sib;
+	void *status;
+
+	ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+		TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+	}
+
+	/*
+	 * Sibling 0 will have its own seccomp policy
+	 * and Sibling 1 will not be under seccomp at
+	 * all. Sibling 1 will enter seccomp and 0
+	 * will cause failure.
+	 */
+	self->sibling[0].diverge = 1;
+	tsync_start_sibling(&self->sibling[0]);
+	tsync_start_sibling(&self->sibling[1]);
+
+	while (self->sibling_count < TSYNC_SIBLINGS) {
+		sem_wait(&self->started);
+		self->sibling_count++;
+	}
+
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog);
+	ASSERT_EQ(0, ret) {
+		TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!");
+	}
+
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
+		      &self->apply_prog);
+	ASSERT_EQ(ret, self->sibling[0].system_tid) {
+		TH_LOG("Did not fail on diverged sibling.");
+	}
+	sib = 1;
+	if (ret == self->sibling[0].system_tid)
+		sib = 0;
+
+	pthread_mutex_lock(&self->mutex);
+
+	/* Increment the other siblings num_waits so we can clean up
+	 * the one we just saw.
+	 */
+	self->sibling[!sib].num_waits += 1;
+
+	/* Signal the thread to clean up*/
+	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
+		TH_LOG("cond broadcast non-zero");
+	}
+	pthread_mutex_unlock(&self->mutex);
+	pthread_join(self->sibling[sib].tid, &status);
+	EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status);
+	/* Poll for actual task death. pthread_join doesn't guarantee it. */
+	while (!kill(self->sibling[sib].system_tid, 0))
+		sleep(0.1);
+	/* Switch to the remaining sibling */
+	sib = !sib;
+
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
+		      &self->apply_prog);
+	ASSERT_EQ(0, ret) {
+		TH_LOG("Expected the remaining sibling to sync");
+	};
+
+	pthread_mutex_lock(&self->mutex);
+
+	/* If remaining sibling didn't have a chance to wake up during
+	 * the first broadcast, manually reduce the num_waits now.
+	 */
+	if (self->sibling[sib].num_waits > 1)
+		self->sibling[sib].num_waits = 1;
+	ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) {
+		TH_LOG("cond broadcast non-zero");
+	}
+	pthread_mutex_unlock(&self->mutex);
+	pthread_join(self->sibling[sib].tid, &status);
+	EXPECT_EQ(0, (long)status);
+	/* Poll for actual task death. pthread_join doesn't guarantee it. */
+	while (!kill(self->sibling[sib].system_tid, 0))
+		sleep(0.1);
+
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC,
+		      &self->apply_prog);
+	ASSERT_EQ(0, ret);  /* just us chickens */
+}
+
+/* Make sure restarted syscalls are seen directly as "restart_syscall". */
+TEST(syscall_restart)
+{
+	long ret;
+	unsigned long msg;
+	pid_t child_pid;
+	int pipefd[2];
+	int status;
+	siginfo_t info = { };
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			 offsetof(struct seccomp_data, nr)),
+
+#ifdef __NR_sigreturn
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_sigreturn, 6, 0),
+#endif
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 5, 0),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit, 4, 0),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_rt_sigreturn, 3, 0),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_poll, 4, 0),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_restart_syscall, 4, 0),
+
+		/* Allow __NR_write for easy logging. */
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_write, 0, 1),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x100), /* poll */
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x200), /* restart */
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+
+	ASSERT_EQ(0, pipe(pipefd));
+
+	child_pid = fork();
+	ASSERT_LE(0, child_pid);
+	if (child_pid == 0) {
+		/* Child uses EXPECT not ASSERT to deliver status correctly. */
+		char buf = ' ';
+		struct pollfd fds = {
+			.fd = pipefd[0],
+			.events = POLLIN,
+		};
+
+		/* Attach parent as tracer and stop. */
+		EXPECT_EQ(0, ptrace(PTRACE_TRACEME));
+		EXPECT_EQ(0, raise(SIGSTOP));
+
+		EXPECT_EQ(0, close(pipefd[1]));
+
+		EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
+			TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+		}
+
+		ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+		EXPECT_EQ(0, ret) {
+			TH_LOG("Failed to install filter!");
+		}
+
+		EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
+			TH_LOG("Failed to read() sync from parent");
+		}
+		EXPECT_EQ('.', buf) {
+			TH_LOG("Failed to get sync data from read()");
+		}
+
+		/* Start poll to be interrupted. */
+		errno = 0;
+		EXPECT_EQ(1, poll(&fds, 1, -1)) {
+			TH_LOG("Call to poll() failed (errno %d)", errno);
+		}
+
+		/* Read final sync from parent. */
+		EXPECT_EQ(1, read(pipefd[0], &buf, 1)) {
+			TH_LOG("Failed final read() from parent");
+		}
+		EXPECT_EQ('!', buf) {
+			TH_LOG("Failed to get final data from read()");
+		}
+
+		/* Directly report the status of our test harness results. */
+		syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS
+						     : EXIT_FAILURE);
+	}
+	EXPECT_EQ(0, close(pipefd[0]));
+
+	/* Attach to child, setup options, and release. */
+	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
+	ASSERT_EQ(true, WIFSTOPPED(status));
+	ASSERT_EQ(0, ptrace(PTRACE_SETOPTIONS, child_pid, NULL,
+			    PTRACE_O_TRACESECCOMP));
+	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
+	ASSERT_EQ(1, write(pipefd[1], ".", 1));
+
+	/* Wait for poll() to start. */
+	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
+	ASSERT_EQ(true, WIFSTOPPED(status));
+	ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
+	ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
+	ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
+	ASSERT_EQ(0x100, msg);
+	EXPECT_EQ(__NR_poll, get_syscall(_metadata, child_pid));
+
+	/* Might as well check siginfo for sanity while we're here. */
+	ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
+	ASSERT_EQ(SIGTRAP, info.si_signo);
+	ASSERT_EQ(SIGTRAP | (PTRACE_EVENT_SECCOMP << 8), info.si_code);
+	EXPECT_EQ(0, info.si_errno);
+	EXPECT_EQ(getuid(), info.si_uid);
+	/* Verify signal delivery came from child (seccomp-triggered). */
+	EXPECT_EQ(child_pid, info.si_pid);
+
+	/* Interrupt poll with SIGSTOP (which we'll need to handle). */
+	ASSERT_EQ(0, kill(child_pid, SIGSTOP));
+	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
+	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
+	ASSERT_EQ(true, WIFSTOPPED(status));
+	ASSERT_EQ(SIGSTOP, WSTOPSIG(status));
+	/* Verify signal delivery came from parent now. */
+	ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info));
+	EXPECT_EQ(getpid(), info.si_pid);
+
+	/* Restart poll with SIGCONT, which triggers restart_syscall. */
+	ASSERT_EQ(0, kill(child_pid, SIGCONT));
+	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
+	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
+	ASSERT_EQ(true, WIFSTOPPED(status));
+	ASSERT_EQ(SIGCONT, WSTOPSIG(status));
+	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
+
+	/* Wait for restart_syscall() to start. */
+	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
+	ASSERT_EQ(true, WIFSTOPPED(status));
+	ASSERT_EQ(SIGTRAP, WSTOPSIG(status));
+	ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16));
+	ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg));
+	ASSERT_EQ(0x200, msg);
+	ret = get_syscall(_metadata, child_pid);
+#if defined(__arm__)
+	/* FIXME: ARM does not expose true syscall in registers. */
+	EXPECT_EQ(__NR_poll, ret);
+#else
+	EXPECT_EQ(__NR_restart_syscall, ret);
+#endif
+
+	/* Write again to end poll. */
+	ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0));
+	ASSERT_EQ(1, write(pipefd[1], "!", 1));
+	EXPECT_EQ(0, close(pipefd[1]));
+
+	ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
+	if (WIFSIGNALED(status) || WEXITSTATUS(status))
+		_metadata->passed = 0;
+}
+
+/*
+ * TODO:
+ * - add microbenchmarks
+ * - expand NNP testing
+ * - better arch-specific TRACE and TRAP handlers.
+ * - endianness checking when appropriate
+ * - 64-bit arg prodding
+ * - arch value testing (x86 modes especially)
+ * - ...
+ */
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/seccomp/test_harness.h b/tools/testing/selftests/seccomp/test_harness.h
new file mode 100644
index 000000000000..977a6afc4489
--- /dev/null
+++ b/tools/testing/selftests/seccomp/test_harness.h
@@ -0,0 +1,537 @@
+/*
+ * Copyright (c) 2012 The Chromium OS Authors. All rights reserved.
+ * Use of this source code is governed by the GPLv2 license.
+ *
+ * test_harness.h: simple C unit test helper.
+ *
+ * Usage:
+ *   #include "test_harness.h"
+ *   TEST(standalone_test) {
+ *     do_some_stuff;
+ *     EXPECT_GT(10, stuff) {
+ *        stuff_state_t state;
+ *        enumerate_stuff_state(&state);
+ *        TH_LOG("expectation failed with state: %s", state.msg);
+ *     }
+ *     more_stuff;
+ *     ASSERT_NE(some_stuff, NULL) TH_LOG("how did it happen?!");
+ *     last_stuff;
+ *     EXPECT_EQ(0, last_stuff);
+ *   }
+ *
+ *   FIXTURE(my_fixture) {
+ *     mytype_t *data;
+ *     int awesomeness_level;
+ *   };
+ *   FIXTURE_SETUP(my_fixture) {
+ *     self->data = mytype_new();
+ *     ASSERT_NE(NULL, self->data);
+ *   }
+ *   FIXTURE_TEARDOWN(my_fixture) {
+ *     mytype_free(self->data);
+ *   }
+ *   TEST_F(my_fixture, data_is_good) {
+ *     EXPECT_EQ(1, is_my_data_good(self->data));
+ *   }
+ *
+ *   TEST_HARNESS_MAIN
+ *
+ * API inspired by code.google.com/p/googletest
+ */
+#ifndef TEST_HARNESS_H_
+#define TEST_HARNESS_H_
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+/* All exported functionality should be declared through this macro. */
+#define TEST_API(x) _##x
+
+/*
+ * Exported APIs
+ */
+
+/* TEST(name) { implementation }
+ * Defines a test by name.
+ * Names must be unique and tests must not be run in parallel.  The
+ * implementation containing block is a function and scoping should be treated
+ * as such.  Returning early may be performed with a bare "return;" statement.
+ *
+ * EXPECT_* and ASSERT_* are valid in a TEST() { } context.
+ */
+#define TEST TEST_API(TEST)
+
+/* TEST_SIGNAL(name, signal) { implementation }
+ * Defines a test by name and the expected term signal.
+ * Names must be unique and tests must not be run in parallel.  The
+ * implementation containing block is a function and scoping should be treated
+ * as such.  Returning early may be performed with a bare "return;" statement.
+ *
+ * EXPECT_* and ASSERT_* are valid in a TEST() { } context.
+ */
+#define TEST_SIGNAL TEST_API(TEST_SIGNAL)
+
+/* FIXTURE(datatype name) {
+ *   type property1;
+ *   ...
+ * };
+ * Defines the data provided to TEST_F()-defined tests as |self|.  It should be
+ * populated and cleaned up using FIXTURE_SETUP and FIXTURE_TEARDOWN.
+ */
+#define FIXTURE TEST_API(FIXTURE)
+
+/* FIXTURE_DATA(datatype name)
+ * This call may be used when the type of the fixture data
+ * is needed.  In general, this should not be needed unless
+ * the |self| is being passed to a helper directly.
+ */
+#define FIXTURE_DATA TEST_API(FIXTURE_DATA)
+
+/* FIXTURE_SETUP(fixture name) { implementation }
+ * Populates the required "setup" function for a fixture.  An instance of the
+ * datatype defined with _FIXTURE_DATA will be exposed as |self| for the
+ * implementation.
+ *
+ * ASSERT_* are valid for use in this context and will prempt the execution
+ * of any dependent fixture tests.
+ *
+ * A bare "return;" statement may be used to return early.
+ */
+#define FIXTURE_SETUP TEST_API(FIXTURE_SETUP)
+
+/* FIXTURE_TEARDOWN(fixture name) { implementation }
+ * Populates the required "teardown" function for a fixture.  An instance of the
+ * datatype defined with _FIXTURE_DATA will be exposed as |self| for the
+ * implementation to clean up.
+ *
+ * A bare "return;" statement may be used to return early.
+ */
+#define FIXTURE_TEARDOWN TEST_API(FIXTURE_TEARDOWN)
+
+/* TEST_F(fixture, name) { implementation }
+ * Defines a test that depends on a fixture (e.g., is part of a test case).
+ * Very similar to TEST() except that |self| is the setup instance of fixture's
+ * datatype exposed for use by the implementation.
+ */
+#define TEST_F TEST_API(TEST_F)
+
+#define TEST_F_SIGNAL TEST_API(TEST_F_SIGNAL)
+
+/* Use once to append a main() to the test file. E.g.,
+ *   TEST_HARNESS_MAIN
+ */
+#define TEST_HARNESS_MAIN TEST_API(TEST_HARNESS_MAIN)
+
+/*
+ * Operators for use in TEST and TEST_F.
+ * ASSERT_* calls will stop test execution immediately.
+ * EXPECT_* calls will emit a failure warning, note it, and continue.
+ */
+
+/* ASSERT_EQ(expected, measured): expected == measured */
+#define ASSERT_EQ TEST_API(ASSERT_EQ)
+/* ASSERT_NE(expected, measured): expected != measured */
+#define ASSERT_NE TEST_API(ASSERT_NE)
+/* ASSERT_LT(expected, measured): expected < measured */
+#define ASSERT_LT TEST_API(ASSERT_LT)
+/* ASSERT_LE(expected, measured): expected <= measured */
+#define ASSERT_LE TEST_API(ASSERT_LE)
+/* ASSERT_GT(expected, measured): expected > measured */
+#define ASSERT_GT TEST_API(ASSERT_GT)
+/* ASSERT_GE(expected, measured): expected >= measured */
+#define ASSERT_GE TEST_API(ASSERT_GE)
+/* ASSERT_NULL(measured): NULL == measured */
+#define ASSERT_NULL TEST_API(ASSERT_NULL)
+/* ASSERT_TRUE(measured): measured != 0 */
+#define ASSERT_TRUE TEST_API(ASSERT_TRUE)
+/* ASSERT_FALSE(measured): measured == 0 */
+#define ASSERT_FALSE TEST_API(ASSERT_FALSE)
+/* ASSERT_STREQ(expected, measured): !strcmp(expected, measured) */
+#define ASSERT_STREQ TEST_API(ASSERT_STREQ)
+/* ASSERT_STRNE(expected, measured): strcmp(expected, measured) */
+#define ASSERT_STRNE TEST_API(ASSERT_STRNE)
+/* EXPECT_EQ(expected, measured): expected == measured */
+#define EXPECT_EQ TEST_API(EXPECT_EQ)
+/* EXPECT_NE(expected, measured): expected != measured */
+#define EXPECT_NE TEST_API(EXPECT_NE)
+/* EXPECT_LT(expected, measured): expected < measured */
+#define EXPECT_LT TEST_API(EXPECT_LT)
+/* EXPECT_LE(expected, measured): expected <= measured */
+#define EXPECT_LE TEST_API(EXPECT_LE)
+/* EXPECT_GT(expected, measured): expected > measured */
+#define EXPECT_GT TEST_API(EXPECT_GT)
+/* EXPECT_GE(expected, measured): expected >= measured */
+#define EXPECT_GE TEST_API(EXPECT_GE)
+/* EXPECT_NULL(measured): NULL == measured */
+#define EXPECT_NULL TEST_API(EXPECT_NULL)
+/* EXPECT_TRUE(measured): 0 != measured */
+#define EXPECT_TRUE TEST_API(EXPECT_TRUE)
+/* EXPECT_FALSE(measured): 0 == measured */
+#define EXPECT_FALSE TEST_API(EXPECT_FALSE)
+/* EXPECT_STREQ(expected, measured): !strcmp(expected, measured) */
+#define EXPECT_STREQ TEST_API(EXPECT_STREQ)
+/* EXPECT_STRNE(expected, measured): strcmp(expected, measured) */
+#define EXPECT_STRNE TEST_API(EXPECT_STRNE)
+
+/* TH_LOG(format, ...)
+ * Optional debug logging function available for use in tests.
+ * Logging may be enabled or disabled by defining TH_LOG_ENABLED.
+ * E.g., #define TH_LOG_ENABLED 1
+ * If no definition is provided, logging is enabled by default.
+ */
+#define TH_LOG  TEST_API(TH_LOG)
+
+/*
+ * Internal implementation.
+ *
+ */
+
+/* Utilities exposed to the test definitions */
+#ifndef TH_LOG_STREAM
+#  define TH_LOG_STREAM stderr
+#endif
+
+#ifndef TH_LOG_ENABLED
+#  define TH_LOG_ENABLED 1
+#endif
+
+#define _TH_LOG(fmt, ...) do { \
+	if (TH_LOG_ENABLED) \
+		__TH_LOG(fmt, ##__VA_ARGS__); \
+} while (0)
+
+/* Unconditional logger for internal use. */
+#define __TH_LOG(fmt, ...) \
+		fprintf(TH_LOG_STREAM, "%s:%d:%s:" fmt "\n", \
+			__FILE__, __LINE__, _metadata->name, ##__VA_ARGS__)
+
+/* Defines the test function and creates the registration stub. */
+#define _TEST(test_name) __TEST_IMPL(test_name, -1)
+
+#define _TEST_SIGNAL(test_name, signal) __TEST_IMPL(test_name, signal)
+
+#define __TEST_IMPL(test_name, _signal) \
+	static void test_name(struct __test_metadata *_metadata); \
+	static struct __test_metadata _##test_name##_object = \
+		{ name: "global." #test_name, \
+		  fn: &test_name, termsig: _signal }; \
+	static void __attribute__((constructor)) _register_##test_name(void) \
+	{ \
+		__register_test(&_##test_name##_object); \
+	} \
+	static void test_name( \
+		struct __test_metadata __attribute__((unused)) *_metadata)
+
+/* Wraps the struct name so we have one less argument to pass around. */
+#define _FIXTURE_DATA(fixture_name) struct _test_data_##fixture_name
+
+/* Called once per fixture to setup the data and register. */
+#define _FIXTURE(fixture_name) \
+	static void __attribute__((constructor)) \
+	_register_##fixture_name##_data(void) \
+	{ \
+		__fixture_count++; \
+	} \
+	_FIXTURE_DATA(fixture_name)
+
+/* Prepares the setup function for the fixture.  |_metadata| is included
+ * so that ASSERT_* work as a convenience.
+ */
+#define _FIXTURE_SETUP(fixture_name) \
+	void fixture_name##_setup( \
+		struct __test_metadata __attribute__((unused)) *_metadata, \
+		_FIXTURE_DATA(fixture_name) __attribute__((unused)) *self)
+#define _FIXTURE_TEARDOWN(fixture_name) \
+	void fixture_name##_teardown( \
+		struct __test_metadata __attribute__((unused)) *_metadata, \
+		_FIXTURE_DATA(fixture_name) __attribute__((unused)) *self)
+
+/* Emits test registration and helpers for fixture-based test
+ * cases.
+ * TODO(wad) register fixtures on dedicated test lists.
+ */
+#define _TEST_F(fixture_name, test_name) \
+	__TEST_F_IMPL(fixture_name, test_name, -1)
+
+#define _TEST_F_SIGNAL(fixture_name, test_name, signal) \
+	__TEST_F_IMPL(fixture_name, test_name, signal)
+
+#define __TEST_F_IMPL(fixture_name, test_name, signal) \
+	static void fixture_name##_##test_name( \
+		struct __test_metadata *_metadata, \
+		_FIXTURE_DATA(fixture_name) *self); \
+	static inline void wrapper_##fixture_name##_##test_name( \
+		struct __test_metadata *_metadata) \
+	{ \
+		/* fixture data is alloced, setup, and torn down per call. */ \
+		_FIXTURE_DATA(fixture_name) self; \
+		memset(&self, 0, sizeof(_FIXTURE_DATA(fixture_name))); \
+		fixture_name##_setup(_metadata, &self); \
+		/* Let setup failure terminate early. */ \
+		if (!_metadata->passed) \
+			return; \
+		fixture_name##_##test_name(_metadata, &self); \
+		fixture_name##_teardown(_metadata, &self); \
+	} \
+	static struct __test_metadata \
+		      _##fixture_name##_##test_name##_object = { \
+		name: #fixture_name "." #test_name, \
+		fn: &wrapper_##fixture_name##_##test_name, \
+		termsig: signal, \
+	 }; \
+	static void __attribute__((constructor)) \
+			_register_##fixture_name##_##test_name(void) \
+	{ \
+		__register_test(&_##fixture_name##_##test_name##_object); \
+	} \
+	static void fixture_name##_##test_name( \
+		struct __test_metadata __attribute__((unused)) *_metadata, \
+		_FIXTURE_DATA(fixture_name) __attribute__((unused)) *self)
+
+/* Exports a simple wrapper to run the test harness. */
+#define _TEST_HARNESS_MAIN \
+	static void __attribute__((constructor)) \
+	__constructor_order_last(void) \
+	{ \
+		if (!__constructor_order) \
+			__constructor_order = _CONSTRUCTOR_ORDER_BACKWARD; \
+	} \
+	int main(int argc, char **argv) { \
+		return test_harness_run(argc, argv); \
+	}
+
+#define _ASSERT_EQ(_expected, _seen) \
+	__EXPECT(_expected, _seen, ==, 1)
+#define _ASSERT_NE(_expected, _seen) \
+	__EXPECT(_expected, _seen, !=, 1)
+#define _ASSERT_LT(_expected, _seen) \
+	__EXPECT(_expected, _seen, <, 1)
+#define _ASSERT_LE(_expected, _seen) \
+	__EXPECT(_expected, _seen, <=, 1)
+#define _ASSERT_GT(_expected, _seen) \
+	__EXPECT(_expected, _seen, >, 1)
+#define _ASSERT_GE(_expected, _seen) \
+	__EXPECT(_expected, _seen, >=, 1)
+#define _ASSERT_NULL(_seen) \
+	__EXPECT(NULL, _seen, ==, 1)
+
+#define _ASSERT_TRUE(_seen) \
+	_ASSERT_NE(0, _seen)
+#define _ASSERT_FALSE(_seen) \
+	_ASSERT_EQ(0, _seen)
+#define _ASSERT_STREQ(_expected, _seen) \
+	__EXPECT_STR(_expected, _seen, ==, 1)
+#define _ASSERT_STRNE(_expected, _seen) \
+	__EXPECT_STR(_expected, _seen, !=, 1)
+
+#define _EXPECT_EQ(_expected, _seen) \
+	__EXPECT(_expected, _seen, ==, 0)
+#define _EXPECT_NE(_expected, _seen) \
+	__EXPECT(_expected, _seen, !=, 0)
+#define _EXPECT_LT(_expected, _seen) \
+	__EXPECT(_expected, _seen, <, 0)
+#define _EXPECT_LE(_expected, _seen) \
+	__EXPECT(_expected, _seen, <=, 0)
+#define _EXPECT_GT(_expected, _seen) \
+	__EXPECT(_expected, _seen, >, 0)
+#define _EXPECT_GE(_expected, _seen) \
+	__EXPECT(_expected, _seen, >=, 0)
+
+#define _EXPECT_NULL(_seen) \
+	__EXPECT(NULL, _seen, ==, 0)
+#define _EXPECT_TRUE(_seen) \
+	_EXPECT_NE(0, _seen)
+#define _EXPECT_FALSE(_seen) \
+	_EXPECT_EQ(0, _seen)
+
+#define _EXPECT_STREQ(_expected, _seen) \
+	__EXPECT_STR(_expected, _seen, ==, 0)
+#define _EXPECT_STRNE(_expected, _seen) \
+	__EXPECT_STR(_expected, _seen, !=, 0)
+
+#define ARRAY_SIZE(a)	(sizeof(a) / sizeof(a[0]))
+
+/* Support an optional handler after and ASSERT_* or EXPECT_*.  The approach is
+ * not thread-safe, but it should be fine in most sane test scenarios.
+ *
+ * Using __bail(), which optionally abort()s, is the easiest way to early
+ * return while still providing an optional block to the API consumer.
+ */
+#define OPTIONAL_HANDLER(_assert) \
+	for (; _metadata->trigger;  _metadata->trigger = __bail(_assert))
+
+#define __EXPECT(_expected, _seen, _t, _assert) do { \
+	/* Avoid multiple evaluation of the cases */ \
+	__typeof__(_expected) __exp = (_expected); \
+	__typeof__(_seen) __seen = (_seen); \
+	if (!(__exp _t __seen)) { \
+		unsigned long long __exp_print = 0; \
+		unsigned long long __seen_print = 0; \
+		/* Avoid casting complaints the scariest way we can. */ \
+		memcpy(&__exp_print, &__exp, sizeof(__exp)); \
+		memcpy(&__seen_print, &__seen, sizeof(__seen)); \
+		__TH_LOG("Expected %s (%llu) %s %s (%llu)", \
+			 #_expected, __exp_print, #_t, \
+			 #_seen, __seen_print); \
+		_metadata->passed = 0; \
+		/* Ensure the optional handler is triggered */ \
+		_metadata->trigger = 1; \
+	} \
+} while (0); OPTIONAL_HANDLER(_assert)
+
+#define __EXPECT_STR(_expected, _seen, _t, _assert) do { \
+	const char *__exp = (_expected); \
+	const char *__seen = (_seen); \
+	if (!(strcmp(__exp, __seen) _t 0))  { \
+		__TH_LOG("Expected '%s' %s '%s'.", __exp, #_t, __seen); \
+		_metadata->passed = 0; \
+		_metadata->trigger = 1; \
+	} \
+} while (0); OPTIONAL_HANDLER(_assert)
+
+/* Contains all the information for test execution and status checking. */
+struct __test_metadata {
+	const char *name;
+	void (*fn)(struct __test_metadata *);
+	int termsig;
+	int passed;
+	int trigger; /* extra handler after the evaluation */
+	struct __test_metadata *prev, *next;
+};
+
+/* Storage for the (global) tests to be run. */
+static struct __test_metadata *__test_list;
+static unsigned int __test_count;
+static unsigned int __fixture_count;
+static int __constructor_order;
+
+#define _CONSTRUCTOR_ORDER_FORWARD   1
+#define _CONSTRUCTOR_ORDER_BACKWARD -1
+
+/*
+ * Since constructors are called in reverse order, reverse the test
+ * list so tests are run in source declaration order.
+ * https://gcc.gnu.org/onlinedocs/gccint/Initialization.html
+ * However, it seems not all toolchains do this correctly, so use
+ * __constructor_order to detect which direction is called first
+ * and adjust list building logic to get things running in the right
+ * direction.
+ */
+static inline void __register_test(struct __test_metadata *t)
+{
+	__test_count++;
+	/* Circular linked list where only prev is circular. */
+	if (__test_list == NULL) {
+		__test_list = t;
+		t->next = NULL;
+		t->prev = t;
+		return;
+	}
+	if (__constructor_order == _CONSTRUCTOR_ORDER_FORWARD) {
+		t->next = NULL;
+		t->prev = __test_list->prev;
+		t->prev->next = t;
+		__test_list->prev = t;
+	} else {
+		t->next = __test_list;
+		t->next->prev = t;
+		t->prev = t;
+		__test_list = t;
+	}
+}
+
+static inline int __bail(int for_realz)
+{
+	if (for_realz)
+		abort();
+	return 0;
+}
+
+void __run_test(struct __test_metadata *t)
+{
+	pid_t child_pid;
+	int status;
+
+	t->passed = 1;
+	t->trigger = 0;
+	printf("[ RUN      ] %s\n", t->name);
+	child_pid = fork();
+	if (child_pid < 0) {
+		printf("ERROR SPAWNING TEST CHILD\n");
+		t->passed = 0;
+	} else if (child_pid == 0) {
+		t->fn(t);
+		_exit(t->passed);
+	} else {
+		/* TODO(wad) add timeout support. */
+		waitpid(child_pid, &status, 0);
+		if (WIFEXITED(status)) {
+			t->passed = t->termsig == -1 ? WEXITSTATUS(status) : 0;
+			if (t->termsig != -1) {
+				fprintf(TH_LOG_STREAM,
+					"%s: Test exited normally "
+					"instead of by signal (code: %d)\n",
+					t->name,
+					WEXITSTATUS(status));
+			}
+		} else if (WIFSIGNALED(status)) {
+			t->passed = 0;
+			if (WTERMSIG(status) == SIGABRT) {
+				fprintf(TH_LOG_STREAM,
+					"%s: Test terminated by assertion\n",
+					t->name);
+			} else if (WTERMSIG(status) == t->termsig) {
+				t->passed = 1;
+			} else {
+				fprintf(TH_LOG_STREAM,
+					"%s: Test terminated unexpectedly "
+					"by signal %d\n",
+					t->name,
+					WTERMSIG(status));
+			}
+		} else {
+			fprintf(TH_LOG_STREAM,
+				"%s: Test ended in some other way [%u]\n",
+				t->name,
+				status);
+		}
+	}
+	printf("[     %4s ] %s\n", (t->passed ? "OK" : "FAIL"), t->name);
+}
+
+static int test_harness_run(int __attribute__((unused)) argc,
+			    char __attribute__((unused)) **argv)
+{
+	struct __test_metadata *t;
+	int ret = 0;
+	unsigned int count = 0;
+	unsigned int pass_count = 0;
+
+	/* TODO(wad) add optional arguments similar to gtest. */
+	printf("[==========] Running %u tests from %u test cases.\n",
+	       __test_count, __fixture_count + 1);
+	for (t = __test_list; t; t = t->next) {
+		count++;
+		__run_test(t);
+		if (t->passed)
+			pass_count++;
+		else
+			ret = 1;
+	}
+	printf("[==========] %u / %u tests passed.\n", pass_count, count);
+	printf("[  %s  ]\n", (ret ? "FAILED" : "PASSED"));
+	return ret;
+}
+
+static void __attribute__((constructor)) __constructor_order_first(void)
+{
+	if (!__constructor_order)
+		__constructor_order = _CONSTRUCTOR_ORDER_FORWARD;
+}
+
+#endif  /* TEST_HARNESS_H_ */
diff --git a/tools/testing/selftests/timers/.gitignore b/tools/testing/selftests/timers/.gitignore
new file mode 100644
index 000000000000..ced998151bc4
--- /dev/null
+++ b/tools/testing/selftests/timers/.gitignore
@@ -0,0 +1,18 @@
+alarmtimer-suspend
+change_skew
+clocksource-switch
+inconsistency-check
+leap-a-day
+leapcrash
+mqueue-lat
+nanosleep
+nsleep-lat
+posix_timers
+raw_skew
+rtctest
+set-2038
+set-tai
+set-timer-lat
+skew_consistency
+threadtest
+valid-adjtimex
diff --git a/tools/testing/selftests/timers/alarmtimer-suspend.c b/tools/testing/selftests/timers/alarmtimer-suspend.c
index aaffbde1d5ee..72cacf5383dd 100644
--- a/tools/testing/selftests/timers/alarmtimer-suspend.c
+++ b/tools/testing/selftests/timers/alarmtimer-suspend.c
@@ -57,7 +57,7 @@ static inline int ksft_exit_fail(void)
 
 
 #define NSEC_PER_SEC 1000000000ULL
-#define UNREASONABLE_LAT (NSEC_PER_SEC * 4) /* hopefully we resume in 4secs */
+#define UNREASONABLE_LAT (NSEC_PER_SEC * 5) /* hopefully we resume in 5 secs */
 
 #define SUSPEND_SECS 15
 int alarmcount;
@@ -152,7 +152,11 @@ int main(void)
 			alarm_clock_id++) {
 
 		alarmcount = 0;
-		timer_create(alarm_clock_id, &se, &tm1);
+		if (timer_create(alarm_clock_id, &se, &tm1) == -1) {
+			printf("timer_create failled, %s unspported?\n",
+					clockstring(alarm_clock_id));
+			break;
+		}
 
 		clock_gettime(alarm_clock_id, &start_time);
 		printf("Start time (%s): %ld:%ld\n", clockstring(alarm_clock_id),
@@ -172,7 +176,7 @@ int main(void)
 		while (alarmcount < 10) {
 			int ret;
 
-			sleep(1);
+			sleep(3);
 			ret = system("echo mem > /sys/power/state");
 			if (ret)
 				break;
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
index a5ce9534eb15..231b9a031f6a 100644
--- a/tools/testing/selftests/vm/Makefile
+++ b/tools/testing/selftests/vm/Makefile
@@ -1,7 +1,12 @@
 # Makefile for vm selftests
 
 CFLAGS = -Wall
-BINARIES = hugepage-mmap hugepage-shm map_hugetlb thuge-gen hugetlbfstest
+BINARIES = compaction_test
+BINARIES += hugepage-mmap
+BINARIES += hugepage-shm
+BINARIES += hugetlbfstest
+BINARIES += map_hugetlb
+BINARIES += thuge-gen
 BINARIES += transhuge-stress
 
 all: $(BINARIES)
diff --git a/tools/testing/selftests/vm/compaction_test.c b/tools/testing/selftests/vm/compaction_test.c
new file mode 100644
index 000000000000..932ff577ffc0
--- /dev/null
+++ b/tools/testing/selftests/vm/compaction_test.c
@@ -0,0 +1,225 @@
+/*
+ *
+ * A test for the patch "Allow compaction of unevictable pages".
+ * With this patch we should be able to allocate at least 1/4
+ * of RAM in huge pages. Without the patch much less is
+ * allocated.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <unistd.h>
+#include <string.h>
+
+#define MAP_SIZE 1048576
+
+struct map_list {
+	void *map;
+	struct map_list *next;
+};
+
+int read_memory_info(unsigned long *memfree, unsigned long *hugepagesize)
+{
+	char  buffer[256] = {0};
+	char *cmd = "cat /proc/meminfo | grep -i memfree | grep -o '[0-9]*'";
+	FILE *cmdfile = popen(cmd, "r");
+
+	if (!(fgets(buffer, sizeof(buffer), cmdfile))) {
+		perror("Failed to read meminfo\n");
+		return -1;
+	}
+
+	pclose(cmdfile);
+
+	*memfree = atoll(buffer);
+	cmd = "cat /proc/meminfo | grep -i hugepagesize | grep -o '[0-9]*'";
+	cmdfile = popen(cmd, "r");
+
+	if (!(fgets(buffer, sizeof(buffer), cmdfile))) {
+		perror("Failed to read meminfo\n");
+		return -1;
+	}
+
+	pclose(cmdfile);
+	*hugepagesize = atoll(buffer);
+
+	return 0;
+}
+
+int prereq(void)
+{
+	char allowed;
+	int fd;
+
+	fd = open("/proc/sys/vm/compact_unevictable_allowed",
+		  O_RDONLY | O_NONBLOCK);
+	if (fd < 0) {
+		perror("Failed to open\n"
+		       "/proc/sys/vm/compact_unevictable_allowed\n");
+		return -1;
+	}
+
+	if (read(fd, &allowed, sizeof(char)) != sizeof(char)) {
+		perror("Failed to read from\n"
+		       "/proc/sys/vm/compact_unevictable_allowed\n");
+		close(fd);
+		return -1;
+	}
+
+	close(fd);
+	if (allowed == '1')
+		return 0;
+
+	return -1;
+}
+
+int check_compaction(unsigned long mem_free, unsigned int hugepage_size)
+{
+	int fd;
+	int compaction_index = 0;
+	char initial_nr_hugepages[10] = {0};
+	char nr_hugepages[10] = {0};
+
+	/* We want to test with 80% of available memory. Else, OOM killer comes
+	   in to play */
+	mem_free = mem_free * 0.8;
+
+	fd = open("/proc/sys/vm/nr_hugepages", O_RDWR | O_NONBLOCK);
+	if (fd < 0) {
+		perror("Failed to open /proc/sys/vm/nr_hugepages");
+		return -1;
+	}
+
+	if (read(fd, initial_nr_hugepages, sizeof(initial_nr_hugepages)) <= 0) {
+		perror("Failed to read from /proc/sys/vm/nr_hugepages");
+		goto close_fd;
+	}
+
+	/* Start with the initial condition of 0 huge pages*/
+	if (write(fd, "0", sizeof(char)) != sizeof(char)) {
+		perror("Failed to write to /proc/sys/vm/nr_hugepages\n");
+		goto close_fd;
+	}
+
+	lseek(fd, 0, SEEK_SET);
+
+	/* Request a large number of huge pages. The Kernel will allocate
+	   as much as it can */
+	if (write(fd, "100000", (6*sizeof(char))) != (6*sizeof(char))) {
+		perror("Failed to write to /proc/sys/vm/nr_hugepages\n");
+		goto close_fd;
+	}
+
+	lseek(fd, 0, SEEK_SET);
+
+	if (read(fd, nr_hugepages, sizeof(nr_hugepages)) <= 0) {
+		perror("Failed to read from /proc/sys/vm/nr_hugepages\n");
+		goto close_fd;
+	}
+
+	/* We should have been able to request at least 1/3 rd of the memory in
+	   huge pages */
+	compaction_index = mem_free/(atoi(nr_hugepages) * hugepage_size);
+
+	if (compaction_index > 3) {
+		printf("No of huge pages allocated = %d\n",
+		       (atoi(nr_hugepages)));
+		fprintf(stderr, "ERROR: Less that 1/%d of memory is available\n"
+			"as huge pages\n", compaction_index);
+		goto close_fd;
+	}
+
+	printf("No of huge pages allocated = %d\n",
+	       (atoi(nr_hugepages)));
+
+	if (write(fd, initial_nr_hugepages, sizeof(initial_nr_hugepages))
+	    != strlen(initial_nr_hugepages)) {
+		perror("Failed to write to /proc/sys/vm/nr_hugepages\n");
+		goto close_fd;
+	}
+
+	close(fd);
+	return 0;
+
+ close_fd:
+	close(fd);
+	printf("Not OK. Compaction test failed.");
+	return -1;
+}
+
+
+int main(int argc, char **argv)
+{
+	struct rlimit lim;
+	struct map_list *list, *entry;
+	size_t page_size, i;
+	void *map = NULL;
+	unsigned long mem_free = 0;
+	unsigned long hugepage_size = 0;
+	unsigned long mem_fragmentable = 0;
+
+	if (prereq() != 0) {
+		printf("Either the sysctl compact_unevictable_allowed is not\n"
+		       "set to 1 or couldn't read the proc file.\n"
+		       "Skipping the test\n");
+		return 0;
+	}
+
+	lim.rlim_cur = RLIM_INFINITY;
+	lim.rlim_max = RLIM_INFINITY;
+	if (setrlimit(RLIMIT_MEMLOCK, &lim)) {
+		perror("Failed to set rlimit:\n");
+		return -1;
+	}
+
+	page_size = getpagesize();
+
+	list = NULL;
+
+	if (read_memory_info(&mem_free, &hugepage_size) != 0) {
+		printf("ERROR: Cannot read meminfo\n");
+		return -1;
+	}
+
+	mem_fragmentable = mem_free * 0.8 / 1024;
+
+	while (mem_fragmentable > 0) {
+		map = mmap(NULL, MAP_SIZE, PROT_READ | PROT_WRITE,
+			   MAP_ANONYMOUS | MAP_PRIVATE | MAP_LOCKED, -1, 0);
+		if (map == MAP_FAILED)
+			break;
+
+		entry = malloc(sizeof(struct map_list));
+		if (!entry) {
+			munmap(map, MAP_SIZE);
+			break;
+		}
+		entry->map = map;
+		entry->next = list;
+		list = entry;
+
+		/* Write something (in this case the address of the map) to
+		 * ensure that KSM can't merge the mapped pages
+		 */
+		for (i = 0; i < MAP_SIZE; i += page_size)
+			*(unsigned long *)(map + i) = (unsigned long)map + i;
+
+		mem_fragmentable--;
+	}
+
+	for (entry = list; entry != NULL; entry = entry->next) {
+		munmap(entry->map, MAP_SIZE);
+		if (!entry->next)
+			break;
+		entry = entry->next;
+	}
+
+	if (check_compaction(mem_free, hugepage_size) == 0)
+		return 0;
+
+	return -1;
+}
diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests
index c87b6812300d..49ece11ff7fd 100755
--- a/tools/testing/selftests/vm/run_vmtests
+++ b/tools/testing/selftests/vm/run_vmtests
@@ -90,4 +90,16 @@ fi
 umount $mnt
 rm -rf $mnt
 echo $nr_hugepgs > /proc/sys/vm/nr_hugepages
+
+echo "-----------------------"
+echo "running compaction_test"
+echo "-----------------------"
+./compaction_test
+if [ $? -ne 0 ]; then
+	echo "[FAIL]"
+	exitcode=1
+else
+	echo "[PASS]"
+fi
+
 exit $exitcode
diff --git a/tools/testing/selftests/x86/trivial_64bit_program.c b/tools/testing/selftests/x86/trivial_64bit_program.c
index b994946c40fb..05c6a41b3671 100644
--- a/tools/testing/selftests/x86/trivial_64bit_program.c
+++ b/tools/testing/selftests/x86/trivial_64bit_program.c
@@ -1,5 +1,5 @@
 /*
- * Trivial program to check that we have a valid 32-bit build environment.
+ * Trivial program to check that we have a valid 64-bit build environment.
  * Copyright (c) 2015 Andy Lutomirski
  * GPL v2
  */