From 472b46c352c9ff0b6fa57dbf85d77c51901a3368 Mon Sep 17 00:00:00 2001
From: Mikko Rapeli <mikko.rapeli@iki.fi>
Date: Sun, 6 Aug 2017 18:44:27 +0200
Subject: uapi linux/kfd_ioctl.h: only use __u32 and __u64
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Include <drm/drm.h> instead of <linux/types.h> which on Linux includes
<linux/types.h> and on non-Linux platforms defines __u32 etc types.

Fixes user space compilation errors like:

linux/kfd_ioctl.h:33:2: error: unknown type name ‘uint32_t’
  uint32_t major_version; /* from KFD */
  ^~~~~~~~

Signed-off-by: Mikko Rapeli <mikko.rapeli@iki.fi>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Oded Gabbay <oded.gabbay@gmail.com>
---
 include/uapi/linux/kfd_ioctl.h | 172 ++++++++++++++++++++---------------------
 1 file changed, 86 insertions(+), 86 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h
index 7b4567bacfc2..26283fefdf5f 100644
--- a/include/uapi/linux/kfd_ioctl.h
+++ b/include/uapi/linux/kfd_ioctl.h
@@ -23,15 +23,15 @@
 #ifndef KFD_IOCTL_H_INCLUDED
 #define KFD_IOCTL_H_INCLUDED
 
-#include <linux/types.h>
+#include <drm/drm.h>
 #include <linux/ioctl.h>
 
 #define KFD_IOCTL_MAJOR_VERSION 1
 #define KFD_IOCTL_MINOR_VERSION 1
 
 struct kfd_ioctl_get_version_args {
-	uint32_t major_version;	/* from KFD */
-	uint32_t minor_version;	/* from KFD */
+	__u32 major_version;	/* from KFD */
+	__u32 minor_version;	/* from KFD */
 };
 
 /* For kfd_ioctl_create_queue_args.queue_type. */
@@ -43,36 +43,36 @@ struct kfd_ioctl_get_version_args {
 #define KFD_MAX_QUEUE_PRIORITY		15
 
 struct kfd_ioctl_create_queue_args {
-	uint64_t ring_base_address;	/* to KFD */
-	uint64_t write_pointer_address;	/* from KFD */
-	uint64_t read_pointer_address;	/* from KFD */
-	uint64_t doorbell_offset;	/* from KFD */
-
-	uint32_t ring_size;		/* to KFD */
-	uint32_t gpu_id;		/* to KFD */
-	uint32_t queue_type;		/* to KFD */
-	uint32_t queue_percentage;	/* to KFD */
-	uint32_t queue_priority;	/* to KFD */
-	uint32_t queue_id;		/* from KFD */
-
-	uint64_t eop_buffer_address;	/* to KFD */
-	uint64_t eop_buffer_size;	/* to KFD */
-	uint64_t ctx_save_restore_address; /* to KFD */
-	uint64_t ctx_save_restore_size;	/* to KFD */
+	__u64 ring_base_address;	/* to KFD */
+	__u64 write_pointer_address;	/* from KFD */
+	__u64 read_pointer_address;	/* from KFD */
+	__u64 doorbell_offset;	/* from KFD */
+
+	__u32 ring_size;		/* to KFD */
+	__u32 gpu_id;		/* to KFD */
+	__u32 queue_type;		/* to KFD */
+	__u32 queue_percentage;	/* to KFD */
+	__u32 queue_priority;	/* to KFD */
+	__u32 queue_id;		/* from KFD */
+
+	__u64 eop_buffer_address;	/* to KFD */
+	__u64 eop_buffer_size;	/* to KFD */
+	__u64 ctx_save_restore_address; /* to KFD */
+	__u64 ctx_save_restore_size;	/* to KFD */
 };
 
 struct kfd_ioctl_destroy_queue_args {
-	uint32_t queue_id;		/* to KFD */
-	uint32_t pad;
+	__u32 queue_id;		/* to KFD */
+	__u32 pad;
 };
 
 struct kfd_ioctl_update_queue_args {
-	uint64_t ring_base_address;	/* to KFD */
+	__u64 ring_base_address;	/* to KFD */
 
-	uint32_t queue_id;		/* to KFD */
-	uint32_t ring_size;		/* to KFD */
-	uint32_t queue_percentage;	/* to KFD */
-	uint32_t queue_priority;	/* to KFD */
+	__u32 queue_id;		/* to KFD */
+	__u32 ring_size;		/* to KFD */
+	__u32 queue_percentage;	/* to KFD */
+	__u32 queue_priority;	/* to KFD */
 };
 
 /* For kfd_ioctl_set_memory_policy_args.default_policy and alternate_policy */
@@ -80,13 +80,13 @@ struct kfd_ioctl_update_queue_args {
 #define KFD_IOC_CACHE_POLICY_NONCOHERENT 1
 
 struct kfd_ioctl_set_memory_policy_args {
-	uint64_t alternate_aperture_base;	/* to KFD */
-	uint64_t alternate_aperture_size;	/* to KFD */
+	__u64 alternate_aperture_base;	/* to KFD */
+	__u64 alternate_aperture_size;	/* to KFD */
 
-	uint32_t gpu_id;			/* to KFD */
-	uint32_t default_policy;		/* to KFD */
-	uint32_t alternate_policy;		/* to KFD */
-	uint32_t pad;
+	__u32 gpu_id;			/* to KFD */
+	__u32 default_policy;		/* to KFD */
+	__u32 alternate_policy;		/* to KFD */
+	__u32 pad;
 };
 
 /*
@@ -97,26 +97,26 @@ struct kfd_ioctl_set_memory_policy_args {
  */
 
 struct kfd_ioctl_get_clock_counters_args {
-	uint64_t gpu_clock_counter;	/* from KFD */
-	uint64_t cpu_clock_counter;	/* from KFD */
-	uint64_t system_clock_counter;	/* from KFD */
-	uint64_t system_clock_freq;	/* from KFD */
+	__u64 gpu_clock_counter;	/* from KFD */
+	__u64 cpu_clock_counter;	/* from KFD */
+	__u64 system_clock_counter;	/* from KFD */
+	__u64 system_clock_freq;	/* from KFD */
 
-	uint32_t gpu_id;		/* to KFD */
-	uint32_t pad;
+	__u32 gpu_id;		/* to KFD */
+	__u32 pad;
 };
 
 #define NUM_OF_SUPPORTED_GPUS 7
 
 struct kfd_process_device_apertures {
-	uint64_t lds_base;		/* from KFD */
-	uint64_t lds_limit;		/* from KFD */
-	uint64_t scratch_base;		/* from KFD */
-	uint64_t scratch_limit;		/* from KFD */
-	uint64_t gpuvm_base;		/* from KFD */
-	uint64_t gpuvm_limit;		/* from KFD */
-	uint32_t gpu_id;		/* from KFD */
-	uint32_t pad;
+	__u64 lds_base;		/* from KFD */
+	__u64 lds_limit;		/* from KFD */
+	__u64 scratch_base;		/* from KFD */
+	__u64 scratch_limit;		/* from KFD */
+	__u64 gpuvm_base;		/* from KFD */
+	__u64 gpuvm_limit;		/* from KFD */
+	__u32 gpu_id;		/* from KFD */
+	__u32 pad;
 };
 
 struct kfd_ioctl_get_process_apertures_args {
@@ -124,8 +124,8 @@ struct kfd_ioctl_get_process_apertures_args {
 			process_apertures[NUM_OF_SUPPORTED_GPUS];/* from KFD */
 
 	/* from KFD, should be in the range [1 - NUM_OF_SUPPORTED_GPUS] */
-	uint32_t num_of_nodes;
-	uint32_t pad;
+	__u32 num_of_nodes;
+	__u32 pad;
 };
 
 #define MAX_ALLOWED_NUM_POINTS    100
@@ -133,25 +133,25 @@ struct kfd_ioctl_get_process_apertures_args {
 #define MAX_ALLOWED_WAC_BUFF_SIZE  128
 
 struct kfd_ioctl_dbg_register_args {
-	uint32_t gpu_id;		/* to KFD */
-	uint32_t pad;
+	__u32 gpu_id;		/* to KFD */
+	__u32 pad;
 };
 
 struct kfd_ioctl_dbg_unregister_args {
-	uint32_t gpu_id;		/* to KFD */
-	uint32_t pad;
+	__u32 gpu_id;		/* to KFD */
+	__u32 pad;
 };
 
 struct kfd_ioctl_dbg_address_watch_args {
-	uint64_t content_ptr;		/* a pointer to the actual content */
-	uint32_t gpu_id;		/* to KFD */
-	uint32_t buf_size_in_bytes;	/*including gpu_id and buf_size */
+	__u64 content_ptr;		/* a pointer to the actual content */
+	__u32 gpu_id;		/* to KFD */
+	__u32 buf_size_in_bytes;	/*including gpu_id and buf_size */
 };
 
 struct kfd_ioctl_dbg_wave_control_args {
-	uint64_t content_ptr;		/* a pointer to the actual content */
-	uint32_t gpu_id;		/* to KFD */
-	uint32_t buf_size_in_bytes;	/*including gpu_id and buf_size */
+	__u64 content_ptr;		/* a pointer to the actual content */
+	__u32 gpu_id;		/* to KFD */
+	__u32 buf_size_in_bytes;	/*including gpu_id and buf_size */
 };
 
 /* Matching HSA_EVENTTYPE */
@@ -172,44 +172,44 @@ struct kfd_ioctl_dbg_wave_control_args {
 #define KFD_SIGNAL_EVENT_LIMIT			256
 
 struct kfd_ioctl_create_event_args {
-	uint64_t event_page_offset;	/* from KFD */
-	uint32_t event_trigger_data;	/* from KFD - signal events only */
-	uint32_t event_type;		/* to KFD */
-	uint32_t auto_reset;		/* to KFD */
-	uint32_t node_id;		/* to KFD - only valid for certain
+	__u64 event_page_offset;	/* from KFD */
+	__u32 event_trigger_data;	/* from KFD - signal events only */
+	__u32 event_type;		/* to KFD */
+	__u32 auto_reset;		/* to KFD */
+	__u32 node_id;		/* to KFD - only valid for certain
 							event types */
-	uint32_t event_id;		/* from KFD */
-	uint32_t event_slot_index;	/* from KFD */
+	__u32 event_id;		/* from KFD */
+	__u32 event_slot_index;	/* from KFD */
 };
 
 struct kfd_ioctl_destroy_event_args {
-	uint32_t event_id;		/* to KFD */
-	uint32_t pad;
+	__u32 event_id;		/* to KFD */
+	__u32 pad;
 };
 
 struct kfd_ioctl_set_event_args {
-	uint32_t event_id;		/* to KFD */
-	uint32_t pad;
+	__u32 event_id;		/* to KFD */
+	__u32 pad;
 };
 
 struct kfd_ioctl_reset_event_args {
-	uint32_t event_id;		/* to KFD */
-	uint32_t pad;
+	__u32 event_id;		/* to KFD */
+	__u32 pad;
 };
 
 struct kfd_memory_exception_failure {
-	uint32_t NotPresent;	/* Page not present or supervisor privilege */
-	uint32_t ReadOnly;	/* Write access to a read-only page */
-	uint32_t NoExecute;	/* Execute access to a page marked NX */
-	uint32_t pad;
+	__u32 NotPresent;	/* Page not present or supervisor privilege */
+	__u32 ReadOnly;	/* Write access to a read-only page */
+	__u32 NoExecute;	/* Execute access to a page marked NX */
+	__u32 pad;
 };
 
 /* memory exception data*/
 struct kfd_hsa_memory_exception_data {
 	struct kfd_memory_exception_failure failure;
-	uint64_t va;
-	uint32_t gpu_id;
-	uint32_t pad;
+	__u64 va;
+	__u32 gpu_id;
+	__u32 pad;
 };
 
 /* Event data*/
@@ -217,19 +217,19 @@ struct kfd_event_data {
 	union {
 		struct kfd_hsa_memory_exception_data memory_exception_data;
 	};				/* From KFD */
-	uint64_t kfd_event_data_ext;	/* pointer to an extension structure
+	__u64 kfd_event_data_ext;	/* pointer to an extension structure
 					   for future exception types */
-	uint32_t event_id;		/* to KFD */
-	uint32_t pad;
+	__u32 event_id;		/* to KFD */
+	__u32 pad;
 };
 
 struct kfd_ioctl_wait_events_args {
-	uint64_t events_ptr;		/* pointed to struct
+	__u64 events_ptr;		/* pointed to struct
 					   kfd_event_data array, to KFD */
-	uint32_t num_events;		/* to KFD */
-	uint32_t wait_for_all;		/* to KFD */
-	uint32_t timeout;		/* to KFD */
-	uint32_t wait_result;		/* from KFD */
+	__u32 num_events;		/* to KFD */
+	__u32 wait_for_all;		/* to KFD */
+	__u32 timeout;		/* to KFD */
+	__u32 wait_result;		/* from KFD */
 };
 
 struct kfd_ioctl_set_scratch_backing_va_args {
-- 
cgit v1.2.3


From d612b1fd8010d0d67b5287fe146b8b55bcbb8655 Mon Sep 17 00:00:00 2001
From: Tyler Hicks <tyhicks@canonical.com>
Date: Fri, 11 Aug 2017 04:33:53 +0000
Subject: seccomp: Operation for checking if an action is available

Userspace code that needs to check if the kernel supports a given action
may not be able to use the /proc/sys/kernel/seccomp/actions_avail
sysctl. The process may be running in a sandbox and, therefore,
sufficient filesystem access may not be available. This patch adds an
operation to the seccomp(2) syscall that allows userspace code to ask
the kernel if a given action is available.

If the action is supported by the kernel, 0 is returned. If the action
is not supported by the kernel, -1 is returned with errno set to
-EOPNOTSUPP. If this check is attempted on a kernel that doesn't support
this new operation, -1 is returned with errno set to -EINVAL meaning
that userspace code will have the ability to differentiate between the
two error cases.

Signed-off-by: Tyler Hicks <tyhicks@canonical.com>
Suggested-by: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/uapi/linux/seccomp.h                  |  5 ++--
 kernel/seccomp.c                              | 26 +++++++++++++++++++
 tools/testing/selftests/seccomp/seccomp_bpf.c | 36 +++++++++++++++++++++++++++
 3 files changed, 65 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index 0f238a43ff1e..aaad61cc46bc 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -11,8 +11,9 @@
 #define SECCOMP_MODE_FILTER	2 /* uses user-supplied filter. */
 
 /* Valid operations for seccomp syscall. */
-#define SECCOMP_SET_MODE_STRICT	0
-#define SECCOMP_SET_MODE_FILTER	1
+#define SECCOMP_SET_MODE_STRICT		0
+#define SECCOMP_SET_MODE_FILTER		1
+#define SECCOMP_GET_ACTION_AVAIL	2
 
 /* Valid flags for SECCOMP_SET_MODE_FILTER */
 #define SECCOMP_FILTER_FLAG_TSYNC	1
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 5f19f41e4e50..7a6089f66fed 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -808,6 +808,27 @@ static inline long seccomp_set_mode_filter(unsigned int flags,
 }
 #endif
 
+static long seccomp_get_action_avail(const char __user *uaction)
+{
+	u32 action;
+
+	if (copy_from_user(&action, uaction, sizeof(action)))
+		return -EFAULT;
+
+	switch (action) {
+	case SECCOMP_RET_KILL:
+	case SECCOMP_RET_TRAP:
+	case SECCOMP_RET_ERRNO:
+	case SECCOMP_RET_TRACE:
+	case SECCOMP_RET_ALLOW:
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
 /* Common entry point for both prctl and syscall. */
 static long do_seccomp(unsigned int op, unsigned int flags,
 		       const char __user *uargs)
@@ -819,6 +840,11 @@ static long do_seccomp(unsigned int op, unsigned int flags,
 		return seccomp_set_mode_strict();
 	case SECCOMP_SET_MODE_FILTER:
 		return seccomp_set_mode_filter(flags, uargs);
+	case SECCOMP_GET_ACTION_AVAIL:
+		if (flags != 0)
+			return -EINVAL;
+
+		return seccomp_get_action_avail(uargs);
 	default:
 		return -EINVAL;
 	}
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 2fb49d99588d..1f2888f6678b 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -1731,6 +1731,10 @@ TEST_F_SIGNAL(TRACE_syscall, kill_after_ptrace, SIGSYS)
 #define SECCOMP_SET_MODE_FILTER 1
 #endif
 
+#ifndef SECCOMP_GET_ACTION_AVAIL
+#define SECCOMP_GET_ACTION_AVAIL 2
+#endif
+
 #ifndef SECCOMP_FILTER_FLAG_TSYNC
 #define SECCOMP_FILTER_FLAG_TSYNC 1
 #endif
@@ -2469,6 +2473,38 @@ TEST(syscall_restart)
 		_metadata->passed = 0;
 }
 
+TEST(get_action_avail)
+{
+	__u32 actions[] = { SECCOMP_RET_KILL,  SECCOMP_RET_TRAP,
+			    SECCOMP_RET_ERRNO, SECCOMP_RET_TRACE,
+			    SECCOMP_RET_ALLOW };
+	__u32 unknown_action = 0x10000000U;
+	int i;
+	long ret;
+
+	ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[0]);
+	ASSERT_NE(ENOSYS, errno) {
+		TH_LOG("Kernel does not support seccomp syscall!");
+	}
+	ASSERT_NE(EINVAL, errno) {
+		TH_LOG("Kernel does not support SECCOMP_GET_ACTION_AVAIL operation!");
+	}
+	EXPECT_EQ(ret, 0);
+
+	for (i = 0; i < ARRAY_SIZE(actions); i++) {
+		ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &actions[i]);
+		EXPECT_EQ(ret, 0) {
+			TH_LOG("Expected action (0x%X) not available!",
+			       actions[i]);
+		}
+	}
+
+	/* Check that an unknown action is handled properly (EOPNOTSUPP) */
+	ret = seccomp(SECCOMP_GET_ACTION_AVAIL, 0, &unknown_action);
+	EXPECT_EQ(ret, -1);
+	EXPECT_EQ(errno, EOPNOTSUPP);
+}
+
 /*
  * TODO:
  * - add microbenchmarks
-- 
cgit v1.2.3


From 0ddec0fc8900201c0897b87b762b7c420436662f Mon Sep 17 00:00:00 2001
From: Tyler Hicks <tyhicks@canonical.com>
Date: Fri, 11 Aug 2017 04:33:54 +0000
Subject: seccomp: Sysctl to configure actions that are allowed to be logged

Adminstrators can write to this sysctl to set the seccomp actions that
are allowed to be logged. Any actions not found in this sysctl will not
be logged.

For example, all SECCOMP_RET_KILL, SECCOMP_RET_TRAP, and
SECCOMP_RET_ERRNO actions would be loggable if "kill trap errno" were
written to the sysctl. SECCOMP_RET_TRACE actions would not be logged
since its string representation ("trace") wasn't present in the sysctl
value.

The path to the sysctl is:

 /proc/sys/kernel/seccomp/actions_logged

The actions_avail sysctl can be read to discover the valid action names
that can be written to the actions_logged sysctl with the exception of
"allow". SECCOMP_RET_ALLOW actions cannot be configured for logging.

The default setting for the sysctl is to allow all actions to be logged
except SECCOMP_RET_ALLOW. While only SECCOMP_RET_KILL actions are
currently logged, an upcoming patch will allow applications to request
additional actions to be logged.

There's one important exception to this sysctl. If a task is
specifically being audited, meaning that an audit context has been
allocated for the task, seccomp will log all actions other than
SECCOMP_RET_ALLOW despite the value of actions_logged. This exception
preserves the existing auditing behavior of tasks with an allocated
audit context.

With this patch, the logic for deciding if an action will be logged is:

if action == RET_ALLOW:
  do not log
else if action == RET_KILL && RET_KILL in actions_logged:
  log
else if audit_enabled && task-is-being-audited:
  log
else:
  do not log

Signed-off-by: Tyler Hicks <tyhicks@canonical.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 Documentation/userspace-api/seccomp_filter.rst |  18 +++
 include/linux/audit.h                          |   6 +-
 kernel/seccomp.c                               | 171 ++++++++++++++++++++++++-
 3 files changed, 187 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/Documentation/userspace-api/seccomp_filter.rst b/Documentation/userspace-api/seccomp_filter.rst
index 35fc7cbf1d95..2d1d8ab04ac5 100644
--- a/Documentation/userspace-api/seccomp_filter.rst
+++ b/Documentation/userspace-api/seccomp_filter.rst
@@ -187,6 +187,24 @@ directory. Here's a description of each file in that directory:
 	program was built, differs from the set of actions actually
 	supported in the current running kernel.
 
+``actions_logged``:
+	A read-write ordered list of seccomp return values (refer to the
+	``SECCOMP_RET_*`` macros above) that are allowed to be logged. Writes
+	to the file do not need to be in ordered form but reads from the file
+	will be ordered in the same way as the actions_avail sysctl.
+
+	It is important to note that the value of ``actions_logged`` does not
+	prevent certain actions from being logged when the audit subsystem is
+	configured to audit a task. If the action is not found in
+	``actions_logged`` list, the final decision on whether to audit the
+	action for that task is ultimately left up to the audit subsystem to
+	decide for all seccomp return values other than ``SECCOMP_RET_ALLOW``.
+
+	The ``allow`` string is not accepted in the ``actions_logged`` sysctl
+	as it is not possible to log ``SECCOMP_RET_ALLOW`` actions. Attempting
+	to write ``allow`` to the sysctl will result in an EINVAL being
+	returned.
+
 Adding architecture support
 ===========================
 
diff --git a/include/linux/audit.h b/include/linux/audit.h
index 2150bdccfbab..8c30f06d639d 100644
--- a/include/linux/audit.h
+++ b/include/linux/audit.h
@@ -314,11 +314,7 @@ void audit_core_dumps(long signr);
 
 static inline void audit_seccomp(unsigned long syscall, long signr, int code)
 {
-	if (!audit_enabled)
-		return;
-
-	/* Force a record to be reported if a signal was delivered. */
-	if (signr || unlikely(!audit_dummy_context()))
+	if (audit_enabled && unlikely(!audit_dummy_context()))
 		__audit_seccomp(syscall, signr, code);
 }
 
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 7a6089f66fed..54357e361aea 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -522,6 +522,45 @@ static void seccomp_send_sigsys(int syscall, int reason)
 }
 #endif	/* CONFIG_SECCOMP_FILTER */
 
+/* For use with seccomp_actions_logged */
+#define SECCOMP_LOG_KILL		(1 << 0)
+#define SECCOMP_LOG_TRAP		(1 << 2)
+#define SECCOMP_LOG_ERRNO		(1 << 3)
+#define SECCOMP_LOG_TRACE		(1 << 4)
+#define SECCOMP_LOG_ALLOW		(1 << 5)
+
+static u32 seccomp_actions_logged = SECCOMP_LOG_KILL  | SECCOMP_LOG_TRAP  |
+				    SECCOMP_LOG_ERRNO | SECCOMP_LOG_TRACE;
+
+static inline void seccomp_log(unsigned long syscall, long signr, u32 action)
+{
+	bool log = false;
+
+	switch (action) {
+	case SECCOMP_RET_ALLOW:
+	case SECCOMP_RET_TRAP:
+	case SECCOMP_RET_ERRNO:
+	case SECCOMP_RET_TRACE:
+		break;
+	case SECCOMP_RET_KILL:
+	default:
+		log = seccomp_actions_logged & SECCOMP_LOG_KILL;
+	}
+
+	/*
+	 * Force an audit message to be emitted when the action is RET_KILL and
+	 * the action is allowed to be logged by the admin.
+	 */
+	if (log)
+		return __audit_seccomp(syscall, signr, action);
+
+	/*
+	 * Let the audit subsystem decide if the action should be audited based
+	 * on whether the current task itself is being audited.
+	 */
+	return audit_seccomp(syscall, signr, action);
+}
+
 /*
  * Secure computing mode 1 allows only read/write/exit/sigreturn.
  * To be fully secure this must be combined with rlimit
@@ -547,7 +586,7 @@ static void __secure_computing_strict(int this_syscall)
 #ifdef SECCOMP_DEBUG
 	dump_stack();
 #endif
-	audit_seccomp(this_syscall, SIGKILL, SECCOMP_RET_KILL);
+	seccomp_log(this_syscall, SIGKILL, SECCOMP_RET_KILL);
 	do_exit(SIGKILL);
 }
 
@@ -656,7 +695,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
 
 	case SECCOMP_RET_KILL:
 	default:
-		audit_seccomp(this_syscall, SIGSYS, action);
+		seccomp_log(this_syscall, SIGSYS, action);
 		/* Dump core only if this is the last remaining thread. */
 		if (get_nr_threads(current) == 1) {
 			siginfo_t info;
@@ -673,7 +712,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
 	unreachable();
 
 skip:
-	audit_seccomp(this_syscall, 0, action);
+	seccomp_log(this_syscall, 0, action);
 	return -1;
 }
 #else
@@ -978,6 +1017,127 @@ static const char seccomp_actions_avail[] = SECCOMP_RET_KILL_NAME	" "
 					    SECCOMP_RET_TRACE_NAME	" "
 					    SECCOMP_RET_ALLOW_NAME;
 
+struct seccomp_log_name {
+	u32		log;
+	const char	*name;
+};
+
+static const struct seccomp_log_name seccomp_log_names[] = {
+	{ SECCOMP_LOG_KILL, SECCOMP_RET_KILL_NAME },
+	{ SECCOMP_LOG_TRAP, SECCOMP_RET_TRAP_NAME },
+	{ SECCOMP_LOG_ERRNO, SECCOMP_RET_ERRNO_NAME },
+	{ SECCOMP_LOG_TRACE, SECCOMP_RET_TRACE_NAME },
+	{ SECCOMP_LOG_ALLOW, SECCOMP_RET_ALLOW_NAME },
+	{ }
+};
+
+static bool seccomp_names_from_actions_logged(char *names, size_t size,
+					      u32 actions_logged)
+{
+	const struct seccomp_log_name *cur;
+	bool append_space = false;
+
+	for (cur = seccomp_log_names; cur->name && size; cur++) {
+		ssize_t ret;
+
+		if (!(actions_logged & cur->log))
+			continue;
+
+		if (append_space) {
+			ret = strscpy(names, " ", size);
+			if (ret < 0)
+				return false;
+
+			names += ret;
+			size -= ret;
+		} else
+			append_space = true;
+
+		ret = strscpy(names, cur->name, size);
+		if (ret < 0)
+			return false;
+
+		names += ret;
+		size -= ret;
+	}
+
+	return true;
+}
+
+static bool seccomp_action_logged_from_name(u32 *action_logged,
+					    const char *name)
+{
+	const struct seccomp_log_name *cur;
+
+	for (cur = seccomp_log_names; cur->name; cur++) {
+		if (!strcmp(cur->name, name)) {
+			*action_logged = cur->log;
+			return true;
+		}
+	}
+
+	return false;
+}
+
+static bool seccomp_actions_logged_from_names(u32 *actions_logged, char *names)
+{
+	char *name;
+
+	*actions_logged = 0;
+	while ((name = strsep(&names, " ")) && *name) {
+		u32 action_logged = 0;
+
+		if (!seccomp_action_logged_from_name(&action_logged, name))
+			return false;
+
+		*actions_logged |= action_logged;
+	}
+
+	return true;
+}
+
+static int seccomp_actions_logged_handler(struct ctl_table *ro_table, int write,
+					  void __user *buffer, size_t *lenp,
+					  loff_t *ppos)
+{
+	char names[sizeof(seccomp_actions_avail)];
+	struct ctl_table table;
+	int ret;
+
+	if (write && !capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	memset(names, 0, sizeof(names));
+
+	if (!write) {
+		if (!seccomp_names_from_actions_logged(names, sizeof(names),
+						       seccomp_actions_logged))
+			return -EINVAL;
+	}
+
+	table = *ro_table;
+	table.data = names;
+	table.maxlen = sizeof(names);
+	ret = proc_dostring(&table, write, buffer, lenp, ppos);
+	if (ret)
+		return ret;
+
+	if (write) {
+		u32 actions_logged;
+
+		if (!seccomp_actions_logged_from_names(&actions_logged,
+						       table.data))
+			return -EINVAL;
+
+		if (actions_logged & SECCOMP_LOG_ALLOW)
+			return -EINVAL;
+
+		seccomp_actions_logged = actions_logged;
+	}
+
+	return 0;
+}
+
 static struct ctl_path seccomp_sysctl_path[] = {
 	{ .procname = "kernel", },
 	{ .procname = "seccomp", },
@@ -992,6 +1152,11 @@ static struct ctl_table seccomp_sysctl_table[] = {
 		.mode		= 0444,
 		.proc_handler	= proc_dostring,
 	},
+	{
+		.procname	= "actions_logged",
+		.mode		= 0644,
+		.proc_handler	= seccomp_actions_logged_handler,
+	},
 	{ }
 };
 
-- 
cgit v1.2.3


From e66a39977985b1e69e17c4042cb290768eca9b02 Mon Sep 17 00:00:00 2001
From: Tyler Hicks <tyhicks@canonical.com>
Date: Fri, 11 Aug 2017 04:33:56 +0000
Subject: seccomp: Filter flag to log all actions except SECCOMP_RET_ALLOW

Add a new filter flag, SECCOMP_FILTER_FLAG_LOG, that enables logging for
all actions except for SECCOMP_RET_ALLOW for the given filter.

SECCOMP_RET_KILL actions are always logged, when "kill" is in the
actions_logged sysctl, and SECCOMP_RET_ALLOW actions are never logged,
regardless of this flag.

This flag can be used to create noisy filters that result in all
non-allowed actions to be logged. A process may have one noisy filter,
which is loaded with this flag, as well as a quiet filter that's not
loaded with this flag. This allows for the actions in a set of filters
to be selectively conveyed to the admin.

Since a system could have a large number of allocated seccomp_filter
structs, struct packing was taken in consideration. On 64 bit x86, the
new log member takes up one byte of an existing four byte hole in the
struct. On 32 bit x86, the new log member creates a new four byte hole
(unavoidable) and consumes one of those bytes.

Unfortunately, the tests added for SECCOMP_FILTER_FLAG_LOG are not
capable of inspecting the audit log to verify that the actions taken in
the filter were logged.

With this patch, the logic for deciding if an action will be logged is:

if action == RET_ALLOW:
  do not log
else if action == RET_KILL && RET_KILL in actions_logged:
  log
else if filter-requests-logging && action in actions_logged:
  log
else if audit_enabled && process-is-being-audited:
  log
else:
  do not log

Signed-off-by: Tyler Hicks <tyhicks@canonical.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/linux/seccomp.h                       |  3 +-
 include/uapi/linux/seccomp.h                  |  1 +
 kernel/seccomp.c                              | 26 +++++++---
 tools/testing/selftests/seccomp/seccomp_bpf.c | 69 ++++++++++++++++++++++++++-
 4 files changed, 91 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
index ecc296c137cd..c8bef436b61d 100644
--- a/include/linux/seccomp.h
+++ b/include/linux/seccomp.h
@@ -3,7 +3,8 @@
 
 #include <uapi/linux/seccomp.h>
 
-#define SECCOMP_FILTER_FLAG_MASK	(SECCOMP_FILTER_FLAG_TSYNC)
+#define SECCOMP_FILTER_FLAG_MASK	(SECCOMP_FILTER_FLAG_TSYNC | \
+					 SECCOMP_FILTER_FLAG_LOG)
 
 #ifdef CONFIG_SECCOMP
 
diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index aaad61cc46bc..19a611d0712e 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -17,6 +17,7 @@
 
 /* Valid flags for SECCOMP_SET_MODE_FILTER */
 #define SECCOMP_FILTER_FLAG_TSYNC	1
+#define SECCOMP_FILTER_FLAG_LOG		2
 
 /*
  * All BPF programs must return a 32-bit value.
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 54357e361aea..ed9fde418fc4 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -44,6 +44,7 @@
  *         get/put helpers should be used when accessing an instance
  *         outside of a lifetime-guarded section.  In general, this
  *         is only needed for handling filters shared across tasks.
+ * @log: true if all actions except for SECCOMP_RET_ALLOW should be logged
  * @prev: points to a previously installed, or inherited, filter
  * @prog: the BPF program to evaluate
  *
@@ -59,6 +60,7 @@
  */
 struct seccomp_filter {
 	refcount_t usage;
+	bool log;
 	struct seccomp_filter *prev;
 	struct bpf_prog *prog;
 };
@@ -452,6 +454,10 @@ static long seccomp_attach_filter(unsigned int flags,
 			return ret;
 	}
 
+	/* Set log flag, if present. */
+	if (flags & SECCOMP_FILTER_FLAG_LOG)
+		filter->log = true;
+
 	/*
 	 * If there is an existing filter, make it the prev and don't drop its
 	 * task reference.
@@ -532,15 +538,22 @@ static void seccomp_send_sigsys(int syscall, int reason)
 static u32 seccomp_actions_logged = SECCOMP_LOG_KILL  | SECCOMP_LOG_TRAP  |
 				    SECCOMP_LOG_ERRNO | SECCOMP_LOG_TRACE;
 
-static inline void seccomp_log(unsigned long syscall, long signr, u32 action)
+static inline void seccomp_log(unsigned long syscall, long signr, u32 action,
+			       bool requested)
 {
 	bool log = false;
 
 	switch (action) {
 	case SECCOMP_RET_ALLOW:
+		break;
 	case SECCOMP_RET_TRAP:
+		log = requested && seccomp_actions_logged & SECCOMP_LOG_TRAP;
+		break;
 	case SECCOMP_RET_ERRNO:
+		log = requested && seccomp_actions_logged & SECCOMP_LOG_ERRNO;
+		break;
 	case SECCOMP_RET_TRACE:
+		log = requested && seccomp_actions_logged & SECCOMP_LOG_TRACE;
 		break;
 	case SECCOMP_RET_KILL:
 	default:
@@ -548,8 +561,9 @@ static inline void seccomp_log(unsigned long syscall, long signr, u32 action)
 	}
 
 	/*
-	 * Force an audit message to be emitted when the action is RET_KILL and
-	 * the action is allowed to be logged by the admin.
+	 * Force an audit message to be emitted when the action is RET_KILL or
+	 * the FILTER_FLAG_LOG bit was set and the action is allowed to be
+	 * logged by the admin.
 	 */
 	if (log)
 		return __audit_seccomp(syscall, signr, action);
@@ -586,7 +600,7 @@ static void __secure_computing_strict(int this_syscall)
 #ifdef SECCOMP_DEBUG
 	dump_stack();
 #endif
-	seccomp_log(this_syscall, SIGKILL, SECCOMP_RET_KILL);
+	seccomp_log(this_syscall, SIGKILL, SECCOMP_RET_KILL, true);
 	do_exit(SIGKILL);
 }
 
@@ -695,7 +709,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
 
 	case SECCOMP_RET_KILL:
 	default:
-		seccomp_log(this_syscall, SIGSYS, action);
+		seccomp_log(this_syscall, SIGSYS, action, true);
 		/* Dump core only if this is the last remaining thread. */
 		if (get_nr_threads(current) == 1) {
 			siginfo_t info;
@@ -712,7 +726,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
 	unreachable();
 
 skip:
-	seccomp_log(this_syscall, 0, action);
+	seccomp_log(this_syscall, 0, action, match ? match->log : false);
 	return -1;
 }
 #else
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index abf708e09892..1c8c22ce7740 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -1739,6 +1739,10 @@ TEST_F_SIGNAL(TRACE_syscall, kill_after_ptrace, SIGSYS)
 #define SECCOMP_FILTER_FLAG_TSYNC 1
 #endif
 
+#ifndef SECCOMP_FILTER_FLAG_LOG
+#define SECCOMP_FILTER_FLAG_LOG 2
+#endif
+
 #ifndef seccomp
 int seccomp(unsigned int op, unsigned int flags, void *args)
 {
@@ -1844,7 +1848,8 @@ TEST(seccomp_syscall_mode_lock)
  */
 TEST(detect_seccomp_filter_flags)
 {
-	unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC };
+	unsigned int flags[] = { SECCOMP_FILTER_FLAG_TSYNC,
+				 SECCOMP_FILTER_FLAG_LOG };
 	unsigned int flag, all_flags;
 	int i;
 	long ret;
@@ -2533,6 +2538,67 @@ TEST(syscall_restart)
 		_metadata->passed = 0;
 }
 
+TEST_SIGNAL(filter_flag_log, SIGSYS)
+{
+	struct sock_filter allow_filter[] = {
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_filter kill_filter[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+	};
+	struct sock_fprog allow_prog = {
+		.len = (unsigned short)ARRAY_SIZE(allow_filter),
+		.filter = allow_filter,
+	};
+	struct sock_fprog kill_prog = {
+		.len = (unsigned short)ARRAY_SIZE(kill_filter),
+		.filter = kill_filter,
+	};
+	long ret;
+	pid_t parent = getppid();
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	/* Verify that the FILTER_FLAG_LOG flag isn't accepted in strict mode */
+	ret = seccomp(SECCOMP_SET_MODE_STRICT, SECCOMP_FILTER_FLAG_LOG,
+		      &allow_prog);
+	ASSERT_NE(ENOSYS, errno) {
+		TH_LOG("Kernel does not support seccomp syscall!");
+	}
+	EXPECT_NE(0, ret) {
+		TH_LOG("Kernel accepted FILTER_FLAG_LOG flag in strict mode!");
+	}
+	EXPECT_EQ(EINVAL, errno) {
+		TH_LOG("Kernel returned unexpected errno for FILTER_FLAG_LOG flag in strict mode!");
+	}
+
+	/* Verify that a simple, permissive filter can be added with no flags */
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &allow_prog);
+	EXPECT_EQ(0, ret);
+
+	/* See if the same filter can be added with the FILTER_FLAG_LOG flag */
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG,
+		      &allow_prog);
+	ASSERT_NE(EINVAL, errno) {
+		TH_LOG("Kernel does not support the FILTER_FLAG_LOG flag!");
+	}
+	EXPECT_EQ(0, ret);
+
+	/* Ensure that the kill filter works with the FILTER_FLAG_LOG flag */
+	ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FILTER_FLAG_LOG,
+		      &kill_prog);
+	EXPECT_EQ(0, ret);
+
+	EXPECT_EQ(parent, syscall(__NR_getppid));
+	/* getpid() should never return. */
+	EXPECT_EQ(0, syscall(__NR_getpid));
+}
+
 TEST(get_action_avail)
 {
 	__u32 actions[] = { SECCOMP_RET_KILL,  SECCOMP_RET_TRAP,
@@ -2573,6 +2639,7 @@ TEST(get_action_avail)
  * - endianness checking when appropriate
  * - 64-bit arg prodding
  * - arch value testing (x86 modes especially)
+ * - verify that FILTER_FLAG_LOG filters generate log messages
  * - ...
  */
 
-- 
cgit v1.2.3


From 59f5cf44a38284eb9e76270c786fb6cc62ef8ac4 Mon Sep 17 00:00:00 2001
From: Tyler Hicks <tyhicks@canonical.com>
Date: Fri, 11 Aug 2017 04:33:57 +0000
Subject: seccomp: Action to log before allowing

Add a new action, SECCOMP_RET_LOG, that logs a syscall before allowing
the syscall. At the implementation level, this action is identical to
the existing SECCOMP_RET_ALLOW action. However, it can be very useful when
initially developing a seccomp filter for an application. The developer
can set the default action to be SECCOMP_RET_LOG, maybe mark any
obviously needed syscalls with SECCOMP_RET_ALLOW, and then put the
application through its paces. A list of syscalls that triggered the
default action (SECCOMP_RET_LOG) can be easily gleaned from the logs and
that list can be used to build the syscall whitelist. Finally, the
developer can change the default action to the desired value.

This provides a more friendly experience than seeing the application get
killed, then updating the filter and rebuilding the app, seeing the
application get killed due to a different syscall, then updating the
filter and rebuilding the app, etc.

The functionality is similar to what's supported by the various LSMs.
SELinux has permissive mode, AppArmor has complain mode, SMACK has
bring-up mode, etc.

SECCOMP_RET_LOG is given a lower value than SECCOMP_RET_ALLOW as allow
while logging is slightly more restrictive than quietly allowing.

Unfortunately, the tests added for SECCOMP_RET_LOG are not capable of
inspecting the audit log to verify that the syscall was logged.

With this patch, the logic for deciding if an action will be logged is:

if action == RET_ALLOW:
  do not log
else if action == RET_KILL && RET_KILL in actions_logged:
  log
else if action == RET_LOG && RET_LOG in actions_logged:
  log
else if filter-requests-logging && action in actions_logged:
  log
else if audit_enabled && process-is-being-audited:
  log
else:
  do not log

Signed-off-by: Tyler Hicks <tyhicks@canonical.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 Documentation/userspace-api/seccomp_filter.rst |  9 +++
 include/uapi/linux/seccomp.h                   |  1 +
 kernel/seccomp.c                               | 23 ++++--
 tools/testing/selftests/seccomp/seccomp_bpf.c  | 98 +++++++++++++++++++++++++-
 4 files changed, 125 insertions(+), 6 deletions(-)

(limited to 'include')

diff --git a/Documentation/userspace-api/seccomp_filter.rst b/Documentation/userspace-api/seccomp_filter.rst
index 2d1d8ab04ac5..f4977357daf2 100644
--- a/Documentation/userspace-api/seccomp_filter.rst
+++ b/Documentation/userspace-api/seccomp_filter.rst
@@ -141,6 +141,15 @@ In precedence order, they are:
 	allow use of ptrace, even of other sandboxed processes, without
 	extreme care; ptracers can use this mechanism to escape.)
 
+``SECCOMP_RET_LOG``:
+	Results in the system call being executed after it is logged. This
+	should be used by application developers to learn which syscalls their
+	application needs without having to iterate through multiple test and
+	development cycles to build the list.
+
+	This action will only be logged if "log" is present in the
+	actions_logged sysctl string.
+
 ``SECCOMP_RET_ALLOW``:
 	Results in the system call being executed.
 
diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index 19a611d0712e..f94433263e4b 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -31,6 +31,7 @@
 #define SECCOMP_RET_TRAP	0x00030000U /* disallow and force a SIGSYS */
 #define SECCOMP_RET_ERRNO	0x00050000U /* returns an errno */
 #define SECCOMP_RET_TRACE	0x7ff00000U /* pass to a tracer or disallow */
+#define SECCOMP_RET_LOG		0x7ffc0000U /* allow after logging */
 #define SECCOMP_RET_ALLOW	0x7fff0000U /* allow */
 
 /* Masks for the return value sections. */
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index ed9fde418fc4..59cde2ed3b92 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -533,10 +533,12 @@ static void seccomp_send_sigsys(int syscall, int reason)
 #define SECCOMP_LOG_TRAP		(1 << 2)
 #define SECCOMP_LOG_ERRNO		(1 << 3)
 #define SECCOMP_LOG_TRACE		(1 << 4)
-#define SECCOMP_LOG_ALLOW		(1 << 5)
+#define SECCOMP_LOG_LOG			(1 << 5)
+#define SECCOMP_LOG_ALLOW		(1 << 6)
 
 static u32 seccomp_actions_logged = SECCOMP_LOG_KILL  | SECCOMP_LOG_TRAP  |
-				    SECCOMP_LOG_ERRNO | SECCOMP_LOG_TRACE;
+				    SECCOMP_LOG_ERRNO | SECCOMP_LOG_TRACE |
+				    SECCOMP_LOG_LOG;
 
 static inline void seccomp_log(unsigned long syscall, long signr, u32 action,
 			       bool requested)
@@ -555,15 +557,18 @@ static inline void seccomp_log(unsigned long syscall, long signr, u32 action,
 	case SECCOMP_RET_TRACE:
 		log = requested && seccomp_actions_logged & SECCOMP_LOG_TRACE;
 		break;
+	case SECCOMP_RET_LOG:
+		log = seccomp_actions_logged & SECCOMP_LOG_LOG;
+		break;
 	case SECCOMP_RET_KILL:
 	default:
 		log = seccomp_actions_logged & SECCOMP_LOG_KILL;
 	}
 
 	/*
-	 * Force an audit message to be emitted when the action is RET_KILL or
-	 * the FILTER_FLAG_LOG bit was set and the action is allowed to be
-	 * logged by the admin.
+	 * Force an audit message to be emitted when the action is RET_KILL,
+	 * RET_LOG, or the FILTER_FLAG_LOG bit was set and the action is
+	 * allowed to be logged by the admin.
 	 */
 	if (log)
 		return __audit_seccomp(syscall, signr, action);
@@ -699,6 +704,10 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
 
 		return 0;
 
+	case SECCOMP_RET_LOG:
+		seccomp_log(this_syscall, 0, action, true);
+		return 0;
+
 	case SECCOMP_RET_ALLOW:
 		/*
 		 * Note that the "match" filter will always be NULL for
@@ -873,6 +882,7 @@ static long seccomp_get_action_avail(const char __user *uaction)
 	case SECCOMP_RET_TRAP:
 	case SECCOMP_RET_ERRNO:
 	case SECCOMP_RET_TRACE:
+	case SECCOMP_RET_LOG:
 	case SECCOMP_RET_ALLOW:
 		break;
 	default:
@@ -1023,12 +1033,14 @@ out:
 #define SECCOMP_RET_TRAP_NAME		"trap"
 #define SECCOMP_RET_ERRNO_NAME		"errno"
 #define SECCOMP_RET_TRACE_NAME		"trace"
+#define SECCOMP_RET_LOG_NAME		"log"
 #define SECCOMP_RET_ALLOW_NAME		"allow"
 
 static const char seccomp_actions_avail[] = SECCOMP_RET_KILL_NAME	" "
 					    SECCOMP_RET_TRAP_NAME	" "
 					    SECCOMP_RET_ERRNO_NAME	" "
 					    SECCOMP_RET_TRACE_NAME	" "
+					    SECCOMP_RET_LOG_NAME	" "
 					    SECCOMP_RET_ALLOW_NAME;
 
 struct seccomp_log_name {
@@ -1041,6 +1053,7 @@ static const struct seccomp_log_name seccomp_log_names[] = {
 	{ SECCOMP_LOG_TRAP, SECCOMP_RET_TRAP_NAME },
 	{ SECCOMP_LOG_ERRNO, SECCOMP_RET_ERRNO_NAME },
 	{ SECCOMP_LOG_TRACE, SECCOMP_RET_TRACE_NAME },
+	{ SECCOMP_LOG_LOG, SECCOMP_RET_LOG_NAME },
 	{ SECCOMP_LOG_ALLOW, SECCOMP_RET_ALLOW_NAME },
 	{ }
 };
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 1c8c22ce7740..7372958eccb5 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -74,7 +74,12 @@
 #define SECCOMP_RET_ERRNO       0x00050000U /* returns an errno */
 #define SECCOMP_RET_TRACE       0x7ff00000U /* pass to a tracer or disallow */
 #define SECCOMP_RET_ALLOW       0x7fff0000U /* allow */
+#endif
+#ifndef SECCOMP_RET_LOG
+#define SECCOMP_RET_LOG       0x7ffc0000U /* allow after logging */
+#endif
 
+#ifndef SECCOMP_RET_ACTION
 /* Masks for the return value sections. */
 #define SECCOMP_RET_ACTION      0x7fff0000U
 #define SECCOMP_RET_DATA        0x0000ffffU
@@ -342,6 +347,28 @@ TEST(empty_prog)
 	EXPECT_EQ(EINVAL, errno);
 }
 
+TEST(log_all)
+{
+	struct sock_filter filter[] = {
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG),
+	};
+	struct sock_fprog prog = {
+		.len = (unsigned short)ARRAY_SIZE(filter),
+		.filter = filter,
+	};
+	long ret;
+	pid_t parent = getppid();
+
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
+	ASSERT_EQ(0, ret);
+
+	/* getppid() should succeed and be logged (no check for logging) */
+	EXPECT_EQ(parent, syscall(__NR_getppid));
+}
+
 TEST_SIGNAL(unknown_ret_is_kill_inside, SIGSYS)
 {
 	struct sock_filter filter[] = {
@@ -756,6 +783,7 @@ TEST_F(TRAP, handler)
 
 FIXTURE_DATA(precedence) {
 	struct sock_fprog allow;
+	struct sock_fprog log;
 	struct sock_fprog trace;
 	struct sock_fprog error;
 	struct sock_fprog trap;
@@ -767,6 +795,13 @@ FIXTURE_SETUP(precedence)
 	struct sock_filter allow_insns[] = {
 		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
 	};
+	struct sock_filter log_insns[] = {
+		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
+			offsetof(struct seccomp_data, nr)),
+		BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_LOG),
+	};
 	struct sock_filter trace_insns[] = {
 		BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
 			offsetof(struct seccomp_data, nr)),
@@ -803,6 +838,7 @@ FIXTURE_SETUP(precedence)
 	memcpy(self->_x.filter, &_x##_insns, sizeof(_x##_insns)); \
 	self->_x.len = (unsigned short)ARRAY_SIZE(_x##_insns)
 	FILTER_ALLOC(allow);
+	FILTER_ALLOC(log);
 	FILTER_ALLOC(trace);
 	FILTER_ALLOC(error);
 	FILTER_ALLOC(trap);
@@ -813,6 +849,7 @@ FIXTURE_TEARDOWN(precedence)
 {
 #define FILTER_FREE(_x) if (self->_x.filter) free(self->_x.filter)
 	FILTER_FREE(allow);
+	FILTER_FREE(log);
 	FILTER_FREE(trace);
 	FILTER_FREE(error);
 	FILTER_FREE(trap);
@@ -830,6 +867,8 @@ TEST_F(precedence, allow_ok)
 
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
 	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
 	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
@@ -854,6 +893,8 @@ TEST_F_SIGNAL(precedence, kill_is_highest, SIGSYS)
 
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
 	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
 	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
@@ -885,6 +926,8 @@ TEST_F_SIGNAL(precedence, kill_is_highest_in_any_order, SIGSYS)
 	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
 	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
 	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
@@ -906,6 +949,8 @@ TEST_F_SIGNAL(precedence, trap_is_second, SIGSYS)
 
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
 	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
 	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
@@ -931,6 +976,8 @@ TEST_F_SIGNAL(precedence, trap_is_second_in_any_order, SIGSYS)
 	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap);
 	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
 	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
@@ -952,6 +999,8 @@ TEST_F(precedence, errno_is_third)
 
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
 	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
 	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
@@ -970,6 +1019,8 @@ TEST_F(precedence, errno_is_third_in_any_order)
 	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
 	ASSERT_EQ(0, ret);
 
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error);
 	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
@@ -992,6 +1043,8 @@ TEST_F(precedence, trace_is_fourth)
 
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
 	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace);
 	ASSERT_EQ(0, ret);
 	/* Should work just fine. */
@@ -1013,12 +1066,54 @@ TEST_F(precedence, trace_is_fourth_in_any_order)
 	ASSERT_EQ(0, ret);
 	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
 	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
 	/* Should work just fine. */
 	EXPECT_EQ(parent, syscall(__NR_getppid));
 	/* No ptracer */
 	EXPECT_EQ(-1, syscall(__NR_getpid));
 }
 
+TEST_F(precedence, log_is_fifth)
+{
+	pid_t mypid, parent;
+	long ret;
+
+	mypid = getpid();
+	parent = getppid();
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
+	/* Should work just fine. */
+	EXPECT_EQ(parent, syscall(__NR_getppid));
+	/* Should also work just fine */
+	EXPECT_EQ(mypid, syscall(__NR_getpid));
+}
+
+TEST_F(precedence, log_is_fifth_in_any_order)
+{
+	pid_t mypid, parent;
+	long ret;
+
+	mypid = getpid();
+	parent = getppid();
+	ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+	ASSERT_EQ(0, ret);
+
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->log);
+	ASSERT_EQ(0, ret);
+	ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow);
+	ASSERT_EQ(0, ret);
+	/* Should work just fine. */
+	EXPECT_EQ(parent, syscall(__NR_getppid));
+	/* Should also work just fine */
+	EXPECT_EQ(mypid, syscall(__NR_getpid));
+}
+
 #ifndef PTRACE_O_TRACESECCOMP
 #define PTRACE_O_TRACESECCOMP	0x00000080
 #endif
@@ -2603,7 +2698,7 @@ TEST(get_action_avail)
 {
 	__u32 actions[] = { SECCOMP_RET_KILL,  SECCOMP_RET_TRAP,
 			    SECCOMP_RET_ERRNO, SECCOMP_RET_TRACE,
-			    SECCOMP_RET_ALLOW };
+			    SECCOMP_RET_LOG,   SECCOMP_RET_ALLOW };
 	__u32 unknown_action = 0x10000000U;
 	int i;
 	long ret;
@@ -2640,6 +2735,7 @@ TEST(get_action_avail)
  * - 64-bit arg prodding
  * - arch value testing (x86 modes especially)
  * - verify that FILTER_FLAG_LOG filters generate log messages
+ * - verify that RET_LOG generates log messages
  * - ...
  */
 
-- 
cgit v1.2.3


From fd76875ca289a3d4722f266fd2d5532a27083903 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 11 Aug 2017 12:53:18 -0700
Subject: seccomp: Rename SECCOMP_RET_KILL to SECCOMP_RET_KILL_THREAD

In preparation for adding SECCOMP_RET_KILL_PROCESS, rename SECCOMP_RET_KILL
to the more accurate SECCOMP_RET_KILL_THREAD.

The existing selftest values are intentionally left as SECCOMP_RET_KILL
just to be sure we're exercising the alias.

Signed-off-by: Kees Cook <keescook@chromium.org>
---
 Documentation/networking/filter.txt            |  2 +-
 Documentation/userspace-api/seccomp_filter.rst |  4 +--
 include/uapi/linux/seccomp.h                   |  3 +-
 kernel/seccomp.c                               | 39 ++++++++++++++------------
 samples/seccomp/bpf-direct.c                   |  4 +--
 samples/seccomp/bpf-helper.h                   |  2 +-
 tools/testing/selftests/seccomp/seccomp_bpf.c  | 17 ++++++-----
 7 files changed, 39 insertions(+), 32 deletions(-)

(limited to 'include')

diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt
index b69b205501de..73aa0f12156d 100644
--- a/Documentation/networking/filter.txt
+++ b/Documentation/networking/filter.txt
@@ -337,7 +337,7 @@ Examples for low-level BPF:
   jeq #14, good           /* __NR_rt_sigprocmask */
   jeq #13, good           /* __NR_rt_sigaction */
   jeq #35, good           /* __NR_nanosleep */
-  bad: ret #0             /* SECCOMP_RET_KILL */
+  bad: ret #0             /* SECCOMP_RET_KILL_THREAD */
   good: ret #0x7fff0000   /* SECCOMP_RET_ALLOW */
 
 The above example code can be placed into a file (here called "foo"), and
diff --git a/Documentation/userspace-api/seccomp_filter.rst b/Documentation/userspace-api/seccomp_filter.rst
index f4977357daf2..d76396f2d8ed 100644
--- a/Documentation/userspace-api/seccomp_filter.rst
+++ b/Documentation/userspace-api/seccomp_filter.rst
@@ -87,11 +87,11 @@ Return values
 A seccomp filter may return any of the following values. If multiple
 filters exist, the return value for the evaluation of a given system
 call will always use the highest precedent value. (For example,
-``SECCOMP_RET_KILL`` will always take precedence.)
+``SECCOMP_RET_KILL_THREAD`` will always take precedence.)
 
 In precedence order, they are:
 
-``SECCOMP_RET_KILL``:
+``SECCOMP_RET_KILL_THREAD``:
 	Results in the task exiting immediately without executing the
 	system call.  The exit status of the task (``status & 0x7f``) will
 	be ``SIGSYS``, not ``SIGKILL``.
diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index f94433263e4b..5a03f699eb17 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -27,7 +27,8 @@
  * The ordering ensures that a min_t() over composed return values always
  * selects the least permissive choice.
  */
-#define SECCOMP_RET_KILL	0x00000000U /* kill the task immediately */
+#define SECCOMP_RET_KILL_THREAD	0x00000000U /* kill the thread */
+#define SECCOMP_RET_KILL	SECCOMP_RET_KILL_THREAD
 #define SECCOMP_RET_TRAP	0x00030000U /* disallow and force a SIGSYS */
 #define SECCOMP_RET_ERRNO	0x00050000U /* returns an errno */
 #define SECCOMP_RET_TRACE	0x7ff00000U /* pass to a tracer or disallow */
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 59cde2ed3b92..95ac54cff00f 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -192,7 +192,7 @@ static u32 seccomp_run_filters(const struct seccomp_data *sd,
 
 	/* Ensure unexpected behavior doesn't result in failing open. */
 	if (unlikely(WARN_ON(f == NULL)))
-		return SECCOMP_RET_KILL;
+		return SECCOMP_RET_KILL_THREAD;
 
 	if (!sd) {
 		populate_seccomp_data(&sd_local);
@@ -529,15 +529,17 @@ static void seccomp_send_sigsys(int syscall, int reason)
 #endif	/* CONFIG_SECCOMP_FILTER */
 
 /* For use with seccomp_actions_logged */
-#define SECCOMP_LOG_KILL		(1 << 0)
+#define SECCOMP_LOG_KILL_THREAD		(1 << 0)
 #define SECCOMP_LOG_TRAP		(1 << 2)
 #define SECCOMP_LOG_ERRNO		(1 << 3)
 #define SECCOMP_LOG_TRACE		(1 << 4)
 #define SECCOMP_LOG_LOG			(1 << 5)
 #define SECCOMP_LOG_ALLOW		(1 << 6)
 
-static u32 seccomp_actions_logged = SECCOMP_LOG_KILL  | SECCOMP_LOG_TRAP  |
-				    SECCOMP_LOG_ERRNO | SECCOMP_LOG_TRACE |
+static u32 seccomp_actions_logged = SECCOMP_LOG_KILL_THREAD |
+				    SECCOMP_LOG_TRAP  |
+				    SECCOMP_LOG_ERRNO |
+				    SECCOMP_LOG_TRACE |
 				    SECCOMP_LOG_LOG;
 
 static inline void seccomp_log(unsigned long syscall, long signr, u32 action,
@@ -560,13 +562,13 @@ static inline void seccomp_log(unsigned long syscall, long signr, u32 action,
 	case SECCOMP_RET_LOG:
 		log = seccomp_actions_logged & SECCOMP_LOG_LOG;
 		break;
-	case SECCOMP_RET_KILL:
+	case SECCOMP_RET_KILL_THREAD:
 	default:
-		log = seccomp_actions_logged & SECCOMP_LOG_KILL;
+		log = seccomp_actions_logged & SECCOMP_LOG_KILL_THREAD;
 	}
 
 	/*
-	 * Force an audit message to be emitted when the action is RET_KILL,
+	 * Force an audit message to be emitted when the action is RET_KILL_*,
 	 * RET_LOG, or the FILTER_FLAG_LOG bit was set and the action is
 	 * allowed to be logged by the admin.
 	 */
@@ -605,7 +607,7 @@ static void __secure_computing_strict(int this_syscall)
 #ifdef SECCOMP_DEBUG
 	dump_stack();
 #endif
-	seccomp_log(this_syscall, SIGKILL, SECCOMP_RET_KILL, true);
+	seccomp_log(this_syscall, SIGKILL, SECCOMP_RET_KILL_THREAD, true);
 	do_exit(SIGKILL);
 }
 
@@ -716,7 +718,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
 		 */
 		return 0;
 
-	case SECCOMP_RET_KILL:
+	case SECCOMP_RET_KILL_THREAD:
 	default:
 		seccomp_log(this_syscall, SIGSYS, action, true);
 		/* Dump core only if this is the last remaining thread. */
@@ -878,7 +880,7 @@ static long seccomp_get_action_avail(const char __user *uaction)
 		return -EFAULT;
 
 	switch (action) {
-	case SECCOMP_RET_KILL:
+	case SECCOMP_RET_KILL_THREAD:
 	case SECCOMP_RET_TRAP:
 	case SECCOMP_RET_ERRNO:
 	case SECCOMP_RET_TRACE:
@@ -1029,19 +1031,20 @@ out:
 #ifdef CONFIG_SYSCTL
 
 /* Human readable action names for friendly sysctl interaction */
-#define SECCOMP_RET_KILL_NAME		"kill"
+#define SECCOMP_RET_KILL_THREAD_NAME	"kill_thread"
 #define SECCOMP_RET_TRAP_NAME		"trap"
 #define SECCOMP_RET_ERRNO_NAME		"errno"
 #define SECCOMP_RET_TRACE_NAME		"trace"
 #define SECCOMP_RET_LOG_NAME		"log"
 #define SECCOMP_RET_ALLOW_NAME		"allow"
 
-static const char seccomp_actions_avail[] = SECCOMP_RET_KILL_NAME	" "
-					    SECCOMP_RET_TRAP_NAME	" "
-					    SECCOMP_RET_ERRNO_NAME	" "
-					    SECCOMP_RET_TRACE_NAME	" "
-					    SECCOMP_RET_LOG_NAME	" "
-					    SECCOMP_RET_ALLOW_NAME;
+static const char seccomp_actions_avail[] =
+				SECCOMP_RET_KILL_THREAD_NAME	" "
+				SECCOMP_RET_TRAP_NAME		" "
+				SECCOMP_RET_ERRNO_NAME		" "
+				SECCOMP_RET_TRACE_NAME		" "
+				SECCOMP_RET_LOG_NAME		" "
+				SECCOMP_RET_ALLOW_NAME;
 
 struct seccomp_log_name {
 	u32		log;
@@ -1049,7 +1052,7 @@ struct seccomp_log_name {
 };
 
 static const struct seccomp_log_name seccomp_log_names[] = {
-	{ SECCOMP_LOG_KILL, SECCOMP_RET_KILL_NAME },
+	{ SECCOMP_LOG_KILL_THREAD, SECCOMP_RET_KILL_THREAD_NAME },
 	{ SECCOMP_LOG_TRAP, SECCOMP_RET_TRAP_NAME },
 	{ SECCOMP_LOG_ERRNO, SECCOMP_RET_ERRNO_NAME },
 	{ SECCOMP_LOG_TRACE, SECCOMP_RET_TRACE_NAME },
diff --git a/samples/seccomp/bpf-direct.c b/samples/seccomp/bpf-direct.c
index 151ec3f52189..235ce3c49ee9 100644
--- a/samples/seccomp/bpf-direct.c
+++ b/samples/seccomp/bpf-direct.c
@@ -129,7 +129,7 @@ static int install_filter(void)
 		/* Check that read is only using stdin. */
 		BPF_STMT(BPF_LD+BPF_W+BPF_ABS, syscall_arg(0)),
 		BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, STDIN_FILENO, 4, 0),
-		BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL),
+		BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL_THREAD),
 
 		/* Check that write is only using stdout */
 		BPF_STMT(BPF_LD+BPF_W+BPF_ABS, syscall_arg(0)),
@@ -139,7 +139,7 @@ static int install_filter(void)
 
 		BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW),
 		BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_TRAP),
-		BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL),
+		BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL_THREAD),
 	};
 	struct sock_fprog prog = {
 		.len = (unsigned short)(sizeof(filter)/sizeof(filter[0])),
diff --git a/samples/seccomp/bpf-helper.h b/samples/seccomp/bpf-helper.h
index 1d8de9edd858..83dbe79cbe2c 100644
--- a/samples/seccomp/bpf-helper.h
+++ b/samples/seccomp/bpf-helper.h
@@ -44,7 +44,7 @@ void seccomp_bpf_print(struct sock_filter *filter, size_t count);
 #define ALLOW \
 	BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW)
 #define DENY \
-	BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL)
+	BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_KILL_THREAD)
 #define JUMP(labels, label) \
 	BPF_JUMP(BPF_JMP+BPF_JA, FIND_LABEL((labels), (label)), \
 		 JUMP_JT, JUMP_JF)
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 7372958eccb5..a3ba39a32449 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -68,15 +68,18 @@
 #define SECCOMP_MODE_FILTER 2
 #endif
 
+#ifndef SECCOMP_RET_KILL_THREAD
+#define SECCOMP_RET_KILL_THREAD	 0x00000000U /* kill the thread */
+#endif
 #ifndef SECCOMP_RET_KILL
-#define SECCOMP_RET_KILL        0x00000000U /* kill the task immediately */
-#define SECCOMP_RET_TRAP        0x00030000U /* disallow and force a SIGSYS */
-#define SECCOMP_RET_ERRNO       0x00050000U /* returns an errno */
-#define SECCOMP_RET_TRACE       0x7ff00000U /* pass to a tracer or disallow */
-#define SECCOMP_RET_ALLOW       0x7fff0000U /* allow */
+#define SECCOMP_RET_KILL	 SECCOMP_RET_KILL_THREAD
+#define SECCOMP_RET_TRAP	 0x00030000U /* disallow and force a SIGSYS */
+#define SECCOMP_RET_ERRNO	 0x00050000U /* returns an errno */
+#define SECCOMP_RET_TRACE	 0x7ff00000U /* pass to a tracer or disallow */
+#define SECCOMP_RET_ALLOW	 0x7fff0000U /* allow */
 #endif
 #ifndef SECCOMP_RET_LOG
-#define SECCOMP_RET_LOG       0x7ffc0000U /* allow after logging */
+#define SECCOMP_RET_LOG		 0x7ffc0000U /* allow after logging */
 #endif
 
 #ifndef SECCOMP_RET_ACTION
@@ -2696,7 +2699,7 @@ TEST_SIGNAL(filter_flag_log, SIGSYS)
 
 TEST(get_action_avail)
 {
-	__u32 actions[] = { SECCOMP_RET_KILL,  SECCOMP_RET_TRAP,
+	__u32 actions[] = { SECCOMP_RET_KILL_THREAD, SECCOMP_RET_TRAP,
 			    SECCOMP_RET_ERRNO, SECCOMP_RET_TRACE,
 			    SECCOMP_RET_LOG,   SECCOMP_RET_ALLOW };
 	__u32 unknown_action = 0x10000000U;
-- 
cgit v1.2.3


From 4d3b0b05aae9ee9ce0970dc4cc0fb3fad5e85945 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 11 Aug 2017 13:01:39 -0700
Subject: seccomp: Introduce SECCOMP_RET_KILL_PROCESS

This introduces the BPF return value for SECCOMP_RET_KILL_PROCESS to kill
an entire process. This cannot yet be reached by seccomp, but it changes
the default-kill behavior (for unknown return values) from kill-thread to
kill-process.

Signed-off-by: Kees Cook <keescook@chromium.org>
---
 include/uapi/linux/seccomp.h | 18 ++++++++++--------
 kernel/seccomp.c             | 22 ++++++++++++++++------
 2 files changed, 26 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index 5a03f699eb17..7e77c92df78a 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -22,18 +22,20 @@
 /*
  * All BPF programs must return a 32-bit value.
  * The bottom 16-bits are for optional return data.
- * The upper 16-bits are ordered from least permissive values to most.
+ * The upper 16-bits are ordered from least permissive values to most,
+ * as a signed value (so 0x8000000 is negative).
  *
  * The ordering ensures that a min_t() over composed return values always
  * selects the least permissive choice.
  */
-#define SECCOMP_RET_KILL_THREAD	0x00000000U /* kill the thread */
-#define SECCOMP_RET_KILL	SECCOMP_RET_KILL_THREAD
-#define SECCOMP_RET_TRAP	0x00030000U /* disallow and force a SIGSYS */
-#define SECCOMP_RET_ERRNO	0x00050000U /* returns an errno */
-#define SECCOMP_RET_TRACE	0x7ff00000U /* pass to a tracer or disallow */
-#define SECCOMP_RET_LOG		0x7ffc0000U /* allow after logging */
-#define SECCOMP_RET_ALLOW	0x7fff0000U /* allow */
+#define SECCOMP_RET_KILL_PROCESS 0x80000000U /* kill the process */
+#define SECCOMP_RET_KILL_THREAD	 0x00000000U /* kill the thread */
+#define SECCOMP_RET_KILL	 SECCOMP_RET_KILL_THREAD
+#define SECCOMP_RET_TRAP	 0x00030000U /* disallow and force a SIGSYS */
+#define SECCOMP_RET_ERRNO	 0x00050000U /* returns an errno */
+#define SECCOMP_RET_TRACE	 0x7ff00000U /* pass to a tracer or disallow */
+#define SECCOMP_RET_LOG		 0x7ffc0000U /* allow after logging */
+#define SECCOMP_RET_ALLOW	 0x7fff0000U /* allow */
 
 /* Masks for the return value sections. */
 #define SECCOMP_RET_ACTION	0x7fff0000U
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 95ac54cff00f..5c7299b9d953 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -192,7 +192,7 @@ static u32 seccomp_run_filters(const struct seccomp_data *sd,
 
 	/* Ensure unexpected behavior doesn't result in failing open. */
 	if (unlikely(WARN_ON(f == NULL)))
-		return SECCOMP_RET_KILL_THREAD;
+		return SECCOMP_RET_KILL_PROCESS;
 
 	if (!sd) {
 		populate_seccomp_data(&sd_local);
@@ -529,14 +529,16 @@ static void seccomp_send_sigsys(int syscall, int reason)
 #endif	/* CONFIG_SECCOMP_FILTER */
 
 /* For use with seccomp_actions_logged */
-#define SECCOMP_LOG_KILL_THREAD		(1 << 0)
+#define SECCOMP_LOG_KILL_PROCESS	(1 << 0)
+#define SECCOMP_LOG_KILL_THREAD		(1 << 1)
 #define SECCOMP_LOG_TRAP		(1 << 2)
 #define SECCOMP_LOG_ERRNO		(1 << 3)
 #define SECCOMP_LOG_TRACE		(1 << 4)
 #define SECCOMP_LOG_LOG			(1 << 5)
 #define SECCOMP_LOG_ALLOW		(1 << 6)
 
-static u32 seccomp_actions_logged = SECCOMP_LOG_KILL_THREAD |
+static u32 seccomp_actions_logged = SECCOMP_LOG_KILL_PROCESS |
+				    SECCOMP_LOG_KILL_THREAD  |
 				    SECCOMP_LOG_TRAP  |
 				    SECCOMP_LOG_ERRNO |
 				    SECCOMP_LOG_TRACE |
@@ -563,8 +565,11 @@ static inline void seccomp_log(unsigned long syscall, long signr, u32 action,
 		log = seccomp_actions_logged & SECCOMP_LOG_LOG;
 		break;
 	case SECCOMP_RET_KILL_THREAD:
-	default:
 		log = seccomp_actions_logged & SECCOMP_LOG_KILL_THREAD;
+		break;
+	case SECCOMP_RET_KILL_PROCESS:
+	default:
+		log = seccomp_actions_logged & SECCOMP_LOG_KILL_PROCESS;
 	}
 
 	/*
@@ -719,10 +724,12 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
 		return 0;
 
 	case SECCOMP_RET_KILL_THREAD:
+	case SECCOMP_RET_KILL_PROCESS:
 	default:
 		seccomp_log(this_syscall, SIGSYS, action, true);
 		/* Dump core only if this is the last remaining thread. */
-		if (get_nr_threads(current) == 1) {
+		if (action == SECCOMP_RET_KILL_PROCESS ||
+		    get_nr_threads(current) == 1) {
 			siginfo_t info;
 
 			/* Show the original registers in the dump. */
@@ -731,7 +738,10 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
 			seccomp_init_siginfo(&info, this_syscall, data);
 			do_coredump(&info);
 		}
-		do_exit(SIGSYS);
+		if (action == SECCOMP_RET_KILL_PROCESS)
+			do_group_exit(SIGSYS);
+		else
+			do_exit(SIGSYS);
 	}
 
 	unreachable();
-- 
cgit v1.2.3


From 0466bdb99e8744bc9befa8d62a317f0fd7fd7421 Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Fri, 11 Aug 2017 13:12:11 -0700
Subject: seccomp: Implement SECCOMP_RET_KILL_PROCESS action
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Right now, SECCOMP_RET_KILL_THREAD (neé SECCOMP_RET_KILL) kills the
current thread. There have been a few requests for this to kill the entire
process (the thread group). This cannot be just changed (discovered when
adding coredump support since coredumping kills the entire process)
because there are userspace programs depending on the thread-kill
behavior.

Instead, implement SECCOMP_RET_KILL_PROCESS, which is 0x80000000, and can
be processed as "-1" by the kernel, below the existing RET_KILL that is
ABI-set to "0". For userspace, SECCOMP_RET_ACTION_FULL is added to expand
the mask to the signed bit. Old userspace using the SECCOMP_RET_ACTION
mask will see SECCOMP_RET_KILL_PROCESS as 0 still, but this would only
be visible when examining the siginfo in a core dump from a RET_KILL_*,
where it will think it was thread-killed instead of process-killed.

Attempts to introduce this behavior via other ways (filter flags,
seccomp struct flags, masked RET_DATA bits) all come with weird
side-effects and baggage. This change preserves the central behavioral
expectations of the seccomp filter engine without putting too great
a burden on changes needed in userspace to use the new action.

The new action is discoverable by userspace through either the new
actions_avail sysctl or through the SECCOMP_GET_ACTION_AVAIL seccomp
operation. If used without checking for availability, old kernels
will treat RET_KILL_PROCESS as RET_KILL_THREAD (since the old mask
will produce RET_KILL_THREAD).

Cc: Paul Moore <paul@paul-moore.com>
Cc: Fabricio Voznika <fvoznika@google.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
---
 Documentation/userspace-api/seccomp_filter.rst | 7 ++++++-
 include/uapi/linux/seccomp.h                   | 1 +
 kernel/seccomp.c                               | 9 +++++++--
 3 files changed, 14 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/Documentation/userspace-api/seccomp_filter.rst b/Documentation/userspace-api/seccomp_filter.rst
index d76396f2d8ed..099c412951d6 100644
--- a/Documentation/userspace-api/seccomp_filter.rst
+++ b/Documentation/userspace-api/seccomp_filter.rst
@@ -87,10 +87,15 @@ Return values
 A seccomp filter may return any of the following values. If multiple
 filters exist, the return value for the evaluation of a given system
 call will always use the highest precedent value. (For example,
-``SECCOMP_RET_KILL_THREAD`` will always take precedence.)
+``SECCOMP_RET_KILL_PROCESS`` will always take precedence.)
 
 In precedence order, they are:
 
+``SECCOMP_RET_KILL_PROCESS``:
+	Results in the entire process exiting immediately without executing
+	the system call.  The exit status of the task (``status & 0x7f``)
+	will be ``SIGSYS``, not ``SIGKILL``.
+
 ``SECCOMP_RET_KILL_THREAD``:
 	Results in the task exiting immediately without executing the
 	system call.  The exit status of the task (``status & 0x7f``) will
diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h
index 7e77c92df78a..f6bc1dea3247 100644
--- a/include/uapi/linux/seccomp.h
+++ b/include/uapi/linux/seccomp.h
@@ -38,6 +38,7 @@
 #define SECCOMP_RET_ALLOW	 0x7fff0000U /* allow */
 
 /* Masks for the return value sections. */
+#define SECCOMP_RET_ACTION_FULL	0xffff0000U
 #define SECCOMP_RET_ACTION	0x7fff0000U
 #define SECCOMP_RET_DATA	0x0000ffffU
 
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 5c7299b9d953..c24579dfa7a1 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -181,6 +181,7 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen)
  *
  * Returns valid seccomp BPF response codes.
  */
+#define ACTION_ONLY(ret) ((s32)((ret) & (SECCOMP_RET_ACTION_FULL)))
 static u32 seccomp_run_filters(const struct seccomp_data *sd,
 			       struct seccomp_filter **match)
 {
@@ -206,7 +207,7 @@ static u32 seccomp_run_filters(const struct seccomp_data *sd,
 	for (; f; f = f->prev) {
 		u32 cur_ret = BPF_PROG_RUN(f->prog, sd);
 
-		if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION)) {
+		if (ACTION_ONLY(cur_ret) < ACTION_ONLY(ret)) {
 			ret = cur_ret;
 			*match = f;
 		}
@@ -650,7 +651,7 @@ static int __seccomp_filter(int this_syscall, const struct seccomp_data *sd,
 
 	filter_ret = seccomp_run_filters(sd, &match);
 	data = filter_ret & SECCOMP_RET_DATA;
-	action = filter_ret & SECCOMP_RET_ACTION;
+	action = filter_ret & SECCOMP_RET_ACTION_FULL;
 
 	switch (action) {
 	case SECCOMP_RET_ERRNO:
@@ -890,6 +891,7 @@ static long seccomp_get_action_avail(const char __user *uaction)
 		return -EFAULT;
 
 	switch (action) {
+	case SECCOMP_RET_KILL_PROCESS:
 	case SECCOMP_RET_KILL_THREAD:
 	case SECCOMP_RET_TRAP:
 	case SECCOMP_RET_ERRNO:
@@ -1041,6 +1043,7 @@ out:
 #ifdef CONFIG_SYSCTL
 
 /* Human readable action names for friendly sysctl interaction */
+#define SECCOMP_RET_KILL_PROCESS_NAME	"kill_process"
 #define SECCOMP_RET_KILL_THREAD_NAME	"kill_thread"
 #define SECCOMP_RET_TRAP_NAME		"trap"
 #define SECCOMP_RET_ERRNO_NAME		"errno"
@@ -1049,6 +1052,7 @@ out:
 #define SECCOMP_RET_ALLOW_NAME		"allow"
 
 static const char seccomp_actions_avail[] =
+				SECCOMP_RET_KILL_PROCESS_NAME	" "
 				SECCOMP_RET_KILL_THREAD_NAME	" "
 				SECCOMP_RET_TRAP_NAME		" "
 				SECCOMP_RET_ERRNO_NAME		" "
@@ -1062,6 +1066,7 @@ struct seccomp_log_name {
 };
 
 static const struct seccomp_log_name seccomp_log_names[] = {
+	{ SECCOMP_LOG_KILL_PROCESS, SECCOMP_RET_KILL_PROCESS_NAME },
 	{ SECCOMP_LOG_KILL_THREAD, SECCOMP_RET_KILL_THREAD_NAME },
 	{ SECCOMP_LOG_TRAP, SECCOMP_RET_TRAP_NAME },
 	{ SECCOMP_LOG_ERRNO, SECCOMP_RET_ERRNO_NAME },
-- 
cgit v1.2.3


From 74378c5c8cdaf0ce9f65e67cbd0613286f2c3bad Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Tue, 5 Sep 2017 20:16:27 +0200
Subject: driver core: Fix link to device power management documentation

Correct location as of commit 2728b2d2e5be4b82 (PM / core / docs:
Convert sleep states API document to reST).

Fixes: 2728b2d2e5be4b82 (PM / core / docs: Convert sleep states API document to reST)
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/device.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/device.h b/include/linux/device.h
index c6f27207dbe8..1d2607923a24 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -838,7 +838,7 @@ struct dev_links_info {
  * @driver_data: Private pointer for driver specific info.
  * @links:	Links to suppliers and consumers of this device.
  * @power:	For device power management.
- * 		See Documentation/power/admin-guide/devices.rst for details.
+ *		See Documentation/driver-api/pm/devices.rst for details.
  * @pm_domain:	Provide callbacks that are executed during system suspend,
  * 		hibernation, system resume and during runtime PM transitions
  * 		along with subsystem-level and driver-level callbacks.
-- 
cgit v1.2.3


From 4c7124413aa759b8ea0b90cd39177e525396e662 Mon Sep 17 00:00:00 2001
From: Yuchung Cheng <ycheng@google.com>
Date: Mon, 18 Sep 2017 11:05:16 -0700
Subject: tcp: remove two unused functions

remove tcp_may_send_now and tcp_snd_test that are no longer used

Fixes: 840a3cbe8969 ("tcp: remove forward retransmit feature")
Signed-off-by: Yuchung Cheng <ycheng@google.com>
Signed-off-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h     |  1 -
 net/ipv4/tcp_output.c | 34 ----------------------------------
 2 files changed, 35 deletions(-)

(limited to 'include')

diff --git a/include/net/tcp.h b/include/net/tcp.h
index b510f284427a..3bc910a9bfc6 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -544,7 +544,6 @@ u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now,
 		     int min_tso_segs);
 void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss,
 			       int nonagle);
-bool tcp_may_send_now(struct sock *sk);
 int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs);
 int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs);
 void tcp_retransmit_timer(struct sock *sk);
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 1c839c99114c..517d737059d1 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -1806,40 +1806,6 @@ static bool tcp_snd_wnd_test(const struct tcp_sock *tp,
 	return !after(end_seq, tcp_wnd_end(tp));
 }
 
-/* This checks if the data bearing packet SKB (usually tcp_send_head(sk))
- * should be put on the wire right now.  If so, it returns the number of
- * packets allowed by the congestion window.
- */
-static unsigned int tcp_snd_test(const struct sock *sk, struct sk_buff *skb,
-				 unsigned int cur_mss, int nonagle)
-{
-	const struct tcp_sock *tp = tcp_sk(sk);
-	unsigned int cwnd_quota;
-
-	tcp_init_tso_segs(skb, cur_mss);
-
-	if (!tcp_nagle_test(tp, skb, cur_mss, nonagle))
-		return 0;
-
-	cwnd_quota = tcp_cwnd_test(tp, skb);
-	if (cwnd_quota && !tcp_snd_wnd_test(tp, skb, cur_mss))
-		cwnd_quota = 0;
-
-	return cwnd_quota;
-}
-
-/* Test if sending is allowed right now. */
-bool tcp_may_send_now(struct sock *sk)
-{
-	const struct tcp_sock *tp = tcp_sk(sk);
-	struct sk_buff *skb = tcp_send_head(sk);
-
-	return skb &&
-		tcp_snd_test(sk, skb, tcp_current_mss(sk),
-			     (tcp_skb_is_last(sk, skb) ?
-			      tp->nonagle : TCP_NAGLE_PUSH));
-}
-
 /* Trim TSO SKB to LEN bytes, put the remaining data into a new packet
  * which is put after SKB on the list.  It is very much like
  * tcp_fragment() except that it may make several kinds of assumptions
-- 
cgit v1.2.3


From 0555ac4333d7da7cff83d5b06bd3679a3e1ef584 Mon Sep 17 00:00:00 2001
From: Tycho Andersen <tycho@docker.com>
Date: Mon, 18 Sep 2017 16:35:32 -0600
Subject: xen, arm64: drop dummy lookup_address()

This is unused, and conflicts with the definition that we'll add for XPFO.

Signed-off-by: Tycho Andersen <tycho@docker.com>
Reviewed-by: Julien Grall <julien.grall@arm.com>
CC: Boris Ostrovsky <boris.ostrovsky@oracle.com>
CC: Juergen Gross <jgross@suse.com>
CC: Stefano Stabellini <sstabellini@kernel.org>
Signed-off-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>
---
 include/xen/arm/page.h | 10 ----------
 1 file changed, 10 deletions(-)

(limited to 'include')

diff --git a/include/xen/arm/page.h b/include/xen/arm/page.h
index 415dbc6e43fd..6adc2a955340 100644
--- a/include/xen/arm/page.h
+++ b/include/xen/arm/page.h
@@ -84,16 +84,6 @@ static inline xmaddr_t arbitrary_virt_to_machine(void *vaddr)
 	BUG();
 }
 
-/* TODO: this shouldn't be here but it is because the frontend drivers
- * are using it (its rolled in headers) even though we won't hit the code path.
- * So for right now just punt with this.
- */
-static inline pte_t *lookup_address(unsigned long address, unsigned int *level)
-{
-	BUG();
-	return NULL;
-}
-
 extern int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
 				   struct gnttab_map_grant_ref *kmap_ops,
 				   struct page **pages, unsigned int count);
-- 
cgit v1.2.3


From 9e987b70ada27554c5d176421de1d167218c49b5 Mon Sep 17 00:00:00 2001
From: John Hubbard <jhubbard@nvidia.com>
Date: Fri, 15 Sep 2017 17:35:27 -0700
Subject: ACPI / bus: Make ACPI_HANDLE() work for non-GPL code again

Due to commit db3e50f3234b (device property: Get rid of struct
fwnode_handle type field), ACPI_HANDLE() inadvertently became
a GPL-only call. The call path that led to that was:

ACPI_HANDLE()
    ACPI_COMPANION()
        to_acpi_device_node()
            is_acpi_device_node()
                acpi_device_fwnode_ops
                    DECLARE_ACPI_FWNODE_OPS(acpi_device_fwnode_ops);

...and the new DECLARE_ACPI_FWNODE_OPS() includes
EXPORT_SYMBOL_GPL, whereas previously it was a static struct.

In order to avoid changing any of that, let's instead provide ever
so slightly better encapsulation of those struct fwnode_operations
instances. Those do not really need to be directly used in
inline function calls in header files. Simply moving two small
functions (is_acpi_device_node and is_acpi_data_node) out of
acpi_bus.h, and into a .c file, does that.

That leaves the internals of struct fwnode_operations as GPL-only
(which I think was the intent all along), but un-breaks any driver
code out there that relies on the ACPI subsystem's being (historically)
an EXPORT_SYMBOL-usable system. By that, I mean, ACPI_HANDLE() and
other basic ACPI calls were non-GPL-protected.

Also, while I'm there, remove a tiny bit of redundancy that was missed
in the earlier commit, by having is_acpi_node() use the other two
routines, instead of checking fwnode directly.

Fixes: db3e50f3234b (device property: Get rid of struct fwnode_handle type field)
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
Acked-by: Sakari Ailus <sakari.ailus@linux.intel.com>
Acked-by: Mika Westerberg <mika.westerberg@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/property.c | 13 +++++++++++++
 include/acpi/acpi_bus.h | 18 ++++--------------
 2 files changed, 17 insertions(+), 14 deletions(-)

(limited to 'include')

diff --git a/drivers/acpi/property.c b/drivers/acpi/property.c
index c1c216163de3..1e3c2517a1ac 100644
--- a/drivers/acpi/property.c
+++ b/drivers/acpi/property.c
@@ -1293,3 +1293,16 @@ static int acpi_fwnode_graph_parse_endpoint(const struct fwnode_handle *fwnode,
 DECLARE_ACPI_FWNODE_OPS(acpi_device_fwnode_ops);
 DECLARE_ACPI_FWNODE_OPS(acpi_data_fwnode_ops);
 const struct fwnode_operations acpi_static_fwnode_ops;
+
+bool is_acpi_device_node(const struct fwnode_handle *fwnode)
+{
+	return !IS_ERR_OR_NULL(fwnode) &&
+		fwnode->ops == &acpi_device_fwnode_ops;
+}
+EXPORT_SYMBOL(is_acpi_device_node);
+
+bool is_acpi_data_node(const struct fwnode_handle *fwnode)
+{
+	return !IS_ERR_OR_NULL(fwnode) && fwnode->ops == &acpi_data_fwnode_ops;
+}
+EXPORT_SYMBOL(is_acpi_data_node);
diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h
index dedf9d789166..fa1505292f6c 100644
--- a/include/acpi/acpi_bus.h
+++ b/include/acpi/acpi_bus.h
@@ -399,17 +399,12 @@ extern const struct fwnode_operations acpi_device_fwnode_ops;
 extern const struct fwnode_operations acpi_data_fwnode_ops;
 extern const struct fwnode_operations acpi_static_fwnode_ops;
 
+bool is_acpi_device_node(const struct fwnode_handle *fwnode);
+bool is_acpi_data_node(const struct fwnode_handle *fwnode);
+
 static inline bool is_acpi_node(const struct fwnode_handle *fwnode)
 {
-	return !IS_ERR_OR_NULL(fwnode) &&
-		(fwnode->ops == &acpi_device_fwnode_ops
-		 || fwnode->ops == &acpi_data_fwnode_ops);
-}
-
-static inline bool is_acpi_device_node(const struct fwnode_handle *fwnode)
-{
-	return !IS_ERR_OR_NULL(fwnode) &&
-		fwnode->ops == &acpi_device_fwnode_ops;
+	return (is_acpi_device_node(fwnode) || is_acpi_data_node(fwnode));
 }
 
 #define to_acpi_device_node(__fwnode)					\
@@ -422,11 +417,6 @@ static inline bool is_acpi_device_node(const struct fwnode_handle *fwnode)
 			NULL;						\
 	})
 
-static inline bool is_acpi_data_node(const struct fwnode_handle *fwnode)
-{
-	return !IS_ERR_OR_NULL(fwnode) && fwnode->ops == &acpi_data_fwnode_ops;
-}
-
 #define to_acpi_data_node(__fwnode)					\
 	({								\
 		typeof(__fwnode) __to_acpi_data_node_fwnode = __fwnode;	\
-- 
cgit v1.2.3


From ec9dd352d591f0c90402ec67a317c1ed4fb2e638 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Mon, 18 Sep 2017 16:38:36 -0700
Subject: bpf: one perf event close won't free bpf program attached by another
 perf event

This patch fixes a bug exhibited by the following scenario:
  1. fd1 = perf_event_open with attr.config = ID1
  2. attach bpf program prog1 to fd1
  3. fd2 = perf_event_open with attr.config = ID1
     <this will be successful>
  4. user program closes fd2 and prog1 is detached from the tracepoint.
  5. user program with fd1 does not work properly as tracepoint
     no output any more.

The issue happens at step 4. Multiple perf_event_open can be called
successfully, but only one bpf prog pointer in the tp_event. In the
current logic, any fd release for the same tp_event will free
the tp_event->prog.

The fix is to free tp_event->prog only when the closing fd
corresponds to the one which registered the program.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/trace_events.h | 1 +
 kernel/events/core.c         | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 7f11050746ae..2e0f22298fe9 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -272,6 +272,7 @@ struct trace_event_call {
 	int				perf_refcount;
 	struct hlist_head __percpu	*perf_events;
 	struct bpf_prog			*prog;
+	struct perf_event		*bpf_prog_owner;
 
 	int	(*perf_perm)(struct trace_event_call *,
 			     struct perf_event *);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 3e691b75b2db..6bc21e202ae4 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -8171,6 +8171,7 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
 		}
 	}
 	event->tp_event->prog = prog;
+	event->tp_event->bpf_prog_owner = event;
 
 	return 0;
 }
@@ -8185,7 +8186,7 @@ static void perf_event_free_bpf_prog(struct perf_event *event)
 		return;
 
 	prog = event->tp_event->prog;
-	if (prog) {
+	if (prog && event->tp_event->bpf_prog_owner == event) {
 		event->tp_event->prog = NULL;
 		bpf_prog_put(prog);
 	}
-- 
cgit v1.2.3


From 19cab8872692960535aa6d12e3a295ac51d1a648 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Wed, 20 Sep 2017 15:52:13 -0700
Subject: net: ethtool: Add back transceiver type

Commit 3f1ac7a700d0 ("net: ethtool: add new ETHTOOL_xLINKSETTINGS API")
deprecated the ethtool_cmd::transceiver field, which was fine in
premise, except that the PHY library was actually using it to report the
type of transceiver: internal or external.

Use the first word of the reserved field to put this __u8 transceiver
field back in. It is made read-only, and we don't expect the
ETHTOOL_xLINKSETTINGS API to be doing anything with this anyway, so this
is mostly for the legacy path where we do:

ethtool_get_settings()
-> dev->ethtool_ops->get_link_ksettings()
   -> convert_link_ksettings_to_legacy_settings()

to have no information loss compared to the legacy get_settings API.

Fixes: 3f1ac7a700d0 ("net: ethtool: add new ETHTOOL_xLINKSETTINGS API")
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/ethtool.h | 6 +++++-
 net/core/ethtool.c           | 2 ++
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 9c041dae8e2c..5bd1b1de4ea0 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -1753,6 +1753,8 @@ enum ethtool_reset_flags {
  *	%ethtool_link_mode_bit_indices for the link modes, and other
  *	link features that the link partner advertised through
  *	autonegotiation; 0 if unknown or not applicable.  Read-only.
+ * @transceiver: Used to distinguish different possible PHY types,
+ *	reported consistently by PHYLIB.  Read-only.
  *
  * If autonegotiation is disabled, the speed and @duplex represent the
  * fixed link mode and are writable if the driver supports multiple
@@ -1804,7 +1806,9 @@ struct ethtool_link_settings {
 	__u8	eth_tp_mdix;
 	__u8	eth_tp_mdix_ctrl;
 	__s8	link_mode_masks_nwords;
-	__u32	reserved[8];
+	__u8	transceiver;
+	__u8	reserved1[3];
+	__u32	reserved[7];
 	__u32	link_mode_masks[0];
 	/* layout of link_mode_masks fields:
 	 * __u32 map_supported[link_mode_masks_nwords];
diff --git a/net/core/ethtool.c b/net/core/ethtool.c
index 6a582ae4c5d9..3228411ada0f 100644
--- a/net/core/ethtool.c
+++ b/net/core/ethtool.c
@@ -525,6 +525,8 @@ convert_link_ksettings_to_legacy_settings(
 		= link_ksettings->base.eth_tp_mdix;
 	legacy_settings->eth_tp_mdix_ctrl
 		= link_ksettings->base.eth_tp_mdix_ctrl;
+	legacy_settings->transceiver
+		= link_ksettings->base.transceiver;
 	return retval;
 }
 
-- 
cgit v1.2.3


From e8b95728f724797f958912fd9b765a695595d3a6 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Fri, 1 Sep 2017 17:13:43 -0700
Subject: Input: uinput - avoid FF flush when destroying device
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Normally, when input device supporting force feedback effects is being
destroyed, we try to "flush" currently playing effects, so that the
physical device does not continue vibrating (or executing other effects).
Unfortunately this does not work well for uinput as flushing of the effects
deadlocks with the destroy action:

- if device is being destroyed because the file descriptor is being closed,
  then there is noone to even service FF requests;

- if device is being destroyed because userspace sent UI_DEV_DESTROY,
  while theoretically it could be possible to service FF requests,
  userspace is unlikely to do so (they'd need to make sure FF handling
  happens on a separate thread) even if kernel solves the issue with FF
  ioctls deadlocking with UI_DEV_DESTROY ioctl on udev->mutex.

To avoid lockups like the one below, let's install a custom input device
flush handler, and avoid trying to flush force feedback effects when we
destroying the device, and instead rely on uinput to shut off the device
properly.

NMI watchdog: Watchdog detected hard LOCKUP on cpu 3
...
 <<EOE>>  [<ffffffff817a0307>] _raw_spin_lock_irqsave+0x37/0x40
 [<ffffffff810e633d>] complete+0x1d/0x50
 [<ffffffffa00ba08c>] uinput_request_done+0x3c/0x40 [uinput]
 [<ffffffffa00ba587>] uinput_request_submit.part.7+0x47/0xb0 [uinput]
 [<ffffffffa00bb62b>] uinput_dev_erase_effect+0x5b/0x76 [uinput]
 [<ffffffff815d91ad>] erase_effect+0xad/0xf0
 [<ffffffff815d929d>] flush_effects+0x4d/0x90
 [<ffffffff815d4cc0>] input_flush_device+0x40/0x60
 [<ffffffff815daf1c>] evdev_cleanup+0xac/0xc0
 [<ffffffff815daf5b>] evdev_disconnect+0x2b/0x60
 [<ffffffff815d74ac>] __input_unregister_device+0xac/0x150
 [<ffffffff815d75f7>] input_unregister_device+0x47/0x70
 [<ffffffffa00bac45>] uinput_destroy_device+0xb5/0xc0 [uinput]
 [<ffffffffa00bb2de>] uinput_ioctl_handler.isra.9+0x65e/0x740 [uinput]
 [<ffffffff811231ab>] ? do_futex+0x12b/0xad0
 [<ffffffffa00bb3f8>] uinput_ioctl+0x18/0x20 [uinput]
 [<ffffffff81241248>] do_vfs_ioctl+0x298/0x480
 [<ffffffff81337553>] ? security_file_ioctl+0x43/0x60
 [<ffffffff812414a9>] SyS_ioctl+0x79/0x90
 [<ffffffff817a04ee>] entry_SYSCALL_64_fastpath+0x12/0x71

Reported-by: Rodrigo Rivas Costa <rodrigorivascosta@gmail.com>
Reported-by: Clément VUCHENER <clement.vuchener@gmail.com>
Fixes: https://bugzilla.kernel.org/show_bug.cgi?id=193741
Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
---
 drivers/input/ff-core.c     | 13 ++++++++++---
 drivers/input/misc/uinput.c | 18 ++++++++++++++++++
 include/linux/input.h       |  1 +
 3 files changed, 29 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/input/ff-core.c b/drivers/input/ff-core.c
index 8f2042432c85..66a46c84e28f 100644
--- a/drivers/input/ff-core.c
+++ b/drivers/input/ff-core.c
@@ -237,9 +237,15 @@ int input_ff_erase(struct input_dev *dev, int effect_id, struct file *file)
 EXPORT_SYMBOL_GPL(input_ff_erase);
 
 /*
- * flush_effects - erase all effects owned by a file handle
+ * input_ff_flush - erase all effects owned by a file handle
+ * @dev: input device to erase effect from
+ * @file: purported owner of the effects
+ *
+ * This function erases all force-feedback effects associated with
+ * the given owner from specified device. Note that @file may be %NULL,
+ * in which case all effects will be erased.
  */
-static int flush_effects(struct input_dev *dev, struct file *file)
+int input_ff_flush(struct input_dev *dev, struct file *file)
 {
 	struct ff_device *ff = dev->ff;
 	int i;
@@ -255,6 +261,7 @@ static int flush_effects(struct input_dev *dev, struct file *file)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(input_ff_flush);
 
 /**
  * input_ff_event() - generic handler for force-feedback events
@@ -343,7 +350,7 @@ int input_ff_create(struct input_dev *dev, unsigned int max_effects)
 	mutex_init(&ff->mutex);
 
 	dev->ff = ff;
-	dev->flush = flush_effects;
+	dev->flush = input_ff_flush;
 	dev->event = input_ff_event;
 	__set_bit(EV_FF, dev->evbit);
 
diff --git a/drivers/input/misc/uinput.c b/drivers/input/misc/uinput.c
index 022be0e22eba..2cff40be8860 100644
--- a/drivers/input/misc/uinput.c
+++ b/drivers/input/misc/uinput.c
@@ -230,6 +230,18 @@ static int uinput_dev_erase_effect(struct input_dev *dev, int effect_id)
 	return uinput_request_submit(udev, &request);
 }
 
+static int uinput_dev_flush(struct input_dev *dev, struct file *file)
+{
+	/*
+	 * If we are called with file == NULL that means we are tearing
+	 * down the device, and therefore we can not handle FF erase
+	 * requests: either we are handling UI_DEV_DESTROY (and holding
+	 * the udev->mutex), or the file descriptor is closed and there is
+	 * nobody on the other side anymore.
+	 */
+	return file ? input_ff_flush(dev, file) : 0;
+}
+
 static void uinput_destroy_device(struct uinput_device *udev)
 {
 	const char *name, *phys;
@@ -297,6 +309,12 @@ static int uinput_create_device(struct uinput_device *udev)
 		dev->ff->playback = uinput_dev_playback;
 		dev->ff->set_gain = uinput_dev_set_gain;
 		dev->ff->set_autocenter = uinput_dev_set_autocenter;
+		/*
+		 * The standard input_ff_flush() implementation does
+		 * not quite work for uinput as we can't reasonably
+		 * handle FF requests during device teardown.
+		 */
+		dev->flush = uinput_dev_flush;
 	}
 
 	error = input_register_device(udev->dev);
diff --git a/include/linux/input.h b/include/linux/input.h
index a65e3b24fb18..fb5e23c7ed98 100644
--- a/include/linux/input.h
+++ b/include/linux/input.h
@@ -529,6 +529,7 @@ int input_ff_event(struct input_dev *dev, unsigned int type, unsigned int code,
 
 int input_ff_upload(struct input_dev *dev, struct ff_effect *effect, struct file *file);
 int input_ff_erase(struct input_dev *dev, int effect_id, struct file *file);
+int input_ff_flush(struct input_dev *dev, struct file *file);
 
 int input_ff_create_memless(struct input_dev *dev, void *data,
 		int (*play_effect)(struct input_dev *, void *, struct ff_effect *));
-- 
cgit v1.2.3


From 222d7dbd258dad4cd5241c43ef818141fad5a87a Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 21 Sep 2017 09:15:46 -0700
Subject: net: prevent dst uses after free
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In linux-4.13, Wei worked hard to convert dst to a traditional
refcounted model, removing GC.

We now want to make sure a dst refcount can not transition from 0 back
to 1.

The problem here is that input path attached a not refcounted dst to an
skb. Then later, because packet is forwarded and hits skb_dst_force()
before exiting RCU section, we might try to take a refcount on one dst
that is about to be freed, if another cpu saw 1 -> 0 transition in
dst_release() and queued the dst for freeing after one RCU grace period.

Lets unify skb_dst_force() and skb_dst_force_safe(), since we should
always perform the complete check against dst refcount, and not assume
it is not zero.

Bugzilla : https://bugzilla.kernel.org/show_bug.cgi?id=197005

[  989.919496]  skb_dst_force+0x32/0x34
[  989.919498]  __dev_queue_xmit+0x1ad/0x482
[  989.919501]  ? eth_header+0x28/0xc6
[  989.919502]  dev_queue_xmit+0xb/0xd
[  989.919504]  neigh_connected_output+0x9b/0xb4
[  989.919507]  ip_finish_output2+0x234/0x294
[  989.919509]  ? ipt_do_table+0x369/0x388
[  989.919510]  ip_finish_output+0x12c/0x13f
[  989.919512]  ip_output+0x53/0x87
[  989.919513]  ip_forward_finish+0x53/0x5a
[  989.919515]  ip_forward+0x2cb/0x3e6
[  989.919516]  ? pskb_trim_rcsum.part.9+0x4b/0x4b
[  989.919518]  ip_rcv_finish+0x2e2/0x321
[  989.919519]  ip_rcv+0x26f/0x2eb
[  989.919522]  ? vlan_do_receive+0x4f/0x289
[  989.919523]  __netif_receive_skb_core+0x467/0x50b
[  989.919526]  ? tcp_gro_receive+0x239/0x239
[  989.919529]  ? inet_gro_receive+0x226/0x238
[  989.919530]  __netif_receive_skb+0x4d/0x5f
[  989.919532]  netif_receive_skb_internal+0x5c/0xaf
[  989.919533]  napi_gro_receive+0x45/0x81
[  989.919536]  ixgbe_poll+0xc8a/0xf09
[  989.919539]  ? kmem_cache_free_bulk+0x1b6/0x1f7
[  989.919540]  net_rx_action+0xf4/0x266
[  989.919543]  __do_softirq+0xa8/0x19d
[  989.919545]  irq_exit+0x5d/0x6b
[  989.919546]  do_IRQ+0x9c/0xb5
[  989.919548]  common_interrupt+0x93/0x93
[  989.919548]  </IRQ>

Similarly dst_clone() can use dst_hold() helper to have additional
debugging, as a follow up to commit 44ebe79149ff ("net: add debug
atomic_inc_not_zero() in dst_hold()")

In net-next we will convert dst atomic_t to refcount_t for peace of
mind.

Fixes: a4c2fd7f7891 ("net: remove DST_NOCACHE flag")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Wei Wang <weiwan@google.com>
Reported-by: Paweł Staszewski <pstaszewski@itcare.pl>
Bisected-by: Paweł Staszewski <pstaszewski@itcare.pl>
Acked-by: Wei Wang <weiwan@google.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dst.h   | 22 ++++------------------
 include/net/route.h |  2 +-
 include/net/sock.h  |  2 +-
 3 files changed, 6 insertions(+), 20 deletions(-)

(limited to 'include')

diff --git a/include/net/dst.h b/include/net/dst.h
index 93568bd0a352..06a6765da074 100644
--- a/include/net/dst.h
+++ b/include/net/dst.h
@@ -271,7 +271,7 @@ static inline void dst_use_noref(struct dst_entry *dst, unsigned long time)
 static inline struct dst_entry *dst_clone(struct dst_entry *dst)
 {
 	if (dst)
-		atomic_inc(&dst->__refcnt);
+		dst_hold(dst);
 	return dst;
 }
 
@@ -311,21 +311,6 @@ static inline void skb_dst_copy(struct sk_buff *nskb, const struct sk_buff *oskb
 	__skb_dst_copy(nskb, oskb->_skb_refdst);
 }
 
-/**
- * skb_dst_force - makes sure skb dst is refcounted
- * @skb: buffer
- *
- * If dst is not yet refcounted, let's do it
- */
-static inline void skb_dst_force(struct sk_buff *skb)
-{
-	if (skb_dst_is_noref(skb)) {
-		WARN_ON(!rcu_read_lock_held());
-		skb->_skb_refdst &= ~SKB_DST_NOREF;
-		dst_clone(skb_dst(skb));
-	}
-}
-
 /**
  * dst_hold_safe - Take a reference on a dst if possible
  * @dst: pointer to dst entry
@@ -339,16 +324,17 @@ static inline bool dst_hold_safe(struct dst_entry *dst)
 }
 
 /**
- * skb_dst_force_safe - makes sure skb dst is refcounted
+ * skb_dst_force - makes sure skb dst is refcounted
  * @skb: buffer
  *
  * If dst is not yet refcounted and not destroyed, grab a ref on it.
  */
-static inline void skb_dst_force_safe(struct sk_buff *skb)
+static inline void skb_dst_force(struct sk_buff *skb)
 {
 	if (skb_dst_is_noref(skb)) {
 		struct dst_entry *dst = skb_dst(skb);
 
+		WARN_ON(!rcu_read_lock_held());
 		if (!dst_hold_safe(dst))
 			dst = NULL;
 
diff --git a/include/net/route.h b/include/net/route.h
index 1b09a9368c68..57dfc6850d37 100644
--- a/include/net/route.h
+++ b/include/net/route.h
@@ -190,7 +190,7 @@ static inline int ip_route_input(struct sk_buff *skb, __be32 dst, __be32 src,
 	rcu_read_lock();
 	err = ip_route_input_noref(skb, dst, src, tos, devin);
 	if (!err) {
-		skb_dst_force_safe(skb);
+		skb_dst_force(skb);
 		if (!skb_dst(skb))
 			err = -EINVAL;
 	}
diff --git a/include/net/sock.h b/include/net/sock.h
index 03a362568357..a6b9a8d1a6df 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -856,7 +856,7 @@ void sk_stream_write_space(struct sock *sk);
 static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb)
 {
 	/* dont let skb dst not refcounted, we are going to leave rcu lock */
-	skb_dst_force_safe(skb);
+	skb_dst_force(skb);
 
 	if (!sk->sk_backlog.tail)
 		sk->sk_backlog.head = skb;
-- 
cgit v1.2.3