From a9764869779081e8bf24da07ac040e8f3efcf13a Mon Sep 17 00:00:00 2001
From: KaiChieh Chuang <kaichieh.chuang@mediatek.com>
Date: Fri, 8 Mar 2019 13:05:53 +0800
Subject: ASoC: dpcm: prevent snd_soc_dpcm use after free

The dpcm get from fe_clients/be_clients
may be free before use

Add a spin lock at snd_soc_card level,
to protect the dpcm instance.
The lock may be used in atomic context, so use spin lock.

Use irq spin lock version,
since the lock may be used in interrupts.

possible race condition between
void dpcm_be_disconnect(
	...
	list_del(&dpcm->list_be);
	list_del(&dpcm->list_fe);
	kfree(dpcm);
	...

and
	for_each_dpcm_fe()
	for_each_dpcm_be*()

race condition example
Thread 1:
    snd_soc_dapm_mixer_update_power()
        -> soc_dpcm_runtime_update()
            -> dpcm_be_disconnect()
                -> kfree(dpcm);
Thread 2:
    dpcm_fe_dai_trigger()
        -> dpcm_be_dai_trigger()
            -> snd_soc_dpcm_can_be_free_stop()
                -> if (dpcm->fe == fe)

Excpetion Scenario:
	two FE link to same BE
	FE1 -> BE
	FE2 ->

	Thread 1: switch of mixer between FE2 -> BE
	Thread 2: pcm_stop FE1

Exception:

Unable to handle kernel paging request at virtual address dead0000000000e0

pc=<> [<ffffff8960e2cd10>] dpcm_be_dai_trigger+0x29c/0x47c
	sound/soc/soc-pcm.c:3226
		if (dpcm->fe == fe)
lr=<> [<ffffff8960e2f694>] dpcm_fe_dai_do_trigger+0x94/0x26c

Backtrace:
[<ffffff89602dba80>] notify_die+0x68/0xb8
[<ffffff896028c7dc>] die+0x118/0x2a8
[<ffffff89602a2f84>] __do_kernel_fault+0x13c/0x14c
[<ffffff89602a27f4>] do_translation_fault+0x64/0xa0
[<ffffff8960280cf8>] do_mem_abort+0x4c/0xd0
[<ffffff8960282ad0>] el1_da+0x24/0x40
[<ffffff8960e2cd10>] dpcm_be_dai_trigger+0x29c/0x47c
[<ffffff8960e2f694>] dpcm_fe_dai_do_trigger+0x94/0x26c
[<ffffff8960e2edec>] dpcm_fe_dai_trigger+0x3c/0x44
[<ffffff8960de5588>] snd_pcm_do_stop+0x50/0x5c
[<ffffff8960dded24>] snd_pcm_action+0xb4/0x13c
[<ffffff8960ddfdb4>] snd_pcm_drop+0xa0/0x128
[<ffffff8960de69bc>] snd_pcm_common_ioctl+0x9d8/0x30f0
[<ffffff8960de1cac>] snd_pcm_ioctl_compat+0x29c/0x2f14
[<ffffff89604c9d60>] compat_SyS_ioctl+0x128/0x244
[<ffffff8960283740>] el0_svc_naked+0x34/0x38
[<ffffffffffffffff>] 0xffffffffffffffff

Signed-off-by: KaiChieh Chuang <kaichieh.chuang@mediatek.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc.h | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'include')
diff --git a/include/sound/soc.h b/include/sound/soc.h
index eb7db605955b..1e2be35ed36f 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -1083,6 +1083,8 @@ struct snd_soc_card {
 	struct mutex mutex;
 	struct mutex dapm_mutex;
 
+	spinlock_t dpcm_lock;
+
 	bool instantiated;
 	bool topology_shortname_created;
 
-- 
cgit v1.2.3


From 1b986589680a2a5b6fc1ac196ea69925a93d9dd9 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Tue, 12 Mar 2019 10:23:02 -0700
Subject: bpf: Fix bpf_tcp_sock and bpf_sk_fullsock issue related to
 bpf_sk_release

Lorenz Bauer [thanks!] reported that a ptr returned by bpf_tcp_sock(sk)
can still be accessed after bpf_sk_release(sk).
Both bpf_tcp_sock() and bpf_sk_fullsock() have the same issue.
This patch addresses them together.

A simple reproducer looks like this:

	sk = bpf_sk_lookup_tcp();
	/* if (!sk) ... */
	tp = bpf_tcp_sock(sk);
	/* if (!tp) ... */
	bpf_sk_release(sk);
	snd_cwnd = tp->snd_cwnd; /* oops! The verifier does not complain. */

The problem is the verifier did not scrub the register's states of
the tcp_sock ptr (tp) after bpf_sk_release(sk).

[ Note that when calling bpf_tcp_sock(sk), the sk is not always
  refcount-acquired. e.g. bpf_tcp_sock(skb->sk). The verifier works
  fine for this case. ]

Currently, the verifier does not track if a helper's return ptr (in REG_0)
is "carry"-ing one of its argument's refcount status. To carry this info,
the reg1->id needs to be stored in reg0.

One approach was tried, like "reg0->id = reg1->id", when calling
"bpf_tcp_sock()".  The main idea was to avoid adding another "ref_obj_id"
for the same reg.  However, overlapping the NULL marking and ref
tracking purpose in one "id" does not work well:

	ref_sk = bpf_sk_lookup_tcp();
	fullsock = bpf_sk_fullsock(ref_sk);
	tp = bpf_tcp_sock(ref_sk);
	if (!fullsock) {
	     bpf_sk_release(ref_sk);
	     return 0;
	}
	/* fullsock_reg->id is marked for NOT-NULL.
	 * Same for tp_reg->id because they have the same id.
	 */

	/* oops. verifier did not complain about the missing !tp check */
	snd_cwnd = tp->snd_cwnd;

Hence, a new "ref_obj_id" is needed in "struct bpf_reg_state".
With a new ref_obj_id, when bpf_sk_release(sk) is called, the verifier can
scrub all reg states which has a ref_obj_id match.  It is done with the
changes in release_reg_references() in this patch.

While fixing it, sk_to_full_sk() is removed from bpf_tcp_sock() and
bpf_sk_fullsock() to avoid these helpers from returning
another ptr. It will make bpf_sk_release(tp) possible:

	sk = bpf_sk_lookup_tcp();
	/* if (!sk) ... */
	tp = bpf_tcp_sock(sk);
	/* if (!tp) ... */
	bpf_sk_release(tp);

A separate helper "bpf_get_listener_sock()" will be added in a later
patch to do sk_to_full_sk().

Misc change notes:
- To allow bpf_sk_release(tp), the arg of bpf_sk_release() is changed
  from ARG_PTR_TO_SOCKET to ARG_PTR_TO_SOCK_COMMON.  ARG_PTR_TO_SOCKET
  is removed from bpf.h since no helper is using it.

- arg_type_is_refcounted() is renamed to arg_type_may_be_refcounted()
  because ARG_PTR_TO_SOCK_COMMON is the only one and skb->sk is not
  refcounted.  All bpf_sk_release(), bpf_sk_fullsock() and bpf_tcp_sock()
  take ARG_PTR_TO_SOCK_COMMON.

- check_refcount_ok() ensures is_acquire_function() cannot take
  arg_type_may_be_refcounted() as its argument.

- The check_func_arg() can only allow one refcount-ed arg.  It is
  guaranteed by check_refcount_ok() which ensures at most one arg can be
  refcounted.  Hence, it is a verifier internal error if >1 refcount arg
  found in check_func_arg().

- In release_reference(), release_reference_state() is called
  first to ensure a match on "reg->ref_obj_id" can be found before
  scrubbing the reg states with release_reg_references().

- reg_is_refcounted() is no longer needed.
  1. In mark_ptr_or_null_regs(), its usage is replaced by
     "ref_obj_id && ref_obj_id == id" because,
     when is_null == true, release_reference_state() should only be
     called on the ref_obj_id obtained by a acquire helper (i.e.
     is_acquire_function() == true).  Otherwise, the following
     would happen:

	sk = bpf_sk_lookup_tcp();
	/* if (!sk) { ... } */
	fullsock = bpf_sk_fullsock(sk);
	if (!fullsock) {
		/*
		 * release_reference_state(fullsock_reg->ref_obj_id)
		 * where fullsock_reg->ref_obj_id == sk_reg->ref_obj_id.
		 *
		 * Hence, the following bpf_sk_release(sk) will fail
		 * because the ref state has already been released in the
		 * earlier release_reference_state(fullsock_reg->ref_obj_id).
		 */
		bpf_sk_release(sk);
	}

  2. In release_reg_references(), the current reg_is_refcounted() call
     is unnecessary because the id check is enough.

- The type_is_refcounted() and type_is_refcounted_or_null()
  are no longer needed also because reg_is_refcounted() is removed.

Fixes: 655a51e536c0 ("bpf: Add struct bpf_tcp_sock and BPF_FUNC_tcp_sock")
Reported-by: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h          |   1 -
 include/linux/bpf_verifier.h |  40 +++++++++++++
 kernel/bpf/verifier.c        | 131 ++++++++++++++++++++++++-------------------
 net/core/filter.c            |   6 +-
 4 files changed, 115 insertions(+), 63 deletions(-)

(limited to 'include')

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index a2132e09dc1c..f02367faa58d 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -193,7 +193,6 @@ enum bpf_arg_type {
 
 	ARG_PTR_TO_CTX,		/* pointer to context */
 	ARG_ANYTHING,		/* any (initialized) argument is ok */
-	ARG_PTR_TO_SOCKET,	/* pointer to bpf_sock */
 	ARG_PTR_TO_SPIN_LOCK,	/* pointer to bpf_spin_lock */
 	ARG_PTR_TO_SOCK_COMMON,	/* pointer to sock_common */
 };
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 69f7a3449eda..7d8228d1c898 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -66,6 +66,46 @@ struct bpf_reg_state {
 	 * same reference to the socket, to determine proper reference freeing.
 	 */
 	u32 id;
+	/* PTR_TO_SOCKET and PTR_TO_TCP_SOCK could be a ptr returned
+	 * from a pointer-cast helper, bpf_sk_fullsock() and
+	 * bpf_tcp_sock().
+	 *
+	 * Consider the following where "sk" is a reference counted
+	 * pointer returned from "sk = bpf_sk_lookup_tcp();":
+	 *
+	 * 1: sk = bpf_sk_lookup_tcp();
+	 * 2: if (!sk) { return 0; }
+	 * 3: fullsock = bpf_sk_fullsock(sk);
+	 * 4: if (!fullsock) { bpf_sk_release(sk); return 0; }
+	 * 5: tp = bpf_tcp_sock(fullsock);
+	 * 6: if (!tp) { bpf_sk_release(sk); return 0; }
+	 * 7: bpf_sk_release(sk);
+	 * 8: snd_cwnd = tp->snd_cwnd;  // verifier will complain
+	 *
+	 * After bpf_sk_release(sk) at line 7, both "fullsock" ptr and
+	 * "tp" ptr should be invalidated also.  In order to do that,
+	 * the reg holding "fullsock" and "sk" need to remember
+	 * the original refcounted ptr id (i.e. sk_reg->id) in ref_obj_id
+	 * such that the verifier can reset all regs which have
+	 * ref_obj_id matching the sk_reg->id.
+	 *
+	 * sk_reg->ref_obj_id is set to sk_reg->id at line 1.
+	 * sk_reg->id will stay as NULL-marking purpose only.
+	 * After NULL-marking is done, sk_reg->id can be reset to 0.
+	 *
+	 * After "fullsock = bpf_sk_fullsock(sk);" at line 3,
+	 * fullsock_reg->ref_obj_id is set to sk_reg->ref_obj_id.
+	 *
+	 * After "tp = bpf_tcp_sock(fullsock);" at line 5,
+	 * tp_reg->ref_obj_id is set to fullsock_reg->ref_obj_id
+	 * which is the same as sk_reg->ref_obj_id.
+	 *
+	 * From the verifier perspective, if sk, fullsock and tp
+	 * are not NULL, they are the same ptr with different
+	 * reg->type.  In particular, bpf_sk_release(tp) is also
+	 * allowed and has the same effect as bpf_sk_release(sk).
+	 */
+	u32 ref_obj_id;
 	/* For scalar types (SCALAR_VALUE), this represents our knowledge of
 	 * the actual value.
 	 * For pointer types, this represents the variable part of the offset
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index ce166a002d16..86f9cd5d1c4e 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -212,7 +212,7 @@ struct bpf_call_arg_meta {
 	int access_size;
 	s64 msize_smax_value;
 	u64 msize_umax_value;
-	int ptr_id;
+	int ref_obj_id;
 	int func_id;
 };
 
@@ -346,35 +346,15 @@ static bool reg_type_may_be_null(enum bpf_reg_type type)
 	       type == PTR_TO_TCP_SOCK_OR_NULL;
 }
 
-static bool type_is_refcounted(enum bpf_reg_type type)
-{
-	return type == PTR_TO_SOCKET;
-}
-
-static bool type_is_refcounted_or_null(enum bpf_reg_type type)
-{
-	return type == PTR_TO_SOCKET || type == PTR_TO_SOCKET_OR_NULL;
-}
-
-static bool reg_is_refcounted(const struct bpf_reg_state *reg)
-{
-	return type_is_refcounted(reg->type);
-}
-
 static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
 {
 	return reg->type == PTR_TO_MAP_VALUE &&
 		map_value_has_spin_lock(reg->map_ptr);
 }
 
-static bool reg_is_refcounted_or_null(const struct bpf_reg_state *reg)
+static bool arg_type_may_be_refcounted(enum bpf_arg_type type)
 {
-	return type_is_refcounted_or_null(reg->type);
-}
-
-static bool arg_type_is_refcounted(enum bpf_arg_type type)
-{
-	return type == ARG_PTR_TO_SOCKET;
+	return type == ARG_PTR_TO_SOCK_COMMON;
 }
 
 /* Determine whether the function releases some resources allocated by another
@@ -392,6 +372,12 @@ static bool is_acquire_function(enum bpf_func_id func_id)
 		func_id == BPF_FUNC_sk_lookup_udp;
 }
 
+static bool is_ptr_cast_function(enum bpf_func_id func_id)
+{
+	return func_id == BPF_FUNC_tcp_sock ||
+		func_id == BPF_FUNC_sk_fullsock;
+}
+
 /* string representation of 'enum bpf_reg_type' */
 static const char * const reg_type_str[] = {
 	[NOT_INIT]		= "?",
@@ -465,7 +451,8 @@ static void print_verifier_state(struct bpf_verifier_env *env,
 			if (t == PTR_TO_STACK)
 				verbose(env, ",call_%d", func(env, reg)->callsite);
 		} else {
-			verbose(env, "(id=%d", reg->id);
+			verbose(env, "(id=%d ref_obj_id=%d", reg->id,
+				reg->ref_obj_id);
 			if (t != SCALAR_VALUE)
 				verbose(env, ",off=%d", reg->off);
 			if (type_is_pkt_pointer(t))
@@ -2414,16 +2401,15 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 		/* Any sk pointer can be ARG_PTR_TO_SOCK_COMMON */
 		if (!type_is_sk_pointer(type))
 			goto err_type;
-	} else if (arg_type == ARG_PTR_TO_SOCKET) {
-		expected_type = PTR_TO_SOCKET;
-		if (type != expected_type)
-			goto err_type;
-		if (meta->ptr_id || !reg->id) {
-			verbose(env, "verifier internal error: mismatched references meta=%d, reg=%d\n",
-				meta->ptr_id, reg->id);
-			return -EFAULT;
+		if (reg->ref_obj_id) {
+			if (meta->ref_obj_id) {
+				verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
+					regno, reg->ref_obj_id,
+					meta->ref_obj_id);
+				return -EFAULT;
+			}
+			meta->ref_obj_id = reg->ref_obj_id;
 		}
-		meta->ptr_id = reg->id;
 	} else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
 		if (meta->func_id == BPF_FUNC_spin_lock) {
 			if (process_spin_lock(env, regno, true))
@@ -2740,32 +2726,38 @@ static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
 	return true;
 }
 
-static bool check_refcount_ok(const struct bpf_func_proto *fn)
+static bool check_refcount_ok(const struct bpf_func_proto *fn, int func_id)
 {
 	int count = 0;
 
-	if (arg_type_is_refcounted(fn->arg1_type))
+	if (arg_type_may_be_refcounted(fn->arg1_type))
 		count++;
-	if (arg_type_is_refcounted(fn->arg2_type))
+	if (arg_type_may_be_refcounted(fn->arg2_type))
 		count++;
-	if (arg_type_is_refcounted(fn->arg3_type))
+	if (arg_type_may_be_refcounted(fn->arg3_type))
 		count++;
-	if (arg_type_is_refcounted(fn->arg4_type))
+	if (arg_type_may_be_refcounted(fn->arg4_type))
 		count++;
-	if (arg_type_is_refcounted(fn->arg5_type))
+	if (arg_type_may_be_refcounted(fn->arg5_type))
 		count++;
 
+	/* A reference acquiring function cannot acquire
+	 * another refcounted ptr.
+	 */
+	if (is_acquire_function(func_id) && count)
+		return false;
+
 	/* We only support one arg being unreferenced at the moment,
 	 * which is sufficient for the helper functions we have right now.
 	 */
 	return count <= 1;
 }
 
-static int check_func_proto(const struct bpf_func_proto *fn)
+static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
 {
 	return check_raw_mode_ok(fn) &&
 	       check_arg_pair_ok(fn) &&
-	       check_refcount_ok(fn) ? 0 : -EINVAL;
+	       check_refcount_ok(fn, func_id) ? 0 : -EINVAL;
 }
 
 /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
@@ -2799,19 +2791,20 @@ static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
 }
 
 static void release_reg_references(struct bpf_verifier_env *env,
-				   struct bpf_func_state *state, int id)
+				   struct bpf_func_state *state,
+				   int ref_obj_id)
 {
 	struct bpf_reg_state *regs = state->regs, *reg;
 	int i;
 
 	for (i = 0; i < MAX_BPF_REG; i++)
-		if (regs[i].id == id)
+		if (regs[i].ref_obj_id == ref_obj_id)
 			mark_reg_unknown(env, regs, i);
 
 	bpf_for_each_spilled_reg(i, state, reg) {
 		if (!reg)
 			continue;
-		if (reg_is_refcounted(reg) && reg->id == id)
+		if (reg->ref_obj_id == ref_obj_id)
 			__mark_reg_unknown(reg);
 	}
 }
@@ -2820,15 +2813,20 @@ static void release_reg_references(struct bpf_verifier_env *env,
  * resources. Identify all copies of the same pointer and clear the reference.
  */
 static int release_reference(struct bpf_verifier_env *env,
-			     struct bpf_call_arg_meta *meta)
+			     int ref_obj_id)
 {
 	struct bpf_verifier_state *vstate = env->cur_state;
+	int err;
 	int i;
 
+	err = release_reference_state(cur_func(env), ref_obj_id);
+	if (err)
+		return err;
+
 	for (i = 0; i <= vstate->curframe; i++)
-		release_reg_references(env, vstate->frame[i], meta->ptr_id);
+		release_reg_references(env, vstate->frame[i], ref_obj_id);
 
-	return release_reference_state(cur_func(env), meta->ptr_id);
+	return 0;
 }
 
 static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
@@ -3047,7 +3045,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
 	memset(&meta, 0, sizeof(meta));
 	meta.pkt_access = fn->pkt_access;
 
-	err = check_func_proto(fn);
+	err = check_func_proto(fn, func_id);
 	if (err) {
 		verbose(env, "kernel subsystem misconfigured func %s#%d\n",
 			func_id_name(func_id), func_id);
@@ -3093,7 +3091,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
 			return err;
 		}
 	} else if (is_release_function(func_id)) {
-		err = release_reference(env, &meta);
+		err = release_reference(env, meta.ref_obj_id);
 		if (err) {
 			verbose(env, "func %s#%d reference has not been acquired before\n",
 				func_id_name(func_id), func_id);
@@ -3154,8 +3152,10 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
 
 			if (id < 0)
 				return id;
-			/* For release_reference() */
+			/* For mark_ptr_or_null_reg() */
 			regs[BPF_REG_0].id = id;
+			/* For release_reference() */
+			regs[BPF_REG_0].ref_obj_id = id;
 		} else {
 			/* For mark_ptr_or_null_reg() */
 			regs[BPF_REG_0].id = ++env->id_gen;
@@ -3170,6 +3170,10 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
 		return -EINVAL;
 	}
 
+	if (is_ptr_cast_function(func_id))
+		/* For release_reference() */
+		regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
+
 	do_refine_retval_range(regs, fn->ret_type, func_id, &meta);
 
 	err = check_map_func_compatibility(env, meta.map_ptr, func_id);
@@ -4665,11 +4669,19 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,
 		} else if (reg->type == PTR_TO_TCP_SOCK_OR_NULL) {
 			reg->type = PTR_TO_TCP_SOCK;
 		}
-		if (is_null || !(reg_is_refcounted(reg) ||
-				 reg_may_point_to_spin_lock(reg))) {
-			/* We don't need id from this point onwards anymore,
-			 * thus we should better reset it, so that state
-			 * pruning has chances to take effect.
+		if (is_null) {
+			/* We don't need id and ref_obj_id from this point
+			 * onwards anymore, thus we should better reset it,
+			 * so that state pruning has chances to take effect.
+			 */
+			reg->id = 0;
+			reg->ref_obj_id = 0;
+		} else if (!reg_may_point_to_spin_lock(reg)) {
+			/* For not-NULL ptr, reg->ref_obj_id will be reset
+			 * in release_reg_references().
+			 *
+			 * reg->id is still used by spin_lock ptr. Other
+			 * than spin_lock ptr type, reg->id can be reset.
 			 */
 			reg->id = 0;
 		}
@@ -4684,11 +4696,16 @@ static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno,
 {
 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
 	struct bpf_reg_state *reg, *regs = state->regs;
+	u32 ref_obj_id = regs[regno].ref_obj_id;
 	u32 id = regs[regno].id;
 	int i, j;
 
-	if (reg_is_refcounted_or_null(&regs[regno]) && is_null)
-		release_reference_state(state, id);
+	if (ref_obj_id && ref_obj_id == id && is_null)
+		/* regs[regno] is in the " == NULL" branch.
+		 * No one could have freed the reference state before
+		 * doing the NULL check.
+		 */
+		WARN_ON_ONCE(release_reference_state(state, id));
 
 	for (i = 0; i < MAX_BPF_REG; i++)
 		mark_ptr_or_null_reg(state, &regs[i], id, is_null);
diff --git a/net/core/filter.c b/net/core/filter.c
index f274620945ff..36b6afacf83c 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -1796,8 +1796,6 @@ static const struct bpf_func_proto bpf_skb_pull_data_proto = {
 
 BPF_CALL_1(bpf_sk_fullsock, struct sock *, sk)
 {
-	sk = sk_to_full_sk(sk);
-
 	return sk_fullsock(sk) ? (unsigned long)sk : (unsigned long)NULL;
 }
 
@@ -5266,7 +5264,7 @@ static const struct bpf_func_proto bpf_sk_release_proto = {
 	.func		= bpf_sk_release,
 	.gpl_only	= false,
 	.ret_type	= RET_INTEGER,
-	.arg1_type	= ARG_PTR_TO_SOCKET,
+	.arg1_type	= ARG_PTR_TO_SOCK_COMMON,
 };
 
 BPF_CALL_5(bpf_xdp_sk_lookup_udp, struct xdp_buff *, ctx,
@@ -5407,8 +5405,6 @@ u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
 
 BPF_CALL_1(bpf_tcp_sock, struct sock *, sk)
 {
-	sk = sk_to_full_sk(sk);
-
 	if (sk_fullsock(sk) && sk->sk_protocol == IPPROTO_TCP)
 		return (unsigned long)sk;
 
-- 
cgit v1.2.3


From dbafd7ddd62369b2f3926ab847cbf8fc40e800b7 Mon Sep 17 00:00:00 2001
From: Martin KaFai Lau <kafai@fb.com>
Date: Tue, 12 Mar 2019 10:23:04 -0700
Subject: bpf: Add bpf_get_listener_sock(struct bpf_sock *sk) helper

Add a new helper "struct bpf_sock *bpf_get_listener_sock(struct bpf_sock *sk)"
which returns a bpf_sock in TCP_LISTEN state.  It will trace back to
the listener sk from a request_sock if possible.  It returns NULL
for all other cases.

No reference is taken because the helper ensures the sk is
in SOCK_RCU_FREE (where the TCP_LISTEN sock should be in).
Hence, bpf_sk_release() is unnecessary and the verifier does not
allow bpf_sk_release(listen_sk) to be called either.

The following is also allowed because the bpf_prog is run under
rcu_read_lock():

	sk = bpf_sk_lookup_tcp();
	/* if (!sk) { ... } */
	listen_sk = bpf_get_listener_sock(sk);
	/* if (!listen_sk) { ... } */
	bpf_sk_release(sk);
	src_port = listen_sk->src_port; /* Allowed */

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/uapi/linux/bpf.h | 11 ++++++++++-
 net/core/filter.c        | 21 +++++++++++++++++++++
 2 files changed, 31 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 3c38ac9a92a7..983b25cb608d 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2366,6 +2366,14 @@ union bpf_attr {
  *             current value is ect (ECN capable). Works with IPv6 and IPv4.
  *     Return
  *             1 if set, 0 if not set.
+ *
+ * struct bpf_sock *bpf_get_listener_sock(struct bpf_sock *sk)
+ *	Description
+ *		Return a **struct bpf_sock** pointer in TCP_LISTEN state.
+ *		bpf_sk_release() is unnecessary and not allowed.
+ *	Return
+ *		A **struct bpf_sock** pointer on success, or NULL in
+ *		case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -2465,7 +2473,8 @@ union bpf_attr {
 	FN(spin_unlock),		\
 	FN(sk_fullsock),		\
 	FN(tcp_sock),			\
-	FN(skb_ecn_set_ce),
+	FN(skb_ecn_set_ce),		\
+	FN(get_listener_sock),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/net/core/filter.c b/net/core/filter.c
index 36b6afacf83c..647c63a7b25b 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -5418,6 +5418,23 @@ static const struct bpf_func_proto bpf_tcp_sock_proto = {
 	.arg1_type	= ARG_PTR_TO_SOCK_COMMON,
 };
 
+BPF_CALL_1(bpf_get_listener_sock, struct sock *, sk)
+{
+	sk = sk_to_full_sk(sk);
+
+	if (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_RCU_FREE))
+		return (unsigned long)sk;
+
+	return (unsigned long)NULL;
+}
+
+static const struct bpf_func_proto bpf_get_listener_sock_proto = {
+	.func		= bpf_get_listener_sock,
+	.gpl_only	= false,
+	.ret_type	= RET_PTR_TO_SOCKET_OR_NULL,
+	.arg1_type	= ARG_PTR_TO_SOCK_COMMON,
+};
+
 BPF_CALL_1(bpf_skb_ecn_set_ce, struct sk_buff *, skb)
 {
 	unsigned int iphdr_len;
@@ -5603,6 +5620,8 @@ cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 #ifdef CONFIG_INET
 	case BPF_FUNC_tcp_sock:
 		return &bpf_tcp_sock_proto;
+	case BPF_FUNC_get_listener_sock:
+		return &bpf_get_listener_sock_proto;
 	case BPF_FUNC_skb_ecn_set_ce:
 		return &bpf_skb_ecn_set_ce_proto;
 #endif
@@ -5698,6 +5717,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_sk_release_proto;
 	case BPF_FUNC_tcp_sock:
 		return &bpf_tcp_sock_proto;
+	case BPF_FUNC_get_listener_sock:
+		return &bpf_get_listener_sock_proto;
 #endif
 	default:
 		return bpf_base_func_proto(func_id);
-- 
cgit v1.2.3


From 62369db2df8d1edfa040878203b446e023a16802 Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin.monnet@netronome.com>
Date: Thu, 14 Mar 2019 12:38:39 +0000
Subject: bpf: fix documentation for eBPF helpers

Another round of minor fixes for the documentation of the BPF helpers
located in the UAPI bpf.h header file. Changes include:

- Moving around description of some helpers, to keep the descriptions in
  the same order as helpers are declared (bpf_map_push_elem(), leftover
  from commit 90b1023f68c7 ("bpf: fix documentation for eBPF helpers"),
  bpf_rc_keydown(), and bpf_skb_ancestor_cgroup_id()).
- Fixing typos ("contex" -> "context").
- Harmonising return types ("void* " -> "void *", "uint64_t" -> "u64").
- Addition of the "bpf_" prefix to bpf_get_storage().
- Light additions of RST markup on some keywords.
- Empty line deletion between description and return value for
  bpf_tcp_sock().
- Edit for the description for bpf_skb_ecn_set_ce() (capital letters,
  acronym expansion, no effect if ECT not set, more details on return
  value).

Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/uapi/linux/bpf.h | 128 ++++++++++++++++++++++++-----------------------
 1 file changed, 65 insertions(+), 63 deletions(-)

(limited to 'include')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 983b25cb608d..4465d00d3493 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -502,16 +502,6 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
- * 	Description
- * 		Push an element *value* in *map*. *flags* is one of:
- *
- * 		**BPF_EXIST**
- * 		If the queue/stack is full, the oldest element is removed to
- * 		make room for this.
- * 	Return
- * 		0 on success, or a negative error in case of failure.
- *
  * int bpf_probe_read(void *dst, u32 size, const void *src)
  * 	Description
  * 		For tracing programs, safely attempt to read *size* bytes from
@@ -1435,14 +1425,14 @@ union bpf_attr {
  * u64 bpf_get_socket_cookie(struct bpf_sock_addr *ctx)
  * 	Description
  * 		Equivalent to bpf_get_socket_cookie() helper that accepts
- * 		*skb*, but gets socket from **struct bpf_sock_addr** contex.
+ * 		*skb*, but gets socket from **struct bpf_sock_addr** context.
  * 	Return
  * 		A 8-byte long non-decreasing number.
  *
  * u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx)
  * 	Description
  * 		Equivalent to bpf_get_socket_cookie() helper that accepts
- * 		*skb*, but gets socket from **struct bpf_sock_ops** contex.
+ * 		*skb*, but gets socket from **struct bpf_sock_ops** context.
  * 	Return
  * 		A 8-byte long non-decreasing number.
  *
@@ -2098,52 +2088,52 @@ union bpf_attr {
  *	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle)
+ * int bpf_rc_repeat(void *ctx)
  *	Description
  *		This helper is used in programs implementing IR decoding, to
- *		report a successfully decoded key press with *scancode*,
- *		*toggle* value in the given *protocol*. The scancode will be
- *		translated to a keycode using the rc keymap, and reported as
- *		an input key down event. After a period a key up event is
- *		generated. This period can be extended by calling either
- *		**bpf_rc_keydown**\ () again with the same values, or calling
- *		**bpf_rc_repeat**\ ().
+ *		report a successfully decoded repeat key message. This delays
+ *		the generation of a key up event for previously generated
+ *		key down event.
  *
- *		Some protocols include a toggle bit, in case the button	was
- *		released and pressed again between consecutive scancodes.
+ *		Some IR protocols like NEC have a special IR message for
+ *		repeating last button, for when a button is held down.
  *
  *		The *ctx* should point to the lirc sample as passed into
  *		the program.
  *
- *		The *protocol* is the decoded protocol number (see
- *		**enum rc_proto** for some predefined values).
- *
  *		This helper is only available is the kernel was compiled with
  *		the **CONFIG_BPF_LIRC_MODE2** configuration option set to
  *		"**y**".
  *	Return
  *		0
  *
- * int bpf_rc_repeat(void *ctx)
+ * int bpf_rc_keydown(void *ctx, u32 protocol, u64 scancode, u32 toggle)
  *	Description
  *		This helper is used in programs implementing IR decoding, to
- *		report a successfully decoded repeat key message. This delays
- *		the generation of a key up event for previously generated
- *		key down event.
+ *		report a successfully decoded key press with *scancode*,
+ *		*toggle* value in the given *protocol*. The scancode will be
+ *		translated to a keycode using the rc keymap, and reported as
+ *		an input key down event. After a period a key up event is
+ *		generated. This period can be extended by calling either
+ *		**bpf_rc_keydown**\ () again with the same values, or calling
+ *		**bpf_rc_repeat**\ ().
  *
- *		Some IR protocols like NEC have a special IR message for
- *		repeating last button, for when a button is held down.
+ *		Some protocols include a toggle bit, in case the button	was
+ *		released and pressed again between consecutive scancodes.
  *
  *		The *ctx* should point to the lirc sample as passed into
  *		the program.
  *
+ *		The *protocol* is the decoded protocol number (see
+ *		**enum rc_proto** for some predefined values).
+ *
  *		This helper is only available is the kernel was compiled with
  *		the **CONFIG_BPF_LIRC_MODE2** configuration option set to
  *		"**y**".
  *	Return
  *		0
  *
- * uint64_t bpf_skb_cgroup_id(struct sk_buff *skb)
+ * u64 bpf_skb_cgroup_id(struct sk_buff *skb)
  * 	Description
  * 		Return the cgroup v2 id of the socket associated with the *skb*.
  * 		This is roughly similar to the **bpf_get_cgroup_classid**\ ()
@@ -2159,30 +2149,12 @@ union bpf_attr {
  * 	Return
  * 		The id is returned or 0 in case the id could not be retrieved.
  *
- * u64 bpf_skb_ancestor_cgroup_id(struct sk_buff *skb, int ancestor_level)
- *	Description
- *		Return id of cgroup v2 that is ancestor of cgroup associated
- *		with the *skb* at the *ancestor_level*.  The root cgroup is at
- *		*ancestor_level* zero and each step down the hierarchy
- *		increments the level. If *ancestor_level* == level of cgroup
- *		associated with *skb*, then return value will be same as that
- *		of **bpf_skb_cgroup_id**\ ().
- *
- *		The helper is useful to implement policies based on cgroups
- *		that are upper in hierarchy than immediate cgroup associated
- *		with *skb*.
- *
- *		The format of returned id and helper limitations are same as in
- *		**bpf_skb_cgroup_id**\ ().
- *	Return
- *		The id is returned or 0 in case the id could not be retrieved.
- *
  * u64 bpf_get_current_cgroup_id(void)
  * 	Return
  * 		A 64-bit integer containing the current cgroup id based
  * 		on the cgroup within which the current task is running.
  *
- * void* get_local_storage(void *map, u64 flags)
+ * void *bpf_get_local_storage(void *map, u64 flags)
  *	Description
  *		Get the pointer to the local storage area.
  *		The type and the size of the local storage is defined
@@ -2209,6 +2181,24 @@ union bpf_attr {
  *	Return
  *		0 on success, or a negative error in case of failure.
  *
+ * u64 bpf_skb_ancestor_cgroup_id(struct sk_buff *skb, int ancestor_level)
+ *	Description
+ *		Return id of cgroup v2 that is ancestor of cgroup associated
+ *		with the *skb* at the *ancestor_level*.  The root cgroup is at
+ *		*ancestor_level* zero and each step down the hierarchy
+ *		increments the level. If *ancestor_level* == level of cgroup
+ *		associated with *skb*, then return value will be same as that
+ *		of **bpf_skb_cgroup_id**\ ().
+ *
+ *		The helper is useful to implement policies based on cgroups
+ *		that are upper in hierarchy than immediate cgroup associated
+ *		with *skb*.
+ *
+ *		The format of returned id and helper limitations are same as in
+ *		**bpf_skb_cgroup_id**\ ().
+ *	Return
+ *		The id is returned or 0 in case the id could not be retrieved.
+ *
  * struct bpf_sock *bpf_sk_lookup_tcp(void *ctx, struct bpf_sock_tuple *tuple, u32 tuple_size, u64 netns, u64 flags)
  *	Description
  *		Look for TCP socket matching *tuple*, optionally in a child
@@ -2289,6 +2279,16 @@ union bpf_attr {
  *	Return
  *		0 on success, or a negative error in case of failure.
  *
+ * int bpf_map_push_elem(struct bpf_map *map, const void *value, u64 flags)
+ * 	Description
+ * 		Push an element *value* in *map*. *flags* is one of:
+ *
+ * 		**BPF_EXIST**
+ * 			If the queue/stack is full, the oldest element is
+ * 			removed to make room for this.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure.
+ *
  * int bpf_map_pop_elem(struct bpf_map *map, void *value)
  * 	Description
  * 		Pop an element from *map*.
@@ -2346,33 +2346,35 @@ union bpf_attr {
  * struct bpf_sock *bpf_sk_fullsock(struct bpf_sock *sk)
  *	Description
  *		This helper gets a **struct bpf_sock** pointer such
- *		that all the fields in bpf_sock can be accessed.
+ *		that all the fields in this **bpf_sock** can be accessed.
  *	Return
- *		A **struct bpf_sock** pointer on success, or NULL in
+ *		A **struct bpf_sock** pointer on success, or **NULL** in
  *		case of failure.
  *
  * struct bpf_tcp_sock *bpf_tcp_sock(struct bpf_sock *sk)
  *	Description
  *		This helper gets a **struct bpf_tcp_sock** pointer from a
  *		**struct bpf_sock** pointer.
- *
  *	Return
- *		A **struct bpf_tcp_sock** pointer on success, or NULL in
+ *		A **struct bpf_tcp_sock** pointer on success, or **NULL** in
  *		case of failure.
  *
  * int bpf_skb_ecn_set_ce(struct sk_buf *skb)
- *     Description
- *             Sets ECN of IP header to ce (congestion encountered) if
- *             current value is ect (ECN capable). Works with IPv6 and IPv4.
- *     Return
- *             1 if set, 0 if not set.
+ *	Description
+ *		Set ECN (Explicit Congestion Notification) field of IP header
+ *		to **CE** (Congestion Encountered) if current value is **ECT**
+ *		(ECN Capable Transport). Otherwise, do nothing. Works with IPv6
+ *		and IPv4.
+ *	Return
+ *		1 if the **CE** flag is set (either by the current helper call
+ *		or because it was already present), 0 if it is not set.
  *
  * struct bpf_sock *bpf_get_listener_sock(struct bpf_sock *sk)
  *	Description
- *		Return a **struct bpf_sock** pointer in TCP_LISTEN state.
- *		bpf_sk_release() is unnecessary and not allowed.
+ *		Return a **struct bpf_sock** pointer in **TCP_LISTEN** state.
+ *		**bpf_sk_release**\ () is unnecessary and not allowed.
  *	Return
- *		A **struct bpf_sock** pointer on success, or NULL in
+ *		A **struct bpf_sock** pointer on success, or **NULL** in
  *		case of failure.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
-- 
cgit v1.2.3


From 0eb0978528d47699edd091dc2c337952ad8da436 Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin.monnet@netronome.com>
Date: Thu, 14 Mar 2019 12:38:40 +0000
Subject: bpf: add documentation for helpers bpf_spin_lock(), bpf_spin_unlock()

Add documentation for the BPF spinlock-related helpers to the doc in
bpf.h. I added the constraints and restrictions coming with the use of
spinlocks for BPF: not all of it is directly related to the use of the
helper, but I thought it would be nice for users to find them in the man
page.

This list of restrictions is nearly a verbatim copy of the list in
Alexei's commit log for those helpers.

Signed-off-by: Quentin Monnet <quentin.monnet@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/uapi/linux/bpf.h | 55 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)

(limited to 'include')

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 4465d00d3493..929c8e537a14 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2343,6 +2343,61 @@ union bpf_attr {
  *	Return
  *		0
  *
+ * int bpf_spin_lock(struct bpf_spin_lock *lock)
+ *	Description
+ *		Acquire a spinlock represented by the pointer *lock*, which is
+ *		stored as part of a value of a map. Taking the lock allows to
+ *		safely update the rest of the fields in that value. The
+ *		spinlock can (and must) later be released with a call to
+ *		**bpf_spin_unlock**\ (\ *lock*\ ).
+ *
+ *		Spinlocks in BPF programs come with a number of restrictions
+ *		and constraints:
+ *
+ *		* **bpf_spin_lock** objects are only allowed inside maps of
+ *		  types **BPF_MAP_TYPE_HASH** and **BPF_MAP_TYPE_ARRAY** (this
+ *		  list could be extended in the future).
+ *		* BTF description of the map is mandatory.
+ *		* The BPF program can take ONE lock at a time, since taking two
+ *		  or more could cause dead locks.
+ *		* Only one **struct bpf_spin_lock** is allowed per map element.
+ *		* When the lock is taken, calls (either BPF to BPF or helpers)
+ *		  are not allowed.
+ *		* The **BPF_LD_ABS** and **BPF_LD_IND** instructions are not
+ *		  allowed inside a spinlock-ed region.
+ *		* The BPF program MUST call **bpf_spin_unlock**\ () to release
+ *		  the lock, on all execution paths, before it returns.
+ *		* The BPF program can access **struct bpf_spin_lock** only via
+ *		  the **bpf_spin_lock**\ () and **bpf_spin_unlock**\ ()
+ *		  helpers. Loading or storing data into the **struct
+ *		  bpf_spin_lock** *lock*\ **;** field of a map is not allowed.
+ *		* To use the **bpf_spin_lock**\ () helper, the BTF description
+ *		  of the map value must be a struct and have **struct
+ *		  bpf_spin_lock** *anyname*\ **;** field at the top level.
+ *		  Nested lock inside another struct is not allowed.
+ *		* The **struct bpf_spin_lock** *lock* field in a map value must
+ *		  be aligned on a multiple of 4 bytes in that value.
+ *		* Syscall with command **BPF_MAP_LOOKUP_ELEM** does not copy
+ *		  the **bpf_spin_lock** field to user space.
+ *		* Syscall with command **BPF_MAP_UPDATE_ELEM**, or update from
+ *		  a BPF program, do not update the **bpf_spin_lock** field.
+ *		* **bpf_spin_lock** cannot be on the stack or inside a
+ *		  networking packet (it can only be inside of a map values).
+ *		* **bpf_spin_lock** is available to root only.
+ *		* Tracing programs and socket filter programs cannot use
+ *		  **bpf_spin_lock**\ () due to insufficient preemption checks
+ *		  (but this may change in the future).
+ *		* **bpf_spin_lock** is not allowed in inner maps of map-in-map.
+ *	Return
+ *		0
+ *
+ * int bpf_spin_unlock(struct bpf_spin_lock *lock)
+ *	Description
+ *		Release the *lock* previously locked by a call to
+ *		**bpf_spin_lock**\ (\ *lock*\ ).
+ *	Return
+ *		0
+ *
  * struct bpf_sock *bpf_sk_fullsock(struct bpf_sock *sk)
  *	Description
  *		This helper gets a **struct bpf_sock** pointer such
-- 
cgit v1.2.3


From 9804501fa1228048857910a6bf23e085aade37cc Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Thu, 14 Mar 2019 13:47:59 +0800
Subject: appletalk: Fix potential NULL pointer dereference in
 unregister_snap_client

register_snap_client may return NULL, all the callers
check it, but only print a warning. This will result in
NULL pointer dereference in unregister_snap_client and other
places.

It has always been used like this since v2.6

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/atalk.h |  2 +-
 net/appletalk/aarp.c  | 15 ++++++++++++---
 net/appletalk/ddp.c   | 20 ++++++++++++--------
 3 files changed, 25 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/atalk.h b/include/linux/atalk.h
index d5cfc0b15b76..f6034ba774be 100644
--- a/include/linux/atalk.h
+++ b/include/linux/atalk.h
@@ -108,7 +108,7 @@ static __inline__ struct elapaarp *aarp_hdr(struct sk_buff *skb)
 #define AARP_RESOLVE_TIME	(10 * HZ)
 
 extern struct datalink_proto *ddp_dl, *aarp_dl;
-extern void aarp_proto_init(void);
+extern int aarp_proto_init(void);
 
 /* Inter module exports */
 
diff --git a/net/appletalk/aarp.c b/net/appletalk/aarp.c
index 49a16cee2aae..420a98bf79b5 100644
--- a/net/appletalk/aarp.c
+++ b/net/appletalk/aarp.c
@@ -879,15 +879,24 @@ static struct notifier_block aarp_notifier = {
 
 static unsigned char aarp_snap_id[] = { 0x00, 0x00, 0x00, 0x80, 0xF3 };
 
-void __init aarp_proto_init(void)
+int __init aarp_proto_init(void)
 {
+	int rc;
+
 	aarp_dl = register_snap_client(aarp_snap_id, aarp_rcv);
-	if (!aarp_dl)
+	if (!aarp_dl) {
 		printk(KERN_CRIT "Unable to register AARP with SNAP.\n");
+		return -ENOMEM;
+	}
 	timer_setup(&aarp_timer, aarp_expire_timeout, 0);
 	aarp_timer.expires  = jiffies + sysctl_aarp_expiry_time;
 	add_timer(&aarp_timer);
-	register_netdevice_notifier(&aarp_notifier);
+	rc = register_netdevice_notifier(&aarp_notifier);
+	if (rc) {
+		del_timer_sync(&aarp_timer);
+		unregister_snap_client(aarp_dl);
+	}
+	return rc;
 }
 
 /* Remove the AARP entries associated with a device. */
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 795fbc6c06aa..709d2542f729 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -1904,9 +1904,6 @@ static unsigned char ddp_snap_id[] = { 0x08, 0x00, 0x07, 0x80, 0x9B };
 EXPORT_SYMBOL(atrtr_get_dev);
 EXPORT_SYMBOL(atalk_find_dev_addr);
 
-static const char atalk_err_snap[] __initconst =
-	KERN_CRIT "Unable to register DDP with SNAP.\n";
-
 /* Called by proto.c on kernel start up */
 static int __init atalk_init(void)
 {
@@ -1921,17 +1918,22 @@ static int __init atalk_init(void)
 		goto out_proto;
 
 	ddp_dl = register_snap_client(ddp_snap_id, atalk_rcv);
-	if (!ddp_dl)
-		printk(atalk_err_snap);
+	if (!ddp_dl) {
+		pr_crit("Unable to register DDP with SNAP.\n");
+		goto out_sock;
+	}
 
 	dev_add_pack(&ltalk_packet_type);
 	dev_add_pack(&ppptalk_packet_type);
 
 	rc = register_netdevice_notifier(&ddp_notifier);
 	if (rc)
-		goto out_sock;
+		goto out_snap;
+
+	rc = aarp_proto_init();
+	if (rc)
+		goto out_dev;
 
-	aarp_proto_init();
 	rc = atalk_proc_init();
 	if (rc)
 		goto out_aarp;
@@ -1945,11 +1947,13 @@ out_proc:
 	atalk_proc_exit();
 out_aarp:
 	aarp_cleanup_module();
+out_dev:
 	unregister_netdevice_notifier(&ddp_notifier);
-out_sock:
+out_snap:
 	dev_remove_pack(&ppptalk_packet_type);
 	dev_remove_pack(&ltalk_packet_type);
 	unregister_snap_client(ddp_dl);
+out_sock:
 	sock_unregister(PF_APPLETALK);
 out_proto:
 	proto_unregister(&ddp_proto);
-- 
cgit v1.2.3


From 8a3c245c031944f2176118270e7bc5d4fd4a1075 Mon Sep 17 00:00:00 2001
From: Pedro Tammela <pctammela@gmail.com>
Date: Thu, 14 Mar 2019 10:45:23 -0300
Subject: net: add documentation to socket.c

Adds missing sphinx documentation to the
socket.c's functions. Also fixes some whitespaces.

I also changed the style of older documentation as an
effort to have an uniform documentation style.

Signed-off-by: Pedro Tammela <pctammela@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/net.h    |   6 ++
 include/linux/socket.h |  12 +--
 net/socket.c           | 277 +++++++++++++++++++++++++++++++++++++++++++++----
 3 files changed, 271 insertions(+), 24 deletions(-)

(limited to 'include')

diff --git a/include/linux/net.h b/include/linux/net.h
index 651fca72286c..c606c72311d0 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -83,6 +83,12 @@ enum sock_type {
 
 #endif /* ARCH_HAS_SOCKET_TYPES */
 
+/**
+ * enum sock_shutdown_cmd - Shutdown types
+ * @SHUT_RD: shutdown receptions
+ * @SHUT_WR: shutdown transmissions
+ * @SHUT_RDWR: shutdown receptions/transmissions
+ */
 enum sock_shutdown_cmd {
 	SHUT_RD,
 	SHUT_WR,
diff --git a/include/linux/socket.h b/include/linux/socket.h
index 6016daeecee4..b57cd8bf96e2 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -26,7 +26,7 @@ typedef __kernel_sa_family_t	sa_family_t;
 /*
  *	1003.1g requires sa_family_t and that sa_data is char.
  */
- 
+
 struct sockaddr {
 	sa_family_t	sa_family;	/* address family, AF_xxx	*/
 	char		sa_data[14];	/* 14 bytes of protocol address	*/
@@ -44,7 +44,7 @@ struct linger {
  *	system, not 4.3. Thus msg_accrights(len) are now missing. They
  *	belong in an obscure libc emulation or the bin.
  */
- 
+
 struct msghdr {
 	void		*msg_name;	/* ptr to socket address structure */
 	int		msg_namelen;	/* size of socket address structure */
@@ -54,7 +54,7 @@ struct msghdr {
 	unsigned int	msg_flags;	/* flags on received message */
 	struct kiocb	*msg_iocb;	/* ptr to iocb for async requests */
 };
- 
+
 struct user_msghdr {
 	void		__user *msg_name;	/* ptr to socket address structure */
 	int		msg_namelen;		/* size of socket address structure */
@@ -122,7 +122,7 @@ struct cmsghdr {
  *	inside range, given by msg->msg_controllen before using
  *	ancillary object DATA.				--ANK (980731)
  */
- 
+
 static inline struct cmsghdr * __cmsg_nxthdr(void *__ctl, __kernel_size_t __size,
 					       struct cmsghdr *__cmsg)
 {
@@ -264,10 +264,10 @@ struct ucred {
 /* Maximum queue length specifiable by listen.  */
 #define SOMAXCONN	128
 
-/* Flags we can use with send/ and recv. 
+/* Flags we can use with send/ and recv.
    Added those for 1003.1g not all are supported yet
  */
- 
+
 #define MSG_OOB		1
 #define MSG_PEEK	2
 #define MSG_DONTROUTE	4
diff --git a/net/socket.c b/net/socket.c
index 3c176a12fe48..8255f5bda0aa 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -384,6 +384,18 @@ static struct file_system_type sock_fs_type = {
  *	but we take care of internal coherence yet.
  */
 
+/**
+ *	sock_alloc_file - Bind a &socket to a &file
+ *	@sock: socket
+ *	@flags: file status flags
+ *	@dname: protocol name
+ *
+ *	Returns the &file bound with @sock, implicitly storing it
+ *	in sock->file. If dname is %NULL, sets to "".
+ *	On failure the return is a ERR pointer (see linux/err.h).
+ *	This function uses GFP_KERNEL internally.
+ */
+
 struct file *sock_alloc_file(struct socket *sock, int flags, const char *dname)
 {
 	struct file *file;
@@ -424,6 +436,14 @@ static int sock_map_fd(struct socket *sock, int flags)
 	return PTR_ERR(newfile);
 }
 
+/**
+ *	sock_from_file - Return the &socket bounded to @file.
+ *	@file: file
+ *	@err: pointer to an error code return
+ *
+ *	On failure returns %NULL and assigns -ENOTSOCK to @err.
+ */
+
 struct socket *sock_from_file(struct file *file, int *err)
 {
 	if (file->f_op == &socket_file_ops)
@@ -532,11 +552,11 @@ static const struct inode_operations sockfs_inode_ops = {
 };
 
 /**
- *	sock_alloc	-	allocate a socket
+ *	sock_alloc - allocate a socket
  *
  *	Allocate a new inode and socket object. The two are bound together
  *	and initialised. The socket is then returned. If we are out of inodes
- *	NULL is returned.
+ *	NULL is returned. This functions uses GFP_KERNEL internally.
  */
 
 struct socket *sock_alloc(void)
@@ -561,7 +581,7 @@ struct socket *sock_alloc(void)
 EXPORT_SYMBOL(sock_alloc);
 
 /**
- *	sock_release	-	close a socket
+ *	sock_release - close a socket
  *	@sock: socket to close
  *
  *	The socket is released from the protocol stack if it has a release
@@ -617,6 +637,15 @@ void __sock_tx_timestamp(__u16 tsflags, __u8 *tx_flags)
 }
 EXPORT_SYMBOL(__sock_tx_timestamp);
 
+/**
+ *	sock_sendmsg - send a message through @sock
+ *	@sock: socket
+ *	@msg: message to send
+ *
+ *	Sends @msg through @sock, passing through LSM.
+ *	Returns the number of bytes sent, or an error code.
+ */
+
 static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
 {
 	int ret = sock->ops->sendmsg(sock, msg, msg_data_left(msg));
@@ -633,6 +662,18 @@ int sock_sendmsg(struct socket *sock, struct msghdr *msg)
 }
 EXPORT_SYMBOL(sock_sendmsg);
 
+/**
+ *	kernel_sendmsg - send a message through @sock (kernel-space)
+ *	@sock: socket
+ *	@msg: message header
+ *	@vec: kernel vec
+ *	@num: vec array length
+ *	@size: total message data size
+ *
+ *	Builds the message data with @vec and sends it through @sock.
+ *	Returns the number of bytes sent, or an error code.
+ */
+
 int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
 		   struct kvec *vec, size_t num, size_t size)
 {
@@ -641,6 +682,19 @@ int kernel_sendmsg(struct socket *sock, struct msghdr *msg,
 }
 EXPORT_SYMBOL(kernel_sendmsg);
 
+/**
+ *	kernel_sendmsg_locked - send a message through @sock (kernel-space)
+ *	@sk: sock
+ *	@msg: message header
+ *	@vec: output s/g array
+ *	@num: output s/g array length
+ *	@size: total message data size
+ *
+ *	Builds the message data with @vec and sends it through @sock.
+ *	Returns the number of bytes sent, or an error code.
+ *	Caller must hold @sk.
+ */
+
 int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg,
 			  struct kvec *vec, size_t num, size_t size)
 {
@@ -811,6 +865,16 @@ void __sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk,
 }
 EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops);
 
+/**
+ *	sock_recvmsg - receive a message from @sock
+ *	@sock: socket
+ *	@msg: message to receive
+ *	@flags: message flags
+ *
+ *	Receives @msg from @sock, passing through LSM. Returns the total number
+ *	of bytes received, or an error.
+ */
+
 static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
 				     int flags)
 {
@@ -826,20 +890,21 @@ int sock_recvmsg(struct socket *sock, struct msghdr *msg, int flags)
 EXPORT_SYMBOL(sock_recvmsg);
 
 /**
- * kernel_recvmsg - Receive a message from a socket (kernel space)
- * @sock:       The socket to receive the message from
- * @msg:        Received message
- * @vec:        Input s/g array for message data
- * @num:        Size of input s/g array
- * @size:       Number of bytes to read
- * @flags:      Message flags (MSG_DONTWAIT, etc...)
+ *	kernel_recvmsg - Receive a message from a socket (kernel space)
+ *	@sock: The socket to receive the message from
+ *	@msg: Received message
+ *	@vec: Input s/g array for message data
+ *	@num: Size of input s/g array
+ *	@size: Number of bytes to read
+ *	@flags: Message flags (MSG_DONTWAIT, etc...)
  *
- * On return the msg structure contains the scatter/gather array passed in the
- * vec argument. The array is modified so that it consists of the unfilled
- * portion of the original array.
+ *	On return the msg structure contains the scatter/gather array passed in the
+ *	vec argument. The array is modified so that it consists of the unfilled
+ *	portion of the original array.
  *
- * The returned value is the total number of bytes received, or an error.
+ *	The returned value is the total number of bytes received, or an error.
  */
+
 int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
 		   struct kvec *vec, size_t num, size_t size, int flags)
 {
@@ -1005,6 +1070,13 @@ static long sock_do_ioctl(struct net *net, struct socket *sock,
  *	what to do with it - that's up to the protocol still.
  */
 
+/**
+ *	get_net_ns - increment the refcount of the network namespace
+ *	@ns: common namespace (net)
+ *
+ *	Returns the net's common namespace.
+ */
+
 struct ns_common *get_net_ns(struct ns_common *ns)
 {
 	return &get_net(container_of(ns, struct net, ns))->ns;
@@ -1099,6 +1171,19 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg)
 	return err;
 }
 
+/**
+ *	sock_create_lite - creates a socket
+ *	@family: protocol family (AF_INET, ...)
+ *	@type: communication type (SOCK_STREAM, ...)
+ *	@protocol: protocol (0, ...)
+ *	@res: new socket
+ *
+ *	Creates a new socket and assigns it to @res, passing through LSM.
+ *	The new socket initialization is not complete, see kernel_accept().
+ *	Returns 0 or an error. On failure @res is set to %NULL.
+ *	This function internally uses GFP_KERNEL.
+ */
+
 int sock_create_lite(int family, int type, int protocol, struct socket **res)
 {
 	int err;
@@ -1224,6 +1309,21 @@ call_kill:
 }
 EXPORT_SYMBOL(sock_wake_async);
 
+/**
+ *	__sock_create - creates a socket
+ *	@net: net namespace
+ *	@family: protocol family (AF_INET, ...)
+ *	@type: communication type (SOCK_STREAM, ...)
+ *	@protocol: protocol (0, ...)
+ *	@res: new socket
+ *	@kern: boolean for kernel space sockets
+ *
+ *	Creates a new socket and assigns it to @res, passing through LSM.
+ *	Returns 0 or an error. On failure @res is set to %NULL. @kern must
+ *	be set to true if the socket resides in kernel space.
+ *	This function internally uses GFP_KERNEL.
+ */
+
 int __sock_create(struct net *net, int family, int type, int protocol,
 			 struct socket **res, int kern)
 {
@@ -1333,12 +1433,35 @@ out_release:
 }
 EXPORT_SYMBOL(__sock_create);
 
+/**
+ *	sock_create - creates a socket
+ *	@family: protocol family (AF_INET, ...)
+ *	@type: communication type (SOCK_STREAM, ...)
+ *	@protocol: protocol (0, ...)
+ *	@res: new socket
+ *
+ *	A wrapper around __sock_create().
+ *	Returns 0 or an error. This function internally uses GFP_KERNEL.
+ */
+
 int sock_create(int family, int type, int protocol, struct socket **res)
 {
 	return __sock_create(current->nsproxy->net_ns, family, type, protocol, res, 0);
 }
 EXPORT_SYMBOL(sock_create);
 
+/**
+ *	sock_create_kern - creates a socket (kernel space)
+ *	@net: net namespace
+ *	@family: protocol family (AF_INET, ...)
+ *	@type: communication type (SOCK_STREAM, ...)
+ *	@protocol: protocol (0, ...)
+ *	@res: new socket
+ *
+ *	A wrapper around __sock_create().
+ *	Returns 0 or an error. This function internally uses GFP_KERNEL.
+ */
+
 int sock_create_kern(struct net *net, int family, int type, int protocol, struct socket **res)
 {
 	return __sock_create(net, family, type, protocol, res, 1);
@@ -3322,18 +3445,46 @@ static long compat_sock_ioctl(struct file *file, unsigned int cmd,
 }
 #endif
 
+/**
+ *	kernel_bind - bind an address to a socket (kernel space)
+ *	@sock: socket
+ *	@addr: address
+ *	@addrlen: length of address
+ *
+ *	Returns 0 or an error.
+ */
+
 int kernel_bind(struct socket *sock, struct sockaddr *addr, int addrlen)
 {
 	return sock->ops->bind(sock, addr, addrlen);
 }
 EXPORT_SYMBOL(kernel_bind);
 
+/**
+ *	kernel_listen - move socket to listening state (kernel space)
+ *	@sock: socket
+ *	@backlog: pending connections queue size
+ *
+ *	Returns 0 or an error.
+ */
+
 int kernel_listen(struct socket *sock, int backlog)
 {
 	return sock->ops->listen(sock, backlog);
 }
 EXPORT_SYMBOL(kernel_listen);
 
+/**
+ *	kernel_accept - accept a connection (kernel space)
+ *	@sock: listening socket
+ *	@newsock: new connected socket
+ *	@flags: flags
+ *
+ *	@flags must be SOCK_CLOEXEC, SOCK_NONBLOCK or 0.
+ *	If it fails, @newsock is guaranteed to be %NULL.
+ *	Returns 0 or an error.
+ */
+
 int kernel_accept(struct socket *sock, struct socket **newsock, int flags)
 {
 	struct sock *sk = sock->sk;
@@ -3359,6 +3510,19 @@ done:
 }
 EXPORT_SYMBOL(kernel_accept);
 
+/**
+ *	kernel_connect - connect a socket (kernel space)
+ *	@sock: socket
+ *	@addr: address
+ *	@addrlen: address length
+ *	@flags: flags (O_NONBLOCK, ...)
+ *
+ *	For datagram sockets, @addr is the addres to which datagrams are sent
+ *	by default, and the only address from which datagrams are received.
+ *	For stream sockets, attempts to connect to @addr.
+ *	Returns 0 or an error code.
+ */
+
 int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
 		   int flags)
 {
@@ -3366,18 +3530,48 @@ int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
 }
 EXPORT_SYMBOL(kernel_connect);
 
+/**
+ *	kernel_getsockname - get the address which the socket is bound (kernel space)
+ *	@sock: socket
+ *	@addr: address holder
+ *
+ * 	Fills the @addr pointer with the address which the socket is bound.
+ *	Returns 0 or an error code.
+ */
+
 int kernel_getsockname(struct socket *sock, struct sockaddr *addr)
 {
 	return sock->ops->getname(sock, addr, 0);
 }
 EXPORT_SYMBOL(kernel_getsockname);
 
+/**
+ *	kernel_peername - get the address which the socket is connected (kernel space)
+ *	@sock: socket
+ *	@addr: address holder
+ *
+ * 	Fills the @addr pointer with the address which the socket is connected.
+ *	Returns 0 or an error code.
+ */
+
 int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
 {
 	return sock->ops->getname(sock, addr, 1);
 }
 EXPORT_SYMBOL(kernel_getpeername);
 
+/**
+ *	kernel_getsockopt - get a socket option (kernel space)
+ *	@sock: socket
+ *	@level: API level (SOL_SOCKET, ...)
+ *	@optname: option tag
+ *	@optval: option value
+ *	@optlen: option length
+ *
+ *	Assigns the option length to @optlen.
+ *	Returns 0 or an error.
+ */
+
 int kernel_getsockopt(struct socket *sock, int level, int optname,
 			char *optval, int *optlen)
 {
@@ -3400,6 +3594,17 @@ int kernel_getsockopt(struct socket *sock, int level, int optname,
 }
 EXPORT_SYMBOL(kernel_getsockopt);
 
+/**
+ *	kernel_setsockopt - set a socket option (kernel space)
+ *	@sock: socket
+ *	@level: API level (SOL_SOCKET, ...)
+ *	@optname: option tag
+ *	@optval: option value
+ *	@optlen: option length
+ *
+ *	Returns 0 or an error.
+ */
+
 int kernel_setsockopt(struct socket *sock, int level, int optname,
 			char *optval, unsigned int optlen)
 {
@@ -3420,6 +3625,17 @@ int kernel_setsockopt(struct socket *sock, int level, int optname,
 }
 EXPORT_SYMBOL(kernel_setsockopt);
 
+/**
+ *	kernel_sendpage - send a &page through a socket (kernel space)
+ *	@sock: socket
+ *	@page: page
+ *	@offset: page offset
+ *	@size: total size in bytes
+ *	@flags: flags (MSG_DONTWAIT, ...)
+ *
+ *	Returns the total amount sent in bytes or an error.
+ */
+
 int kernel_sendpage(struct socket *sock, struct page *page, int offset,
 		    size_t size, int flags)
 {
@@ -3430,6 +3646,18 @@ int kernel_sendpage(struct socket *sock, struct page *page, int offset,
 }
 EXPORT_SYMBOL(kernel_sendpage);
 
+/**
+ *	kernel_sendpage_locked - send a &page through the locked sock (kernel space)
+ *	@sk: sock
+ *	@page: page
+ *	@offset: page offset
+ *	@size: total size in bytes
+ *	@flags: flags (MSG_DONTWAIT, ...)
+ *
+ *	Returns the total amount sent in bytes or an error.
+ *	Caller must hold @sk.
+ */
+
 int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
 			   size_t size, int flags)
 {
@@ -3443,17 +3671,30 @@ int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
 }
 EXPORT_SYMBOL(kernel_sendpage_locked);
 
+/**
+ *	kernel_shutdown - shut down part of a full-duplex connection (kernel space)
+ *	@sock: socket
+ *	@how: connection part
+ *
+ *	Returns 0 or an error.
+ */
+
 int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how)
 {
 	return sock->ops->shutdown(sock, how);
 }
 EXPORT_SYMBOL(kernel_sock_shutdown);
 
-/* This routine returns the IP overhead imposed by a socket i.e.
- * the length of the underlying IP header, depending on whether
- * this is an IPv4 or IPv6 socket and the length from IP options turned
- * on at the socket. Assumes that the caller has a lock on the socket.
+/**
+ *	kernel_sock_ip_overhead - returns the IP overhead imposed by a socket
+ *	@sk: socket
+ *
+ *	This routine returns the IP overhead imposed by a socket i.e.
+ *	the length of the underlying IP header, depending on whether
+ *	this is an IPv4 or IPv6 socket and the length from IP options turned
+ *	on at the socket. Assumes that the caller has a lock on the socket.
  */
+
 u32 kernel_sock_ip_overhead(struct sock *sk)
 {
 	struct inet_sock *inet;
-- 
cgit v1.2.3


From 044175a06706d516aa42874bb44dbbfc3c4d20eb Mon Sep 17 00:00:00 2001
From: Björn Töpel <bjorn.topel@intel.com>
Date: Wed, 13 Mar 2019 15:15:49 +0100
Subject: xsk: fix umem memory leak on cleanup
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When the umem is cleaned up, the task that created it might already be
gone. If the task was gone, the xdp_umem_release function did not free
the pages member of struct xdp_umem.

It turned out that the task lookup was not needed at all; The code was
a left-over when we moved from task accounting to user accounting [1].

This patch fixes the memory leak by removing the task lookup logic
completely.

[1] https://lore.kernel.org/netdev/20180131135356.19134-3-bjorn.topel@gmail.com/

Link: https://lore.kernel.org/netdev/c1cb2ca8-6a14-3980-8672-f3de0bb38dfd@suse.cz/
Fixes: c0c77d8fb787 ("xsk: add user memory registration support sockopt")
Reported-by: Jiri Slaby <jslaby@suse.cz>
Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 include/net/xdp_sock.h |  1 -
 net/xdp/xdp_umem.c     | 19 +------------------
 2 files changed, 1 insertion(+), 19 deletions(-)

(limited to 'include')

diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 61cf7dbb6782..d074b6d60f8a 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -36,7 +36,6 @@ struct xdp_umem {
 	u32 headroom;
 	u32 chunk_size_nohr;
 	struct user_struct *user;
-	struct pid *pid;
 	unsigned long address;
 	refcount_t users;
 	struct work_struct work;
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index 77520eacee8f..989e52386c35 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -193,9 +193,6 @@ static void xdp_umem_unaccount_pages(struct xdp_umem *umem)
 
 static void xdp_umem_release(struct xdp_umem *umem)
 {
-	struct task_struct *task;
-	struct mm_struct *mm;
-
 	xdp_umem_clear_dev(umem);
 
 	ida_simple_remove(&umem_ida, umem->id);
@@ -214,21 +211,10 @@ static void xdp_umem_release(struct xdp_umem *umem)
 
 	xdp_umem_unpin_pages(umem);
 
-	task = get_pid_task(umem->pid, PIDTYPE_PID);
-	put_pid(umem->pid);
-	if (!task)
-		goto out;
-	mm = get_task_mm(task);
-	put_task_struct(task);
-	if (!mm)
-		goto out;
-
-	mmput(mm);
 	kfree(umem->pages);
 	umem->pages = NULL;
 
 	xdp_umem_unaccount_pages(umem);
-out:
 	kfree(umem);
 }
 
@@ -357,7 +343,6 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
 	if (size_chk < 0)
 		return -EINVAL;
 
-	umem->pid = get_task_pid(current, PIDTYPE_PID);
 	umem->address = (unsigned long)addr;
 	umem->chunk_mask = ~((u64)chunk_size - 1);
 	umem->size = size;
@@ -373,7 +358,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
 
 	err = xdp_umem_account_pages(umem);
 	if (err)
-		goto out;
+		return err;
 
 	err = xdp_umem_pin_pages(umem);
 	if (err)
@@ -392,8 +377,6 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
 
 out_account:
 	xdp_umem_unaccount_pages(umem);
-out:
-	put_pid(umem->pid);
 	return err;
 }
 
-- 
cgit v1.2.3


From cd1b772d4881d1cd15b90ec17aab9ac7950e8850 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Mon, 29 Oct 2018 16:32:31 +0100
Subject: driver core: remove BUS_ATTR()

There are now no in-kernel users of BUS_ATTR() so drop it from device.h

Everyone should use BUS_ATTR_RO/RW/WO() from now on.

Cc: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 include/linux/device.h | 2 --
 1 file changed, 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/device.h b/include/linux/device.h
index b425a7ee04ce..4e6987e11f68 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -49,8 +49,6 @@ struct bus_attribute {
 	ssize_t (*store)(struct bus_type *bus, const char *buf, size_t count);
 };
 
-#define BUS_ATTR(_name, _mode, _show, _store)	\
-	struct bus_attribute bus_attr_##_name = __ATTR(_name, _mode, _show, _store)
 #define BUS_ATTR_RW(_name) \
 	struct bus_attribute bus_attr_##_name = __ATTR_RW(_name)
 #define BUS_ATTR_RO(_name) \
-- 
cgit v1.2.3


From a4dc6a49156b1f8d6e17251ffda17c9e6a5db78a Mon Sep 17 00:00:00 2001
From: Maxime Chevallier <maxime.chevallier@bootlin.com>
Date: Sat, 16 Mar 2019 14:41:30 +0100
Subject: packets: Always register packet sk in the same order

When using fanouts with AF_PACKET, the demux functions such as
fanout_demux_cpu will return an index in the fanout socket array, which
corresponds to the selected socket.

The ordering of this array depends on the order the sockets were added
to a given fanout group, so for FANOUT_CPU this means sockets are bound
to cpus in the order they are configured, which is OK.

However, when stopping then restarting the interface these sockets are
bound to, the sockets are reassigned to the fanout group in the reverse
order, due to the fact that they were inserted at the head of the
interface's AF_PACKET socket list.

This means that traffic that was directed to the first socket in the
fanout group is now directed to the last one after an interface restart.

In the case of FANOUT_CPU, traffic from CPU0 will be directed to the
socket that used to receive traffic from the last CPU after an interface
restart.

This commit introduces a helper to add a socket at the tail of a list,
then uses it to register AF_PACKET sockets.

Note that this changes the order in which sockets are listed in /proc and
with sock_diag.

Fixes: dc99f600698d ("packet: Add fanout support")
Signed-off-by: Maxime Chevallier <maxime.chevallier@bootlin.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h     | 6 ++++++
 net/packet/af_packet.c | 2 +-
 2 files changed, 7 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/sock.h b/include/net/sock.h
index 328cb7cb7b0b..8de5ee258b93 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -710,6 +710,12 @@ static inline void sk_add_node_rcu(struct sock *sk, struct hlist_head *list)
 		hlist_add_head_rcu(&sk->sk_node, list);
 }
 
+static inline void sk_add_node_tail_rcu(struct sock *sk, struct hlist_head *list)
+{
+	sock_hold(sk);
+	hlist_add_tail_rcu(&sk->sk_node, list);
+}
+
 static inline void __sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list)
 {
 	hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 8376bc1c1508..8754d7c93b84 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -3243,7 +3243,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
 	}
 
 	mutex_lock(&net->packet.sklist_lock);
-	sk_add_node_rcu(sk, &net->packet.sklist);
+	sk_add_node_tail_rcu(sk, &net->packet.sklist);
 	mutex_unlock(&net->packet.sklist_lock);
 
 	preempt_disable();
-- 
cgit v1.2.3


From 273160ffc6b993c7c91627f5a84799c66dfe4dee Mon Sep 17 00:00:00 2001
From: Xin Long <lucien.xin@gmail.com>
Date: Mon, 18 Mar 2019 19:47:00 +0800
Subject: sctp: get sctphdr by offset in sctp_compute_cksum

sctp_hdr(skb) only works when skb->transport_header is set properly.

But in Netfilter, skb->transport_header for ipv6 is not guaranteed
to be right value for sctphdr. It would cause to fail to check the
checksum for sctp packets.

So fix it by using offset, which is always right in all places.

v1->v2:
  - Fix the changelog.

Fixes: e6d8b64b34aa ("net: sctp: fix and consolidate SCTP checksumming code")
Reported-by: Li Shuang <shuali@redhat.com>
Signed-off-by: Xin Long <lucien.xin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/checksum.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/sctp/checksum.h b/include/net/sctp/checksum.h
index 32ee65a30aff..1c6e6c0766ca 100644
--- a/include/net/sctp/checksum.h
+++ b/include/net/sctp/checksum.h
@@ -61,7 +61,7 @@ static inline __wsum sctp_csum_combine(__wsum csum, __wsum csum2,
 static inline __le32 sctp_compute_cksum(const struct sk_buff *skb,
 					unsigned int offset)
 {
-	struct sctphdr *sh = sctp_hdr(skb);
+	struct sctphdr *sh = (struct sctphdr *)(skb->data + offset);
 	const struct skb_checksum_ops ops = {
 		.update  = sctp_csum_update,
 		.combine = sctp_csum_combine,
-- 
cgit v1.2.3


From 85d0966fa57e0ef2d30d913c98ca93674f7a03c9 Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Wed, 20 Mar 2019 14:59:59 +0100
Subject: net/sched: prepare TC actions to properly validate the control action

- pass a pointer to struct tcf_proto in each actions's init() handler,
  to allow validating the control action, checking whether the chain
  exists and (eventually) refcounting it.
- remove code that validates the control action after a successful call
  to the action's init() handler, and replace it with a test that forbids
  addition of actions having 'goto_chain' and NULL goto_chain pointer at
  the same time.
- add tcf_action_check_ctrlact(), that will validate the control action
  and eventually allocate the action 'goto_chain' within the init()
  handler.
- add tcf_action_set_ctrlact(), that will assign the control action and
  swap the current 'goto_chain' pointer with the new given one.

This disallows 'goto_chain' on actions that don't initialize it properly
in their init() handler, i.e. calling tcf_action_check_ctrlact() after
successful IDR reservation and then calling tcf_action_set_ctrlact()
to assign 'goto_chain' and 'tcf_action' consistently.

By doing this, the kernel does not leak anymore refcounts when a valid
'goto chain' handle is replaced in TC actions, causing kmemleak splats
like the following one:

 # tc chain add dev dd0 chain 42 ingress protocol ip flower \
 > ip_proto tcp action drop
 # tc chain add dev dd0 chain 43 ingress protocol ip flower \
 > ip_proto udp action drop
 # tc filter add dev dd0 ingress matchall \
 > action gact goto chain 42 index 66
 # tc filter replace dev dd0 ingress matchall \
 > action gact goto chain 43 index 66
 # echo scan >/sys/kernel/debug/kmemleak
 <...>
 unreferenced object 0xffff93c0ee09f000 (size 1024):
 comm "tc", pid 2565, jiffies 4295339808 (age 65.426s)
 hex dump (first 32 bytes):
   00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
   00 00 00 00 08 00 06 00 00 00 00 00 00 00 00 00  ................
 backtrace:
   [<000000009b63f92d>] tc_ctl_chain+0x3d2/0x4c0
   [<00000000683a8d72>] rtnetlink_rcv_msg+0x263/0x2d0
   [<00000000ddd88f8e>] netlink_rcv_skb+0x4a/0x110
   [<000000006126a348>] netlink_unicast+0x1a0/0x250
   [<00000000b3340877>] netlink_sendmsg+0x2c1/0x3c0
   [<00000000a25a2171>] sock_sendmsg+0x36/0x40
   [<00000000f19ee1ec>] ___sys_sendmsg+0x280/0x2f0
   [<00000000d0422042>] __sys_sendmsg+0x5e/0xa0
   [<000000007a6c61f9>] do_syscall_64+0x5b/0x180
   [<00000000ccd07542>] entry_SYSCALL_64_after_hwframe+0x44/0xa9
   [<0000000013eaa334>] 0xffffffffffffffff

Fixes: db50514f9a9c ("net: sched: add termination action to allow goto chain")
Fixes: 97763dc0f401 ("net_sched: reject unknown tcfa_action values")
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/act_api.h      |  7 +++-
 net/sched/act_api.c        | 97 ++++++++++++++++++++++++++--------------------
 net/sched/act_bpf.c        |  2 +-
 net/sched/act_connmark.c   |  1 +
 net/sched/act_csum.c       |  2 +-
 net/sched/act_gact.c       |  2 +-
 net/sched/act_ife.c        |  2 +-
 net/sched/act_ipt.c        | 11 +++---
 net/sched/act_mirred.c     |  1 +
 net/sched/act_nat.c        |  3 +-
 net/sched/act_pedit.c      |  2 +-
 net/sched/act_police.c     |  1 +
 net/sched/act_sample.c     |  2 +-
 net/sched/act_simple.c     |  2 +-
 net/sched/act_skbedit.c    |  1 +
 net/sched/act_skbmod.c     |  1 +
 net/sched/act_tunnel_key.c |  1 +
 net/sched/act_vlan.c       |  2 +-
 18 files changed, 84 insertions(+), 56 deletions(-)

(limited to 'include')

diff --git a/include/net/act_api.h b/include/net/act_api.h
index c745e9ccfab2..54fbb49bd08a 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -90,7 +90,7 @@ struct tc_action_ops {
 	int     (*lookup)(struct net *net, struct tc_action **a, u32 index);
 	int     (*init)(struct net *net, struct nlattr *nla,
 			struct nlattr *est, struct tc_action **act, int ovr,
-			int bind, bool rtnl_held,
+			int bind, bool rtnl_held, struct tcf_proto *tp,
 			struct netlink_ext_ack *extack);
 	int     (*walk)(struct net *, struct sk_buff *,
 			struct netlink_callback *, int,
@@ -181,6 +181,11 @@ int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, int);
 int tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int, int);
 int tcf_action_copy_stats(struct sk_buff *, struct tc_action *, int);
 
+int tcf_action_check_ctrlact(int action, struct tcf_proto *tp,
+			     struct tcf_chain **handle,
+			     struct netlink_ext_ack *newchain);
+struct tcf_chain *tcf_action_set_ctrlact(struct tc_action *a, int action,
+					 struct tcf_chain *newchain);
 #endif /* CONFIG_NET_CLS_ACT */
 
 static inline void tcf_action_stats_update(struct tc_action *a, u64 bytes,
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index aecf1bf233c8..fe67b98ac641 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -28,23 +28,6 @@
 #include <net/act_api.h>
 #include <net/netlink.h>
 
-static int tcf_action_goto_chain_init(struct tc_action *a, struct tcf_proto *tp)
-{
-	u32 chain_index = a->tcfa_action & TC_ACT_EXT_VAL_MASK;
-
-	if (!tp)
-		return -EINVAL;
-	a->goto_chain = tcf_chain_get_by_act(tp->chain->block, chain_index);
-	if (!a->goto_chain)
-		return -ENOMEM;
-	return 0;
-}
-
-static void tcf_action_goto_chain_fini(struct tc_action *a)
-{
-	tcf_chain_put_by_act(a->goto_chain);
-}
-
 static void tcf_action_goto_chain_exec(const struct tc_action *a,
 				       struct tcf_result *res)
 {
@@ -71,6 +54,53 @@ static void tcf_set_action_cookie(struct tc_cookie __rcu **old_cookie,
 		call_rcu(&old->rcu, tcf_free_cookie_rcu);
 }
 
+int tcf_action_check_ctrlact(int action, struct tcf_proto *tp,
+			     struct tcf_chain **newchain,
+			     struct netlink_ext_ack *extack)
+{
+	int opcode = TC_ACT_EXT_OPCODE(action), ret = -EINVAL;
+	u32 chain_index;
+
+	if (!opcode)
+		ret = action > TC_ACT_VALUE_MAX ? -EINVAL : 0;
+	else if (opcode <= TC_ACT_EXT_OPCODE_MAX || action == TC_ACT_UNSPEC)
+		ret = 0;
+	if (ret) {
+		NL_SET_ERR_MSG(extack, "invalid control action");
+		goto end;
+	}
+
+	if (TC_ACT_EXT_CMP(action, TC_ACT_GOTO_CHAIN)) {
+		chain_index = action & TC_ACT_EXT_VAL_MASK;
+		if (!tp || !newchain) {
+			ret = -EINVAL;
+			NL_SET_ERR_MSG(extack,
+				       "can't goto NULL proto/chain");
+			goto end;
+		}
+		*newchain = tcf_chain_get_by_act(tp->chain->block, chain_index);
+		if (!*newchain) {
+			ret = -ENOMEM;
+			NL_SET_ERR_MSG(extack,
+				       "can't allocate goto_chain");
+		}
+	}
+end:
+	return ret;
+}
+EXPORT_SYMBOL(tcf_action_check_ctrlact);
+
+struct tcf_chain *tcf_action_set_ctrlact(struct tc_action *a, int action,
+					 struct tcf_chain *newchain)
+{
+	struct tcf_chain *oldchain = a->goto_chain;
+
+	a->tcfa_action = action;
+	a->goto_chain = newchain;
+	return oldchain;
+}
+EXPORT_SYMBOL(tcf_action_set_ctrlact);
+
 /* XXX: For standalone actions, we don't need a RCU grace period either, because
  * actions are always connected to filters and filters are already destroyed in
  * RCU callbacks, so after a RCU grace period actions are already disconnected
@@ -78,13 +108,15 @@ static void tcf_set_action_cookie(struct tc_cookie __rcu **old_cookie,
  */
 static void free_tcf(struct tc_action *p)
 {
+	struct tcf_chain *chain = p->goto_chain;
+
 	free_percpu(p->cpu_bstats);
 	free_percpu(p->cpu_bstats_hw);
 	free_percpu(p->cpu_qstats);
 
 	tcf_set_action_cookie(&p->act_cookie, NULL);
-	if (p->goto_chain)
-		tcf_action_goto_chain_fini(p);
+	if (chain)
+		tcf_chain_put_by_act(chain);
 
 	kfree(p);
 }
@@ -800,15 +832,6 @@ static struct tc_cookie *nla_memdup_cookie(struct nlattr **tb)
 	return c;
 }
 
-static bool tcf_action_valid(int action)
-{
-	int opcode = TC_ACT_EXT_OPCODE(action);
-
-	if (!opcode)
-		return action <= TC_ACT_VALUE_MAX;
-	return opcode <= TC_ACT_EXT_OPCODE_MAX || action == TC_ACT_UNSPEC;
-}
-
 struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 				    struct nlattr *nla, struct nlattr *est,
 				    char *name, int ovr, int bind,
@@ -890,10 +913,10 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 	/* backward compatibility for policer */
 	if (name == NULL)
 		err = a_o->init(net, tb[TCA_ACT_OPTIONS], est, &a, ovr, bind,
-				rtnl_held, extack);
+				rtnl_held, tp, extack);
 	else
 		err = a_o->init(net, nla, est, &a, ovr, bind, rtnl_held,
-				extack);
+				tp, extack);
 	if (err < 0)
 		goto err_mod;
 
@@ -907,18 +930,10 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 	if (err != ACT_P_CREATED)
 		module_put(a_o->owner);
 
-	if (TC_ACT_EXT_CMP(a->tcfa_action, TC_ACT_GOTO_CHAIN)) {
-		err = tcf_action_goto_chain_init(a, tp);
-		if (err) {
-			tcf_action_destroy_1(a, bind);
-			NL_SET_ERR_MSG(extack, "Failed to init TC action chain");
-			return ERR_PTR(err);
-		}
-	}
-
-	if (!tcf_action_valid(a->tcfa_action)) {
+	if (TC_ACT_EXT_CMP(a->tcfa_action, TC_ACT_GOTO_CHAIN) &&
+	    !a->goto_chain) {
 		tcf_action_destroy_1(a, bind);
-		NL_SET_ERR_MSG(extack, "Invalid control action value");
+		NL_SET_ERR_MSG(extack, "can't use goto chain with NULL chain");
 		return ERR_PTR(-EINVAL);
 	}
 
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index aa5c38d11a30..3c0468f2aae6 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -278,7 +278,7 @@ static void tcf_bpf_prog_fill_cfg(const struct tcf_bpf *prog,
 static int tcf_bpf_init(struct net *net, struct nlattr *nla,
 			struct nlattr *est, struct tc_action **act,
 			int replace, int bind, bool rtnl_held,
-			struct netlink_ext_ack *extack)
+			struct tcf_proto *tp, struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, bpf_net_id);
 	struct nlattr *tb[TCA_ACT_BPF_MAX + 1];
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 5d24993cccfe..44aa046a92ea 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -97,6 +97,7 @@ static const struct nla_policy connmark_policy[TCA_CONNMARK_MAX + 1] = {
 static int tcf_connmark_init(struct net *net, struct nlattr *nla,
 			     struct nlattr *est, struct tc_action **a,
 			     int ovr, int bind, bool rtnl_held,
+			     struct tcf_proto *tp,
 			     struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, connmark_net_id);
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index c79aca29505e..9ba0f61a1e82 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -46,7 +46,7 @@ static struct tc_action_ops act_csum_ops;
 
 static int tcf_csum_init(struct net *net, struct nlattr *nla,
 			 struct nlattr *est, struct tc_action **a, int ovr,
-			 int bind, bool rtnl_held,
+			 int bind, bool rtnl_held, struct tcf_proto *tp,
 			 struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, csum_net_id);
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index 93da0004e9f4..b8ad311bd8cc 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -57,7 +57,7 @@ static const struct nla_policy gact_policy[TCA_GACT_MAX + 1] = {
 static int tcf_gact_init(struct net *net, struct nlattr *nla,
 			 struct nlattr *est, struct tc_action **a,
 			 int ovr, int bind, bool rtnl_held,
-			 struct netlink_ext_ack *extack)
+			 struct tcf_proto *tp, struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, gact_net_id);
 	struct nlattr *tb[TCA_GACT_MAX + 1];
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 9b1f2b3990ee..c1ba74d5c1e3 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -469,7 +469,7 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb,
 static int tcf_ife_init(struct net *net, struct nlattr *nla,
 			struct nlattr *est, struct tc_action **a,
 			int ovr, int bind, bool rtnl_held,
-			struct netlink_ext_ack *extack)
+			struct tcf_proto *tp, struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, ife_net_id);
 	struct nlattr *tb[TCA_IFE_MAX + 1];
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 98f5b6ea77b4..04a0b5c61194 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -97,7 +97,8 @@ static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = {
 
 static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla,
 			  struct nlattr *est, struct tc_action **a,
-			  const struct tc_action_ops *ops, int ovr, int bind)
+			  const struct tc_action_ops *ops, int ovr, int bind,
+			  struct tcf_proto *tp)
 {
 	struct tc_action_net *tn = net_generic(net, id);
 	struct nlattr *tb[TCA_IPT_MAX + 1];
@@ -205,20 +206,20 @@ err1:
 
 static int tcf_ipt_init(struct net *net, struct nlattr *nla,
 			struct nlattr *est, struct tc_action **a, int ovr,
-			int bind, bool rtnl_held,
+			int bind, bool rtnl_held, struct tcf_proto *tp,
 			struct netlink_ext_ack *extack)
 {
 	return __tcf_ipt_init(net, ipt_net_id, nla, est, a, &act_ipt_ops, ovr,
-			      bind);
+			      bind, tp);
 }
 
 static int tcf_xt_init(struct net *net, struct nlattr *nla,
 		       struct nlattr *est, struct tc_action **a, int ovr,
-		       int bind, bool unlocked,
+		       int bind, bool unlocked, struct tcf_proto *tp,
 		       struct netlink_ext_ack *extack)
 {
 	return __tcf_ipt_init(net, xt_net_id, nla, est, a, &act_xt_ops, ovr,
-			      bind);
+			      bind, tp);
 }
 
 static int tcf_ipt_act(struct sk_buff *skb, const struct tc_action *a,
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 6692fd054617..383f4024452c 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -94,6 +94,7 @@ static struct tc_action_ops act_mirred_ops;
 static int tcf_mirred_init(struct net *net, struct nlattr *nla,
 			   struct nlattr *est, struct tc_action **a,
 			   int ovr, int bind, bool rtnl_held,
+			   struct tcf_proto *tp,
 			   struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, mirred_net_id);
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 543eab9193f1..de4b493e26d2 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -38,7 +38,8 @@ static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = {
 
 static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
 			struct tc_action **a, int ovr, int bind,
-			bool rtnl_held, struct netlink_ext_ack *extack)
+			bool rtnl_held,	struct tcf_proto *tp,
+			struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, nat_net_id);
 	struct nlattr *tb[TCA_NAT_MAX + 1];
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index a80373878df7..8ca82aefa11a 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -138,7 +138,7 @@ nla_failure:
 static int tcf_pedit_init(struct net *net, struct nlattr *nla,
 			  struct nlattr *est, struct tc_action **a,
 			  int ovr, int bind, bool rtnl_held,
-			  struct netlink_ext_ack *extack)
+			  struct tcf_proto *tp, struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, pedit_net_id);
 	struct nlattr *tb[TCA_PEDIT_MAX + 1];
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 8271a6263824..229eba7925e5 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -83,6 +83,7 @@ static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = {
 static int tcf_police_init(struct net *net, struct nlattr *nla,
 			       struct nlattr *est, struct tc_action **a,
 			       int ovr, int bind, bool rtnl_held,
+			       struct tcf_proto *tp,
 			       struct netlink_ext_ack *extack)
 {
 	int ret = 0, tcfp_result = TC_ACT_OK, err, size;
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 203e399e5c85..36b8adbe935d 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -37,7 +37,7 @@ static const struct nla_policy sample_policy[TCA_SAMPLE_MAX + 1] = {
 
 static int tcf_sample_init(struct net *net, struct nlattr *nla,
 			   struct nlattr *est, struct tc_action **a, int ovr,
-			   int bind, bool rtnl_held,
+			   int bind, bool rtnl_held, struct tcf_proto *tp,
 			   struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, sample_net_id);
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index d54cb608dbaf..4916dc3e3668 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -78,7 +78,7 @@ static const struct nla_policy simple_policy[TCA_DEF_MAX + 1] = {
 static int tcf_simp_init(struct net *net, struct nlattr *nla,
 			 struct nlattr *est, struct tc_action **a,
 			 int ovr, int bind, bool rtnl_held,
-			 struct netlink_ext_ack *extack)
+			 struct tcf_proto *tp, struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, simp_net_id);
 	struct nlattr *tb[TCA_DEF_MAX + 1];
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 65879500b688..4566eff3d027 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -96,6 +96,7 @@ static const struct nla_policy skbedit_policy[TCA_SKBEDIT_MAX + 1] = {
 static int tcf_skbedit_init(struct net *net, struct nlattr *nla,
 			    struct nlattr *est, struct tc_action **a,
 			    int ovr, int bind, bool rtnl_held,
+			    struct tcf_proto *tp,
 			    struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, skbedit_net_id);
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index 7bac1d78e7a3..b9ab2c8f07f1 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -82,6 +82,7 @@ static const struct nla_policy skbmod_policy[TCA_SKBMOD_MAX + 1] = {
 static int tcf_skbmod_init(struct net *net, struct nlattr *nla,
 			   struct nlattr *est, struct tc_action **a,
 			   int ovr, int bind, bool rtnl_held,
+			   struct tcf_proto *tp,
 			   struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, skbmod_net_id);
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index 7c6591b991d5..fc295d91559a 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -210,6 +210,7 @@ static void tunnel_key_release_params(struct tcf_tunnel_key_params *p)
 static int tunnel_key_init(struct net *net, struct nlattr *nla,
 			   struct nlattr *est, struct tc_action **a,
 			   int ovr, int bind, bool rtnl_held,
+			   struct tcf_proto *tp,
 			   struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index ac0061599225..4651ee15e35d 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -105,7 +105,7 @@ static const struct nla_policy vlan_policy[TCA_VLAN_MAX + 1] = {
 static int tcf_vlan_init(struct net *net, struct nlattr *nla,
 			 struct nlattr *est, struct tc_action **a,
 			 int ovr, int bind, bool rtnl_held,
-			 struct netlink_ext_ack *extack)
+			 struct tcf_proto *tp, struct netlink_ext_ack *extack)
 {
 	struct tc_action_net *tn = net_generic(net, vlan_net_id);
 	struct nlattr *tb[TCA_VLAN_MAX + 1];
-- 
cgit v1.2.3


From fe384e2fa36ca084a456fd30558cccc75b4b3fbd Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Wed, 20 Mar 2019 15:00:15 +0100
Subject: net/sched: don't dereference a->goto_chain to read the chain index

callers of tcf_gact_goto_chain_index() can potentially read an old value
of the chain index, or even dereference a NULL 'goto_chain' pointer,
because 'goto_chain' and 'tcfa_action' are read in the traffic path
without caring of concurrent write in the control path. The most recent
value of chain index can be read also from a->tcfa_action (it's encoded
there together with TC_ACT_GOTO_CHAIN bits), so we don't really need to
dereference 'goto_chain': just read the chain id from the control action.

Fixes: e457d86ada27 ("net: sched: add couple of goto_chain helpers")
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tc_act/tc_gact.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/tc_act/tc_gact.h b/include/net/tc_act/tc_gact.h
index ee8d005f56fc..eb8f01c819e6 100644
--- a/include/net/tc_act/tc_gact.h
+++ b/include/net/tc_act/tc_gact.h
@@ -56,7 +56,7 @@ static inline bool is_tcf_gact_goto_chain(const struct tc_action *a)
 
 static inline u32 tcf_gact_goto_chain_index(const struct tc_action *a)
 {
-	return a->goto_chain->index;
+	return READ_ONCE(a->tcfa_action) & TC_ACT_EXT_VAL_MASK;
 }
 
 #endif /* __NET_TC_GACT_H */
-- 
cgit v1.2.3


From ee3bbfe806cdb46b02cda63626cb50a7a7b19fc5 Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Wed, 20 Mar 2019 15:00:16 +0100
Subject: net/sched: let actions use RCU to access 'goto_chain'

use RCU when accessing the action chain, to avoid use after free in the
traffic path when 'goto chain' is replaced on existing TC actions (see
script below). Since the control action is read in the traffic path
without holding the action spinlock, we need to explicitly ensure that
a->goto_chain is not NULL before dereferencing (i.e it's not sufficient
to rely on the value of TC_ACT_GOTO_CHAIN bits). Not doing so caused NULL
dereferences in tcf_action_goto_chain_exec() when the following script:

 # tc chain add dev dd0 chain 42 ingress protocol ip flower \
 > ip_proto udp action pass index 4
 # tc filter add dev dd0 ingress protocol ip flower \
 > ip_proto udp action csum udp goto chain 42 index 66
 # tc chain del dev dd0 chain 42 ingress
 (start UDP traffic towards dd0)
 # tc action replace action csum udp pass index 66

was run repeatedly for several hours.

Suggested-by: Cong Wang <xiyou.wangcong@gmail.com>
Suggested-by: Vlad Buslov <vladbu@mellanox.com>
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/act_api.h     |  2 +-
 include/net/sch_generic.h |  1 +
 net/sched/act_api.c       | 18 ++++++++++--------
 net/sched/cls_api.c       |  2 +-
 4 files changed, 13 insertions(+), 10 deletions(-)

(limited to 'include')

diff --git a/include/net/act_api.h b/include/net/act_api.h
index 54fbb49bd08a..c61a1bf4e3de 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -39,7 +39,7 @@ struct tc_action {
 	struct gnet_stats_basic_cpu __percpu *cpu_bstats_hw;
 	struct gnet_stats_queue __percpu *cpu_qstats;
 	struct tc_cookie	__rcu *act_cookie;
-	struct tcf_chain	*goto_chain;
+	struct tcf_chain	__rcu *goto_chain;
 };
 #define tcf_index	common.tcfa_index
 #define tcf_refcnt	common.tcfa_refcnt
diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 31284c078d06..7d1a0483a17b 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -378,6 +378,7 @@ struct tcf_chain {
 	bool flushing;
 	const struct tcf_proto_ops *tmplt_ops;
 	void *tmplt_priv;
+	struct rcu_head rcu;
 };
 
 struct tcf_block {
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index fe67b98ac641..5a87e271d35a 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -31,7 +31,7 @@
 static void tcf_action_goto_chain_exec(const struct tc_action *a,
 				       struct tcf_result *res)
 {
-	const struct tcf_chain *chain = a->goto_chain;
+	const struct tcf_chain *chain = rcu_dereference_bh(a->goto_chain);
 
 	res->goto_tp = rcu_dereference_bh(chain->filter_chain);
 }
@@ -91,13 +91,11 @@ end:
 EXPORT_SYMBOL(tcf_action_check_ctrlact);
 
 struct tcf_chain *tcf_action_set_ctrlact(struct tc_action *a, int action,
-					 struct tcf_chain *newchain)
+					 struct tcf_chain *goto_chain)
 {
-	struct tcf_chain *oldchain = a->goto_chain;
-
 	a->tcfa_action = action;
-	a->goto_chain = newchain;
-	return oldchain;
+	rcu_swap_protected(a->goto_chain, goto_chain, 1);
+	return goto_chain;
 }
 EXPORT_SYMBOL(tcf_action_set_ctrlact);
 
@@ -108,7 +106,7 @@ EXPORT_SYMBOL(tcf_action_set_ctrlact);
  */
 static void free_tcf(struct tc_action *p)
 {
-	struct tcf_chain *chain = p->goto_chain;
+	struct tcf_chain *chain = rcu_dereference_protected(p->goto_chain, 1);
 
 	free_percpu(p->cpu_bstats);
 	free_percpu(p->cpu_bstats_hw);
@@ -686,6 +684,10 @@ repeat:
 					return TC_ACT_OK;
 			}
 		} else if (TC_ACT_EXT_CMP(ret, TC_ACT_GOTO_CHAIN)) {
+			if (unlikely(!rcu_access_pointer(a->goto_chain))) {
+				net_warn_ratelimited("can't go to NULL chain!\n");
+				return TC_ACT_SHOT;
+			}
 			tcf_action_goto_chain_exec(a, res);
 		}
 
@@ -931,7 +933,7 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 		module_put(a_o->owner);
 
 	if (TC_ACT_EXT_CMP(a->tcfa_action, TC_ACT_GOTO_CHAIN) &&
-	    !a->goto_chain) {
+	    !rcu_access_pointer(a->goto_chain)) {
 		tcf_action_destroy_1(a, bind);
 		NL_SET_ERR_MSG(extack, "can't use goto chain with NULL chain");
 		return ERR_PTR(-EINVAL);
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index dc10525e90e7..99ae30c177c7 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -367,7 +367,7 @@ static void tcf_chain_destroy(struct tcf_chain *chain, bool free_block)
 	struct tcf_block *block = chain->block;
 
 	mutex_destroy(&chain->filter_chain_lock);
-	kfree(chain);
+	kfree_rcu(chain, rcu);
 	if (free_block)
 		tcf_block_destroy(block);
 }
-- 
cgit v1.2.3


From b45a02e13ee74b6fde56df4d76786058821a3aba Mon Sep 17 00:00:00 2001
From: Thomas Gleixner <tglx@linutronix.de>
Date: Tue, 19 Mar 2019 15:54:16 +0100
Subject: gpio: amd-fch: Fix bogus SPDX identifier

spdxcheck.py complains:

 include/linux/platform_data/gpio/gpio-amd-fch.h: 1:28 Invalid License ID: GPL+

which is correct because GPL+ is not a valid identifier. Of course this
could have been caught by checkpatch.pl _before_ submitting or merging the
patch.

 WARNING: 'SPDX-License-Identifier: GPL+ */' is not supported in LICENSES/...
 #271: FILE: include/linux/platform_data/gpio/gpio-amd-fch.h:1:
 +/* SPDX-License-Identifier: GPL+ */

Fix it under the assumption that the author meant GPL-2.0+, which makes
sense as the corresponding C file is using that identifier.

Fixes: e09d168f13f0 ("gpio: AMD G-Series PCH gpio driver")
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
---
 include/linux/platform_data/gpio/gpio-amd-fch.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/platform_data/gpio/gpio-amd-fch.h b/include/linux/platform_data/gpio/gpio-amd-fch.h
index a867637e172d..9e46678edb2a 100644
--- a/include/linux/platform_data/gpio/gpio-amd-fch.h
+++ b/include/linux/platform_data/gpio/gpio-amd-fch.h
@@ -1,4 +1,4 @@
-/* SPDX-License-Identifier: GPL+ */
+/* SPDX-License-Identifier: GPL-2.0+ */
 
 /*
  * AMD FCH gpio driver platform-data
-- 
cgit v1.2.3


From aa9aaa4d61c0048d3faad056893cd7860bbc084c Mon Sep 17 00:00:00 2001
From: Erik Schmauss <erik.schmauss@intel.com>
Date: Thu, 21 Mar 2019 18:20:21 -0700
Subject: ACPI: use different default debug value than ACPICA

Rather than setting debug output flags during early init, its makes
more sense to simply re-define ACPI_DEBUG_DEFAULT specifically for
Linux.

ACPICA commit 60903715711f4b00ca1831779a8a23279a66497d

Link: https://github.com/acpica/acpica/commit/60903715
Fixes: ce5cbf53496b ("ACPI: Set debug output flags independent of ACPICA")
Reported-by: Alexandru Gagniuc <mr.nuke.me@gmail.com>
Tested-by: Alexandru Gagniuc <mr.nuke.me@gmail.com>
Signed-off-by: Erik Schmauss <erik.schmauss@intel.com>
Signed-off-by: Bob Moore <robert.moore@intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/bus.c              | 3 ---
 include/acpi/acoutput.h         | 3 +++
 include/acpi/platform/aclinux.h | 5 +++++
 3 files changed, 8 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c
index 6ecbbabf1233..eec263c9019e 100644
--- a/drivers/acpi/bus.c
+++ b/drivers/acpi/bus.c
@@ -1043,9 +1043,6 @@ void __init acpi_early_init(void)
 
 	acpi_permanent_mmap = true;
 
-	/* Initialize debug output. Linux does not use ACPICA defaults */
-	acpi_dbg_level = ACPI_LV_INFO | ACPI_LV_REPAIR;
-
 #ifdef CONFIG_X86
 	/*
 	 * If the machine falls into the DMI check table,
diff --git a/include/acpi/acoutput.h b/include/acpi/acoutput.h
index 30b1ae53689f..c50542dc71e0 100644
--- a/include/acpi/acoutput.h
+++ b/include/acpi/acoutput.h
@@ -150,7 +150,10 @@
 
 /* Defaults for debug_level, debug and normal */
 
+#ifndef ACPI_DEBUG_DEFAULT
 #define ACPI_DEBUG_DEFAULT          (ACPI_LV_INIT | ACPI_LV_DEBUG_OBJECT | ACPI_LV_EVALUATION | ACPI_LV_REPAIR)
+#endif
+
 #define ACPI_NORMAL_DEFAULT         (ACPI_LV_INIT | ACPI_LV_DEBUG_OBJECT | ACPI_LV_REPAIR)
 #define ACPI_DEBUG_ALL              (ACPI_LV_AML_DISASSEMBLE | ACPI_LV_ALL_EXCEPTIONS | ACPI_LV_ALL)
 
diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h
index 9ff328fd946a..624b90b34085 100644
--- a/include/acpi/platform/aclinux.h
+++ b/include/acpi/platform/aclinux.h
@@ -82,6 +82,11 @@
 #define ACPI_NO_ERROR_MESSAGES
 #undef ACPI_DEBUG_OUTPUT
 
+/* Use a specific bugging default separate from ACPICA */
+
+#undef ACPI_DEBUG_DEFAULT
+#define ACPI_DEBUG_DEFAULT          (ACPI_LV_INFO | ACPI_LV_REPAIR)
+
 /* External interface for __KERNEL__, stub is needed */
 
 #define ACPI_EXTERNAL_RETURN_STATUS(prototype) \
-- 
cgit v1.2.3


From a6256b3a92cbaf3f5ff034ce09d5665607e2d7a4 Mon Sep 17 00:00:00 2001
From: Neil Armstrong <narmstrong@baylibre.com>
Date: Mon, 4 Mar 2019 11:49:16 +0100
Subject: dt-bindings: reset: meson-g12a: Add missing USB2 PHY resets

The G12A Documentation lacked these 2 reset lines, but they are present and
used for each USB 2 PHYs.

Add them to the dt-bindings for the upcoming USB support.

Fixes: dbfc54534dfc ("dt-bindings: reset: meson: add g12a bindings")
Signed-off-by: Neil Armstrong <narmstrong@baylibre.com>
Reviewed-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
---
 include/dt-bindings/reset/amlogic,meson-g12a-reset.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/dt-bindings/reset/amlogic,meson-g12a-reset.h b/include/dt-bindings/reset/amlogic,meson-g12a-reset.h
index 8063e8314eef..6d487c5eba2c 100644
--- a/include/dt-bindings/reset/amlogic,meson-g12a-reset.h
+++ b/include/dt-bindings/reset/amlogic,meson-g12a-reset.h
@@ -51,7 +51,10 @@
 #define RESET_SD_EMMC_A			44
 #define RESET_SD_EMMC_B			45
 #define RESET_SD_EMMC_C			46
-/*					47-60 */
+/*					47	*/
+#define RESET_USB_PHY20			48
+#define RESET_USB_PHY21			49
+/*					50-60	*/
 #define RESET_AUDIO_CODEC		61
 /*					62-63	*/
 /*	RESET2					*/
-- 
cgit v1.2.3


From a3ac7917b73070010c05b4485b8582a6c9cd69b6 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 25 Mar 2019 14:49:00 -0700
Subject: Revert "parport: daisy: use new parport device model"

This reverts commit 1aec4211204d9463d1fd209eb50453de16254599.

Steven Rostedt reports that it causes a hang at bootup and bisected it
to this commit.

The troigger is apparently a module alias for "parport_lowlevel" that
points to "parport_pc", which causes a hang with

    modprobe -q -- parport_lowlevel

blocking forever with a backtrace like this:

    wait_for_completion_killable+0x1c/0x28
    call_usermodehelper_exec+0xa7/0x108
    __request_module+0x351/0x3d8
    get_lowlevel_driver+0x28/0x41 [parport]
    __parport_register_driver+0x39/0x1f4 [parport]
    daisy_drv_init+0x31/0x4f [parport]
    parport_bus_init+0x5d/0x7b [parport]
    parport_default_proc_register+0x26/0x1000 [parport]
    do_one_initcall+0xc2/0x1e0
    do_init_module+0x50/0x1d4
    load_module+0x1c2e/0x21b3
    sys_init_module+0xef/0x117

Supid says:
 "Due to the new device model daisy driver will now try to find the
  parallel ports while trying to register its driver so that it can bind
  with them. Now, since daisy driver is loaded while parport bus is
  initialising the list of parport is still empty and it tries to load
  the lowlevel driver, which has an alias set to parport_pc, now causes
  a deadlock"

But I don't think the daisy driver should be loaded by the parport
initialization in the first place, so let's revert the whole change.

If the daisy driver can just initialize separately on its own (like a
driver should), instead of hooking into the parport init sequence
directly, this issue probably would go away.

Reported-and-bisected-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Reported-by: Michal Kubecek <mkubecek@suse.cz>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Sudip Mukherjee <sudipm.mukherjee@gmail.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/parport/daisy.c | 32 +-------------------------------
 drivers/parport/probe.c |  2 +-
 drivers/parport/share.c | 10 +---------
 include/linux/parport.h | 13 -------------
 4 files changed, 3 insertions(+), 54 deletions(-)

(limited to 'include')

diff --git a/drivers/parport/daisy.c b/drivers/parport/daisy.c
index 56dd83a45e55..5484a46dafda 100644
--- a/drivers/parport/daisy.c
+++ b/drivers/parport/daisy.c
@@ -213,12 +213,10 @@ void parport_daisy_fini(struct parport *port)
 struct pardevice *parport_open(int devnum, const char *name)
 {
 	struct daisydev *p = topology;
-	struct pardev_cb par_cb;
 	struct parport *port;
 	struct pardevice *dev;
 	int daisy;
 
-	memset(&par_cb, 0, sizeof(par_cb));
 	spin_lock(&topology_lock);
 	while (p && p->devnum != devnum)
 		p = p->next;
@@ -232,7 +230,7 @@ struct pardevice *parport_open(int devnum, const char *name)
 	port = parport_get_port(p->port);
 	spin_unlock(&topology_lock);
 
-	dev = parport_register_dev_model(port, name, &par_cb, devnum);
+	dev = parport_register_device(port, name, NULL, NULL, NULL, 0, NULL);
 	parport_put_port(port);
 	if (!dev)
 		return NULL;
@@ -482,31 +480,3 @@ static int assign_addrs(struct parport *port)
 	kfree(deviceid);
 	return detected;
 }
-
-static int daisy_drv_probe(struct pardevice *par_dev)
-{
-	struct device_driver *drv = par_dev->dev.driver;
-
-	if (strcmp(drv->name, "daisy_drv"))
-		return -ENODEV;
-	if (strcmp(par_dev->name, daisy_dev_name))
-		return -ENODEV;
-
-	return 0;
-}
-
-static struct parport_driver daisy_driver = {
-	.name = "daisy_drv",
-	.probe = daisy_drv_probe,
-	.devmodel = true,
-};
-
-int daisy_drv_init(void)
-{
-	return parport_register_driver(&daisy_driver);
-}
-
-void daisy_drv_exit(void)
-{
-	parport_unregister_driver(&daisy_driver);
-}
diff --git a/drivers/parport/probe.c b/drivers/parport/probe.c
index e5e6a463a941..e035174ba205 100644
--- a/drivers/parport/probe.c
+++ b/drivers/parport/probe.c
@@ -257,7 +257,7 @@ static ssize_t parport_read_device_id (struct parport *port, char *buffer,
 ssize_t parport_device_id (int devnum, char *buffer, size_t count)
 {
 	ssize_t retval = -ENXIO;
-	struct pardevice *dev = parport_open(devnum, daisy_dev_name);
+	struct pardevice *dev = parport_open (devnum, "Device ID probe");
 	if (!dev)
 		return -ENXIO;
 
diff --git a/drivers/parport/share.c b/drivers/parport/share.c
index 0171b8dbcdcd..5dc53d420ca8 100644
--- a/drivers/parport/share.c
+++ b/drivers/parport/share.c
@@ -137,19 +137,11 @@ static struct bus_type parport_bus_type = {
 
 int parport_bus_init(void)
 {
-	int retval;
-
-	retval = bus_register(&parport_bus_type);
-	if (retval)
-		return retval;
-	daisy_drv_init();
-
-	return 0;
+	return bus_register(&parport_bus_type);
 }
 
 void parport_bus_exit(void)
 {
-	daisy_drv_exit();
 	bus_unregister(&parport_bus_type);
 }
 
diff --git a/include/linux/parport.h b/include/linux/parport.h
index f41f1d041e2c..397607a0c0eb 100644
--- a/include/linux/parport.h
+++ b/include/linux/parport.h
@@ -460,7 +460,6 @@ extern size_t parport_ieee1284_epp_read_addr (struct parport *,
 					      void *, size_t, int);
 
 /* IEEE1284.3 functions */
-#define daisy_dev_name "Device ID probe"
 extern int parport_daisy_init (struct parport *port);
 extern void parport_daisy_fini (struct parport *port);
 extern struct pardevice *parport_open (int devnum, const char *name);
@@ -469,18 +468,6 @@ extern ssize_t parport_device_id (int devnum, char *buffer, size_t len);
 extern void parport_daisy_deselect_all (struct parport *port);
 extern int parport_daisy_select (struct parport *port, int daisy, int mode);
 
-#ifdef CONFIG_PARPORT_1284
-extern int daisy_drv_init(void);
-extern void daisy_drv_exit(void);
-#else
-static inline int daisy_drv_init(void)
-{
-	return 0;
-}
-
-static inline void daisy_drv_exit(void) {}
-#endif
-
 /* Lowlevel drivers _can_ call this support function to handle irqs.  */
 static inline void parport_generic_irq(struct parport *port)
 {
-- 
cgit v1.2.3


From db779ef67ffeadbb44e9e818eb64dbe528e2f48f Mon Sep 17 00:00:00 2001
From: Bhupesh Sharma <bhsharma@redhat.com>
Date: Tue, 26 Mar 2019 12:20:28 +0530
Subject: proc/kcore: Remove unused kclist_add_remap()

Commit

  bf904d2762ee ("x86/pti/64: Remove the SYSCALL64 entry trampoline")

removed the sole usage of kclist_add_remap() but it did not remove the
left-over definition from the include file.

Fix the same.

Signed-off-by: Bhupesh Sharma <bhsharma@redhat.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Dave Anderson <anderson@redhat.com>
Cc: Dave Young <dyoung@redhat.com>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: James Morse <james.morse@arm.com>
Cc: Kairui Song <kasong@redhat.com>
Cc: kexec@lists.infradead.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: linuxppc-dev@lists.ozlabs.org
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Omar Sandoval <osandov@fb.com>
Cc: "Peter Zijlstra (Intel)" <peterz@infradead.org>
Cc: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: x86-ml <x86@kernel.org>
Link: https://lkml.kernel.org/r/1553583028-17804-1-git-send-email-bhsharma@redhat.com
---
 include/linux/kcore.h | 11 -----------
 1 file changed, 11 deletions(-)

(limited to 'include')

diff --git a/include/linux/kcore.h b/include/linux/kcore.h
index 8c3f8c14eeaa..94b561df3877 100644
--- a/include/linux/kcore.h
+++ b/include/linux/kcore.h
@@ -38,22 +38,11 @@ struct vmcoredd_node {
 
 #ifdef CONFIG_PROC_KCORE
 void __init kclist_add(struct kcore_list *, void *, size_t, int type);
-static inline
-void kclist_add_remap(struct kcore_list *m, void *addr, void *vaddr, size_t sz)
-{
-	m->vaddr = (unsigned long)vaddr;
-	kclist_add(m, addr, sz, KCORE_REMAP);
-}
 #else
 static inline
 void kclist_add(struct kcore_list *new, void *addr, size_t size, int type)
 {
 }
-
-static inline
-void kclist_add_remap(struct kcore_list *m, void *addr, void *vaddr, size_t sz)
-{
-}
 #endif
 
 #endif /* _LINUX_KCORE_H */
-- 
cgit v1.2.3


From 450895d04ba13a96886eddfeddb11556ae8624f1 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <olteanv@gmail.com>
Date: Sun, 24 Mar 2019 00:18:46 +0200
Subject: net: phy: bcm54xx: Encode link speed and activity into LEDs

Previously the green and amber LEDs on this quad PHY were solid, to
indicate an encoding of the link speed (10/100/1000).

This keeps the LEDs always on just as before, but now they flash on
Rx/Tx activity.

Signed-off-by: Vladimir Oltean <olteanv@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/broadcom.c | 13 +++++++++++++
 include/linux/brcmphy.h    | 16 ++++++++++++++++
 2 files changed, 29 insertions(+)

(limited to 'include')

diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c
index 9605d4fe540b..cb86a3e90c7d 100644
--- a/drivers/net/phy/broadcom.c
+++ b/drivers/net/phy/broadcom.c
@@ -323,6 +323,19 @@ static int bcm54xx_config_init(struct phy_device *phydev)
 
 	bcm54xx_phydsp_config(phydev);
 
+	/* Encode link speed into LED1 and LED3 pair (green/amber).
+	 * Also flash these two LEDs on activity. This means configuring
+	 * them for MULTICOLOR and encoding link/activity into them.
+	 */
+	val = BCM5482_SHD_LEDS1_LED1(BCM_LED_SRC_MULTICOLOR1) |
+		BCM5482_SHD_LEDS1_LED3(BCM_LED_SRC_MULTICOLOR1);
+	bcm_phy_write_shadow(phydev, BCM5482_SHD_LEDS1, val);
+
+	val = BCM_LED_MULTICOLOR_IN_PHASE |
+		BCM5482_SHD_LEDS1_LED1(BCM_LED_MULTICOLOR_LINK_ACT) |
+		BCM5482_SHD_LEDS1_LED3(BCM_LED_MULTICOLOR_LINK_ACT);
+	bcm_phy_write_exp(phydev, BCM_EXP_MULTICOLOR, val);
+
 	return 0;
 }
 
diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index 9cd00a37b8d3..6db2d9a6e503 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -148,6 +148,22 @@
 #define BCM_LED_SRC_OFF		0xe	/* Tied high */
 #define BCM_LED_SRC_ON		0xf	/* Tied low */
 
+/*
+ * Broadcom Multicolor LED configurations (expansion register 4)
+ */
+#define BCM_EXP_MULTICOLOR		(MII_BCM54XX_EXP_SEL_ER + 0x04)
+#define BCM_LED_MULTICOLOR_IN_PHASE	BIT(8)
+#define BCM_LED_MULTICOLOR_LINK_ACT	0x0
+#define BCM_LED_MULTICOLOR_SPEED	0x1
+#define BCM_LED_MULTICOLOR_ACT_FLASH	0x2
+#define BCM_LED_MULTICOLOR_FDX		0x3
+#define BCM_LED_MULTICOLOR_OFF		0x4
+#define BCM_LED_MULTICOLOR_ON		0x5
+#define BCM_LED_MULTICOLOR_ALT		0x6
+#define BCM_LED_MULTICOLOR_FLASH	0x7
+#define BCM_LED_MULTICOLOR_LINK		0x8
+#define BCM_LED_MULTICOLOR_ACT		0x9
+#define BCM_LED_MULTICOLOR_PROGRAM	0xa
 
 /*
  * BCM5482: Shadow registers
-- 
cgit v1.2.3


From 0532a1b0d045115521a93acf28f1270df89ad806 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 22 Mar 2019 09:19:34 +0100
Subject: virt: vbox: Implement passing requestor info to the host for
 VirtualBox 6.0.x

VirtualBox 6.0.x has a new feature where the guest kernel driver passes
info about the origin of the request (e.g. userspace or kernelspace) to
the hypervisor.

If we do not pass this information then when running the 6.0.x userspace
guest-additions tools on a 6.0.x host, some requests will get denied
with a VERR_VERSION_MISMATCH error, breaking vboxservice.service and
the mounting of shared folders marked to be auto-mounted.

This commit implements passing the requestor info to the host, fixing this.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/virt/vboxguest/vboxguest_core.c    | 106 ++++++++++++++++++++---------
 drivers/virt/vboxguest/vboxguest_core.h    |  15 ++--
 drivers/virt/vboxguest/vboxguest_linux.c   |  26 ++++++-
 drivers/virt/vboxguest/vboxguest_utils.c   |  32 +++++----
 drivers/virt/vboxguest/vboxguest_version.h |   9 ++-
 drivers/virt/vboxguest/vmmdev.h            |   8 ++-
 include/linux/vbox_utils.h                 |  12 ++--
 include/uapi/linux/vbox_vmmdev_types.h     |  60 ++++++++++++++++
 8 files changed, 197 insertions(+), 71 deletions(-)

(limited to 'include')

diff --git a/drivers/virt/vboxguest/vboxguest_core.c b/drivers/virt/vboxguest/vboxguest_core.c
index df7d09409efe..8ca333f21292 100644
--- a/drivers/virt/vboxguest/vboxguest_core.c
+++ b/drivers/virt/vboxguest/vboxguest_core.c
@@ -27,6 +27,10 @@
 
 #define GUEST_MAPPINGS_TRIES	5
 
+#define VBG_KERNEL_REQUEST \
+	(VMMDEV_REQUESTOR_KERNEL | VMMDEV_REQUESTOR_USR_DRV | \
+	 VMMDEV_REQUESTOR_CON_DONT_KNOW | VMMDEV_REQUESTOR_TRUST_NOT_GIVEN)
+
 /**
  * Reserves memory in which the VMM can relocate any guest mappings
  * that are floating around.
@@ -48,7 +52,8 @@ static void vbg_guest_mappings_init(struct vbg_dev *gdev)
 	int i, rc;
 
 	/* Query the required space. */
-	req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_GET_HYPERVISOR_INFO);
+	req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_GET_HYPERVISOR_INFO,
+			    VBG_KERNEL_REQUEST);
 	if (!req)
 		return;
 
@@ -135,7 +140,8 @@ static void vbg_guest_mappings_exit(struct vbg_dev *gdev)
 	 * Tell the host that we're going to free the memory we reserved for
 	 * it, the free it up. (Leak the memory if anything goes wrong here.)
 	 */
-	req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_SET_HYPERVISOR_INFO);
+	req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_SET_HYPERVISOR_INFO,
+			    VBG_KERNEL_REQUEST);
 	if (!req)
 		return;
 
@@ -172,8 +178,10 @@ static int vbg_report_guest_info(struct vbg_dev *gdev)
 	struct vmmdev_guest_info2 *req2 = NULL;
 	int rc, ret = -ENOMEM;
 
-	req1 = vbg_req_alloc(sizeof(*req1), VMMDEVREQ_REPORT_GUEST_INFO);
-	req2 = vbg_req_alloc(sizeof(*req2), VMMDEVREQ_REPORT_GUEST_INFO2);
+	req1 = vbg_req_alloc(sizeof(*req1), VMMDEVREQ_REPORT_GUEST_INFO,
+			     VBG_KERNEL_REQUEST);
+	req2 = vbg_req_alloc(sizeof(*req2), VMMDEVREQ_REPORT_GUEST_INFO2,
+			     VBG_KERNEL_REQUEST);
 	if (!req1 || !req2)
 		goto out_free;
 
@@ -187,8 +195,8 @@ static int vbg_report_guest_info(struct vbg_dev *gdev)
 	req2->additions_minor = VBG_VERSION_MINOR;
 	req2->additions_build = VBG_VERSION_BUILD;
 	req2->additions_revision = VBG_SVN_REV;
-	/* (no features defined yet) */
-	req2->additions_features = 0;
+	req2->additions_features =
+		VMMDEV_GUEST_INFO2_ADDITIONS_FEATURES_REQUESTOR_INFO;
 	strlcpy(req2->name, VBG_VERSION_STRING,
 		sizeof(req2->name));
 
@@ -230,7 +238,8 @@ static int vbg_report_driver_status(struct vbg_dev *gdev, bool active)
 	struct vmmdev_guest_status *req;
 	int rc;
 
-	req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_REPORT_GUEST_STATUS);
+	req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_REPORT_GUEST_STATUS,
+			    VBG_KERNEL_REQUEST);
 	if (!req)
 		return -ENOMEM;
 
@@ -423,7 +432,8 @@ static int vbg_heartbeat_host_config(struct vbg_dev *gdev, bool enabled)
 	struct vmmdev_heartbeat *req;
 	int rc;
 
-	req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_HEARTBEAT_CONFIGURE);
+	req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_HEARTBEAT_CONFIGURE,
+			    VBG_KERNEL_REQUEST);
 	if (!req)
 		return -ENOMEM;
 
@@ -457,7 +467,8 @@ static int vbg_heartbeat_init(struct vbg_dev *gdev)
 
 	gdev->guest_heartbeat_req = vbg_req_alloc(
 					sizeof(*gdev->guest_heartbeat_req),
-					VMMDEVREQ_GUEST_HEARTBEAT);
+					VMMDEVREQ_GUEST_HEARTBEAT,
+					VBG_KERNEL_REQUEST);
 	if (!gdev->guest_heartbeat_req)
 		return -ENOMEM;
 
@@ -528,7 +539,8 @@ static int vbg_reset_host_event_filter(struct vbg_dev *gdev,
 	struct vmmdev_mask *req;
 	int rc;
 
-	req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_CTL_GUEST_FILTER_MASK);
+	req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_CTL_GUEST_FILTER_MASK,
+			    VBG_KERNEL_REQUEST);
 	if (!req)
 		return -ENOMEM;
 
@@ -567,8 +579,14 @@ static int vbg_set_session_event_filter(struct vbg_dev *gdev,
 	u32 changed, previous;
 	int rc, ret = 0;
 
-	/* Allocate a request buffer before taking the spinlock */
-	req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_CTL_GUEST_FILTER_MASK);
+	/*
+	 * Allocate a request buffer before taking the spinlock, when
+	 * the session is being terminated the requestor is the kernel,
+	 * as we're cleaning up.
+	 */
+	req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_CTL_GUEST_FILTER_MASK,
+			    session_termination ? VBG_KERNEL_REQUEST :
+						  session->requestor);
 	if (!req) {
 		if (!session_termination)
 			return -ENOMEM;
@@ -627,7 +645,8 @@ static int vbg_reset_host_capabilities(struct vbg_dev *gdev)
 	struct vmmdev_mask *req;
 	int rc;
 
-	req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_SET_GUEST_CAPABILITIES);
+	req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_SET_GUEST_CAPABILITIES,
+			    VBG_KERNEL_REQUEST);
 	if (!req)
 		return -ENOMEM;
 
@@ -662,8 +681,14 @@ static int vbg_set_session_capabilities(struct vbg_dev *gdev,
 	u32 changed, previous;
 	int rc, ret = 0;
 
-	/* Allocate a request buffer before taking the spinlock */
-	req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_SET_GUEST_CAPABILITIES);
+	/*
+	 * Allocate a request buffer before taking the spinlock, when
+	 * the session is being terminated the requestor is the kernel,
+	 * as we're cleaning up.
+	 */
+	req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_SET_GUEST_CAPABILITIES,
+			    session_termination ? VBG_KERNEL_REQUEST :
+						  session->requestor);
 	if (!req) {
 		if (!session_termination)
 			return -ENOMEM;
@@ -722,7 +747,8 @@ static int vbg_query_host_version(struct vbg_dev *gdev)
 	struct vmmdev_host_version *req;
 	int rc, ret;
 
-	req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_GET_HOST_VERSION);
+	req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_GET_HOST_VERSION,
+			    VBG_KERNEL_REQUEST);
 	if (!req)
 		return -ENOMEM;
 
@@ -783,19 +809,24 @@ int vbg_core_init(struct vbg_dev *gdev, u32 fixed_events)
 
 	gdev->mem_balloon.get_req =
 		vbg_req_alloc(sizeof(*gdev->mem_balloon.get_req),
-			      VMMDEVREQ_GET_MEMBALLOON_CHANGE_REQ);
+			      VMMDEVREQ_GET_MEMBALLOON_CHANGE_REQ,
+			      VBG_KERNEL_REQUEST);
 	gdev->mem_balloon.change_req =
 		vbg_req_alloc(sizeof(*gdev->mem_balloon.change_req),
-			      VMMDEVREQ_CHANGE_MEMBALLOON);
+			      VMMDEVREQ_CHANGE_MEMBALLOON,
+			      VBG_KERNEL_REQUEST);
 	gdev->cancel_req =
 		vbg_req_alloc(sizeof(*(gdev->cancel_req)),
-			      VMMDEVREQ_HGCM_CANCEL2);
+			      VMMDEVREQ_HGCM_CANCEL2,
+			      VBG_KERNEL_REQUEST);
 	gdev->ack_events_req =
 		vbg_req_alloc(sizeof(*gdev->ack_events_req),
-			      VMMDEVREQ_ACKNOWLEDGE_EVENTS);
+			      VMMDEVREQ_ACKNOWLEDGE_EVENTS,
+			      VBG_KERNEL_REQUEST);
 	gdev->mouse_status_req =
 		vbg_req_alloc(sizeof(*gdev->mouse_status_req),
-			      VMMDEVREQ_GET_MOUSE_STATUS);
+			      VMMDEVREQ_GET_MOUSE_STATUS,
+			      VBG_KERNEL_REQUEST);
 
 	if (!gdev->mem_balloon.get_req || !gdev->mem_balloon.change_req ||
 	    !gdev->cancel_req || !gdev->ack_events_req ||
@@ -892,9 +923,9 @@ void vbg_core_exit(struct vbg_dev *gdev)
  * vboxguest_linux.c calls this when userspace opens the char-device.
  * Return: A pointer to the new session or an ERR_PTR on error.
  * @gdev:		The Guest extension device.
- * @user:		Set if this is a session for the vboxuser device.
+ * @requestor:		VMMDEV_REQUESTOR_* flags
  */
-struct vbg_session *vbg_core_open_session(struct vbg_dev *gdev, bool user)
+struct vbg_session *vbg_core_open_session(struct vbg_dev *gdev, u32 requestor)
 {
 	struct vbg_session *session;
 
@@ -903,7 +934,7 @@ struct vbg_session *vbg_core_open_session(struct vbg_dev *gdev, bool user)
 		return ERR_PTR(-ENOMEM);
 
 	session->gdev = gdev;
-	session->user_session = user;
+	session->requestor = requestor;
 
 	return session;
 }
@@ -924,7 +955,9 @@ void vbg_core_close_session(struct vbg_session *session)
 		if (!session->hgcm_client_ids[i])
 			continue;
 
-		vbg_hgcm_disconnect(gdev, session->hgcm_client_ids[i], &rc);
+		/* requestor is kernel here, as we're cleaning up. */
+		vbg_hgcm_disconnect(gdev, VBG_KERNEL_REQUEST,
+				    session->hgcm_client_ids[i], &rc);
 	}
 
 	kfree(session);
@@ -1152,7 +1185,8 @@ static int vbg_req_allowed(struct vbg_dev *gdev, struct vbg_session *session,
 		return -EPERM;
 	}
 
-	if (trusted_apps_only && session->user_session) {
+	if (trusted_apps_only &&
+	    (session->requestor & VMMDEV_REQUESTOR_USER_DEVICE)) {
 		vbg_err("Denying userspace vmm call type %#08x through vboxuser device node\n",
 			req->request_type);
 		return -EPERM;
@@ -1209,8 +1243,8 @@ static int vbg_ioctl_hgcm_connect(struct vbg_dev *gdev,
 	if (i >= ARRAY_SIZE(session->hgcm_client_ids))
 		return -EMFILE;
 
-	ret = vbg_hgcm_connect(gdev, &conn->u.in.loc, &client_id,
-			       &conn->hdr.rc);
+	ret = vbg_hgcm_connect(gdev, session->requestor, &conn->u.in.loc,
+			       &client_id, &conn->hdr.rc);
 
 	mutex_lock(&gdev->session_mutex);
 	if (ret == 0 && conn->hdr.rc >= 0) {
@@ -1251,7 +1285,8 @@ static int vbg_ioctl_hgcm_disconnect(struct vbg_dev *gdev,
 	if (i >= ARRAY_SIZE(session->hgcm_client_ids))
 		return -EINVAL;
 
-	ret = vbg_hgcm_disconnect(gdev, client_id, &disconn->hdr.rc);
+	ret = vbg_hgcm_disconnect(gdev, session->requestor, client_id,
+				  &disconn->hdr.rc);
 
 	mutex_lock(&gdev->session_mutex);
 	if (ret == 0 && disconn->hdr.rc >= 0)
@@ -1313,12 +1348,12 @@ static int vbg_ioctl_hgcm_call(struct vbg_dev *gdev,
 	}
 
 	if (IS_ENABLED(CONFIG_COMPAT) && f32bit)
-		ret = vbg_hgcm_call32(gdev, client_id,
+		ret = vbg_hgcm_call32(gdev, session->requestor, client_id,
 				      call->function, call->timeout_ms,
 				      VBG_IOCTL_HGCM_CALL_PARMS32(call),
 				      call->parm_count, &call->hdr.rc);
 	else
-		ret = vbg_hgcm_call(gdev, client_id,
+		ret = vbg_hgcm_call(gdev, session->requestor, client_id,
 				    call->function, call->timeout_ms,
 				    VBG_IOCTL_HGCM_CALL_PARMS(call),
 				    call->parm_count, &call->hdr.rc);
@@ -1408,6 +1443,7 @@ static int vbg_ioctl_check_balloon(struct vbg_dev *gdev,
 }
 
 static int vbg_ioctl_write_core_dump(struct vbg_dev *gdev,
+				     struct vbg_session *session,
 				     struct vbg_ioctl_write_coredump *dump)
 {
 	struct vmmdev_write_core_dump *req;
@@ -1415,7 +1451,8 @@ static int vbg_ioctl_write_core_dump(struct vbg_dev *gdev,
 	if (vbg_ioctl_chk(&dump->hdr, sizeof(dump->u.in), 0))
 		return -EINVAL;
 
-	req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_WRITE_COREDUMP);
+	req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_WRITE_COREDUMP,
+			    session->requestor);
 	if (!req)
 		return -ENOMEM;
 
@@ -1476,7 +1513,7 @@ int vbg_core_ioctl(struct vbg_session *session, unsigned int req, void *data)
 	case VBG_IOCTL_CHECK_BALLOON:
 		return vbg_ioctl_check_balloon(gdev, data);
 	case VBG_IOCTL_WRITE_CORE_DUMP:
-		return vbg_ioctl_write_core_dump(gdev, data);
+		return vbg_ioctl_write_core_dump(gdev, session, data);
 	}
 
 	/* Variable sized requests. */
@@ -1508,7 +1545,8 @@ int vbg_core_set_mouse_status(struct vbg_dev *gdev, u32 features)
 	struct vmmdev_mouse_status *req;
 	int rc;
 
-	req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_SET_MOUSE_STATUS);
+	req = vbg_req_alloc(sizeof(*req), VMMDEVREQ_SET_MOUSE_STATUS,
+			    VBG_KERNEL_REQUEST);
 	if (!req)
 		return -ENOMEM;
 
diff --git a/drivers/virt/vboxguest/vboxguest_core.h b/drivers/virt/vboxguest/vboxguest_core.h
index 7ad9ec45bfa9..4188c12b839f 100644
--- a/drivers/virt/vboxguest/vboxguest_core.h
+++ b/drivers/virt/vboxguest/vboxguest_core.h
@@ -154,15 +154,15 @@ struct vbg_session {
 	 * host. Protected by vbg_gdev.session_mutex.
 	 */
 	u32 guest_caps;
-	/** Does this session belong to a root process or a user one? */
-	bool user_session;
+	/** VMMDEV_REQUESTOR_* flags */
+	u32 requestor;
 	/** Set on CANCEL_ALL_WAITEVENTS, protected by vbg_devevent_spinlock. */
 	bool cancel_waiters;
 };
 
 int  vbg_core_init(struct vbg_dev *gdev, u32 fixed_events);
 void vbg_core_exit(struct vbg_dev *gdev);
-struct vbg_session *vbg_core_open_session(struct vbg_dev *gdev, bool user);
+struct vbg_session *vbg_core_open_session(struct vbg_dev *gdev, u32 requestor);
 void vbg_core_close_session(struct vbg_session *session);
 int  vbg_core_ioctl(struct vbg_session *session, unsigned int req, void *data);
 int  vbg_core_set_mouse_status(struct vbg_dev *gdev, u32 features);
@@ -172,12 +172,13 @@ irqreturn_t vbg_core_isr(int irq, void *dev_id);
 void vbg_linux_mouse_event(struct vbg_dev *gdev);
 
 /* Private (non exported) functions form vboxguest_utils.c */
-void *vbg_req_alloc(size_t len, enum vmmdev_request_type req_type);
+void *vbg_req_alloc(size_t len, enum vmmdev_request_type req_type,
+		    u32 requestor);
 void vbg_req_free(void *req, size_t len);
 int vbg_req_perform(struct vbg_dev *gdev, void *req);
 int vbg_hgcm_call32(
-	struct vbg_dev *gdev, u32 client_id, u32 function, u32 timeout_ms,
-	struct vmmdev_hgcm_function_parameter32 *parm32, u32 parm_count,
-	int *vbox_status);
+	struct vbg_dev *gdev, u32 requestor, u32 client_id, u32 function,
+	u32 timeout_ms, struct vmmdev_hgcm_function_parameter32 *parm32,
+	u32 parm_count, int *vbox_status);
 
 #endif
diff --git a/drivers/virt/vboxguest/vboxguest_linux.c b/drivers/virt/vboxguest/vboxguest_linux.c
index 6e2a9619192d..6e8c0f1c1056 100644
--- a/drivers/virt/vboxguest/vboxguest_linux.c
+++ b/drivers/virt/vboxguest/vboxguest_linux.c
@@ -5,6 +5,7 @@
  * Copyright (C) 2006-2016 Oracle Corporation
  */
 
+#include <linux/cred.h>
 #include <linux/input.h>
 #include <linux/kernel.h>
 #include <linux/miscdevice.h>
@@ -28,6 +29,23 @@ static DEFINE_MUTEX(vbg_gdev_mutex);
 /** Global vbg_gdev pointer used by vbg_get/put_gdev. */
 static struct vbg_dev *vbg_gdev;
 
+static u32 vbg_misc_device_requestor(struct inode *inode)
+{
+	u32 requestor = VMMDEV_REQUESTOR_USERMODE |
+			VMMDEV_REQUESTOR_CON_DONT_KNOW |
+			VMMDEV_REQUESTOR_TRUST_NOT_GIVEN;
+
+	if (from_kuid(current_user_ns(), current->cred->uid) == 0)
+		requestor |= VMMDEV_REQUESTOR_USR_ROOT;
+	else
+		requestor |= VMMDEV_REQUESTOR_USR_USER;
+
+	if (in_egroup_p(inode->i_gid))
+		requestor |= VMMDEV_REQUESTOR_GRP_VBOX;
+
+	return requestor;
+}
+
 static int vbg_misc_device_open(struct inode *inode, struct file *filp)
 {
 	struct vbg_session *session;
@@ -36,7 +54,7 @@ static int vbg_misc_device_open(struct inode *inode, struct file *filp)
 	/* misc_open sets filp->private_data to our misc device */
 	gdev = container_of(filp->private_data, struct vbg_dev, misc_device);
 
-	session = vbg_core_open_session(gdev, false);
+	session = vbg_core_open_session(gdev, vbg_misc_device_requestor(inode));
 	if (IS_ERR(session))
 		return PTR_ERR(session);
 
@@ -53,7 +71,8 @@ static int vbg_misc_device_user_open(struct inode *inode, struct file *filp)
 	gdev = container_of(filp->private_data, struct vbg_dev,
 			    misc_device_user);
 
-	session = vbg_core_open_session(gdev, false);
+	session = vbg_core_open_session(gdev, vbg_misc_device_requestor(inode) |
+					      VMMDEV_REQUESTOR_USER_DEVICE);
 	if (IS_ERR(session))
 		return PTR_ERR(session);
 
@@ -115,7 +134,8 @@ static long vbg_misc_device_ioctl(struct file *filp, unsigned int req,
 			 req == VBG_IOCTL_VMMDEV_REQUEST_BIG;
 
 	if (is_vmmdev_req)
-		buf = vbg_req_alloc(size, VBG_IOCTL_HDR_TYPE_DEFAULT);
+		buf = vbg_req_alloc(size, VBG_IOCTL_HDR_TYPE_DEFAULT,
+				    session->requestor);
 	else
 		buf = kmalloc(size, GFP_KERNEL);
 	if (!buf)
diff --git a/drivers/virt/vboxguest/vboxguest_utils.c b/drivers/virt/vboxguest/vboxguest_utils.c
index bf4474214b4d..75fd140b02ff 100644
--- a/drivers/virt/vboxguest/vboxguest_utils.c
+++ b/drivers/virt/vboxguest/vboxguest_utils.c
@@ -62,7 +62,8 @@ VBG_LOG(vbg_err, pr_err);
 VBG_LOG(vbg_debug, pr_debug);
 #endif
 
-void *vbg_req_alloc(size_t len, enum vmmdev_request_type req_type)
+void *vbg_req_alloc(size_t len, enum vmmdev_request_type req_type,
+		    u32 requestor)
 {
 	struct vmmdev_request_header *req;
 	int order = get_order(PAGE_ALIGN(len));
@@ -78,7 +79,7 @@ void *vbg_req_alloc(size_t len, enum vmmdev_request_type req_type)
 	req->request_type = req_type;
 	req->rc = VERR_GENERAL_FAILURE;
 	req->reserved1 = 0;
-	req->reserved2 = 0;
+	req->requestor = requestor;
 
 	return req;
 }
@@ -119,7 +120,7 @@ static bool hgcm_req_done(struct vbg_dev *gdev,
 	return done;
 }
 
-int vbg_hgcm_connect(struct vbg_dev *gdev,
+int vbg_hgcm_connect(struct vbg_dev *gdev, u32 requestor,
 		     struct vmmdev_hgcm_service_location *loc,
 		     u32 *client_id, int *vbox_status)
 {
@@ -127,7 +128,7 @@ int vbg_hgcm_connect(struct vbg_dev *gdev,
 	int rc;
 
 	hgcm_connect = vbg_req_alloc(sizeof(*hgcm_connect),
-				     VMMDEVREQ_HGCM_CONNECT);
+				     VMMDEVREQ_HGCM_CONNECT, requestor);
 	if (!hgcm_connect)
 		return -ENOMEM;
 
@@ -153,13 +154,15 @@ int vbg_hgcm_connect(struct vbg_dev *gdev,
 }
 EXPORT_SYMBOL(vbg_hgcm_connect);
 
-int vbg_hgcm_disconnect(struct vbg_dev *gdev, u32 client_id, int *vbox_status)
+int vbg_hgcm_disconnect(struct vbg_dev *gdev, u32 requestor,
+			u32 client_id, int *vbox_status)
 {
 	struct vmmdev_hgcm_disconnect *hgcm_disconnect = NULL;
 	int rc;
 
 	hgcm_disconnect = vbg_req_alloc(sizeof(*hgcm_disconnect),
-					VMMDEVREQ_HGCM_DISCONNECT);
+					VMMDEVREQ_HGCM_DISCONNECT,
+					requestor);
 	if (!hgcm_disconnect)
 		return -ENOMEM;
 
@@ -593,9 +596,10 @@ static int hgcm_call_copy_back_result(
 	return 0;
 }
 
-int vbg_hgcm_call(struct vbg_dev *gdev, u32 client_id, u32 function,
-		  u32 timeout_ms, struct vmmdev_hgcm_function_parameter *parms,
-		  u32 parm_count, int *vbox_status)
+int vbg_hgcm_call(struct vbg_dev *gdev, u32 requestor, u32 client_id,
+		  u32 function, u32 timeout_ms,
+		  struct vmmdev_hgcm_function_parameter *parms, u32 parm_count,
+		  int *vbox_status)
 {
 	struct vmmdev_hgcm_call *call;
 	void **bounce_bufs = NULL;
@@ -615,7 +619,7 @@ int vbg_hgcm_call(struct vbg_dev *gdev, u32 client_id, u32 function,
 		goto free_bounce_bufs;
 	}
 
-	call = vbg_req_alloc(size, VMMDEVREQ_HGCM_CALL);
+	call = vbg_req_alloc(size, VMMDEVREQ_HGCM_CALL, requestor);
 	if (!call) {
 		ret = -ENOMEM;
 		goto free_bounce_bufs;
@@ -647,9 +651,9 @@ EXPORT_SYMBOL(vbg_hgcm_call);
 
 #ifdef CONFIG_COMPAT
 int vbg_hgcm_call32(
-	struct vbg_dev *gdev, u32 client_id, u32 function, u32 timeout_ms,
-	struct vmmdev_hgcm_function_parameter32 *parm32, u32 parm_count,
-	int *vbox_status)
+	struct vbg_dev *gdev, u32 requestor, u32 client_id, u32 function,
+	u32 timeout_ms, struct vmmdev_hgcm_function_parameter32 *parm32,
+	u32 parm_count, int *vbox_status)
 {
 	struct vmmdev_hgcm_function_parameter *parm64 = NULL;
 	u32 i, size;
@@ -689,7 +693,7 @@ int vbg_hgcm_call32(
 			goto out_free;
 	}
 
-	ret = vbg_hgcm_call(gdev, client_id, function, timeout_ms,
+	ret = vbg_hgcm_call(gdev, requestor, client_id, function, timeout_ms,
 			    parm64, parm_count, vbox_status);
 	if (ret < 0)
 		goto out_free;
diff --git a/drivers/virt/vboxguest/vboxguest_version.h b/drivers/virt/vboxguest/vboxguest_version.h
index 77f0c8f8a231..84834dad38d5 100644
--- a/drivers/virt/vboxguest/vboxguest_version.h
+++ b/drivers/virt/vboxguest/vboxguest_version.h
@@ -9,11 +9,10 @@
 #ifndef __VBOX_VERSION_H__
 #define __VBOX_VERSION_H__
 
-/* Last synced October 4th 2017 */
-#define VBG_VERSION_MAJOR 5
-#define VBG_VERSION_MINOR 2
+#define VBG_VERSION_MAJOR 6
+#define VBG_VERSION_MINOR 0
 #define VBG_VERSION_BUILD 0
-#define VBG_SVN_REV 68940
-#define VBG_VERSION_STRING "5.2.0"
+#define VBG_SVN_REV 127566
+#define VBG_VERSION_STRING "6.0.0"
 
 #endif
diff --git a/drivers/virt/vboxguest/vmmdev.h b/drivers/virt/vboxguest/vmmdev.h
index 5e2ae978935d..6337b8d75d96 100644
--- a/drivers/virt/vboxguest/vmmdev.h
+++ b/drivers/virt/vboxguest/vmmdev.h
@@ -98,8 +98,8 @@ struct vmmdev_request_header {
 	s32 rc;
 	/** Reserved field no.1. MBZ. */
 	u32 reserved1;
-	/** Reserved field no.2. MBZ. */
-	u32 reserved2;
+	/** IN: Requestor information (VMMDEV_REQUESTOR_*) */
+	u32 requestor;
 };
 VMMDEV_ASSERT_SIZE(vmmdev_request_header, 24);
 
@@ -247,6 +247,8 @@ struct vmmdev_guest_info {
 };
 VMMDEV_ASSERT_SIZE(vmmdev_guest_info, 24 + 8);
 
+#define VMMDEV_GUEST_INFO2_ADDITIONS_FEATURES_REQUESTOR_INFO	BIT(0)
+
 /** struct vmmdev_guestinfo2 - Guest information report, version 2. */
 struct vmmdev_guest_info2 {
 	/** Header. */
@@ -259,7 +261,7 @@ struct vmmdev_guest_info2 {
 	u32 additions_build;
 	/** SVN revision. */
 	u32 additions_revision;
-	/** Feature mask, currently unused. */
+	/** Feature mask. */
 	u32 additions_features;
 	/**
 	 * The intentional meaning of this field was:
diff --git a/include/linux/vbox_utils.h b/include/linux/vbox_utils.h
index a240ed2a0372..ff56c443180c 100644
--- a/include/linux/vbox_utils.h
+++ b/include/linux/vbox_utils.h
@@ -24,15 +24,17 @@ __printf(1, 2) void vbg_debug(const char *fmt, ...);
 #define vbg_debug pr_debug
 #endif
 
-int vbg_hgcm_connect(struct vbg_dev *gdev,
+int vbg_hgcm_connect(struct vbg_dev *gdev, u32 requestor,
 		     struct vmmdev_hgcm_service_location *loc,
 		     u32 *client_id, int *vbox_status);
 
-int vbg_hgcm_disconnect(struct vbg_dev *gdev, u32 client_id, int *vbox_status);
+int vbg_hgcm_disconnect(struct vbg_dev *gdev, u32 requestor,
+			u32 client_id, int *vbox_status);
 
-int vbg_hgcm_call(struct vbg_dev *gdev, u32 client_id, u32 function,
-		  u32 timeout_ms, struct vmmdev_hgcm_function_parameter *parms,
-		  u32 parm_count, int *vbox_status);
+int vbg_hgcm_call(struct vbg_dev *gdev, u32 requestor, u32 client_id,
+		  u32 function, u32 timeout_ms,
+		  struct vmmdev_hgcm_function_parameter *parms, u32 parm_count,
+		  int *vbox_status);
 
 /**
  * Convert a VirtualBox status code to a standard Linux kernel return value.
diff --git a/include/uapi/linux/vbox_vmmdev_types.h b/include/uapi/linux/vbox_vmmdev_types.h
index 0e68024f36c7..26f39816af14 100644
--- a/include/uapi/linux/vbox_vmmdev_types.h
+++ b/include/uapi/linux/vbox_vmmdev_types.h
@@ -102,6 +102,66 @@ enum vmmdev_request_type {
 #define VMMDEVREQ_HGCM_CALL VMMDEVREQ_HGCM_CALL32
 #endif
 
+/* vmmdev_request_header.requestor defines */
+
+/* Requestor user not given. */
+#define VMMDEV_REQUESTOR_USR_NOT_GIVEN                      0x00000000
+/* The kernel driver (vboxguest) is the requestor. */
+#define VMMDEV_REQUESTOR_USR_DRV                            0x00000001
+/* Some other kernel driver is the requestor. */
+#define VMMDEV_REQUESTOR_USR_DRV_OTHER                      0x00000002
+/* The root or a admin user is the requestor. */
+#define VMMDEV_REQUESTOR_USR_ROOT                           0x00000003
+/* Regular joe user is making the request. */
+#define VMMDEV_REQUESTOR_USR_USER                           0x00000006
+/* User classification mask. */
+#define VMMDEV_REQUESTOR_USR_MASK                           0x00000007
+
+/* Kernel mode request. Note this is 0, check for !USERMODE instead. */
+#define VMMDEV_REQUESTOR_KERNEL                             0x00000000
+/* User mode request. */
+#define VMMDEV_REQUESTOR_USERMODE                           0x00000008
+/* User or kernel mode classification mask. */
+#define VMMDEV_REQUESTOR_MODE_MASK                          0x00000008
+
+/* Don't know the physical console association of the requestor. */
+#define VMMDEV_REQUESTOR_CON_DONT_KNOW                      0x00000000
+/*
+ * The request originates with a process that is NOT associated with the
+ * physical console.
+ */
+#define VMMDEV_REQUESTOR_CON_NO                             0x00000010
+/* Requestor process is associated with the physical console. */
+#define VMMDEV_REQUESTOR_CON_YES                            0x00000020
+/* Console classification mask. */
+#define VMMDEV_REQUESTOR_CON_MASK                           0x00000030
+
+/* Requestor is member of special VirtualBox user group. */
+#define VMMDEV_REQUESTOR_GRP_VBOX                           0x00000080
+
+/* Note: trust level is for windows guests only, linux always uses not-given */
+/* Requestor trust level: Unspecified */
+#define VMMDEV_REQUESTOR_TRUST_NOT_GIVEN                    0x00000000
+/* Requestor trust level: Untrusted (SID S-1-16-0) */
+#define VMMDEV_REQUESTOR_TRUST_UNTRUSTED                    0x00001000
+/* Requestor trust level: Untrusted (SID S-1-16-4096) */
+#define VMMDEV_REQUESTOR_TRUST_LOW                          0x00002000
+/* Requestor trust level: Medium (SID S-1-16-8192) */
+#define VMMDEV_REQUESTOR_TRUST_MEDIUM                       0x00003000
+/* Requestor trust level: Medium plus (SID S-1-16-8448) */
+#define VMMDEV_REQUESTOR_TRUST_MEDIUM_PLUS                  0x00004000
+/* Requestor trust level: High (SID S-1-16-12288) */
+#define VMMDEV_REQUESTOR_TRUST_HIGH                         0x00005000
+/* Requestor trust level: System (SID S-1-16-16384) */
+#define VMMDEV_REQUESTOR_TRUST_SYSTEM                       0x00006000
+/* Requestor trust level >= Protected (SID S-1-16-20480, S-1-16-28672) */
+#define VMMDEV_REQUESTOR_TRUST_PROTECTED                    0x00007000
+/* Requestor trust level mask */
+#define VMMDEV_REQUESTOR_TRUST_MASK                         0x00007000
+
+/* Requestor is using the less trusted user device node (/dev/vboxuser) */
+#define VMMDEV_REQUESTOR_USER_DEVICE                        0x00008000
+
 /** HGCM service location types. */
 enum vmmdev_hgcm_service_location_type {
 	VMMDEV_HGCM_LOC_INVALID    = 0,
-- 
cgit v1.2.3


From 7f07e5f1f778605e98cf2156d4db1ff3a3a1a74a Mon Sep 17 00:00:00 2001
From: Claudiu Manoil <claudiu.manoil@nxp.com>
Date: Tue, 26 Mar 2019 11:48:57 +0200
Subject: net: mii: Fix PAUSE cap advertisement from
 linkmode_adv_to_lcl_adv_t() helper

With a recent link mode advertisement code update this helper
providing local pause capability translation used for flow
control link mode negotiation got broken.
For eth drivers using this helper, the issue is apparent only
if either PAUSE or ASYM_PAUSE is being advertised.

Fixes: 3c1bcc8614db ("net: ethernet: Convert phydev advertize and supported from u32 to link mode")
Signed-off-by: Claudiu Manoil <claudiu.manoil@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/mii.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mii.h b/include/linux/mii.h
index 6fee8b1a4400..5cd824c1c0ca 100644
--- a/include/linux/mii.h
+++ b/include/linux/mii.h
@@ -469,7 +469,7 @@ static inline u32 linkmode_adv_to_lcl_adv_t(unsigned long *advertising)
 	if (linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT,
 			      advertising))
 		lcl_adv |= ADVERTISE_PAUSE_CAP;
-	if (linkmode_test_bit(ETHTOOL_LINK_MODE_Pause_BIT,
+	if (linkmode_test_bit(ETHTOOL_LINK_MODE_Asym_Pause_BIT,
 			      advertising))
 		lcl_adv |= ADVERTISE_PAUSE_ASYM;
 
-- 
cgit v1.2.3


From 3d9683cf3bfb6d4e4605a153958dfca7e18b52f2 Mon Sep 17 00:00:00 2001
From: Masahiro Yamada <yamada.masahiro@socionext.com>
Date: Mon, 18 Mar 2019 18:08:12 +0900
Subject: KVM: export <linux/kvm_para.h> and <asm/kvm_para.h> iif KVM is
 supported

I do not see any consistency about headers_install of <linux/kvm_para.h>
and <asm/kvm_para.h>.

According to my analysis of Linux 5.1-rc1, there are 3 groups:

 [1] Both <linux/kvm_para.h> and <asm/kvm_para.h> are exported

    alpha, arm, hexagon, mips, powerpc, s390, sparc, x86

 [2] <asm/kvm_para.h> is exported, but <linux/kvm_para.h> is not

    arc, arm64, c6x, h8300, ia64, m68k, microblaze, nios2, openrisc,
    parisc, sh, unicore32, xtensa

 [3] Neither <linux/kvm_para.h> nor <asm/kvm_para.h> is exported

    csky, nds32, riscv

This does not match to the actual KVM support. At least, [2] is
half-baked.

Nor do arch maintainers look like they care about this. For example,
commit 0add53713b1c ("microblaze: Add missing kvm_para.h to Kbuild")
exported <asm/kvm_para.h> to user-space in order to fix an in-kernel
build error.

We have two ways to make this consistent:

 [A] export both <linux/kvm_para.h> and <asm/kvm_para.h> for all
     architectures, irrespective of the KVM support

 [B] Match the header export of <linux/kvm_para.h> and <asm/kvm_para.h>
     to the KVM support

My first attempt was [A] because the code looks cleaner, but Paolo
suggested [B].

So, this commit goes with [B].

For most architectures, <asm/kvm_para.h> was moved to the kernel-space.
I changed include/uapi/linux/Kbuild so that it checks generated
asm/kvm_para.h as well as check-in ones.

After this commit, there will be two groups:

 [1] Both <linux/kvm_para.h> and <asm/kvm_para.h> are exported

    arm, arm64, mips, powerpc, s390, x86

 [2] Neither <linux/kvm_para.h> nor <asm/kvm_para.h> is exported

    alpha, arc, c6x, csky, h8300, hexagon, ia64, m68k, microblaze,
    nds32, nios2, openrisc, parisc, riscv, sh, sparc, unicore32, xtensa

Signed-off-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Acked-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/alpha/include/asm/Kbuild            | 1 +
 arch/alpha/include/uapi/asm/kvm_para.h   | 2 --
 arch/arc/include/asm/Kbuild              | 1 +
 arch/arc/include/uapi/asm/Kbuild         | 1 -
 arch/arm/include/uapi/asm/Kbuild         | 1 +
 arch/arm/include/uapi/asm/kvm_para.h     | 2 --
 arch/c6x/include/asm/Kbuild              | 1 +
 arch/c6x/include/uapi/asm/Kbuild         | 1 -
 arch/h8300/include/asm/Kbuild            | 1 +
 arch/h8300/include/uapi/asm/Kbuild       | 1 -
 arch/hexagon/include/asm/Kbuild          | 1 +
 arch/hexagon/include/uapi/asm/kvm_para.h | 2 --
 arch/ia64/include/asm/Kbuild             | 1 +
 arch/ia64/include/uapi/asm/Kbuild        | 1 -
 arch/m68k/include/asm/Kbuild             | 1 +
 arch/m68k/include/uapi/asm/Kbuild        | 1 -
 arch/microblaze/include/asm/Kbuild       | 1 +
 arch/microblaze/include/uapi/asm/Kbuild  | 1 -
 arch/nios2/include/asm/Kbuild            | 1 +
 arch/nios2/include/uapi/asm/Kbuild       | 1 -
 arch/openrisc/include/asm/Kbuild         | 1 +
 arch/openrisc/include/uapi/asm/Kbuild    | 1 -
 arch/parisc/include/asm/Kbuild           | 1 +
 arch/parisc/include/uapi/asm/Kbuild      | 1 -
 arch/sh/include/asm/Kbuild               | 1 +
 arch/sh/include/uapi/asm/Kbuild          | 1 -
 arch/sparc/include/asm/Kbuild            | 1 +
 arch/sparc/include/uapi/asm/kvm_para.h   | 2 --
 arch/unicore32/include/asm/Kbuild        | 1 +
 arch/unicore32/include/uapi/asm/Kbuild   | 1 -
 arch/xtensa/include/asm/Kbuild           | 1 +
 arch/xtensa/include/uapi/asm/Kbuild      | 1 -
 include/uapi/linux/Kbuild                | 2 ++
 33 files changed, 18 insertions(+), 20 deletions(-)
 delete mode 100644 arch/alpha/include/uapi/asm/kvm_para.h
 delete mode 100644 arch/arm/include/uapi/asm/kvm_para.h
 delete mode 100644 arch/hexagon/include/uapi/asm/kvm_para.h
 delete mode 100644 arch/sparc/include/uapi/asm/kvm_para.h

(limited to 'include')

diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild
index dc0ab28baca1..70b783333965 100644
--- a/arch/alpha/include/asm/Kbuild
+++ b/arch/alpha/include/asm/Kbuild
@@ -6,6 +6,7 @@ generic-y += exec.h
 generic-y += export.h
 generic-y += fb.h
 generic-y += irq_work.h
+generic-y += kvm_para.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
 generic-y += preempt.h
diff --git a/arch/alpha/include/uapi/asm/kvm_para.h b/arch/alpha/include/uapi/asm/kvm_para.h
deleted file mode 100644
index baacc4996d18..000000000000
--- a/arch/alpha/include/uapi/asm/kvm_para.h
+++ /dev/null
@@ -1,2 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#include <asm-generic/kvm_para.h>
diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild
index b41f8881ecc8..decc306a3b52 100644
--- a/arch/arc/include/asm/Kbuild
+++ b/arch/arc/include/asm/Kbuild
@@ -11,6 +11,7 @@ generic-y += hardirq.h
 generic-y += hw_irq.h
 generic-y += irq_regs.h
 generic-y += irq_work.h
+generic-y += kvm_para.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
diff --git a/arch/arc/include/uapi/asm/Kbuild b/arch/arc/include/uapi/asm/Kbuild
index 755bb11323d8..1c72f04ff75d 100644
--- a/arch/arc/include/uapi/asm/Kbuild
+++ b/arch/arc/include/uapi/asm/Kbuild
@@ -1,2 +1 @@
-generic-y += kvm_para.h
 generic-y += ucontext.h
diff --git a/arch/arm/include/uapi/asm/Kbuild b/arch/arm/include/uapi/asm/Kbuild
index 23b4464c0995..ce8573157774 100644
--- a/arch/arm/include/uapi/asm/Kbuild
+++ b/arch/arm/include/uapi/asm/Kbuild
@@ -3,3 +3,4 @@
 generated-y += unistd-common.h
 generated-y += unistd-oabi.h
 generated-y += unistd-eabi.h
+generic-y += kvm_para.h
diff --git a/arch/arm/include/uapi/asm/kvm_para.h b/arch/arm/include/uapi/asm/kvm_para.h
deleted file mode 100644
index baacc4996d18..000000000000
--- a/arch/arm/include/uapi/asm/kvm_para.h
+++ /dev/null
@@ -1,2 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#include <asm-generic/kvm_para.h>
diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild
index 63b4a1705182..249c9f6f26dc 100644
--- a/arch/c6x/include/asm/Kbuild
+++ b/arch/c6x/include/asm/Kbuild
@@ -19,6 +19,7 @@ generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += kmap_types.h
 generic-y += kprobes.h
+generic-y += kvm_para.h
 generic-y += local.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
diff --git a/arch/c6x/include/uapi/asm/Kbuild b/arch/c6x/include/uapi/asm/Kbuild
index 755bb11323d8..1c72f04ff75d 100644
--- a/arch/c6x/include/uapi/asm/Kbuild
+++ b/arch/c6x/include/uapi/asm/Kbuild
@@ -1,2 +1 @@
-generic-y += kvm_para.h
 generic-y += ucontext.h
diff --git a/arch/h8300/include/asm/Kbuild b/arch/h8300/include/asm/Kbuild
index 3e7c8ecf151e..e3dead402e5f 100644
--- a/arch/h8300/include/asm/Kbuild
+++ b/arch/h8300/include/asm/Kbuild
@@ -23,6 +23,7 @@ generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += kmap_types.h
 generic-y += kprobes.h
+generic-y += kvm_para.h
 generic-y += linkage.h
 generic-y += local.h
 generic-y += local64.h
diff --git a/arch/h8300/include/uapi/asm/Kbuild b/arch/h8300/include/uapi/asm/Kbuild
index 755bb11323d8..1c72f04ff75d 100644
--- a/arch/h8300/include/uapi/asm/Kbuild
+++ b/arch/h8300/include/uapi/asm/Kbuild
@@ -1,2 +1 @@
-generic-y += kvm_para.h
 generic-y += ucontext.h
diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild
index b25fd42aa0f4..d046e8ccdf78 100644
--- a/arch/hexagon/include/asm/Kbuild
+++ b/arch/hexagon/include/asm/Kbuild
@@ -19,6 +19,7 @@ generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += kmap_types.h
 generic-y += kprobes.h
+generic-y += kvm_para.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
diff --git a/arch/hexagon/include/uapi/asm/kvm_para.h b/arch/hexagon/include/uapi/asm/kvm_para.h
deleted file mode 100644
index baacc4996d18..000000000000
--- a/arch/hexagon/include/uapi/asm/kvm_para.h
+++ /dev/null
@@ -1,2 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#include <asm-generic/kvm_para.h>
diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild
index 43e21fe3499c..11f191689c9e 100644
--- a/arch/ia64/include/asm/Kbuild
+++ b/arch/ia64/include/asm/Kbuild
@@ -2,6 +2,7 @@ generated-y += syscall_table.h
 generic-y += compat.h
 generic-y += exec.h
 generic-y += irq_work.h
+generic-y += kvm_para.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
 generic-y += preempt.h
diff --git a/arch/ia64/include/uapi/asm/Kbuild b/arch/ia64/include/uapi/asm/Kbuild
index 20018cb883a9..62a9522af51e 100644
--- a/arch/ia64/include/uapi/asm/Kbuild
+++ b/arch/ia64/include/uapi/asm/Kbuild
@@ -1,2 +1 @@
 generated-y += unistd_64.h
-generic-y += kvm_para.h
diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild
index 95f8f631c4df..2c359d9e80f6 100644
--- a/arch/m68k/include/asm/Kbuild
+++ b/arch/m68k/include/asm/Kbuild
@@ -13,6 +13,7 @@ generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += kmap_types.h
 generic-y += kprobes.h
+generic-y += kvm_para.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
diff --git a/arch/m68k/include/uapi/asm/Kbuild b/arch/m68k/include/uapi/asm/Kbuild
index 8a7ad40be463..7417847dc438 100644
--- a/arch/m68k/include/uapi/asm/Kbuild
+++ b/arch/m68k/include/uapi/asm/Kbuild
@@ -1,2 +1 @@
 generated-y += unistd_32.h
-generic-y += kvm_para.h
diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild
index 791cc8d54d0a..1a8285c3f693 100644
--- a/arch/microblaze/include/asm/Kbuild
+++ b/arch/microblaze/include/asm/Kbuild
@@ -17,6 +17,7 @@ generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += kmap_types.h
 generic-y += kprobes.h
+generic-y += kvm_para.h
 generic-y += linkage.h
 generic-y += local.h
 generic-y += local64.h
diff --git a/arch/microblaze/include/uapi/asm/Kbuild b/arch/microblaze/include/uapi/asm/Kbuild
index 3ce84fbb2678..13f59631c576 100644
--- a/arch/microblaze/include/uapi/asm/Kbuild
+++ b/arch/microblaze/include/uapi/asm/Kbuild
@@ -1,3 +1,2 @@
 generated-y += unistd_32.h
-generic-y += kvm_para.h
 generic-y += ucontext.h
diff --git a/arch/nios2/include/asm/Kbuild b/arch/nios2/include/asm/Kbuild
index 8fde4fa2c34f..88a667d12aaa 100644
--- a/arch/nios2/include/asm/Kbuild
+++ b/arch/nios2/include/asm/Kbuild
@@ -23,6 +23,7 @@ generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += kmap_types.h
 generic-y += kprobes.h
+generic-y += kvm_para.h
 generic-y += local.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
diff --git a/arch/nios2/include/uapi/asm/Kbuild b/arch/nios2/include/uapi/asm/Kbuild
index 755bb11323d8..1c72f04ff75d 100644
--- a/arch/nios2/include/uapi/asm/Kbuild
+++ b/arch/nios2/include/uapi/asm/Kbuild
@@ -1,2 +1 @@
-generic-y += kvm_para.h
 generic-y += ucontext.h
diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild
index 5a73e2956ac4..22aa97136c01 100644
--- a/arch/openrisc/include/asm/Kbuild
+++ b/arch/openrisc/include/asm/Kbuild
@@ -20,6 +20,7 @@ generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += kmap_types.h
 generic-y += kprobes.h
+generic-y += kvm_para.h
 generic-y += local.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
diff --git a/arch/openrisc/include/uapi/asm/Kbuild b/arch/openrisc/include/uapi/asm/Kbuild
index 755bb11323d8..1c72f04ff75d 100644
--- a/arch/openrisc/include/uapi/asm/Kbuild
+++ b/arch/openrisc/include/uapi/asm/Kbuild
@@ -1,2 +1 @@
-generic-y += kvm_para.h
 generic-y += ucontext.h
diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild
index 6f49e77d82a2..9bcd0c903dbb 100644
--- a/arch/parisc/include/asm/Kbuild
+++ b/arch/parisc/include/asm/Kbuild
@@ -11,6 +11,7 @@ generic-y += irq_regs.h
 generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += kprobes.h
+generic-y += kvm_para.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
diff --git a/arch/parisc/include/uapi/asm/Kbuild b/arch/parisc/include/uapi/asm/Kbuild
index 22fdbd08cdc8..2bd5b392277c 100644
--- a/arch/parisc/include/uapi/asm/Kbuild
+++ b/arch/parisc/include/uapi/asm/Kbuild
@@ -1,3 +1,2 @@
 generated-y += unistd_32.h
 generated-y += unistd_64.h
-generic-y += kvm_para.h
diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild
index a6ef3fee5f85..7bf2cb680d32 100644
--- a/arch/sh/include/asm/Kbuild
+++ b/arch/sh/include/asm/Kbuild
@@ -9,6 +9,7 @@ generic-y += emergency-restart.h
 generic-y += exec.h
 generic-y += irq_regs.h
 generic-y += irq_work.h
+generic-y += kvm_para.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
diff --git a/arch/sh/include/uapi/asm/Kbuild b/arch/sh/include/uapi/asm/Kbuild
index ecfbd40924dd..b8812c74c1de 100644
--- a/arch/sh/include/uapi/asm/Kbuild
+++ b/arch/sh/include/uapi/asm/Kbuild
@@ -1,5 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0
 
 generated-y += unistd_32.h
-generic-y += kvm_para.h
 generic-y += ucontext.h
diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
index b82f64e28f55..a22cfd5c0ee8 100644
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -9,6 +9,7 @@ generic-y += exec.h
 generic-y += export.h
 generic-y += irq_regs.h
 generic-y += irq_work.h
+generic-y += kvm_para.h
 generic-y += linkage.h
 generic-y += local.h
 generic-y += local64.h
diff --git a/arch/sparc/include/uapi/asm/kvm_para.h b/arch/sparc/include/uapi/asm/kvm_para.h
deleted file mode 100644
index baacc4996d18..000000000000
--- a/arch/sparc/include/uapi/asm/kvm_para.h
+++ /dev/null
@@ -1,2 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-#include <asm-generic/kvm_para.h>
diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild
index 1d1544b6ca74..d77d953c04c1 100644
--- a/arch/unicore32/include/asm/Kbuild
+++ b/arch/unicore32/include/asm/Kbuild
@@ -18,6 +18,7 @@ generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += kmap_types.h
 generic-y += kprobes.h
+generic-y += kvm_para.h
 generic-y += local.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
diff --git a/arch/unicore32/include/uapi/asm/Kbuild b/arch/unicore32/include/uapi/asm/Kbuild
index 755bb11323d8..1c72f04ff75d 100644
--- a/arch/unicore32/include/uapi/asm/Kbuild
+++ b/arch/unicore32/include/uapi/asm/Kbuild
@@ -1,2 +1 @@
-generic-y += kvm_para.h
 generic-y += ucontext.h
diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild
index 42b6cb3d16f7..3843198e03d4 100644
--- a/arch/xtensa/include/asm/Kbuild
+++ b/arch/xtensa/include/asm/Kbuild
@@ -15,6 +15,7 @@ generic-y += irq_work.h
 generic-y += kdebug.h
 generic-y += kmap_types.h
 generic-y += kprobes.h
+generic-y += kvm_para.h
 generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
diff --git a/arch/xtensa/include/uapi/asm/Kbuild b/arch/xtensa/include/uapi/asm/Kbuild
index 8a7ad40be463..7417847dc438 100644
--- a/arch/xtensa/include/uapi/asm/Kbuild
+++ b/arch/xtensa/include/uapi/asm/Kbuild
@@ -1,2 +1 @@
 generated-y += unistd_32.h
-generic-y += kvm_para.h
diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild
index 5f24b50c9e88..059dc2bedaf6 100644
--- a/include/uapi/linux/Kbuild
+++ b/include/uapi/linux/Kbuild
@@ -7,5 +7,7 @@ no-export-headers += kvm.h
 endif
 
 ifeq ($(wildcard $(srctree)/arch/$(SRCARCH)/include/uapi/asm/kvm_para.h),)
+ifeq ($(wildcard $(objtree)/arch/$(SRCARCH)/include/generated/uapi/asm/kvm_para.h),)
 no-export-headers += kvm_para.h
 endif
+endif
-- 
cgit v1.2.3


From 355b98553789b646ed97ad801a619ff898471b92 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 27 Mar 2019 08:21:30 -0700
Subject: netns: provide pure entropy for net_hash_mix()

net_hash_mix() currently uses kernel address of a struct net,
and is used in many places that could be used to reveal this
address to a patient attacker, thus defeating KASLR, for
the typical case (initial net namespace, &init_net is
not dynamically allocated)

I believe the original implementation tried to avoid spending
too many cycles in this function, but security comes first.

Also provide entropy regardless of CONFIG_NET_NS.

Fixes: 0b4419162aa6 ("netns: introduce the net_hash_mix "salt" for hashes")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reported-by: Amit Klein <aksecurity@gmail.com>
Reported-by: Benny Pinkas <benny@pinkas.net>
Cc: Pavel Emelyanov <xemul@openvz.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/net_namespace.h |  1 +
 include/net/netns/hash.h    | 10 ++--------
 net/core/net_namespace.c    |  1 +
 3 files changed, 4 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index a68ced28d8f4..12689ddfc24c 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -59,6 +59,7 @@ struct net {
 						 */
 	spinlock_t		rules_mod_lock;
 
+	u32			hash_mix;
 	atomic64_t		cookie_gen;
 
 	struct list_head	list;		/* list of network namespaces */
diff --git a/include/net/netns/hash.h b/include/net/netns/hash.h
index 16a842456189..d9b665151f3d 100644
--- a/include/net/netns/hash.h
+++ b/include/net/netns/hash.h
@@ -2,16 +2,10 @@
 #ifndef __NET_NS_HASH_H__
 #define __NET_NS_HASH_H__
 
-#include <asm/cache.h>
-
-struct net;
+#include <net/net_namespace.h>
 
 static inline u32 net_hash_mix(const struct net *net)
 {
-#ifdef CONFIG_NET_NS
-	return (u32)(((unsigned long)net) >> ilog2(sizeof(*net)));
-#else
-	return 0;
-#endif
+	return net->hash_mix;
 }
 #endif
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 17f36317363d..7e6dcc625701 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -304,6 +304,7 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
 
 	refcount_set(&net->count, 1);
 	refcount_set(&net->passive, 1);
+	get_random_bytes(&net->hash_mix, sizeof(u32));
 	net->dev_base_seq = 1;
 	net->user_ns = user_ns;
 	idr_init(&net->netns_ids);
-- 
cgit v1.2.3


From fe61692886669bbcc260f980903eacb4ddebaf59 Mon Sep 17 00:00:00 2001
From: Rodrigo Siqueira <rodrigosiqueiramelo@gmail.com>
Date: Thu, 14 Mar 2019 15:48:45 -0300
Subject: drm/atomic-helper: Make atomic_enable/disable crtc callbacks optional

Allow atomic_enable and atomic_disable operations from
drm_crtc_helper_funcs struct optional. With this, the target display
drivers don't need to define a dummy function if they don't need one.

Changes since v2:
* Don't make funcs optional
* Update kerneldoc for atomic_enable/disable
* Replace "if (funcs->atomic_enable)" by "if (funcs->commit)"
* Improve commit message

Signed-off-by: Rodrigo Siqueira <rodrigosiqueiramelo@gmail.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: http://patchwork.freedesktop.org/patch/msgid/20190314184845.gjmvkamobj4dilyp@smtp.gmail.com
Signed-off-by: Gerd Hoffmann <kraxel@redhat.com>
---
 drivers/gpu/drm/drm_atomic_helper.c      | 5 ++---
 include/drm/drm_modeset_helper_vtables.h | 4 ++++
 2 files changed, 6 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
index 40ac19848034..fbb76332cc9f 100644
--- a/drivers/gpu/drm/drm_atomic_helper.c
+++ b/drivers/gpu/drm/drm_atomic_helper.c
@@ -1034,7 +1034,7 @@ disable_outputs(struct drm_device *dev, struct drm_atomic_state *old_state)
 			funcs->atomic_disable(crtc, old_crtc_state);
 		else if (funcs->disable)
 			funcs->disable(crtc);
-		else
+		else if (funcs->dpms)
 			funcs->dpms(crtc, DRM_MODE_DPMS_OFF);
 
 		if (!(dev->irq_enabled && dev->num_crtcs))
@@ -1277,10 +1277,9 @@ void drm_atomic_helper_commit_modeset_enables(struct drm_device *dev,
 		if (new_crtc_state->enable) {
 			DRM_DEBUG_ATOMIC("enabling [CRTC:%d:%s]\n",
 					 crtc->base.id, crtc->name);
-
 			if (funcs->atomic_enable)
 				funcs->atomic_enable(crtc, old_crtc_state);
-			else
+			else if (funcs->commit)
 				funcs->commit(crtc);
 		}
 	}
diff --git a/include/drm/drm_modeset_helper_vtables.h b/include/drm/drm_modeset_helper_vtables.h
index cfb7be40bed7..ce4de6b1e444 100644
--- a/include/drm/drm_modeset_helper_vtables.h
+++ b/include/drm/drm_modeset_helper_vtables.h
@@ -418,6 +418,8 @@ struct drm_crtc_helper_funcs {
 	 * Drivers can use the @old_crtc_state input parameter if the operations
 	 * needed to enable the CRTC don't depend solely on the new state but
 	 * also on the transition between the old state and the new state.
+	 *
+	 * This function is optional.
 	 */
 	void (*atomic_enable)(struct drm_crtc *crtc,
 			      struct drm_crtc_state *old_crtc_state);
@@ -441,6 +443,8 @@ struct drm_crtc_helper_funcs {
 	 * parameter @old_crtc_state which could be used to access the old
 	 * state. Atomic drivers should consider to use this one instead
 	 * of @disable.
+	 *
+	 * This function is optional.
 	 */
 	void (*atomic_disable)(struct drm_crtc *crtc,
 			       struct drm_crtc_state *old_crtc_state);
-- 
cgit v1.2.3


From 9b7ea46a82b31c74a37e6ff1c2a1df7d53e392ab Mon Sep 17 00:00:00 2001
From: Qian Cai <cai@lca.pw>
Date: Thu, 28 Mar 2019 20:43:34 -0700
Subject: mm/hotplug: fix offline undo_isolate_page_range()

Commit f1dd2cd13c4b ("mm, memory_hotplug: do not associate hotadded
memory to zones until online") introduced move_pfn_range_to_zone() which
calls memmap_init_zone() during onlining a memory block.
memmap_init_zone() will reset pagetype flags and makes migrate type to
be MOVABLE.

However, in __offline_pages(), it also call undo_isolate_page_range()
after offline_isolated_pages() to do the same thing.  Due to commit
2ce13640b3f4 ("mm: __first_valid_page skip over offline pages") changed
__first_valid_page() to skip offline pages, undo_isolate_page_range()
here just waste CPU cycles looping around the offlining PFN range while
doing nothing, because __first_valid_page() will return NULL as
offline_isolated_pages() has already marked all memory sections within
the pfn range as offline via offline_mem_sections().

Also, after calling the "useless" undo_isolate_page_range() here, it
reaches the point of no returning by notifying MEM_OFFLINE.  Those pages
will be marked as MIGRATE_MOVABLE again once onlining.  The only thing
left to do is to decrease the number of isolated pageblocks zone counter
which would make some paths of the page allocation slower that the above
commit introduced.

Even if alloc_contig_range() can be used to isolate 16GB-hugetlb pages
on ppc64, an "int" should still be enough to represent the number of
pageblocks there.  Fix an incorrect comment along the way.

[cai@lca.pw: v4]
  Link: http://lkml.kernel.org/r/20190314150641.59358-1-cai@lca.pw
Link: http://lkml.kernel.org/r/20190313143133.46200-1-cai@lca.pw
Fixes: 2ce13640b3f4 ("mm: __first_valid_page skip over offline pages")
Signed-off-by: Qian Cai <cai@lca.pw>
Acked-by: Michal Hocko <mhocko@suse.com>
Reviewed-by: Oscar Salvador <osalvador@suse.de>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: <stable@vger.kernel.org>	[4.13+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/page-isolation.h | 10 ---------
 mm/memory_hotplug.c            | 17 +++++++++++----
 mm/page_alloc.c                |  2 +-
 mm/page_isolation.c            | 48 ++++++++++++++++++++++++++----------------
 mm/sparse.c                    |  2 +-
 5 files changed, 45 insertions(+), 34 deletions(-)

(limited to 'include')

diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h
index 4eb26d278046..280ae96dc4c3 100644
--- a/include/linux/page-isolation.h
+++ b/include/linux/page-isolation.h
@@ -41,16 +41,6 @@ int move_freepages_block(struct zone *zone, struct page *page,
 
 /*
  * Changes migrate type in [start_pfn, end_pfn) to be MIGRATE_ISOLATE.
- * If specified range includes migrate types other than MOVABLE or CMA,
- * this will fail with -EBUSY.
- *
- * For isolating all pages in the range finally, the caller have to
- * free all pages in the range. test_page_isolated() can be used for
- * test it.
- *
- * The following flags are allowed (they can be combined in a bit mask)
- * SKIP_HWPOISON - ignore hwpoison pages
- * REPORT_FAILURE - report details about the failure to isolate the range
  */
 int
 start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index f767582af4f8..0e0a16021fd5 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -1576,7 +1576,7 @@ static int __ref __offline_pages(unsigned long start_pfn,
 {
 	unsigned long pfn, nr_pages;
 	long offlined_pages;
-	int ret, node;
+	int ret, node, nr_isolate_pageblock;
 	unsigned long flags;
 	unsigned long valid_start, valid_end;
 	struct zone *zone;
@@ -1602,10 +1602,11 @@ static int __ref __offline_pages(unsigned long start_pfn,
 	ret = start_isolate_page_range(start_pfn, end_pfn,
 				       MIGRATE_MOVABLE,
 				       SKIP_HWPOISON | REPORT_FAILURE);
-	if (ret) {
+	if (ret < 0) {
 		reason = "failure to isolate range";
 		goto failed_removal;
 	}
+	nr_isolate_pageblock = ret;
 
 	arg.start_pfn = start_pfn;
 	arg.nr_pages = nr_pages;
@@ -1657,8 +1658,16 @@ static int __ref __offline_pages(unsigned long start_pfn,
 	/* Ok, all of our target is isolated.
 	   We cannot do rollback at this point. */
 	offline_isolated_pages(start_pfn, end_pfn);
-	/* reset pagetype flags and makes migrate type to be MOVABLE */
-	undo_isolate_page_range(start_pfn, end_pfn, MIGRATE_MOVABLE);
+
+	/*
+	 * Onlining will reset pagetype flags and makes migrate type
+	 * MOVABLE, so just need to decrease the number of isolated
+	 * pageblocks zone counter here.
+	 */
+	spin_lock_irqsave(&zone->lock, flags);
+	zone->nr_isolate_pageblock -= nr_isolate_pageblock;
+	spin_unlock_irqrestore(&zone->lock, flags);
+
 	/* removal success */
 	adjust_managed_page_count(pfn_to_page(start_pfn), -offlined_pages);
 	zone->present_pages -= offlined_pages;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 03fcf73d47da..d96ca5bc555b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -8233,7 +8233,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
 
 	ret = start_isolate_page_range(pfn_max_align_down(start),
 				       pfn_max_align_up(end), migratetype, 0);
-	if (ret)
+	if (ret < 0)
 		return ret;
 
 	/*
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index ce323e56b34d..bf4159d771c7 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -160,27 +160,36 @@ __first_valid_page(unsigned long pfn, unsigned long nr_pages)
 	return NULL;
 }
 
-/*
- * start_isolate_page_range() -- make page-allocation-type of range of pages
- * to be MIGRATE_ISOLATE.
- * @start_pfn: The lower PFN of the range to be isolated.
- * @end_pfn: The upper PFN of the range to be isolated.
- * @migratetype: migrate type to set in error recovery.
+/**
+ * start_isolate_page_range() - make page-allocation-type of range of pages to
+ * be MIGRATE_ISOLATE.
+ * @start_pfn:		The lower PFN of the range to be isolated.
+ * @end_pfn:		The upper PFN of the range to be isolated.
+ *			start_pfn/end_pfn must be aligned to pageblock_order.
+ * @migratetype:	Migrate type to set in error recovery.
+ * @flags:		The following flags are allowed (they can be combined in
+ *			a bit mask)
+ *			SKIP_HWPOISON - ignore hwpoison pages
+ *			REPORT_FAILURE - report details about the failure to
+ *			isolate the range
  *
  * Making page-allocation-type to be MIGRATE_ISOLATE means free pages in
  * the range will never be allocated. Any free pages and pages freed in the
- * future will not be allocated again.
- *
- * start_pfn/end_pfn must be aligned to pageblock_order.
- * Return 0 on success and -EBUSY if any part of range cannot be isolated.
+ * future will not be allocated again. If specified range includes migrate types
+ * other than MOVABLE or CMA, this will fail with -EBUSY. For isolating all
+ * pages in the range finally, the caller have to free all pages in the range.
+ * test_page_isolated() can be used for test it.
  *
  * There is no high level synchronization mechanism that prevents two threads
- * from trying to isolate overlapping ranges.  If this happens, one thread
+ * from trying to isolate overlapping ranges. If this happens, one thread
  * will notice pageblocks in the overlapping range already set to isolate.
  * This happens in set_migratetype_isolate, and set_migratetype_isolate
- * returns an error.  We then clean up by restoring the migration type on
- * pageblocks we may have modified and return -EBUSY to caller.  This
+ * returns an error. We then clean up by restoring the migration type on
+ * pageblocks we may have modified and return -EBUSY to caller. This
  * prevents two threads from simultaneously working on overlapping ranges.
+ *
+ * Return: the number of isolated pageblocks on success and -EBUSY if any part
+ * of range cannot be isolated.
  */
 int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
 			     unsigned migratetype, int flags)
@@ -188,6 +197,7 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
 	unsigned long pfn;
 	unsigned long undo_pfn;
 	struct page *page;
+	int nr_isolate_pageblock = 0;
 
 	BUG_ON(!IS_ALIGNED(start_pfn, pageblock_nr_pages));
 	BUG_ON(!IS_ALIGNED(end_pfn, pageblock_nr_pages));
@@ -196,13 +206,15 @@ int start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,
 	     pfn < end_pfn;
 	     pfn += pageblock_nr_pages) {
 		page = __first_valid_page(pfn, pageblock_nr_pages);
-		if (page &&
-		    set_migratetype_isolate(page, migratetype, flags)) {
-			undo_pfn = pfn;
-			goto undo;
+		if (page) {
+			if (set_migratetype_isolate(page, migratetype, flags)) {
+				undo_pfn = pfn;
+				goto undo;
+			}
+			nr_isolate_pageblock++;
 		}
 	}
-	return 0;
+	return nr_isolate_pageblock;
 undo:
 	for (pfn = start_pfn;
 	     pfn < undo_pfn;
diff --git a/mm/sparse.c b/mm/sparse.c
index 69904aa6165b..56e057c432f9 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -567,7 +567,7 @@ void online_mem_sections(unsigned long start_pfn, unsigned long end_pfn)
 }
 
 #ifdef CONFIG_MEMORY_HOTREMOVE
-/* Mark all memory sections within the pfn range as online */
+/* Mark all memory sections within the pfn range as offline */
 void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn)
 {
 	unsigned long pfn;
-- 
cgit v1.2.3


From 6d6ea1e967a246f12cfe2f5fb743b70b2e608d4a Mon Sep 17 00:00:00 2001
From: Nicolas Boichat <drinkcat@chromium.org>
Date: Thu, 28 Mar 2019 20:43:42 -0700
Subject: mm: add support for kmem caches in DMA32 zone

Patch series "iommu/io-pgtable-arm-v7s: Use DMA32 zone for page tables",
v6.

This is a followup to the discussion in [1], [2].

IOMMUs using ARMv7 short-descriptor format require page tables (level 1
and 2) to be allocated within the first 4GB of RAM, even on 64-bit
systems.

For L1 tables that are bigger than a page, we can just use
__get_free_pages with GFP_DMA32 (on arm64 systems only, arm would still
use GFP_DMA).

For L2 tables that only take 1KB, it would be a waste to allocate a full
page, so we considered 3 approaches:
 1. This series, adding support for GFP_DMA32 slab caches.
 2. genalloc, which requires pre-allocating the maximum number of L2 page
    tables (4096, so 4MB of memory).
 3. page_frag, which is not very memory-efficient as it is unable to reuse
    freed fragments until the whole page is freed. [3]

This series is the most memory-efficient approach.

stable@ note:
  We confirmed that this is a regression, and IOMMU errors happen on 4.19
  and linux-next/master on MT8173 (elm, Acer Chromebook R13). The issue
  most likely starts from commit ad67f5a6545f ("arm64: replace ZONE_DMA
  with ZONE_DMA32"), i.e. 4.15, and presumably breaks a number of Mediatek
  platforms (and maybe others?).

[1] https://lists.linuxfoundation.org/pipermail/iommu/2018-November/030876.html
[2] https://lists.linuxfoundation.org/pipermail/iommu/2018-December/031696.html
[3] https://patchwork.codeaurora.org/patch/671639/

This patch (of 3):

IOMMUs using ARMv7 short-descriptor format require page tables to be
allocated within the first 4GB of RAM, even on 64-bit systems.  On arm64,
this is done by passing GFP_DMA32 flag to memory allocation functions.

For IOMMU L2 tables that only take 1KB, it would be a waste to allocate
a full page using get_free_pages, so we considered 3 approaches:
 1. This patch, adding support for GFP_DMA32 slab caches.
 2. genalloc, which requires pre-allocating the maximum number of L2
    page tables (4096, so 4MB of memory).
 3. page_frag, which is not very memory-efficient as it is unable
    to reuse freed fragments until the whole page is freed.

This change makes it possible to create a custom cache in DMA32 zone using
kmem_cache_create, then allocate memory using kmem_cache_alloc.

We do not create a DMA32 kmalloc cache array, as there are currently no
users of kmalloc(..., GFP_DMA32).  These calls will continue to trigger a
warning, as we keep GFP_DMA32 in GFP_SLAB_BUG_MASK.

This implies that calls to kmem_cache_*alloc on a SLAB_CACHE_DMA32
kmem_cache must _not_ use GFP_DMA32 (it is anyway redundant and
unnecessary).

Link: http://lkml.kernel.org/r/20181210011504.122604-2-drinkcat@chromium.org
Signed-off-by: Nicolas Boichat <drinkcat@chromium.org>
Acked-by: Vlastimil Babka <vbabka@suse.cz>
Acked-by: Will Deacon <will.deacon@arm.com>
Cc: Robin Murphy <robin.murphy@arm.com>
Cc: Joerg Roedel <joro@8bytes.org>
Cc: Christoph Lameter <cl@linux.com>
Cc: Pekka Enberg <penberg@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Cc: Joonsoo Kim <iamjoonsoo.kim@lge.com>
Cc: Michal Hocko <mhocko@suse.com>
Cc: Mel Gorman <mgorman@techsingularity.net>
Cc: Sasha Levin <Alexander.Levin@microsoft.com>
Cc: Huaisheng Ye <yehs1@lenovo.com>
Cc: Mike Rapoport <rppt@linux.vnet.ibm.com>
Cc: Yong Wu <yong.wu@mediatek.com>
Cc: Matthias Brugger <matthias.bgg@gmail.com>
Cc: Tomasz Figa <tfiga@google.com>
Cc: Yingjoe Chen <yingjoe.chen@mediatek.com>
Cc: Christoph Hellwig <hch@infradead.org>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Hsin-Yi Wang <hsinyi@chromium.org>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/slab.h | 2 ++
 mm/slab.c            | 2 ++
 mm/slab.h            | 3 ++-
 mm/slab_common.c     | 2 +-
 mm/slub.c            | 5 +++++
 5 files changed, 12 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/slab.h b/include/linux/slab.h
index 11b45f7ae405..9449b19c5f10 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -32,6 +32,8 @@
 #define SLAB_HWCACHE_ALIGN	((slab_flags_t __force)0x00002000U)
 /* Use GFP_DMA memory */
 #define SLAB_CACHE_DMA		((slab_flags_t __force)0x00004000U)
+/* Use GFP_DMA32 memory */
+#define SLAB_CACHE_DMA32	((slab_flags_t __force)0x00008000U)
 /* DEBUG: Store the last owner for bug hunting */
 #define SLAB_STORE_USER		((slab_flags_t __force)0x00010000U)
 /* Panic if kmem_cache_create() fails */
diff --git a/mm/slab.c b/mm/slab.c
index 28652e4218e0..329bfe67f2ca 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -2115,6 +2115,8 @@ done:
 	cachep->allocflags = __GFP_COMP;
 	if (flags & SLAB_CACHE_DMA)
 		cachep->allocflags |= GFP_DMA;
+	if (flags & SLAB_CACHE_DMA32)
+		cachep->allocflags |= GFP_DMA32;
 	if (flags & SLAB_RECLAIM_ACCOUNT)
 		cachep->allocflags |= __GFP_RECLAIMABLE;
 	cachep->size = size;
diff --git a/mm/slab.h b/mm/slab.h
index e5e6658eeacc..43ac818b8592 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -127,7 +127,8 @@ static inline slab_flags_t kmem_cache_flags(unsigned int object_size,
 
 
 /* Legal flag mask for kmem_cache_create(), for various configurations */
-#define SLAB_CORE_FLAGS (SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA | SLAB_PANIC | \
+#define SLAB_CORE_FLAGS (SLAB_HWCACHE_ALIGN | SLAB_CACHE_DMA | \
+			 SLAB_CACHE_DMA32 | SLAB_PANIC | \
 			 SLAB_TYPESAFE_BY_RCU | SLAB_DEBUG_OBJECTS )
 
 #if defined(CONFIG_DEBUG_SLAB)
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 03eeb8b7b4b1..58251ba63e4a 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -53,7 +53,7 @@ static DECLARE_WORK(slab_caches_to_rcu_destroy_work,
 		SLAB_FAILSLAB | SLAB_KASAN)
 
 #define SLAB_MERGE_SAME (SLAB_RECLAIM_ACCOUNT | SLAB_CACHE_DMA | \
-			 SLAB_ACCOUNT)
+			 SLAB_CACHE_DMA32 | SLAB_ACCOUNT)
 
 /*
  * Merge control. If this is set then no merging of slab caches will occur.
diff --git a/mm/slub.c b/mm/slub.c
index 1b08fbcb7e61..d30ede89f4a6 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3589,6 +3589,9 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
 	if (s->flags & SLAB_CACHE_DMA)
 		s->allocflags |= GFP_DMA;
 
+	if (s->flags & SLAB_CACHE_DMA32)
+		s->allocflags |= GFP_DMA32;
+
 	if (s->flags & SLAB_RECLAIM_ACCOUNT)
 		s->allocflags |= __GFP_RECLAIMABLE;
 
@@ -5679,6 +5682,8 @@ static char *create_unique_id(struct kmem_cache *s)
 	 */
 	if (s->flags & SLAB_CACHE_DMA)
 		*p++ = 'd';
+	if (s->flags & SLAB_CACHE_DMA32)
+		*p++ = 'D';
 	if (s->flags & SLAB_RECLAIM_ACCOUNT)
 		*p++ = 'a';
 	if (s->flags & SLAB_CONSISTENCY_CHECKS)
-- 
cgit v1.2.3


From a953e7721fa9999fd628885ed451e16641a23d1e Mon Sep 17 00:00:00 2001
From: Souptick Joarder <jrdr.linux@gmail.com>
Date: Thu, 28 Mar 2019 20:43:51 -0700
Subject: include/linux/hugetlb.h: convert to use vm_fault_t

kbuild produces the below warning:

  tree: https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git master
  head:   5453a3df2a5eb49bc24615d4cf0d66b2aae05e5f
  commit 3d3539018d2c ("mm: create the new vm_fault_t type")
  reproduce:
        # apt-get install sparse
        git checkout 3d3539018d2cbd12e5af4a132636ee7fd8d43ef0
        make ARCH=x86_64 allmodconfig
        make C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__'

  >> mm/memory.c:3968:21: sparse: incorrect type in assignment (different
  >> base types) @@    expected restricted vm_fault_t [usertype] ret @@
  >> got e] ret @@
     mm/memory.c:3968:21:    expected restricted vm_fault_t [usertype] ret
     mm/memory.c:3968:21:    got int

This patch converts to return vm_fault_t type for hugetlb_fault() when
CONFIG_HUGETLB_PAGE=n.

Regarding the sparse warning, Luc said:

: This is the expected behaviour.  The constant 0 is magic regarding bitwise
: types but ({ ...; 0; }) is not, it is just an ordinary expression of type
: 'int'.
:
: So, IMHO, Souptick's patch is the right thing to do.

Link: http://lkml.kernel.org/r/20190318162604.GA31553@jordon-HP-15-Notebook-PC
Signed-off-by: Souptick Joarder <jrdr.linux@gmail.com>
Reviewed-by: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Luc Van Oostenryck <luc.vanoostenryck@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/hugetlb.h | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index ea35263eb76b..11943b60f208 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -203,7 +203,6 @@ static inline void hugetlb_show_meminfo(void)
 #define pud_huge(x)	0
 #define is_hugepage_only_range(mm, addr, len)	0
 #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) ({BUG(); 0; })
-#define hugetlb_fault(mm, vma, addr, flags)	({ BUG(); 0; })
 #define hugetlb_mcopy_atomic_pte(dst_mm, dst_pte, dst_vma, dst_addr, \
 				src_addr, pagep)	({ BUG(); 0; })
 #define huge_pte_offset(mm, address, sz)	0
@@ -234,6 +233,13 @@ static inline void __unmap_hugepage_range(struct mmu_gather *tlb,
 {
 	BUG();
 }
+static inline vm_fault_t hugetlb_fault(struct mm_struct *mm,
+				struct vm_area_struct *vma, unsigned long address,
+				unsigned int flags)
+{
+	BUG();
+	return 0;
+}
 
 #endif /* !CONFIG_HUGETLB_PAGE */
 /*
-- 
cgit v1.2.3


From b736523f0759d1debeb56f8e0c4c87a2bea0fb23 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Thu, 28 Mar 2019 20:44:05 -0700
Subject: include/linux/list.h: fix list_is_first() kernel-doc

Fix typo of kernel-doc parameter notation (there should be no space
between '@' and the parameter name).

Also fixes bogus kernel-doc notation output formatting.

Link: http://lkml.kernel.org/r/ddce8b80-9a8a-d52d-3546-87b2211c089a@infradead.org
Fixes: 70b44595eafe9 ("mm, compaction: use free lists to quickly locate a migration source")
Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Acked-by: Mel Gorman <mgorman@techsingularity.net>
Reviewed-by: William Kucharski <william.kucharski@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/list.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/list.h b/include/linux/list.h
index 79626b5ab36c..58aa3adf94e6 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -207,7 +207,7 @@ static inline void list_bulk_move_tail(struct list_head *head,
 }
 
 /**
- * list_is_first -- tests whether @ list is the first entry in list @head
+ * list_is_first -- tests whether @list is the first entry in list @head
  * @list: the entry to test
  * @head: the head of the list
  */
-- 
cgit v1.2.3


From fcfc2aa0185f4a731d05a21e9f359968fdfd02e7 Mon Sep 17 00:00:00 2001
From: Andrei Vagin <avagin@gmail.com>
Date: Thu, 28 Mar 2019 20:44:13 -0700
Subject: ptrace: take into account saved_sigmask in PTRACE{GET,SET}SIGMASK

There are a few system calls (pselect, ppoll, etc) which replace a task
sigmask while they are running in a kernel-space

When a task calls one of these syscalls, the kernel saves a current
sigmask in task->saved_sigmask and sets a syscall sigmask.

On syscall-exit-stop, ptrace traps a task before restoring the
saved_sigmask, so PTRACE_GETSIGMASK returns the syscall sigmask and
PTRACE_SETSIGMASK does nothing, because its sigmask is replaced by
saved_sigmask, when the task returns to user-space.

This patch fixes this problem.  PTRACE_GETSIGMASK returns saved_sigmask
if it's set.  PTRACE_SETSIGMASK drops the TIF_RESTORE_SIGMASK flag.

Link: http://lkml.kernel.org/r/20181120060616.6043-1-avagin@gmail.com
Fixes: 29000caecbe8 ("ptrace: add ability to get/set signal-blocked mask")
Signed-off-by: Andrei Vagin <avagin@gmail.com>
Acked-by: Oleg Nesterov <oleg@redhat.com>
Cc: "Eric W. Biederman" <ebiederm@xmission.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/sched/signal.h | 18 ++++++++++++++++++
 kernel/ptrace.c              | 15 +++++++++++++--
 2 files changed, 31 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index ae5655197698..e412c092c1e8 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -418,10 +418,20 @@ static inline void set_restore_sigmask(void)
 	set_thread_flag(TIF_RESTORE_SIGMASK);
 	WARN_ON(!test_thread_flag(TIF_SIGPENDING));
 }
+
+static inline void clear_tsk_restore_sigmask(struct task_struct *tsk)
+{
+	clear_tsk_thread_flag(tsk, TIF_RESTORE_SIGMASK);
+}
+
 static inline void clear_restore_sigmask(void)
 {
 	clear_thread_flag(TIF_RESTORE_SIGMASK);
 }
+static inline bool test_tsk_restore_sigmask(struct task_struct *tsk)
+{
+	return test_tsk_thread_flag(tsk, TIF_RESTORE_SIGMASK);
+}
 static inline bool test_restore_sigmask(void)
 {
 	return test_thread_flag(TIF_RESTORE_SIGMASK);
@@ -439,6 +449,10 @@ static inline void set_restore_sigmask(void)
 	current->restore_sigmask = true;
 	WARN_ON(!test_thread_flag(TIF_SIGPENDING));
 }
+static inline void clear_tsk_restore_sigmask(struct task_struct *tsk)
+{
+	tsk->restore_sigmask = false;
+}
 static inline void clear_restore_sigmask(void)
 {
 	current->restore_sigmask = false;
@@ -447,6 +461,10 @@ static inline bool test_restore_sigmask(void)
 {
 	return current->restore_sigmask;
 }
+static inline bool test_tsk_restore_sigmask(struct task_struct *tsk)
+{
+	return tsk->restore_sigmask;
+}
 static inline bool test_and_clear_restore_sigmask(void)
 {
 	if (!current->restore_sigmask)
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 771e93f9c43f..6f357f4fc859 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -29,6 +29,7 @@
 #include <linux/hw_breakpoint.h>
 #include <linux/cn_proc.h>
 #include <linux/compat.h>
+#include <linux/sched/signal.h>
 
 /*
  * Access another process' address space via ptrace.
@@ -924,18 +925,26 @@ int ptrace_request(struct task_struct *child, long request,
 			ret = ptrace_setsiginfo(child, &siginfo);
 		break;
 
-	case PTRACE_GETSIGMASK:
+	case PTRACE_GETSIGMASK: {
+		sigset_t *mask;
+
 		if (addr != sizeof(sigset_t)) {
 			ret = -EINVAL;
 			break;
 		}
 
-		if (copy_to_user(datavp, &child->blocked, sizeof(sigset_t)))
+		if (test_tsk_restore_sigmask(child))
+			mask = &child->saved_sigmask;
+		else
+			mask = &child->blocked;
+
+		if (copy_to_user(datavp, mask, sizeof(sigset_t)))
 			ret = -EFAULT;
 		else
 			ret = 0;
 
 		break;
+	}
 
 	case PTRACE_SETSIGMASK: {
 		sigset_t new_set;
@@ -961,6 +970,8 @@ int ptrace_request(struct task_struct *child, long request,
 		child->blocked = new_set;
 		spin_unlock_irq(&child->sighand->siglock);
 
+		clear_tsk_restore_sigmask(child);
+
 		ret = 0;
 		break;
 	}
-- 
cgit v1.2.3


From 80a2a9026b24c6bd34b8d58256973e22270bedec Mon Sep 17 00:00:00 2001
From: Yuval Avnery <yuvalav@mellanox.com>
Date: Mon, 11 Mar 2019 06:18:24 +0200
Subject: net/mlx5e: Add a lock on tir list

Refresh tirs is looping over a global list of tirs while netdevs are
adding and removing tirs from that list. That is why a lock is
required.

Fixes: 724b2aa15126 ("net/mlx5e: TIRs management refactoring")
Signed-off-by: Yuval Avnery <yuvalav@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_common.c | 7 +++++++
 include/linux/mlx5/driver.h                         | 2 ++
 2 files changed, 9 insertions(+)

(limited to 'include')

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
index 8100786f6fb5..1539cf3de5dc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
@@ -45,7 +45,9 @@ int mlx5e_create_tir(struct mlx5_core_dev *mdev,
 	if (err)
 		return err;
 
+	mutex_lock(&mdev->mlx5e_res.td.list_lock);
 	list_add(&tir->list, &mdev->mlx5e_res.td.tirs_list);
+	mutex_unlock(&mdev->mlx5e_res.td.list_lock);
 
 	return 0;
 }
@@ -53,8 +55,10 @@ int mlx5e_create_tir(struct mlx5_core_dev *mdev,
 void mlx5e_destroy_tir(struct mlx5_core_dev *mdev,
 		       struct mlx5e_tir *tir)
 {
+	mutex_lock(&mdev->mlx5e_res.td.list_lock);
 	mlx5_core_destroy_tir(mdev, tir->tirn);
 	list_del(&tir->list);
+	mutex_unlock(&mdev->mlx5e_res.td.list_lock);
 }
 
 static int mlx5e_create_mkey(struct mlx5_core_dev *mdev, u32 pdn,
@@ -114,6 +118,7 @@ int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev)
 	}
 
 	INIT_LIST_HEAD(&mdev->mlx5e_res.td.tirs_list);
+	mutex_init(&mdev->mlx5e_res.td.list_lock);
 
 	return 0;
 
@@ -159,6 +164,7 @@ int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb)
 
 	MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1);
 
+	mutex_lock(&mdev->mlx5e_res.td.list_lock);
 	list_for_each_entry(tir, &mdev->mlx5e_res.td.tirs_list, list) {
 		tirn = tir->tirn;
 		err = mlx5_core_modify_tir(mdev, tirn, in, inlen);
@@ -170,6 +176,7 @@ out:
 	kvfree(in);
 	if (err)
 		netdev_err(priv->netdev, "refresh tir(0x%x) failed, %d\n", tirn, err);
+	mutex_unlock(&mdev->mlx5e_res.td.list_lock);
 
 	return err;
 }
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 022541dc5dbf..0d0729648844 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -594,6 +594,8 @@ enum mlx5_pagefault_type_flags {
 };
 
 struct mlx5_td {
+	/* protects tirs list changes while tirs refresh */
+	struct mutex     list_lock;
 	struct list_head tirs_list;
 	u32              tdn;
 };
-- 
cgit v1.2.3


From 8c83f2df9c6578ea4c5b940d8238ad8a41b87e9e Mon Sep 17 00:00:00 2001
From: Stephen Suryaputra <ssuryaextr@gmail.com>
Date: Mon, 1 Apr 2019 09:17:32 -0400
Subject: vrf: check accept_source_route on the original netdevice

Configuration check to accept source route IP options should be made on
the incoming netdevice when the skb->dev is an l3mdev master. The route
lookup for the source route next hop also needs the incoming netdev.

v2->v3:
- Simplify by passing the original netdevice down the stack (per David
  Ahern).

Signed-off-by: Stephen Suryaputra <ssuryaextr@gmail.com>
Reviewed-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h      | 2 +-
 net/ipv4/ip_input.c   | 7 +++----
 net/ipv4/ip_options.c | 4 ++--
 3 files changed, 6 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/net/ip.h b/include/net/ip.h
index be3cad9c2e4c..583526aad1d0 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -677,7 +677,7 @@ int ip_options_get_from_user(struct net *net, struct ip_options_rcu **optp,
 			     unsigned char __user *data, int optlen);
 void ip_options_undo(struct ip_options *opt);
 void ip_forward_options(struct sk_buff *skb);
-int ip_options_rcv_srr(struct sk_buff *skb);
+int ip_options_rcv_srr(struct sk_buff *skb, struct net_device *dev);
 
 /*
  *	Functions provided by ip_sockglue.c
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index ecce2dc78f17..1132d6d1796a 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -257,11 +257,10 @@ int ip_local_deliver(struct sk_buff *skb)
 		       ip_local_deliver_finish);
 }
 
-static inline bool ip_rcv_options(struct sk_buff *skb)
+static inline bool ip_rcv_options(struct sk_buff *skb, struct net_device *dev)
 {
 	struct ip_options *opt;
 	const struct iphdr *iph;
-	struct net_device *dev = skb->dev;
 
 	/* It looks as overkill, because not all
 	   IP options require packet mangling.
@@ -297,7 +296,7 @@ static inline bool ip_rcv_options(struct sk_buff *skb)
 			}
 		}
 
-		if (ip_options_rcv_srr(skb))
+		if (ip_options_rcv_srr(skb, dev))
 			goto drop;
 	}
 
@@ -353,7 +352,7 @@ static int ip_rcv_finish_core(struct net *net, struct sock *sk,
 	}
 #endif
 
-	if (iph->ihl > 5 && ip_rcv_options(skb))
+	if (iph->ihl > 5 && ip_rcv_options(skb, dev))
 		goto drop;
 
 	rt = skb_rtable(skb);
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index 32a35043c9f5..3db31bb9df50 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -612,7 +612,7 @@ void ip_forward_options(struct sk_buff *skb)
 	}
 }
 
-int ip_options_rcv_srr(struct sk_buff *skb)
+int ip_options_rcv_srr(struct sk_buff *skb, struct net_device *dev)
 {
 	struct ip_options *opt = &(IPCB(skb)->opt);
 	int srrspace, srrptr;
@@ -647,7 +647,7 @@ int ip_options_rcv_srr(struct sk_buff *skb)
 
 		orefdst = skb->_skb_refdst;
 		skb_dst_set(skb, NULL);
-		err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, skb->dev);
+		err = ip_route_input(skb, nexthop, iph->saddr, iph->tos, dev);
 		rt2 = skb_rtable(skb);
 		if (err || (rt2->rt_type != RTN_UNICAST && rt2->rt_type != RTN_LOCAL)) {
 			skb_dst_drop(skb);
-- 
cgit v1.2.3


From 5dd431b6b92c0db324d134d2a4006dd4f87f2261 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Thu, 28 Mar 2019 16:53:12 +0100
Subject: net: sched: introduce and use qstats read helpers

Classful qdiscs can't access directly the child qdiscs backlog
length: if such qdisc is NOLOCK, per CPU values should be
accounted instead.

Most qdiscs no not respect the above. As a result, qstats fetching
for most classful qdisc is currently incorrect: if the child qdisc is
NOLOCK, it always reports 0 len backlog.

This change introduces a pair of helpers to safely fetch
both backlog and qlen and use them in stats class dumping
functions, fixing the above issue and cleaning a bit the code.

DRR needs also to access the child qdisc queue length, so it
needs custom handling.

Fixes: c5ad119fb6c0 ("net: sched: pfifo_fast use skb_array")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 18 ++++++++++++++++++
 net/sched/sch_cbq.c       |  4 +++-
 net/sched/sch_drr.c       |  5 +++--
 net/sched/sch_hfsc.c      |  5 +++--
 net/sched/sch_htb.c       |  7 +++----
 net/sched/sch_mq.c        |  2 +-
 net/sched/sch_mqprio.c    |  3 +--
 net/sched/sch_multiq.c    |  2 +-
 net/sched/sch_prio.c      |  2 +-
 net/sched/sch_qfq.c       |  3 +--
 net/sched/sch_taprio.c    |  2 +-
 11 files changed, 36 insertions(+), 17 deletions(-)

(limited to 'include')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 7d1a0483a17b..43e4e17aa938 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -923,6 +923,24 @@ static inline void qdisc_qstats_overlimit(struct Qdisc *sch)
 	sch->qstats.overlimits++;
 }
 
+static inline int qdisc_qstats_copy(struct gnet_dump *d, struct Qdisc *sch)
+{
+	__u32 qlen = qdisc_qlen_sum(sch);
+
+	return gnet_stats_copy_queue(d, sch->cpu_qstats, &sch->qstats, qlen);
+}
+
+static inline void qdisc_qstats_qlen_backlog(struct Qdisc *sch,  __u32 *qlen,
+					     __u32 *backlog)
+{
+	struct gnet_stats_queue qstats = { 0 };
+	__u32 len = qdisc_qlen_sum(sch);
+
+	__gnet_stats_copy_queue(&qstats, sch->cpu_qstats, &sch->qstats, len);
+	*qlen = qstats.qlen;
+	*backlog = qstats.backlog;
+}
+
 static inline void qdisc_skb_head_init(struct qdisc_skb_head *qh)
 {
 	qh->head = NULL;
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 4dc05409e3fb..651879c1b655 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1358,9 +1358,11 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
 {
 	struct cbq_sched_data *q = qdisc_priv(sch);
 	struct cbq_class *cl = (struct cbq_class *)arg;
+	__u32 qlen;
 
 	cl->xstats.avgidle = cl->avgidle;
 	cl->xstats.undertime = 0;
+	qdisc_qstats_qlen_backlog(cl->q, &qlen, &cl->qstats.backlog);
 
 	if (cl->undertime != PSCHED_PASTPERFECT)
 		cl->xstats.undertime = cl->undertime - q->now;
@@ -1368,7 +1370,7 @@ cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
 	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
 				  d, NULL, &cl->bstats) < 0 ||
 	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
-	    gnet_stats_copy_queue(d, NULL, &cl->qstats, cl->q->q.qlen) < 0)
+	    gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0)
 		return -1;
 
 	return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index 09b800991065..8a181591b0ea 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -269,7 +269,8 @@ static int drr_dump_class_stats(struct Qdisc *sch, unsigned long arg,
 				struct gnet_dump *d)
 {
 	struct drr_class *cl = (struct drr_class *)arg;
-	__u32 qlen = cl->qdisc->q.qlen;
+	__u32 qlen = qdisc_qlen_sum(cl->qdisc);
+	struct Qdisc *cl_q = cl->qdisc;
 	struct tc_drr_stats xstats;
 
 	memset(&xstats, 0, sizeof(xstats));
@@ -279,7 +280,7 @@ static int drr_dump_class_stats(struct Qdisc *sch, unsigned long arg,
 	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
 				  d, NULL, &cl->bstats) < 0 ||
 	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
-	    gnet_stats_copy_queue(d, NULL, &cl->qdisc->qstats, qlen) < 0)
+	    gnet_stats_copy_queue(d, cl_q->cpu_qstats, &cl_q->qstats, qlen) < 0)
 		return -1;
 
 	return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index 24cc220a3218..a946a419d717 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1328,8 +1328,9 @@ hfsc_dump_class_stats(struct Qdisc *sch, unsigned long arg,
 {
 	struct hfsc_class *cl = (struct hfsc_class *)arg;
 	struct tc_hfsc_stats xstats;
+	__u32 qlen;
 
-	cl->qstats.backlog = cl->qdisc->qstats.backlog;
+	qdisc_qstats_qlen_backlog(cl->qdisc, &qlen, &cl->qstats.backlog);
 	xstats.level   = cl->level;
 	xstats.period  = cl->cl_vtperiod;
 	xstats.work    = cl->cl_total;
@@ -1337,7 +1338,7 @@ hfsc_dump_class_stats(struct Qdisc *sch, unsigned long arg,
 
 	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch), d, NULL, &cl->bstats) < 0 ||
 	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
-	    gnet_stats_copy_queue(d, NULL, &cl->qstats, cl->qdisc->q.qlen) < 0)
+	    gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0)
 		return -1;
 
 	return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 30f9da7e1076..ed92836f528a 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1127,10 +1127,9 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d)
 	};
 	__u32 qlen = 0;
 
-	if (!cl->level && cl->leaf.q) {
-		qlen = cl->leaf.q->q.qlen;
-		qs.backlog = cl->leaf.q->qstats.backlog;
-	}
+	if (!cl->level && cl->leaf.q)
+		qdisc_qstats_qlen_backlog(cl->leaf.q, &qlen, &qs.backlog);
+
 	cl->xstats.tokens = clamp_t(s64, PSCHED_NS2TICKS(cl->tokens),
 				    INT_MIN, INT_MAX);
 	cl->xstats.ctokens = clamp_t(s64, PSCHED_NS2TICKS(cl->ctokens),
diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c
index 203659bc3906..3a3312467692 100644
--- a/net/sched/sch_mq.c
+++ b/net/sched/sch_mq.c
@@ -249,7 +249,7 @@ static int mq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 
 	sch = dev_queue->qdisc_sleeping;
 	if (gnet_stats_copy_basic(&sch->running, d, NULL, &sch->bstats) < 0 ||
-	    gnet_stats_copy_queue(d, NULL, &sch->qstats, sch->q.qlen) < 0)
+	    qdisc_qstats_copy(d, sch) < 0)
 		return -1;
 	return 0;
 }
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index d364e63c396d..ea0dc112b38d 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -561,8 +561,7 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 		sch = dev_queue->qdisc_sleeping;
 		if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
 					  d, NULL, &sch->bstats) < 0 ||
-		    gnet_stats_copy_queue(d, NULL,
-					  &sch->qstats, sch->q.qlen) < 0)
+		    qdisc_qstats_copy(d, sch) < 0)
 			return -1;
 	}
 	return 0;
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 7410ce4d0321..53c918a11378 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -344,7 +344,7 @@ static int multiq_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 	cl_q = q->queues[cl - 1];
 	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
 				  d, NULL, &cl_q->bstats) < 0 ||
-	    gnet_stats_copy_queue(d, NULL, &cl_q->qstats, cl_q->q.qlen) < 0)
+	    qdisc_qstats_copy(d, cl_q) < 0)
 		return -1;
 
 	return 0;
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index 847141cd900f..dfb06d5bfacc 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -365,7 +365,7 @@ static int prio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 	cl_q = q->queues[cl - 1];
 	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
 				  d, NULL, &cl_q->bstats) < 0 ||
-	    gnet_stats_copy_queue(d, NULL, &cl_q->qstats, cl_q->q.qlen) < 0)
+	    qdisc_qstats_copy(d, cl_q) < 0)
 		return -1;
 
 	return 0;
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 29f5c4a24688..9fbda3ec5861 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -655,8 +655,7 @@ static int qfq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
 	if (gnet_stats_copy_basic(qdisc_root_sleeping_running(sch),
 				  d, NULL, &cl->bstats) < 0 ||
 	    gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
-	    gnet_stats_copy_queue(d, NULL,
-				  &cl->qdisc->qstats, cl->qdisc->q.qlen) < 0)
+	    qdisc_qstats_copy(d, cl->qdisc) < 0)
 		return -1;
 
 	return gnet_stats_copy_app(d, &xstats, sizeof(xstats));
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 206e4dbed12f..c7041999eb5d 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -895,7 +895,7 @@ static int taprio_dump_class_stats(struct Qdisc *sch, unsigned long cl,
 
 	sch = dev_queue->qdisc_sleeping;
 	if (gnet_stats_copy_basic(&sch->running, d, NULL, &sch->bstats) < 0 ||
-	    gnet_stats_copy_queue(d, NULL, &sch->qstats, sch->q.qlen) < 0)
+	    qdisc_qstats_copy(d, sch) < 0)
 		return -1;
 	return 0;
 }
-- 
cgit v1.2.3


From e5f0e8f8e456589d56e4955154ed5d468cd6d286 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Thu, 28 Mar 2019 16:53:13 +0100
Subject: net: sched: introduce and use qdisc tree flush/purge helpers

The same code to flush qdisc tree and purge the qdisc queue
is duplicated in many places and in most cases it does not
respect NOLOCK qdisc: the global backlog len is used and the
per CPU values are ignored.

This change addresses the above, factoring-out the relevant
code and using the helpers introduced by the previous patch
to fetch the correct backlog len.

Fixes: c5ad119fb6c0 ("net: sched: pfifo_fast use skb_array")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 26 +++++++++++++++++++-------
 net/sched/sch_cbq.c       |  6 +-----
 net/sched/sch_drr.c       | 11 +----------
 net/sched/sch_hfsc.c      | 14 ++------------
 net/sched/sch_htb.c       | 15 +++------------
 net/sched/sch_multiq.c    |  8 +++-----
 net/sched/sch_prio.c      |  8 ++------
 net/sched/sch_qfq.c       | 11 +----------
 net/sched/sch_red.c       |  3 +--
 net/sched/sch_sfb.c       |  3 +--
 net/sched/sch_tbf.c       |  3 +--
 11 files changed, 35 insertions(+), 73 deletions(-)

(limited to 'include')

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 43e4e17aa938..a2b38b3deeca 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -941,6 +941,23 @@ static inline void qdisc_qstats_qlen_backlog(struct Qdisc *sch,  __u32 *qlen,
 	*backlog = qstats.backlog;
 }
 
+static inline void qdisc_tree_flush_backlog(struct Qdisc *sch)
+{
+	__u32 qlen, backlog;
+
+	qdisc_qstats_qlen_backlog(sch, &qlen, &backlog);
+	qdisc_tree_reduce_backlog(sch, qlen, backlog);
+}
+
+static inline void qdisc_purge_queue(struct Qdisc *sch)
+{
+	__u32 qlen, backlog;
+
+	qdisc_qstats_qlen_backlog(sch, &qlen, &backlog);
+	qdisc_reset(sch);
+	qdisc_tree_reduce_backlog(sch, qlen, backlog);
+}
+
 static inline void qdisc_skb_head_init(struct qdisc_skb_head *qh)
 {
 	qh->head = NULL;
@@ -1124,13 +1141,8 @@ static inline struct Qdisc *qdisc_replace(struct Qdisc *sch, struct Qdisc *new,
 	sch_tree_lock(sch);
 	old = *pold;
 	*pold = new;
-	if (old != NULL) {
-		unsigned int qlen = old->q.qlen;
-		unsigned int backlog = old->qstats.backlog;
-
-		qdisc_reset(old);
-		qdisc_tree_reduce_backlog(old, qlen, backlog);
-	}
+	if (old != NULL)
+		qdisc_tree_flush_backlog(old);
 	sch_tree_unlock(sch);
 
 	return old;
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 651879c1b655..114b9048ea7e 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1667,17 +1667,13 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg)
 {
 	struct cbq_sched_data *q = qdisc_priv(sch);
 	struct cbq_class *cl = (struct cbq_class *)arg;
-	unsigned int qlen, backlog;
 
 	if (cl->filters || cl->children || cl == &q->link)
 		return -EBUSY;
 
 	sch_tree_lock(sch);
 
-	qlen = cl->q->q.qlen;
-	backlog = cl->q->qstats.backlog;
-	qdisc_reset(cl->q);
-	qdisc_tree_reduce_backlog(cl->q, qlen, backlog);
+	qdisc_purge_queue(cl->q);
 
 	if (cl->next_alive)
 		cbq_deactivate_class(cl);
diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c
index 8a181591b0ea..430df9a55ec4 100644
--- a/net/sched/sch_drr.c
+++ b/net/sched/sch_drr.c
@@ -50,15 +50,6 @@ static struct drr_class *drr_find_class(struct Qdisc *sch, u32 classid)
 	return container_of(clc, struct drr_class, common);
 }
 
-static void drr_purge_queue(struct drr_class *cl)
-{
-	unsigned int len = cl->qdisc->q.qlen;
-	unsigned int backlog = cl->qdisc->qstats.backlog;
-
-	qdisc_reset(cl->qdisc);
-	qdisc_tree_reduce_backlog(cl->qdisc, len, backlog);
-}
-
 static const struct nla_policy drr_policy[TCA_DRR_MAX + 1] = {
 	[TCA_DRR_QUANTUM]	= { .type = NLA_U32 },
 };
@@ -167,7 +158,7 @@ static int drr_delete_class(struct Qdisc *sch, unsigned long arg)
 
 	sch_tree_lock(sch);
 
-	drr_purge_queue(cl);
+	qdisc_purge_queue(cl->qdisc);
 	qdisc_class_hash_remove(&q->clhash, &cl->common);
 
 	sch_tree_unlock(sch);
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index a946a419d717..d2ab463f22ae 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -844,16 +844,6 @@ qdisc_peek_len(struct Qdisc *sch)
 	return len;
 }
 
-static void
-hfsc_purge_queue(struct Qdisc *sch, struct hfsc_class *cl)
-{
-	unsigned int len = cl->qdisc->q.qlen;
-	unsigned int backlog = cl->qdisc->qstats.backlog;
-
-	qdisc_reset(cl->qdisc);
-	qdisc_tree_reduce_backlog(cl->qdisc, len, backlog);
-}
-
 static void
 hfsc_adjust_levels(struct hfsc_class *cl)
 {
@@ -1076,7 +1066,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
 	qdisc_class_hash_insert(&q->clhash, &cl->cl_common);
 	list_add_tail(&cl->siblings, &parent->children);
 	if (parent->level == 0)
-		hfsc_purge_queue(sch, parent);
+		qdisc_purge_queue(parent->qdisc);
 	hfsc_adjust_levels(parent);
 	sch_tree_unlock(sch);
 
@@ -1112,7 +1102,7 @@ hfsc_delete_class(struct Qdisc *sch, unsigned long arg)
 	list_del(&cl->siblings);
 	hfsc_adjust_levels(cl->cl_parent);
 
-	hfsc_purge_queue(sch, cl);
+	qdisc_purge_queue(cl->qdisc);
 	qdisc_class_hash_remove(&q->clhash, &cl->cl_common);
 
 	sch_tree_unlock(sch);
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index ed92836f528a..2f9883b196e8 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1269,13 +1269,8 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
 
 	sch_tree_lock(sch);
 
-	if (!cl->level) {
-		unsigned int qlen = cl->leaf.q->q.qlen;
-		unsigned int backlog = cl->leaf.q->qstats.backlog;
-
-		qdisc_reset(cl->leaf.q);
-		qdisc_tree_reduce_backlog(cl->leaf.q, qlen, backlog);
-	}
+	if (!cl->level)
+		qdisc_purge_queue(cl->leaf.q);
 
 	/* delete from hash and active; remainder in destroy_class */
 	qdisc_class_hash_remove(&q->clhash, &cl->common);
@@ -1403,12 +1398,8 @@ static int htb_change_class(struct Qdisc *sch, u32 classid,
 					  classid, NULL);
 		sch_tree_lock(sch);
 		if (parent && !parent->level) {
-			unsigned int qlen = parent->leaf.q->q.qlen;
-			unsigned int backlog = parent->leaf.q->qstats.backlog;
-
 			/* turn parent into inner node */
-			qdisc_reset(parent->leaf.q);
-			qdisc_tree_reduce_backlog(parent->leaf.q, qlen, backlog);
+			qdisc_purge_queue(parent->leaf.q);
 			qdisc_put(parent->leaf.q);
 			if (parent->prio_activity)
 				htb_deactivate(q, parent);
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index 53c918a11378..35b03ae08e0f 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -201,9 +201,9 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt,
 	for (i = q->bands; i < q->max_bands; i++) {
 		if (q->queues[i] != &noop_qdisc) {
 			struct Qdisc *child = q->queues[i];
+
 			q->queues[i] = &noop_qdisc;
-			qdisc_tree_reduce_backlog(child, child->q.qlen,
-						  child->qstats.backlog);
+			qdisc_tree_flush_backlog(child);
 			qdisc_put(child);
 		}
 	}
@@ -225,9 +225,7 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt,
 					qdisc_hash_add(child, true);
 
 				if (old != &noop_qdisc) {
-					qdisc_tree_reduce_backlog(old,
-								  old->q.qlen,
-								  old->qstats.backlog);
+					qdisc_tree_flush_backlog(old);
 					qdisc_put(old);
 				}
 				sch_tree_unlock(sch);
diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c
index dfb06d5bfacc..d519b21535b3 100644
--- a/net/sched/sch_prio.c
+++ b/net/sched/sch_prio.c
@@ -216,12 +216,8 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt,
 	q->bands = qopt->bands;
 	memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1);
 
-	for (i = q->bands; i < oldbands; i++) {
-		struct Qdisc *child = q->queues[i];
-
-		qdisc_tree_reduce_backlog(child, child->q.qlen,
-					  child->qstats.backlog);
-	}
+	for (i = q->bands; i < oldbands; i++)
+		qdisc_tree_flush_backlog(q->queues[i]);
 
 	for (i = oldbands; i < q->bands; i++) {
 		q->queues[i] = queues[i];
diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c
index 9fbda3ec5861..1589364b54da 100644
--- a/net/sched/sch_qfq.c
+++ b/net/sched/sch_qfq.c
@@ -217,15 +217,6 @@ static struct qfq_class *qfq_find_class(struct Qdisc *sch, u32 classid)
 	return container_of(clc, struct qfq_class, common);
 }
 
-static void qfq_purge_queue(struct qfq_class *cl)
-{
-	unsigned int len = cl->qdisc->q.qlen;
-	unsigned int backlog = cl->qdisc->qstats.backlog;
-
-	qdisc_reset(cl->qdisc);
-	qdisc_tree_reduce_backlog(cl->qdisc, len, backlog);
-}
-
 static const struct nla_policy qfq_policy[TCA_QFQ_MAX + 1] = {
 	[TCA_QFQ_WEIGHT] = { .type = NLA_U32 },
 	[TCA_QFQ_LMAX] = { .type = NLA_U32 },
@@ -551,7 +542,7 @@ static int qfq_delete_class(struct Qdisc *sch, unsigned long arg)
 
 	sch_tree_lock(sch);
 
-	qfq_purge_queue(cl);
+	qdisc_purge_queue(cl->qdisc);
 	qdisc_class_hash_remove(&q->clhash, &cl->common);
 
 	sch_tree_unlock(sch);
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index 9df9942340ea..4e8c0abf6194 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -233,8 +233,7 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt,
 	q->flags = ctl->flags;
 	q->limit = ctl->limit;
 	if (child) {
-		qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
-					  q->qdisc->qstats.backlog);
+		qdisc_tree_flush_backlog(q->qdisc);
 		old_child = q->qdisc;
 		q->qdisc = child;
 	}
diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c
index bab506b01a32..2419fdb75966 100644
--- a/net/sched/sch_sfb.c
+++ b/net/sched/sch_sfb.c
@@ -521,8 +521,7 @@ static int sfb_change(struct Qdisc *sch, struct nlattr *opt,
 		qdisc_hash_add(child, true);
 	sch_tree_lock(sch);
 
-	qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
-				  q->qdisc->qstats.backlog);
+	qdisc_tree_flush_backlog(q->qdisc);
 	qdisc_put(q->qdisc);
 	q->qdisc = child;
 
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index 7f272a9070c5..f71578dbb9e3 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -391,8 +391,7 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt,
 
 	sch_tree_lock(sch);
 	if (child) {
-		qdisc_tree_reduce_backlog(q->qdisc, q->qdisc->q.qlen,
-					  q->qdisc->qstats.backlog);
+		qdisc_tree_flush_backlog(q->qdisc);
 		qdisc_put(q->qdisc);
 		q->qdisc = child;
 	}
-- 
cgit v1.2.3


From b5bdbb6ccd1117896bf4fba6bff75336ca423e8c Mon Sep 17 00:00:00 2001
From: Daniel Mentz <danielmentz@google.com>
Date: Fri, 29 Mar 2019 15:48:54 -0700
Subject: ALSA: uapi: #include <time.h> in asound.h

The uapi header asound.h defines types based on struct timespec. We need
to #include <time.h> to get access to the definition of this struct.

Previously, we encountered the following error message when building
applications with a clang/bionic toolchain:

kernel-headers/sound/asound.h:350:19: error: field has incomplete type 'struct timespec'
  struct timespec trigger_tstamp;
                  ^

The absence of the time.h #include statement does not cause build errors
with glibc, because its version of stdlib.h indirectly includes time.h.

Signed-off-by: Daniel Mentz <danielmentz@google.com>
Signed-off-by: Takashi Iwai <tiwai@suse.de>
---
 include/uapi/sound/asound.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/uapi/sound/asound.h b/include/uapi/sound/asound.h
index 404d4b9ffe76..df1153cea0b7 100644
--- a/include/uapi/sound/asound.h
+++ b/include/uapi/sound/asound.h
@@ -32,6 +32,7 @@
 
 #ifndef __KERNEL__
 #include <stdlib.h>
+#include <time.h>
 #endif
 
 /*
-- 
cgit v1.2.3


From a0fe2c6479aab5723239b315ef1b552673f434a3 Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh@google.com>
Date: Fri, 29 Mar 2019 22:46:49 +0100
Subject: linux/kernel.h: Use parentheses around argument in u64_to_user_ptr()

Use parentheses around uses of the argument in u64_to_user_ptr() to
ensure that the cast doesn't apply to part of the argument.

There are existing uses of the macro of the form

  u64_to_user_ptr(A + B)

which expands to

  (void __user *)(uintptr_t)A + B

(the cast applies to the first operand of the addition, the addition
is a pointer addition). This happens to still work as intended, the
semantic difference doesn't cause a difference in behavior.

But I want to use u64_to_user_ptr() with a ternary operator in the
argument, like so:

  u64_to_user_ptr(A ? B : C)

This currently doesn't work as intended.

Signed-off-by: Jann Horn <jannh@google.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Mukesh Ojha <mojha@codeaurora.org>
Cc: Andrei Vagin <avagin@openvz.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Dan Carpenter <dan.carpenter@oracle.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jani Nikula <jani.nikula@intel.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Masahiro Yamada <yamada.masahiro@socionext.com>
Cc: NeilBrown <neilb@suse.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Qiaowei Ren <qiaowei.ren@intel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: x86-ml <x86@kernel.org>
Link: https://lkml.kernel.org/r/20190329214652.258477-1-jannh@google.com
---
 include/linux/kernel.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 34a5036debd3..2d14e21c16c0 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -47,8 +47,8 @@
 
 #define u64_to_user_ptr(x) (		\
 {					\
-	typecheck(u64, x);		\
-	(void __user *)(uintptr_t)x;	\
+	typecheck(u64, (x));		\
+	(void __user *)(uintptr_t)(x);	\
 }					\
 )
 
-- 
cgit v1.2.3


From 631b7abacd02b88f4b0795c08b54ad4fc3e7c7c0 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Mon, 7 Nov 2016 16:26:35 -0500
Subject: ptrace: Remove maxargs from task_current_syscall()

task_current_syscall() has a single user that passes in 6 for maxargs, which
is the maximum arguments that can be used to get system calls from
syscall_get_arguments(). Instead of passing in a number of arguments to
grab, just get 6 arguments. The args argument even specifies that it's an
array of 6 items.

This will also allow changing syscall_get_arguments() to not get a variable
number of arguments, but always grab 6.

Linus also suggested not passing in a bunch of arguments to
task_current_syscall() but to instead pass in a pointer to a structure, and
just fill the structure. struct seccomp_data has almost all the parameters
that is needed except for the stack pointer (sp). As seccomp_data is part of
uapi, and I'm afraid to change it, a new structure was created
"syscall_info", which includes seccomp_data and adds the "sp" field.

Link: http://lkml.kernel.org/r/20161107213233.466776454@goodmis.org

Cc: Andy Lutomirski <luto@kernel.org>
Cc: Alexey Dobriyan <adobriyan@gmail.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: linux-fsdevel@vger.kernel.org
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 fs/proc/base.c         | 17 ++++++++-------
 include/linux/ptrace.h | 11 +++++++---
 lib/syscall.c          | 57 ++++++++++++++++++++++----------------------------
 3 files changed, 42 insertions(+), 43 deletions(-)

(limited to 'include')

diff --git a/fs/proc/base.c b/fs/proc/base.c
index ddef482f1334..6a803a0b75df 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -616,24 +616,25 @@ static int proc_pid_limits(struct seq_file *m, struct pid_namespace *ns,
 static int proc_pid_syscall(struct seq_file *m, struct pid_namespace *ns,
 			    struct pid *pid, struct task_struct *task)
 {
-	long nr;
-	unsigned long args[6], sp, pc;
+	struct syscall_info info;
+	u64 *args = &info.data.args[0];
 	int res;
 
 	res = lock_trace(task);
 	if (res)
 		return res;
 
-	if (task_current_syscall(task, &nr, args, 6, &sp, &pc))
+	if (task_current_syscall(task, &info))
 		seq_puts(m, "running\n");
-	else if (nr < 0)
-		seq_printf(m, "%ld 0x%lx 0x%lx\n", nr, sp, pc);
+	else if (info.data.nr < 0)
+		seq_printf(m, "%d 0x%llx 0x%llx\n",
+			   info.data.nr, info.sp, info.data.instruction_pointer);
 	else
 		seq_printf(m,
-		       "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx\n",
-		       nr,
+		       "%d 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx 0x%llx\n",
+		       info.data.nr,
 		       args[0], args[1], args[2], args[3], args[4], args[5],
-		       sp, pc);
+		       info.sp, info.data.instruction_pointer);
 	unlock_trace(task);
 
 	return 0;
diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h
index edb9b040c94c..d5084ebd9f03 100644
--- a/include/linux/ptrace.h
+++ b/include/linux/ptrace.h
@@ -9,6 +9,13 @@
 #include <linux/bug.h>			/* For BUG_ON.  */
 #include <linux/pid_namespace.h>	/* For task_active_pid_ns.  */
 #include <uapi/linux/ptrace.h>
+#include <linux/seccomp.h>
+
+/* Add sp to seccomp_data, as seccomp is user API, we don't want to modify it */
+struct syscall_info {
+	__u64			sp;
+	struct seccomp_data	data;
+};
 
 extern int ptrace_access_vm(struct task_struct *tsk, unsigned long addr,
 			    void *buf, int len, unsigned int gup_flags);
@@ -407,9 +414,7 @@ static inline void user_single_step_report(struct pt_regs *regs)
 #define current_user_stack_pointer() user_stack_pointer(current_pt_regs())
 #endif
 
-extern int task_current_syscall(struct task_struct *target, long *callno,
-				unsigned long args[6], unsigned int maxargs,
-				unsigned long *sp, unsigned long *pc);
+extern int task_current_syscall(struct task_struct *target, struct syscall_info *info);
 
 extern void sigaction_compat_abi(struct k_sigaction *act, struct k_sigaction *oact);
 #endif
diff --git a/lib/syscall.c b/lib/syscall.c
index 1a7077f20eae..e8467e17b9a2 100644
--- a/lib/syscall.c
+++ b/lib/syscall.c
@@ -5,16 +5,14 @@
 #include <linux/export.h>
 #include <asm/syscall.h>
 
-static int collect_syscall(struct task_struct *target, long *callno,
-			   unsigned long args[6], unsigned int maxargs,
-			   unsigned long *sp, unsigned long *pc)
+static int collect_syscall(struct task_struct *target, struct syscall_info *info)
 {
 	struct pt_regs *regs;
 
 	if (!try_get_task_stack(target)) {
 		/* Task has no stack, so the task isn't in a syscall. */
-		*sp = *pc = 0;
-		*callno = -1;
+		memset(info, 0, sizeof(*info));
+		info->data.nr = -1;
 		return 0;
 	}
 
@@ -24,12 +22,13 @@ static int collect_syscall(struct task_struct *target, long *callno,
 		return -EAGAIN;
 	}
 
-	*sp = user_stack_pointer(regs);
-	*pc = instruction_pointer(regs);
+	info->sp = user_stack_pointer(regs);
+	info->data.instruction_pointer = instruction_pointer(regs);
 
-	*callno = syscall_get_nr(target, regs);
-	if (*callno != -1L && maxargs > 0)
-		syscall_get_arguments(target, regs, 0, maxargs, args);
+	info->data.nr = syscall_get_nr(target, regs);
+	if (info->data.nr != -1L)
+		syscall_get_arguments(target, regs, 0, 6,
+				      (unsigned long *)&info->data.args[0]);
 
 	put_task_stack(target);
 	return 0;
@@ -38,41 +37,35 @@ static int collect_syscall(struct task_struct *target, long *callno,
 /**
  * task_current_syscall - Discover what a blocked task is doing.
  * @target:		thread to examine
- * @callno:		filled with system call number or -1
- * @args:		filled with @maxargs system call arguments
- * @maxargs:		number of elements in @args to fill
- * @sp:			filled with user stack pointer
- * @pc:			filled with user PC
+ * @info:		structure with the following fields:
+ *			 .sp        - filled with user stack pointer
+ *			 .data.nr   - filled with system call number or -1
+ *			 .data.args - filled with @maxargs system call arguments
+ *			 .data.instruction_pointer - filled with user PC
  *
- * If @target is blocked in a system call, returns zero with *@callno
- * set to the the call's number and @args filled in with its arguments.
- * Registers not used for system call arguments may not be available and
- * it is not kosher to use &struct user_regset calls while the system
+ * If @target is blocked in a system call, returns zero with @info.data.nr
+ * set to the the call's number and @info.data.args filled in with its
+ * arguments. Registers not used for system call arguments may not be available
+ * and it is not kosher to use &struct user_regset calls while the system
  * call is still in progress.  Note we may get this result if @target
  * has finished its system call but not yet returned to user mode, such
  * as when it's stopped for signal handling or syscall exit tracing.
  *
  * If @target is blocked in the kernel during a fault or exception,
- * returns zero with *@callno set to -1 and does not fill in @args.
- * If so, it's now safe to examine @target using &struct user_regset
- * get() calls as long as we're sure @target won't return to user mode.
+ * returns zero with *@info.data.nr set to -1 and does not fill in
+ * @info.data.args. If so, it's now safe to examine @target using
+ * &struct user_regset get() calls as long as we're sure @target won't return
+ * to user mode.
  *
  * Returns -%EAGAIN if @target does not remain blocked.
- *
- * Returns -%EINVAL if @maxargs is too large (maximum is six).
  */
-int task_current_syscall(struct task_struct *target, long *callno,
-			 unsigned long args[6], unsigned int maxargs,
-			 unsigned long *sp, unsigned long *pc)
+int task_current_syscall(struct task_struct *target, struct syscall_info *info)
 {
 	long state;
 	unsigned long ncsw;
 
-	if (unlikely(maxargs > 6))
-		return -EINVAL;
-
 	if (target == current)
-		return collect_syscall(target, callno, args, maxargs, sp, pc);
+		return collect_syscall(target, info);
 
 	state = target->state;
 	if (unlikely(!state))
@@ -80,7 +73,7 @@ int task_current_syscall(struct task_struct *target, long *callno,
 
 	ncsw = wait_task_inactive(target, state);
 	if (unlikely(!ncsw) ||
-	    unlikely(collect_syscall(target, callno, args, maxargs, sp, pc)) ||
+	    unlikely(collect_syscall(target, info)) ||
 	    unlikely(wait_task_inactive(target, state) != ncsw))
 		return -EAGAIN;
 
-- 
cgit v1.2.3


From b35f549df1d7520d37ba1e6d4a8d4df6bd52d136 Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
Date: Mon, 7 Nov 2016 16:26:37 -0500
Subject: syscalls: Remove start and number from syscall_get_arguments() args

At Linux Plumbers, Andy Lutomirski approached me and pointed out that the
function call syscall_get_arguments() implemented in x86 was horribly
written and not optimized for the standard case of passing in 0 and 6 for
the starting index and the number of system calls to get. When looking at
all the users of this function, I discovered that all instances pass in only
0 and 6 for these arguments. Instead of having this function handle
different cases that are never used, simply rewrite it to return the first 6
arguments of a system call.

This should help out the performance of tracing system calls by ptrace,
ftrace and perf.

Link: http://lkml.kernel.org/r/20161107213233.754809394@goodmis.org

Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Dominik Brodowski <linux@dominikbrodowski.net>
Cc: Dave Martin <dave.martin@arm.com>
Cc: "Dmitry V. Levin" <ldv@altlinux.org>
Cc: x86@kernel.org
Cc: linux-snps-arc@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-c6x-dev@linux-c6x.org
Cc: uclinux-h8-devel@lists.sourceforge.jp
Cc: linux-hexagon@vger.kernel.org
Cc: linux-ia64@vger.kernel.org
Cc: linux-mips@vger.kernel.org
Cc: nios2-dev@lists.rocketboards.org
Cc: openrisc@lists.librecores.org
Cc: linux-parisc@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Cc: linux-riscv@lists.infradead.org
Cc: linux-s390@vger.kernel.org
Cc: linux-sh@vger.kernel.org
Cc: sparclinux@vger.kernel.org
Cc: linux-um@lists.infradead.org
Cc: linux-xtensa@linux-xtensa.org
Cc: linux-arch@vger.kernel.org
Acked-by: Paul Burton <paul.burton@mips.com> # MIPS parts
Acked-by: Max Filippov <jcmvbkbc@gmail.com> # For xtensa changes
Acked-by: Will Deacon <will.deacon@arm.com> # For the arm64 bits
Reviewed-by: Thomas Gleixner <tglx@linutronix.de> # for x86
Reviewed-by: Dmitry V. Levin <ldv@altlinux.org>
Reported-by: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 arch/arc/include/asm/syscall.h        |  7 ++--
 arch/arm/include/asm/syscall.h        | 23 ++---------
 arch/arm64/include/asm/syscall.h      | 22 ++---------
 arch/c6x/include/asm/syscall.h        | 41 ++++----------------
 arch/csky/include/asm/syscall.h       | 14 ++-----
 arch/h8300/include/asm/syscall.h      | 34 ++++------------
 arch/hexagon/include/asm/syscall.h    |  4 +-
 arch/ia64/include/asm/syscall.h       |  5 +--
 arch/microblaze/include/asm/syscall.h |  4 +-
 arch/mips/include/asm/syscall.h       |  3 +-
 arch/mips/kernel/ptrace.c             |  2 +-
 arch/nds32/include/asm/syscall.h      | 33 +++-------------
 arch/nios2/include/asm/syscall.h      | 42 ++++----------------
 arch/openrisc/include/asm/syscall.h   |  6 +--
 arch/parisc/include/asm/syscall.h     | 30 ++++----------
 arch/powerpc/include/asm/syscall.h    |  8 ++--
 arch/riscv/include/asm/syscall.h      | 13 ++-----
 arch/s390/include/asm/syscall.h       | 17 +++-----
 arch/sh/include/asm/syscall_32.h      | 26 +++----------
 arch/sh/include/asm/syscall_64.h      |  4 +-
 arch/sparc/include/asm/syscall.h      |  4 +-
 arch/um/include/asm/syscall-generic.h | 39 +++----------------
 arch/x86/include/asm/syscall.h        | 73 ++++++++---------------------------
 arch/xtensa/include/asm/syscall.h     | 16 ++------
 include/asm-generic/syscall.h         | 11 ++----
 include/trace/events/syscalls.h       |  2 +-
 kernel/seccomp.c                      |  2 +-
 kernel/trace/trace_syscalls.c         |  4 +-
 lib/syscall.c                         |  2 +-
 29 files changed, 113 insertions(+), 378 deletions(-)

(limited to 'include')

diff --git a/arch/arc/include/asm/syscall.h b/arch/arc/include/asm/syscall.h
index 29de09804306..c7a4201ed62b 100644
--- a/arch/arc/include/asm/syscall.h
+++ b/arch/arc/include/asm/syscall.h
@@ -55,12 +55,11 @@ syscall_set_return_value(struct task_struct *task, struct pt_regs *regs,
  */
 static inline void
 syscall_get_arguments(struct task_struct *task, struct pt_regs *regs,
-		      unsigned int i, unsigned int n, unsigned long *args)
+		      unsigned long *args)
 {
 	unsigned long *inside_ptregs = &(regs->r0);
-	inside_ptregs -= i;
-
-	BUG_ON((i + n) > 6);
+	unsigned int n = 6;
+	unsigned int i = 0;
 
 	while (n--) {
 		args[i++] = (*inside_ptregs);
diff --git a/arch/arm/include/asm/syscall.h b/arch/arm/include/asm/syscall.h
index 06dea6bce293..db969a2972ae 100644
--- a/arch/arm/include/asm/syscall.h
+++ b/arch/arm/include/asm/syscall.h
@@ -55,29 +55,12 @@ static inline void syscall_set_return_value(struct task_struct *task,
 
 static inline void syscall_get_arguments(struct task_struct *task,
 					 struct pt_regs *regs,
-					 unsigned int i, unsigned int n,
 					 unsigned long *args)
 {
-	if (n == 0)
-		return;
-
-	if (i + n > SYSCALL_MAX_ARGS) {
-		unsigned long *args_bad = args + SYSCALL_MAX_ARGS - i;
-		unsigned int n_bad = n + i - SYSCALL_MAX_ARGS;
-		pr_warn("%s called with max args %d, handling only %d\n",
-			__func__, i + n, SYSCALL_MAX_ARGS);
-		memset(args_bad, 0, n_bad * sizeof(args[0]));
-		n = SYSCALL_MAX_ARGS - i;
-	}
-
-	if (i == 0) {
-		args[0] = regs->ARM_ORIG_r0;
-		args++;
-		i++;
-		n--;
-	}
+	args[0] = regs->ARM_ORIG_r0;
+	args++;
 
-	memcpy(args, &regs->ARM_r0 + i, n * sizeof(args[0]));
+	memcpy(args, &regs->ARM_r0 + 1, 5 * sizeof(args[0]));
 }
 
 static inline void syscall_set_arguments(struct task_struct *task,
diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h
index ad8be16a39c9..55b2dab21023 100644
--- a/arch/arm64/include/asm/syscall.h
+++ b/arch/arm64/include/asm/syscall.h
@@ -65,28 +65,12 @@ static inline void syscall_set_return_value(struct task_struct *task,
 
 static inline void syscall_get_arguments(struct task_struct *task,
 					 struct pt_regs *regs,
-					 unsigned int i, unsigned int n,
 					 unsigned long *args)
 {
-	if (n == 0)
-		return;
-
-	if (i + n > SYSCALL_MAX_ARGS) {
-		unsigned long *args_bad = args + SYSCALL_MAX_ARGS - i;
-		unsigned int n_bad = n + i - SYSCALL_MAX_ARGS;
-		pr_warning("%s called with max args %d, handling only %d\n",
-			   __func__, i + n, SYSCALL_MAX_ARGS);
-		memset(args_bad, 0, n_bad * sizeof(args[0]));
-	}
-
-	if (i == 0) {
-		args[0] = regs->orig_x0;
-		args++;
-		i++;
-		n--;
-	}
+	args[0] = regs->orig_x0;
+	args++;
 
-	memcpy(args, &regs->regs[i], n * sizeof(args[0]));
+	memcpy(args, &regs->regs[1], 5 * sizeof(args[0]));
 }
 
 static inline void syscall_set_arguments(struct task_struct *task,
diff --git a/arch/c6x/include/asm/syscall.h b/arch/c6x/include/asm/syscall.h
index ae2be315ee9c..06db3251926b 100644
--- a/arch/c6x/include/asm/syscall.h
+++ b/arch/c6x/include/asm/syscall.h
@@ -46,40 +46,15 @@ static inline void syscall_set_return_value(struct task_struct *task,
 }
 
 static inline void syscall_get_arguments(struct task_struct *task,
-					 struct pt_regs *regs, unsigned int i,
-					 unsigned int n, unsigned long *args)
+					 struct pt_regs *regs,
+					 unsigned long *args)
 {
-	switch (i) {
-	case 0:
-		if (!n--)
-			break;
-		*args++ = regs->a4;
-	case 1:
-		if (!n--)
-			break;
-		*args++ = regs->b4;
-	case 2:
-		if (!n--)
-			break;
-		*args++ = regs->a6;
-	case 3:
-		if (!n--)
-			break;
-		*args++ = regs->b6;
-	case 4:
-		if (!n--)
-			break;
-		*args++ = regs->a8;
-	case 5:
-		if (!n--)
-			break;
-		*args++ = regs->b8;
-	case 6:
-		if (!n--)
-			break;
-	default:
-		BUG();
-	}
+	*args++ = regs->a4;
+	*args++ = regs->b4;
+	*args++ = regs->a6;
+	*args++ = regs->b6;
+	*args++ = regs->a8;
+	*args   = regs->b8;
 }
 
 static inline void syscall_set_arguments(struct task_struct *task,
diff --git a/arch/csky/include/asm/syscall.h b/arch/csky/include/asm/syscall.h
index 9a9cd81e66c1..c691fe92edc6 100644
--- a/arch/csky/include/asm/syscall.h
+++ b/arch/csky/include/asm/syscall.h
@@ -43,17 +43,11 @@ syscall_set_return_value(struct task_struct *task, struct pt_regs *regs,
 
 static inline void
 syscall_get_arguments(struct task_struct *task, struct pt_regs *regs,
-		      unsigned int i, unsigned int n, unsigned long *args)
+		      unsigned long *args)
 {
-	BUG_ON(i + n > 6);
-	if (i == 0) {
-		args[0] = regs->orig_a0;
-		args++;
-		n--;
-	} else {
-		i--;
-	}
-	memcpy(args, &regs->a1 + i, n * sizeof(args[0]));
+	args[0] = regs->orig_a0;
+	args++;
+	memcpy(args, &regs->a1, 5 * sizeof(args[0]));
 }
 
 static inline void
diff --git a/arch/h8300/include/asm/syscall.h b/arch/h8300/include/asm/syscall.h
index 924990401237..ddd483c6ca95 100644
--- a/arch/h8300/include/asm/syscall.h
+++ b/arch/h8300/include/asm/syscall.h
@@ -17,34 +17,14 @@ syscall_get_nr(struct task_struct *task, struct pt_regs *regs)
 
 static inline void
 syscall_get_arguments(struct task_struct *task, struct pt_regs *regs,
-		      unsigned int i, unsigned int n, unsigned long *args)
+		      unsigned long *args)
 {
-	BUG_ON(i + n > 6);
-
-	while (n > 0) {
-		switch (i) {
-		case 0:
-			*args++ = regs->er1;
-			break;
-		case 1:
-			*args++ = regs->er2;
-			break;
-		case 2:
-			*args++ = regs->er3;
-			break;
-		case 3:
-			*args++ = regs->er4;
-			break;
-		case 4:
-			*args++ = regs->er5;
-			break;
-		case 5:
-			*args++ = regs->er6;
-			break;
-		}
-		i++;
-		n--;
-	}
+	*args++ = regs->er1;
+	*args++ = regs->er2;
+	*args++ = regs->er3;
+	*args++ = regs->er4;
+	*args++ = regs->er5;
+	*args   = regs->er6;
 }
 
 
diff --git a/arch/hexagon/include/asm/syscall.h b/arch/hexagon/include/asm/syscall.h
index 4af9c7b6f13a..ae3a1e24fabd 100644
--- a/arch/hexagon/include/asm/syscall.h
+++ b/arch/hexagon/include/asm/syscall.h
@@ -37,10 +37,8 @@ static inline long syscall_get_nr(struct task_struct *task,
 
 static inline void syscall_get_arguments(struct task_struct *task,
 					 struct pt_regs *regs,
-					 unsigned int i, unsigned int n,
 					 unsigned long *args)
 {
-	BUG_ON(i + n > 6);
-	memcpy(args, &(&regs->r00)[i], n * sizeof(args[0]));
+	memcpy(args, &(&regs->r00)[0], 6 * sizeof(args[0]));
 }
 #endif
diff --git a/arch/ia64/include/asm/syscall.h b/arch/ia64/include/asm/syscall.h
index 1d0b875fec44..8204c1ff70ce 100644
--- a/arch/ia64/include/asm/syscall.h
+++ b/arch/ia64/include/asm/syscall.h
@@ -63,12 +63,9 @@ extern void ia64_syscall_get_set_arguments(struct task_struct *task,
 	unsigned long *args, int rw);
 static inline void syscall_get_arguments(struct task_struct *task,
 					 struct pt_regs *regs,
-					 unsigned int i, unsigned int n,
 					 unsigned long *args)
 {
-	BUG_ON(i + n > 6);
-
-	ia64_syscall_get_set_arguments(task, regs, i, n, args, 0);
+	ia64_syscall_get_set_arguments(task, regs, 0, 6, args, 0);
 }
 
 static inline void syscall_set_arguments(struct task_struct *task,
diff --git a/arch/microblaze/include/asm/syscall.h b/arch/microblaze/include/asm/syscall.h
index 220decd605a4..4b23e44e5c3c 100644
--- a/arch/microblaze/include/asm/syscall.h
+++ b/arch/microblaze/include/asm/syscall.h
@@ -82,9 +82,11 @@ static inline void microblaze_set_syscall_arg(struct pt_regs *regs,
 
 static inline void syscall_get_arguments(struct task_struct *task,
 					 struct pt_regs *regs,
-					 unsigned int i, unsigned int n,
 					 unsigned long *args)
 {
+	unsigned int i = 0;
+	unsigned int n = 6;
+
 	while (n--)
 		*args++ = microblaze_get_syscall_arg(regs, i++);
 }
diff --git a/arch/mips/include/asm/syscall.h b/arch/mips/include/asm/syscall.h
index 6cf8ffb5367e..a2b4748655df 100644
--- a/arch/mips/include/asm/syscall.h
+++ b/arch/mips/include/asm/syscall.h
@@ -116,9 +116,10 @@ static inline void syscall_set_return_value(struct task_struct *task,
 
 static inline void syscall_get_arguments(struct task_struct *task,
 					 struct pt_regs *regs,
-					 unsigned int i, unsigned int n,
 					 unsigned long *args)
 {
+	unsigned int i = 0;
+	unsigned int n = 6;
 	int ret;
 
 	/* O32 ABI syscall() */
diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c
index 0057c910bc2f..3a62f80958e1 100644
--- a/arch/mips/kernel/ptrace.c
+++ b/arch/mips/kernel/ptrace.c
@@ -1419,7 +1419,7 @@ asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall)
 
 		sd.nr = syscall;
 		sd.arch = syscall_get_arch();
-		syscall_get_arguments(current, regs, 0, 6, args);
+		syscall_get_arguments(current, regs, args);
 		for (i = 0; i < 6; i++)
 			sd.args[i] = args[i];
 		sd.instruction_pointer = KSTK_EIP(current);
diff --git a/arch/nds32/include/asm/syscall.h b/arch/nds32/include/asm/syscall.h
index f7e5e86765fe..89a6ec8731d8 100644
--- a/arch/nds32/include/asm/syscall.h
+++ b/arch/nds32/include/asm/syscall.h
@@ -108,42 +108,21 @@ void syscall_set_return_value(struct task_struct *task, struct pt_regs *regs,
  * syscall_get_arguments - extract system call parameter values
  * @task:	task of interest, must be blocked
  * @regs:	task_pt_regs() of @task
- * @i:		argument index [0,5]
- * @n:		number of arguments; n+i must be [1,6].
  * @args:	array filled with argument values
  *
- * Fetches @n arguments to the system call starting with the @i'th argument
- * (from 0 through 5).  Argument @i is stored in @args[0], and so on.
- * An arch inline version is probably optimal when @i and @n are constants.
+ * Fetches 6 arguments to the system call (from 0 through 5). The first
+ * argument is stored in @args[0], and so on.
  *
  * It's only valid to call this when @task is stopped for tracing on
  * entry to a system call, due to %TIF_SYSCALL_TRACE or %TIF_SYSCALL_AUDIT.
- * It's invalid to call this with @i + @n > 6; we only support system calls
- * taking up to 6 arguments.
  */
 #define SYSCALL_MAX_ARGS 6
 void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs,
-			   unsigned int i, unsigned int n, unsigned long *args)
+			   unsigned long *args)
 {
-	if (n == 0)
-		return;
-	if (i + n > SYSCALL_MAX_ARGS) {
-		unsigned long *args_bad = args + SYSCALL_MAX_ARGS - i;
-		unsigned int n_bad = n + i - SYSCALL_MAX_ARGS;
-		pr_warning("%s called with max args %d, handling only %d\n",
-			   __func__, i + n, SYSCALL_MAX_ARGS);
-		memset(args_bad, 0, n_bad * sizeof(args[0]));
-		memset(args_bad, 0, n_bad * sizeof(args[0]));
-	}
-
-	if (i == 0) {
-		args[0] = regs->orig_r0;
-		args++;
-		i++;
-		n--;
-	}
-
-	memcpy(args, &regs->uregs[0] + i, n * sizeof(args[0]));
+	args[0] = regs->orig_r0;
+	args++;
+	memcpy(args, &regs->uregs[0] + 1, 5 * sizeof(args[0]));
 }
 
 /**
diff --git a/arch/nios2/include/asm/syscall.h b/arch/nios2/include/asm/syscall.h
index 9de220854c4a..792bd449d839 100644
--- a/arch/nios2/include/asm/syscall.h
+++ b/arch/nios2/include/asm/syscall.h
@@ -58,42 +58,14 @@ static inline void syscall_set_return_value(struct task_struct *task,
 }
 
 static inline void syscall_get_arguments(struct task_struct *task,
-	struct pt_regs *regs, unsigned int i, unsigned int n,
-	unsigned long *args)
+	struct pt_regs *regs, unsigned long *args)
 {
-	BUG_ON(i + n > 6);
-
-	switch (i) {
-	case 0:
-		if (!n--)
-			break;
-		*args++ = regs->r4;
-	case 1:
-		if (!n--)
-			break;
-		*args++ = regs->r5;
-	case 2:
-		if (!n--)
-			break;
-		*args++ = regs->r6;
-	case 3:
-		if (!n--)
-			break;
-		*args++ = regs->r7;
-	case 4:
-		if (!n--)
-			break;
-		*args++ = regs->r8;
-	case 5:
-		if (!n--)
-			break;
-		*args++ = regs->r9;
-	case 6:
-		if (!n--)
-			break;
-	default:
-		BUG();
-	}
+	*args++ = regs->r4;
+	*args++ = regs->r5;
+	*args++ = regs->r6;
+	*args++ = regs->r7;
+	*args++ = regs->r8;
+	*args   = regs->r9;
 }
 
 static inline void syscall_set_arguments(struct task_struct *task,
diff --git a/arch/openrisc/include/asm/syscall.h b/arch/openrisc/include/asm/syscall.h
index 2db9f1cf0694..72607860cd55 100644
--- a/arch/openrisc/include/asm/syscall.h
+++ b/arch/openrisc/include/asm/syscall.h
@@ -56,11 +56,9 @@ syscall_set_return_value(struct task_struct *task, struct pt_regs *regs,
 
 static inline void
 syscall_get_arguments(struct task_struct *task, struct pt_regs *regs,
-		      unsigned int i, unsigned int n, unsigned long *args)
+		      unsigned long *args)
 {
-	BUG_ON(i + n > 6);
-
-	memcpy(args, &regs->gpr[3 + i], n * sizeof(args[0]));
+	memcpy(args, &regs->gpr[3], 6 * sizeof(args[0]));
 }
 
 static inline void
diff --git a/arch/parisc/include/asm/syscall.h b/arch/parisc/include/asm/syscall.h
index 8bff1a58c97f..62a6d477fae0 100644
--- a/arch/parisc/include/asm/syscall.h
+++ b/arch/parisc/include/asm/syscall.h
@@ -18,29 +18,15 @@ static inline long syscall_get_nr(struct task_struct *tsk,
 }
 
 static inline void syscall_get_arguments(struct task_struct *tsk,
-					 struct pt_regs *regs, unsigned int i,
-					 unsigned int n, unsigned long *args)
+					 struct pt_regs *regs,
+					 unsigned long *args)
 {
-	BUG_ON(i);
-
-	switch (n) {
-	case 6:
-		args[5] = regs->gr[21];
-	case 5:
-		args[4] = regs->gr[22];
-	case 4:
-		args[3] = regs->gr[23];
-	case 3:
-		args[2] = regs->gr[24];
-	case 2:
-		args[1] = regs->gr[25];
-	case 1:
-		args[0] = regs->gr[26];
-	case 0:
-		break;
-	default:
-		BUG();
-	}
+	args[5] = regs->gr[21];
+	args[4] = regs->gr[22];
+	args[3] = regs->gr[23];
+	args[2] = regs->gr[24];
+	args[1] = regs->gr[25];
+	args[0] = regs->gr[26];
 }
 
 static inline long syscall_get_return_value(struct task_struct *task,
diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h
index 1a0e7a8b1c81..5c9b9dc82b7e 100644
--- a/arch/powerpc/include/asm/syscall.h
+++ b/arch/powerpc/include/asm/syscall.h
@@ -65,22 +65,20 @@ static inline void syscall_set_return_value(struct task_struct *task,
 
 static inline void syscall_get_arguments(struct task_struct *task,
 					 struct pt_regs *regs,
-					 unsigned int i, unsigned int n,
 					 unsigned long *args)
 {
 	unsigned long val, mask = -1UL;
-
-	BUG_ON(i + n > 6);
+	unsigned int n = 6;
 
 #ifdef CONFIG_COMPAT
 	if (test_tsk_thread_flag(task, TIF_32BIT))
 		mask = 0xffffffff;
 #endif
 	while (n--) {
-		if (n == 0 && i == 0)
+		if (n == 0)
 			val = regs->orig_gpr3;
 		else
-			val = regs->gpr[3 + i + n];
+			val = regs->gpr[3 + n];
 
 		args[n] = val & mask;
 	}
diff --git a/arch/riscv/include/asm/syscall.h b/arch/riscv/include/asm/syscall.h
index 6ea9e1804233..6adca1804be1 100644
--- a/arch/riscv/include/asm/syscall.h
+++ b/arch/riscv/include/asm/syscall.h
@@ -72,18 +72,11 @@ static inline void syscall_set_return_value(struct task_struct *task,
 
 static inline void syscall_get_arguments(struct task_struct *task,
 					 struct pt_regs *regs,
-					 unsigned int i, unsigned int n,
 					 unsigned long *args)
 {
-	BUG_ON(i + n > 6);
-	if (i == 0) {
-		args[0] = regs->orig_a0;
-		args++;
-		n--;
-	} else {
-		i--;
-	}
-	memcpy(args, &regs->a1 + i, n * sizeof(args[0]));
+	args[0] = regs->orig_a0;
+	args++;
+	memcpy(args, &regs->a1, 5 * sizeof(args[0]));
 }
 
 static inline void syscall_set_arguments(struct task_struct *task,
diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h
index 96f9a9151fde..ee0b1f6aa36d 100644
--- a/arch/s390/include/asm/syscall.h
+++ b/arch/s390/include/asm/syscall.h
@@ -56,27 +56,20 @@ static inline void syscall_set_return_value(struct task_struct *task,
 
 static inline void syscall_get_arguments(struct task_struct *task,
 					 struct pt_regs *regs,
-					 unsigned int i, unsigned int n,
 					 unsigned long *args)
 {
 	unsigned long mask = -1UL;
+	unsigned int n = 6;
 
-	/*
-	 * No arguments for this syscall, there's nothing to do.
-	 */
-	if (!n)
-		return;
-
-	BUG_ON(i + n > 6);
 #ifdef CONFIG_COMPAT
 	if (test_tsk_thread_flag(task, TIF_31BIT))
 		mask = 0xffffffff;
 #endif
 	while (n-- > 0)
-		if (i + n > 0)
-			args[n] = regs->gprs[2 + i + n] & mask;
-	if (i == 0)
-		args[0] = regs->orig_gpr2 & mask;
+		if (n > 0)
+			args[n] = regs->gprs[2 + n] & mask;
+
+	args[0] = regs->orig_gpr2 & mask;
 }
 
 static inline void syscall_set_arguments(struct task_struct *task,
diff --git a/arch/sh/include/asm/syscall_32.h b/arch/sh/include/asm/syscall_32.h
index 6e118799831c..2bf1199a0595 100644
--- a/arch/sh/include/asm/syscall_32.h
+++ b/arch/sh/include/asm/syscall_32.h
@@ -48,30 +48,16 @@ static inline void syscall_set_return_value(struct task_struct *task,
 
 static inline void syscall_get_arguments(struct task_struct *task,
 					 struct pt_regs *regs,
-					 unsigned int i, unsigned int n,
 					 unsigned long *args)
 {
-	/*
-	 * Do this simply for now. If we need to start supporting
-	 * fetching arguments from arbitrary indices, this will need some
-	 * extra logic. Presently there are no in-tree users that depend
-	 * on this behaviour.
-	 */
-	BUG_ON(i);
 
 	/* Argument pattern is: R4, R5, R6, R7, R0, R1 */
-	switch (n) {
-	case 6: args[5] = regs->regs[1];
-	case 5: args[4] = regs->regs[0];
-	case 4: args[3] = regs->regs[7];
-	case 3: args[2] = regs->regs[6];
-	case 2: args[1] = regs->regs[5];
-	case 1:	args[0] = regs->regs[4];
-	case 0:
-		break;
-	default:
-		BUG();
-	}
+	args[5] = regs->regs[1];
+	args[4] = regs->regs[0];
+	args[3] = regs->regs[7];
+	args[2] = regs->regs[6];
+	args[1] = regs->regs[5];
+	args[0] = regs->regs[4];
 }
 
 static inline void syscall_set_arguments(struct task_struct *task,
diff --git a/arch/sh/include/asm/syscall_64.h b/arch/sh/include/asm/syscall_64.h
index 43882580c7f9..4e8f6460c703 100644
--- a/arch/sh/include/asm/syscall_64.h
+++ b/arch/sh/include/asm/syscall_64.h
@@ -47,11 +47,9 @@ static inline void syscall_set_return_value(struct task_struct *task,
 
 static inline void syscall_get_arguments(struct task_struct *task,
 					 struct pt_regs *regs,
-					 unsigned int i, unsigned int n,
 					 unsigned long *args)
 {
-	BUG_ON(i + n > 6);
-	memcpy(args, &regs->regs[2 + i], n * sizeof(args[0]));
+	memcpy(args, &regs->regs[2], 6 * sizeof(args[0]));
 }
 
 static inline void syscall_set_arguments(struct task_struct *task,
diff --git a/arch/sparc/include/asm/syscall.h b/arch/sparc/include/asm/syscall.h
index 053989e3f6a6..872dfee852d6 100644
--- a/arch/sparc/include/asm/syscall.h
+++ b/arch/sparc/include/asm/syscall.h
@@ -96,11 +96,11 @@ static inline void syscall_set_return_value(struct task_struct *task,
 
 static inline void syscall_get_arguments(struct task_struct *task,
 					 struct pt_regs *regs,
-					 unsigned int i, unsigned int n,
 					 unsigned long *args)
 {
 	int zero_extend = 0;
 	unsigned int j;
+	unsigned int n = 6;
 
 #ifdef CONFIG_SPARC64
 	if (test_tsk_thread_flag(task, TIF_32BIT))
@@ -108,7 +108,7 @@ static inline void syscall_get_arguments(struct task_struct *task,
 #endif
 
 	for (j = 0; j < n; j++) {
-		unsigned long val = regs->u_regs[UREG_I0 + i + j];
+		unsigned long val = regs->u_regs[UREG_I0 + j];
 
 		if (zero_extend)
 			args[j] = (u32) val;
diff --git a/arch/um/include/asm/syscall-generic.h b/arch/um/include/asm/syscall-generic.h
index 9fb9cf8cd39a..25d00acd1322 100644
--- a/arch/um/include/asm/syscall-generic.h
+++ b/arch/um/include/asm/syscall-generic.h
@@ -53,43 +53,16 @@ static inline void syscall_set_return_value(struct task_struct *task,
 
 static inline void syscall_get_arguments(struct task_struct *task,
 					 struct pt_regs *regs,
-					 unsigned int i, unsigned int n,
 					 unsigned long *args)
 {
 	const struct uml_pt_regs *r = &regs->regs;
 
-	switch (i) {
-	case 0:
-		if (!n--)
-			break;
-		*args++ = UPT_SYSCALL_ARG1(r);
-	case 1:
-		if (!n--)
-			break;
-		*args++ = UPT_SYSCALL_ARG2(r);
-	case 2:
-		if (!n--)
-			break;
-		*args++ = UPT_SYSCALL_ARG3(r);
-	case 3:
-		if (!n--)
-			break;
-		*args++ = UPT_SYSCALL_ARG4(r);
-	case 4:
-		if (!n--)
-			break;
-		*args++ = UPT_SYSCALL_ARG5(r);
-	case 5:
-		if (!n--)
-			break;
-		*args++ = UPT_SYSCALL_ARG6(r);
-	case 6:
-		if (!n--)
-			break;
-	default:
-		BUG();
-		break;
-	}
+	*args++ = UPT_SYSCALL_ARG1(r);
+	*args++ = UPT_SYSCALL_ARG2(r);
+	*args++ = UPT_SYSCALL_ARG3(r);
+	*args++ = UPT_SYSCALL_ARG4(r);
+	*args++ = UPT_SYSCALL_ARG5(r);
+	*args   = UPT_SYSCALL_ARG6(r);
 }
 
 static inline void syscall_set_arguments(struct task_struct *task,
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index d653139857af..8dbb5c379450 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -91,11 +91,9 @@ static inline void syscall_set_return_value(struct task_struct *task,
 
 static inline void syscall_get_arguments(struct task_struct *task,
 					 struct pt_regs *regs,
-					 unsigned int i, unsigned int n,
 					 unsigned long *args)
 {
-	BUG_ON(i + n > 6);
-	memcpy(args, &regs->bx + i, n * sizeof(args[0]));
+	memcpy(args, &regs->bx, 6 * sizeof(args[0]));
 }
 
 static inline void syscall_set_arguments(struct task_struct *task,
@@ -116,63 +114,26 @@ static inline int syscall_get_arch(void)
 
 static inline void syscall_get_arguments(struct task_struct *task,
 					 struct pt_regs *regs,
-					 unsigned int i, unsigned int n,
 					 unsigned long *args)
 {
 # ifdef CONFIG_IA32_EMULATION
-	if (task->thread_info.status & TS_COMPAT)
-		switch (i) {
-		case 0:
-			if (!n--) break;
-			*args++ = regs->bx;
-		case 1:
-			if (!n--) break;
-			*args++ = regs->cx;
-		case 2:
-			if (!n--) break;
-			*args++ = regs->dx;
-		case 3:
-			if (!n--) break;
-			*args++ = regs->si;
-		case 4:
-			if (!n--) break;
-			*args++ = regs->di;
-		case 5:
-			if (!n--) break;
-			*args++ = regs->bp;
-		case 6:
-			if (!n--) break;
-		default:
-			BUG();
-			break;
-		}
-	else
+	if (task->thread_info.status & TS_COMPAT) {
+		*args++ = regs->bx;
+		*args++ = regs->cx;
+		*args++ = regs->dx;
+		*args++ = regs->si;
+		*args++ = regs->di;
+		*args   = regs->bp;
+	} else
 # endif
-		switch (i) {
-		case 0:
-			if (!n--) break;
-			*args++ = regs->di;
-		case 1:
-			if (!n--) break;
-			*args++ = regs->si;
-		case 2:
-			if (!n--) break;
-			*args++ = regs->dx;
-		case 3:
-			if (!n--) break;
-			*args++ = regs->r10;
-		case 4:
-			if (!n--) break;
-			*args++ = regs->r8;
-		case 5:
-			if (!n--) break;
-			*args++ = regs->r9;
-		case 6:
-			if (!n--) break;
-		default:
-			BUG();
-			break;
-		}
+	{
+		*args++ = regs->di;
+		*args++ = regs->si;
+		*args++ = regs->dx;
+		*args++ = regs->r10;
+		*args++ = regs->r8;
+		*args   = regs->r9;
+	}
 }
 
 static inline void syscall_set_arguments(struct task_struct *task,
diff --git a/arch/xtensa/include/asm/syscall.h b/arch/xtensa/include/asm/syscall.h
index a168bf81c7f4..1504ce9a233a 100644
--- a/arch/xtensa/include/asm/syscall.h
+++ b/arch/xtensa/include/asm/syscall.h
@@ -59,23 +59,13 @@ static inline void syscall_set_return_value(struct task_struct *task,
 
 static inline void syscall_get_arguments(struct task_struct *task,
 					 struct pt_regs *regs,
-					 unsigned int i, unsigned int n,
 					 unsigned long *args)
 {
 	static const unsigned int reg[] = XTENSA_SYSCALL_ARGUMENT_REGS;
-	unsigned int j;
-
-	if (n == 0)
-		return;
-
-	WARN_ON_ONCE(i + n > SYSCALL_MAX_ARGS);
+	unsigned int i;
 
-	for (j = 0; j < n; ++j) {
-		if (i + j < SYSCALL_MAX_ARGS)
-			args[j] = regs->areg[reg[i + j]];
-		else
-			args[j] = 0;
-	}
+	for (i = 0; i < 6; ++i)
+		args[i] = regs->areg[reg[i]];
 }
 
 static inline void syscall_set_arguments(struct task_struct *task,
diff --git a/include/asm-generic/syscall.h b/include/asm-generic/syscall.h
index 0c938a4354f6..269e9412ef42 100644
--- a/include/asm-generic/syscall.h
+++ b/include/asm-generic/syscall.h
@@ -105,21 +105,16 @@ void syscall_set_return_value(struct task_struct *task, struct pt_regs *regs,
  * syscall_get_arguments - extract system call parameter values
  * @task:	task of interest, must be blocked
  * @regs:	task_pt_regs() of @task
- * @i:		argument index [0,5]
- * @n:		number of arguments; n+i must be [1,6].
  * @args:	array filled with argument values
  *
- * Fetches @n arguments to the system call starting with the @i'th argument
- * (from 0 through 5).  Argument @i is stored in @args[0], and so on.
- * An arch inline version is probably optimal when @i and @n are constants.
+ * Fetches 6 arguments to the system call.  First argument is stored in
+*  @args[0], and so on.
  *
  * It's only valid to call this when @task is stopped for tracing on
  * entry to a system call, due to %TIF_SYSCALL_TRACE or %TIF_SYSCALL_AUDIT.
- * It's invalid to call this with @i + @n > 6; we only support system calls
- * taking up to 6 arguments.
  */
 void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs,
-			   unsigned int i, unsigned int n, unsigned long *args);
+			   unsigned long *args);
 
 /**
  * syscall_set_arguments - change system call parameter value
diff --git a/include/trace/events/syscalls.h b/include/trace/events/syscalls.h
index 44a3259ed4a5..b6e0cbc2c71f 100644
--- a/include/trace/events/syscalls.h
+++ b/include/trace/events/syscalls.h
@@ -28,7 +28,7 @@ TRACE_EVENT_FN(sys_enter,
 
 	TP_fast_assign(
 		__entry->id	= id;
-		syscall_get_arguments(current, regs, 0, 6, __entry->args);
+		syscall_get_arguments(current, regs, __entry->args);
 	),
 
 	TP_printk("NR %ld (%lx, %lx, %lx, %lx, %lx, %lx)",
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 54a0347ca812..df27e499956a 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -149,7 +149,7 @@ static void populate_seccomp_data(struct seccomp_data *sd)
 
 	sd->nr = syscall_get_nr(task, regs);
 	sd->arch = syscall_get_arch();
-	syscall_get_arguments(task, regs, 0, 6, args);
+	syscall_get_arguments(task, regs, args);
 	sd->args[0] = args[0];
 	sd->args[1] = args[1];
 	sd->args[2] = args[2];
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index e9f5bbbad6d9..fa8fbff736d6 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -348,7 +348,7 @@ static void ftrace_syscall_enter(void *data, struct pt_regs *regs, long id)
 
 	entry = ring_buffer_event_data(event);
 	entry->nr = syscall_nr;
-	syscall_get_arguments(current, regs, 0, 6, args);
+	syscall_get_arguments(current, regs, args);
 	memcpy(entry->args, args, sizeof(unsigned long) * sys_data->nb_args);
 
 	event_trigger_unlock_commit(trace_file, buffer, event, entry,
@@ -616,7 +616,7 @@ static void perf_syscall_enter(void *ignore, struct pt_regs *regs, long id)
 		return;
 
 	rec->nr = syscall_nr;
-	syscall_get_arguments(current, regs, 0, 6, args);
+	syscall_get_arguments(current, regs, args);
 	memcpy(&rec->args, args, sizeof(unsigned long) * sys_data->nb_args);
 
 	if ((valid_prog_array &&
diff --git a/lib/syscall.c b/lib/syscall.c
index e8467e17b9a2..fb328e7ccb08 100644
--- a/lib/syscall.c
+++ b/lib/syscall.c
@@ -27,7 +27,7 @@ static int collect_syscall(struct task_struct *target, struct syscall_info *info
 
 	info->data.nr = syscall_get_nr(target, regs);
 	if (info->data.nr != -1L)
-		syscall_get_arguments(target, regs, 0, 6,
+		syscall_get_arguments(target, regs,
 				      (unsigned long *)&info->data.args[0]);
 
 	put_task_stack(target);
-- 
cgit v1.2.3


From 32d92586629a8b3637a3c9361709818e25f327ad Mon Sep 17 00:00:00 2001
From: "Steven Rostedt (VMware)" <rostedt@goodmis.org>
Date: Wed, 27 Mar 2019 20:07:31 -0400
Subject: syscalls: Remove start and number from syscall_set_arguments() args

After removing the start and count arguments of syscall_get_arguments() it
seems reasonable to remove them from syscall_set_arguments(). Note, as of
today, there are no users of syscall_set_arguments(). But we are told that
there will be soon. But for now, at least make it consistent with
syscall_get_arguments().

Link: http://lkml.kernel.org/r/20190327222014.GA32540@altlinux.org

Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Dominik Brodowski <linux@dominikbrodowski.net>
Cc: Dave Martin <dave.martin@arm.com>
Cc: "Dmitry V. Levin" <ldv@altlinux.org>
Cc: x86@kernel.org
Cc: linux-snps-arc@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Cc: linux-arm-kernel@lists.infradead.org
Cc: linux-c6x-dev@linux-c6x.org
Cc: uclinux-h8-devel@lists.sourceforge.jp
Cc: linux-hexagon@vger.kernel.org
Cc: linux-ia64@vger.kernel.org
Cc: linux-mips@vger.kernel.org
Cc: nios2-dev@lists.rocketboards.org
Cc: openrisc@lists.librecores.org
Cc: linux-parisc@vger.kernel.org
Cc: linuxppc-dev@lists.ozlabs.org
Cc: linux-riscv@lists.infradead.org
Cc: linux-s390@vger.kernel.org
Cc: linux-sh@vger.kernel.org
Cc: sparclinux@vger.kernel.org
Cc: linux-um@lists.infradead.org
Cc: linux-xtensa@linux-xtensa.org
Cc: linux-arch@vger.kernel.org
Acked-by: Max Filippov <jcmvbkbc@gmail.com> # For xtensa changes
Acked-by: Will Deacon <will.deacon@arm.com> # For the arm64 bits
Reviewed-by: Thomas Gleixner <tglx@linutronix.de> # for x86
Reviewed-by: Dmitry V. Levin <ldv@altlinux.org>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
 arch/arm/include/asm/syscall.h        | 22 ++---------
 arch/arm64/include/asm/syscall.h      | 22 ++---------
 arch/c6x/include/asm/syscall.h        | 38 +++----------------
 arch/csky/include/asm/syscall.h       | 14 ++-----
 arch/ia64/include/asm/syscall.h       | 10 ++---
 arch/ia64/kernel/ptrace.c             |  7 ++--
 arch/microblaze/include/asm/syscall.h |  4 +-
 arch/nds32/include/asm/syscall.h      | 29 +++------------
 arch/nios2/include/asm/syscall.h      | 42 ++++-----------------
 arch/openrisc/include/asm/syscall.h   |  6 +--
 arch/powerpc/include/asm/syscall.h    |  7 +---
 arch/riscv/include/asm/syscall.h      | 13 ++-----
 arch/s390/include/asm/syscall.h       | 11 +++---
 arch/sh/include/asm/syscall_32.h      | 21 +++--------
 arch/sh/include/asm/syscall_64.h      |  4 +-
 arch/sparc/include/asm/syscall.h      |  7 ++--
 arch/um/include/asm/syscall-generic.h | 39 +++-----------------
 arch/x86/include/asm/syscall.h        | 69 ++++++++---------------------------
 arch/xtensa/include/asm/syscall.h     | 17 ++-------
 include/asm-generic/syscall.h         | 10 +----
 20 files changed, 88 insertions(+), 304 deletions(-)

(limited to 'include')

diff --git a/arch/arm/include/asm/syscall.h b/arch/arm/include/asm/syscall.h
index db969a2972ae..080ce70cab12 100644
--- a/arch/arm/include/asm/syscall.h
+++ b/arch/arm/include/asm/syscall.h
@@ -65,26 +65,12 @@ static inline void syscall_get_arguments(struct task_struct *task,
 
 static inline void syscall_set_arguments(struct task_struct *task,
 					 struct pt_regs *regs,
-					 unsigned int i, unsigned int n,
 					 const unsigned long *args)
 {
-	if (n == 0)
-		return;
-
-	if (i + n > SYSCALL_MAX_ARGS) {
-		pr_warn("%s called with max args %d, handling only %d\n",
-			__func__, i + n, SYSCALL_MAX_ARGS);
-		n = SYSCALL_MAX_ARGS - i;
-	}
-
-	if (i == 0) {
-		regs->ARM_ORIG_r0 = args[0];
-		args++;
-		i++;
-		n--;
-	}
-
-	memcpy(&regs->ARM_r0 + i, args, n * sizeof(args[0]));
+	regs->ARM_ORIG_r0 = args[0];
+	args++;
+
+	memcpy(&regs->ARM_r0 + 1, args, 5 * sizeof(args[0]));
 }
 
 static inline int syscall_get_arch(void)
diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h
index 55b2dab21023..a179df3674a1 100644
--- a/arch/arm64/include/asm/syscall.h
+++ b/arch/arm64/include/asm/syscall.h
@@ -75,26 +75,12 @@ static inline void syscall_get_arguments(struct task_struct *task,
 
 static inline void syscall_set_arguments(struct task_struct *task,
 					 struct pt_regs *regs,
-					 unsigned int i, unsigned int n,
 					 const unsigned long *args)
 {
-	if (n == 0)
-		return;
-
-	if (i + n > SYSCALL_MAX_ARGS) {
-		pr_warning("%s called with max args %d, handling only %d\n",
-			   __func__, i + n, SYSCALL_MAX_ARGS);
-		n = SYSCALL_MAX_ARGS - i;
-	}
-
-	if (i == 0) {
-		regs->orig_x0 = args[0];
-		args++;
-		i++;
-		n--;
-	}
-
-	memcpy(&regs->regs[i], args, n * sizeof(args[0]));
+	regs->orig_x0 = args[0];
+	args++;
+
+	memcpy(&regs->regs[1], args, 5 * sizeof(args[0]));
 }
 
 /*
diff --git a/arch/c6x/include/asm/syscall.h b/arch/c6x/include/asm/syscall.h
index 06db3251926b..15ba8599858e 100644
--- a/arch/c6x/include/asm/syscall.h
+++ b/arch/c6x/include/asm/syscall.h
@@ -59,40 +59,14 @@ static inline void syscall_get_arguments(struct task_struct *task,
 
 static inline void syscall_set_arguments(struct task_struct *task,
 					 struct pt_regs *regs,
-					 unsigned int i, unsigned int n,
 					 const unsigned long *args)
 {
-	switch (i) {
-	case 0:
-		if (!n--)
-			break;
-		regs->a4 = *args++;
-	case 1:
-		if (!n--)
-			break;
-		regs->b4 = *args++;
-	case 2:
-		if (!n--)
-			break;
-		regs->a6 = *args++;
-	case 3:
-		if (!n--)
-			break;
-		regs->b6 = *args++;
-	case 4:
-		if (!n--)
-			break;
-		regs->a8 = *args++;
-	case 5:
-		if (!n--)
-			break;
-		regs->a9 = *args++;
-	case 6:
-		if (!n)
-			break;
-	default:
-		BUG();
-	}
+	regs->a4 = *args++;
+	regs->b4 = *args++;
+	regs->a6 = *args++;
+	regs->b6 = *args++;
+	regs->a8 = *args++;
+	regs->a9 = *args;
 }
 
 #endif /* __ASM_C6X_SYSCALLS_H */
diff --git a/arch/csky/include/asm/syscall.h b/arch/csky/include/asm/syscall.h
index c691fe92edc6..bda0a446c63e 100644
--- a/arch/csky/include/asm/syscall.h
+++ b/arch/csky/include/asm/syscall.h
@@ -52,17 +52,11 @@ syscall_get_arguments(struct task_struct *task, struct pt_regs *regs,
 
 static inline void
 syscall_set_arguments(struct task_struct *task, struct pt_regs *regs,
-		      unsigned int i, unsigned int n, const unsigned long *args)
+		      const unsigned long *args)
 {
-	BUG_ON(i + n > 6);
-	if (i == 0) {
-		regs->orig_a0 = args[0];
-		args++;
-		n--;
-	} else {
-		i--;
-	}
-	memcpy(&regs->a1 + i, args, n * sizeof(regs->a1));
+	regs->orig_a0 = args[0];
+	args++;
+	memcpy(&regs->a1, args, 5 * sizeof(regs->a1));
 }
 
 static inline int
diff --git a/arch/ia64/include/asm/syscall.h b/arch/ia64/include/asm/syscall.h
index 8204c1ff70ce..0d9e7fab4a79 100644
--- a/arch/ia64/include/asm/syscall.h
+++ b/arch/ia64/include/asm/syscall.h
@@ -59,23 +59,19 @@ static inline void syscall_set_return_value(struct task_struct *task,
 }
 
 extern void ia64_syscall_get_set_arguments(struct task_struct *task,
-	struct pt_regs *regs, unsigned int i, unsigned int n,
-	unsigned long *args, int rw);
+	struct pt_regs *regs, unsigned long *args, int rw);
 static inline void syscall_get_arguments(struct task_struct *task,
 					 struct pt_regs *regs,
 					 unsigned long *args)
 {
-	ia64_syscall_get_set_arguments(task, regs, 0, 6, args, 0);
+	ia64_syscall_get_set_arguments(task, regs, args, 0);
 }
 
 static inline void syscall_set_arguments(struct task_struct *task,
 					 struct pt_regs *regs,
-					 unsigned int i, unsigned int n,
 					 unsigned long *args)
 {
-	BUG_ON(i + n > 6);
-
-	ia64_syscall_get_set_arguments(task, regs, i, n, args, 1);
+	ia64_syscall_get_set_arguments(task, regs, args, 1);
 }
 
 static inline int syscall_get_arch(void)
diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c
index 6d50ede0ed69..bf9c24d9ce84 100644
--- a/arch/ia64/kernel/ptrace.c
+++ b/arch/ia64/kernel/ptrace.c
@@ -2179,12 +2179,11 @@ static void syscall_get_set_args_cb(struct unw_frame_info *info, void *data)
 }
 
 void ia64_syscall_get_set_arguments(struct task_struct *task,
-	struct pt_regs *regs, unsigned int i, unsigned int n,
-	unsigned long *args, int rw)
+	struct pt_regs *regs, unsigned long *args, int rw)
 {
 	struct syscall_get_set_args data = {
-		.i = i,
-		.n = n,
+		.i = 0,
+		.n = 6,
 		.args = args,
 		.regs = regs,
 		.rw = rw,
diff --git a/arch/microblaze/include/asm/syscall.h b/arch/microblaze/include/asm/syscall.h
index 4b23e44e5c3c..833d3a53dab3 100644
--- a/arch/microblaze/include/asm/syscall.h
+++ b/arch/microblaze/include/asm/syscall.h
@@ -93,9 +93,11 @@ static inline void syscall_get_arguments(struct task_struct *task,
 
 static inline void syscall_set_arguments(struct task_struct *task,
 					 struct pt_regs *regs,
-					 unsigned int i, unsigned int n,
 					 const unsigned long *args)
 {
+	unsigned int i = 0;
+	unsigned int n = 6;
+
 	while (n--)
 		microblaze_set_syscall_arg(regs, i++, *args++);
 }
diff --git a/arch/nds32/include/asm/syscall.h b/arch/nds32/include/asm/syscall.h
index 89a6ec8731d8..671ebd357496 100644
--- a/arch/nds32/include/asm/syscall.h
+++ b/arch/nds32/include/asm/syscall.h
@@ -129,39 +129,20 @@ void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs,
  * syscall_set_arguments - change system call parameter value
  * @task:	task of interest, must be in system call entry tracing
  * @regs:	task_pt_regs() of @task
- * @i:		argument index [0,5]
- * @n:		number of arguments; n+i must be [1,6].
  * @args:	array of argument values to store
  *
- * Changes @n arguments to the system call starting with the @i'th argument.
- * Argument @i gets value @args[0], and so on.
- * An arch inline version is probably optimal when @i and @n are constants.
+ * Changes 6 arguments to the system call. The first argument gets value
+ * @args[0], and so on.
  *
  * It's only valid to call this when @task is stopped for tracing on
  * entry to a system call, due to %TIF_SYSCALL_TRACE or %TIF_SYSCALL_AUDIT.
- * It's invalid to call this with @i + @n > 6; we only support system calls
- * taking up to 6 arguments.
  */
 void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs,
-			   unsigned int i, unsigned int n,
 			   const unsigned long *args)
 {
-	if (n == 0)
-		return;
-
-	if (i + n > SYSCALL_MAX_ARGS) {
-		pr_warn("%s called with max args %d, handling only %d\n",
-			__func__, i + n, SYSCALL_MAX_ARGS);
-		n = SYSCALL_MAX_ARGS - i;
-	}
-
-	if (i == 0) {
-		regs->orig_r0 = args[0];
-		args++;
-		i++;
-		n--;
-	}
+	regs->orig_r0 = args[0];
+	args++;
 
-	memcpy(&regs->uregs[0] + i, args, n * sizeof(args[0]));
+	memcpy(&regs->uregs[0] + 1, args, 5 * sizeof(args[0]));
 }
 #endif /* _ASM_NDS32_SYSCALL_H */
diff --git a/arch/nios2/include/asm/syscall.h b/arch/nios2/include/asm/syscall.h
index 792bd449d839..d7624ed06efb 100644
--- a/arch/nios2/include/asm/syscall.h
+++ b/arch/nios2/include/asm/syscall.h
@@ -69,42 +69,14 @@ static inline void syscall_get_arguments(struct task_struct *task,
 }
 
 static inline void syscall_set_arguments(struct task_struct *task,
-	struct pt_regs *regs, unsigned int i, unsigned int n,
-	const unsigned long *args)
+	struct pt_regs *regs, const unsigned long *args)
 {
-	BUG_ON(i + n > 6);
-
-	switch (i) {
-	case 0:
-		if (!n--)
-			break;
-		regs->r4 = *args++;
-	case 1:
-		if (!n--)
-			break;
-		regs->r5 = *args++;
-	case 2:
-		if (!n--)
-			break;
-		regs->r6 = *args++;
-	case 3:
-		if (!n--)
-			break;
-		regs->r7 = *args++;
-	case 4:
-		if (!n--)
-			break;
-		regs->r8 = *args++;
-	case 5:
-		if (!n--)
-			break;
-		regs->r9 = *args++;
-	case 6:
-		if (!n)
-			break;
-	default:
-		BUG();
-	}
+	regs->r4 = *args++;
+	regs->r5 = *args++;
+	regs->r6 = *args++;
+	regs->r7 = *args++;
+	regs->r8 = *args++;
+	regs->r9 = *args;
 }
 
 #endif
diff --git a/arch/openrisc/include/asm/syscall.h b/arch/openrisc/include/asm/syscall.h
index 72607860cd55..b4ff07c1baed 100644
--- a/arch/openrisc/include/asm/syscall.h
+++ b/arch/openrisc/include/asm/syscall.h
@@ -63,11 +63,9 @@ syscall_get_arguments(struct task_struct *task, struct pt_regs *regs,
 
 static inline void
 syscall_set_arguments(struct task_struct *task, struct pt_regs *regs,
-		      unsigned int i, unsigned int n, const unsigned long *args)
+		      const unsigned long *args)
 {
-	BUG_ON(i + n > 6);
-
-	memcpy(&regs->gpr[3 + i], args, n * sizeof(args[0]));
+	memcpy(&regs->gpr[3], args, 6 * sizeof(args[0]));
 }
 
 static inline int syscall_get_arch(void)
diff --git a/arch/powerpc/include/asm/syscall.h b/arch/powerpc/include/asm/syscall.h
index 5c9b9dc82b7e..1243045bad2d 100644
--- a/arch/powerpc/include/asm/syscall.h
+++ b/arch/powerpc/include/asm/syscall.h
@@ -86,15 +86,12 @@ static inline void syscall_get_arguments(struct task_struct *task,
 
 static inline void syscall_set_arguments(struct task_struct *task,
 					 struct pt_regs *regs,
-					 unsigned int i, unsigned int n,
 					 const unsigned long *args)
 {
-	BUG_ON(i + n > 6);
-	memcpy(&regs->gpr[3 + i], args, n * sizeof(args[0]));
+	memcpy(&regs->gpr[3], args, 6 * sizeof(args[0]));
 
 	/* Also copy the first argument into orig_gpr3 */
-	if (i == 0 && n > 0)
-		regs->orig_gpr3 = args[0];
+	regs->orig_gpr3 = args[0];
 }
 
 static inline int syscall_get_arch(void)
diff --git a/arch/riscv/include/asm/syscall.h b/arch/riscv/include/asm/syscall.h
index 6adca1804be1..a3d5273ded7c 100644
--- a/arch/riscv/include/asm/syscall.h
+++ b/arch/riscv/include/asm/syscall.h
@@ -81,18 +81,11 @@ static inline void syscall_get_arguments(struct task_struct *task,
 
 static inline void syscall_set_arguments(struct task_struct *task,
 					 struct pt_regs *regs,
-					 unsigned int i, unsigned int n,
 					 const unsigned long *args)
 {
-	BUG_ON(i + n > 6);
-        if (i == 0) {
-                regs->orig_a0 = args[0];
-                args++;
-                n--;
-	} else {
-		i--;
-	}
-	memcpy(&regs->a1 + i, args, n * sizeof(regs->a1));
+	regs->orig_a0 = args[0];
+	args++;
+	memcpy(&regs->a1, args, 5 * sizeof(regs->a1));
 }
 
 static inline int syscall_get_arch(void)
diff --git a/arch/s390/include/asm/syscall.h b/arch/s390/include/asm/syscall.h
index ee0b1f6aa36d..59c3e91f2cdb 100644
--- a/arch/s390/include/asm/syscall.h
+++ b/arch/s390/include/asm/syscall.h
@@ -74,15 +74,14 @@ static inline void syscall_get_arguments(struct task_struct *task,
 
 static inline void syscall_set_arguments(struct task_struct *task,
 					 struct pt_regs *regs,
-					 unsigned int i, unsigned int n,
 					 const unsigned long *args)
 {
-	BUG_ON(i + n > 6);
+	unsigned int n = 6;
+
 	while (n-- > 0)
-		if (i + n > 0)
-			regs->gprs[2 + i + n] = args[n];
-	if (i == 0)
-		regs->orig_gpr2 = args[0];
+		if (n > 0)
+			regs->gprs[2 + n] = args[n];
+	regs->orig_gpr2 = args[0];
 }
 
 static inline int syscall_get_arch(void)
diff --git a/arch/sh/include/asm/syscall_32.h b/arch/sh/include/asm/syscall_32.h
index 2bf1199a0595..8c9d7e5e5dcc 100644
--- a/arch/sh/include/asm/syscall_32.h
+++ b/arch/sh/include/asm/syscall_32.h
@@ -62,23 +62,14 @@ static inline void syscall_get_arguments(struct task_struct *task,
 
 static inline void syscall_set_arguments(struct task_struct *task,
 					 struct pt_regs *regs,
-					 unsigned int i, unsigned int n,
 					 const unsigned long *args)
 {
-	/* Same note as above applies */
-	BUG_ON(i);
-
-	switch (n) {
-	case 6: regs->regs[1] = args[5];
-	case 5: regs->regs[0] = args[4];
-	case 4: regs->regs[7] = args[3];
-	case 3: regs->regs[6] = args[2];
-	case 2: regs->regs[5] = args[1];
-	case 1: regs->regs[4] = args[0];
-		break;
-	default:
-		BUG();
-	}
+	regs->regs[1] = args[5];
+	regs->regs[0] = args[4];
+	regs->regs[7] = args[3];
+	regs->regs[6] = args[2];
+	regs->regs[5] = args[1];
+	regs->regs[4] = args[0];
 }
 
 static inline int syscall_get_arch(void)
diff --git a/arch/sh/include/asm/syscall_64.h b/arch/sh/include/asm/syscall_64.h
index 4e8f6460c703..22fad97da066 100644
--- a/arch/sh/include/asm/syscall_64.h
+++ b/arch/sh/include/asm/syscall_64.h
@@ -54,11 +54,9 @@ static inline void syscall_get_arguments(struct task_struct *task,
 
 static inline void syscall_set_arguments(struct task_struct *task,
 					 struct pt_regs *regs,
-					 unsigned int i, unsigned int n,
 					 const unsigned long *args)
 {
-	BUG_ON(i + n > 6);
-	memcpy(&regs->regs[2 + i], args, n * sizeof(args[0]));
+	memcpy(&regs->regs[2], args, 6 * sizeof(args[0]));
 }
 
 static inline int syscall_get_arch(void)
diff --git a/arch/sparc/include/asm/syscall.h b/arch/sparc/include/asm/syscall.h
index 872dfee852d6..4d075434e816 100644
--- a/arch/sparc/include/asm/syscall.h
+++ b/arch/sparc/include/asm/syscall.h
@@ -119,13 +119,12 @@ static inline void syscall_get_arguments(struct task_struct *task,
 
 static inline void syscall_set_arguments(struct task_struct *task,
 					 struct pt_regs *regs,
-					 unsigned int i, unsigned int n,
 					 const unsigned long *args)
 {
-	unsigned int j;
+	unsigned int i;
 
-	for (j = 0; j < n; j++)
-		regs->u_regs[UREG_I0 + i + j] = args[j];
+	for (i = 0; i < 6; i++)
+		regs->u_regs[UREG_I0 + i] = args[i];
 }
 
 static inline int syscall_get_arch(void)
diff --git a/arch/um/include/asm/syscall-generic.h b/arch/um/include/asm/syscall-generic.h
index 25d00acd1322..98e50c50c12e 100644
--- a/arch/um/include/asm/syscall-generic.h
+++ b/arch/um/include/asm/syscall-generic.h
@@ -67,43 +67,16 @@ static inline void syscall_get_arguments(struct task_struct *task,
 
 static inline void syscall_set_arguments(struct task_struct *task,
 					 struct pt_regs *regs,
-					 unsigned int i, unsigned int n,
 					 const unsigned long *args)
 {
 	struct uml_pt_regs *r = &regs->regs;
 
-	switch (i) {
-	case 0:
-		if (!n--)
-			break;
-		UPT_SYSCALL_ARG1(r) = *args++;
-	case 1:
-		if (!n--)
-			break;
-		UPT_SYSCALL_ARG2(r) = *args++;
-	case 2:
-		if (!n--)
-			break;
-		UPT_SYSCALL_ARG3(r) = *args++;
-	case 3:
-		if (!n--)
-			break;
-		UPT_SYSCALL_ARG4(r) = *args++;
-	case 4:
-		if (!n--)
-			break;
-		UPT_SYSCALL_ARG5(r) = *args++;
-	case 5:
-		if (!n--)
-			break;
-		UPT_SYSCALL_ARG6(r) = *args++;
-	case 6:
-		if (!n--)
-			break;
-	default:
-		BUG();
-		break;
-	}
+	UPT_SYSCALL_ARG1(r) = *args++;
+	UPT_SYSCALL_ARG2(r) = *args++;
+	UPT_SYSCALL_ARG3(r) = *args++;
+	UPT_SYSCALL_ARG4(r) = *args++;
+	UPT_SYSCALL_ARG5(r) = *args++;
+	UPT_SYSCALL_ARG6(r) = *args;
 }
 
 /* See arch/x86/um/asm/syscall.h for syscall_get_arch() definition. */
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index 8dbb5c379450..4c305471ec33 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -138,63 +138,26 @@ static inline void syscall_get_arguments(struct task_struct *task,
 
 static inline void syscall_set_arguments(struct task_struct *task,
 					 struct pt_regs *regs,
-					 unsigned int i, unsigned int n,
 					 const unsigned long *args)
 {
 # ifdef CONFIG_IA32_EMULATION
-	if (task->thread_info.status & TS_COMPAT)
-		switch (i) {
-		case 0:
-			if (!n--) break;
-			regs->bx = *args++;
-		case 1:
-			if (!n--) break;
-			regs->cx = *args++;
-		case 2:
-			if (!n--) break;
-			regs->dx = *args++;
-		case 3:
-			if (!n--) break;
-			regs->si = *args++;
-		case 4:
-			if (!n--) break;
-			regs->di = *args++;
-		case 5:
-			if (!n--) break;
-			regs->bp = *args++;
-		case 6:
-			if (!n--) break;
-		default:
-			BUG();
-			break;
-		}
-	else
+	if (task->thread_info.status & TS_COMPAT) {
+		regs->bx = *args++;
+		regs->cx = *args++;
+		regs->dx = *args++;
+		regs->si = *args++;
+		regs->di = *args++;
+		regs->bp = *args;
+	} else
 # endif
-		switch (i) {
-		case 0:
-			if (!n--) break;
-			regs->di = *args++;
-		case 1:
-			if (!n--) break;
-			regs->si = *args++;
-		case 2:
-			if (!n--) break;
-			regs->dx = *args++;
-		case 3:
-			if (!n--) break;
-			regs->r10 = *args++;
-		case 4:
-			if (!n--) break;
-			regs->r8 = *args++;
-		case 5:
-			if (!n--) break;
-			regs->r9 = *args++;
-		case 6:
-			if (!n--) break;
-		default:
-			BUG();
-			break;
-		}
+	{
+		regs->di = *args++;
+		regs->si = *args++;
+		regs->dx = *args++;
+		regs->r10 = *args++;
+		regs->r8 = *args++;
+		regs->r9 = *args;
+	}
 }
 
 static inline int syscall_get_arch(void)
diff --git a/arch/xtensa/include/asm/syscall.h b/arch/xtensa/include/asm/syscall.h
index 1504ce9a233a..91dc06d58060 100644
--- a/arch/xtensa/include/asm/syscall.h
+++ b/arch/xtensa/include/asm/syscall.h
@@ -70,24 +70,13 @@ static inline void syscall_get_arguments(struct task_struct *task,
 
 static inline void syscall_set_arguments(struct task_struct *task,
 					 struct pt_regs *regs,
-					 unsigned int i, unsigned int n,
 					 const unsigned long *args)
 {
 	static const unsigned int reg[] = XTENSA_SYSCALL_ARGUMENT_REGS;
-	unsigned int j;
-
-	if (n == 0)
-		return;
-
-	if (WARN_ON_ONCE(i + n > SYSCALL_MAX_ARGS)) {
-		if (i < SYSCALL_MAX_ARGS)
-			n = SYSCALL_MAX_ARGS - i;
-		else
-			return;
-	}
+	unsigned int i;
 
-	for (j = 0; j < n; ++j)
-		regs->areg[reg[i + j]] = args[j];
+	for (i = 0; i < 6; ++i)
+		regs->areg[reg[i]] = args[i];
 }
 
 asmlinkage long xtensa_rt_sigreturn(struct pt_regs*);
diff --git a/include/asm-generic/syscall.h b/include/asm-generic/syscall.h
index 269e9412ef42..b88239e9efe4 100644
--- a/include/asm-generic/syscall.h
+++ b/include/asm-generic/syscall.h
@@ -120,21 +120,15 @@ void syscall_get_arguments(struct task_struct *task, struct pt_regs *regs,
  * syscall_set_arguments - change system call parameter value
  * @task:	task of interest, must be in system call entry tracing
  * @regs:	task_pt_regs() of @task
- * @i:		argument index [0,5]
- * @n:		number of arguments; n+i must be [1,6].
  * @args:	array of argument values to store
  *
- * Changes @n arguments to the system call starting with the @i'th argument.
- * Argument @i gets value @args[0], and so on.
- * An arch inline version is probably optimal when @i and @n are constants.
+ * Changes 6 arguments to the system call.
+ * The first argument gets value @args[0], and so on.
  *
  * It's only valid to call this when @task is stopped for tracing on
  * entry to a system call, due to %TIF_SYSCALL_TRACE or %TIF_SYSCALL_AUDIT.
- * It's invalid to call this with @i + @n > 6; we only support system calls
- * taking up to 6 arguments.
  */
 void syscall_set_arguments(struct task_struct *task, struct pt_regs *regs,
-			   unsigned int i, unsigned int n,
 			   const unsigned long *args);
 
 /**
-- 
cgit v1.2.3


From 5f074f3e192f10c9fade898b9b3b8812e3d83342 Mon Sep 17 00:00:00 2001
From: Nick Desaulniers <ndesaulniers@google.com>
Date: Fri, 5 Apr 2019 18:38:45 -0700
Subject: lib/string.c: implement a basic bcmp

A recent optimization in Clang (r355672) lowers comparisons of the
return value of memcmp against zero to comparisons of the return value
of bcmp against zero.  This helps some platforms that implement bcmp
more efficiently than memcmp.  glibc simply aliases bcmp to memcmp, but
an optimized implementation is in the works.

This results in linkage failures for all targets with Clang due to the
undefined symbol.  For now, just implement bcmp as a tailcail to memcmp
to unbreak the build.  This routine can be further optimized in the
future.

Other ideas discussed:

 * A weak alias was discussed, but breaks for architectures that define
   their own implementations of memcmp since aliases to declarations are
   not permitted (only definitions). Arch-specific memcmp
   implementations typically declare memcmp in C headers, but implement
   them in assembly.

 * -ffreestanding also is used sporadically throughout the kernel.

 * -fno-builtin-bcmp doesn't work when doing LTO.

Link: https://bugs.llvm.org/show_bug.cgi?id=41035
Link: https://code.woboq.org/userspace/glibc/string/memcmp.c.html#bcmp
Link: https://github.com/llvm/llvm-project/commit/8e16d73346f8091461319a7dfc4ddd18eedcff13
Link: https://github.com/ClangBuiltLinux/linux/issues/416
Link: http://lkml.kernel.org/r/20190313211335.165605-1-ndesaulniers@google.com
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Reported-by: Nathan Chancellor <natechancellor@gmail.com>
Reported-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
Suggested-by: Arnd Bergmann <arnd@arndb.de>
Suggested-by: James Y Knight <jyknight@google.com>
Suggested-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Suggested-by: Nathan Chancellor <natechancellor@gmail.com>
Suggested-by: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Acked-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
Reviewed-by: Nathan Chancellor <natechancellor@gmail.com>
Tested-by: Nathan Chancellor <natechancellor@gmail.com>
Reviewed-by: Masahiro Yamada <yamada.masahiro@socionext.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Cc: David Laight <David.Laight@ACULAB.COM>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Dan Williams <dan.j.williams@intel.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/string.h |  3 +++
 lib/string.c           | 20 ++++++++++++++++++++
 2 files changed, 23 insertions(+)

(limited to 'include')

diff --git a/include/linux/string.h b/include/linux/string.h
index 7927b875f80c..6ab0a6fa512e 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -150,6 +150,9 @@ extern void * memscan(void *,int,__kernel_size_t);
 #ifndef __HAVE_ARCH_MEMCMP
 extern int memcmp(const void *,const void *,__kernel_size_t);
 #endif
+#ifndef __HAVE_ARCH_BCMP
+extern int bcmp(const void *,const void *,__kernel_size_t);
+#endif
 #ifndef __HAVE_ARCH_MEMCHR
 extern void * memchr(const void *,int,__kernel_size_t);
 #endif
diff --git a/lib/string.c b/lib/string.c
index 38e4ca08e757..3ab861c1a857 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -866,6 +866,26 @@ __visible int memcmp(const void *cs, const void *ct, size_t count)
 EXPORT_SYMBOL(memcmp);
 #endif
 
+#ifndef __HAVE_ARCH_BCMP
+/**
+ * bcmp - returns 0 if and only if the buffers have identical contents.
+ * @a: pointer to first buffer.
+ * @b: pointer to second buffer.
+ * @len: size of buffers.
+ *
+ * The sign or magnitude of a non-zero return value has no particular
+ * meaning, and architectures may implement their own more efficient bcmp(). So
+ * while this particular implementation is a simple (tail) call to memcmp, do
+ * not rely on anything but whether the return value is zero or non-zero.
+ */
+#undef bcmp
+int bcmp(const void *a, const void *b, size_t len)
+{
+	return memcmp(a, b, len);
+}
+EXPORT_SYMBOL(bcmp);
+#endif
+
 #ifndef __HAVE_ARCH_MEMSCAN
 /**
  * memscan - Find a character in an area of memory.
-- 
cgit v1.2.3


From 6147e136ff5071609b54f18982dea87706288e21 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 5 Apr 2019 18:38:53 -0700
Subject: include/linux/bitrev.h: fix constant bitrev

clang points out with hundreds of warnings that the bitrev macros have a
problem with constant input:

  drivers/hwmon/sht15.c:187:11: error: variable '__x' is uninitialized when used within its own initialization
        [-Werror,-Wuninitialized]
          u8 crc = bitrev8(data->val_status & 0x0F);
                   ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  include/linux/bitrev.h:102:21: note: expanded from macro 'bitrev8'
          __constant_bitrev8(__x) :                       \
          ~~~~~~~~~~~~~~~~~~~^~~~
  include/linux/bitrev.h:67:11: note: expanded from macro '__constant_bitrev8'
          u8 __x = x;                     \
             ~~~   ^

Both the bitrev and the __constant_bitrev macros use an internal
variable named __x, which goes horribly wrong when passing one to the
other.

The obvious fix is to rename one of the variables, so this adds an extra
'_'.

It seems we got away with this because

 - there are only a few drivers using bitrev macros

 - usually there are no constant arguments to those

 - when they are constant, they tend to be either 0 or (unsigned)-1
   (drivers/isdn/i4l/isdnhdlc.o, drivers/iio/amplifiers/ad8366.c) and
   give the correct result by pure chance.

In fact, the only driver that I could find that gets different results
with this is drivers/net/wan/slic_ds26522.c, which in turn is a driver
for fairly rare hardware (adding the maintainer to Cc for testing).

Link: http://lkml.kernel.org/r/20190322140503.123580-1-arnd@arndb.de
Fixes: 556d2f055bf6 ("ARM: 8187/1: add CONFIG_HAVE_ARCH_BITREVERSE to support rbit instruction")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Cc: Zhao Qiang <qiang.zhao@nxp.com>
Cc: Yalin Wang <yalin.wang@sonymobile.com>
Cc: <stable@vger.kernel.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/bitrev.h | 46 +++++++++++++++++++++++-----------------------
 1 file changed, 23 insertions(+), 23 deletions(-)

(limited to 'include')

diff --git a/include/linux/bitrev.h b/include/linux/bitrev.h
index 50fb0dee23e8..d35b8ec1c485 100644
--- a/include/linux/bitrev.h
+++ b/include/linux/bitrev.h
@@ -34,41 +34,41 @@ static inline u32 __bitrev32(u32 x)
 
 #define __constant_bitrev32(x)	\
 ({					\
-	u32 __x = x;			\
-	__x = (__x >> 16) | (__x << 16);	\
-	__x = ((__x & (u32)0xFF00FF00UL) >> 8) | ((__x & (u32)0x00FF00FFUL) << 8);	\
-	__x = ((__x & (u32)0xF0F0F0F0UL) >> 4) | ((__x & (u32)0x0F0F0F0FUL) << 4);	\
-	__x = ((__x & (u32)0xCCCCCCCCUL) >> 2) | ((__x & (u32)0x33333333UL) << 2);	\
-	__x = ((__x & (u32)0xAAAAAAAAUL) >> 1) | ((__x & (u32)0x55555555UL) << 1);	\
-	__x;								\
+	u32 ___x = x;			\
+	___x = (___x >> 16) | (___x << 16);	\
+	___x = ((___x & (u32)0xFF00FF00UL) >> 8) | ((___x & (u32)0x00FF00FFUL) << 8);	\
+	___x = ((___x & (u32)0xF0F0F0F0UL) >> 4) | ((___x & (u32)0x0F0F0F0FUL) << 4);	\
+	___x = ((___x & (u32)0xCCCCCCCCUL) >> 2) | ((___x & (u32)0x33333333UL) << 2);	\
+	___x = ((___x & (u32)0xAAAAAAAAUL) >> 1) | ((___x & (u32)0x55555555UL) << 1);	\
+	___x;								\
 })
 
 #define __constant_bitrev16(x)	\
 ({					\
-	u16 __x = x;			\
-	__x = (__x >> 8) | (__x << 8);	\
-	__x = ((__x & (u16)0xF0F0U) >> 4) | ((__x & (u16)0x0F0FU) << 4);	\
-	__x = ((__x & (u16)0xCCCCU) >> 2) | ((__x & (u16)0x3333U) << 2);	\
-	__x = ((__x & (u16)0xAAAAU) >> 1) | ((__x & (u16)0x5555U) << 1);	\
-	__x;								\
+	u16 ___x = x;			\
+	___x = (___x >> 8) | (___x << 8);	\
+	___x = ((___x & (u16)0xF0F0U) >> 4) | ((___x & (u16)0x0F0FU) << 4);	\
+	___x = ((___x & (u16)0xCCCCU) >> 2) | ((___x & (u16)0x3333U) << 2);	\
+	___x = ((___x & (u16)0xAAAAU) >> 1) | ((___x & (u16)0x5555U) << 1);	\
+	___x;								\
 })
 
 #define __constant_bitrev8x4(x) \
 ({			\
-	u32 __x = x;	\
-	__x = ((__x & (u32)0xF0F0F0F0UL) >> 4) | ((__x & (u32)0x0F0F0F0FUL) << 4);	\
-	__x = ((__x & (u32)0xCCCCCCCCUL) >> 2) | ((__x & (u32)0x33333333UL) << 2);	\
-	__x = ((__x & (u32)0xAAAAAAAAUL) >> 1) | ((__x & (u32)0x55555555UL) << 1);	\
-	__x;								\
+	u32 ___x = x;	\
+	___x = ((___x & (u32)0xF0F0F0F0UL) >> 4) | ((___x & (u32)0x0F0F0F0FUL) << 4);	\
+	___x = ((___x & (u32)0xCCCCCCCCUL) >> 2) | ((___x & (u32)0x33333333UL) << 2);	\
+	___x = ((___x & (u32)0xAAAAAAAAUL) >> 1) | ((___x & (u32)0x55555555UL) << 1);	\
+	___x;								\
 })
 
 #define __constant_bitrev8(x)	\
 ({					\
-	u8 __x = x;			\
-	__x = (__x >> 4) | (__x << 4);	\
-	__x = ((__x & (u8)0xCCU) >> 2) | ((__x & (u8)0x33U) << 2);	\
-	__x = ((__x & (u8)0xAAU) >> 1) | ((__x & (u8)0x55U) << 1);	\
-	__x;								\
+	u8 ___x = x;			\
+	___x = (___x >> 4) | (___x << 4);	\
+	___x = ((___x & (u8)0xCCU) >> 2) | ((___x & (u8)0x33U) << 2);	\
+	___x = ((___x & (u8)0xAAU) >> 1) | ((___x & (u8)0x55U) << 1);	\
+	___x;								\
 })
 
 #define bitrev32(x) \
-- 
cgit v1.2.3


From fcae96ff96538f66e7acd5d4e0f2e7516ff8cbd0 Mon Sep 17 00:00:00 2001
From: Jann Horn <jannh@google.com>
Date: Fri, 5 Apr 2019 18:39:01 -0700
Subject: mm: fix vm_fault_t cast in VM_FAULT_GET_HINDEX()

Symmetrically to VM_FAULT_SET_HINDEX(), we need a force-cast in
VM_FAULT_GET_HINDEX() to tell sparse that this is intentional.

Sparse complains about the current code when building a kernel with
CONFIG_MEMORY_FAILURE:

  arch/x86/mm/fault.c:1058:53: warning: restricted vm_fault_t degrades to integer

Link: http://lkml.kernel.org/r/20190327204117.35215-1-jannh@google.com
Fixes: 3d3539018d2c ("mm: create the new vm_fault_t type")
Signed-off-by: Jann Horn <jannh@google.com>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Souptick Joarder <jrdr.linux@gmail.com>
Cc: Matthew Wilcox <willy@infradead.org>
Cc: Vlastimil Babka <vbabka@suse.cz>
Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>
Cc: Rik van Riel <riel@surriel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm_types.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 7eade9132f02..4ef4bbe78a1d 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -671,7 +671,7 @@ enum vm_fault_reason {
 
 /* Encode hstate index for a hwpoisoned large page */
 #define VM_FAULT_SET_HINDEX(x) ((__force vm_fault_t)((x) << 16))
-#define VM_FAULT_GET_HINDEX(x) (((x) >> 16) & 0xf)
+#define VM_FAULT_GET_HINDEX(x) (((__force unsigned int)(x) >> 16) & 0xf)
 
 #define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS |	\
 			VM_FAULT_SIGSEGV | VM_FAULT_HWPOISON |	\
-- 
cgit v1.2.3


From 0b3d6e6f2dd0a7b697b1aa8c167265908940624b Mon Sep 17 00:00:00 2001
From: Greg Thelen <gthelen@google.com>
Date: Fri, 5 Apr 2019 18:39:18 -0700
Subject: mm: writeback: use exact memcg dirty counts

Since commit a983b5ebee57 ("mm: memcontrol: fix excessive complexity in
memory.stat reporting") memcg dirty and writeback counters are managed
as:

 1) per-memcg per-cpu values in range of [-32..32]

 2) per-memcg atomic counter

When a per-cpu counter cannot fit in [-32..32] it's flushed to the
atomic.  Stat readers only check the atomic.  Thus readers such as
balance_dirty_pages() may see a nontrivial error margin: 32 pages per
cpu.

Assuming 100 cpus:
   4k x86 page_size:  13 MiB error per memcg
  64k ppc page_size: 200 MiB error per memcg

Considering that dirty+writeback are used together for some decisions the
errors double.

This inaccuracy can lead to undeserved oom kills.  One nasty case is
when all per-cpu counters hold positive values offsetting an atomic
negative value (i.e.  per_cpu[*]=32, atomic=n_cpu*-32).
balance_dirty_pages() only consults the atomic and does not consider
throttling the next n_cpu*32 dirty pages.  If the file_lru is in the
13..200 MiB range then there's absolutely no dirty throttling, which
burdens vmscan with only dirty+writeback pages thus resorting to oom
kill.

It could be argued that tiny containers are not supported, but it's more
subtle.  It's the amount the space available for file lru that matters.
If a container has memory.max-200MiB of non reclaimable memory, then it
will also suffer such oom kills on a 100 cpu machine.

The following test reliably ooms without this patch.  This patch avoids
oom kills.

  $ cat test
  mount -t cgroup2 none /dev/cgroup
  cd /dev/cgroup
  echo +io +memory > cgroup.subtree_control
  mkdir test
  cd test
  echo 10M > memory.max
  (echo $BASHPID > cgroup.procs && exec /memcg-writeback-stress /foo)
  (echo $BASHPID > cgroup.procs && exec dd if=/dev/zero of=/foo bs=2M count=100)

  $ cat memcg-writeback-stress.c
  /*
   * Dirty pages from all but one cpu.
   * Clean pages from the non dirtying cpu.
   * This is to stress per cpu counter imbalance.
   * On a 100 cpu machine:
   * - per memcg per cpu dirty count is 32 pages for each of 99 cpus
   * - per memcg atomic is -99*32 pages
   * - thus the complete dirty limit: sum of all counters 0
   * - balance_dirty_pages() only sees atomic count -99*32 pages, which
   *   it max()s to 0.
   * - So a workload can dirty -99*32 pages before balance_dirty_pages()
   *   cares.
   */
  #define _GNU_SOURCE
  #include <err.h>
  #include <fcntl.h>
  #include <sched.h>
  #include <stdlib.h>
  #include <stdio.h>
  #include <sys/stat.h>
  #include <sys/sysinfo.h>
  #include <sys/types.h>
  #include <unistd.h>

  static char *buf;
  static int bufSize;

  static void set_affinity(int cpu)
  {
  	cpu_set_t affinity;

  	CPU_ZERO(&affinity);
  	CPU_SET(cpu, &affinity);
  	if (sched_setaffinity(0, sizeof(affinity), &affinity))
  		err(1, "sched_setaffinity");
  }

  static void dirty_on(int output_fd, int cpu)
  {
  	int i, wrote;

  	set_affinity(cpu);
  	for (i = 0; i < 32; i++) {
  		for (wrote = 0; wrote < bufSize; ) {
  			int ret = write(output_fd, buf+wrote, bufSize-wrote);
  			if (ret == -1)
  				err(1, "write");
  			wrote += ret;
  		}
  	}
  }

  int main(int argc, char **argv)
  {
  	int cpu, flush_cpu = 1, output_fd;
  	const char *output;

  	if (argc != 2)
  		errx(1, "usage: output_file");

  	output = argv[1];
  	bufSize = getpagesize();
  	buf = malloc(getpagesize());
  	if (buf == NULL)
  		errx(1, "malloc failed");

  	output_fd = open(output, O_CREAT|O_RDWR);
  	if (output_fd == -1)
  		err(1, "open(%s)", output);

  	for (cpu = 0; cpu < get_nprocs(); cpu++) {
  		if (cpu != flush_cpu)
  			dirty_on(output_fd, cpu);
  	}

  	set_affinity(flush_cpu);
  	if (fsync(output_fd))
  		err(1, "fsync(%s)", output);
  	if (close(output_fd))
  		err(1, "close(%s)", output);
  	free(buf);
  }

Make balance_dirty_pages() and wb_over_bg_thresh() work harder to
collect exact per memcg counters.  This avoids the aforementioned oom
kills.

This does not affect the overhead of memory.stat, which still reads the
single atomic counter.

Why not use percpu_counter? memcg already handles cpus going offline, so
no need for that overhead from percpu_counter.  And the percpu_counter
spinlocks are more heavyweight than is required.

It probably also makes sense to use exact dirty and writeback counters
in memcg oom reports.  But that is saved for later.

Link: http://lkml.kernel.org/r/20190329174609.164344-1-gthelen@google.com
Signed-off-by: Greg Thelen <gthelen@google.com>
Reviewed-by: Roman Gushchin <guro@fb.com>
Acked-by: Johannes Weiner <hannes@cmpxchg.org>
Cc: Michal Hocko <mhocko@kernel.org>
Cc: Vladimir Davydov <vdavydov.dev@gmail.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: <stable@vger.kernel.org>	[4.16+]
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/memcontrol.h |  5 ++++-
 mm/memcontrol.c            | 20 ++++++++++++++++++--
 2 files changed, 22 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 1f3d880b7ca1..dbb6118370c1 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -566,7 +566,10 @@ struct mem_cgroup *lock_page_memcg(struct page *page);
 void __unlock_page_memcg(struct mem_cgroup *memcg);
 void unlock_page_memcg(struct page *page);
 
-/* idx can be of type enum memcg_stat_item or node_stat_item */
+/*
+ * idx can be of type enum memcg_stat_item or node_stat_item.
+ * Keep in sync with memcg_exact_page_state().
+ */
 static inline unsigned long memcg_page_state(struct mem_cgroup *memcg,
 					     int idx)
 {
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 532e0e2a4817..81a0d3914ec9 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -3882,6 +3882,22 @@ struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb)
 	return &memcg->cgwb_domain;
 }
 
+/*
+ * idx can be of type enum memcg_stat_item or node_stat_item.
+ * Keep in sync with memcg_exact_page().
+ */
+static unsigned long memcg_exact_page_state(struct mem_cgroup *memcg, int idx)
+{
+	long x = atomic_long_read(&memcg->stat[idx]);
+	int cpu;
+
+	for_each_online_cpu(cpu)
+		x += per_cpu_ptr(memcg->stat_cpu, cpu)->count[idx];
+	if (x < 0)
+		x = 0;
+	return x;
+}
+
 /**
  * mem_cgroup_wb_stats - retrieve writeback related stats from its memcg
  * @wb: bdi_writeback in question
@@ -3907,10 +3923,10 @@ void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages,
 	struct mem_cgroup *memcg = mem_cgroup_from_css(wb->memcg_css);
 	struct mem_cgroup *parent;
 
-	*pdirty = memcg_page_state(memcg, NR_FILE_DIRTY);
+	*pdirty = memcg_exact_page_state(memcg, NR_FILE_DIRTY);
 
 	/* this should eventually include NR_UNSTABLE_NFS */
-	*pwriteback = memcg_page_state(memcg, NR_WRITEBACK);
+	*pwriteback = memcg_exact_page_state(memcg, NR_WRITEBACK);
 	*pfilepages = mem_cgroup_nr_lru_pages(memcg, (1 << LRU_INACTIVE_FILE) |
 						     (1 << LRU_ACTIVE_FILE));
 	*pheadroom = PAGE_COUNTER_MAX;
-- 
cgit v1.2.3


From 10dce8af34226d90fa56746a934f8da5dcdba3df Mon Sep 17 00:00:00 2001
From: Kirill Smelkov <kirr@nexedi.com>
Date: Tue, 26 Mar 2019 22:20:43 +0000
Subject: fs: stream_open - opener for stream-like files so that read and write
 can run simultaneously without deadlock

Commit 9c225f2655e3 ("vfs: atomic f_pos accesses as per POSIX") added
locking for file.f_pos access and in particular made concurrent read and
write not possible - now both those functions take f_pos lock for the
whole run, and so if e.g. a read is blocked waiting for data, write will
deadlock waiting for that read to complete.

This caused regression for stream-like files where previously read and
write could run simultaneously, but after that patch could not do so
anymore. See e.g. commit 581d21a2d02a ("xenbus: fix deadlock on writes
to /proc/xen/xenbus") which fixes such regression for particular case of
/proc/xen/xenbus.

The patch that added f_pos lock in 2014 did so to guarantee POSIX thread
safety for read/write/lseek and added the locking to file descriptors of
all regular files. In 2014 that thread-safety problem was not new as it
was already discussed earlier in 2006.

However even though 2006'th version of Linus's patch was adding f_pos
locking "only for files that are marked seekable with FMODE_LSEEK (thus
avoiding the stream-like objects like pipes and sockets)", the 2014
version - the one that actually made it into the tree as 9c225f2655e3 -
is doing so irregardless of whether a file is seekable or not.

See

    https://lore.kernel.org/lkml/53022DB1.4070805@gmail.com/
    https://lwn.net/Articles/180387
    https://lwn.net/Articles/180396

for historic context.

The reason that it did so is, probably, that there are many files that
are marked non-seekable, but e.g. their read implementation actually
depends on knowing current position to correctly handle the read. Some
examples:

	kernel/power/user.c		snapshot_read
	fs/debugfs/file.c		u32_array_read
	fs/fuse/control.c		fuse_conn_waiting_read + ...
	drivers/hwmon/asus_atk0110.c	atk_debugfs_ggrp_read
	arch/s390/hypfs/inode.c		hypfs_read_iter
	...

Despite that, many nonseekable_open users implement read and write with
pure stream semantics - they don't depend on passed ppos at all. And for
those cases where read could wait for something inside, it creates a
situation similar to xenbus - the write could be never made to go until
read is done, and read is waiting for some, potentially external, event,
for potentially unbounded time -> deadlock.

Besides xenbus, there are 14 such places in the kernel that I've found
with semantic patch (see below):

	drivers/xen/evtchn.c:667:8-24: ERROR: evtchn_fops: .read() can deadlock .write()
	drivers/isdn/capi/capi.c:963:8-24: ERROR: capi_fops: .read() can deadlock .write()
	drivers/input/evdev.c:527:1-17: ERROR: evdev_fops: .read() can deadlock .write()
	drivers/char/pcmcia/cm4000_cs.c:1685:7-23: ERROR: cm4000_fops: .read() can deadlock .write()
	net/rfkill/core.c:1146:8-24: ERROR: rfkill_fops: .read() can deadlock .write()
	drivers/s390/char/fs3270.c:488:1-17: ERROR: fs3270_fops: .read() can deadlock .write()
	drivers/usb/misc/ldusb.c:310:1-17: ERROR: ld_usb_fops: .read() can deadlock .write()
	drivers/hid/uhid.c:635:1-17: ERROR: uhid_fops: .read() can deadlock .write()
	net/batman-adv/icmp_socket.c:80:1-17: ERROR: batadv_fops: .read() can deadlock .write()
	drivers/media/rc/lirc_dev.c:198:1-17: ERROR: lirc_fops: .read() can deadlock .write()
	drivers/leds/uleds.c:77:1-17: ERROR: uleds_fops: .read() can deadlock .write()
	drivers/input/misc/uinput.c:400:1-17: ERROR: uinput_fops: .read() can deadlock .write()
	drivers/infiniband/core/user_mad.c:985:7-23: ERROR: umad_fops: .read() can deadlock .write()
	drivers/gnss/core.c:45:1-17: ERROR: gnss_fops: .read() can deadlock .write()

In addition to the cases above another regression caused by f_pos
locking is that now FUSE filesystems that implement open with
FOPEN_NONSEEKABLE flag, can no longer implement bidirectional
stream-like files - for the same reason as above e.g. read can deadlock
write locking on file.f_pos in the kernel.

FUSE's FOPEN_NONSEEKABLE was added in 2008 in a7c1b990f715 ("fuse:
implement nonseekable open") to support OSSPD. OSSPD implements /dev/dsp
in userspace with FOPEN_NONSEEKABLE flag, with corresponding read and
write routines not depending on current position at all, and with both
read and write being potentially blocking operations:

See

    https://github.com/libfuse/osspd
    https://lwn.net/Articles/308445

    https://github.com/libfuse/osspd/blob/14a9cff0/osspd.c#L1406
    https://github.com/libfuse/osspd/blob/14a9cff0/osspd.c#L1438-L1477
    https://github.com/libfuse/osspd/blob/14a9cff0/osspd.c#L1479-L1510

Corresponding libfuse example/test also describes FOPEN_NONSEEKABLE as
"somewhat pipe-like files ..." with read handler not using offset.
However that test implements only read without write and cannot exercise
the deadlock scenario:

    https://github.com/libfuse/libfuse/blob/fuse-3.4.2-3-ga1bff7d/example/poll.c#L124-L131
    https://github.com/libfuse/libfuse/blob/fuse-3.4.2-3-ga1bff7d/example/poll.c#L146-L163
    https://github.com/libfuse/libfuse/blob/fuse-3.4.2-3-ga1bff7d/example/poll.c#L209-L216

I've actually hit the read vs write deadlock for real while implementing
my FUSE filesystem where there is /head/watch file, for which open
creates separate bidirectional socket-like stream in between filesystem
and its user with both read and write being later performed
simultaneously. And there it is semantically not easy to split the
stream into two separate read-only and write-only channels:

    https://lab.nexedi.com/kirr/wendelin.core/blob/f13aa600/wcfs/wcfs.go#L88-169

Let's fix this regression. The plan is:

1. We can't change nonseekable_open to include &~FMODE_ATOMIC_POS -
   doing so would break many in-kernel nonseekable_open users which
   actually use ppos in read/write handlers.

2. Add stream_open() to kernel to open stream-like non-seekable file
   descriptors. Read and write on such file descriptors would never use
   nor change ppos. And with that property on stream-like files read and
   write will be running without taking f_pos lock - i.e. read and write
   could be running simultaneously.

3. With semantic patch search and convert to stream_open all in-kernel
   nonseekable_open users for which read and write actually do not
   depend on ppos and where there is no other methods in file_operations
   which assume @offset access.

4. Add FOPEN_STREAM to fs/fuse/ and open in-kernel file-descriptors via
   steam_open if that bit is present in filesystem open reply.

   It was tempting to change fs/fuse/ open handler to use stream_open
   instead of nonseekable_open on just FOPEN_NONSEEKABLE flags, but
   grepping through Debian codesearch shows users of FOPEN_NONSEEKABLE,
   and in particular GVFS which actually uses offset in its read and
   write handlers

	https://codesearch.debian.net/search?q=-%3Enonseekable+%3D
	https://gitlab.gnome.org/GNOME/gvfs/blob/1.40.0-6-gcbc54396/client/gvfsfusedaemon.c#L1080
	https://gitlab.gnome.org/GNOME/gvfs/blob/1.40.0-6-gcbc54396/client/gvfsfusedaemon.c#L1247-1346
	https://gitlab.gnome.org/GNOME/gvfs/blob/1.40.0-6-gcbc54396/client/gvfsfusedaemon.c#L1399-1481

   so if we would do such a change it will break a real user.

5. Add stream_open and FOPEN_STREAM handling to stable kernels starting
   from v3.14+ (the kernel where 9c225f2655 first appeared).

   This will allow to patch OSSPD and other FUSE filesystems that
   provide stream-like files to return FOPEN_STREAM | FOPEN_NONSEEKABLE
   in their open handler and this way avoid the deadlock on all kernel
   versions. This should work because fs/fuse/ ignores unknown open
   flags returned from a filesystem and so passing FOPEN_STREAM to a
   kernel that is not aware of this flag cannot hurt. In turn the kernel
   that is not aware of FOPEN_STREAM will be < v3.14 where just
   FOPEN_NONSEEKABLE is sufficient to implement streams without read vs
   write deadlock.

This patch adds stream_open, converts /proc/xen/xenbus to it and adds
semantic patch to automatically locate in-kernel places that are either
required to be converted due to read vs write deadlock, or that are just
safe to be converted because read and write do not use ppos and there
are no other funky methods in file_operations.

Regarding semantic patch I've verified each generated change manually -
that it is correct to convert - and each other nonseekable_open instance
left - that it is either not correct to convert there, or that it is not
converted due to current stream_open.cocci limitations.

The script also does not convert files that should be valid to convert,
but that currently have .llseek = noop_llseek or generic_file_llseek for
unknown reason despite file being opened with nonseekable_open (e.g.
drivers/input/mousedev.c)

Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Yongzhi Pan <panyongzhi@gmail.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: David Vrabel <david.vrabel@citrix.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Miklos Szeredi <miklos@szeredi.hu>
Cc: Tejun Heo <tj@kernel.org>
Cc: Kirill Tkhai <ktkhai@virtuozzo.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Julia Lawall <Julia.Lawall@lip6.fr>
Cc: Nikolaus Rath <Nikolaus@rath.org>
Cc: Han-Wen Nienhuys <hanwen@google.com>
Signed-off-by: Kirill Smelkov <kirr@nexedi.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 drivers/xen/xenbus/xenbus_dev_frontend.c |   4 +-
 fs/open.c                                |  18 ++
 fs/read_write.c                          |   5 +-
 include/linux/fs.h                       |   4 +
 scripts/coccinelle/api/stream_open.cocci | 363 +++++++++++++++++++++++++++++++
 5 files changed, 389 insertions(+), 5 deletions(-)
 create mode 100644 scripts/coccinelle/api/stream_open.cocci

(limited to 'include')

diff --git a/drivers/xen/xenbus/xenbus_dev_frontend.c b/drivers/xen/xenbus/xenbus_dev_frontend.c
index c3e201025ef0..0782ff3c2273 100644
--- a/drivers/xen/xenbus/xenbus_dev_frontend.c
+++ b/drivers/xen/xenbus/xenbus_dev_frontend.c
@@ -622,9 +622,7 @@ static int xenbus_file_open(struct inode *inode, struct file *filp)
 	if (xen_store_evtchn == 0)
 		return -ENOENT;
 
-	nonseekable_open(inode, filp);
-
-	filp->f_mode &= ~FMODE_ATOMIC_POS; /* cdev-style semantics */
+	stream_open(inode, filp);
 
 	u = kzalloc(sizeof(*u), GFP_KERNEL);
 	if (u == NULL)
diff --git a/fs/open.c b/fs/open.c
index f1c2f855fd43..a00350018a47 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -1215,3 +1215,21 @@ int nonseekable_open(struct inode *inode, struct file *filp)
 }
 
 EXPORT_SYMBOL(nonseekable_open);
+
+/*
+ * stream_open is used by subsystems that want stream-like file descriptors.
+ * Such file descriptors are not seekable and don't have notion of position
+ * (file.f_pos is always 0). Contrary to file descriptors of other regular
+ * files, .read() and .write() can run simultaneously.
+ *
+ * stream_open never fails and is marked to return int so that it could be
+ * directly used as file_operations.open .
+ */
+int stream_open(struct inode *inode, struct file *filp)
+{
+	filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE | FMODE_ATOMIC_POS);
+	filp->f_mode |= FMODE_STREAM;
+	return 0;
+}
+
+EXPORT_SYMBOL(stream_open);
diff --git a/fs/read_write.c b/fs/read_write.c
index 177ccc3d405a..61b43ad7608e 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -560,12 +560,13 @@ ssize_t vfs_write(struct file *file, const char __user *buf, size_t count, loff_
 
 static inline loff_t file_pos_read(struct file *file)
 {
-	return file->f_pos;
+	return file->f_mode & FMODE_STREAM ? 0 : file->f_pos;
 }
 
 static inline void file_pos_write(struct file *file, loff_t pos)
 {
-	file->f_pos = pos;
+	if ((file->f_mode & FMODE_STREAM) == 0)
+		file->f_pos = pos;
 }
 
 ssize_t ksys_read(unsigned int fd, char __user *buf, size_t count)
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 8b42df09b04c..dd28e7679089 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -158,6 +158,9 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 #define FMODE_OPENED		((__force fmode_t)0x80000)
 #define FMODE_CREATED		((__force fmode_t)0x100000)
 
+/* File is stream-like */
+#define FMODE_STREAM		((__force fmode_t)0x200000)
+
 /* File was opened by fanotify and shouldn't generate fanotify events */
 #define FMODE_NONOTIFY		((__force fmode_t)0x4000000)
 
@@ -3074,6 +3077,7 @@ extern loff_t no_seek_end_llseek_size(struct file *, loff_t, int, loff_t);
 extern loff_t no_seek_end_llseek(struct file *, loff_t, int);
 extern int generic_file_open(struct inode * inode, struct file * filp);
 extern int nonseekable_open(struct inode * inode, struct file * filp);
+extern int stream_open(struct inode * inode, struct file * filp);
 
 #ifdef CONFIG_BLOCK
 typedef void (dio_submit_t)(struct bio *bio, struct inode *inode,
diff --git a/scripts/coccinelle/api/stream_open.cocci b/scripts/coccinelle/api/stream_open.cocci
new file mode 100644
index 000000000000..350145da7669
--- /dev/null
+++ b/scripts/coccinelle/api/stream_open.cocci
@@ -0,0 +1,363 @@
+// SPDX-License-Identifier: GPL-2.0
+// Author: Kirill Smelkov (kirr@nexedi.com)
+//
+// Search for stream-like files that are using nonseekable_open and convert
+// them to stream_open. A stream-like file is a file that does not use ppos in
+// its read and write. Rationale for the conversion is to avoid deadlock in
+// between read and write.
+
+virtual report
+virtual patch
+virtual explain  // explain decisions in the patch (SPFLAGS="-D explain")
+
+// stream-like reader & writer - ones that do not depend on f_pos.
+@ stream_reader @
+identifier readstream, ppos;
+identifier f, buf, len;
+type loff_t;
+@@
+  ssize_t readstream(struct file *f, char *buf, size_t len, loff_t *ppos)
+  {
+    ... when != ppos
+  }
+
+@ stream_writer @
+identifier writestream, ppos;
+identifier f, buf, len;
+type loff_t;
+@@
+  ssize_t writestream(struct file *f, const char *buf, size_t len, loff_t *ppos)
+  {
+    ... when != ppos
+  }
+
+
+// a function that blocks
+@ blocks @
+identifier block_f;
+identifier wait_event =~ "^wait_event_.*";
+@@
+  block_f(...) {
+    ... when exists
+    wait_event(...)
+    ... when exists
+  }
+
+// stream_reader that can block inside.
+//
+// XXX wait_* can be called not directly from current function (e.g. func -> f -> g -> wait())
+// XXX currently reader_blocks supports only direct and 1-level indirect cases.
+@ reader_blocks_direct @
+identifier stream_reader.readstream;
+identifier wait_event =~ "^wait_event_.*";
+@@
+  readstream(...)
+  {
+    ... when exists
+    wait_event(...)
+    ... when exists
+  }
+
+@ reader_blocks_1 @
+identifier stream_reader.readstream;
+identifier blocks.block_f;
+@@
+  readstream(...)
+  {
+    ... when exists
+    block_f(...)
+    ... when exists
+  }
+
+@ reader_blocks depends on reader_blocks_direct || reader_blocks_1 @
+identifier stream_reader.readstream;
+@@
+  readstream(...) {
+    ...
+  }
+
+
+// file_operations + whether they have _any_ .read, .write, .llseek ... at all.
+//
+// XXX add support for file_operations xxx[N] = ...	(sound/core/pcm_native.c)
+@ fops0 @
+identifier fops;
+@@
+  struct file_operations fops = {
+    ...
+  };
+
+@ has_read @
+identifier fops0.fops;
+identifier read_f;
+@@
+  struct file_operations fops = {
+    .read = read_f,
+  };
+
+@ has_read_iter @
+identifier fops0.fops;
+identifier read_iter_f;
+@@
+  struct file_operations fops = {
+    .read_iter = read_iter_f,
+  };
+
+@ has_write @
+identifier fops0.fops;
+identifier write_f;
+@@
+  struct file_operations fops = {
+    .write = write_f,
+  };
+
+@ has_write_iter @
+identifier fops0.fops;
+identifier write_iter_f;
+@@
+  struct file_operations fops = {
+    .write_iter = write_iter_f,
+  };
+
+@ has_llseek @
+identifier fops0.fops;
+identifier llseek_f;
+@@
+  struct file_operations fops = {
+    .llseek = llseek_f,
+  };
+
+@ has_no_llseek @
+identifier fops0.fops;
+@@
+  struct file_operations fops = {
+    .llseek = no_llseek,
+  };
+
+@ has_mmap @
+identifier fops0.fops;
+identifier mmap_f;
+@@
+  struct file_operations fops = {
+    .mmap = mmap_f,
+  };
+
+@ has_copy_file_range @
+identifier fops0.fops;
+identifier copy_file_range_f;
+@@
+  struct file_operations fops = {
+    .copy_file_range = copy_file_range_f,
+  };
+
+@ has_remap_file_range @
+identifier fops0.fops;
+identifier remap_file_range_f;
+@@
+  struct file_operations fops = {
+    .remap_file_range = remap_file_range_f,
+  };
+
+@ has_splice_read @
+identifier fops0.fops;
+identifier splice_read_f;
+@@
+  struct file_operations fops = {
+    .splice_read = splice_read_f,
+  };
+
+@ has_splice_write @
+identifier fops0.fops;
+identifier splice_write_f;
+@@
+  struct file_operations fops = {
+    .splice_write = splice_write_f,
+  };
+
+
+// file_operations that is candidate for stream_open conversion - it does not
+// use mmap and other methods that assume @offset access to file.
+//
+// XXX for simplicity require no .{read/write}_iter and no .splice_{read/write} for now.
+// XXX maybe_steam.fops cannot be used in other rules - it gives "bad rule maybe_stream or bad variable fops".
+@ maybe_stream depends on (!has_llseek || has_no_llseek) && !has_mmap && !has_copy_file_range && !has_remap_file_range && !has_read_iter && !has_write_iter && !has_splice_read && !has_splice_write @
+identifier fops0.fops;
+@@
+  struct file_operations fops = {
+  };
+
+
+// ---- conversions ----
+
+// XXX .open = nonseekable_open -> .open = stream_open
+// XXX .open = func -> openfunc -> nonseekable_open
+
+// read & write
+//
+// if both are used in the same file_operations together with an opener -
+// under that conditions we can use stream_open instead of nonseekable_open.
+@ fops_rw depends on maybe_stream @
+identifier fops0.fops, openfunc;
+identifier stream_reader.readstream;
+identifier stream_writer.writestream;
+@@
+  struct file_operations fops = {
+      .open  = openfunc,
+      .read  = readstream,
+      .write = writestream,
+  };
+
+@ report_rw depends on report @
+identifier fops_rw.openfunc;
+position p1;
+@@
+  openfunc(...) {
+    <...
+     nonseekable_open@p1
+    ...>
+  }
+
+@ script:python depends on report && reader_blocks @
+fops << fops0.fops;
+p << report_rw.p1;
+@@
+coccilib.report.print_report(p[0],
+  "ERROR: %s: .read() can deadlock .write(); change nonseekable_open -> stream_open to fix." % (fops,))
+
+@ script:python depends on report && !reader_blocks @
+fops << fops0.fops;
+p << report_rw.p1;
+@@
+coccilib.report.print_report(p[0],
+  "WARNING: %s: .read() and .write() have stream semantic; safe to change nonseekable_open -> stream_open." % (fops,))
+
+
+@ explain_rw_deadlocked depends on explain && reader_blocks @
+identifier fops_rw.openfunc;
+@@
+  openfunc(...) {
+    <...
+-    nonseekable_open
++    nonseekable_open /* read & write (was deadlock) */
+    ...>
+  }
+
+
+@ explain_rw_nodeadlock depends on explain && !reader_blocks @
+identifier fops_rw.openfunc;
+@@
+  openfunc(...) {
+    <...
+-    nonseekable_open
++    nonseekable_open /* read & write (no direct deadlock) */
+    ...>
+  }
+
+@ patch_rw depends on patch @
+identifier fops_rw.openfunc;
+@@
+  openfunc(...) {
+    <...
+-   nonseekable_open
++   stream_open
+    ...>
+  }
+
+
+// read, but not write
+@ fops_r depends on maybe_stream && !has_write @
+identifier fops0.fops, openfunc;
+identifier stream_reader.readstream;
+@@
+  struct file_operations fops = {
+      .open  = openfunc,
+      .read  = readstream,
+  };
+
+@ report_r depends on report @
+identifier fops_r.openfunc;
+position p1;
+@@
+  openfunc(...) {
+    <...
+    nonseekable_open@p1
+    ...>
+  }
+
+@ script:python depends on report @
+fops << fops0.fops;
+p << report_r.p1;
+@@
+coccilib.report.print_report(p[0],
+  "WARNING: %s: .read() has stream semantic; safe to change nonseekable_open -> stream_open." % (fops,))
+
+@ explain_r depends on explain @
+identifier fops_r.openfunc;
+@@
+  openfunc(...) {
+    <...
+-   nonseekable_open
++   nonseekable_open /* read only */
+    ...>
+  }
+
+@ patch_r depends on patch @
+identifier fops_r.openfunc;
+@@
+  openfunc(...) {
+    <...
+-   nonseekable_open
++   stream_open
+    ...>
+  }
+
+
+// write, but not read
+@ fops_w depends on maybe_stream && !has_read @
+identifier fops0.fops, openfunc;
+identifier stream_writer.writestream;
+@@
+  struct file_operations fops = {
+      .open  = openfunc,
+      .write = writestream,
+  };
+
+@ report_w depends on report @
+identifier fops_w.openfunc;
+position p1;
+@@
+  openfunc(...) {
+    <...
+    nonseekable_open@p1
+    ...>
+  }
+
+@ script:python depends on report @
+fops << fops0.fops;
+p << report_w.p1;
+@@
+coccilib.report.print_report(p[0],
+  "WARNING: %s: .write() has stream semantic; safe to change nonseekable_open -> stream_open." % (fops,))
+
+@ explain_w depends on explain @
+identifier fops_w.openfunc;
+@@
+  openfunc(...) {
+    <...
+-   nonseekable_open
++   nonseekable_open /* write only */
+    ...>
+  }
+
+@ patch_w depends on patch @
+identifier fops_w.openfunc;
+@@
+  openfunc(...) {
+    <...
+-   nonseekable_open
++   stream_open
+    ...>
+  }
+
+
+// no read, no write - don't change anything
-- 
cgit v1.2.3


From 6491d698396fd5da4941980a35ca7c162a672016 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 3 Apr 2019 10:13:51 +0300
Subject: nfc: nci: Potential off by one in ->pipes[] array

This is similar to commit e285d5bfb7e9 ("NFC: Fix the number of pipes")
where we changed NFC_HCI_MAX_PIPES from 127 to 128.

As the comment next to the define explains, the pipe identifier is 7
bits long.  The highest possible pipe is 127, but the number of possible
pipes is 128.  As the code is now, then there is potential for an
out of bounds array access:

    net/nfc/nci/hci.c:297 nci_hci_cmd_received() warn: array off by one?
    'ndev->hci_dev->pipes[pipe]' '0-127 == 127'

Fixes: 11f54f228643 ("NFC: nci: Add HCI over NCI protocol support")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/nfc/nci_core.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h
index 87499b6b35d6..df5c69db68af 100644
--- a/include/net/nfc/nci_core.h
+++ b/include/net/nfc/nci_core.h
@@ -166,7 +166,7 @@ struct nci_conn_info {
  * According to specification 102 622 chapter 4.4 Pipes,
  * the pipe identifier is 7 bits long.
  */
-#define NCI_HCI_MAX_PIPES          127
+#define NCI_HCI_MAX_PIPES          128
 
 struct nci_hci_gate {
 	u8 gate;
-- 
cgit v1.2.3


From b4ed6b51f356224c6c71540ed94087f7f09b84af Mon Sep 17 00:00:00 2001
From: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Date: Fri, 5 Apr 2019 09:57:08 -0700
Subject: ASoC: core: conditionally increase module refcount on component open

Recently, for Intel platforms the "ignore_module_refcount" field
was introduced for the component driver. In order to avoid a
deadlock preventing the PCI modules from being removed
even when the card was idle, the refcounts were not incremented
for the device driver module during component probe.

However, this change introduced a nasty side effect:
the device driver module can be unloaded while a pcm stream is open.

This patch proposes to change the field to be renamed as
"module_get_upon_open". When this field is set, the module
refcount should be incremented on pcm open amd decremented
upon pcm close. This will enable modules to be removed
when no PCM playback/capture happens and prevent removal
when the component is actually in use.

Also, align with the skylake component driver with the new name.

Fixes: b450b878('ASoC: core: don't increase component module refcount
                 unconditionally'
Signed-off-by: Ranjani Sridharan <ranjani.sridharan@linux.intel.com>
Acked-by: Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
Signed-off-by: Mark Brown <broonie@kernel.org>
---
 include/sound/soc.h               | 9 +++++++--
 sound/soc/intel/skylake/skl-pcm.c | 2 +-
 sound/soc/soc-core.c              | 4 ++--
 3 files changed, 10 insertions(+), 5 deletions(-)

(limited to 'include')

diff --git a/include/sound/soc.h b/include/sound/soc.h
index 1e2be35ed36f..482b4ea87c3c 100644
--- a/include/sound/soc.h
+++ b/include/sound/soc.h
@@ -802,8 +802,13 @@ struct snd_soc_component_driver {
 	int probe_order;
 	int remove_order;
 
-	/* signal if the module handling the component cannot be removed */
-	unsigned int ignore_module_refcount:1;
+	/*
+	 * signal if the module handling the component should not be removed
+	 * if a pcm is open. Setting this would prevent the module
+	 * refcount being incremented in probe() but allow it be incremented
+	 * when a pcm is opened and decremented when it is closed.
+	 */
+	unsigned int module_get_upon_open:1;
 
 	/* bits */
 	unsigned int idle_bias_on:1;
diff --git a/sound/soc/intel/skylake/skl-pcm.c b/sound/soc/intel/skylake/skl-pcm.c
index 57031b6d4d45..9735e2412251 100644
--- a/sound/soc/intel/skylake/skl-pcm.c
+++ b/sound/soc/intel/skylake/skl-pcm.c
@@ -1475,7 +1475,7 @@ static const struct snd_soc_component_driver skl_component  = {
 	.ops		= &skl_platform_ops,
 	.pcm_new	= skl_pcm_new,
 	.pcm_free	= skl_pcm_free,
-	.ignore_module_refcount = 1, /* do not increase the refcount in core */
+	.module_get_upon_open = 1, /* increment refcount when a pcm is opened */
 };
 
 int skl_platform_register(struct device *dev)
diff --git a/sound/soc/soc-core.c b/sound/soc/soc-core.c
index d88757659729..46e3ab0fced4 100644
--- a/sound/soc/soc-core.c
+++ b/sound/soc/soc-core.c
@@ -947,7 +947,7 @@ static void soc_cleanup_component(struct snd_soc_component *component)
 	snd_soc_dapm_free(snd_soc_component_get_dapm(component));
 	soc_cleanup_component_debugfs(component);
 	component->card = NULL;
-	if (!component->driver->ignore_module_refcount)
+	if (!component->driver->module_get_upon_open)
 		module_put(component->dev->driver->owner);
 }
 
@@ -1381,7 +1381,7 @@ static int soc_probe_component(struct snd_soc_card *card,
 		return 0;
 	}
 
-	if (!component->driver->ignore_module_refcount &&
+	if (!component->driver->module_get_upon_open &&
 	    !try_module_get(component->dev->driver->owner))
 		return -ENODEV;
 
-- 
cgit v1.2.3


From 1200e07f3ad4b9d976cf2fff3a0c3d9a1faecb3e Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Mon, 8 Apr 2019 19:02:38 +0800
Subject: block: don't use for-inside-for in bio_for_each_segment_all

Commit 6dc4f100c175 ("block: allow bio_for_each_segment_all() to
iterate over multi-page bvec") changes bio_for_each_segment_all()
to use for-inside-for.

This way breaks all bio_for_each_segment_all() call with error out
branch via 'break', since now 'break' can only break from the inner
loop.

Fixes this issue by implementing bio_for_each_segment_all() via
single 'for' loop, and now the logic is very similar with normal
bvec iterator.

Cc: Qu Wenruo <quwenruo.btrfs@gmx.com>
Cc: linux-btrfs@vger.kernel.org
Cc: linux-fsdevel@vger.kernel.org
Cc: Omar Sandoval <osandov@fb.com>
Reviewed-by: Johannes Thumshirn <jthumshirn@suse.de>
Reported-and-Tested-by: Qu Wenruo <quwenruo.btrfs@gmx.com>
Fixes: 6dc4f100c175 ("block: allow bio_for_each_segment_all() to iterate over multi-page bvec")
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 include/linux/bio.h  | 20 ++++++++++++--------
 include/linux/bvec.h | 14 ++++++++++----
 2 files changed, 22 insertions(+), 12 deletions(-)

(limited to 'include')

diff --git a/include/linux/bio.h b/include/linux/bio.h
index bb6090aa165d..e584673c1881 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -120,19 +120,23 @@ static inline bool bio_full(struct bio *bio)
 	return bio->bi_vcnt >= bio->bi_max_vecs;
 }
 
-#define mp_bvec_for_each_segment(bv, bvl, i, iter_all)			\
-	for (bv = bvec_init_iter_all(&iter_all);			\
-		(iter_all.done < (bvl)->bv_len) &&			\
-		(mp_bvec_next_segment((bvl), &iter_all), 1);		\
-		iter_all.done += bv->bv_len, i += 1)
+static inline bool bio_next_segment(const struct bio *bio,
+				    struct bvec_iter_all *iter)
+{
+	if (iter->idx >= bio->bi_vcnt)
+		return false;
+
+	bvec_advance(&bio->bi_io_vec[iter->idx], iter);
+	return true;
+}
 
 /*
  * drivers should _never_ use the all version - the bio may have been split
  * before it got to the driver and the driver won't own all of it
  */
-#define bio_for_each_segment_all(bvl, bio, i, iter_all)		\
-	for (i = 0, iter_all.idx = 0; iter_all.idx < (bio)->bi_vcnt; iter_all.idx++)	\
-		mp_bvec_for_each_segment(bvl, &((bio)->bi_io_vec[iter_all.idx]), i, iter_all)
+#define bio_for_each_segment_all(bvl, bio, i, iter)			\
+	for (i = 0, bvl = bvec_init_iter_all(&iter);			\
+	     bio_next_segment((bio), &iter); i++)
 
 static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
 				    unsigned bytes)
diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index f6275c4da13a..3bc91879e1e2 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -145,18 +145,18 @@ static inline bool bvec_iter_advance(const struct bio_vec *bv,
 
 static inline struct bio_vec *bvec_init_iter_all(struct bvec_iter_all *iter_all)
 {
-	iter_all->bv.bv_page = NULL;
 	iter_all->done = 0;
+	iter_all->idx = 0;
 
 	return &iter_all->bv;
 }
 
-static inline void mp_bvec_next_segment(const struct bio_vec *bvec,
-					struct bvec_iter_all *iter_all)
+static inline void bvec_advance(const struct bio_vec *bvec,
+				struct bvec_iter_all *iter_all)
 {
 	struct bio_vec *bv = &iter_all->bv;
 
-	if (bv->bv_page) {
+	if (iter_all->done) {
 		bv->bv_page = nth_page(bv->bv_page, 1);
 		bv->bv_offset = 0;
 	} else {
@@ -165,6 +165,12 @@ static inline void mp_bvec_next_segment(const struct bio_vec *bvec,
 	}
 	bv->bv_len = min_t(unsigned int, PAGE_SIZE - bv->bv_offset,
 			   bvec->bv_len - iter_all->done);
+	iter_all->done += bv->bv_len;
+
+	if (iter_all->done == bvec->bv_len) {
+		iter_all->idx++;
+		iter_all->done = 0;
+	}
 }
 
 /*
-- 
cgit v1.2.3


From cf94db21905333e610e479688add629397a4b384 Mon Sep 17 00:00:00 2001
From: Cornelia Huck <cohuck@redhat.com>
Date: Mon, 8 Apr 2019 14:33:22 +0200
Subject: virtio: Honour 'may_reduce_num' in vring_create_virtqueue

vring_create_virtqueue() allows the caller to specify via the
may_reduce_num parameter whether the vring code is allowed to
allocate a smaller ring than specified.

However, the split ring allocation code tries to allocate a
smaller ring on allocation failure regardless of what the
caller specified. This may cause trouble for e.g. virtio-pci
in legacy mode, which does not support ring resizing. (The
packed ring code does not resize in any case.)

Let's fix this by bailing out immediately in the split ring code
if the requested size cannot be allocated and may_reduce_num has
not been specified.

While at it, fix a typo in the usage instructions.

Fixes: 2a2d1382fe9d ("virtio: Add improved queue allocation API")
Cc: stable@vger.kernel.org # v4.6+
Signed-off-by: Cornelia Huck <cohuck@redhat.com>
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Reviewed-by: Halil Pasic <pasic@linux.ibm.com>
Reviewed-by: Jens Freimann <jfreimann@redhat.com>
---
 drivers/virtio/virtio_ring.c | 2 ++
 include/linux/virtio_ring.h  | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c
index 18846afb39da..5df92c308286 100644
--- a/drivers/virtio/virtio_ring.c
+++ b/drivers/virtio/virtio_ring.c
@@ -882,6 +882,8 @@ static struct virtqueue *vring_create_virtqueue_split(
 					  GFP_KERNEL|__GFP_NOWARN|__GFP_ZERO);
 		if (queue)
 			break;
+		if (!may_reduce_num)
+			return NULL;
 	}
 
 	if (!num)
diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h
index fab02133a919..3dc70adfe5f5 100644
--- a/include/linux/virtio_ring.h
+++ b/include/linux/virtio_ring.h
@@ -63,7 +63,7 @@ struct virtqueue;
 /*
  * Creates a virtqueue and allocates the descriptor ring.  If
  * may_reduce_num is set, then this may allocate a smaller ring than
- * expected.  The caller should query virtqueue_get_ring_size to learn
+ * expected.  The caller should query virtqueue_get_vring_size to learn
  * the actual size of the ring.
  */
 struct virtqueue *vring_create_virtqueue(unsigned int index,
-- 
cgit v1.2.3


From be24b37e22c20cbaa891971616784dd0f35211e8 Mon Sep 17 00:00:00 2001
From: "ndesaulniers@google.com" <ndesaulniers@google.com>
Date: Mon, 22 Oct 2018 16:43:57 -0700
Subject: KEYS: trusted: fix -Wvarags warning

Fixes the warning reported by Clang:
security/keys/trusted.c:146:17: warning: passing an object that
undergoes default
      argument promotion to 'va_start' has undefined behavior [-Wvarargs]
        va_start(argp, h3);
                       ^
security/keys/trusted.c:126:37: note: parameter of type 'unsigned
char' is declared here
unsigned char *h2, unsigned char h3, ...)
                               ^
Specifically, it seems that both the C90 (4.8.1.1) and C11 (7.16.1.4)
standards explicitly call this out as undefined behavior:

The parameter parmN is the identifier of the rightmost parameter in
the variable parameter list in the function definition (the one just
before the ...). If the parameter parmN is declared with ... or with a
type that is not compatible with the type that results after
application of the default argument promotions, the behavior is
undefined.

Link: https://github.com/ClangBuiltLinux/linux/issues/41
Link: https://www.eskimo.com/~scs/cclass/int/sx11c.html
Suggested-by: David Laight <David.Laight@aculab.com>
Suggested-by: Denis Kenzior <denkenz@gmail.com>
Suggested-by: James Bottomley <jejb@linux.vnet.ibm.com>
Suggested-by: Nathan Chancellor <natechancellor@gmail.com>
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Nathan Chancellor <natechancellor@gmail.com>
Tested-by: Nathan Chancellor <natechancellor@gmail.com>
Reviewed-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Signed-off-by: Jarkko Sakkinen <jarkko.sakkinen@linux.intel.com>
Signed-off-by: James Morris <james.morris@microsoft.com>
---
 include/keys/trusted.h  | 2 +-
 security/keys/trusted.c | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/keys/trusted.h b/include/keys/trusted.h
index adbcb6817826..0071298b9b28 100644
--- a/include/keys/trusted.h
+++ b/include/keys/trusted.h
@@ -38,7 +38,7 @@ enum {
 
 int TSS_authhmac(unsigned char *digest, const unsigned char *key,
 			unsigned int keylen, unsigned char *h1,
-			unsigned char *h2, unsigned char h3, ...);
+			unsigned char *h2, unsigned int h3, ...);
 int TSS_checkhmac1(unsigned char *buffer,
 			  const uint32_t command,
 			  const unsigned char *ononce,
diff --git a/security/keys/trusted.c b/security/keys/trusted.c
index a284d790de8c..efdbf17f3915 100644
--- a/security/keys/trusted.c
+++ b/security/keys/trusted.c
@@ -125,7 +125,7 @@ out:
  */
 int TSS_authhmac(unsigned char *digest, const unsigned char *key,
 			unsigned int keylen, unsigned char *h1,
-			unsigned char *h2, unsigned char h3, ...)
+			unsigned char *h2, unsigned int h3, ...)
 {
 	unsigned char paramdigest[SHA1_DIGEST_SIZE];
 	struct sdesc *sdesc;
@@ -144,7 +144,7 @@ int TSS_authhmac(unsigned char *digest, const unsigned char *key,
 		return PTR_ERR(sdesc);
 	}
 
-	c = h3;
+	c = !!h3;
 	ret = crypto_shash_init(&sdesc->shash);
 	if (ret < 0)
 		goto out;
-- 
cgit v1.2.3


From afe64245af9f58267e7fa8fb76ad5650ee7ec25f Mon Sep 17 00:00:00 2001
From: Michael Zhivich <mzhivich@akamai.com>
Date: Mon, 8 Apr 2019 10:48:45 -0400
Subject: ethtool: avoid signed-unsigned comparison in ethtool_validate_speed()

When building C++ userspace code that includes ethtool.h
with "-Werror -Wall", g++ complains about signed-unsigned comparison in
ethtool_validate_speed() due to definition of SPEED_UNKNOWN as -1.

Explicitly cast SPEED_UNKNOWN to __u32 to match type of
ethtool_validate_speed() argument.

Signed-off-by: Michael Zhivich <mzhivich@akamai.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/ethtool.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 3652b239dad1..d473e5ed044c 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -1591,7 +1591,7 @@ enum ethtool_link_mode_bit_indices {
 
 static inline int ethtool_validate_speed(__u32 speed)
 {
-	return speed <= INT_MAX || speed == SPEED_UNKNOWN;
+	return speed <= INT_MAX || speed == (__u32)SPEED_UNKNOWN;
 }
 
 /* Duplex, half or full. */
-- 
cgit v1.2.3


From 1b8f21b74c3c9c82fce5a751d7aefb7cc0b8d33d Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Tue, 9 Apr 2019 06:31:21 +0800
Subject: blk-mq: introduce blk_mq_complete_request_sync()

In NVMe's error handler, follows the typical steps of tearing down
hardware for recovering controller:

1) stop blk_mq hw queues
2) stop the real hw queues
3) cancel in-flight requests via
	blk_mq_tagset_busy_iter(tags, cancel_request, ...)
cancel_request():
	mark the request as abort
	blk_mq_complete_request(req);
4) destroy real hw queues

However, there may be race between #3 and #4, because blk_mq_complete_request()
may run q->mq_ops->complete(rq) remotelly and asynchronously, and
->complete(rq) may be run after #4.

This patch introduces blk_mq_complete_request_sync() for fixing the
above race.

Cc: Sagi Grimberg <sagi@grimberg.me>
Cc: Bart Van Assche <bvanassche@acm.org>
Cc: James Smart <james.smart@broadcom.com>
Cc: linux-nvme@lists.infradead.org
Reviewed-by: Keith Busch <keith.busch@intel.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/blk-mq.c         | 7 +++++++
 include/linux/blk-mq.h | 1 +
 2 files changed, 8 insertions(+)

(limited to 'include')

diff --git a/block/blk-mq.c b/block/blk-mq.c
index a9354835cf51..9516304a38ee 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -654,6 +654,13 @@ bool blk_mq_complete_request(struct request *rq)
 }
 EXPORT_SYMBOL(blk_mq_complete_request);
 
+void blk_mq_complete_request_sync(struct request *rq)
+{
+	WRITE_ONCE(rq->state, MQ_RQ_COMPLETE);
+	rq->q->mq_ops->complete(rq);
+}
+EXPORT_SYMBOL_GPL(blk_mq_complete_request_sync);
+
 int blk_mq_request_started(struct request *rq)
 {
 	return blk_mq_rq_state(rq) != MQ_RQ_IDLE;
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index cb2aa7ecafff..db29928de467 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -302,6 +302,7 @@ void blk_mq_requeue_request(struct request *rq, bool kick_requeue_list);
 void blk_mq_kick_requeue_list(struct request_queue *q);
 void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs);
 bool blk_mq_complete_request(struct request *rq);
+void blk_mq_complete_request_sync(struct request *rq);
 bool blk_mq_bio_list_merge(struct request_queue *q, struct list_head *list,
 			   struct bio *bio);
 bool blk_mq_queue_stopped(struct request_queue *q);
-- 
cgit v1.2.3


From 7c2e07130090ae001a97a6b65597830d6815e93e Mon Sep 17 00:00:00 2001
From: David Müller <dave.mueller@gmx.ch>
Date: Mon, 8 Apr 2019 15:33:54 +0200
Subject: clk: x86: Add system specific quirk to mark clocks as critical
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Since commit 648e921888ad ("clk: x86: Stop marking clocks as
CLK_IS_CRITICAL"), the pmc_plt_clocks of the Bay Trail SoC are
unconditionally gated off. Unfortunately this will break systems where these
clocks are used for external purposes beyond the kernel's knowledge. Fix it
by implementing a system specific quirk to mark the necessary pmc_plt_clks as
critical.

Fixes: 648e921888ad ("clk: x86: Stop marking clocks as CLK_IS_CRITICAL")
Signed-off-by: David Müller <dave.mueller@gmx.ch>
Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Reviewed-by: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Stephen Boyd <sboyd@kernel.org>
---
 drivers/clk/x86/clk-pmc-atom.c                 | 14 +++++++++++---
 drivers/platform/x86/pmc_atom.c                | 21 +++++++++++++++++++++
 include/linux/platform_data/x86/clk-pmc-atom.h |  3 +++
 3 files changed, 35 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/drivers/clk/x86/clk-pmc-atom.c b/drivers/clk/x86/clk-pmc-atom.c
index d977193842df..19174835693b 100644
--- a/drivers/clk/x86/clk-pmc-atom.c
+++ b/drivers/clk/x86/clk-pmc-atom.c
@@ -165,7 +165,7 @@ static const struct clk_ops plt_clk_ops = {
 };
 
 static struct clk_plt *plt_clk_register(struct platform_device *pdev, int id,
-					void __iomem *base,
+					const struct pmc_clk_data *pmc_data,
 					const char **parent_names,
 					int num_parents)
 {
@@ -184,9 +184,17 @@ static struct clk_plt *plt_clk_register(struct platform_device *pdev, int id,
 	init.num_parents = num_parents;
 
 	pclk->hw.init = &init;
-	pclk->reg = base + PMC_CLK_CTL_OFFSET + id * PMC_CLK_CTL_SIZE;
+	pclk->reg = pmc_data->base + PMC_CLK_CTL_OFFSET + id * PMC_CLK_CTL_SIZE;
 	spin_lock_init(&pclk->lock);
 
+	/*
+	 * On some systems, the pmc_plt_clocks already enabled by the
+	 * firmware are being marked as critical to avoid them being
+	 * gated by the clock framework.
+	 */
+	if (pmc_data->critical && plt_clk_is_enabled(&pclk->hw))
+		init.flags |= CLK_IS_CRITICAL;
+
 	ret = devm_clk_hw_register(&pdev->dev, &pclk->hw);
 	if (ret) {
 		pclk = ERR_PTR(ret);
@@ -332,7 +340,7 @@ static int plt_clk_probe(struct platform_device *pdev)
 		return PTR_ERR(parent_names);
 
 	for (i = 0; i < PMC_CLK_NUM; i++) {
-		data->clks[i] = plt_clk_register(pdev, i, pmc_data->base,
+		data->clks[i] = plt_clk_register(pdev, i, pmc_data,
 						 parent_names, data->nparents);
 		if (IS_ERR(data->clks[i])) {
 			err = PTR_ERR(data->clks[i]);
diff --git a/drivers/platform/x86/pmc_atom.c b/drivers/platform/x86/pmc_atom.c
index 8f018b3f3cd4..eaec2d306481 100644
--- a/drivers/platform/x86/pmc_atom.c
+++ b/drivers/platform/x86/pmc_atom.c
@@ -17,6 +17,7 @@
 
 #include <linux/debugfs.h>
 #include <linux/device.h>
+#include <linux/dmi.h>
 #include <linux/init.h>
 #include <linux/io.h>
 #include <linux/platform_data/x86/clk-pmc-atom.h>
@@ -391,11 +392,27 @@ static int pmc_dbgfs_register(struct pmc_dev *pmc)
 }
 #endif /* CONFIG_DEBUG_FS */
 
+/*
+ * Some systems need one or more of their pmc_plt_clks to be
+ * marked as critical.
+ */
+static const struct dmi_system_id critclk_systems[] __initconst = {
+	{
+		.ident = "MPL CEC1x",
+		.matches = {
+			DMI_MATCH(DMI_SYS_VENDOR, "MPL AG"),
+			DMI_MATCH(DMI_PRODUCT_NAME, "CEC10 Family"),
+		},
+	},
+	{ /*sentinel*/ }
+};
+
 static int pmc_setup_clks(struct pci_dev *pdev, void __iomem *pmc_regmap,
 			  const struct pmc_data *pmc_data)
 {
 	struct platform_device *clkdev;
 	struct pmc_clk_data *clk_data;
+	const struct dmi_system_id *d = dmi_first_match(critclk_systems);
 
 	clk_data = kzalloc(sizeof(*clk_data), GFP_KERNEL);
 	if (!clk_data)
@@ -403,6 +420,10 @@ static int pmc_setup_clks(struct pci_dev *pdev, void __iomem *pmc_regmap,
 
 	clk_data->base = pmc_regmap; /* offset is added by client */
 	clk_data->clks = pmc_data->clks;
+	if (d) {
+		clk_data->critical = true;
+		pr_info("%s critclks quirk enabled\n", d->ident);
+	}
 
 	clkdev = platform_device_register_data(&pdev->dev, "clk-pmc-atom",
 					       PLATFORM_DEVID_NONE,
diff --git a/include/linux/platform_data/x86/clk-pmc-atom.h b/include/linux/platform_data/x86/clk-pmc-atom.h
index 3ab892208343..7a37ac27d0fb 100644
--- a/include/linux/platform_data/x86/clk-pmc-atom.h
+++ b/include/linux/platform_data/x86/clk-pmc-atom.h
@@ -35,10 +35,13 @@ struct pmc_clk {
  *
  * @base:	PMC clock register base offset
  * @clks:	pointer to set of registered clocks, typically 0..5
+ * @critical:	flag to indicate if firmware enabled pmc_plt_clks
+ *		should be marked as critial or not
  */
 struct pmc_clk_data {
 	void __iomem *base;
 	const struct pmc_clk *clks;
+	bool critical;
 };
 
 #endif /* __PLATFORM_DATA_X86_CLK_PMC_ATOM_H */
-- 
cgit v1.2.3


From d808b7f759b50acf0784ce6230ffa63e12ef465d Mon Sep 17 00:00:00 2001
From: Keith Busch <keith.busch@intel.com>
Date: Tue, 9 Apr 2019 10:03:59 -0600
Subject: nvmet: fix discover log page when offsets are used

The nvme target hadn't been taking the Get Log Page offset parameter
into consideration, and so has been returning corrupted log pages when
offsets are used. Since many tools, including nvme-cli, split the log
request to 4k, we've been breaking discovery log responses when more
than 3 subsystems exist.

Fix the returned data by internally generating the entire discovery
log page and copying only the requested bytes into the user buffer. The
command log page offset type has been modified to a native __le64 to
make it easier to extract the value from a command.

Signed-off-by: Keith Busch <keith.busch@intel.com>
Tested-by: Minwoo Im <minwoo.im@samsung.com>
Reviewed-by: Chaitanya Kulkarni <chaitanya.kulkarni@wdc.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Reviewed-by: James Smart <james.smart@broadcom.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 drivers/nvme/target/admin-cmd.c |  5 +++
 drivers/nvme/target/discovery.c | 68 +++++++++++++++++++++++++++--------------
 drivers/nvme/target/nvmet.h     |  1 +
 include/linux/nvme.h            |  9 ++++--
 4 files changed, 58 insertions(+), 25 deletions(-)

(limited to 'include')

diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c
index 76250181fee0..9f72d515fc4b 100644
--- a/drivers/nvme/target/admin-cmd.c
+++ b/drivers/nvme/target/admin-cmd.c
@@ -24,6 +24,11 @@ u32 nvmet_get_log_page_len(struct nvme_command *cmd)
 	return len;
 }
 
+u64 nvmet_get_log_page_offset(struct nvme_command *cmd)
+{
+	return le64_to_cpu(cmd->get_log_page.lpo);
+}
+
 static void nvmet_execute_get_log_page_noop(struct nvmet_req *req)
 {
 	nvmet_req_complete(req, nvmet_zero_sgl(req, 0, req->data_len));
diff --git a/drivers/nvme/target/discovery.c b/drivers/nvme/target/discovery.c
index c872b47a88f3..33ed95e72d6b 100644
--- a/drivers/nvme/target/discovery.c
+++ b/drivers/nvme/target/discovery.c
@@ -131,54 +131,76 @@ static void nvmet_set_disc_traddr(struct nvmet_req *req, struct nvmet_port *port
 		memcpy(traddr, port->disc_addr.traddr, NVMF_TRADDR_SIZE);
 }
 
+static size_t discovery_log_entries(struct nvmet_req *req)
+{
+	struct nvmet_ctrl *ctrl = req->sq->ctrl;
+	struct nvmet_subsys_link *p;
+	struct nvmet_port *r;
+	size_t entries = 0;
+
+	list_for_each_entry(p, &req->port->subsystems, entry) {
+		if (!nvmet_host_allowed(p->subsys, ctrl->hostnqn))
+			continue;
+		entries++;
+	}
+	list_for_each_entry(r, &req->port->referrals, entry)
+		entries++;
+	return entries;
+}
+
 static void nvmet_execute_get_disc_log_page(struct nvmet_req *req)
 {
 	const int entry_size = sizeof(struct nvmf_disc_rsp_page_entry);
 	struct nvmet_ctrl *ctrl = req->sq->ctrl;
 	struct nvmf_disc_rsp_page_hdr *hdr;
+	u64 offset = nvmet_get_log_page_offset(req->cmd);
 	size_t data_len = nvmet_get_log_page_len(req->cmd);
-	size_t alloc_len = max(data_len, sizeof(*hdr));
-	int residual_len = data_len - sizeof(*hdr);
+	size_t alloc_len;
 	struct nvmet_subsys_link *p;
 	struct nvmet_port *r;
 	u32 numrec = 0;
 	u16 status = 0;
+	void *buffer;
+
+	/* Spec requires dword aligned offsets */
+	if (offset & 0x3) {
+		status = NVME_SC_INVALID_FIELD | NVME_SC_DNR;
+		goto out;
+	}
 
 	/*
 	 * Make sure we're passing at least a buffer of response header size.
 	 * If host provided data len is less than the header size, only the
 	 * number of bytes requested by host will be sent to host.
 	 */
-	hdr = kzalloc(alloc_len, GFP_KERNEL);
-	if (!hdr) {
+	down_read(&nvmet_config_sem);
+	alloc_len = sizeof(*hdr) + entry_size * discovery_log_entries(req);
+	buffer = kzalloc(alloc_len, GFP_KERNEL);
+	if (!buffer) {
+		up_read(&nvmet_config_sem);
 		status = NVME_SC_INTERNAL;
 		goto out;
 	}
 
-	down_read(&nvmet_config_sem);
+	hdr = buffer;
 	list_for_each_entry(p, &req->port->subsystems, entry) {
+		char traddr[NVMF_TRADDR_SIZE];
+
 		if (!nvmet_host_allowed(p->subsys, ctrl->hostnqn))
 			continue;
-		if (residual_len >= entry_size) {
-			char traddr[NVMF_TRADDR_SIZE];
-
-			nvmet_set_disc_traddr(req, req->port, traddr);
-			nvmet_format_discovery_entry(hdr, req->port,
-					p->subsys->subsysnqn, traddr,
-					NVME_NQN_NVME, numrec);
-			residual_len -= entry_size;
-		}
+
+		nvmet_set_disc_traddr(req, req->port, traddr);
+		nvmet_format_discovery_entry(hdr, req->port,
+				p->subsys->subsysnqn, traddr,
+				NVME_NQN_NVME, numrec);
 		numrec++;
 	}
 
 	list_for_each_entry(r, &req->port->referrals, entry) {
-		if (residual_len >= entry_size) {
-			nvmet_format_discovery_entry(hdr, r,
-					NVME_DISC_SUBSYS_NAME,
-					r->disc_addr.traddr,
-					NVME_NQN_DISC, numrec);
-			residual_len -= entry_size;
-		}
+		nvmet_format_discovery_entry(hdr, r,
+				NVME_DISC_SUBSYS_NAME,
+				r->disc_addr.traddr,
+				NVME_NQN_DISC, numrec);
 		numrec++;
 	}
 
@@ -190,8 +212,8 @@ static void nvmet_execute_get_disc_log_page(struct nvmet_req *req)
 
 	up_read(&nvmet_config_sem);
 
-	status = nvmet_copy_to_sgl(req, 0, hdr, data_len);
-	kfree(hdr);
+	status = nvmet_copy_to_sgl(req, 0, buffer + offset, data_len);
+	kfree(buffer);
 out:
 	nvmet_req_complete(req, status);
 }
diff --git a/drivers/nvme/target/nvmet.h b/drivers/nvme/target/nvmet.h
index 51e49efd7849..1653d19b187f 100644
--- a/drivers/nvme/target/nvmet.h
+++ b/drivers/nvme/target/nvmet.h
@@ -428,6 +428,7 @@ u16 nvmet_copy_from_sgl(struct nvmet_req *req, off_t off, void *buf,
 u16 nvmet_zero_sgl(struct nvmet_req *req, off_t off, size_t len);
 
 u32 nvmet_get_log_page_len(struct nvme_command *cmd);
+u64 nvmet_get_log_page_offset(struct nvme_command *cmd);
 
 extern struct list_head *nvmet_ports;
 void nvmet_port_disc_changed(struct nvmet_port *port,
diff --git a/include/linux/nvme.h b/include/linux/nvme.h
index baa49e6a23cc..c40720cb59ac 100644
--- a/include/linux/nvme.h
+++ b/include/linux/nvme.h
@@ -967,8 +967,13 @@ struct nvme_get_log_page_command {
 	__le16			numdl;
 	__le16			numdu;
 	__u16			rsvd11;
-	__le32			lpol;
-	__le32			lpou;
+	union {
+		struct {
+			__le32 lpol;
+			__le32 lpou;
+		};
+		__le64 lpo;
+	};
 	__u32			rsvd14[2];
 };
 
-- 
cgit v1.2.3


From af6b61d7ef58099c82d854395a0e002be6bd036c Mon Sep 17 00:00:00 2001
From: Trond Myklebust <trond.myklebust@hammerspace.com>
Date: Thu, 11 Apr 2019 15:16:52 -0400
Subject: Revert "SUNRPC: Micro-optimise when the task is known not to be
 sleeping"

This reverts commit 009a82f6437490c262584d65a14094a818bcb747.

The ability to optimise here relies on compiler being able to optimise
away tail calls to avoid stack overflows. Unfortunately, we are seeing
reports of problems, so let's just revert.

Reported-by: Daniel Mack <daniel@zonque.org>
Signed-off-by: Trond Myklebust <trond.myklebust@hammerspace.com>
---
 include/linux/sunrpc/sched.h |  8 --------
 net/sunrpc/clnt.c            | 45 ++++++++------------------------------------
 2 files changed, 8 insertions(+), 45 deletions(-)

(limited to 'include')

diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index ec861cd0cfe8..52d41d0c1ae1 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -304,12 +304,4 @@ rpc_clnt_swap_deactivate(struct rpc_clnt *clnt)
 }
 #endif /* CONFIG_SUNRPC_SWAP */
 
-static inline bool
-rpc_task_need_resched(const struct rpc_task *task)
-{
-	if (RPC_IS_QUEUED(task) || task->tk_callback)
-		return true;
-	return false;
-}
-
 #endif /* _LINUX_SUNRPC_SCHED_H_ */
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 187d10443a15..1d0395ef62c9 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1540,7 +1540,6 @@ call_start(struct rpc_task *task)
 	clnt->cl_stats->rpccnt++;
 	task->tk_action = call_reserve;
 	rpc_task_set_transport(task, clnt);
-	call_reserve(task);
 }
 
 /*
@@ -1554,9 +1553,6 @@ call_reserve(struct rpc_task *task)
 	task->tk_status  = 0;
 	task->tk_action  = call_reserveresult;
 	xprt_reserve(task);
-	if (rpc_task_need_resched(task))
-		return;
-	 call_reserveresult(task);
 }
 
 static void call_retry_reserve(struct rpc_task *task);
@@ -1579,7 +1575,6 @@ call_reserveresult(struct rpc_task *task)
 	if (status >= 0) {
 		if (task->tk_rqstp) {
 			task->tk_action = call_refresh;
-			call_refresh(task);
 			return;
 		}
 
@@ -1605,7 +1600,6 @@ call_reserveresult(struct rpc_task *task)
 		/* fall through */
 	case -EAGAIN:	/* woken up; retry */
 		task->tk_action = call_retry_reserve;
-		call_retry_reserve(task);
 		return;
 	case -EIO:	/* probably a shutdown */
 		break;
@@ -1628,9 +1622,6 @@ call_retry_reserve(struct rpc_task *task)
 	task->tk_status  = 0;
 	task->tk_action  = call_reserveresult;
 	xprt_retry_reserve(task);
-	if (rpc_task_need_resched(task))
-		return;
-	call_reserveresult(task);
 }
 
 /*
@@ -1645,9 +1636,6 @@ call_refresh(struct rpc_task *task)
 	task->tk_status = 0;
 	task->tk_client->cl_stats->rpcauthrefresh++;
 	rpcauth_refreshcred(task);
-	if (rpc_task_need_resched(task))
-		return;
-	call_refreshresult(task);
 }
 
 /*
@@ -1666,7 +1654,6 @@ call_refreshresult(struct rpc_task *task)
 	case 0:
 		if (rpcauth_uptodatecred(task)) {
 			task->tk_action = call_allocate;
-			call_allocate(task);
 			return;
 		}
 		/* Use rate-limiting and a max number of retries if refresh
@@ -1685,7 +1672,6 @@ call_refreshresult(struct rpc_task *task)
 		task->tk_cred_retry--;
 		dprintk("RPC: %5u %s: retry refresh creds\n",
 				task->tk_pid, __func__);
-		call_refresh(task);
 		return;
 	}
 	dprintk("RPC: %5u %s: refresh creds failed with error %d\n",
@@ -1711,10 +1697,8 @@ call_allocate(struct rpc_task *task)
 	task->tk_status = 0;
 	task->tk_action = call_encode;
 
-	if (req->rq_buffer) {
-		call_encode(task);
+	if (req->rq_buffer)
 		return;
-	}
 
 	if (proc->p_proc != 0) {
 		BUG_ON(proc->p_arglen == 0);
@@ -1740,12 +1724,8 @@ call_allocate(struct rpc_task *task)
 
 	status = xprt->ops->buf_alloc(task);
 	xprt_inject_disconnect(xprt);
-	if (status == 0) {
-		if (rpc_task_need_resched(task))
-			return;
-		call_encode(task);
+	if (status == 0)
 		return;
-	}
 	if (status != -ENOMEM) {
 		rpc_exit(task, status);
 		return;
@@ -1828,8 +1808,12 @@ call_encode(struct rpc_task *task)
 		xprt_request_enqueue_receive(task);
 	xprt_request_enqueue_transmit(task);
 out:
-	task->tk_action = call_bind;
-	call_bind(task);
+	task->tk_action = call_transmit;
+	/* Check that the connection is OK */
+	if (!xprt_bound(task->tk_xprt))
+		task->tk_action = call_bind;
+	else if (!xprt_connected(task->tk_xprt))
+		task->tk_action = call_connect;
 }
 
 /*
@@ -1847,7 +1831,6 @@ rpc_task_handle_transmitted(struct rpc_task *task)
 {
 	xprt_end_transmit(task);
 	task->tk_action = call_transmit_status;
-	call_transmit_status(task);
 }
 
 /*
@@ -1865,7 +1848,6 @@ call_bind(struct rpc_task *task)
 
 	if (xprt_bound(xprt)) {
 		task->tk_action = call_connect;
-		call_connect(task);
 		return;
 	}
 
@@ -1896,7 +1878,6 @@ call_bind_status(struct rpc_task *task)
 		dprint_status(task);
 		task->tk_status = 0;
 		task->tk_action = call_connect;
-		call_connect(task);
 		return;
 	}
 
@@ -1981,7 +1962,6 @@ call_connect(struct rpc_task *task)
 
 	if (xprt_connected(xprt)) {
 		task->tk_action = call_transmit;
-		call_transmit(task);
 		return;
 	}
 
@@ -2051,7 +2031,6 @@ call_connect_status(struct rpc_task *task)
 	case 0:
 		clnt->cl_stats->netreconn++;
 		task->tk_action = call_transmit;
-		call_transmit(task);
 		return;
 	}
 	rpc_exit(task, status);
@@ -2087,9 +2066,6 @@ call_transmit(struct rpc_task *task)
 		xprt_transmit(task);
 	}
 	xprt_end_transmit(task);
-	if (rpc_task_need_resched(task))
-		return;
-	call_transmit_status(task);
 }
 
 /*
@@ -2107,9 +2083,6 @@ call_transmit_status(struct rpc_task *task)
 	if (rpc_task_transmitted(task)) {
 		if (task->tk_status == 0)
 			xprt_request_wait_receive(task);
-		if (rpc_task_need_resched(task))
-			return;
-		call_status(task);
 		return;
 	}
 
@@ -2170,7 +2143,6 @@ call_bc_encode(struct rpc_task *task)
 {
 	xprt_request_enqueue_transmit(task);
 	task->tk_action = call_bc_transmit;
-	call_bc_transmit(task);
 }
 
 /*
@@ -2261,7 +2233,6 @@ call_status(struct rpc_task *task)
 	status = task->tk_status;
 	if (status >= 0) {
 		task->tk_action = call_decode;
-		call_decode(task);
 		return;
 	}
 
-- 
cgit v1.2.3


From f958d7b528b1b40c44cfda5eabe2d82760d868c3 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 11 Apr 2019 10:06:20 -0700
Subject: mm: make page ref count overflow check tighter and more explicit

We have a VM_BUG_ON() to check that the page reference count doesn't
underflow (or get close to overflow) by checking the sign of the count.

That's all fine, but we actually want to allow people to use a "get page
ref unless it's already very high" helper function, and we want that one
to use the sign of the page ref (without triggering this VM_BUG_ON).

Change the VM_BUG_ON to only check for small underflows (or _very_ close
to overflowing), and ignore overflows which have strayed into negative
territory.

Acked-by: Matthew Wilcox <willy@infradead.org>
Cc: Jann Horn <jannh@google.com>
Cc: stable@kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 80bb6408fe73..541d99b86aea 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -965,6 +965,10 @@ static inline bool is_pci_p2pdma_page(const struct page *page)
 }
 #endif /* CONFIG_DEV_PAGEMAP_OPS */
 
+/* 127: arbitrary random number, small enough to assemble well */
+#define page_ref_zero_or_close_to_overflow(page) \
+	((unsigned int) page_ref_count(page) + 127u <= 127u)
+
 static inline void get_page(struct page *page)
 {
 	page = compound_head(page);
@@ -972,7 +976,7 @@ static inline void get_page(struct page *page)
 	 * Getting a normal page or the head of a compound page
 	 * requires to already have an elevated page->_refcount.
 	 */
-	VM_BUG_ON_PAGE(page_ref_count(page) <= 0, page);
+	VM_BUG_ON_PAGE(page_ref_zero_or_close_to_overflow(page), page);
 	page_ref_inc(page);
 }
 
-- 
cgit v1.2.3


From 88b1a17dfc3ed7728316478fae0f5ad508f50397 Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 11 Apr 2019 10:14:59 -0700
Subject: mm: add 'try_get_page()' helper function

This is the same as the traditional 'get_page()' function, but instead
of unconditionally incrementing the reference count of the page, it only
does so if the count was "safe".  It returns whether the reference count
was incremented (and is marked __must_check, since the caller obviously
has to be aware of it).

Also like 'get_page()', you can't use this function unless you already
had a reference to the page.  The intent is that you can use this
exactly like get_page(), but in situations where you want to limit the
maximum reference count.

The code currently does an unconditional WARN_ON_ONCE() if we ever hit
the reference count issues (either zero or negative), as a notification
that the conditional non-increment actually happened.

NOTE! The count access for the "safety" check is inherently racy, but
that doesn't matter since the buffer we use is basically half the range
of the reference count (ie we look at the sign of the count).

Acked-by: Matthew Wilcox <willy@infradead.org>
Cc: Jann Horn <jannh@google.com>
Cc: stable@kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/mm.h | 9 +++++++++
 1 file changed, 9 insertions(+)

(limited to 'include')

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 541d99b86aea..7000ddd807e0 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -980,6 +980,15 @@ static inline void get_page(struct page *page)
 	page_ref_inc(page);
 }
 
+static inline __must_check bool try_get_page(struct page *page)
+{
+	page = compound_head(page);
+	if (WARN_ON_ONCE(page_ref_count(page) <= 0))
+		return false;
+	page_ref_inc(page);
+	return true;
+}
+
 static inline void put_page(struct page *page)
 {
 	page = compound_head(page);
-- 
cgit v1.2.3


From 15fab63e1e57be9fdb5eec1bbc5916e9825e9acb Mon Sep 17 00:00:00 2001
From: Matthew Wilcox <willy@infradead.org>
Date: Fri, 5 Apr 2019 14:02:10 -0700
Subject: fs: prevent page refcount overflow in pipe_buf_get

Change pipe_buf_get() to return a bool indicating whether it succeeded
in raising the refcount of the page (if the thing in the pipe is a page).
This removes another mechanism for overflowing the page refcount.  All
callers converted to handle a failure.

Reported-by: Jann Horn <jannh@google.com>
Signed-off-by: Matthew Wilcox <willy@infradead.org>
Cc: stable@kernel.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 fs/fuse/dev.c             | 12 ++++++------
 fs/pipe.c                 |  4 ++--
 fs/splice.c               | 12 ++++++++++--
 include/linux/pipe_fs_i.h | 10 ++++++----
 kernel/trace/trace.c      |  6 +++++-
 5 files changed, 29 insertions(+), 15 deletions(-)

(limited to 'include')

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 809c0f2f9942..64f4de983468 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -2034,10 +2034,8 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
 		rem += pipe->bufs[(pipe->curbuf + idx) & (pipe->buffers - 1)].len;
 
 	ret = -EINVAL;
-	if (rem < len) {
-		pipe_unlock(pipe);
-		goto out;
-	}
+	if (rem < len)
+		goto out_free;
 
 	rem = len;
 	while (rem) {
@@ -2055,7 +2053,9 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
 			pipe->curbuf = (pipe->curbuf + 1) & (pipe->buffers - 1);
 			pipe->nrbufs--;
 		} else {
-			pipe_buf_get(pipe, ibuf);
+			if (!pipe_buf_get(pipe, ibuf))
+				goto out_free;
+
 			*obuf = *ibuf;
 			obuf->flags &= ~PIPE_BUF_FLAG_GIFT;
 			obuf->len = rem;
@@ -2078,11 +2078,11 @@ static ssize_t fuse_dev_splice_write(struct pipe_inode_info *pipe,
 	ret = fuse_dev_do_write(fud, &cs, len);
 
 	pipe_lock(pipe);
+out_free:
 	for (idx = 0; idx < nbuf; idx++)
 		pipe_buf_release(pipe, &bufs[idx]);
 	pipe_unlock(pipe);
 
-out:
 	kvfree(bufs);
 	return ret;
 }
diff --git a/fs/pipe.c b/fs/pipe.c
index bdc5d3c0977d..b1543b85c14a 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -189,9 +189,9 @@ EXPORT_SYMBOL(generic_pipe_buf_steal);
  *	in the tee() system call, when we duplicate the buffers in one
  *	pipe into another.
  */
-void generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
+bool generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
 {
-	get_page(buf->page);
+	return try_get_page(buf->page);
 }
 EXPORT_SYMBOL(generic_pipe_buf_get);
 
diff --git a/fs/splice.c b/fs/splice.c
index de2ede048473..f30af82b850d 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1588,7 +1588,11 @@ retry:
 			 * Get a reference to this pipe buffer,
 			 * so we can copy the contents over.
 			 */
-			pipe_buf_get(ipipe, ibuf);
+			if (!pipe_buf_get(ipipe, ibuf)) {
+				if (ret == 0)
+					ret = -EFAULT;
+				break;
+			}
 			*obuf = *ibuf;
 
 			/*
@@ -1660,7 +1664,11 @@ static int link_pipe(struct pipe_inode_info *ipipe,
 		 * Get a reference to this pipe buffer,
 		 * so we can copy the contents over.
 		 */
-		pipe_buf_get(ipipe, ibuf);
+		if (!pipe_buf_get(ipipe, ibuf)) {
+			if (ret == 0)
+				ret = -EFAULT;
+			break;
+		}
 
 		obuf = opipe->bufs + nbuf;
 		*obuf = *ibuf;
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index 5a3bb3b7c9ad..3f2a42c11e20 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -108,18 +108,20 @@ struct pipe_buf_operations {
 	/*
 	 * Get a reference to the pipe buffer.
 	 */
-	void (*get)(struct pipe_inode_info *, struct pipe_buffer *);
+	bool (*get)(struct pipe_inode_info *, struct pipe_buffer *);
 };
 
 /**
  * pipe_buf_get - get a reference to a pipe_buffer
  * @pipe:	the pipe that the buffer belongs to
  * @buf:	the buffer to get a reference to
+ *
+ * Return: %true if the reference was successfully obtained.
  */
-static inline void pipe_buf_get(struct pipe_inode_info *pipe,
+static inline __must_check bool pipe_buf_get(struct pipe_inode_info *pipe,
 				struct pipe_buffer *buf)
 {
-	buf->ops->get(pipe, buf);
+	return buf->ops->get(pipe, buf);
 }
 
 /**
@@ -178,7 +180,7 @@ struct pipe_inode_info *alloc_pipe_info(void);
 void free_pipe_info(struct pipe_inode_info *);
 
 /* Generic pipe buffer ops functions */
-void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *);
+bool generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *);
 int generic_pipe_buf_confirm(struct pipe_inode_info *, struct pipe_buffer *);
 int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *);
 void generic_pipe_buf_release(struct pipe_inode_info *, struct pipe_buffer *);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index c4238b441624..0f300d488c9f 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -6835,12 +6835,16 @@ static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
 	buf->private = 0;
 }
 
-static void buffer_pipe_buf_get(struct pipe_inode_info *pipe,
+static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
 				struct pipe_buffer *buf)
 {
 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
 
+	if (ref->ref > INT_MAX/2)
+		return false;
+
 	ref->ref++;
+	return true;
 }
 
 /* Pipe buffer operations for a buffer. */
-- 
cgit v1.2.3