From f75a2804da391571563c4b6b29e7797787332673 Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Thu, 31 Jan 2019 13:05:49 -0800
Subject: xfrm: destroy xfrm_state synchronously on net exit path

xfrm_state_put() moves struct xfrm_state to the GC list
and schedules the GC work to clean it up. On net exit call
path, xfrm_state_flush() is called to clean up and
xfrm_flush_gc() is called to wait for the GC work to complete
before exit.

However, this doesn't work because one of the ->destructor(),
ipcomp_destroy(), schedules the same GC work again inside
the GC work. It is hard to wait for such a nested async
callback. This is also why syzbot still reports the following
warning:

 WARNING: CPU: 1 PID: 33 at net/ipv6/xfrm6_tunnel.c:351 xfrm6_tunnel_net_exit+0x2cb/0x500 net/ipv6/xfrm6_tunnel.c:351
 ...
  ops_exit_list.isra.0+0xb0/0x160 net/core/net_namespace.c:153
  cleanup_net+0x51d/0xb10 net/core/net_namespace.c:551
  process_one_work+0xd0c/0x1ce0 kernel/workqueue.c:2153
  worker_thread+0x143/0x14a0 kernel/workqueue.c:2296
  kthread+0x357/0x430 kernel/kthread.c:246
  ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:352

In fact, it is perfectly fine to bypass GC and destroy xfrm_state
synchronously on net exit call path, because it is in process context
and doesn't need a work struct to do any blocking work.

This patch introduces xfrm_state_put_sync() which simply bypasses
GC, and lets its callers to decide whether to use this synchronous
version. On net exit path, xfrm_state_fini() and
xfrm6_tunnel_net_exit() use it. And, as ipcomp_destroy() itself is
blocking, it can use xfrm_state_put_sync() directly too.

Also rename xfrm_state_gc_destroy() to ___xfrm_state_destroy() to
reflect this change.

Fixes: b48c05ab5d32 ("xfrm: Fix warning in xfrm6_tunnel_net_exit.")
Reported-and-tested-by: syzbot+e9aebef558e3ed673934@syzkaller.appspotmail.com
Cc: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/xfrm.h | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

(limited to 'include')

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 7298a53b9702..85386becbaea 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -853,7 +853,7 @@ static inline void xfrm_pols_put(struct xfrm_policy **pols, int npols)
 		xfrm_pol_put(pols[i]);
 }
 
-void __xfrm_state_destroy(struct xfrm_state *);
+void __xfrm_state_destroy(struct xfrm_state *, bool);
 
 static inline void __xfrm_state_put(struct xfrm_state *x)
 {
@@ -863,7 +863,13 @@ static inline void __xfrm_state_put(struct xfrm_state *x)
 static inline void xfrm_state_put(struct xfrm_state *x)
 {
 	if (refcount_dec_and_test(&x->refcnt))
-		__xfrm_state_destroy(x);
+		__xfrm_state_destroy(x, false);
+}
+
+static inline void xfrm_state_put_sync(struct xfrm_state *x)
+{
+	if (refcount_dec_and_test(&x->refcnt))
+		__xfrm_state_destroy(x, true);
 }
 
 static inline void xfrm_state_hold(struct xfrm_state *x)
@@ -1590,7 +1596,7 @@ struct xfrmk_spdinfo {
 
 struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq);
 int xfrm_state_delete(struct xfrm_state *x);
-int xfrm_state_flush(struct net *net, u8 proto, bool task_valid);
+int xfrm_state_flush(struct net *net, u8 proto, bool task_valid, bool sync);
 int xfrm_dev_state_flush(struct net *net, struct net_device *dev, bool task_valid);
 void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si);
 void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si);
-- 
cgit v1.2.3


From 45815d0962e90ee5dbefff9a88c195a769ace21b Mon Sep 17 00:00:00 2001
From: Chandan Uddaraju <chandanu@codeaurora.org>
Date: Mon, 28 Jan 2019 14:58:53 -0800
Subject: drm: add definitions for DP Audio/Video compliance tests

This change adds definitions needed for DP audio compliance testing.
It also adds missing definition for DP video compliance.

Changes in V2:
-- Delete cover letter for this patch.
-- Move the description from cover letter into patch commit message.
-- Remove DPU from subject prefix

Reviewed-by: Sean Paul <sean@poorly.run>
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch> # for merging through -msm.
Signed-off-by: Chandan Uddaraju <chandanu@codeaurora.org>
Signed-off-by: Sean Paul <seanpaul@chromium.org>
---
 include/drm/drm_dp_helper.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'include')

diff --git a/include/drm/drm_dp_helper.h b/include/drm/drm_dp_helper.h
index c223c87ef119..af181c9c8f54 100644
--- a/include/drm/drm_dp_helper.h
+++ b/include/drm/drm_dp_helper.h
@@ -560,6 +560,8 @@
 # define DP_TEST_LINK_EDID_READ		    (1 << 2)
 # define DP_TEST_LINK_PHY_TEST_PATTERN	    (1 << 3) /* DPCD >= 1.1 */
 # define DP_TEST_LINK_FAUX_PATTERN	    (1 << 4) /* DPCD >= 1.2 */
+# define DP_TEST_LINK_AUDIO_PATTERN         (1 << 5) /* DPCD >= 1.2 */
+# define DP_TEST_LINK_AUDIO_DISABLED_VIDEO  (1 << 6) /* DPCD >= 1.2 */
 
 #define DP_TEST_LINK_RATE		    0x219
 # define DP_LINK_RATE_162		    (0x6)
@@ -608,6 +610,7 @@
 # define DP_COLOR_FORMAT_RGB                (0 << 1)
 # define DP_COLOR_FORMAT_YCbCr422           (1 << 1)
 # define DP_COLOR_FORMAT_YCbCr444           (2 << 1)
+# define DP_TEST_DYNAMIC_RANGE_VESA         (0 << 3)
 # define DP_TEST_DYNAMIC_RANGE_CEA          (1 << 3)
 # define DP_TEST_YCBCR_COEFFICIENTS         (1 << 4)
 # define DP_YCBCR_COEFFICIENTS_ITU601       (0 << 4)
@@ -657,6 +660,16 @@
 
 #define DP_TEST_SINK			    0x270
 # define DP_TEST_SINK_START		    (1 << 0)
+#define DP_TEST_AUDIO_MODE		    0x271
+#define DP_TEST_AUDIO_PATTERN_TYPE	    0x272
+#define DP_TEST_AUDIO_PERIOD_CH1	    0x273
+#define DP_TEST_AUDIO_PERIOD_CH2	    0x274
+#define DP_TEST_AUDIO_PERIOD_CH3	    0x275
+#define DP_TEST_AUDIO_PERIOD_CH4	    0x276
+#define DP_TEST_AUDIO_PERIOD_CH5	    0x277
+#define DP_TEST_AUDIO_PERIOD_CH6	    0x278
+#define DP_TEST_AUDIO_PERIOD_CH7	    0x279
+#define DP_TEST_AUDIO_PERIOD_CH8	    0x27A
 
 #define DP_FEC_STATUS			    0x280    /* 1.4 */
 # define DP_FEC_DECODE_EN_DETECTED	    (1 << 0)
-- 
cgit v1.2.3


From b90efd2258749e04e1b3f71ef0d716f2ac2337e0 Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Thu, 7 Feb 2019 14:54:16 -0500
Subject: bpf: only adjust gso_size on bytestream protocols

bpf_skb_change_proto and bpf_skb_adjust_room change skb header length.
For GSO packets they adjust gso_size to maintain the same MTU.

The gso size can only be safely adjusted on bytestream protocols.
Commit d02f51cbcf12 ("bpf: fix bpf_skb_adjust_net/bpf_skb_proto_xlat
to deal with gso sctp skbs") excluded SKB_GSO_SCTP.

Since then type SKB_GSO_UDP_L4 has been added, whose contents are one
gso_size unit per datagram. Also exclude these.

Move from a blacklist to a whitelist check to future proof against
additional such new GSO types, e.g., for fraglist based GRO.

Fixes: bec1f6f69736 ("udp: generate gso with UDP_SEGMENT")
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/skbuff.h |  6 ++++++
 net/core/filter.c      | 12 ++++--------
 2 files changed, 10 insertions(+), 8 deletions(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 95d25b010a25..5a7a8b93a5ab 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -4212,6 +4212,12 @@ static inline bool skb_is_gso_sctp(const struct sk_buff *skb)
 	return skb_shinfo(skb)->gso_type & SKB_GSO_SCTP;
 }
 
+static inline bool skb_is_gso_tcp(const struct sk_buff *skb)
+{
+	return skb_is_gso(skb) &&
+	       skb_shinfo(skb)->gso_type & (SKB_GSO_TCPV4 | SKB_GSO_TCPV6);
+}
+
 static inline void skb_gso_reset(struct sk_buff *skb)
 {
 	skb_shinfo(skb)->gso_size = 0;
diff --git a/net/core/filter.c b/net/core/filter.c
index 7a54dc11ac2d..f7d0004fc160 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2789,8 +2789,7 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb)
 	u32 off = skb_mac_header_len(skb);
 	int ret;
 
-	/* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */
-	if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb)))
+	if (!skb_is_gso_tcp(skb))
 		return -ENOTSUPP;
 
 	ret = skb_cow(skb, len_diff);
@@ -2831,8 +2830,7 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb)
 	u32 off = skb_mac_header_len(skb);
 	int ret;
 
-	/* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */
-	if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb)))
+	if (!skb_is_gso_tcp(skb))
 		return -ENOTSUPP;
 
 	ret = skb_unclone(skb, GFP_ATOMIC);
@@ -2957,8 +2955,7 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 len_diff)
 	u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb);
 	int ret;
 
-	/* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */
-	if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb)))
+	if (!skb_is_gso_tcp(skb))
 		return -ENOTSUPP;
 
 	ret = skb_cow(skb, len_diff);
@@ -2987,8 +2984,7 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 len_diff)
 	u32 off = skb_mac_header_len(skb) + bpf_skb_net_base_len(skb);
 	int ret;
 
-	/* SCTP uses GSO_BY_FRAGS, thus cannot adjust it. */
-	if (skb_is_gso(skb) && unlikely(skb_is_gso_sctp(skb)))
+	if (!skb_is_gso_tcp(skb))
 		return -ENOTSUPP;
 
 	ret = skb_unclone(skb, GFP_ATOMIC);
-- 
cgit v1.2.3


From f16bb4d280c753ab4bf76d8765a33eb61633d711 Mon Sep 17 00:00:00 2001
From: Emil Velikov <emil.velikov@collabora.com>
Date: Mon, 14 Jan 2019 08:54:07 +0000
Subject: drm: annotate drm_core_check_feature() dev arg. as const

This static inline function doesn't modify any state.

Cc: intel-gfx@lists.freedesktop.org
Signed-off-by: Emil Velikov <emil.velikov@collabora.com>
Reviewed-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/20190114085408.15933-1-emil.l.velikov@gmail.com
---
 include/drm/drm_drv.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/drm/drm_drv.h b/include/drm/drm_drv.h
index 35af23f5fa0d..eca73330ffaf 100644
--- a/include/drm/drm_drv.h
+++ b/include/drm/drm_drv.h
@@ -666,7 +666,7 @@ static inline bool drm_dev_is_unplugged(struct drm_device *dev)
  *
  * Returns true if the @feature is supported, false otherwise.
  */
-static inline bool drm_core_check_feature(struct drm_device *dev, u32 feature)
+static inline bool drm_core_check_feature(const struct drm_device *dev, u32 feature)
 {
 	return dev->driver->driver_features & dev->driver_features & feature;
 }
-- 
cgit v1.2.3


From b5bb37eddb63b16b7ab959598d108b1c444be77d Mon Sep 17 00:00:00 2001
From: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Date: Wed, 30 Jan 2019 02:53:22 +0100
Subject: drm/amdgpu: Add command to override the context priority.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Given a master fd we can then override the priority of the context
in another fd.

Using these overrides was recommended by Christian instead of trying
to submit from a master fd, and I am adding a way to override a
single context instead of the entire process so we can only upgrade
a single Vulkan queue and not effectively the entire process.

Reused the flags field as it was checked to be 0 anyways, so nothing
used it. This is source-incompatible (due to the name change), but
ABI compatible.

Signed-off-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Reviewed-by: Christian König <christian.koenig@amd.com>
Signed-off-by: Alex Deucher <alexander.deucher@amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c | 41 ++++++++++++++++++++++++++++++-
 include/uapi/drm/amdgpu_drm.h             |  3 ++-
 2 files changed, 42 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
index 0b70410488b6..0767a93e4d91 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
@@ -76,6 +76,39 @@ static int amdgpu_sched_process_priority_override(struct amdgpu_device *adev,
 	return 0;
 }
 
+static int amdgpu_sched_context_priority_override(struct amdgpu_device *adev,
+						  int fd,
+						  unsigned ctx_id,
+						  enum drm_sched_priority priority)
+{
+	struct file *filp = fget(fd);
+	struct amdgpu_fpriv *fpriv;
+	struct amdgpu_ctx *ctx;
+	int r;
+
+	if (!filp)
+		return -EINVAL;
+
+	r = amdgpu_file_to_fpriv(filp, &fpriv);
+	if (r) {
+		fput(filp);
+		return r;
+	}
+
+	ctx = amdgpu_ctx_get(fpriv, ctx_id);
+
+	if (!ctx) {
+		fput(filp);
+		return -EINVAL;
+	}
+
+	amdgpu_ctx_priority_override(ctx, priority);
+	amdgpu_ctx_put(ctx);
+	fput(filp);
+
+	return 0;
+}
+
 int amdgpu_sched_ioctl(struct drm_device *dev, void *data,
 		       struct drm_file *filp)
 {
@@ -85,7 +118,7 @@ int amdgpu_sched_ioctl(struct drm_device *dev, void *data,
 	int r;
 
 	priority = amdgpu_to_sched_priority(args->in.priority);
-	if (args->in.flags || priority == DRM_SCHED_PRIORITY_INVALID)
+	if (priority == DRM_SCHED_PRIORITY_INVALID)
 		return -EINVAL;
 
 	switch (args->in.op) {
@@ -94,6 +127,12 @@ int amdgpu_sched_ioctl(struct drm_device *dev, void *data,
 							   args->in.fd,
 							   priority);
 		break;
+	case AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE:
+		r = amdgpu_sched_context_priority_override(adev,
+							   args->in.fd,
+							   args->in.ctx_id,
+							   priority);
+		break;
 	default:
 		DRM_ERROR("Invalid sched op specified: %d\n", args->in.op);
 		r = -EINVAL;
diff --git a/include/uapi/drm/amdgpu_drm.h b/include/uapi/drm/amdgpu_drm.h
index 2acbc8bc465b..4a53f6cfa034 100644
--- a/include/uapi/drm/amdgpu_drm.h
+++ b/include/uapi/drm/amdgpu_drm.h
@@ -272,13 +272,14 @@ union drm_amdgpu_vm {
 
 /* sched ioctl */
 #define AMDGPU_SCHED_OP_PROCESS_PRIORITY_OVERRIDE	1
+#define AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE	2
 
 struct drm_amdgpu_sched_in {
 	/* AMDGPU_SCHED_OP_* */
 	__u32	op;
 	__u32	fd;
 	__s32	priority;
-	__u32	flags;
+	__u32   ctx_id;
 };
 
 union drm_amdgpu_sched {
-- 
cgit v1.2.3


From 822ad64d7e46a8e2c8b8a796738d7b657cbb146d Mon Sep 17 00:00:00 2001
From: David Howells <dhowells@redhat.com>
Date: Thu, 14 Feb 2019 16:20:25 +0000
Subject: keys: Fix dependency loop between construction record and auth key

In the request_key() upcall mechanism there's a dependency loop by which if
a key type driver overrides the ->request_key hook and the userspace side
manages to lose the authorisation key, the auth key and the internal
construction record (struct key_construction) can keep each other pinned.

Fix this by the following changes:

 (1) Killing off the construction record and using the auth key instead.

 (2) Including the operation name in the auth key payload and making the
     payload available outside of security/keys/.

 (3) The ->request_key hook is given the authkey instead of the cons
     record and operation name.

Changes (2) and (3) allow the auth key to naturally be cleaned up if the
keyring it is in is destroyed or cleared or the auth key is unlinked.

Fixes: 7ee02a316600 ("keys: Fix dependency loop between construction record and auth key")
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: James Morris <james.morris@microsoft.com>
---
 fs/nfs/nfs4idmap.c                   | 31 +++++++++-------
 include/keys/request_key_auth-type.h | 36 ++++++++++++++++++
 include/linux/key-type.h             | 22 +++--------
 security/keys/internal.h             | 13 +------
 security/keys/keyctl.c               |  1 +
 security/keys/process_keys.c         |  1 +
 security/keys/request_key.c          | 72 +++++++++++++++---------------------
 security/keys/request_key_auth.c     | 16 ++++----
 8 files changed, 100 insertions(+), 92 deletions(-)
 create mode 100644 include/keys/request_key_auth-type.h

(limited to 'include')

diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c
index 3f23b6840547..bf34ddaa2ad7 100644
--- a/fs/nfs/nfs4idmap.c
+++ b/fs/nfs/nfs4idmap.c
@@ -44,6 +44,7 @@
 #include <linux/keyctl.h>
 #include <linux/key-type.h>
 #include <keys/user-type.h>
+#include <keys/request_key_auth-type.h>
 #include <linux/module.h>
 
 #include "internal.h"
@@ -59,7 +60,7 @@ static struct key_type key_type_id_resolver_legacy;
 struct idmap_legacy_upcalldata {
 	struct rpc_pipe_msg pipe_msg;
 	struct idmap_msg idmap_msg;
-	struct key_construction	*key_cons;
+	struct key	*authkey;
 	struct idmap *idmap;
 };
 
@@ -384,7 +385,7 @@ static const match_table_t nfs_idmap_tokens = {
 	{ Opt_find_err, NULL }
 };
 
-static int nfs_idmap_legacy_upcall(struct key_construction *, const char *, void *);
+static int nfs_idmap_legacy_upcall(struct key *, void *);
 static ssize_t idmap_pipe_downcall(struct file *, const char __user *,
 				   size_t);
 static void idmap_release_pipe(struct inode *);
@@ -549,11 +550,12 @@ nfs_idmap_prepare_pipe_upcall(struct idmap *idmap,
 static void
 nfs_idmap_complete_pipe_upcall_locked(struct idmap *idmap, int ret)
 {
-	struct key_construction *cons = idmap->idmap_upcall_data->key_cons;
+	struct key *authkey = idmap->idmap_upcall_data->authkey;
 
 	kfree(idmap->idmap_upcall_data);
 	idmap->idmap_upcall_data = NULL;
-	complete_request_key(cons, ret);
+	complete_request_key(authkey, ret);
+	key_put(authkey);
 }
 
 static void
@@ -563,15 +565,14 @@ nfs_idmap_abort_pipe_upcall(struct idmap *idmap, int ret)
 		nfs_idmap_complete_pipe_upcall_locked(idmap, ret);
 }
 
-static int nfs_idmap_legacy_upcall(struct key_construction *cons,
-				   const char *op,
-				   void *aux)
+static int nfs_idmap_legacy_upcall(struct key *authkey, void *aux)
 {
 	struct idmap_legacy_upcalldata *data;
+	struct request_key_auth *rka = get_request_key_auth(authkey);
 	struct rpc_pipe_msg *msg;
 	struct idmap_msg *im;
 	struct idmap *idmap = (struct idmap *)aux;
-	struct key *key = cons->key;
+	struct key *key = rka->target_key;
 	int ret = -ENOKEY;
 
 	if (!aux)
@@ -586,7 +587,7 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons,
 	msg = &data->pipe_msg;
 	im = &data->idmap_msg;
 	data->idmap = idmap;
-	data->key_cons = cons;
+	data->authkey = key_get(authkey);
 
 	ret = nfs_idmap_prepare_message(key->description, idmap, im, msg);
 	if (ret < 0)
@@ -604,7 +605,7 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons,
 out2:
 	kfree(data);
 out1:
-	complete_request_key(cons, ret);
+	complete_request_key(authkey, ret);
 	return ret;
 }
 
@@ -651,9 +652,10 @@ out:
 static ssize_t
 idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
 {
+	struct request_key_auth *rka;
 	struct rpc_inode *rpci = RPC_I(file_inode(filp));
 	struct idmap *idmap = (struct idmap *)rpci->private;
-	struct key_construction *cons;
+	struct key *authkey;
 	struct idmap_msg im;
 	size_t namelen_in;
 	int ret = -ENOKEY;
@@ -665,7 +667,8 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
 	if (idmap->idmap_upcall_data == NULL)
 		goto out_noupcall;
 
-	cons = idmap->idmap_upcall_data->key_cons;
+	authkey = idmap->idmap_upcall_data->authkey;
+	rka = get_request_key_auth(authkey);
 
 	if (mlen != sizeof(im)) {
 		ret = -ENOSPC;
@@ -690,9 +693,9 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
 
 	ret = nfs_idmap_read_and_verify_message(&im,
 			&idmap->idmap_upcall_data->idmap_msg,
-			cons->key, cons->authkey);
+			rka->target_key, authkey);
 	if (ret >= 0) {
-		key_set_timeout(cons->key, nfs_idmap_cache_timeout);
+		key_set_timeout(rka->target_key, nfs_idmap_cache_timeout);
 		ret = mlen;
 	}
 
diff --git a/include/keys/request_key_auth-type.h b/include/keys/request_key_auth-type.h
new file mode 100644
index 000000000000..a726dd3f1dc6
--- /dev/null
+++ b/include/keys/request_key_auth-type.h
@@ -0,0 +1,36 @@
+/* request_key authorisation token key type
+ *
+ * Copyright (C) 2005 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public Licence
+ * as published by the Free Software Foundation; either version
+ * 2 of the Licence, or (at your option) any later version.
+ */
+
+#ifndef _KEYS_REQUEST_KEY_AUTH_TYPE_H
+#define _KEYS_REQUEST_KEY_AUTH_TYPE_H
+
+#include <linux/key.h>
+
+/*
+ * Authorisation record for request_key().
+ */
+struct request_key_auth {
+	struct key		*target_key;
+	struct key		*dest_keyring;
+	const struct cred	*cred;
+	void			*callout_info;
+	size_t			callout_len;
+	pid_t			pid;
+	char			op[8];
+} __randomize_layout;
+
+static inline struct request_key_auth *get_request_key_auth(const struct key *key)
+{
+	return key->payload.data[0];
+}
+
+
+#endif /* _KEYS_REQUEST_KEY_AUTH_TYPE_H */
diff --git a/include/linux/key-type.h b/include/linux/key-type.h
index bc9af551fc83..e49d1de0614e 100644
--- a/include/linux/key-type.h
+++ b/include/linux/key-type.h
@@ -20,15 +20,6 @@
 struct kernel_pkey_query;
 struct kernel_pkey_params;
 
-/*
- * key under-construction record
- * - passed to the request_key actor if supplied
- */
-struct key_construction {
-	struct key	*key;	/* key being constructed */
-	struct key	*authkey;/* authorisation for key being constructed */
-};
-
 /*
  * Pre-parsed payload, used by key add, update and instantiate.
  *
@@ -50,8 +41,7 @@ struct key_preparsed_payload {
 	time64_t	expiry;		/* Expiry time of key */
 } __randomize_layout;
 
-typedef int (*request_key_actor_t)(struct key_construction *key,
-				   const char *op, void *aux);
+typedef int (*request_key_actor_t)(struct key *auth_key, void *aux);
 
 /*
  * Preparsed matching criterion.
@@ -181,20 +171,20 @@ extern int key_instantiate_and_link(struct key *key,
 				    const void *data,
 				    size_t datalen,
 				    struct key *keyring,
-				    struct key *instkey);
+				    struct key *authkey);
 extern int key_reject_and_link(struct key *key,
 			       unsigned timeout,
 			       unsigned error,
 			       struct key *keyring,
-			       struct key *instkey);
-extern void complete_request_key(struct key_construction *cons, int error);
+			       struct key *authkey);
+extern void complete_request_key(struct key *authkey, int error);
 
 static inline int key_negate_and_link(struct key *key,
 				      unsigned timeout,
 				      struct key *keyring,
-				      struct key *instkey)
+				      struct key *authkey)
 {
-	return key_reject_and_link(key, timeout, ENOKEY, keyring, instkey);
+	return key_reject_and_link(key, timeout, ENOKEY, keyring, authkey);
 }
 
 extern int generic_key_instantiate(struct key *key, struct key_preparsed_payload *prep);
diff --git a/security/keys/internal.h b/security/keys/internal.h
index 479909b858c7..8f533c81aa8d 100644
--- a/security/keys/internal.h
+++ b/security/keys/internal.h
@@ -186,20 +186,9 @@ static inline int key_permission(const key_ref_t key_ref, unsigned perm)
 	return key_task_permission(key_ref, current_cred(), perm);
 }
 
-/*
- * Authorisation record for request_key().
- */
-struct request_key_auth {
-	struct key		*target_key;
-	struct key		*dest_keyring;
-	const struct cred	*cred;
-	void			*callout_info;
-	size_t			callout_len;
-	pid_t			pid;
-} __randomize_layout;
-
 extern struct key_type key_type_request_key_auth;
 extern struct key *request_key_auth_new(struct key *target,
+					const char *op,
 					const void *callout_info,
 					size_t callout_len,
 					struct key *dest_keyring);
diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c
index e8093d025966..7bbe03593e58 100644
--- a/security/keys/keyctl.c
+++ b/security/keys/keyctl.c
@@ -25,6 +25,7 @@
 #include <linux/security.h>
 #include <linux/uio.h>
 #include <linux/uaccess.h>
+#include <keys/request_key_auth-type.h>
 #include "internal.h"
 
 #define KEY_MAX_DESC_SIZE 4096
diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c
index 02c77e928f68..0e0b9ccad2f8 100644
--- a/security/keys/process_keys.c
+++ b/security/keys/process_keys.c
@@ -19,6 +19,7 @@
 #include <linux/security.h>
 #include <linux/user_namespace.h>
 #include <linux/uaccess.h>
+#include <keys/request_key_auth-type.h>
 #include "internal.h"
 
 /* Session keyring create vs join semaphore */
diff --git a/security/keys/request_key.c b/security/keys/request_key.c
index 301f0e300dbd..3f56a312dd35 100644
--- a/security/keys/request_key.c
+++ b/security/keys/request_key.c
@@ -18,31 +18,30 @@
 #include <linux/keyctl.h>
 #include <linux/slab.h>
 #include "internal.h"
+#include <keys/request_key_auth-type.h>
 
 #define key_negative_timeout	60	/* default timeout on a negative key's existence */
 
 /**
  * complete_request_key - Complete the construction of a key.
- * @cons: The key construction record.
+ * @auth_key: The authorisation key.
  * @error: The success or failute of the construction.
  *
  * Complete the attempt to construct a key.  The key will be negated
  * if an error is indicated.  The authorisation key will be revoked
  * unconditionally.
  */
-void complete_request_key(struct key_construction *cons, int error)
+void complete_request_key(struct key *authkey, int error)
 {
-	kenter("{%d,%d},%d", cons->key->serial, cons->authkey->serial, error);
+	struct request_key_auth *rka = get_request_key_auth(authkey);
+	struct key *key = rka->target_key;
+
+	kenter("%d{%d},%d", authkey->serial, key->serial, error);
 
 	if (error < 0)
-		key_negate_and_link(cons->key, key_negative_timeout, NULL,
-				    cons->authkey);
+		key_negate_and_link(key, key_negative_timeout, NULL, authkey);
 	else
-		key_revoke(cons->authkey);
-
-	key_put(cons->key);
-	key_put(cons->authkey);
-	kfree(cons);
+		key_revoke(authkey);
 }
 EXPORT_SYMBOL(complete_request_key);
 
@@ -91,21 +90,19 @@ static int call_usermodehelper_keys(const char *path, char **argv, char **envp,
  * Request userspace finish the construction of a key
  * - execute "/sbin/request-key <op> <key> <uid> <gid> <keyring> <keyring> <keyring>"
  */
-static int call_sbin_request_key(struct key_construction *cons,
-				 const char *op,
-				 void *aux)
+static int call_sbin_request_key(struct key *authkey, void *aux)
 {
 	static char const request_key[] = "/sbin/request-key";
+	struct request_key_auth *rka = get_request_key_auth(authkey);
 	const struct cred *cred = current_cred();
 	key_serial_t prkey, sskey;
-	struct key *key = cons->key, *authkey = cons->authkey, *keyring,
-		*session;
+	struct key *key = rka->target_key, *keyring, *session;
 	char *argv[9], *envp[3], uid_str[12], gid_str[12];
 	char key_str[12], keyring_str[3][12];
 	char desc[20];
 	int ret, i;
 
-	kenter("{%d},{%d},%s", key->serial, authkey->serial, op);
+	kenter("{%d},{%d},%s", key->serial, authkey->serial, rka->op);
 
 	ret = install_user_keyrings();
 	if (ret < 0)
@@ -163,7 +160,7 @@ static int call_sbin_request_key(struct key_construction *cons,
 	/* set up the argument list */
 	i = 0;
 	argv[i++] = (char *)request_key;
-	argv[i++] = (char *) op;
+	argv[i++] = (char *)rka->op;
 	argv[i++] = key_str;
 	argv[i++] = uid_str;
 	argv[i++] = gid_str;
@@ -191,7 +188,7 @@ error_link:
 	key_put(keyring);
 
 error_alloc:
-	complete_request_key(cons, ret);
+	complete_request_key(authkey, ret);
 	kleave(" = %d", ret);
 	return ret;
 }
@@ -205,42 +202,31 @@ static int construct_key(struct key *key, const void *callout_info,
 			 size_t callout_len, void *aux,
 			 struct key *dest_keyring)
 {
-	struct key_construction *cons;
 	request_key_actor_t actor;
 	struct key *authkey;
 	int ret;
 
 	kenter("%d,%p,%zu,%p", key->serial, callout_info, callout_len, aux);
 
-	cons = kmalloc(sizeof(*cons), GFP_KERNEL);
-	if (!cons)
-		return -ENOMEM;
-
 	/* allocate an authorisation key */
-	authkey = request_key_auth_new(key, callout_info, callout_len,
+	authkey = request_key_auth_new(key, "create", callout_info, callout_len,
 				       dest_keyring);
-	if (IS_ERR(authkey)) {
-		kfree(cons);
-		ret = PTR_ERR(authkey);
-		authkey = NULL;
-	} else {
-		cons->authkey = key_get(authkey);
-		cons->key = key_get(key);
+	if (IS_ERR(authkey))
+		return PTR_ERR(authkey);
 
-		/* make the call */
-		actor = call_sbin_request_key;
-		if (key->type->request_key)
-			actor = key->type->request_key;
+	/* Make the call */
+	actor = call_sbin_request_key;
+	if (key->type->request_key)
+		actor = key->type->request_key;
 
-		ret = actor(cons, "create", aux);
+	ret = actor(authkey, aux);
 
-		/* check that the actor called complete_request_key() prior to
-		 * returning an error */
-		WARN_ON(ret < 0 &&
-			!test_bit(KEY_FLAG_REVOKED, &authkey->flags));
-		key_put(authkey);
-	}
+	/* check that the actor called complete_request_key() prior to
+	 * returning an error */
+	WARN_ON(ret < 0 &&
+		!test_bit(KEY_FLAG_REVOKED, &authkey->flags));
 
+	key_put(authkey);
 	kleave(" = %d", ret);
 	return ret;
 }
@@ -275,7 +261,7 @@ static int construct_get_dest_keyring(struct key **_dest_keyring)
 			if (cred->request_key_auth) {
 				authkey = cred->request_key_auth;
 				down_read(&authkey->sem);
-				rka = authkey->payload.data[0];
+				rka = get_request_key_auth(authkey);
 				if (!test_bit(KEY_FLAG_REVOKED,
 					      &authkey->flags))
 					dest_keyring =
diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c
index 87ea2f54dedc..afc304e8b61e 100644
--- a/security/keys/request_key_auth.c
+++ b/security/keys/request_key_auth.c
@@ -17,7 +17,7 @@
 #include <linux/slab.h>
 #include <linux/uaccess.h>
 #include "internal.h"
-#include <keys/user-type.h>
+#include <keys/request_key_auth-type.h>
 
 static int request_key_auth_preparse(struct key_preparsed_payload *);
 static void request_key_auth_free_preparse(struct key_preparsed_payload *);
@@ -68,7 +68,7 @@ static int request_key_auth_instantiate(struct key *key,
 static void request_key_auth_describe(const struct key *key,
 				      struct seq_file *m)
 {
-	struct request_key_auth *rka = key->payload.data[0];
+	struct request_key_auth *rka = get_request_key_auth(key);
 
 	seq_puts(m, "key:");
 	seq_puts(m, key->description);
@@ -83,7 +83,7 @@ static void request_key_auth_describe(const struct key *key,
 static long request_key_auth_read(const struct key *key,
 				  char __user *buffer, size_t buflen)
 {
-	struct request_key_auth *rka = key->payload.data[0];
+	struct request_key_auth *rka = get_request_key_auth(key);
 	size_t datalen;
 	long ret;
 
@@ -109,7 +109,7 @@ static long request_key_auth_read(const struct key *key,
  */
 static void request_key_auth_revoke(struct key *key)
 {
-	struct request_key_auth *rka = key->payload.data[0];
+	struct request_key_auth *rka = get_request_key_auth(key);
 
 	kenter("{%d}", key->serial);
 
@@ -136,7 +136,7 @@ static void free_request_key_auth(struct request_key_auth *rka)
  */
 static void request_key_auth_destroy(struct key *key)
 {
-	struct request_key_auth *rka = key->payload.data[0];
+	struct request_key_auth *rka = get_request_key_auth(key);
 
 	kenter("{%d}", key->serial);
 
@@ -147,8 +147,9 @@ static void request_key_auth_destroy(struct key *key)
  * Create an authorisation token for /sbin/request-key or whoever to gain
  * access to the caller's security data.
  */
-struct key *request_key_auth_new(struct key *target, const void *callout_info,
-				 size_t callout_len, struct key *dest_keyring)
+struct key *request_key_auth_new(struct key *target, const char *op,
+				 const void *callout_info, size_t callout_len,
+				 struct key *dest_keyring)
 {
 	struct request_key_auth *rka, *irka;
 	const struct cred *cred = current->cred;
@@ -166,6 +167,7 @@ struct key *request_key_auth_new(struct key *target, const void *callout_info,
 	if (!rka->callout_info)
 		goto error_free_rka;
 	rka->callout_len = callout_len;
+	strlcpy(rka->op, op, sizeof(rka->op));
 
 	/* see if the calling process is already servicing the key request of
 	 * another process */
-- 
cgit v1.2.3


From 3b89ea9c5902acccdbbdec307c85edd1bf52515e Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens <hauke.mehrtens@intel.com>
Date: Fri, 15 Feb 2019 17:58:54 +0100
Subject: net: Fix for_each_netdev_feature on Big endian

The features attribute is of type u64 and stored in the native endianes on
the system. The for_each_set_bit() macro takes a pointer to a 32 bit array
and goes over the bits in this area. On little Endian systems this also
works with an u64 as the most significant bit is on the highest address,
but on big endian the words are swapped. When we expect bit 15 here we get
bit 47 (15 + 32).

This patch converts it more or less to its own for_each_set_bit()
implementation which works on 64 bit integers directly. This is then
completely in host endianness and should work like expected.

Fixes: fd867d51f ("net/core: generic support for disabling netdev features down stack")
Signed-off-by: Hauke Mehrtens <hauke.mehrtens@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdev_features.h | 23 +++++++++++++++++++++--
 net/core/dev.c                  |  4 ++--
 2 files changed, 23 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
index 2b2a6dce1630..fce28562bed2 100644
--- a/include/linux/netdev_features.h
+++ b/include/linux/netdev_features.h
@@ -11,6 +11,7 @@
 #define _LINUX_NETDEV_FEATURES_H
 
 #include <linux/types.h>
+#include <asm/byteorder.h>
 
 typedef u64 netdev_features_t;
 
@@ -154,8 +155,26 @@ enum {
 #define NETIF_F_HW_TLS_TX	__NETIF_F(HW_TLS_TX)
 #define NETIF_F_HW_TLS_RX	__NETIF_F(HW_TLS_RX)
 
-#define for_each_netdev_feature(mask_addr, bit)	\
-	for_each_set_bit(bit, (unsigned long *)mask_addr, NETDEV_FEATURE_COUNT)
+/* Finds the next feature with the highest number of the range of start till 0.
+ */
+static inline int find_next_netdev_feature(u64 feature, unsigned long start)
+{
+	/* like BITMAP_LAST_WORD_MASK() for u64
+	 * this sets the most significant 64 - start to 0.
+	 */
+	feature &= ~0ULL >> (-start & ((sizeof(feature) * 8) - 1));
+
+	return fls64(feature) - 1;
+}
+
+/* This goes for the MSB to the LSB through the set feature bits,
+ * mask_addr should be a u64 and bit an int
+ */
+#define for_each_netdev_feature(mask_addr, bit)				\
+	for ((bit) = find_next_netdev_feature((mask_addr),		\
+					      NETDEV_FEATURE_COUNT);	\
+	     (bit) >= 0;						\
+	     (bit) = find_next_netdev_feature((mask_addr), (bit) - 1))
 
 /* Features valid for ethtool to change */
 /* = all defined minus driver/device-class-related */
diff --git a/net/core/dev.c b/net/core/dev.c
index 8e276e0192a1..5d03889502eb 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -8152,7 +8152,7 @@ static netdev_features_t netdev_sync_upper_features(struct net_device *lower,
 	netdev_features_t feature;
 	int feature_bit;
 
-	for_each_netdev_feature(&upper_disables, feature_bit) {
+	for_each_netdev_feature(upper_disables, feature_bit) {
 		feature = __NETIF_F_BIT(feature_bit);
 		if (!(upper->wanted_features & feature)
 		    && (features & feature)) {
@@ -8172,7 +8172,7 @@ static void netdev_sync_lower_features(struct net_device *upper,
 	netdev_features_t feature;
 	int feature_bit;
 
-	for_each_netdev_feature(&upper_disables, feature_bit) {
+	for_each_netdev_feature(upper_disables, feature_bit) {
 		feature = __NETIF_F_BIT(feature_bit);
 		if (!(features & feature) && (lower->features & feature)) {
 			netdev_dbg(upper, "Disabling feature %pNF on lower dev %s.\n",
-- 
cgit v1.2.3


From d5be7f632bad0f489879eed0ff4b99bd7fe0b74c Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Fri, 15 Feb 2019 12:15:47 -0500
Subject: net: validate untrusted gso packets without csum offload

Syzkaller again found a path to a kernel crash through bad gso input.
By building an excessively large packet to cause an skb field to wrap.

If VIRTIO_NET_HDR_F_NEEDS_CSUM was set this would have been dropped in
skb_partial_csum_set.

GSO packets that do not set checksum offload are suspicious and rare.
Most callers of virtio_net_hdr_to_skb already pass them to
skb_probe_transport_header.

Move that test forward, change it to detect parse failure and drop
packets on failure as those cleary are not one of the legitimate
VIRTIO_NET_HDR_GSO types.

Fixes: bfd5f4a3d605 ("packet: Add GSO/csum offload support.")
Fixes: f43798c27684 ("tun: Allow GSO using virtio_net_hdr")
Reported-by: syzbot <syzkaller@googlegroups.com>
Signed-off-by: Willem de Bruijn <willemb@google.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h     | 2 +-
 include/linux/virtio_net.h | 9 +++++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 95d25b010a25..4c1c82a5678c 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -2434,7 +2434,7 @@ static inline void skb_probe_transport_header(struct sk_buff *skb,
 
 	if (skb_flow_dissect_flow_keys_basic(skb, &keys, NULL, 0, 0, 0, 0))
 		skb_set_transport_header(skb, keys.control.thoff);
-	else
+	else if (offset_hint >= 0)
 		skb_set_transport_header(skb, offset_hint);
 }
 
diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index cb462f9ab7dd..71f2394abbf7 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -57,6 +57,15 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
 
 		if (!skb_partial_csum_set(skb, start, off))
 			return -EINVAL;
+	} else {
+		/* gso packets without NEEDS_CSUM do not set transport_offset.
+		 * probe and drop if does not match one of the above types.
+		 */
+		if (gso_type) {
+			skb_probe_transport_header(skb, -1);
+			if (!skb_transport_header_was_set(skb))
+				return -EINVAL;
+		}
 	}
 
 	if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) {
-- 
cgit v1.2.3


From 8681ef1f3d295bd3600315325f3b3396d76d02f6 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sat, 16 Feb 2019 13:44:39 -0800
Subject: net: Add header for usage of fls64()

Fixes: 3b89ea9c5902 ("net: Fix for_each_netdev_feature on Big endian")
Suggested-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdev_features.h | 1 +
 1 file changed, 1 insertion(+)

(limited to 'include')

diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
index fce28562bed2..4c76fe2c8488 100644
--- a/include/linux/netdev_features.h
+++ b/include/linux/netdev_features.h
@@ -11,6 +11,7 @@
 #define _LINUX_NETDEV_FEATURES_H
 
 #include <linux/types.h>
+#include <linux/bitops.h>
 #include <asm/byteorder.h>
 
 typedef u64 netdev_features_t;
-- 
cgit v1.2.3


From eeaf06ac1a5584e41cf289f8351e446bb131374b Mon Sep 17 00:00:00 2001
From: Ben Skeggs <bskeggs@redhat.com>
Date: Thu, 5 Jul 2018 12:57:12 +1000
Subject: drm/nouveau/svm: initial support for shared virtual memory

This uses HMM to mirror a process' CPU page tables into a channel's page
tables, and keep them synchronised so that both the CPU and GPU are able
to access the same memory at the same virtual address.

While this code also supports Volta/Turing, it's only enabled for Pascal
GPUs currently due to channel recovery being unreliable right now on the
later GPUs.

Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
---
 drivers/gpu/drm/nouveau/Kbuild         |   1 +
 drivers/gpu/drm/nouveau/Kconfig        |  11 +
 drivers/gpu/drm/nouveau/nouveau_chan.c |   9 +
 drivers/gpu/drm/nouveau/nouveau_drm.c  |   7 +-
 drivers/gpu/drm/nouveau/nouveau_drv.h  |   2 +
 drivers/gpu/drm/nouveau/nouveau_svm.c  | 737 +++++++++++++++++++++++++++++++++
 drivers/gpu/drm/nouveau/nouveau_svm.h  |  41 ++
 drivers/gpu/drm/nouveau/nouveau_vmm.c  |   2 +
 drivers/gpu/drm/nouveau/nouveau_vmm.h  |   1 +
 include/uapi/drm/nouveau_drm.h         |   8 +
 10 files changed, 818 insertions(+), 1 deletion(-)
 create mode 100644 drivers/gpu/drm/nouveau/nouveau_svm.c
 create mode 100644 drivers/gpu/drm/nouveau/nouveau_svm.h

(limited to 'include')

diff --git a/drivers/gpu/drm/nouveau/Kbuild b/drivers/gpu/drm/nouveau/Kbuild
index b17843dd050d..71bd48aafe64 100644
--- a/drivers/gpu/drm/nouveau/Kbuild
+++ b/drivers/gpu/drm/nouveau/Kbuild
@@ -30,6 +30,7 @@ nouveau-y += nouveau_vga.o
 # DRM - memory management
 nouveau-y += nouveau_bo.o
 nouveau-y += nouveau_gem.o
+nouveau-$(CONFIG_DRM_NOUVEAU_SVM) += nouveau_svm.o
 nouveau-y += nouveau_mem.o
 nouveau-y += nouveau_prime.o
 nouveau-y += nouveau_sgdma.o
diff --git a/drivers/gpu/drm/nouveau/Kconfig b/drivers/gpu/drm/nouveau/Kconfig
index 432c440223bb..e7b26a6f386f 100644
--- a/drivers/gpu/drm/nouveau/Kconfig
+++ b/drivers/gpu/drm/nouveau/Kconfig
@@ -71,3 +71,14 @@ config DRM_NOUVEAU_BACKLIGHT
 	help
 	  Say Y here if you want to control the backlight of your display
 	  (e.g. a laptop panel).
+
+config DRM_NOUVEAU_SVM
+	bool "(EXPERIMENTAL) Enable SVM (Shared Virtual Memory) support"
+	depends on ARCH_HAS_HMM
+	depends on DRM_NOUVEAU
+	depends on STAGING
+	select HMM_MIRROR
+	default n
+	help
+	  Say Y here if you want to enable experimental support for
+	  Shared Virtual Memory (SVM).
diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c b/drivers/gpu/drm/nouveau/nouveau_chan.c
index 5160a0a9c77d..282fd90b65e1 100644
--- a/drivers/gpu/drm/nouveau/nouveau_chan.c
+++ b/drivers/gpu/drm/nouveau/nouveau_chan.c
@@ -42,6 +42,7 @@
 #include "nouveau_fence.h"
 #include "nouveau_abi16.h"
 #include "nouveau_vmm.h"
+#include "nouveau_svm.h"
 
 MODULE_PARM_DESC(vram_pushbuf, "Create DMA push buffers in VRAM");
 int nouveau_vram_pushbuf;
@@ -95,6 +96,10 @@ nouveau_channel_del(struct nouveau_channel **pchan)
 
 		if (chan->fence)
 			nouveau_fence(chan->drm)->context_del(chan);
+
+		if (cli)
+			nouveau_svmm_part(chan->vmm->svmm, chan->inst);
+
 		nvif_object_fini(&chan->nvsw);
 		nvif_object_fini(&chan->gart);
 		nvif_object_fini(&chan->vram);
@@ -494,6 +499,10 @@ nouveau_channel_new(struct nouveau_drm *drm, struct nvif_device *device,
 		nouveau_channel_del(pchan);
 	}
 
+	ret = nouveau_svmm_join((*pchan)->vmm->svmm, (*pchan)->inst);
+	if (ret)
+		nouveau_channel_del(pchan);
+
 done:
 	cli->base.super = super;
 	return ret;
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index 72755d4c3983..53fdfa283ee2 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -62,6 +62,7 @@
 #include "nouveau_usif.h"
 #include "nouveau_connector.h"
 #include "nouveau_platform.h"
+#include "nouveau_svm.h"
 
 MODULE_PARM_DESC(config, "option string to pass to driver core");
 static char *nouveau_config;
@@ -549,6 +550,7 @@ nouveau_drm_device_init(struct drm_device *dev)
 
 	nouveau_debugfs_init(drm);
 	nouveau_hwmon_init(dev);
+	nouveau_svm_init(drm);
 	nouveau_fbcon_init(dev);
 	nouveau_led_init(dev);
 
@@ -592,6 +594,7 @@ nouveau_drm_device_fini(struct drm_device *dev)
 
 	nouveau_led_fini(dev);
 	nouveau_fbcon_fini(dev);
+	nouveau_svm_fini(drm);
 	nouveau_hwmon_fini(dev);
 	nouveau_debugfs_fini(drm);
 
@@ -737,6 +740,7 @@ nouveau_do_suspend(struct drm_device *dev, bool runtime)
 	struct nouveau_drm *drm = nouveau_drm(dev);
 	int ret;
 
+	nouveau_svm_suspend(drm);
 	nouveau_led_suspend(dev);
 
 	if (dev->mode_config.num_crtc) {
@@ -813,7 +817,7 @@ nouveau_do_resume(struct drm_device *dev, bool runtime)
 	}
 
 	nouveau_led_resume(dev);
-
+	nouveau_svm_resume(drm);
 	return 0;
 }
 
@@ -1033,6 +1037,7 @@ nouveau_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(NOUVEAU_GROBJ_ALLOC, nouveau_abi16_ioctl_grobj_alloc, DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(NOUVEAU_NOTIFIEROBJ_ALLOC, nouveau_abi16_ioctl_notifierobj_alloc, DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(NOUVEAU_GPUOBJ_FREE, nouveau_abi16_ioctl_gpuobj_free, DRM_AUTH|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(NOUVEAU_SVM_INIT, nouveau_svmm_init, DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_NEW, nouveau_gem_ioctl_new, DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_PUSHBUF, nouveau_gem_ioctl_pushbuf, DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_CPU_PREP, nouveau_gem_ioctl_cpu_prep, DRM_AUTH|DRM_RENDER_ALLOW),
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index 8fcff0a4800e..1326b42f75e6 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -210,6 +210,8 @@ struct nouveau_drm {
 	bool have_disp_power_ref;
 
 	struct dev_pm_domain vga_pm_domain;
+
+	struct nouveau_svm *svm;
 };
 
 static inline struct nouveau_drm *
diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c
new file mode 100644
index 000000000000..6158e99b1dc3
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nouveau_svm.c
@@ -0,0 +1,737 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "nouveau_svm.h"
+#include "nouveau_drv.h"
+#include "nouveau_chan.h"
+
+#include <nvif/notify.h>
+#include <nvif/object.h>
+#include <nvif/vmm.h>
+
+#include <nvif/class.h>
+#include <nvif/clb069.h>
+#include <nvif/ifc00d.h>
+
+#include <linux/sched/mm.h>
+#include <linux/sort.h>
+#include <linux/hmm.h>
+
+struct nouveau_svm {
+	struct nouveau_drm *drm;
+	struct mutex mutex;
+	struct list_head inst;
+
+	struct nouveau_svm_fault_buffer {
+		int id;
+		struct nvif_object object;
+		u32 entries;
+		u32 getaddr;
+		u32 putaddr;
+		u32 get;
+		u32 put;
+		struct nvif_notify notify;
+
+		struct nouveau_svm_fault {
+			u64 inst;
+			u64 addr;
+			u64 time;
+			u32 engine;
+			u8  gpc;
+			u8  hub;
+			u8  access;
+			u8  client;
+			u8  fault;
+			struct nouveau_svmm *svmm;
+		} **fault;
+		int fault_nr;
+	} buffer[1];
+};
+
+#define SVM_DBG(s,f,a...) NV_DEBUG((s)->drm, "svm: "f"\n", ##a)
+#define SVM_ERR(s,f,a...) NV_WARN((s)->drm, "svm: "f"\n", ##a)
+
+struct nouveau_ivmm {
+	struct nouveau_svmm *svmm;
+	u64 inst;
+	struct list_head head;
+};
+
+static struct nouveau_ivmm *
+nouveau_ivmm_find(struct nouveau_svm *svm, u64 inst)
+{
+	struct nouveau_ivmm *ivmm;
+	list_for_each_entry(ivmm, &svm->inst, head) {
+		if (ivmm->inst == inst)
+			return ivmm;
+	}
+	return NULL;
+}
+
+struct nouveau_svmm {
+	struct nouveau_vmm *vmm;
+	struct {
+		unsigned long start;
+		unsigned long limit;
+	} unmanaged;
+
+	struct mutex mutex;
+
+	struct mm_struct *mm;
+	struct hmm_mirror mirror;
+};
+
+#define SVMM_DBG(s,f,a...)                                                     \
+	NV_DEBUG((s)->vmm->cli->drm, "svm-%p: "f"\n", (s), ##a)
+#define SVMM_ERR(s,f,a...)                                                     \
+	NV_WARN((s)->vmm->cli->drm, "svm-%p: "f"\n", (s), ##a)
+
+/* Unlink channel instance from SVMM. */
+void
+nouveau_svmm_part(struct nouveau_svmm *svmm, u64 inst)
+{
+	struct nouveau_ivmm *ivmm;
+	if (svmm) {
+		mutex_lock(&svmm->vmm->cli->drm->svm->mutex);
+		ivmm = nouveau_ivmm_find(svmm->vmm->cli->drm->svm, inst);
+		if (ivmm) {
+			list_del(&ivmm->head);
+			kfree(ivmm);
+		}
+		mutex_unlock(&svmm->vmm->cli->drm->svm->mutex);
+	}
+}
+
+/* Link channel instance to SVMM. */
+int
+nouveau_svmm_join(struct nouveau_svmm *svmm, u64 inst)
+{
+	struct nouveau_ivmm *ivmm;
+	if (svmm) {
+		if (!(ivmm = kmalloc(sizeof(*ivmm), GFP_KERNEL)))
+			return -ENOMEM;
+		ivmm->svmm = svmm;
+		ivmm->inst = inst;
+
+		mutex_lock(&svmm->vmm->cli->drm->svm->mutex);
+		list_add(&ivmm->head, &svmm->vmm->cli->drm->svm->inst);
+		mutex_unlock(&svmm->vmm->cli->drm->svm->mutex);
+	}
+	return 0;
+}
+
+/* Invalidate SVMM address-range on GPU. */
+static void
+nouveau_svmm_invalidate(struct nouveau_svmm *svmm, u64 start, u64 limit)
+{
+	if (limit > start) {
+		bool super = svmm->vmm->vmm.object.client->super;
+		svmm->vmm->vmm.object.client->super = true;
+		nvif_object_mthd(&svmm->vmm->vmm.object, NVIF_VMM_V0_PFNCLR,
+				 &(struct nvif_vmm_pfnclr_v0) {
+					.addr = start,
+					.size = limit - start,
+				 }, sizeof(struct nvif_vmm_pfnclr_v0));
+		svmm->vmm->vmm.object.client->super = super;
+	}
+}
+
+static int
+nouveau_svmm_sync_cpu_device_pagetables(struct hmm_mirror *mirror,
+					const struct hmm_update *update)
+{
+	struct nouveau_svmm *svmm = container_of(mirror, typeof(*svmm), mirror);
+	unsigned long start = update->start;
+	unsigned long limit = update->end;
+
+	if (!update->blockable)
+		return -EAGAIN;
+
+	SVMM_DBG(svmm, "invalidate %016lx-%016lx", start, limit);
+
+	mutex_lock(&svmm->mutex);
+	if (limit > svmm->unmanaged.start && start < svmm->unmanaged.limit) {
+		if (start < svmm->unmanaged.start) {
+			nouveau_svmm_invalidate(svmm, start,
+						svmm->unmanaged.limit);
+		}
+		start = svmm->unmanaged.limit;
+	}
+
+	nouveau_svmm_invalidate(svmm, start, limit);
+	mutex_unlock(&svmm->mutex);
+	return 0;
+}
+
+static void
+nouveau_svmm_release(struct hmm_mirror *mirror)
+{
+}
+
+static const struct hmm_mirror_ops
+nouveau_svmm = {
+	.sync_cpu_device_pagetables = nouveau_svmm_sync_cpu_device_pagetables,
+	.release = nouveau_svmm_release,
+};
+
+void
+nouveau_svmm_fini(struct nouveau_svmm **psvmm)
+{
+	struct nouveau_svmm *svmm = *psvmm;
+	if (svmm) {
+		hmm_mirror_unregister(&svmm->mirror);
+		kfree(*psvmm);
+		*psvmm = NULL;
+	}
+}
+
+int
+nouveau_svmm_init(struct drm_device *dev, void *data,
+		  struct drm_file *file_priv)
+{
+	struct nouveau_cli *cli = nouveau_cli(file_priv);
+	struct nouveau_svmm *svmm;
+	struct drm_nouveau_svm_init *args = data;
+	int ret;
+
+	/* Allocate tracking for SVM-enabled VMM. */
+	if (!(svmm = kzalloc(sizeof(*svmm), GFP_KERNEL)))
+		return -ENOMEM;
+	svmm->vmm = &cli->svm;
+	svmm->unmanaged.start = args->unmanaged_addr;
+	svmm->unmanaged.limit = args->unmanaged_addr + args->unmanaged_size;
+	mutex_init(&svmm->mutex);
+
+	/* Check that SVM isn't already enabled for the client. */
+	mutex_lock(&cli->mutex);
+	if (cli->svm.cli) {
+		ret = -EBUSY;
+		goto done;
+	}
+
+	/* Allocate a new GPU VMM that can support SVM (managed by the
+	 * client, with replayable faults enabled).
+	 *
+	 * All future channel/memory allocations will make use of this
+	 * VMM instead of the standard one.
+	 */
+	ret = nvif_vmm_init(&cli->mmu, cli->vmm.vmm.object.oclass, true,
+			    args->unmanaged_addr, args->unmanaged_size,
+			    &(struct gp100_vmm_v0) {
+				.fault_replay = true,
+			    }, sizeof(struct gp100_vmm_v0), &cli->svm.vmm);
+	if (ret)
+		goto done;
+
+	/* Enable HMM mirroring of CPU address-space to VMM. */
+	svmm->mm = get_task_mm(current);
+	down_write(&svmm->mm->mmap_sem);
+	svmm->mirror.ops = &nouveau_svmm;
+	ret = hmm_mirror_register(&svmm->mirror, svmm->mm);
+	if (ret == 0) {
+		cli->svm.svmm = svmm;
+		cli->svm.cli = cli;
+	}
+	up_write(&svmm->mm->mmap_sem);
+	mmput(svmm->mm);
+
+done:
+	if (ret)
+		nouveau_svmm_fini(&svmm);
+	mutex_unlock(&cli->mutex);
+	return ret;
+}
+
+static const u64
+nouveau_svm_pfn_flags[HMM_PFN_FLAG_MAX] = {
+	[HMM_PFN_VALID         ] = NVIF_VMM_PFNMAP_V0_V,
+	[HMM_PFN_WRITE         ] = NVIF_VMM_PFNMAP_V0_W,
+	[HMM_PFN_DEVICE_PRIVATE] = NVIF_VMM_PFNMAP_V0_VRAM,
+};
+
+static const u64
+nouveau_svm_pfn_values[HMM_PFN_VALUE_MAX] = {
+	[HMM_PFN_ERROR  ] = ~NVIF_VMM_PFNMAP_V0_V,
+	[HMM_PFN_NONE   ] =  NVIF_VMM_PFNMAP_V0_NONE,
+	[HMM_PFN_SPECIAL] = ~NVIF_VMM_PFNMAP_V0_V,
+};
+
+/* Issue fault replay for GPU to retry accesses that faulted previously. */
+static void
+nouveau_svm_fault_replay(struct nouveau_svm *svm)
+{
+	SVM_DBG(svm, "replay");
+	WARN_ON(nvif_object_mthd(&svm->drm->client.vmm.vmm.object,
+				 GP100_VMM_VN_FAULT_REPLAY,
+				 &(struct gp100_vmm_fault_replay_vn) {},
+				 sizeof(struct gp100_vmm_fault_replay_vn)));
+}
+
+/* Cancel a replayable fault that could not be handled.
+ *
+ * Cancelling the fault will trigger recovery to reset the engine
+ * and kill the offending channel (ie. GPU SIGSEGV).
+ */
+static void
+nouveau_svm_fault_cancel(struct nouveau_svm *svm,
+			 u64 inst, u8 hub, u8 gpc, u8 client)
+{
+	SVM_DBG(svm, "cancel %016llx %d %02x %02x", inst, hub, gpc, client);
+	WARN_ON(nvif_object_mthd(&svm->drm->client.vmm.vmm.object,
+				 GP100_VMM_VN_FAULT_CANCEL,
+				 &(struct gp100_vmm_fault_cancel_v0) {
+					.hub = hub,
+					.gpc = gpc,
+					.client = client,
+					.inst = inst,
+				 }, sizeof(struct gp100_vmm_fault_cancel_v0)));
+}
+
+static void
+nouveau_svm_fault_cancel_fault(struct nouveau_svm *svm,
+			       struct nouveau_svm_fault *fault)
+{
+	nouveau_svm_fault_cancel(svm, fault->inst,
+				      fault->hub,
+				      fault->gpc,
+				      fault->client);
+}
+
+static int
+nouveau_svm_fault_cmp(const void *a, const void *b)
+{
+	const struct nouveau_svm_fault *fa = *(struct nouveau_svm_fault **)a;
+	const struct nouveau_svm_fault *fb = *(struct nouveau_svm_fault **)b;
+	int ret;
+	if ((ret = (s64)fa->inst - fb->inst))
+		return ret;
+	if ((ret = (s64)fa->addr - fb->addr))
+		return ret;
+	/*XXX: atomic? */
+	return (fa->access == 0 || fa->access == 3) -
+	       (fb->access == 0 || fb->access == 3);
+}
+
+static void
+nouveau_svm_fault_cache(struct nouveau_svm *svm,
+			struct nouveau_svm_fault_buffer *buffer, u32 offset)
+{
+	struct nvif_object *memory = &buffer->object;
+	const u32 instlo = nvif_rd32(memory, offset + 0x00);
+	const u32 insthi = nvif_rd32(memory, offset + 0x04);
+	const u32 addrlo = nvif_rd32(memory, offset + 0x08);
+	const u32 addrhi = nvif_rd32(memory, offset + 0x0c);
+	const u32 timelo = nvif_rd32(memory, offset + 0x10);
+	const u32 timehi = nvif_rd32(memory, offset + 0x14);
+	const u32 engine = nvif_rd32(memory, offset + 0x18);
+	const u32   info = nvif_rd32(memory, offset + 0x1c);
+	const u64   inst = (u64)insthi << 32 | instlo;
+	const u8     gpc = (info & 0x1f000000) >> 24;
+	const u8     hub = (info & 0x00100000) >> 20;
+	const u8  client = (info & 0x00007f00) >> 8;
+	struct nouveau_svm_fault *fault;
+
+	//XXX: i think we're supposed to spin waiting */
+	if (WARN_ON(!(info & 0x80000000)))
+		return;
+
+	nvif_mask(memory, offset + 0x1c, 0x80000000, 0x00000000);
+
+	if (!buffer->fault[buffer->fault_nr]) {
+		fault = kmalloc(sizeof(*fault), GFP_KERNEL);
+		if (WARN_ON(!fault)) {
+			nouveau_svm_fault_cancel(svm, inst, hub, gpc, client);
+			return;
+		}
+		buffer->fault[buffer->fault_nr] = fault;
+	}
+
+	fault = buffer->fault[buffer->fault_nr++];
+	fault->inst   = inst;
+	fault->addr   = (u64)addrhi << 32 | addrlo;
+	fault->time   = (u64)timehi << 32 | timelo;
+	fault->engine = engine;
+	fault->gpc    = gpc;
+	fault->hub    = hub;
+	fault->access = (info & 0x000f0000) >> 16;
+	fault->client = client;
+	fault->fault  = (info & 0x0000001f);
+
+	SVM_DBG(svm, "fault %016llx %016llx %02x",
+		fault->inst, fault->addr, fault->access);
+}
+
+static int
+nouveau_svm_fault(struct nvif_notify *notify)
+{
+	struct nouveau_svm_fault_buffer *buffer =
+		container_of(notify, typeof(*buffer), notify);
+	struct nouveau_svm *svm =
+		container_of(buffer, typeof(*svm), buffer[buffer->id]);
+	struct nvif_object *device = &svm->drm->client.device.object;
+	struct nouveau_svmm *svmm;
+	struct {
+		struct {
+			struct nvif_ioctl_v0 i;
+			struct nvif_ioctl_mthd_v0 m;
+			struct nvif_vmm_pfnmap_v0 p;
+		} i;
+		u64 phys[16];
+	} args;
+	struct hmm_range range;
+	struct vm_area_struct *vma;
+	u64 inst, start, limit;
+	int fi, fn, pi, fill;
+	int replay = 0, ret;
+
+	/* Parse available fault buffer entries into a cache, and update
+	 * the GET pointer so HW can reuse the entries.
+	 */
+	SVM_DBG(svm, "fault handler");
+	if (buffer->get == buffer->put) {
+		buffer->put = nvif_rd32(device, buffer->putaddr);
+		buffer->get = nvif_rd32(device, buffer->getaddr);
+		if (buffer->get == buffer->put)
+			return NVIF_NOTIFY_KEEP;
+	}
+	buffer->fault_nr = 0;
+
+	SVM_DBG(svm, "get %08x put %08x", buffer->get, buffer->put);
+	while (buffer->get != buffer->put) {
+		nouveau_svm_fault_cache(svm, buffer, buffer->get * 0x20);
+		if (++buffer->get == buffer->entries)
+			buffer->get = 0;
+	}
+	nvif_wr32(device, buffer->getaddr, buffer->get);
+	SVM_DBG(svm, "%d fault(s) pending", buffer->fault_nr);
+
+	/* Sort parsed faults by instance pointer to prevent unnecessary
+	 * instance to SVMM translations, followed by address and access
+	 * type to reduce the amount of work when handling the faults.
+	 */
+	sort(buffer->fault, buffer->fault_nr, sizeof(*buffer->fault),
+	     nouveau_svm_fault_cmp, NULL);
+
+	/* Lookup SVMM structure for each unique instance pointer. */
+	mutex_lock(&svm->mutex);
+	for (fi = 0, svmm = NULL; fi < buffer->fault_nr; fi++) {
+		if (!svmm || buffer->fault[fi]->inst != inst) {
+			struct nouveau_ivmm *ivmm =
+				nouveau_ivmm_find(svm, buffer->fault[fi]->inst);
+			svmm = ivmm ? ivmm->svmm : NULL;
+			inst = buffer->fault[fi]->inst;
+			SVM_DBG(svm, "inst %016llx -> svm-%p", inst, svmm);
+		}
+		buffer->fault[fi]->svmm = svmm;
+	}
+	mutex_unlock(&svm->mutex);
+
+	/* Process list of faults. */
+	args.i.i.version = 0;
+	args.i.i.type = NVIF_IOCTL_V0_MTHD;
+	args.i.m.version = 0;
+	args.i.m.method = NVIF_VMM_V0_PFNMAP;
+	args.i.p.version = 0;
+
+	for (fi = 0; fn = fi + 1, fi < buffer->fault_nr; fi = fn) {
+		/* Cancel any faults from non-SVM channels. */
+		if (!(svmm = buffer->fault[fi]->svmm)) {
+			nouveau_svm_fault_cancel_fault(svm, buffer->fault[fi]);
+			continue;
+		}
+		SVMM_DBG(svmm, "addr %016llx", buffer->fault[fi]->addr);
+
+		/* We try and group handling of faults within a small
+		 * window into a single update.
+		 */
+		start = buffer->fault[fi]->addr;
+		limit = start + (ARRAY_SIZE(args.phys) << PAGE_SHIFT);
+		if (start < svmm->unmanaged.limit)
+			limit = min_t(u64, limit, svmm->unmanaged.start);
+		else
+		if (limit > svmm->unmanaged.start)
+			start = max_t(u64, start, svmm->unmanaged.limit);
+		SVMM_DBG(svmm, "wndw %016llx-%016llx", start, limit);
+
+		/* Intersect fault window with the CPU VMA, cancelling
+		 * the fault if the address is invalid.
+		 */
+		down_read(&svmm->mm->mmap_sem);
+		vma = find_vma_intersection(svmm->mm, start, limit);
+		if (!vma) {
+			SVMM_ERR(svmm, "wndw %016llx-%016llx", start, limit);
+			up_read(&svmm->mm->mmap_sem);
+			nouveau_svm_fault_cancel_fault(svm, buffer->fault[fi]);
+			continue;
+		}
+		start = max_t(u64, start, vma->vm_start);
+		limit = min_t(u64, limit, vma->vm_end);
+		SVMM_DBG(svmm, "wndw %016llx-%016llx", start, limit);
+
+		if (buffer->fault[fi]->addr != start) {
+			SVMM_ERR(svmm, "addr %016llx", buffer->fault[fi]->addr);
+			up_read(&svmm->mm->mmap_sem);
+			nouveau_svm_fault_cancel_fault(svm, buffer->fault[fi]);
+			continue;
+		}
+
+		/* Prepare the GPU-side update of all pages within the
+		 * fault window, determining required pages and access
+		 * permissions based on pending faults.
+		 */
+		args.i.p.page = PAGE_SHIFT;
+		args.i.p.addr = start;
+		for (fn = fi, pi = 0;;) {
+			/* Determine required permissions based on GPU fault
+			 * access flags.
+			 *XXX: atomic?
+			 */
+			if (buffer->fault[fn]->access != 0 /* READ. */ &&
+			    buffer->fault[fn]->access != 3 /* PREFETCH. */) {
+				args.phys[pi++] = NVIF_VMM_PFNMAP_V0_V |
+						  NVIF_VMM_PFNMAP_V0_W;
+			} else {
+				args.phys[pi++] = NVIF_VMM_PFNMAP_V0_V;
+			}
+			args.i.p.size = pi << PAGE_SHIFT;
+
+			/* It's okay to skip over duplicate addresses from the
+			 * same SVMM as faults are ordered by access type such
+			 * that only the first one needs to be handled.
+			 *
+			 * ie. WRITE faults appear first, thus any handling of
+			 * pending READ faults will already be satisfied.
+			 */
+			while (++fn < buffer->fault_nr &&
+			       buffer->fault[fn]->svmm == svmm &&
+			       buffer->fault[fn    ]->addr ==
+			       buffer->fault[fn - 1]->addr);
+
+			/* If the next fault is outside the window, or all GPU
+			 * faults have been dealt with, we're done here.
+			 */
+			if (fn >= buffer->fault_nr ||
+			    buffer->fault[fn]->svmm != svmm ||
+			    buffer->fault[fn]->addr >= limit)
+				break;
+
+			/* Fill in the gap between this fault and the next. */
+			fill = (buffer->fault[fn    ]->addr -
+				buffer->fault[fn - 1]->addr) >> PAGE_SHIFT;
+			while (--fill)
+				args.phys[pi++] = NVIF_VMM_PFNMAP_V0_NONE;
+		}
+
+		SVMM_DBG(svmm, "wndw %016llx-%016llx covering %d fault(s)",
+			 args.i.p.addr,
+			 args.i.p.addr + args.i.p.size, fn - fi);
+
+		/* Have HMM fault pages within the fault window to the GPU. */
+		range.vma = vma;
+		range.start = args.i.p.addr;
+		range.end = args.i.p.addr + args.i.p.size;
+		range.pfns = args.phys;
+		range.flags = nouveau_svm_pfn_flags;
+		range.values = nouveau_svm_pfn_values;
+		range.pfn_shift = NVIF_VMM_PFNMAP_V0_ADDR_SHIFT;
+again:
+		ret = hmm_vma_fault(&range, true);
+		if (ret == 0) {
+			mutex_lock(&svmm->mutex);
+			if (!hmm_vma_range_done(&range)) {
+				mutex_unlock(&svmm->mutex);
+				goto again;
+			}
+
+			svmm->vmm->vmm.object.client->super = true;
+			ret = nvif_object_ioctl(&svmm->vmm->vmm.object,
+						&args, sizeof(args.i) +
+						pi * sizeof(args.phys[0]),
+						NULL);
+			svmm->vmm->vmm.object.client->super = false;
+			mutex_unlock(&svmm->mutex);
+		}
+		up_read(&svmm->mm->mmap_sem);
+
+		/* Cancel any faults in the window whose pages didn't manage
+		 * to keep their valid bit, or stay writeable when required.
+		 *
+		 * If handling failed completely, cancel all faults.
+		 */
+		while (fi < fn) {
+			struct nouveau_svm_fault *fault = buffer->fault[fi++];
+			pi = (fault->addr - range.start) >> PAGE_SHIFT;
+			if (ret ||
+			     !(range.pfns[pi] & NVIF_VMM_PFNMAP_V0_V) ||
+			    (!(range.pfns[pi] & NVIF_VMM_PFNMAP_V0_W) &&
+			     fault->access != 0 && fault->access != 3)) {
+				nouveau_svm_fault_cancel_fault(svm, fault);
+				continue;
+			}
+			replay++;
+		}
+	}
+
+	/* Issue fault replay to the GPU. */
+	if (replay)
+		nouveau_svm_fault_replay(svm);
+	return NVIF_NOTIFY_KEEP;
+}
+
+static void
+nouveau_svm_fault_buffer_fini(struct nouveau_svm *svm, int id)
+{
+	struct nouveau_svm_fault_buffer *buffer = &svm->buffer[id];
+	nvif_notify_put(&buffer->notify);
+}
+
+static int
+nouveau_svm_fault_buffer_init(struct nouveau_svm *svm, int id)
+{
+	struct nouveau_svm_fault_buffer *buffer = &svm->buffer[id];
+	struct nvif_object *device = &svm->drm->client.device.object;
+	buffer->get = nvif_rd32(device, buffer->getaddr);
+	buffer->put = nvif_rd32(device, buffer->putaddr);
+	SVM_DBG(svm, "get %08x put %08x (init)", buffer->get, buffer->put);
+	return nvif_notify_get(&buffer->notify);
+}
+
+static void
+nouveau_svm_fault_buffer_dtor(struct nouveau_svm *svm, int id)
+{
+	struct nouveau_svm_fault_buffer *buffer = &svm->buffer[id];
+	int i;
+
+	if (buffer->fault) {
+		for (i = 0; buffer->fault[i] && i < buffer->entries; i++)
+			kfree(buffer->fault[i]);
+		kvfree(buffer->fault);
+	}
+
+	nouveau_svm_fault_buffer_fini(svm, id);
+
+	nvif_notify_fini(&buffer->notify);
+	nvif_object_fini(&buffer->object);
+}
+
+static int
+nouveau_svm_fault_buffer_ctor(struct nouveau_svm *svm, s32 oclass, int id)
+{
+	struct nouveau_svm_fault_buffer *buffer = &svm->buffer[id];
+	struct nouveau_drm *drm = svm->drm;
+	struct nvif_object *device = &drm->client.device.object;
+	struct nvif_clb069_v0 args = {};
+	int ret;
+
+	buffer->id = id;
+
+	ret = nvif_object_init(device, 0, oclass, &args, sizeof(args),
+			       &buffer->object);
+	if (ret < 0) {
+		SVM_ERR(svm, "Fault buffer allocation failed: %d", ret);
+		return ret;
+	}
+
+	nvif_object_map(&buffer->object, NULL, 0);
+	buffer->entries = args.entries;
+	buffer->getaddr = args.get;
+	buffer->putaddr = args.put;
+
+	ret = nvif_notify_init(&buffer->object, nouveau_svm_fault, true,
+			       NVB069_V0_NTFY_FAULT, NULL, 0, 0,
+			       &buffer->notify);
+	if (ret)
+		return ret;
+
+	buffer->fault = kvzalloc(sizeof(*buffer->fault) * buffer->entries, GFP_KERNEL);
+	if (!buffer->fault)
+		return -ENOMEM;
+
+	return nouveau_svm_fault_buffer_init(svm, id);
+}
+
+void
+nouveau_svm_resume(struct nouveau_drm *drm)
+{
+	struct nouveau_svm *svm = drm->svm;
+	if (svm)
+		nouveau_svm_fault_buffer_init(svm, 0);
+}
+
+void
+nouveau_svm_suspend(struct nouveau_drm *drm)
+{
+	struct nouveau_svm *svm = drm->svm;
+	if (svm)
+		nouveau_svm_fault_buffer_fini(svm, 0);
+}
+
+void
+nouveau_svm_fini(struct nouveau_drm *drm)
+{
+	struct nouveau_svm *svm = drm->svm;
+	if (svm) {
+		nouveau_svm_fault_buffer_dtor(svm, 0);
+		kfree(drm->svm);
+		drm->svm = NULL;
+	}
+}
+
+void
+nouveau_svm_init(struct nouveau_drm *drm)
+{
+	static const struct nvif_mclass buffers[] = {
+		{   VOLTA_FAULT_BUFFER_A, 0 },
+		{ MAXWELL_FAULT_BUFFER_A, 0 },
+		{}
+	};
+	struct nouveau_svm *svm;
+	int ret;
+
+	/* Disable on Volta and newer until channel recovery is fixed,
+	 * otherwise clients will have a trivial way to trash the GPU
+	 * for everyone.
+	 */
+	if (drm->client.device.info.family > NV_DEVICE_INFO_V0_PASCAL)
+		return;
+
+	if (!(drm->svm = svm = kzalloc(sizeof(*drm->svm), GFP_KERNEL)))
+		return;
+
+	drm->svm->drm = drm;
+	mutex_init(&drm->svm->mutex);
+	INIT_LIST_HEAD(&drm->svm->inst);
+
+	ret = nvif_mclass(&drm->client.device.object, buffers);
+	if (ret < 0) {
+		SVM_DBG(svm, "No supported fault buffer class");
+		nouveau_svm_fini(drm);
+		return;
+	}
+
+	ret = nouveau_svm_fault_buffer_ctor(svm, buffers[ret].oclass, 0);
+	if (ret) {
+		nouveau_svm_fini(drm);
+		return;
+	}
+
+	SVM_DBG(svm, "Initialised");
+}
diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.h b/drivers/gpu/drm/nouveau/nouveau_svm.h
new file mode 100644
index 000000000000..0379a1c316ca
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nouveau_svm.h
@@ -0,0 +1,41 @@
+#ifndef __NOUVEAU_SVM_H__
+#define __NOUVEAU_SVM_H__
+#include <nvif/os.h>
+struct drm_device;
+struct drm_file;
+struct nouveau_drm;
+
+struct nouveau_svmm;
+
+#if IS_ENABLED(CONFIG_DRM_NOUVEAU_SVM)
+void nouveau_svm_init(struct nouveau_drm *);
+void nouveau_svm_fini(struct nouveau_drm *);
+void nouveau_svm_suspend(struct nouveau_drm *);
+void nouveau_svm_resume(struct nouveau_drm *);
+
+int nouveau_svmm_init(struct drm_device *, void *, struct drm_file *);
+void nouveau_svmm_fini(struct nouveau_svmm **);
+int nouveau_svmm_join(struct nouveau_svmm *, u64 inst);
+void nouveau_svmm_part(struct nouveau_svmm *, u64 inst);
+#else /* IS_ENABLED(CONFIG_DRM_NOUVEAU_SVM) */
+static inline void nouveau_svm_init(struct nouveau_drm *drm) {}
+static inline void nouveau_svm_fini(struct nouveau_drm *drm) {}
+static inline void nouveau_svm_suspend(struct nouveau_drm *drm) {}
+static inline void nouveau_svm_resume(struct nouveau_drm *drm) {}
+
+static inline int nouveau_svmm_init(struct drm_device *device, void *p,
+				    struct drm_file *file)
+{
+	return -ENOSYS;
+}
+
+static inline void nouveau_svmm_fini(struct nouveau_svmm **svmmp) {}
+
+static inline int nouveau_svmm_join(struct nouveau_svmm *svmm, u64 inst)
+{
+	return 0;
+}
+
+static inline void nouveau_svmm_part(struct nouveau_svmm *svmm, u64 inst) {}
+#endif /* IS_ENABLED(CONFIG_DRM_NOUVEAU_SVM) */
+#endif
diff --git a/drivers/gpu/drm/nouveau/nouveau_vmm.c b/drivers/gpu/drm/nouveau/nouveau_vmm.c
index 724d02d7c049..77061182a1cf 100644
--- a/drivers/gpu/drm/nouveau/nouveau_vmm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_vmm.c
@@ -22,6 +22,7 @@
 #include "nouveau_vmm.h"
 #include "nouveau_drv.h"
 #include "nouveau_bo.h"
+#include "nouveau_svm.h"
 #include "nouveau_mem.h"
 
 void
@@ -119,6 +120,7 @@ done:
 void
 nouveau_vmm_fini(struct nouveau_vmm *vmm)
 {
+	nouveau_svmm_fini(&vmm->svmm);
 	nvif_vmm_fini(&vmm->vmm);
 	vmm->cli = NULL;
 }
diff --git a/drivers/gpu/drm/nouveau/nouveau_vmm.h b/drivers/gpu/drm/nouveau/nouveau_vmm.h
index ede872f6f668..2b98d975f37e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_vmm.h
+++ b/drivers/gpu/drm/nouveau/nouveau_vmm.h
@@ -25,6 +25,7 @@ void nouveau_vma_unmap(struct nouveau_vma *);
 struct nouveau_vmm {
 	struct nouveau_cli *cli;
 	struct nvif_vmm vmm;
+	struct nouveau_svmm *svmm;
 };
 
 int nouveau_vmm_init(struct nouveau_cli *, s32 oclass, struct nouveau_vmm *);
diff --git a/include/uapi/drm/nouveau_drm.h b/include/uapi/drm/nouveau_drm.h
index 259588a4b61b..afac182c80d8 100644
--- a/include/uapi/drm/nouveau_drm.h
+++ b/include/uapi/drm/nouveau_drm.h
@@ -133,12 +133,20 @@ struct drm_nouveau_gem_cpu_fini {
 #define DRM_NOUVEAU_NOTIFIEROBJ_ALLOC  0x05 /* deprecated */
 #define DRM_NOUVEAU_GPUOBJ_FREE        0x06 /* deprecated */
 #define DRM_NOUVEAU_NVIF               0x07
+#define DRM_NOUVEAU_SVM_INIT           0x08
 #define DRM_NOUVEAU_GEM_NEW            0x40
 #define DRM_NOUVEAU_GEM_PUSHBUF        0x41
 #define DRM_NOUVEAU_GEM_CPU_PREP       0x42
 #define DRM_NOUVEAU_GEM_CPU_FINI       0x43
 #define DRM_NOUVEAU_GEM_INFO           0x44
 
+struct drm_nouveau_svm_init {
+	__u64 unmanaged_addr;
+	__u64 unmanaged_size;
+};
+
+#define DRM_IOCTL_NOUVEAU_SVM_INIT           DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_SVM_INIT, struct drm_nouveau_svm_init)
+
 #define DRM_IOCTL_NOUVEAU_GEM_NEW            DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_NEW, struct drm_nouveau_gem_new)
 #define DRM_IOCTL_NOUVEAU_GEM_PUSHBUF        DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_PUSHBUF, struct drm_nouveau_gem_pushbuf)
 #define DRM_IOCTL_NOUVEAU_GEM_CPU_PREP       DRM_IOW (DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_CPU_PREP, struct drm_nouveau_gem_cpu_prep)
-- 
cgit v1.2.3


From f180bf12ac061f093abb9247505f661817973cae Mon Sep 17 00:00:00 2001
From: Jérôme Glisse <jglisse@redhat.com>
Date: Tue, 7 Aug 2018 16:13:16 -0400
Subject: drm/nouveau/svm: new ioctl to migrate process memory to GPU memory
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This add an ioctl to migrate a range of process address space to the
device memory. On platform without cache coherent bus (x86, ARM, ...)
this means that CPU can not access that range directly, instead CPU
will fault which will migrate the memory back to system memory.

This is behind a staging flag so that we can evolve the API.

Signed-off-by: Jérôme Glisse <jglisse@redhat.com>
---
 drivers/gpu/drm/nouveau/nouveau_drm.c |  1 +
 drivers/gpu/drm/nouveau/nouveau_svm.c | 96 +++++++++++++++++++++++++++++++++++
 drivers/gpu/drm/nouveau/nouveau_svm.h |  7 +++
 include/uapi/drm/nouveau_drm.h        | 43 ++++++++++++++++
 4 files changed, 147 insertions(+)

(limited to 'include')

diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index ee2c4d498d7d..5020265bfbd9 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -1043,6 +1043,7 @@ nouveau_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(NOUVEAU_NOTIFIEROBJ_ALLOC, nouveau_abi16_ioctl_notifierobj_alloc, DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(NOUVEAU_GPUOBJ_FREE, nouveau_abi16_ioctl_gpuobj_free, DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(NOUVEAU_SVM_INIT, nouveau_svmm_init, DRM_AUTH|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(NOUVEAU_SVM_BIND, nouveau_svmm_bind, DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_NEW, nouveau_gem_ioctl_new, DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_PUSHBUF, nouveau_gem_ioctl_pushbuf, DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(NOUVEAU_GEM_CPU_PREP, nouveau_gem_ioctl_cpu_prep, DRM_AUTH|DRM_RENDER_ALLOW),
diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.c b/drivers/gpu/drm/nouveau/nouveau_svm.c
index 3ba980d80a1a..93ed43c413f0 100644
--- a/drivers/gpu/drm/nouveau/nouveau_svm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_svm.c
@@ -22,6 +22,7 @@
 #include "nouveau_svm.h"
 #include "nouveau_drv.h"
 #include "nouveau_chan.h"
+#include "nouveau_dmem.h"
 
 #include <nvif/notify.h>
 #include <nvif/object.h>
@@ -104,6 +105,101 @@ struct nouveau_svmm {
 #define SVMM_ERR(s,f,a...)                                                     \
 	NV_WARN((s)->vmm->cli->drm, "svm-%p: "f"\n", (s), ##a)
 
+int
+nouveau_svmm_bind(struct drm_device *dev, void *data,
+		  struct drm_file *file_priv)
+{
+	struct nouveau_cli *cli = nouveau_cli(file_priv);
+	struct drm_nouveau_svm_bind *args = data;
+	unsigned target, cmd, priority;
+	unsigned long addr, end, size;
+	struct mm_struct *mm;
+
+	args->va_start &= PAGE_MASK;
+	args->va_end &= PAGE_MASK;
+
+	/* Sanity check arguments */
+	if (args->reserved0 || args->reserved1)
+		return -EINVAL;
+	if (args->header & (~NOUVEAU_SVM_BIND_VALID_MASK))
+		return -EINVAL;
+	if (args->va_start >= args->va_end)
+		return -EINVAL;
+	if (!args->npages)
+		return -EINVAL;
+
+	cmd = args->header >> NOUVEAU_SVM_BIND_COMMAND_SHIFT;
+	cmd &= NOUVEAU_SVM_BIND_COMMAND_MASK;
+	switch (cmd) {
+	case NOUVEAU_SVM_BIND_COMMAND__MIGRATE:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	priority = args->header >> NOUVEAU_SVM_BIND_PRIORITY_SHIFT;
+	priority &= NOUVEAU_SVM_BIND_PRIORITY_MASK;
+
+	/* FIXME support CPU target ie all target value < GPU_VRAM */
+	target = args->header >> NOUVEAU_SVM_BIND_TARGET_SHIFT;
+	target &= NOUVEAU_SVM_BIND_TARGET_MASK;
+	switch (target) {
+	case NOUVEAU_SVM_BIND_TARGET__GPU_VRAM:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/*
+	 * FIXME: For now refuse non 0 stride, we need to change the migrate
+	 * kernel function to handle stride to avoid to create a mess within
+	 * each device driver.
+	 */
+	if (args->stride)
+		return -EINVAL;
+
+	size = ((unsigned long)args->npages) << PAGE_SHIFT;
+	if ((args->va_start + size) <= args->va_start)
+		return -EINVAL;
+	if ((args->va_start + size) > args->va_end)
+		return -EINVAL;
+
+	/*
+	 * Ok we are ask to do something sane, for now we only support migrate
+	 * commands but we will add things like memory policy (what to do on
+	 * page fault) and maybe some other commands.
+	 */
+
+	mm = get_task_mm(current);
+	down_read(&mm->mmap_sem);
+
+	for (addr = args->va_start, end = args->va_start + size; addr < end;) {
+		struct vm_area_struct *vma;
+		unsigned long next;
+
+		vma = find_vma_intersection(mm, addr, end);
+		if (!vma)
+			break;
+
+		next = min(vma->vm_end, end);
+		/* This is a best effort so we ignore errors */
+		nouveau_dmem_migrate_vma(cli->drm, vma, addr, next);
+		addr = next;
+	}
+
+	/*
+	 * FIXME Return the number of page we have migrated, again we need to
+	 * update the migrate API to return that information so that we can
+	 * report it to user space.
+	 */
+	args->result = 0;
+
+	up_read(&mm->mmap_sem);
+	mmput(mm);
+
+	return 0;
+}
+
 /* Unlink channel instance from SVMM. */
 void
 nouveau_svmm_part(struct nouveau_svmm *svmm, u64 inst)
diff --git a/drivers/gpu/drm/nouveau/nouveau_svm.h b/drivers/gpu/drm/nouveau/nouveau_svm.h
index 0379a1c316ca..e839d8189461 100644
--- a/drivers/gpu/drm/nouveau/nouveau_svm.h
+++ b/drivers/gpu/drm/nouveau/nouveau_svm.h
@@ -17,6 +17,7 @@ int nouveau_svmm_init(struct drm_device *, void *, struct drm_file *);
 void nouveau_svmm_fini(struct nouveau_svmm **);
 int nouveau_svmm_join(struct nouveau_svmm *, u64 inst);
 void nouveau_svmm_part(struct nouveau_svmm *, u64 inst);
+int nouveau_svmm_bind(struct drm_device *, void *, struct drm_file *);
 #else /* IS_ENABLED(CONFIG_DRM_NOUVEAU_SVM) */
 static inline void nouveau_svm_init(struct nouveau_drm *drm) {}
 static inline void nouveau_svm_fini(struct nouveau_drm *drm) {}
@@ -37,5 +38,11 @@ static inline int nouveau_svmm_join(struct nouveau_svmm *svmm, u64 inst)
 }
 
 static inline void nouveau_svmm_part(struct nouveau_svmm *svmm, u64 inst) {}
+
+static inline int nouveau_svmm_bind(struct drm_device *device, void *p,
+				    struct drm_file *file)
+{
+	return -ENOSYS;
+}
 #endif /* IS_ENABLED(CONFIG_DRM_NOUVEAU_SVM) */
 #endif
diff --git a/include/uapi/drm/nouveau_drm.h b/include/uapi/drm/nouveau_drm.h
index afac182c80d8..9459a6e3bc1f 100644
--- a/include/uapi/drm/nouveau_drm.h
+++ b/include/uapi/drm/nouveau_drm.h
@@ -134,6 +134,7 @@ struct drm_nouveau_gem_cpu_fini {
 #define DRM_NOUVEAU_GPUOBJ_FREE        0x06 /* deprecated */
 #define DRM_NOUVEAU_NVIF               0x07
 #define DRM_NOUVEAU_SVM_INIT           0x08
+#define DRM_NOUVEAU_SVM_BIND           0x09
 #define DRM_NOUVEAU_GEM_NEW            0x40
 #define DRM_NOUVEAU_GEM_PUSHBUF        0x41
 #define DRM_NOUVEAU_GEM_CPU_PREP       0x42
@@ -145,7 +146,49 @@ struct drm_nouveau_svm_init {
 	__u64 unmanaged_size;
 };
 
+struct drm_nouveau_svm_bind {
+	__u64 header;
+	__u64 va_start;
+	__u64 va_end;
+	__u64 npages;
+	__u64 stride;
+	__u64 result;
+	__u64 reserved0;
+	__u64 reserved1;
+};
+
+#define NOUVEAU_SVM_BIND_COMMAND_SHIFT          0
+#define NOUVEAU_SVM_BIND_COMMAND_BITS           8
+#define NOUVEAU_SVM_BIND_COMMAND_MASK           ((1 << 8) - 1)
+#define NOUVEAU_SVM_BIND_PRIORITY_SHIFT         8
+#define NOUVEAU_SVM_BIND_PRIORITY_BITS          8
+#define NOUVEAU_SVM_BIND_PRIORITY_MASK          ((1 << 8) - 1)
+#define NOUVEAU_SVM_BIND_TARGET_SHIFT           16
+#define NOUVEAU_SVM_BIND_TARGET_BITS            32
+#define NOUVEAU_SVM_BIND_TARGET_MASK            0xffffffff
+
+/*
+ * Below is use to validate ioctl argument, userspace can also use it to make
+ * sure that no bit are set beyond known fields for a given kernel version.
+ */
+#define NOUVEAU_SVM_BIND_VALID_BITS     48
+#define NOUVEAU_SVM_BIND_VALID_MASK     ((1ULL << NOUVEAU_SVM_BIND_VALID_BITS) - 1)
+
+
+/*
+ * NOUVEAU_BIND_COMMAND__MIGRATE: synchronous migrate to target memory.
+ * result: number of page successfuly migrate to the target memory.
+ */
+#define NOUVEAU_SVM_BIND_COMMAND__MIGRATE               0
+
+/*
+ * NOUVEAU_SVM_BIND_HEADER_TARGET__GPU_VRAM: target the GPU VRAM memory.
+ */
+#define NOUVEAU_SVM_BIND_TARGET__GPU_VRAM               (1UL << 31)
+
+
 #define DRM_IOCTL_NOUVEAU_SVM_INIT           DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_SVM_INIT, struct drm_nouveau_svm_init)
+#define DRM_IOCTL_NOUVEAU_SVM_BIND           DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_SVM_BIND, struct drm_nouveau_svm_bind)
 
 #define DRM_IOCTL_NOUVEAU_GEM_NEW            DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_NEW, struct drm_nouveau_gem_new)
 #define DRM_IOCTL_NOUVEAU_GEM_PUSHBUF        DRM_IOWR(DRM_COMMAND_BASE + DRM_NOUVEAU_GEM_PUSHBUF, struct drm_nouveau_gem_pushbuf)
-- 
cgit v1.2.3


From 9e8db5913264d3967b93c765a6a9e464d9c473db Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Mon, 18 Feb 2019 23:37:12 -0500
Subject: net: avoid false positives in untrusted gso validation

GSO packets with vnet_hdr must conform to a small set of gso_types.
The below commit uses flow dissection to drop packets that do not.

But it has false positives when the skb is not fully initialized.
Dissection needs skb->protocol and skb->network_header.

Infer skb->protocol from gso_type as the two must agree.
SKB_GSO_UDP can use both ipv4 and ipv6, so try both.

Exclude callers for which network header offset is not known.

Fixes: d5be7f632bad ("net: validate untrusted gso packets without csum offload")
Signed-off-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/virtio_net.h | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

(limited to 'include')

diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index 71f2394abbf7..e0348cb0a1dd 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -61,10 +61,20 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
 		/* gso packets without NEEDS_CSUM do not set transport_offset.
 		 * probe and drop if does not match one of the above types.
 		 */
-		if (gso_type) {
+		if (gso_type && skb->network_header) {
+			if (!skb->protocol)
+				virtio_net_hdr_set_proto(skb, hdr);
+retry:
 			skb_probe_transport_header(skb, -1);
-			if (!skb_transport_header_was_set(skb))
+			if (!skb_transport_header_was_set(skb)) {
+				/* UFO does not specify ipv4 or 6: try both */
+				if (gso_type & SKB_GSO_UDP &&
+				    skb->protocol == htons(ETH_P_IP)) {
+					skb->protocol = htons(ETH_P_IPV6);
+					goto retry;
+				}
 				return -EINVAL;
+			}
 		}
 	}
 
-- 
cgit v1.2.3


From 6321aa197547da397753757bd84c6ce64b3e3d89 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 19 Feb 2019 22:53:50 +0100
Subject: phonet: fix building with clang
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

clang warns about overflowing the data[] member in the struct pnpipehdr:

net/phonet/pep.c:295:8: warning: array index 4 is past the end of the array (which contains 1 element) [-Warray-bounds]
                        if (hdr->data[4] == PEP_IND_READY)
                            ^         ~
include/net/phonet/pep.h:66:3: note: array 'data' declared here
                u8              data[1];

Using a flexible array member at the end of the struct avoids the
warning, but since we cannot have a flexible array member inside
of the union, each index now has to be moved back by one, which
makes it a little uglier.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Rémi Denis-Courmont <remi@remlab.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/phonet/pep.h |  5 +++--
 net/phonet/pep.c         | 32 ++++++++++++++++----------------
 2 files changed, 19 insertions(+), 18 deletions(-)

(limited to 'include')

diff --git a/include/net/phonet/pep.h b/include/net/phonet/pep.h
index b669fe6dbc3b..98f31c7ea23d 100644
--- a/include/net/phonet/pep.h
+++ b/include/net/phonet/pep.h
@@ -63,10 +63,11 @@ struct pnpipehdr {
 		u8		state_after_reset;	/* reset request */
 		u8		error_code;		/* any response */
 		u8		pep_type;		/* status indication */
-		u8		data[1];
+		u8		data0;			/* anything else */
 	};
+	u8			data[];
 };
-#define other_pep_type		data[1]
+#define other_pep_type		data[0]
 
 static inline struct pnpipehdr *pnp_hdr(struct sk_buff *skb)
 {
diff --git a/net/phonet/pep.c b/net/phonet/pep.c
index 9fc76b19cd3c..db3473540303 100644
--- a/net/phonet/pep.c
+++ b/net/phonet/pep.c
@@ -132,7 +132,7 @@ static int pep_indicate(struct sock *sk, u8 id, u8 code,
 	ph->utid = 0;
 	ph->message_id = id;
 	ph->pipe_handle = pn->pipe_handle;
-	ph->data[0] = code;
+	ph->error_code = code;
 	return pn_skb_send(sk, skb, NULL);
 }
 
@@ -153,7 +153,7 @@ static int pipe_handler_request(struct sock *sk, u8 id, u8 code,
 	ph->utid = id; /* whatever */
 	ph->message_id = id;
 	ph->pipe_handle = pn->pipe_handle;
-	ph->data[0] = code;
+	ph->error_code = code;
 	return pn_skb_send(sk, skb, NULL);
 }
 
@@ -208,7 +208,7 @@ static int pep_ctrlreq_error(struct sock *sk, struct sk_buff *oskb, u8 code,
 	struct pnpipehdr *ph;
 	struct sockaddr_pn dst;
 	u8 data[4] = {
-		oph->data[0], /* PEP type */
+		oph->pep_type, /* PEP type */
 		code, /* error code, at an unusual offset */
 		PAD, PAD,
 	};
@@ -221,7 +221,7 @@ static int pep_ctrlreq_error(struct sock *sk, struct sk_buff *oskb, u8 code,
 	ph->utid = oph->utid;
 	ph->message_id = PNS_PEP_CTRL_RESP;
 	ph->pipe_handle = oph->pipe_handle;
-	ph->data[0] = oph->data[1]; /* CTRL id */
+	ph->data0 = oph->data[0]; /* CTRL id */
 
 	pn_skb_get_src_sockaddr(oskb, &dst);
 	return pn_skb_send(sk, skb, &dst);
@@ -272,17 +272,17 @@ static int pipe_rcv_status(struct sock *sk, struct sk_buff *skb)
 		return -EINVAL;
 
 	hdr = pnp_hdr(skb);
-	if (hdr->data[0] != PN_PEP_TYPE_COMMON) {
+	if (hdr->pep_type != PN_PEP_TYPE_COMMON) {
 		net_dbg_ratelimited("Phonet unknown PEP type: %u\n",
-				    (unsigned int)hdr->data[0]);
+				    (unsigned int)hdr->pep_type);
 		return -EOPNOTSUPP;
 	}
 
-	switch (hdr->data[1]) {
+	switch (hdr->data[0]) {
 	case PN_PEP_IND_FLOW_CONTROL:
 		switch (pn->tx_fc) {
 		case PN_LEGACY_FLOW_CONTROL:
-			switch (hdr->data[4]) {
+			switch (hdr->data[3]) {
 			case PEP_IND_BUSY:
 				atomic_set(&pn->tx_credits, 0);
 				break;
@@ -292,7 +292,7 @@ static int pipe_rcv_status(struct sock *sk, struct sk_buff *skb)
 			}
 			break;
 		case PN_ONE_CREDIT_FLOW_CONTROL:
-			if (hdr->data[4] == PEP_IND_READY)
+			if (hdr->data[3] == PEP_IND_READY)
 				atomic_set(&pn->tx_credits, wake = 1);
 			break;
 		}
@@ -301,12 +301,12 @@ static int pipe_rcv_status(struct sock *sk, struct sk_buff *skb)
 	case PN_PEP_IND_ID_MCFC_GRANT_CREDITS:
 		if (pn->tx_fc != PN_MULTI_CREDIT_FLOW_CONTROL)
 			break;
-		atomic_add(wake = hdr->data[4], &pn->tx_credits);
+		atomic_add(wake = hdr->data[3], &pn->tx_credits);
 		break;
 
 	default:
 		net_dbg_ratelimited("Phonet unknown PEP indication: %u\n",
-				    (unsigned int)hdr->data[1]);
+				    (unsigned int)hdr->data[0]);
 		return -EOPNOTSUPP;
 	}
 	if (wake)
@@ -318,7 +318,7 @@ static int pipe_rcv_created(struct sock *sk, struct sk_buff *skb)
 {
 	struct pep_sock *pn = pep_sk(sk);
 	struct pnpipehdr *hdr = pnp_hdr(skb);
-	u8 n_sb = hdr->data[0];
+	u8 n_sb = hdr->data0;
 
 	pn->rx_fc = pn->tx_fc = PN_LEGACY_FLOW_CONTROL;
 	__skb_pull(skb, sizeof(*hdr));
@@ -506,7 +506,7 @@ static int pep_connresp_rcv(struct sock *sk, struct sk_buff *skb)
 		return -ECONNREFUSED;
 
 	/* Parse sub-blocks */
-	n_sb = hdr->data[4];
+	n_sb = hdr->data[3];
 	while (n_sb > 0) {
 		u8 type, buf[6], len = sizeof(buf);
 		const u8 *data = pep_get_sb(skb, &type, &len, buf);
@@ -739,7 +739,7 @@ static int pipe_do_remove(struct sock *sk)
 	ph->utid = 0;
 	ph->message_id = PNS_PIPE_REMOVE_REQ;
 	ph->pipe_handle = pn->pipe_handle;
-	ph->data[0] = PAD;
+	ph->data0 = PAD;
 	return pn_skb_send(sk, skb, NULL);
 }
 
@@ -817,7 +817,7 @@ static struct sock *pep_sock_accept(struct sock *sk, int flags, int *errp,
 	peer_type = hdr->other_pep_type << 8;
 
 	/* Parse sub-blocks (options) */
-	n_sb = hdr->data[4];
+	n_sb = hdr->data[3];
 	while (n_sb > 0) {
 		u8 type, buf[1], len = sizeof(buf);
 		const u8 *data = pep_get_sb(skb, &type, &len, buf);
@@ -1109,7 +1109,7 @@ static int pipe_skb_send(struct sock *sk, struct sk_buff *skb)
 	ph->utid = 0;
 	if (pn->aligned) {
 		ph->message_id = PNS_PIPE_ALIGNED_DATA;
-		ph->data[0] = 0; /* padding */
+		ph->data0 = 0; /* padding */
 	} else
 		ph->message_id = PNS_PIPE_DATA;
 	ph->pipe_handle = pn->pipe_handle;
-- 
cgit v1.2.3


From 4bfbd561fc7d3758873ebfc38b0043065acb77f1 Mon Sep 17 00:00:00 2001
From: Lucas Stach <l.stach@pengutronix.de>
Date: Tue, 11 Sep 2018 15:26:54 +0200
Subject: gpu: ipu-v3: prg: add function to get channel configure status

This allows channels using the PRG to check if a requested configuration
update has been applied or is still pending.

Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
[p.zabel@pengutronix.de: inverted logic: done -> pending]
Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
---
 drivers/gpu/ipu-v3/ipu-prg.c | 16 ++++++++++++++++
 include/video/imx-ipu-v3.h   |  1 +
 2 files changed, 17 insertions(+)

(limited to 'include')

diff --git a/drivers/gpu/ipu-v3/ipu-prg.c b/drivers/gpu/ipu-v3/ipu-prg.c
index 38a3a9764e49..94b76badf677 100644
--- a/drivers/gpu/ipu-v3/ipu-prg.c
+++ b/drivers/gpu/ipu-v3/ipu-prg.c
@@ -347,6 +347,22 @@ int ipu_prg_channel_configure(struct ipuv3_channel *ipu_chan,
 }
 EXPORT_SYMBOL_GPL(ipu_prg_channel_configure);
 
+bool ipu_prg_channel_configure_pending(struct ipuv3_channel *ipu_chan)
+{
+	int prg_chan = ipu_prg_ipu_to_prg_chan(ipu_chan->num);
+	struct ipu_prg *prg = ipu_chan->ipu->prg_priv;
+	struct ipu_prg_channel *chan;
+
+	if (prg_chan < 0)
+		return false;
+
+	chan = &prg->chan[prg_chan];
+	WARN_ON(!chan->enabled);
+
+	return ipu_pre_update_pending(prg->pres[chan->used_pre]);
+}
+EXPORT_SYMBOL_GPL(ipu_prg_channel_configure_pending);
+
 static int ipu_prg_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
diff --git a/include/video/imx-ipu-v3.h b/include/video/imx-ipu-v3.h
index e582e8e7527a..b80b85f0d9d8 100644
--- a/include/video/imx-ipu-v3.h
+++ b/include/video/imx-ipu-v3.h
@@ -348,6 +348,7 @@ int ipu_prg_channel_configure(struct ipuv3_channel *ipu_chan,
 			      unsigned int axi_id,  unsigned int width,
 			      unsigned int height, unsigned int stride,
 			      u32 format, uint64_t modifier, unsigned long *eba);
+bool ipu_prg_channel_configure_pending(struct ipuv3_channel *ipu_chan);
 
 /*
  * IPU CMOS Sensor Interface (csi) functions
-- 
cgit v1.2.3


From cc1780fc42c76c705dd07ea123f1143dc5057630 Mon Sep 17 00:00:00 2001
From: Eric Biggers <ebiggers@google.com>
Date: Wed, 20 Feb 2019 13:32:11 +0000
Subject: KEYS: user: Align the payload buffer

Align the payload of "user" and "logon" keys so that users of the
keyrings service can access it as a struct that requires more than
2-byte alignment.  fscrypt currently does this which results in the read
of fscrypt_key::size being misaligned as it needs 4-byte alignment.

Align to __alignof__(u64) rather than __alignof__(long) since in the
future it's conceivable that people would use structs beginning with
u64, which on some platforms would require more than 'long' alignment.

Reported-by: Aaro Koskinen <aaro.koskinen@iki.fi>
Fixes: 2aa349f6e37c ("[PATCH] Keys: Export user-defined keyring operations")
Fixes: 88bd6ccdcdd6 ("ext4 crypto: add encryption key management facilities")
Cc: stable@vger.kernel.org
Signed-off-by: Eric Biggers <ebiggers@google.com>
Tested-by: Aaro Koskinen <aaro.koskinen@iki.fi>
Signed-off-by: David Howells <dhowells@redhat.com>
Signed-off-by: James Morris <james.morris@microsoft.com>
---
 include/keys/user-type.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/keys/user-type.h b/include/keys/user-type.h
index e098cbe27db5..12babe991594 100644
--- a/include/keys/user-type.h
+++ b/include/keys/user-type.h
@@ -31,7 +31,7 @@
 struct user_key_payload {
 	struct rcu_head	rcu;		/* RCU destructor */
 	unsigned short	datalen;	/* length of this data */
-	char		data[0];	/* actual data */
+	char		data[0] __aligned(__alignof__(u64)); /* actual data */
 };
 
 extern struct key_type key_type_user;
-- 
cgit v1.2.3


From 4c8e0459b585e2a7b367545be3e102737f1e489f Mon Sep 17 00:00:00 2001
From: Linus Walleij <linus.walleij@linaro.org>
Date: Sun, 24 Feb 2019 01:11:15 +0100
Subject: net: phy: realtek: Dummy IRQ calls for RTL8366RB

This fixes a regression introduced by
commit 0d2e778e38e0ddffab4bb2b0e9ed2ad5165c4bf7
"net: phy: replace PHY_HAS_INTERRUPT with a check for
config_intr and ack_interrupt".

This assumes that a PHY cannot trigger interrupt unless
it has .config_intr() or .ack_interrupt() implemented.
A later patch makes the code assume both need to be
implemented for interrupts to be present.

But this PHY (which is inside a DSA) will happily
fire interrupts without either callback.

Implement dummy callbacks for .config_intr() and
.ack_interrupt() in the phy header to fix this.

Tested on the RTL8366RB on D-Link DIR-685.

Fixes: 0d2e778e38e0 ("net: phy: replace PHY_HAS_INTERRUPT with a check for config_intr and ack_interrupt")
Cc: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: Linus Walleij <linus.walleij@linaro.org>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/realtek.c | 7 +++++++
 include/linux/phy.h       | 8 ++++++++
 2 files changed, 15 insertions(+)

(limited to 'include')

diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c
index c6010fb1aa0f..cb4a23041a94 100644
--- a/drivers/net/phy/realtek.c
+++ b/drivers/net/phy/realtek.c
@@ -282,6 +282,13 @@ static struct phy_driver realtek_drvs[] = {
 		.name		= "RTL8366RB Gigabit Ethernet",
 		.features	= PHY_GBIT_FEATURES,
 		.config_init	= &rtl8366rb_config_init,
+		/* These interrupts are handled by the irq controller
+		 * embedded inside the RTL8366RB, they get unmasked when the
+		 * irq is requested and ACKed by reading the status register,
+		 * which is done by the irqchip code.
+		 */
+		.ack_interrupt	= genphy_no_ack_interrupt,
+		.config_intr	= genphy_no_config_intr,
 		.suspend	= genphy_suspend,
 		.resume		= genphy_resume,
 	},
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 127fcc9c3778..333b56d8f746 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -992,6 +992,14 @@ static inline int genphy_no_soft_reset(struct phy_device *phydev)
 {
 	return 0;
 }
+static inline int genphy_no_ack_interrupt(struct phy_device *phydev)
+{
+	return 0;
+}
+static inline int genphy_no_config_intr(struct phy_device *phydev)
+{
+	return 0;
+}
 int genphy_read_mmd_unsupported(struct phy_device *phdev, int devad,
 				u16 regnum);
 int genphy_write_mmd_unsupported(struct phy_device *phdev, int devnum,
-- 
cgit v1.2.3


From 53a41cb7ed381edee91029cdcabe9b3250f43f4d Mon Sep 17 00:00:00 2001
From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Mon, 25 Feb 2019 09:10:51 -0800
Subject: Revert "x86/fault: BUG() when uaccess helpers fault on kernel
 addresses"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This reverts commit 9da3f2b74054406f87dff7101a569217ffceb29b.

It was well-intentioned, but wrong.  Overriding the exception tables for
instructions for random reasons is just wrong, and that is what the new
code did.

It caused problems for tracing, and it caused problems for strncpy_from_user(),
because the new checks made perfectly valid use cases break, rather than
catch things that did bad things.

Unchecked user space accesses are a problem, but that's not a reason to
add invalid checks that then people have to work around with silly flags
(in this case, that 'kernel_uaccess_faults_ok' flag, which is just an
odd way to say "this commit was wrong" and was sprinked into random
places to hide the wrongness).

The real fix to unchecked user space accesses is to get rid of the
special "let's not check __get_user() and __put_user() at all" logic.
Make __{get|put}_user() be just aliases to the regular {get|put}_user()
functions, and make it impossible to access user space without having
the proper checks in places.

The raison d'être of the special double-underscore versions used to be
that the range check was expensive, and if you did multiple user
accesses, you'd do the range check up front (like the signal frame
handling code, for example).  But SMAP (on x86) and PAN (on ARM) have
made that optimization pointless, because the _real_ expense is the "set
CPU flag to allow user space access".

Do let's not break the valid cases to catch invalid cases that shouldn't
even exist.

Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Kees Cook <keescook@chromium.org>
Cc: Tobin C. Harding <tobin@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Jann Horn <jannh@google.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/x86/mm/extable.c | 58 ---------------------------------------------------
 fs/namespace.c        |  2 --
 include/linux/sched.h |  6 ------
 mm/maccess.c          |  6 ------
 4 files changed, 72 deletions(-)

(limited to 'include')

diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 6521134057e8..856fa409c536 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -117,67 +117,11 @@ __visible bool ex_handler_fprestore(const struct exception_table_entry *fixup,
 }
 EXPORT_SYMBOL_GPL(ex_handler_fprestore);
 
-/* Helper to check whether a uaccess fault indicates a kernel bug. */
-static bool bogus_uaccess(struct pt_regs *regs, int trapnr,
-			  unsigned long fault_addr)
-{
-	/* This is the normal case: #PF with a fault address in userspace. */
-	if (trapnr == X86_TRAP_PF && fault_addr < TASK_SIZE_MAX)
-		return false;
-
-	/*
-	 * This code can be reached for machine checks, but only if the #MC
-	 * handler has already decided that it looks like a candidate for fixup.
-	 * This e.g. happens when attempting to access userspace memory which
-	 * the CPU can't access because of uncorrectable bad memory.
-	 */
-	if (trapnr == X86_TRAP_MC)
-		return false;
-
-	/*
-	 * There are two remaining exception types we might encounter here:
-	 *  - #PF for faulting accesses to kernel addresses
-	 *  - #GP for faulting accesses to noncanonical addresses
-	 * Complain about anything else.
-	 */
-	if (trapnr != X86_TRAP_PF && trapnr != X86_TRAP_GP) {
-		WARN(1, "unexpected trap %d in uaccess\n", trapnr);
-		return false;
-	}
-
-	/*
-	 * This is a faulting memory access in kernel space, on a kernel
-	 * address, in a usercopy function. This can e.g. be caused by improper
-	 * use of helpers like __put_user and by improper attempts to access
-	 * userspace addresses in KERNEL_DS regions.
-	 * The one (semi-)legitimate exception are probe_kernel_{read,write}(),
-	 * which can be invoked from places like kgdb, /dev/mem (for reading)
-	 * and privileged BPF code (for reading).
-	 * The probe_kernel_*() functions set the kernel_uaccess_faults_ok flag
-	 * to tell us that faulting on kernel addresses, and even noncanonical
-	 * addresses, in a userspace accessor does not necessarily imply a
-	 * kernel bug, root might just be doing weird stuff.
-	 */
-	if (current->kernel_uaccess_faults_ok)
-		return false;
-
-	/* This is bad. Refuse the fixup so that we go into die(). */
-	if (trapnr == X86_TRAP_PF) {
-		pr_emerg("BUG: pagefault on kernel address 0x%lx in non-whitelisted uaccess\n",
-			 fault_addr);
-	} else {
-		pr_emerg("BUG: GPF in non-whitelisted uaccess (non-canonical address?)\n");
-	}
-	return true;
-}
-
 __visible bool ex_handler_uaccess(const struct exception_table_entry *fixup,
 				  struct pt_regs *regs, int trapnr,
 				  unsigned long error_code,
 				  unsigned long fault_addr)
 {
-	if (bogus_uaccess(regs, trapnr, fault_addr))
-		return false;
 	regs->ip = ex_fixup_addr(fixup);
 	return true;
 }
@@ -188,8 +132,6 @@ __visible bool ex_handler_ext(const struct exception_table_entry *fixup,
 			      unsigned long error_code,
 			      unsigned long fault_addr)
 {
-	if (bogus_uaccess(regs, trapnr, fault_addr))
-		return false;
 	/* Special hack for uaccess_err */
 	current->thread.uaccess_err = 1;
 	regs->ip = ex_fixup_addr(fixup);
diff --git a/fs/namespace.c b/fs/namespace.c
index a677b59efd74..678ef175d63a 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2698,7 +2698,6 @@ static long exact_copy_from_user(void *to, const void __user * from,
 	if (!access_ok(from, n))
 		return n;
 
-	current->kernel_uaccess_faults_ok++;
 	while (n) {
 		if (__get_user(c, f)) {
 			memset(t, 0, n);
@@ -2708,7 +2707,6 @@ static long exact_copy_from_user(void *to, const void __user * from,
 		f++;
 		n--;
 	}
-	current->kernel_uaccess_faults_ok--;
 	return n;
 }
 
diff --git a/include/linux/sched.h b/include/linux/sched.h
index bba3afb4e9bf..f9b43c989577 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -739,12 +739,6 @@ struct task_struct {
 	unsigned			use_memdelay:1;
 #endif
 
-	/*
-	 * May usercopy functions fault on kernel addresses?
-	 * This is not just a single bit because this can potentially nest.
-	 */
-	unsigned int			kernel_uaccess_faults_ok;
-
 	unsigned long			atomic_flags; /* Flags requiring atomic access. */
 
 	struct restart_block		restart_block;
diff --git a/mm/maccess.c b/mm/maccess.c
index f3416632e5a4..ec00be51a24f 100644
--- a/mm/maccess.c
+++ b/mm/maccess.c
@@ -30,10 +30,8 @@ long __probe_kernel_read(void *dst, const void *src, size_t size)
 
 	set_fs(KERNEL_DS);
 	pagefault_disable();
-	current->kernel_uaccess_faults_ok++;
 	ret = __copy_from_user_inatomic(dst,
 			(__force const void __user *)src, size);
-	current->kernel_uaccess_faults_ok--;
 	pagefault_enable();
 	set_fs(old_fs);
 
@@ -60,9 +58,7 @@ long __probe_kernel_write(void *dst, const void *src, size_t size)
 
 	set_fs(KERNEL_DS);
 	pagefault_disable();
-	current->kernel_uaccess_faults_ok++;
 	ret = __copy_to_user_inatomic((__force void __user *)dst, src, size);
-	current->kernel_uaccess_faults_ok--;
 	pagefault_enable();
 	set_fs(old_fs);
 
@@ -98,13 +94,11 @@ long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count)
 
 	set_fs(KERNEL_DS);
 	pagefault_disable();
-	current->kernel_uaccess_faults_ok++;
 
 	do {
 		ret = __get_user(*dst++, (const char __user __force *)src++);
 	} while (dst[-1] && ret == 0 && src - unsafe_addr < count);
 
-	current->kernel_uaccess_faults_ok--;
 	dst[-1] = '\0';
 	pagefault_enable();
 	set_fs(old_fs);
-- 
cgit v1.2.3


From 9ef6b42ad6fd7929dd1b6092cb02014e382c6a91 Mon Sep 17 00:00:00 2001
From: Nazarov Sergey <s-nazarov@yandex.ru>
Date: Mon, 25 Feb 2019 19:24:15 +0300
Subject: net: Add __icmp_send helper.

Add __icmp_send function having ip_options struct parameter

Signed-off-by: Sergey Nazarov <s-nazarov@yandex.ru>
Reviewed-by: Paul Moore <paul@paul-moore.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/icmp.h | 9 ++++++++-
 net/ipv4/icmp.c    | 7 ++++---
 2 files changed, 12 insertions(+), 4 deletions(-)

(limited to 'include')

diff --git a/include/net/icmp.h b/include/net/icmp.h
index 6ac3a5bd0117..e0f709d26dde 100644
--- a/include/net/icmp.h
+++ b/include/net/icmp.h
@@ -22,6 +22,7 @@
 
 #include <net/inet_sock.h>
 #include <net/snmp.h>
+#include <net/ip.h>
 
 struct icmp_err {
   int		errno;
@@ -39,7 +40,13 @@ struct net_proto_family;
 struct sk_buff;
 struct net;
 
-void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info);
+void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
+		 const struct ip_options *opt);
+static inline void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
+{
+	__icmp_send(skb_in, type, code, info, &IPCB(skb_in)->opt);
+}
+
 int icmp_rcv(struct sk_buff *skb);
 int icmp_err(struct sk_buff *skb, u32 info);
 int icmp_init(void);
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 065997f414e6..3f24414150e2 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -570,7 +570,8 @@ relookup_failed:
  *			MUST reply to only the first fragment.
  */
 
-void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
+void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info,
+		 const struct ip_options *opt)
 {
 	struct iphdr *iph;
 	int room;
@@ -691,7 +692,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 					  iph->tos;
 	mark = IP4_REPLY_MARK(net, skb_in->mark);
 
-	if (ip_options_echo(net, &icmp_param.replyopts.opt.opt, skb_in))
+	if (__ip_options_echo(net, &icmp_param.replyopts.opt.opt, skb_in, opt))
 		goto out_unlock;
 
 
@@ -742,7 +743,7 @@ out_bh_enable:
 	local_bh_enable();
 out:;
 }
-EXPORT_SYMBOL(icmp_send);
+EXPORT_SYMBOL(__icmp_send);
 
 
 static void icmp_socket_deliver(struct sk_buff *skb, u32 info)
-- 
cgit v1.2.3


From 3da1ed7ac398f34fff1694017a07054d69c5f5c5 Mon Sep 17 00:00:00 2001
From: Nazarov Sergey <s-nazarov@yandex.ru>
Date: Mon, 25 Feb 2019 19:27:15 +0300
Subject: net: avoid use IPCB in cipso_v4_error

Extract IP options in cipso_v4_error and use __icmp_send.

Signed-off-by: Sergey Nazarov <s-nazarov@yandex.ru>
Acked-by: Paul Moore <paul@paul-moore.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h      |  2 ++
 net/ipv4/cipso_ipv4.c | 17 +++++++++++++++--
 net/ipv4/ip_options.c | 22 +++++++++++++++++-----
 3 files changed, 34 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/net/ip.h b/include/net/ip.h
index 8866bfce6121..f0e8d064e249 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -667,6 +667,8 @@ static inline int ip_options_echo(struct net *net, struct ip_options *dopt,
 }
 
 void ip_options_fragment(struct sk_buff *skb);
+int __ip_options_compile(struct net *net, struct ip_options *opt,
+			 struct sk_buff *skb, __be32 *info);
 int ip_options_compile(struct net *net, struct ip_options *opt,
 		       struct sk_buff *skb);
 int ip_options_get(struct net *net, struct ip_options_rcu **optp,
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 777fa3b7fb13..eff86a71c1b0 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -1735,13 +1735,26 @@ validate_return:
  */
 void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway)
 {
+	unsigned char optbuf[sizeof(struct ip_options) + 40];
+	struct ip_options *opt = (struct ip_options *)optbuf;
+
 	if (ip_hdr(skb)->protocol == IPPROTO_ICMP || error != -EACCES)
 		return;
 
+	/*
+	 * We might be called above the IP layer,
+	 * so we can not use icmp_send and IPCB here.
+	 */
+
+	memset(opt, 0, sizeof(struct ip_options));
+	opt->optlen = ip_hdr(skb)->ihl*4 - sizeof(struct iphdr);
+	if (__ip_options_compile(dev_net(skb->dev), opt, skb, NULL))
+		return;
+
 	if (gateway)
-		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_ANO, 0);
+		__icmp_send(skb, ICMP_DEST_UNREACH, ICMP_NET_ANO, 0, opt);
 	else
-		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_ANO, 0);
+		__icmp_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_ANO, 0, opt);
 }
 
 /**
diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c
index ed194d46c00e..32a35043c9f5 100644
--- a/net/ipv4/ip_options.c
+++ b/net/ipv4/ip_options.c
@@ -251,8 +251,9 @@ static void spec_dst_fill(__be32 *spec_dst, struct sk_buff *skb)
  * If opt == NULL, then skb->data should point to IP header.
  */
 
-int ip_options_compile(struct net *net,
-		       struct ip_options *opt, struct sk_buff *skb)
+int __ip_options_compile(struct net *net,
+			 struct ip_options *opt, struct sk_buff *skb,
+			 __be32 *info)
 {
 	__be32 spec_dst = htonl(INADDR_ANY);
 	unsigned char *pp_ptr = NULL;
@@ -468,11 +469,22 @@ eol:
 		return 0;
 
 error:
-	if (skb) {
-		icmp_send(skb, ICMP_PARAMETERPROB, 0, htonl((pp_ptr-iph)<<24));
-	}
+	if (info)
+		*info = htonl((pp_ptr-iph)<<24);
 	return -EINVAL;
 }
+
+int ip_options_compile(struct net *net,
+		       struct ip_options *opt, struct sk_buff *skb)
+{
+	int ret;
+	__be32 info;
+
+	ret = __ip_options_compile(net, opt, skb, &info);
+	if (ret != 0 && skb)
+		icmp_send(skb, ICMP_PARAMETERPROB, 0, info);
+	return ret;
+}
 EXPORT_SYMBOL(ip_options_compile);
 
 /*
-- 
cgit v1.2.3


From f4d7b3e23d259c44f1f1c39645450680fcd935d6 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Wed, 27 Feb 2019 13:37:26 +0300
Subject: net: dev: Use unsigned integer as an argument to left-shift

1 << 31 is Undefined Behaviour according to the C standard.
Use U type modifier to avoid theoretical overflow.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'include')

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 86dbb3e29139..848b54b7ec91 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -3861,7 +3861,7 @@ static inline u32 netif_msg_init(int debug_value, int default_msg_enable_bits)
 	if (debug_value == 0)	/* no output */
 		return 0;
 	/* set low N bits */
-	return (1 << debug_value) - 1;
+	return (1U << debug_value) - 1;
 }
 
 static inline void __netif_tx_lock(struct netdev_queue *txq, int cpu)
-- 
cgit v1.2.3


From 5e1a99eae84999a2536f50a0beaf5d5262337f40 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Wed, 27 Feb 2019 16:15:29 +0800
Subject: ipv4: Add ICMPv6 support when parse route ipproto

For ip rules, we need to use 'ipproto ipv6-icmp' to match ICMPv6 headers.
But for ip -6 route, currently we only support tcp, udp and icmp.

Add ICMPv6 support so we can match ipv6-icmp rules for route lookup.

v2: As David Ahern and Sabrina Dubroca suggested, Add an argument to
rtm_getroute_parse_ip_proto() to handle ICMP/ICMPv6 with different family.

Reported-by: Jianlin Shi <jishi@redhat.com>
Fixes: eacb9384a3fe ("ipv6: support sport, dport and ip_proto in RTM_GETROUTE")
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h   |  2 +-
 net/ipv4/netlink.c | 17 +++++++++++++----
 net/ipv4/route.c   |  2 +-
 net/ipv6/route.c   |  3 ++-
 4 files changed, 17 insertions(+), 7 deletions(-)

(limited to 'include')

diff --git a/include/net/ip.h b/include/net/ip.h
index f0e8d064e249..be3cad9c2e4c 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -718,7 +718,7 @@ extern int sysctl_icmp_msgs_burst;
 int ip_misc_proc_init(void);
 #endif
 
-int rtm_getroute_parse_ip_proto(struct nlattr *attr, u8 *ip_proto,
+int rtm_getroute_parse_ip_proto(struct nlattr *attr, u8 *ip_proto, u8 family,
 				struct netlink_ext_ack *extack);
 
 #endif	/* _IP_H */
diff --git a/net/ipv4/netlink.c b/net/ipv4/netlink.c
index f86bb4f06609..d8e3a1fb8e82 100644
--- a/net/ipv4/netlink.c
+++ b/net/ipv4/netlink.c
@@ -3,9 +3,10 @@
 #include <linux/types.h>
 #include <net/net_namespace.h>
 #include <net/netlink.h>
+#include <linux/in6.h>
 #include <net/ip.h>
 
-int rtm_getroute_parse_ip_proto(struct nlattr *attr, u8 *ip_proto,
+int rtm_getroute_parse_ip_proto(struct nlattr *attr, u8 *ip_proto, u8 family,
 				struct netlink_ext_ack *extack)
 {
 	*ip_proto = nla_get_u8(attr);
@@ -13,11 +14,19 @@ int rtm_getroute_parse_ip_proto(struct nlattr *attr, u8 *ip_proto,
 	switch (*ip_proto) {
 	case IPPROTO_TCP:
 	case IPPROTO_UDP:
+		return 0;
 	case IPPROTO_ICMP:
+		if (family != AF_INET)
+			break;
+		return 0;
+#if IS_ENABLED(CONFIG_IPV6)
+	case IPPROTO_ICMPV6:
+		if (family != AF_INET6)
+			break;
 		return 0;
-	default:
-		NL_SET_ERR_MSG(extack, "Unsupported ip proto");
-		return -EOPNOTSUPP;
+#endif
 	}
+	NL_SET_ERR_MSG(extack, "Unsupported ip proto");
+	return -EOPNOTSUPP;
 }
 EXPORT_SYMBOL_GPL(rtm_getroute_parse_ip_proto);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 5163b64f8fb3..7bb9128c8363 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2803,7 +2803,7 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 
 	if (tb[RTA_IP_PROTO]) {
 		err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO],
-						  &ip_proto, extack);
+						  &ip_proto, AF_INET, extack);
 		if (err)
 			return err;
 	}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index b7a620023a52..8dad1d690b78 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -4893,7 +4893,8 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
 
 	if (tb[RTA_IP_PROTO]) {
 		err = rtm_getroute_parse_ip_proto(tb[RTA_IP_PROTO],
-						  &fl6.flowi6_proto, extack);
+						  &fl6.flowi6_proto, AF_INET6,
+						  extack);
 		if (err)
 			goto errout;
 	}
-- 
cgit v1.2.3