241 files changed, 7735 insertions, 4945 deletions
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index 3d681a2c2731..9d9de62592be 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -39,6 +39,48 @@ void v9fs_fid_add(struct dentry *dentry, struct p9_fid *fid)
 }
 
 /**
+ * v9fs_fid_find_inode - search for an open fid off of the inode list
+ * @inode: return a fid pointing to a specific inode
+ * @uid: return a fid belonging to the specified user
+ *
+ */
+
+static struct p9_fid *v9fs_fid_find_inode(struct inode *inode, kuid_t uid)
+{
+	struct hlist_head *h;
+	struct p9_fid *fid, *ret = NULL;
+
+	p9_debug(P9_DEBUG_VFS, " inode: %p\n", inode);
+
+	spin_lock(&inode->i_lock);
+	h = (struct hlist_head *)&inode->i_private;
+	hlist_for_each_entry(fid, h, ilist) {
+		if (uid_eq(fid->uid, uid)) {
+			refcount_inc(&fid->count);
+			ret = fid;
+			break;
+		}
+	}
+	spin_unlock(&inode->i_lock);
+	return ret;
+}
+
+/**
+ * v9fs_open_fid_add - add an open fid to an inode
+ * @dentry: inode that the fid is being added to
+ * @fid: fid to add
+ *
+ */
+
+void v9fs_open_fid_add(struct inode *inode, struct p9_fid *fid)
+{
+	spin_lock(&inode->i_lock);
+	hlist_add_head(&fid->ilist, (struct hlist_head *)&inode->i_private);
+	spin_unlock(&inode->i_lock);
+}
+
+
+/**
  * v9fs_fid_find - retrieve a fid that belongs to the specified uid
  * @dentry: dentry to look for fid in
  * @uid: return fid that belongs to the specified user
@@ -54,13 +96,18 @@ static struct p9_fid *v9fs_fid_find(struct dentry *dentry, kuid_t uid, int any)
 		 dentry, dentry, from_kuid(&init_user_ns, uid),
 		 any);
 	ret = NULL;
+
+	if (d_inode(dentry))
+		ret = v9fs_fid_find_inode(d_inode(dentry), uid);
+
 	/* we'll recheck under lock if there's anything to look in */
-	if (dentry->d_fsdata) {
+	if (!ret && dentry->d_fsdata) {
 		struct hlist_head *h = (struct hlist_head *)&dentry->d_fsdata;
 		spin_lock(&dentry->d_lock);
 		hlist_for_each_entry(fid, h, dlist) {
 			if (any || uid_eq(fid->uid, uid)) {
 				ret = fid;
+				refcount_inc(&ret->count);
 				break;
 			}
 		}
@@ -122,7 +169,10 @@ static struct p9_fid *v9fs_fid_lookup_with_uid(struct dentry *dentry,
 	fid = v9fs_fid_find(ds, uid, any);
 	if (fid) {
 		/* Found the parent fid do a lookup with that */
-		fid = p9_client_walk(fid, 1, &dentry->d_name.name, 1);
+		struct p9_fid *ofid = fid;
+
+		fid = p9_client_walk(ofid, 1, &dentry->d_name.name, 1);
+		p9_client_clunk(ofid);
 		goto fid_out;
 	}
 	up_read(&v9ses->rename_sem);
@@ -147,8 +197,10 @@ static struct p9_fid *v9fs_fid_lookup_with_uid(struct dentry *dentry,
 		v9fs_fid_add(dentry->d_sb->s_root, fid);
 	}
 	/* If we are root ourself just return that */
-	if (dentry->d_sb->s_root == dentry)
+	if (dentry->d_sb->s_root == dentry) {
+		refcount_inc(&fid->count);
 		return fid;
+	}
 	/*
 	 * Do a multipath walk with attached root.
 	 * When walking parent we need to make sure we
@@ -195,6 +247,7 @@ fid_out:
 			fid = ERR_PTR(-ENOENT);
 		} else {
 			__add_fid(dentry, fid);
+			refcount_inc(&fid->count);
 			spin_unlock(&dentry->d_lock);
 		}
 	}
@@ -245,11 +298,13 @@ struct p9_fid *v9fs_fid_lookup(struct dentry *dentry)
 struct p9_fid *v9fs_writeback_fid(struct dentry *dentry)
 {
 	int err;
-	struct p9_fid *fid;
+	struct p9_fid *fid, *ofid;
 
-	fid = clone_fid(v9fs_fid_lookup_with_uid(dentry, GLOBAL_ROOT_UID, 0));
+	ofid = v9fs_fid_lookup_with_uid(dentry, GLOBAL_ROOT_UID, 0);
+	fid = clone_fid(ofid);
 	if (IS_ERR(fid))
 		goto error_out;
+	p9_client_clunk(ofid);
 	/*
 	 * writeback fid will only be used to write back the
 	 * dirty pages. We always request for the open fid in read-write
diff --git a/fs/9p/fid.h b/fs/9p/fid.h
index 928b1093f511..f7f33509e169 100644
--- a/fs/9p/fid.h
+++ b/fs/9p/fid.h
@@ -15,12 +15,21 @@ static inline struct p9_fid *v9fs_parent_fid(struct dentry *dentry)
 }
 void v9fs_fid_add(struct dentry *dentry, struct p9_fid *fid);
 struct p9_fid *v9fs_writeback_fid(struct dentry *dentry);
+void v9fs_open_fid_add(struct inode *inode, struct p9_fid *fid);
 static inline struct p9_fid *clone_fid(struct p9_fid *fid)
 {
 	return IS_ERR(fid) ? fid :  p9_client_walk(fid, 0, NULL, 1);
 }
 static inline struct p9_fid *v9fs_fid_clone(struct dentry *dentry)
 {
-	return clone_fid(v9fs_fid_lookup(dentry));
+	struct p9_fid *fid, *nfid;
+
+	fid = v9fs_fid_lookup(dentry);
+	if (!fid || IS_ERR(fid))
+		return fid;
+
+	nfid = clone_fid(fid);
+	p9_client_clunk(fid);
+	return nfid;
 }
 #endif
diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c
index 7d6f69aefd45..4b4292123b3d 100644
--- a/fs/9p/vfs_dentry.c
+++ b/fs/9p/vfs_dentry.c
@@ -85,6 +85,8 @@ static int v9fs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
 			retval = v9fs_refresh_inode_dotl(fid, inode);
 		else
 			retval = v9fs_refresh_inode(fid, inode);
+		p9_client_clunk(fid);
+
 		if (retval == -ENOENT)
 			return 0;
 		if (retval < 0)
diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c
index 674d22bf4f6f..b6a5a0be444d 100644
--- a/fs/9p/vfs_dir.c
+++ b/fs/9p/vfs_dir.c
@@ -210,8 +210,12 @@ int v9fs_dir_release(struct inode *inode, struct file *filp)
 	fid = filp->private_data;
 	p9_debug(P9_DEBUG_VFS, "inode: %p filp: %p fid: %d\n",
 		 inode, filp, fid ? fid->fid : -1);
-	if (fid)
+	if (fid) {
+		spin_lock(&inode->i_lock);
+		hlist_del(&fid->ilist);
+		spin_unlock(&inode->i_lock);
 		p9_client_clunk(fid);
+	}
 	return 0;
 }
 
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index be5768949cb1..649f04f112dc 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -46,7 +46,7 @@ int v9fs_file_open(struct inode *inode, struct file *file)
 	int err;
 	struct v9fs_inode *v9inode;
 	struct v9fs_session_info *v9ses;
-	struct p9_fid *fid;
+	struct p9_fid *fid, *writeback_fid;
 	int omode;
 
 	p9_debug(P9_DEBUG_VFS, "inode: %p file: %p\n", inode, file);
@@ -85,17 +85,18 @@ int v9fs_file_open(struct inode *inode, struct file *file)
 		 * because we want write after unlink usecase
 		 * to work.
 		 */
-		fid = v9fs_writeback_fid(file_dentry(file));
+		writeback_fid = v9fs_writeback_fid(file_dentry(file));
 		if (IS_ERR(fid)) {
 			err = PTR_ERR(fid);
 			mutex_unlock(&v9inode->v_mutex);
 			goto out_error;
 		}
-		v9inode->writeback_fid = (void *) fid;
+		v9inode->writeback_fid = (void *) writeback_fid;
 	}
 	mutex_unlock(&v9inode->v_mutex);
 	if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
 		v9fs_cache_inode_set_cookie(inode, file);
+	v9fs_open_fid_add(inode, fid);
 	return 0;
 out_error:
 	p9_client_clunk(file->private_data);
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index ae0c38ad1fcb..4a937fac1acb 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -256,6 +256,7 @@ int v9fs_init_inode(struct v9fs_session_info *v9ses,
 	inode->i_rdev = rdev;
 	inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
 	inode->i_mapping->a_ops = &v9fs_addr_operations;
+	inode->i_private = NULL;
 
 	switch (mode & S_IFMT) {
 	case S_IFIFO:
@@ -550,6 +551,7 @@ static int v9fs_remove(struct inode *dir, struct dentry *dentry, int flags)
 	if (v9fs_proto_dotl(v9ses))
 		retval = p9_client_unlinkat(dfid, dentry->d_name.name,
 					    v9fs_at_to_dotl_flags(flags));
+	p9_client_clunk(dfid);
 	if (retval == -EOPNOTSUPP) {
 		/* Try the one based on path */
 		v9fid = v9fs_fid_clone(dentry);
@@ -570,6 +572,10 @@ static int v9fs_remove(struct inode *dir, struct dentry *dentry, int flags)
 
 		v9fs_invalidate_inode_attr(inode);
 		v9fs_invalidate_inode_attr(dir);
+
+		/* invalidate all fids associated with dentry */
+		/* NOTE: This will not include open fids */
+		dentry->d_op->d_release(dentry);
 	}
 	return retval;
 }
@@ -590,14 +596,12 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
 {
 	int err;
 	const unsigned char *name;
-	struct p9_fid *dfid, *ofid, *fid;
+	struct p9_fid *dfid, *ofid = NULL, *fid = NULL;
 	struct inode *inode;
 
 	p9_debug(P9_DEBUG_VFS, "name %pd\n", dentry);
 
 	err = 0;
-	ofid = NULL;
-	fid = NULL;
 	name = dentry->d_name.name;
 	dfid = v9fs_parent_fid(dentry);
 	if (IS_ERR(dfid)) {
@@ -611,12 +615,14 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
 	if (IS_ERR(ofid)) {
 		err = PTR_ERR(ofid);
 		p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
+		p9_client_clunk(dfid);
 		return ERR_PTR(err);
 	}
 
 	err = p9_client_fcreate(ofid, name, perm, mode, extension);
 	if (err < 0) {
 		p9_debug(P9_DEBUG_VFS, "p9_client_fcreate failed %d\n", err);
+		p9_client_clunk(dfid);
 		goto error;
 	}
 
@@ -628,6 +634,7 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
 			p9_debug(P9_DEBUG_VFS,
 				   "p9_client_walk failed %d\n", err);
 			fid = NULL;
+			p9_client_clunk(dfid);
 			goto error;
 		}
 		/*
@@ -638,11 +645,13 @@ v9fs_create(struct v9fs_session_info *v9ses, struct inode *dir,
 			err = PTR_ERR(inode);
 			p9_debug(P9_DEBUG_VFS,
 				   "inode creation failed %d\n", err);
+			p9_client_clunk(dfid);
 			goto error;
 		}
 		v9fs_fid_add(dentry, fid);
 		d_instantiate(dentry, inode);
 	}
+	p9_client_clunk(dfid);
 	return ofid;
 error:
 	if (ofid)
@@ -755,6 +764,7 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
 	 */
 	name = dentry->d_name.name;
 	fid = p9_client_walk(dfid, 1, &name, 1);
+	p9_client_clunk(dfid);
 	if (fid == ERR_PTR(-ENOENT))
 		inode = NULL;
 	else if (IS_ERR(fid))
@@ -792,6 +802,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
 	struct v9fs_session_info *v9ses;
 	struct p9_fid *fid, *inode_fid;
 	struct dentry *res = NULL;
+	struct inode *inode;
 
 	if (d_in_lookup(dentry)) {
 		res = v9fs_vfs_lookup(dir, dentry, 0);
@@ -820,7 +831,8 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
 	}
 
 	v9fs_invalidate_inode_attr(dir);
-	v9inode = V9FS_I(d_inode(dentry));
+	inode = d_inode(dentry);
+	v9inode = V9FS_I(inode);
 	mutex_lock(&v9inode->v_mutex);
 	if ((v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) &&
 	    !v9inode->writeback_fid &&
@@ -848,6 +860,7 @@ v9fs_vfs_atomic_open(struct inode *dir, struct dentry *dentry,
 	file->private_data = fid;
 	if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
 		v9fs_cache_inode_set_cookie(d_inode(dentry), file);
+	v9fs_open_fid_add(inode, fid);
 
 	file->f_mode |= FMODE_CREATED;
 out:
@@ -902,7 +915,7 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	struct inode *old_inode;
 	struct inode *new_inode;
 	struct v9fs_session_info *v9ses;
-	struct p9_fid *oldfid;
+	struct p9_fid *oldfid, *dfid;
 	struct p9_fid *olddirfid;
 	struct p9_fid *newdirfid;
 	struct p9_wstat wstat;
@@ -919,13 +932,20 @@ v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
 	if (IS_ERR(oldfid))
 		return PTR_ERR(oldfid);
 
-	olddirfid = clone_fid(v9fs_parent_fid(old_dentry));
+	dfid = v9fs_parent_fid(old_dentry);
+	olddirfid = clone_fid(dfid);
+	if (dfid && !IS_ERR(dfid))
+		p9_client_clunk(dfid);
+
 	if (IS_ERR(olddirfid)) {
 		retval = PTR_ERR(olddirfid);
 		goto done;
 	}
 
-	newdirfid = clone_fid(v9fs_parent_fid(new_dentry));
+	dfid = v9fs_parent_fid(new_dentry);
+	newdirfid = clone_fid(dfid);
+	p9_client_clunk(dfid);
+
 	if (IS_ERR(newdirfid)) {
 		retval = PTR_ERR(newdirfid);
 		goto clunk_olddir;
@@ -982,6 +1002,7 @@ clunk_olddir:
 	p9_client_clunk(olddirfid);
 
 done:
+	p9_client_clunk(oldfid);
 	return retval;
 }
 
@@ -1014,6 +1035,7 @@ v9fs_vfs_getattr(const struct path *path, struct kstat *stat,
 		return PTR_ERR(fid);
 
 	st = p9_client_stat(fid);
+	p9_client_clunk(fid);
 	if (IS_ERR(st))
 		return PTR_ERR(st);
 
@@ -1034,7 +1056,7 @@ v9fs_vfs_getattr(const struct path *path, struct kstat *stat,
 
 static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
 {
-	int retval;
+	int retval, use_dentry = 0;
 	struct v9fs_session_info *v9ses;
 	struct p9_fid *fid = NULL;
 	struct p9_wstat wstat;
@@ -1050,8 +1072,10 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
 		fid = iattr->ia_file->private_data;
 		WARN_ON(!fid);
 	}
-	if (!fid)
+	if (!fid) {
 		fid = v9fs_fid_lookup(dentry);
+		use_dentry = 1;
+	}
 	if(IS_ERR(fid))
 		return PTR_ERR(fid);
 
@@ -1081,6 +1105,10 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
 		filemap_write_and_wait(d_inode(dentry)->i_mapping);
 
 	retval = p9_client_wstat(fid, &wstat);
+
+	if (use_dentry)
+		p9_client_clunk(fid);
+
 	if (retval < 0)
 		return retval;
 
@@ -1205,6 +1233,7 @@ static const char *v9fs_vfs_get_link(struct dentry *dentry,
 		return ERR_PTR(-EBADF);
 
 	st = p9_client_stat(fid);
+	p9_client_clunk(fid);
 	if (IS_ERR(st))
 		return ERR_CAST(st);
 
diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c
index 0028eccb665a..823c2eb5f1bf 100644
--- a/fs/9p/vfs_inode_dotl.c
+++ b/fs/9p/vfs_inode_dotl.c
@@ -296,6 +296,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
 
 	/* instantiate inode and assign the unopened fid to the dentry */
 	fid = p9_client_walk(dfid, 1, &name, 1);
+	p9_client_clunk(dfid);
 	if (IS_ERR(fid)) {
 		err = PTR_ERR(fid);
 		p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", err);
@@ -342,6 +343,7 @@ v9fs_vfs_atomic_open_dotl(struct inode *dir, struct dentry *dentry,
 	file->private_data = ofid;
 	if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE)
 		v9fs_cache_inode_set_cookie(inode, file);
+	v9fs_open_fid_add(inode, ofid);
 	file->f_mode |= FMODE_CREATED;
 out:
 	v9fs_put_acl(dacl, pacl);
@@ -407,7 +409,6 @@ static int v9fs_vfs_mkdir_dotl(struct inode *dir,
 	err = p9_client_mkdir_dotl(dfid, name, mode, gid, &qid);
 	if (err < 0)
 		goto error;
-
 	fid = p9_client_walk(dfid, 1, &name, 1);
 	if (IS_ERR(fid)) {
 		err = PTR_ERR(fid);
@@ -451,6 +452,7 @@ error:
 	if (fid)
 		p9_client_clunk(fid);
 	v9fs_put_acl(dacl, pacl);
+	p9_client_clunk(dfid);
 	return err;
 }
 
@@ -478,6 +480,7 @@ v9fs_vfs_getattr_dotl(const struct path *path, struct kstat *stat,
 	 */
 
 	st = p9_client_getattr_dotl(fid, P9_STATS_ALL);
+	p9_client_clunk(fid);
 	if (IS_ERR(st))
 		return PTR_ERR(st);
 
@@ -539,7 +542,7 @@ static int v9fs_mapped_iattr_valid(int iattr_valid)
 
 int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
 {
-	int retval;
+	int retval, use_dentry = 0;
 	struct p9_fid *fid = NULL;
 	struct p9_iattr_dotl p9attr;
 	struct inode *inode = d_inode(dentry);
@@ -564,8 +567,10 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
 		fid = iattr->ia_file->private_data;
 		WARN_ON(!fid);
 	}
-	if (!fid)
+	if (!fid) {
 		fid = v9fs_fid_lookup(dentry);
+		use_dentry = 1;
+	}
 	if (IS_ERR(fid))
 		return PTR_ERR(fid);
 
@@ -574,8 +579,11 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
 		filemap_write_and_wait(inode->i_mapping);
 
 	retval = p9_client_setattr(fid, &p9attr);
-	if (retval < 0)
+	if (retval < 0) {
+		if (use_dentry)
+			p9_client_clunk(fid);
 		return retval;
+	}
 
 	if ((iattr->ia_valid & ATTR_SIZE) &&
 	    iattr->ia_size != i_size_read(inode))
@@ -587,9 +595,15 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
 	if (iattr->ia_valid & ATTR_MODE) {
 		/* We also want to update ACL when we update mode bits */
 		retval = v9fs_acl_chmod(inode, fid);
-		if (retval < 0)
+		if (retval < 0) {
+			if (use_dentry)
+				p9_client_clunk(fid);
 			return retval;
+		}
 	}
+	if (use_dentry)
+		p9_client_clunk(fid);
+
 	return 0;
 }
 
@@ -741,6 +755,7 @@ error:
 	if (fid)
 		p9_client_clunk(fid);
 
+	p9_client_clunk(dfid);
 	return err;
 }
 
@@ -769,11 +784,15 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
 		return PTR_ERR(dfid);
 
 	oldfid = v9fs_fid_lookup(old_dentry);
-	if (IS_ERR(oldfid))
+	if (IS_ERR(oldfid)) {
+		p9_client_clunk(dfid);
 		return PTR_ERR(oldfid);
+	}
 
 	err = p9_client_link(dfid, oldfid, dentry->d_name.name);
 
+	p9_client_clunk(dfid);
+	p9_client_clunk(oldfid);
 	if (err < 0) {
 		p9_debug(P9_DEBUG_VFS, "p9_client_link failed %d\n", err);
 		return err;
@@ -788,6 +807,7 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
 			return PTR_ERR(fid);
 
 		v9fs_refresh_inode_dotl(fid, d_inode(old_dentry));
+		p9_client_clunk(fid);
 	}
 	ihold(d_inode(old_dentry));
 	d_instantiate(dentry, d_inode(old_dentry));
@@ -886,6 +906,8 @@ error:
 	if (fid)
 		p9_client_clunk(fid);
 	v9fs_put_acl(dacl, pacl);
+	p9_client_clunk(dfid);
+
 	return err;
 }
 
@@ -914,6 +936,7 @@ v9fs_vfs_get_link_dotl(struct dentry *dentry,
 	if (IS_ERR(fid))
 		return ERR_CAST(fid);
 	retval = p9_client_readlink(fid, &target);
+	p9_client_clunk(fid);
 	if (retval)
 		return ERR_PTR(retval);
 	set_delayed_call(done, kfree_link, target);
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index 9a21269b7234..5fce6e30bc5a 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -268,6 +268,7 @@ static int v9fs_statfs(struct dentry *dentry, struct kstatfs *buf)
 	}
 	res = simple_statfs(dentry, buf);
 done:
+	p9_client_clunk(fid);
 	return res;
 }
 
diff --git a/fs/9p/xattr.c b/fs/9p/xattr.c
index ac8ff8ca4c11..87217dd0433e 100644
--- a/fs/9p/xattr.c
+++ b/fs/9p/xattr.c
@@ -71,14 +71,17 @@ ssize_t v9fs_xattr_get(struct dentry *dentry, const char *name,
 		       void *buffer, size_t buffer_size)
 {
 	struct p9_fid *fid;
+	int ret;
 
 	p9_debug(P9_DEBUG_VFS, "name = %s value_len = %zu\n",
 		 name, buffer_size);
 	fid = v9fs_fid_lookup(dentry);
 	if (IS_ERR(fid))
 		return PTR_ERR(fid);
+	ret = v9fs_fid_xattr_get(fid, name, buffer, buffer_size);
+	p9_client_clunk(fid);
 
-	return v9fs_fid_xattr_get(fid, name, buffer, buffer_size);
+	return ret;
 }
 
 /*
@@ -96,8 +99,15 @@ ssize_t v9fs_xattr_get(struct dentry *dentry, const char *name,
 int v9fs_xattr_set(struct dentry *dentry, const char *name,
 		   const void *value, size_t value_len, int flags)
 {
-	struct p9_fid *fid = v9fs_fid_lookup(dentry);
-	return v9fs_fid_xattr_set(fid, name, value, value_len, flags);
+	int ret;
+	struct p9_fid *fid;
+
+	fid  = v9fs_fid_lookup(dentry);
+	if (IS_ERR(fid))
+		return PTR_ERR(fid);
+	ret = v9fs_fid_xattr_set(fid, name, value, value_len, flags);
+	p9_client_clunk(fid);
+	return ret;
 }
 
 int v9fs_fid_xattr_set(struct p9_fid *fid, const char *name,
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 9e56ee1f2652..9293045e128c 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1,9 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- *  linux/fs/block_dev.c
- *
  *  Copyright (C) 1991, 1992  Linus Torvalds
  *  Copyright (C) 2001  Andrea Arcangeli <andrea@suse.de> SuSE
+ *  Copyright (C) 2016 - 2020 Christoph Hellwig
  */
 
 #include <linux/init.h>
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 35c83f65475b..950552944436 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -840,7 +840,7 @@ static int ceph_writepages_start(struct address_space *mapping,
 	     wbc->sync_mode == WB_SYNC_NONE ? "NONE" :
 	     (wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD"));
 
-	if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
+	if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
 		if (ci->i_wrbuffer_ref > 0) {
 			pr_warn_ratelimited(
 				"writepage_start %p %lld forced umount\n",
@@ -1264,7 +1264,7 @@ ceph_find_incompatible(struct page *page)
 	struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
 	struct ceph_inode_info *ci = ceph_inode(inode);
 
-	if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
+	if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
 		dout(" page %p forced umount\n", page);
 		return ERR_PTR(-EIO);
 	}
@@ -1321,7 +1321,7 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping,
 	dout("write_begin file %p inode %p page %p %d~%d\n", file, inode, page, (int)pos, (int)len);
 
 	for (;;) {
-		page = grab_cache_page_write_begin(mapping, index, 0);
+		page = grab_cache_page_write_begin(mapping, index, flags);
 		if (!page) {
 			r = -ENOMEM;
 			break;
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index ded4229c314a..255a512f1277 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -1140,16 +1140,24 @@ void __ceph_remove_cap(struct ceph_cap *cap, bool queue_release)
 {
 	struct ceph_mds_session *session = cap->session;
 	struct ceph_inode_info *ci = cap->ci;
-	struct ceph_mds_client *mdsc =
-		ceph_sb_to_client(ci->vfs_inode.i_sb)->mdsc;
+	struct ceph_mds_client *mdsc;
 	int removed = 0;
 
+	/* 'ci' being NULL means the remove have already occurred */
+	if (!ci) {
+		dout("%s: cap inode is NULL\n", __func__);
+		return;
+	}
+
 	dout("__ceph_remove_cap %p from %p\n", cap, &ci->vfs_inode);
 
+	mdsc = ceph_inode_to_client(&ci->vfs_inode)->mdsc;
+
 	/* remove from inode's cap rbtree, and clear auth cap */
 	rb_erase(&cap->ci_node, &ci->i_caps);
 	if (ci->i_auth_cap == cap) {
-		WARN_ON_ONCE(!list_empty(&ci->i_dirty_item));
+		WARN_ON_ONCE(!list_empty(&ci->i_dirty_item) &&
+			     !mdsc->fsc->blocklisted);
 		ci->i_auth_cap = NULL;
 	}
 
@@ -2746,7 +2754,7 @@ again:
 			goto out_unlock;
 		}
 
-		if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
+		if (READ_ONCE(mdsc->fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
 			dout("get_cap_refs %p forced umount\n", inode);
 			ret = -EIO;
 			goto out_unlock;
@@ -4027,15 +4035,13 @@ void ceph_handle_caps(struct ceph_mds_session *session,
 	}
 
 	if (msg_version >= 8) {
-		u64 flush_tid;
-		u32 caller_uid, caller_gid;
 		u32 pool_ns_len;
 
 		/* version >= 6 */
-		ceph_decode_64_safe(&p, end, flush_tid, bad);
+		ceph_decode_skip_64(&p, end, bad);	// flush_tid
 		/* version >= 7 */
-		ceph_decode_32_safe(&p, end, caller_uid, bad);
-		ceph_decode_32_safe(&p, end, caller_gid, bad);
+		ceph_decode_skip_32(&p, end, bad);	// caller_uid
+		ceph_decode_skip_32(&p, end, bad);	// caller_gid
 		/* version >= 8 */
 		ceph_decode_32_safe(&p, end, pool_ns_len, bad);
 		if (pool_ns_len > 0) {
@@ -4058,9 +4064,8 @@ void ceph_handle_caps(struct ceph_mds_session *session,
 	}
 
 	if (msg_version >= 11) {
-		u32 flags;
 		/* version >= 10 */
-		ceph_decode_32_safe(&p, end, flags, bad);
+		ceph_decode_skip_32(&p, end, bad); // flags
 		/* version >= 11 */
 		extra_info.dirstat_valid = true;
 		ceph_decode_64_safe(&p, end, extra_info.nfiles, bad);
diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c
index 7a8fbe3e4751..66989c880adb 100644
--- a/fs/ceph/debugfs.c
+++ b/fs/ceph/debugfs.c
@@ -304,11 +304,25 @@ static int mds_sessions_show(struct seq_file *s, void *ptr)
 	return 0;
 }
 
+static int status_show(struct seq_file *s, void *p)
+{
+	struct ceph_fs_client *fsc = s->private;
+	struct ceph_entity_inst *inst = &fsc->client->msgr.inst;
+	struct ceph_entity_addr *client_addr = ceph_client_addr(fsc->client);
+
+	seq_printf(s, "instance: %s.%lld %s/%u\n", ENTITY_NAME(inst->name),
+		   ceph_pr_addr(client_addr), le32_to_cpu(client_addr->nonce));
+	seq_printf(s, "blocklisted: %s\n", fsc->blocklisted ? "true" : "false");
+
+	return 0;
+}
+
 DEFINE_SHOW_ATTRIBUTE(mdsmap);
 DEFINE_SHOW_ATTRIBUTE(mdsc);
 DEFINE_SHOW_ATTRIBUTE(caps);
 DEFINE_SHOW_ATTRIBUTE(mds_sessions);
 DEFINE_SHOW_ATTRIBUTE(metric);
+DEFINE_SHOW_ATTRIBUTE(status);
 
 
 /*
@@ -394,6 +408,12 @@ void ceph_fs_debugfs_init(struct ceph_fs_client *fsc)
 						fsc->client->debugfs_dir,
 						fsc,
 						&caps_fops);
+
+	fsc->debugfs_status = debugfs_create_file("status",
+						  0400,
+						  fsc->client->debugfs_dir,
+						  fsc,
+						  &status_fops);
 }
 
 
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index a4d48370b2b3..858ee7362ff5 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -1202,12 +1202,11 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
 			op = CEPH_MDS_OP_RENAMESNAP;
 		else
 			return -EROFS;
-	} else if (old_dir != new_dir) {
-		err = ceph_quota_check_rename(mdsc, d_inode(old_dentry),
-					      new_dir);
-		if (err)
-			return err;
 	}
+	/* don't allow cross-quota renames */
+	if ((old_dir != new_dir) &&
+	    (!ceph_quota_is_same_realm(old_dir, new_dir)))
+		return -EXDEV;
 
 	dout("rename dir %p dentry %p to dir %p dentry %p\n",
 	     old_dir, old_dentry, new_dir, new_dentry);
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 526faf4778ce..adc8fc3c5d85 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -1315,15 +1315,10 @@ retry_lookup:
 	}
 
 	if (rinfo->head->is_target) {
-		tvino.ino = le64_to_cpu(rinfo->targeti.in->ino);
-		tvino.snap = le64_to_cpu(rinfo->targeti.in->snapid);
-
-		in = ceph_get_inode(sb, tvino);
-		if (IS_ERR(in)) {
-			err = PTR_ERR(in);
-			goto done;
-		}
+		/* Should be filled in by handle_reply */
+		BUG_ON(!req->r_target_inode);
 
+		in = req->r_target_inode;
 		err = ceph_fill_inode(in, req->r_locked_page, &rinfo->targeti,
 				NULL, session,
 				(!test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags) &&
@@ -1333,11 +1328,13 @@ retry_lookup:
 		if (err < 0) {
 			pr_err("ceph_fill_inode badness %p %llx.%llx\n",
 				in, ceph_vinop(in));
+			req->r_target_inode = NULL;
 			if (in->i_state & I_NEW)
 				discard_new_inode(in);
+			else
+				iput(in);
 			goto done;
 		}
-		req->r_target_inode = in;
 		if (in->i_state & I_NEW)
 			unlock_new_inode(in);
 	}
@@ -1597,8 +1594,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
 	struct dentry *dn;
 	struct inode *in;
 	int err = 0, skipped = 0, ret, i;
-	struct ceph_mds_request_head *rhead = req->r_request->front.iov_base;
-	u32 frag = le32_to_cpu(rhead->args.readdir.frag);
+	u32 frag = le32_to_cpu(req->r_args.readdir.frag);
 	u32 last_hash = 0;
 	u32 fpos_offset;
 	struct ceph_readdir_cache_control cache_ctl = {};
@@ -1615,7 +1611,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
 		} else if (rinfo->offset_hash) {
 			/* mds understands offset_hash */
 			WARN_ON_ONCE(req->r_readdir_offset != 2);
-			last_hash = le32_to_cpu(rhead->args.readdir.offset_hash);
+			last_hash = le32_to_cpu(req->r_args.readdir.offset_hash);
 		}
 	}
 
@@ -1888,7 +1884,7 @@ static void ceph_do_invalidate_pages(struct inode *inode)
 
 	mutex_lock(&ci->i_truncate_mutex);
 
-	if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
+	if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
 		pr_warn_ratelimited("invalidate_pages %p %lld forced umount\n",
 				    inode, ceph_ino(inode));
 		mapping_set_error(inode->i_mapping, -EIO);
@@ -2340,15 +2336,23 @@ int ceph_permission(struct inode *inode, int mask)
 }
 
 /* Craft a mask of needed caps given a set of requested statx attrs. */
-static int statx_to_caps(u32 want)
+static int statx_to_caps(u32 want, umode_t mode)
 {
 	int mask = 0;
 
 	if (want & (STATX_MODE|STATX_UID|STATX_GID|STATX_CTIME|STATX_BTIME))
 		mask |= CEPH_CAP_AUTH_SHARED;
 
-	if (want & (STATX_NLINK|STATX_CTIME))
-		mask |= CEPH_CAP_LINK_SHARED;
+	if (want & (STATX_NLINK|STATX_CTIME)) {
+		/*
+		 * The link count for directories depends on inode->i_subdirs,
+		 * and that is only updated when Fs caps are held.
+		 */
+		if (S_ISDIR(mode))
+			mask |= CEPH_CAP_FILE_SHARED;
+		else
+			mask |= CEPH_CAP_LINK_SHARED;
+	}
 
 	if (want & (STATX_ATIME|STATX_MTIME|STATX_CTIME|STATX_SIZE|
 		    STATX_BLOCKS))
@@ -2374,8 +2378,9 @@ int ceph_getattr(const struct path *path, struct kstat *stat,
 
 	/* Skip the getattr altogether if we're asked not to sync */
 	if (!(flags & AT_STATX_DONT_SYNC)) {
-		err = ceph_do_getattr(inode, statx_to_caps(request_mask),
-				      flags & AT_STATX_FORCE_SYNC);
+		err = ceph_do_getattr(inode,
+				statx_to_caps(request_mask, inode->i_mode),
+				flags & AT_STATX_FORCE_SYNC);
 		if (err)
 			return err;
 	}
diff --git a/fs/ceph/locks.c b/fs/ceph/locks.c
index 048a435a29be..fa8a847743d0 100644
--- a/fs/ceph/locks.c
+++ b/fs/ceph/locks.c
@@ -57,7 +57,7 @@ static const struct file_lock_operations ceph_fl_lock_ops = {
 	.fl_release_private = ceph_fl_release_lock,
 };
 
-/**
+/*
  * Implement fcntl and flock locking functions.
  */
 static int ceph_lock_message(u8 lock_type, u16 operation, struct inode *inode,
@@ -225,7 +225,7 @@ static int try_unlock_file(struct file *file, struct file_lock *fl)
 	return 1;
 }
 
-/**
+/*
  * Attempt to set an fcntl lock.
  * For now, this just goes away to the server. Later it may be more awesome.
  */
@@ -408,7 +408,7 @@ static int lock_to_ceph_filelock(struct file_lock *lock,
 	return err;
 }
 
-/**
+/*
  * Encode the flock and fcntl locks for the given inode into the ceph_filelock
  * array. Must be called with inode->i_lock already held.
  * If we encounter more of a specific lock type than expected, return -ENOSPC.
@@ -458,7 +458,7 @@ fail:
 	return err;
 }
 
-/**
+/*
  * Copy the encoded flock and fcntl locks into the pagelist.
  * Format is: #fcntl locks, sequential fcntl locks, #flock locks,
  * sequential flock locks.
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 8f1d7500a7ec..98c15ff2e599 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -516,13 +516,9 @@ static int parse_reply_info_create(void **p, void *end,
 			/* Malformed reply? */
 			info->has_create_ino = false;
 		} else if (test_bit(CEPHFS_FEATURE_DELEG_INO, &s->s_features)) {
-			u8 struct_v, struct_compat;
-			u32 len;
-
 			info->has_create_ino = true;
-			ceph_decode_8_safe(p, end, struct_v, bad);
-			ceph_decode_8_safe(p, end, struct_compat, bad);
-			ceph_decode_32_safe(p, end, len, bad);
+			/* struct_v, struct_compat, and len */
+			ceph_decode_skip_n(p, end, 2 + sizeof(u32), bad);
 			ceph_decode_64_safe(p, end, info->ino, bad);
 			ret = ceph_parse_deleg_inos(p, end, s);
 			if (ret)
@@ -837,6 +833,7 @@ void ceph_mdsc_release_request(struct kref *kref)
 	}
 	kfree(req->r_path1);
 	kfree(req->r_path2);
+	put_cred(req->r_cred);
 	if (req->r_pagelist)
 		ceph_pagelist_release(req->r_pagelist);
 	put_request_session(req);
@@ -892,8 +889,7 @@ static void __register_request(struct ceph_mds_client *mdsc,
 	ceph_mdsc_get_request(req);
 	insert_request(&mdsc->request_tree, req);
 
-	req->r_uid = current_fsuid();
-	req->r_gid = current_fsgid();
+	req->r_cred = get_current_cred();
 
 	if (mdsc->oldest_tid == 0 && req->r_op != CEPH_MDS_OP_SETFILELOCK)
 		mdsc->oldest_tid = req->r_tid;
@@ -1243,7 +1239,7 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6
 {
 	struct ceph_msg *msg;
 	struct ceph_mds_session_head *h;
-	int i = -1;
+	int i;
 	int extra_bytes = 0;
 	int metadata_key_count = 0;
 	struct ceph_options *opt = mdsc->fsc->client->options;
@@ -1595,7 +1591,7 @@ static int remove_session_caps_cb(struct inode *inode, struct ceph_cap *cap,
 		struct ceph_cap_flush *cf;
 		struct ceph_mds_client *mdsc = fsc->mdsc;
 
-		if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
+		if (READ_ONCE(fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN) {
 			if (inode->i_data.nrpages > 0)
 				invalidate = true;
 			if (ci->i_wrbuffer_ref > 0)
@@ -2482,21 +2478,24 @@ static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry,
 /*
  * called under mdsc->mutex
  */
-static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
+static struct ceph_msg *create_request_message(struct ceph_mds_session *session,
 					       struct ceph_mds_request *req,
-					       int mds, bool drop_cap_releases)
+					       bool drop_cap_releases)
 {
+	int mds = session->s_mds;
+	struct ceph_mds_client *mdsc = session->s_mdsc;
 	struct ceph_msg *msg;
-	struct ceph_mds_request_head *head;
+	struct ceph_mds_request_head_old *head;
 	const char *path1 = NULL;
 	const char *path2 = NULL;
 	u64 ino1 = 0, ino2 = 0;
 	int pathlen1 = 0, pathlen2 = 0;
 	bool freepath1 = false, freepath2 = false;
-	int len;
+	int len, i;
 	u16 releases;
 	void *p, *end;
 	int ret;
+	bool legacy = !(session->s_con.peer_features & CEPH_FEATURE_FS_BTIME);
 
 	ret = set_request_path_attr(req->r_inode, req->r_dentry,
 			      req->r_parent, req->r_path1, req->r_ino1.ino,
@@ -2518,14 +2517,23 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
 		goto out_free1;
 	}
 
-	len = sizeof(*head) +
-		pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64)) +
+	if (legacy) {
+		/* Old style */
+		len = sizeof(*head);
+	} else {
+		/* New style: add gid_list and any later fields */
+		len = sizeof(struct ceph_mds_request_head) + sizeof(u32) +
+		      (sizeof(u64) * req->r_cred->group_info->ngroups);
+	}
+
+	len += pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64)) +
 		sizeof(struct ceph_timespec);
 
 	/* calculate (max) length for cap releases */
 	len += sizeof(struct ceph_mds_request_release) *
 		(!!req->r_inode_drop + !!req->r_dentry_drop +
 		 !!req->r_old_inode_drop + !!req->r_old_dentry_drop);
+
 	if (req->r_dentry_drop)
 		len += pathlen1;
 	if (req->r_old_dentry_drop)
@@ -2537,17 +2545,33 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
 		goto out_free2;
 	}
 
-	msg->hdr.version = cpu_to_le16(2);
 	msg->hdr.tid = cpu_to_le64(req->r_tid);
 
-	head = msg->front.iov_base;
-	p = msg->front.iov_base + sizeof(*head);
+	/*
+	 * The old ceph_mds_request_header didn't contain a version field, and
+	 * one was added when we moved the message version from 3->4.
+	 */
+	if (legacy) {
+		msg->hdr.version = cpu_to_le16(3);
+		head = msg->front.iov_base;
+		p = msg->front.iov_base + sizeof(*head);
+	} else {
+		struct ceph_mds_request_head *new_head = msg->front.iov_base;
+
+		msg->hdr.version = cpu_to_le16(4);
+		new_head->version = cpu_to_le16(CEPH_MDS_REQUEST_HEAD_VERSION);
+		head = (struct ceph_mds_request_head_old *)&new_head->oldest_client_tid;
+		p = msg->front.iov_base + sizeof(*new_head);
+	}
+
 	end = msg->front.iov_base + msg->front.iov_len;
 
 	head->mdsmap_epoch = cpu_to_le32(mdsc->mdsmap->m_epoch);
 	head->op = cpu_to_le32(req->r_op);
-	head->caller_uid = cpu_to_le32(from_kuid(&init_user_ns, req->r_uid));
-	head->caller_gid = cpu_to_le32(from_kgid(&init_user_ns, req->r_gid));
+	head->caller_uid = cpu_to_le32(from_kuid(&init_user_ns,
+						 req->r_cred->fsuid));
+	head->caller_gid = cpu_to_le32(from_kgid(&init_user_ns,
+						 req->r_cred->fsgid));
 	head->ino = cpu_to_le64(req->r_deleg_ino);
 	head->args = req->r_args;
 
@@ -2592,6 +2616,14 @@ static struct ceph_msg *create_request_message(struct ceph_mds_client *mdsc,
 		ceph_encode_copy(&p, &ts, sizeof(ts));
 	}
 
+	/* gid list */
+	if (!legacy) {
+		ceph_encode_32(&p, req->r_cred->group_info->ngroups);
+		for (i = 0; i < req->r_cred->group_info->ngroups; i++)
+			ceph_encode_64(&p, from_kgid(&init_user_ns,
+				       req->r_cred->group_info->gid[i]));
+	}
+
 	if (WARN_ON_ONCE(p > end)) {
 		ceph_msg_put(msg);
 		msg = ERR_PTR(-ERANGE);
@@ -2635,14 +2667,28 @@ static void complete_request(struct ceph_mds_client *mdsc,
 	complete_all(&req->r_completion);
 }
 
+static struct ceph_mds_request_head_old *
+find_old_request_head(void *p, u64 features)
+{
+	bool legacy = !(features & CEPH_FEATURE_FS_BTIME);
+	struct ceph_mds_request_head *new_head;
+
+	if (legacy)
+		return (struct ceph_mds_request_head_old *)p;
+	new_head = (struct ceph_mds_request_head *)p;
+	return (struct ceph_mds_request_head_old *)&new_head->oldest_client_tid;
+}
+
 /*
  * called under mdsc->mutex
  */
-static int __prepare_send_request(struct ceph_mds_client *mdsc,
+static int __prepare_send_request(struct ceph_mds_session *session,
 				  struct ceph_mds_request *req,
-				  int mds, bool drop_cap_releases)
+				  bool drop_cap_releases)
 {
-	struct ceph_mds_request_head *rhead;
+	int mds = session->s_mds;
+	struct ceph_mds_client *mdsc = session->s_mdsc;
+	struct ceph_mds_request_head_old *rhead;
 	struct ceph_msg *msg;
 	int flags = 0;
 
@@ -2661,6 +2707,7 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
 
 	if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags)) {
 		void *p;
+
 		/*
 		 * Replay.  Do not regenerate message (and rebuild
 		 * paths, etc.); just use the original message.
@@ -2668,7 +2715,8 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
 		 * d_move mangles the src name.
 		 */
 		msg = req->r_request;
-		rhead = msg->front.iov_base;
+		rhead = find_old_request_head(msg->front.iov_base,
+					      session->s_con.peer_features);
 
 		flags = le32_to_cpu(rhead->flags);
 		flags |= CEPH_MDS_FLAG_REPLAY;
@@ -2699,14 +2747,15 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
 		ceph_msg_put(req->r_request);
 		req->r_request = NULL;
 	}
-	msg = create_request_message(mdsc, req, mds, drop_cap_releases);
+	msg = create_request_message(session, req, drop_cap_releases);
 	if (IS_ERR(msg)) {
 		req->r_err = PTR_ERR(msg);
 		return PTR_ERR(msg);
 	}
 	req->r_request = msg;
 
-	rhead = msg->front.iov_base;
+	rhead = find_old_request_head(msg->front.iov_base,
+				      session->s_con.peer_features);
 	rhead->oldest_client_tid = cpu_to_le64(__get_oldest_tid(mdsc));
 	if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags))
 		flags |= CEPH_MDS_FLAG_REPLAY;
@@ -2725,15 +2774,13 @@ static int __prepare_send_request(struct ceph_mds_client *mdsc,
 /*
  * called under mdsc->mutex
  */
-static int __send_request(struct ceph_mds_client *mdsc,
-			  struct ceph_mds_session *session,
+static int __send_request(struct ceph_mds_session *session,
 			  struct ceph_mds_request *req,
 			  bool drop_cap_releases)
 {
 	int err;
 
-	err = __prepare_send_request(mdsc, req, session->s_mds,
-				     drop_cap_releases);
+	err = __prepare_send_request(session, req, drop_cap_releases);
 	if (!err) {
 		ceph_msg_get(req->r_request);
 		ceph_con_send(&session->s_con, req->r_request);
@@ -2818,10 +2865,6 @@ static void __do_request(struct ceph_mds_client *mdsc,
 	     ceph_session_state_name(session->s_state));
 	if (session->s_state != CEPH_MDS_SESSION_OPEN &&
 	    session->s_state != CEPH_MDS_SESSION_HUNG) {
-		if (session->s_state == CEPH_MDS_SESSION_REJECTED) {
-			err = -EACCES;
-			goto out_session;
-		}
 		/*
 		 * We cannot queue async requests since the caps and delegated
 		 * inodes are bound to the session. Just return -EJUKEBOX and
@@ -2831,6 +2874,20 @@ static void __do_request(struct ceph_mds_client *mdsc,
 			err = -EJUKEBOX;
 			goto out_session;
 		}
+
+		/*
+		 * If the session has been REJECTED, then return a hard error,
+		 * unless it's a CLEANRECOVER mount, in which case we'll queue
+		 * it to the mdsc queue.
+		 */
+		if (session->s_state == CEPH_MDS_SESSION_REJECTED) {
+			if (ceph_test_mount_opt(mdsc->fsc, CLEANRECOVER))
+				list_add(&req->r_wait, &mdsc->waiting_for_map);
+			else
+				err = -EACCES;
+			goto out_session;
+		}
+
 		if (session->s_state == CEPH_MDS_SESSION_NEW ||
 		    session->s_state == CEPH_MDS_SESSION_CLOSING) {
 			err = __open_session(mdsc, session);
@@ -2850,7 +2907,7 @@ static void __do_request(struct ceph_mds_client *mdsc,
 	if (req->r_request_started == 0)   /* note request start time */
 		req->r_request_started = jiffies;
 
-	err = __send_request(mdsc, session, req, false);
+	err = __send_request(session, req, false);
 
 out_session:
 	ceph_put_mds_session(session);
@@ -3173,6 +3230,23 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
 		err = parse_reply_info(session, msg, rinfo, session->s_con.peer_features);
 	mutex_unlock(&mdsc->mutex);
 
+	/* Must find target inode outside of mutexes to avoid deadlocks */
+	if ((err >= 0) && rinfo->head->is_target) {
+		struct inode *in;
+		struct ceph_vino tvino = {
+			.ino  = le64_to_cpu(rinfo->targeti.in->ino),
+			.snap = le64_to_cpu(rinfo->targeti.in->snapid)
+		};
+
+		in = ceph_get_inode(mdsc->fsc->sb, tvino);
+		if (IS_ERR(in)) {
+			err = PTR_ERR(in);
+			mutex_lock(&session->s_mutex);
+			goto out_err;
+		}
+		req->r_target_inode = in;
+	}
+
 	mutex_lock(&session->s_mutex);
 	if (err < 0) {
 		pr_err("mdsc_handle_reply got corrupt reply mds%d(tid:%lld)\n", mds, tid);
@@ -3514,7 +3588,7 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc,
 
 	mutex_lock(&mdsc->mutex);
 	list_for_each_entry_safe(req, nreq, &session->s_unsafe, r_unsafe_item)
-		__send_request(mdsc, session, req, true);
+		__send_request(session, req, true);
 
 	/*
 	 * also re-send old requests when MDS enters reconnect stage. So that MDS
@@ -3535,7 +3609,7 @@ static void replay_unsafe_requests(struct ceph_mds_client *mdsc,
 
 		ceph_mdsc_release_dir_caps_no_check(req);
 
-		__send_request(mdsc, session, req, true);
+		__send_request(session, req, true);
 	}
 	mutex_unlock(&mdsc->mutex);
 }
@@ -4374,12 +4448,7 @@ static void maybe_recover_session(struct ceph_mds_client *mdsc)
 	if (!READ_ONCE(fsc->blocklisted))
 		return;
 
-	if (fsc->last_auto_reconnect &&
-	    time_before(jiffies, fsc->last_auto_reconnect + HZ * 60 * 30))
-		return;
-
 	pr_info("auto reconnect after blocklisted\n");
-	fsc->last_auto_reconnect = jiffies;
 	ceph_force_reconnect(fsc->sb);
 }
 
@@ -4678,7 +4747,7 @@ void ceph_mdsc_sync(struct ceph_mds_client *mdsc)
 {
 	u64 want_tid, want_flush;
 
-	if (READ_ONCE(mdsc->fsc->mount_state) == CEPH_MOUNT_SHUTDOWN)
+	if (READ_ONCE(mdsc->fsc->mount_state) >= CEPH_MOUNT_SHUTDOWN)
 		return;
 
 	dout("sync\n");
@@ -4855,10 +4924,8 @@ void ceph_mdsc_handle_fsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
 	void *p = msg->front.iov_base;
 	void *end = p + msg->front.iov_len;
 	u32 epoch;
-	u32 map_len;
 	u32 num_fs;
 	u32 mount_fscid = (u32)-1;
-	u8 struct_v, struct_cv;
 	int err = -EINVAL;
 
 	ceph_decode_need(&p, end, sizeof(u32), bad);
@@ -4866,24 +4933,17 @@ void ceph_mdsc_handle_fsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
 
 	dout("handle_fsmap epoch %u\n", epoch);
 
-	ceph_decode_need(&p, end, 2 + sizeof(u32), bad);
-	struct_v = ceph_decode_8(&p);
-	struct_cv = ceph_decode_8(&p);
-	map_len = ceph_decode_32(&p);
-
-	ceph_decode_need(&p, end, sizeof(u32) * 3, bad);
-	p += sizeof(u32) * 2; /* skip epoch and legacy_client_fscid */
+	/* struct_v, struct_cv, map_len, epoch, legacy_client_fscid */
+	ceph_decode_skip_n(&p, end, 2 + sizeof(u32) * 3, bad);
 
-	num_fs = ceph_decode_32(&p);
+	ceph_decode_32_safe(&p, end, num_fs, bad);
 	while (num_fs-- > 0) {
 		void *info_p, *info_end;
 		u32 info_len;
-		u8 info_v, info_cv;
 		u32 fscid, namelen;
 
 		ceph_decode_need(&p, end, 2 + sizeof(u32), bad);
-		info_v = ceph_decode_8(&p);
-		info_cv = ceph_decode_8(&p);
+		p += 2;		// info_v, info_cv
 		info_len = ceph_decode_32(&p);
 		ceph_decode_need(&p, end, info_len, bad);
 		info_p = p;
@@ -4954,7 +5014,7 @@ void ceph_mdsc_handle_mdsmap(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
 		return;
 	}
 
-	newmap = ceph_mdsmap_decode(&p, end);
+	newmap = ceph_mdsmap_decode(&p, end, ceph_msgr2(mdsc->fsc->client));
 	if (IS_ERR(newmap)) {
 		err = PTR_ERR(newmap);
 		goto bad_unlock;
@@ -5081,23 +5141,12 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con,
 	struct ceph_mds_client *mdsc = s->s_mdsc;
 	struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth;
 	struct ceph_auth_handshake *auth = &s->s_auth;
+	int ret;
 
-	if (force_new && auth->authorizer) {
-		ceph_auth_destroy_authorizer(auth->authorizer);
-		auth->authorizer = NULL;
-	}
-	if (!auth->authorizer) {
-		int ret = ceph_auth_create_authorizer(ac, CEPH_ENTITY_TYPE_MDS,
-						      auth);
-		if (ret)
-			return ERR_PTR(ret);
-	} else {
-		int ret = ceph_auth_update_authorizer(ac, CEPH_ENTITY_TYPE_MDS,
-						      auth);
-		if (ret)
-			return ERR_PTR(ret);
-	}
-	*proto = ac->protocol;
+	ret = __ceph_auth_get_authorizer(ac, auth, CEPH_ENTITY_TYPE_MDS,
+					 force_new, proto, NULL, NULL);
+	if (ret)
+		return ERR_PTR(ret);
 
 	return auth;
 }
@@ -5118,8 +5167,11 @@ static int verify_authorizer_reply(struct ceph_connection *con)
 	struct ceph_mds_session *s = con->private;
 	struct ceph_mds_client *mdsc = s->s_mdsc;
 	struct ceph_auth_client *ac = mdsc->fsc->client->monc.auth;
+	struct ceph_auth_handshake *auth = &s->s_auth;
 
-	return ceph_auth_verify_authorizer_reply(ac, s->s_auth.authorizer);
+	return ceph_auth_verify_authorizer_reply(ac, auth->authorizer,
+		auth->authorizer_reply_buf, auth->authorizer_reply_buf_len,
+		NULL, NULL, NULL, NULL);
 }
 
 static int invalidate_authorizer(struct ceph_connection *con)
@@ -5133,6 +5185,80 @@ static int invalidate_authorizer(struct ceph_connection *con)
 	return ceph_monc_validate_auth(&mdsc->fsc->client->monc);
 }
 
+static int mds_get_auth_request(struct ceph_connection *con,
+				void *buf, int *buf_len,
+				void **authorizer, int *authorizer_len)
+{
+	struct ceph_mds_session *s = con->private;
+	struct ceph_auth_client *ac = s->s_mdsc->fsc->client->monc.auth;
+	struct ceph_auth_handshake *auth = &s->s_auth;
+	int ret;
+
+	ret = ceph_auth_get_authorizer(ac, auth, CEPH_ENTITY_TYPE_MDS,
+				       buf, buf_len);
+	if (ret)
+		return ret;
+
+	*authorizer = auth->authorizer_buf;
+	*authorizer_len = auth->authorizer_buf_len;
+	return 0;
+}
+
+static int mds_handle_auth_reply_more(struct ceph_connection *con,
+				      void *reply, int reply_len,
+				      void *buf, int *buf_len,
+				      void **authorizer, int *authorizer_len)
+{
+	struct ceph_mds_session *s = con->private;
+	struct ceph_auth_client *ac = s->s_mdsc->fsc->client->monc.auth;
+	struct ceph_auth_handshake *auth = &s->s_auth;
+	int ret;
+
+	ret = ceph_auth_handle_svc_reply_more(ac, auth, reply, reply_len,
+					      buf, buf_len);
+	if (ret)
+		return ret;
+
+	*authorizer = auth->authorizer_buf;
+	*authorizer_len = auth->authorizer_buf_len;
+	return 0;
+}
+
+static int mds_handle_auth_done(struct ceph_connection *con,
+				u64 global_id, void *reply, int reply_len,
+				u8 *session_key, int *session_key_len,
+				u8 *con_secret, int *con_secret_len)
+{
+	struct ceph_mds_session *s = con->private;
+	struct ceph_auth_client *ac = s->s_mdsc->fsc->client->monc.auth;
+	struct ceph_auth_handshake *auth = &s->s_auth;
+
+	return ceph_auth_handle_svc_reply_done(ac, auth, reply, reply_len,
+					       session_key, session_key_len,
+					       con_secret, con_secret_len);
+}
+
+static int mds_handle_auth_bad_method(struct ceph_connection *con,
+				      int used_proto, int result,
+				      const int *allowed_protos, int proto_cnt,
+				      const int *allowed_modes, int mode_cnt)
+{
+	struct ceph_mds_session *s = con->private;
+	struct ceph_mon_client *monc = &s->s_mdsc->fsc->client->monc;
+	int ret;
+
+	if (ceph_auth_handle_bad_authorizer(monc->auth, CEPH_ENTITY_TYPE_MDS,
+					    used_proto, result,
+					    allowed_protos, proto_cnt,
+					    allowed_modes, mode_cnt)) {
+		ret = ceph_monc_validate_auth(monc);
+		if (ret)
+			return ret;
+	}
+
+	return -EACCES;
+}
+
 static struct ceph_msg *mds_alloc_msg(struct ceph_connection *con,
 				struct ceph_msg_header *hdr, int *skip)
 {
@@ -5182,6 +5308,10 @@ static const struct ceph_connection_operations mds_con_ops = {
 	.alloc_msg = mds_alloc_msg,
 	.sign_message = mds_sign_message,
 	.check_message_signature = mds_check_message_signature,
+	.get_auth_request = mds_get_auth_request,
+	.handle_auth_reply_more = mds_handle_auth_reply_more,
+	.handle_auth_done = mds_handle_auth_done,
+	.handle_auth_bad_method = mds_handle_auth_bad_method,
 };
 
 /* eof */
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index f5adbebcb38e..eaa7c5422116 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -275,8 +275,7 @@ struct ceph_mds_request {
 
 	union ceph_mds_request_args r_args;
 	int r_fmode;        /* file mode, if expecting cap */
-	kuid_t r_uid;
-	kgid_t r_gid;
+	const struct cred *r_cred;
 	int r_request_release_offset;
 	struct timespec64 r_stamp;
 
diff --git a/fs/ceph/mdsmap.c b/fs/ceph/mdsmap.c
index e4aba6c6d3b5..abd9af7727ad 100644
--- a/fs/ceph/mdsmap.c
+++ b/fs/ceph/mdsmap.c
@@ -114,7 +114,7 @@ bad:
  * Ignore any fields we don't care about (there are quite a few of
  * them).
  */
-struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
+struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end, bool msgr2)
 {
 	struct ceph_mdsmap *m;
 	const void *start = *p;
@@ -201,18 +201,19 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
 		namelen = ceph_decode_32(p);  /* skip mds name */
 		*p += namelen;
 
-		ceph_decode_need(p, end,
-				 4*sizeof(u32) + sizeof(u64) +
-				 sizeof(addr) + sizeof(struct ceph_timespec),
-				 bad);
-		mds = ceph_decode_32(p);
-		inc = ceph_decode_32(p);
-		state = ceph_decode_32(p);
+		ceph_decode_32_safe(p, end, mds, bad);
+		ceph_decode_32_safe(p, end, inc, bad);
+		ceph_decode_32_safe(p, end, state, bad);
 		*p += sizeof(u64);		/* state_seq */
-		err = ceph_decode_entity_addr(p, end, &addr);
+		if (info_v >= 8)
+			err = ceph_decode_entity_addrvec(p, end, msgr2, &addr);
+		else
+			err = ceph_decode_entity_addr(p, end, &addr);
 		if (err)
 			goto corrupt;
-		ceph_decode_copy(p, &laggy_since, sizeof(laggy_since));
+
+		ceph_decode_copy_safe(p, end, &laggy_since, sizeof(laggy_since),
+				      bad);
 		laggy = laggy_since.tv_sec != 0 || laggy_since.tv_nsec != 0;
 		*p += sizeof(u32);
 		ceph_decode_32_safe(p, end, namelen, bad);
@@ -243,8 +244,8 @@ struct ceph_mdsmap *ceph_mdsmap_decode(void **p, void *end)
 		}
 
 		if (state <= 0) {
-			pr_warn("mdsmap_decode got incorrect state(%s)\n",
-				ceph_mds_state_name(state));
+			dout("mdsmap_decode got incorrect state(%s)\n",
+			     ceph_mds_state_name(state));
 			continue;
 		}
 
diff --git a/fs/ceph/metric.c b/fs/ceph/metric.c
index fee4c4778313..5ec94bd4c1de 100644
--- a/fs/ceph/metric.c
+++ b/fs/ceph/metric.c
@@ -16,6 +16,7 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
 	struct ceph_metric_read_latency *read;
 	struct ceph_metric_write_latency *write;
 	struct ceph_metric_metadata_latency *meta;
+	struct ceph_metric_dlease *dlease;
 	struct ceph_client_metric *m = &mdsc->metric;
 	u64 nr_caps = atomic64_read(&m->total_caps);
 	struct ceph_msg *msg;
@@ -25,7 +26,7 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
 	s32 len;
 
 	len = sizeof(*head) + sizeof(*cap) + sizeof(*read) + sizeof(*write)
-	      + sizeof(*meta);
+	      + sizeof(*meta) + sizeof(*dlease);
 
 	msg = ceph_msg_new(CEPH_MSG_CLIENT_METRICS, len, GFP_NOFS, true);
 	if (!msg) {
@@ -42,8 +43,8 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
 	cap->ver = 1;
 	cap->compat = 1;
 	cap->data_len = cpu_to_le32(sizeof(*cap) - 10);
-	cap->hit = cpu_to_le64(percpu_counter_sum(&mdsc->metric.i_caps_hit));
-	cap->mis = cpu_to_le64(percpu_counter_sum(&mdsc->metric.i_caps_mis));
+	cap->hit = cpu_to_le64(percpu_counter_sum(&m->i_caps_hit));
+	cap->mis = cpu_to_le64(percpu_counter_sum(&m->i_caps_mis));
 	cap->total = cpu_to_le64(nr_caps);
 	items++;
 
@@ -83,6 +84,17 @@ static bool ceph_mdsc_send_metrics(struct ceph_mds_client *mdsc,
 	meta->nsec = cpu_to_le32(ts.tv_nsec);
 	items++;
 
+	/* encode the dentry lease metric */
+	dlease = (struct ceph_metric_dlease *)(meta + 1);
+	dlease->type = cpu_to_le32(CLIENT_METRIC_TYPE_DENTRY_LEASE);
+	dlease->ver = 1;
+	dlease->compat = 1;
+	dlease->data_len = cpu_to_le32(sizeof(*dlease) - 10);
+	dlease->hit = cpu_to_le64(percpu_counter_sum(&m->d_lease_hit));
+	dlease->mis = cpu_to_le64(percpu_counter_sum(&m->d_lease_mis));
+	dlease->total = cpu_to_le64(atomic64_read(&m->total_dentries));
+	items++;
+
 	put_unaligned_le32(items, &head->num);
 	msg->front.iov_len = len;
 	msg->hdr.version = cpu_to_le16(1);
diff --git a/fs/ceph/metric.h b/fs/ceph/metric.h
index 710f3f1dceab..af6038ff39d4 100644
--- a/fs/ceph/metric.h
+++ b/fs/ceph/metric.h
@@ -27,6 +27,7 @@ enum ceph_metric_type {
 	CLIENT_METRIC_TYPE_READ_LATENCY,	\
 	CLIENT_METRIC_TYPE_WRITE_LATENCY,	\
 	CLIENT_METRIC_TYPE_METADATA_LATENCY,	\
+	CLIENT_METRIC_TYPE_DENTRY_LEASE,	\
 						\
 	CLIENT_METRIC_TYPE_MAX,			\
 }
@@ -80,6 +81,19 @@ struct ceph_metric_metadata_latency {
 	__le32 nsec;
 } __packed;
 
+/* metric dentry lease header */
+struct ceph_metric_dlease {
+	__le32 type;     /* ceph metric type */
+
+	__u8  ver;
+	__u8  compat;
+
+	__le32 data_len; /* length of sizeof(hit + mis + total) */
+	__le64 hit;
+	__le64 mis;
+	__le64 total;
+} __packed;
+
 struct ceph_metric_head {
 	__le32 num;	/* the number of metrics that will be sent */
 } __packed;
diff --git a/fs/ceph/quota.c b/fs/ceph/quota.c
index 9b785f11e95a..4e32c9600ecc 100644
--- a/fs/ceph/quota.c
+++ b/fs/ceph/quota.c
@@ -264,7 +264,7 @@ restart:
 	return NULL;
 }
 
-static bool ceph_quota_is_same_realm(struct inode *old, struct inode *new)
+bool ceph_quota_is_same_realm(struct inode *old, struct inode *new)
 {
 	struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(old->i_sb);
 	struct ceph_snap_realm *old_realm, *new_realm;
@@ -516,59 +516,3 @@ bool ceph_quota_update_statfs(struct ceph_fs_client *fsc, struct kstatfs *buf)
 	return is_updated;
 }
 
-/*
- * ceph_quota_check_rename - check if a rename can be executed
- * @mdsc:	MDS client instance
- * @old:	inode to be copied
- * @new:	destination inode (directory)
- *
- * This function verifies if a rename (e.g. moving a file or directory) can be
- * executed.  It forces an rstat update in the @new target directory (and in the
- * source @old as well, if it's a directory).  The actual check is done both for
- * max_files and max_bytes.
- *
- * This function returns 0 if it's OK to do the rename, or, if quotas are
- * exceeded, -EXDEV (if @old is a directory) or -EDQUOT.
- */
-int ceph_quota_check_rename(struct ceph_mds_client *mdsc,
-			    struct inode *old, struct inode *new)
-{
-	struct ceph_inode_info *ci_old = ceph_inode(old);
-	int ret = 0;
-
-	if (ceph_quota_is_same_realm(old, new))
-		return 0;
-
-	/*
-	 * Get the latest rstat for target directory (and for source, if a
-	 * directory)
-	 */
-	ret = ceph_do_getattr(new, CEPH_STAT_RSTAT, false);
-	if (ret)
-		return ret;
-
-	if (S_ISDIR(old->i_mode)) {
-		ret = ceph_do_getattr(old, CEPH_STAT_RSTAT, false);
-		if (ret)
-			return ret;
-		ret = check_quota_exceeded(new, QUOTA_CHECK_MAX_BYTES_OP,
-					   ci_old->i_rbytes);
-		if (!ret)
-			ret = check_quota_exceeded(new,
-						   QUOTA_CHECK_MAX_FILES_OP,
-						   ci_old->i_rfiles +
-						   ci_old->i_rsubdirs);
-		if (ret)
-			ret = -EXDEV;
-	} else {
-		ret = check_quota_exceeded(new, QUOTA_CHECK_MAX_BYTES_OP,
-					   i_size_read(old));
-		if (!ret)
-			ret = check_quota_exceeded(new,
-						   QUOTA_CHECK_MAX_FILES_OP, 1);
-		if (ret)
-			ret = -EDQUOT;
-	}
-
-	return ret;
-}
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index 33ba6f0aa55c..9b1b7f4cfdd4 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -831,6 +831,13 @@ static void destroy_caches(void)
 	ceph_fscache_unregister();
 }
 
+static void __ceph_umount_begin(struct ceph_fs_client *fsc)
+{
+	ceph_osdc_abort_requests(&fsc->client->osdc, -EIO);
+	ceph_mdsc_force_umount(fsc->mdsc);
+	fsc->filp_gen++; // invalidate open files
+}
+
 /*
  * ceph_umount_begin - initiate forced umount.  Tear down the
  * mount, skipping steps that may hang while waiting for server(s).
@@ -843,9 +850,7 @@ static void ceph_umount_begin(struct super_block *sb)
 	if (!fsc)
 		return;
 	fsc->mount_state = CEPH_MOUNT_SHUTDOWN;
-	ceph_osdc_abort_requests(&fsc->client->osdc, -EIO);
-	ceph_mdsc_force_umount(fsc->mdsc);
-	fsc->filp_gen++; // invalidate open files
+	__ceph_umount_begin(fsc);
 }
 
 static const struct super_operations ceph_super_ops = {
@@ -1234,7 +1239,8 @@ int ceph_force_reconnect(struct super_block *sb)
 	struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
 	int err = 0;
 
-	ceph_umount_begin(sb);
+	fsc->mount_state = CEPH_MOUNT_RECOVER;
+	__ceph_umount_begin(fsc);
 
 	/* Make sure all page caches get invalidated.
 	 * see remove_session_caps_cb() */
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 482473e4cce1..b62d8fee3b86 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -106,9 +106,8 @@ struct ceph_fs_client {
 	struct ceph_mount_options *mount_options;
 	struct ceph_client *client;
 
-	unsigned long mount_state;
+	int mount_state;
 
-	unsigned long last_auto_reconnect;
 	bool blocklisted;
 
 	bool have_copy_from2;
@@ -129,6 +128,7 @@ struct ceph_fs_client {
 	struct dentry *debugfs_bdi;
 	struct dentry *debugfs_mdsc, *debugfs_mdsmap;
 	struct dentry *debugfs_metric;
+	struct dentry *debugfs_status;
 	struct dentry *debugfs_mds_sessions;
 #endif
 
@@ -1222,14 +1222,13 @@ extern void ceph_handle_quota(struct ceph_mds_client *mdsc,
 			      struct ceph_mds_session *session,
 			      struct ceph_msg *msg);
 extern bool ceph_quota_is_max_files_exceeded(struct inode *inode);
+extern bool ceph_quota_is_same_realm(struct inode *old, struct inode *new);
 extern bool ceph_quota_is_max_bytes_exceeded(struct inode *inode,
 					     loff_t newlen);
 extern bool ceph_quota_is_max_bytes_approaching(struct inode *inode,
 						loff_t newlen);
 extern bool ceph_quota_update_statfs(struct ceph_fs_client *fsc,
 				     struct kstatfs *buf);
-extern int ceph_quota_check_rename(struct ceph_mds_client *mdsc,
-				   struct inode *old, struct inode *new);
 extern void ceph_cleanup_quotarealms_inodes(struct ceph_mds_client *mdsc);
 
 #endif /* _FS_CEPH_SUPER_H */
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 197cb1234341..24997982de01 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -42,6 +42,7 @@ struct ceph_vxattr {
 #define VXATTR_FLAG_READONLY		(1<<0)
 #define VXATTR_FLAG_HIDDEN		(1<<1)
 #define VXATTR_FLAG_RSTAT		(1<<2)
+#define VXATTR_FLAG_DIRSTAT		(1<<3)
 
 /* layouts */
 
@@ -303,6 +304,36 @@ static ssize_t ceph_vxattrcb_snap_btime(struct ceph_inode_info *ci, char *val,
 				ci->i_snap_btime.tv_nsec);
 }
 
+static ssize_t ceph_vxattrcb_cluster_fsid(struct ceph_inode_info *ci,
+					  char *val, size_t size)
+{
+	struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
+
+	return ceph_fmt_xattr(val, size, "%pU", &fsc->client->fsid);
+}
+
+static ssize_t ceph_vxattrcb_client_id(struct ceph_inode_info *ci,
+				       char *val, size_t size)
+{
+	struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
+
+	return ceph_fmt_xattr(val, size, "client%lld",
+			      ceph_client_gid(fsc->client));
+}
+
+static ssize_t ceph_vxattrcb_caps(struct ceph_inode_info *ci, char *val,
+					size_t size)
+{
+	int issued;
+
+	spin_lock(&ci->i_ceph_lock);
+	issued = __ceph_caps_issued(ci, NULL);
+	spin_unlock(&ci->i_ceph_lock);
+
+	return ceph_fmt_xattr(val, size, "%s/0x%x",
+			      ceph_cap_string(issued), issued);
+}
+
 #define CEPH_XATTR_NAME(_type, _name)	XATTR_CEPH_PREFIX #_type "." #_name
 #define CEPH_XATTR_NAME2(_type, _name, _name2)	\
 	XATTR_CEPH_PREFIX #_type "." #_name "." #_name2
@@ -347,9 +378,9 @@ static struct ceph_vxattr ceph_dir_vxattrs[] = {
 	XATTR_LAYOUT_FIELD(dir, layout, object_size),
 	XATTR_LAYOUT_FIELD(dir, layout, pool),
 	XATTR_LAYOUT_FIELD(dir, layout, pool_namespace),
-	XATTR_NAME_CEPH(dir, entries, 0),
-	XATTR_NAME_CEPH(dir, files, 0),
-	XATTR_NAME_CEPH(dir, subdirs, 0),
+	XATTR_NAME_CEPH(dir, entries, VXATTR_FLAG_DIRSTAT),
+	XATTR_NAME_CEPH(dir, files, VXATTR_FLAG_DIRSTAT),
+	XATTR_NAME_CEPH(dir, subdirs, VXATTR_FLAG_DIRSTAT),
 	XATTR_RSTAT_FIELD(dir, rentries),
 	XATTR_RSTAT_FIELD(dir, rfiles),
 	XATTR_RSTAT_FIELD(dir, rsubdirs),
@@ -378,6 +409,13 @@ static struct ceph_vxattr ceph_dir_vxattrs[] = {
 		.exists_cb = ceph_vxattrcb_snap_btime_exists,
 		.flags = VXATTR_FLAG_READONLY,
 	},
+	{
+		.name = "ceph.caps",
+		.name_size = sizeof("ceph.caps"),
+		.getxattr_cb = ceph_vxattrcb_caps,
+		.exists_cb = NULL,
+		.flags = VXATTR_FLAG_HIDDEN,
+	},
 	{ .name = NULL, 0 }	/* Required table terminator */
 };
 
@@ -403,6 +441,31 @@ static struct ceph_vxattr ceph_file_vxattrs[] = {
 		.exists_cb = ceph_vxattrcb_snap_btime_exists,
 		.flags = VXATTR_FLAG_READONLY,
 	},
+	{
+		.name = "ceph.caps",
+		.name_size = sizeof("ceph.caps"),
+		.getxattr_cb = ceph_vxattrcb_caps,
+		.exists_cb = NULL,
+		.flags = VXATTR_FLAG_HIDDEN,
+	},
+	{ .name = NULL, 0 }	/* Required table terminator */
+};
+
+static struct ceph_vxattr ceph_common_vxattrs[] = {
+	{
+		.name = "ceph.cluster_fsid",
+		.name_size = sizeof("ceph.cluster_fsid"),
+		.getxattr_cb = ceph_vxattrcb_cluster_fsid,
+		.exists_cb = NULL,
+		.flags = VXATTR_FLAG_READONLY,
+	},
+	{
+		.name = "ceph.client_id",
+		.name_size = sizeof("ceph.client_id"),
+		.getxattr_cb = ceph_vxattrcb_client_id,
+		.exists_cb = NULL,
+		.flags = VXATTR_FLAG_READONLY,
+	},
 	{ .name = NULL, 0 }	/* Required table terminator */
 };
 
@@ -428,6 +491,13 @@ static struct ceph_vxattr *ceph_match_vxattr(struct inode *inode,
 		}
 	}
 
+	vxattr = ceph_common_vxattrs;
+	while (vxattr->name) {
+		if (!strcmp(vxattr->name, name))
+			return vxattr;
+		vxattr++;
+	}
+
 	return NULL;
 }
 
@@ -837,6 +907,8 @@ ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value,
 		int mask = 0;
 		if (vxattr->flags & VXATTR_FLAG_RSTAT)
 			mask |= CEPH_STAT_RSTAT;
+		if (vxattr->flags & VXATTR_FLAG_DIRSTAT)
+			mask |= CEPH_CAP_FILE_SHARED;
 		err = ceph_do_getattr(inode, mask, true);
 		if (err)
 			return err;
@@ -950,6 +1022,7 @@ static int ceph_sync_setxattr(struct inode *inode, const char *name,
 	struct ceph_inode_info *ci = ceph_inode(inode);
 	struct ceph_mds_request *req;
 	struct ceph_mds_client *mdsc = fsc->mdsc;
+	struct ceph_osd_client *osdc = &fsc->client->osdc;
 	struct ceph_pagelist *pagelist = NULL;
 	int op = CEPH_MDS_OP_SETXATTR;
 	int err;
@@ -988,6 +1061,8 @@ static int ceph_sync_setxattr(struct inode *inode, const char *name,
 
 	if (op == CEPH_MDS_OP_SETXATTR) {
 		req->r_args.setxattr.flags = cpu_to_le32(flags);
+		req->r_args.setxattr.osdmap_epoch =
+			cpu_to_le32(osdc->osdmap->epoch);
 		req->r_pagelist = pagelist;
 		pagelist = NULL;
 	}
diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig
index 604f65f4b6c5..fe03cbdae959 100644
--- a/fs/cifs/Kconfig
+++ b/fs/cifs/Kconfig
@@ -60,9 +60,9 @@ config CIFS_STATS2
 	  Enabling this option will allow more detailed statistics on SMB
 	  request timing to be displayed in /proc/fs/cifs/DebugData and also
 	  allow optional logging of slow responses to dmesg (depending on the
-	  value of /proc/fs/cifs/cifsFYI, see fs/cifs/README for more details).
-	  These additional statistics may have a minor effect on performance
-	  and memory utilization.
+	  value of /proc/fs/cifs/cifsFYI). See Documentation/admin-guide/cifs/usage.rst
+	  for more details. These additional statistics may have a minor effect
+	  on performance and memory utilization.
 
 	  Unless you are a developer or are doing network performance analysis
 	  or tuning, say N.
@@ -102,10 +102,10 @@ config CIFS_WEAK_PW_HASH
 	  is enabled in the kernel build, LANMAN authentication will not be
 	  used automatically. At runtime LANMAN mounts are disabled but
 	  can be set to required (or optional) either in
-	  /proc/fs/cifs (see fs/cifs/README for more detail) or via an
-	  option on the mount command. This support is disabled by
-	  default in order to reduce the possibility of a downgrade
-	  attack.
+	  /proc/fs/cifs (see Documentation/admin-guide/cifs/usage.rst for
+	  more detail) or via an option on the mount command. This support
+	  is disabled by default in order to reduce the possibility of a
+	  downgrade attack.
 
 	  If unsure, say N.
 
@@ -190,6 +190,17 @@ config CIFS_DFS_UPCALL
 	  servers if their addresses change or for implicit mounts of
 	  DFS junction points. If unsure, say Y.
 
+config CIFS_SWN_UPCALL
+	bool "SWN feature support"
+	depends on CIFS
+	help
+	  The Service Witness Protocol (SWN) is used to get notifications
+	  from a highly available server of resource state changes. This
+	  feature enables an upcall mechanism for CIFS which contacts a
+	  userspace daemon to establish the DCE/RPC connection to retrieve
+	  the cluster available interfaces and resource change notifications.
+	  If unsure, say Y.
+
 config CIFS_NFSD_EXPORT
 	bool "Allow nfsd to export CIFS file system"
 	depends on CIFS && BROKEN
diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile
index cd17d0e50f2a..5213b20843b5 100644
--- a/fs/cifs/Makefile
+++ b/fs/cifs/Makefile
@@ -8,7 +8,7 @@ obj-$(CONFIG_CIFS) += cifs.o
 cifs-y := trace.o cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o \
 	  inode.o link.o misc.o netmisc.o smbencrypt.o transport.o asn1.o \
 	  cifs_unicode.o nterr.o cifsencrypt.o \
-	  readdir.o ioctl.o sess.o export.o smb1ops.o winucase.o \
+	  readdir.o ioctl.o sess.o export.o smb1ops.o unc.o winucase.o \
 	  smb2ops.o smb2maperror.o smb2transport.o \
 	  smb2misc.o smb2pdu.o smb2inode.o smb2file.o cifsacl.o fs_context.o
 
@@ -18,6 +18,8 @@ cifs-$(CONFIG_CIFS_UPCALL) += cifs_spnego.o
 
 cifs-$(CONFIG_CIFS_DFS_UPCALL) += dns_resolve.o cifs_dfs_ref.o dfs_cache.o
 
+cifs-$(CONFIG_CIFS_SWN_UPCALL) += netlink.o cifs_swn.o
+
 cifs-$(CONFIG_CIFS_FSCACHE) += fscache.o cache.o
 
 cifs-$(CONFIG_CIFS_SMB_DIRECT) += smbdirect.o
diff --git a/fs/cifs/cache.c b/fs/cifs/cache.c
index 0f2adecb94f2..488fe0ffc1ef 100644
--- a/fs/cifs/cache.c
+++ b/fs/cifs/cache.c
@@ -53,30 +53,6 @@ const struct fscache_cookie_def cifs_fscache_server_index_def = {
 	.type = FSCACHE_COOKIE_TYPE_INDEX,
 };
 
-char *extract_sharename(const char *treename)
-{
-	const char *src;
-	char *delim, *dst;
-	int len;
-
-	/* skip double chars at the beginning */
-	src = treename + 2;
-
-	/* share name is always preceded by '\\' now */
-	delim = strchr(src, '\\');
-	if (!delim)
-		return ERR_PTR(-EINVAL);
-	delim++;
-	len = strlen(delim);
-
-	/* caller has to free the memory */
-	dst = kstrndup(delim, len, GFP_KERNEL);
-	if (!dst)
-		return ERR_PTR(-ENOMEM);
-
-	return dst;
-}
-
 static enum
 fscache_checkaux cifs_fscache_super_check_aux(void *cookie_netfs_data,
 					      const void *data,
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 53588d7517b4..b231dcf1d1f9 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -23,6 +23,9 @@
 #ifdef CONFIG_CIFS_SMB_DIRECT
 #include "smbdirect.h"
 #endif
+#ifdef CONFIG_CIFS_SWN_UPCALL
+#include "cifs_swn.h"
+#endif
 
 void
 cifs_dump_mem(char *label, void *data, int length)
@@ -115,6 +118,10 @@ static void cifs_debug_tcon(struct seq_file *m, struct cifs_tcon *tcon)
 		seq_printf(m, " POSIX Extensions");
 	if (tcon->ses->server->ops->dump_share_caps)
 		tcon->ses->server->ops->dump_share_caps(m, tcon);
+#ifdef CONFIG_CIFS_SWN_UPCALL
+	if (tcon->use_witness)
+		seq_puts(m, " Witness");
+#endif
 
 	if (tcon->need_reconnect)
 		seq_puts(m, "\tDISCONNECTED ");
@@ -262,6 +269,9 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
 	seq_printf(m, ",XATTR");
 #endif
 	seq_printf(m, ",ACL");
+#ifdef CONFIG_CIFS_SWN_UPCALL
+	seq_puts(m, ",WITNESS");
+#endif
 	seq_putc(m, '\n');
 	seq_printf(m, "CIFSMaxBufSize: %d\n", CIFSMaxBufSize);
 	seq_printf(m, "Active VFS Requests: %d\n", GlobalTotalActiveXid);
@@ -462,6 +472,9 @@ skip_rdma:
 	spin_unlock(&cifs_tcp_ses_lock);
 	seq_putc(m, '\n');
 
+#ifdef CONFIG_CIFS_SWN_UPCALL
+	cifs_swn_dump(m);
+#endif
 	/* BB add code to dump additional info such as TCP session info now */
 	return 0;
 }
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index cc3ada12848d..e4c6ae47a796 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -23,6 +23,7 @@
 #include "cifs_debug.h"
 #include "cifs_unicode.h"
 #include "dfs_cache.h"
+#include "fs_context.h"
 
 static LIST_HEAD(cifs_dfs_automount_list);
 
@@ -124,7 +125,6 @@ cifs_build_devname(char *nodename, const char *prepath)
  * @sb_mountdata:	parent/root DFS mount options (template)
  * @fullpath:		full path in UNC format
  * @ref:		optional server's referral
- * @devname:		optional pointer for saving device name
  *
  * creates mount options for submount based on template options sb_mountdata
  * and replacing unc,ip,prefixpath options with ones we've got form ref_unc.
@@ -134,8 +134,7 @@ cifs_build_devname(char *nodename, const char *prepath)
  */
 char *cifs_compose_mount_options(const char *sb_mountdata,
 				   const char *fullpath,
-				   const struct dfs_info3_param *ref,
-				   char **devname)
+				   const struct dfs_info3_param *ref)
 {
 	int rc;
 	char *name;
@@ -232,10 +231,7 @@ char *cifs_compose_mount_options(const char *sb_mountdata,
 	strcat(mountdata, "ip=");
 	strcat(mountdata, srvIP);
 
-	if (devname)
-		*devname = name;
-	else
-		kfree(name);
+	kfree(name);
 
 	/*cifs_dbg(FYI, "%s: parent mountdata: %s\n", __func__, sb_mountdata);*/
 	/*cifs_dbg(FYI, "%s: submount mountdata: %s\n", __func__, mountdata );*/
@@ -258,6 +254,7 @@ compose_mount_options_err:
  * to perform failover in case we failed to connect to the first target in the
  * referral.
  *
+ * @mntpt:		directory entry for the path we are trying to automount
  * @cifs_sb:		parent/root superblock
  * @fullpath:		full path in UNC format
  */
@@ -275,9 +272,13 @@ static struct vfsmount *cifs_dfs_do_mount(struct dentry *mntpt,
 
 	convert_delimiter(devname, '/');
 
+	/* TODO: change to call fs_context_for_mount(), fill in context directly, call fc_mount */
+
+	/* See afs_mntpt_do_automount in fs/afs/mntpt.c for an example */
+
 	/* strip first '\' from fullpath */
-	mountdata = cifs_compose_mount_options(cifs_sb->mountdata,
-					       fullpath + 1, NULL, NULL);
+	mountdata = cifs_compose_mount_options(cifs_sb->ctx->mount_options,
+					       fullpath + 1, NULL);
 	if (IS_ERR(mountdata)) {
 		kfree(devname);
 		return (struct vfsmount *)mountdata;
diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h
index 6e7c4427369d..aa77edc12212 100644
--- a/fs/cifs/cifs_fs_sb.h
+++ b/fs/cifs/cifs_fs_sb.h
@@ -61,19 +61,9 @@ struct cifs_sb_info {
 	spinlock_t tlink_tree_lock;
 	struct tcon_link *master_tlink;
 	struct nls_table *local_nls;
-	unsigned int bsize;
-	unsigned int rsize;
-	unsigned int wsize;
-	unsigned long actimeo; /* attribute cache timeout (jiffies) */
+	struct smb3_fs_context *ctx;
 	atomic_t active;
-	kuid_t	mnt_uid;
-	kgid_t	mnt_gid;
-	kuid_t	mnt_backupuid;
-	kgid_t	mnt_backupgid;
-	umode_t	mnt_file_mode;
-	umode_t	mnt_dir_mode;
 	unsigned int mnt_cifs_flags;
-	char   *mountdata; /* options received at mount time or via DFS refs */
 	struct delayed_work prune_tlinks;
 	struct rcu_head rcu;
 
diff --git a/fs/cifs/cifs_swn.c b/fs/cifs/cifs_swn.c
new file mode 100644
index 000000000000..d35f599aa00e
--- /dev/null
+++ b/fs/cifs/cifs_swn.c
@@ -0,0 +1,695 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Witness Service client for CIFS
+ *
+ * Copyright (c) 2020 Samuel Cabrero <scabrero@suse.de>
+ */
+
+#include <linux/kref.h>
+#include <net/genetlink.h>
+#include <uapi/linux/cifs/cifs_netlink.h>
+
+#include "cifs_swn.h"
+#include "cifsglob.h"
+#include "cifsproto.h"
+#include "fscache.h"
+#include "cifs_debug.h"
+#include "netlink.h"
+
+static DEFINE_IDR(cifs_swnreg_idr);
+static DEFINE_MUTEX(cifs_swnreg_idr_mutex);
+
+struct cifs_swn_reg {
+	int id;
+	struct kref ref_count;
+
+	const char *net_name;
+	const char *share_name;
+	bool net_name_notify;
+	bool share_name_notify;
+	bool ip_notify;
+
+	struct cifs_tcon *tcon;
+};
+
+static int cifs_swn_auth_info_krb(struct cifs_tcon *tcon, struct sk_buff *skb)
+{
+	int ret;
+
+	ret = nla_put_flag(skb, CIFS_GENL_ATTR_SWN_KRB_AUTH);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static int cifs_swn_auth_info_ntlm(struct cifs_tcon *tcon, struct sk_buff *skb)
+{
+	int ret;
+
+	if (tcon->ses->user_name != NULL) {
+		ret = nla_put_string(skb, CIFS_GENL_ATTR_SWN_USER_NAME, tcon->ses->user_name);
+		if (ret < 0)
+			return ret;
+	}
+
+	if (tcon->ses->password != NULL) {
+		ret = nla_put_string(skb, CIFS_GENL_ATTR_SWN_PASSWORD, tcon->ses->password);
+		if (ret < 0)
+			return ret;
+	}
+
+	if (tcon->ses->domainName != NULL) {
+		ret = nla_put_string(skb, CIFS_GENL_ATTR_SWN_DOMAIN_NAME, tcon->ses->domainName);
+		if (ret < 0)
+			return ret;
+	}
+
+	return 0;
+}
+
+/*
+ * Sends a register message to the userspace daemon based on the registration.
+ * The authentication information to connect to the witness service is bundled
+ * into the message.
+ */
+static int cifs_swn_send_register_message(struct cifs_swn_reg *swnreg)
+{
+	struct sk_buff *skb;
+	struct genlmsghdr *hdr;
+	enum securityEnum authtype;
+	struct sockaddr_storage *addr;
+	int ret;
+
+	skb = genlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (skb == NULL) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+	hdr = genlmsg_put(skb, 0, 0, &cifs_genl_family, 0, CIFS_GENL_CMD_SWN_REGISTER);
+	if (hdr == NULL) {
+		ret = -ENOMEM;
+		goto nlmsg_fail;
+	}
+
+	ret = nla_put_u32(skb, CIFS_GENL_ATTR_SWN_REGISTRATION_ID, swnreg->id);
+	if (ret < 0)
+		goto nlmsg_fail;
+
+	ret = nla_put_string(skb, CIFS_GENL_ATTR_SWN_NET_NAME, swnreg->net_name);
+	if (ret < 0)
+		goto nlmsg_fail;
+
+	ret = nla_put_string(skb, CIFS_GENL_ATTR_SWN_SHARE_NAME, swnreg->share_name);
+	if (ret < 0)
+		goto nlmsg_fail;
+
+	/*
+	 * If there is an address stored use it instead of the server address, because we are
+	 * in the process of reconnecting to it after a share has been moved or we have been
+	 * told to switch to it (client move message). In these cases we unregister from the
+	 * server address and register to the new address when we receive the notification.
+	 */
+	if (swnreg->tcon->ses->server->use_swn_dstaddr)
+		addr = &swnreg->tcon->ses->server->swn_dstaddr;
+	else
+		addr = &swnreg->tcon->ses->server->dstaddr;
+
+	ret = nla_put(skb, CIFS_GENL_ATTR_SWN_IP, sizeof(struct sockaddr_storage), addr);
+	if (ret < 0)
+		goto nlmsg_fail;
+
+	if (swnreg->net_name_notify) {
+		ret = nla_put_flag(skb, CIFS_GENL_ATTR_SWN_NET_NAME_NOTIFY);
+		if (ret < 0)
+			goto nlmsg_fail;
+	}
+
+	if (swnreg->share_name_notify) {
+		ret = nla_put_flag(skb, CIFS_GENL_ATTR_SWN_SHARE_NAME_NOTIFY);
+		if (ret < 0)
+			goto nlmsg_fail;
+	}
+
+	if (swnreg->ip_notify) {
+		ret = nla_put_flag(skb, CIFS_GENL_ATTR_SWN_IP_NOTIFY);
+		if (ret < 0)
+			goto nlmsg_fail;
+	}
+
+	authtype = cifs_select_sectype(swnreg->tcon->ses->server, swnreg->tcon->ses->sectype);
+	switch (authtype) {
+	case Kerberos:
+		ret = cifs_swn_auth_info_krb(swnreg->tcon, skb);
+		if (ret < 0) {
+			cifs_dbg(VFS, "%s: Failed to get kerberos auth info: %d\n", __func__, ret);
+			goto nlmsg_fail;
+		}
+		break;
+	case LANMAN:
+	case NTLM:
+	case NTLMv2:
+	case RawNTLMSSP:
+		ret = cifs_swn_auth_info_ntlm(swnreg->tcon, skb);
+		if (ret < 0) {
+			cifs_dbg(VFS, "%s: Failed to get NTLM auth info: %d\n", __func__, ret);
+			goto nlmsg_fail;
+		}
+		break;
+	default:
+		cifs_dbg(VFS, "%s: secType %d not supported!\n", __func__, authtype);
+		ret = -EINVAL;
+		goto nlmsg_fail;
+	}
+
+	genlmsg_end(skb, hdr);
+	genlmsg_multicast(&cifs_genl_family, skb, 0, CIFS_GENL_MCGRP_SWN, GFP_ATOMIC);
+
+	cifs_dbg(FYI, "%s: Message to register for network name %s with id %d sent\n", __func__,
+			swnreg->net_name, swnreg->id);
+
+	return 0;
+
+nlmsg_fail:
+	genlmsg_cancel(skb, hdr);
+	nlmsg_free(skb);
+fail:
+	return ret;
+}
+
+/*
+ * Sends an uregister message to the userspace daemon based on the registration
+ */
+static int cifs_swn_send_unregister_message(struct cifs_swn_reg *swnreg)
+{
+	struct sk_buff *skb;
+	struct genlmsghdr *hdr;
+	int ret;
+
+	skb = genlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+	if (skb == NULL)
+		return -ENOMEM;
+
+	hdr = genlmsg_put(skb, 0, 0, &cifs_genl_family, 0, CIFS_GENL_CMD_SWN_UNREGISTER);
+	if (hdr == NULL) {
+		ret = -ENOMEM;
+		goto nlmsg_fail;
+	}
+
+	ret = nla_put_u32(skb, CIFS_GENL_ATTR_SWN_REGISTRATION_ID, swnreg->id);
+	if (ret < 0)
+		goto nlmsg_fail;
+
+	ret = nla_put_string(skb, CIFS_GENL_ATTR_SWN_NET_NAME, swnreg->net_name);
+	if (ret < 0)
+		goto nlmsg_fail;
+
+	ret = nla_put_string(skb, CIFS_GENL_ATTR_SWN_SHARE_NAME, swnreg->share_name);
+	if (ret < 0)
+		goto nlmsg_fail;
+
+	ret = nla_put(skb, CIFS_GENL_ATTR_SWN_IP, sizeof(struct sockaddr_storage),
+			&swnreg->tcon->ses->server->dstaddr);
+	if (ret < 0)
+		goto nlmsg_fail;
+
+	if (swnreg->net_name_notify) {
+		ret = nla_put_flag(skb, CIFS_GENL_ATTR_SWN_NET_NAME_NOTIFY);
+		if (ret < 0)
+			goto nlmsg_fail;
+	}
+
+	if (swnreg->share_name_notify) {
+		ret = nla_put_flag(skb, CIFS_GENL_ATTR_SWN_SHARE_NAME_NOTIFY);
+		if (ret < 0)
+			goto nlmsg_fail;
+	}
+
+	if (swnreg->ip_notify) {
+		ret = nla_put_flag(skb, CIFS_GENL_ATTR_SWN_IP_NOTIFY);
+		if (ret < 0)
+			goto nlmsg_fail;
+	}
+
+	genlmsg_end(skb, hdr);
+	genlmsg_multicast(&cifs_genl_family, skb, 0, CIFS_GENL_MCGRP_SWN, GFP_ATOMIC);
+
+	cifs_dbg(FYI, "%s: Message to unregister for network name %s with id %d sent\n", __func__,
+			swnreg->net_name, swnreg->id);
+
+	return 0;
+
+nlmsg_fail:
+	genlmsg_cancel(skb, hdr);
+	nlmsg_free(skb);
+	return ret;
+}
+
+/*
+ * Try to find a matching registration for the tcon's server name and share name.
+ * Calls to this funciton must be protected by cifs_swnreg_idr_mutex.
+ * TODO Try to avoid memory allocations
+ */
+static struct cifs_swn_reg *cifs_find_swn_reg(struct cifs_tcon *tcon)
+{
+	struct cifs_swn_reg *swnreg;
+	int id;
+	const char *share_name;
+	const char *net_name;
+
+	net_name = extract_hostname(tcon->treeName);
+	if (IS_ERR(net_name)) {
+		int ret;
+
+		ret = PTR_ERR(net_name);
+		cifs_dbg(VFS, "%s: failed to extract host name from target '%s': %d\n",
+				__func__, tcon->treeName, ret);
+		return ERR_PTR(-EINVAL);
+	}
+
+	share_name = extract_sharename(tcon->treeName);
+	if (IS_ERR(share_name)) {
+		int ret;
+
+		ret = PTR_ERR(net_name);
+		cifs_dbg(VFS, "%s: failed to extract share name from target '%s': %d\n",
+				__func__, tcon->treeName, ret);
+		kfree(net_name);
+		return ERR_PTR(-EINVAL);
+	}
+
+	idr_for_each_entry(&cifs_swnreg_idr, swnreg, id) {
+		if (strcasecmp(swnreg->net_name, net_name) != 0
+		    || strcasecmp(swnreg->share_name, share_name) != 0) {
+			continue;
+		}
+
+		cifs_dbg(FYI, "Existing swn registration for %s:%s found\n", swnreg->net_name,
+				swnreg->share_name);
+
+		kfree(net_name);
+		kfree(share_name);
+
+		return swnreg;
+	}
+
+	kfree(net_name);
+	kfree(share_name);
+
+	return ERR_PTR(-EEXIST);
+}
+
+/*
+ * Get a registration for the tcon's server and share name, allocating a new one if it does not
+ * exists
+ */
+static struct cifs_swn_reg *cifs_get_swn_reg(struct cifs_tcon *tcon)
+{
+	struct cifs_swn_reg *reg = NULL;
+	int ret;
+
+	mutex_lock(&cifs_swnreg_idr_mutex);
+
+	/* Check if we are already registered for this network and share names */
+	reg = cifs_find_swn_reg(tcon);
+	if (!IS_ERR(reg)) {
+		kref_get(&reg->ref_count);
+		mutex_unlock(&cifs_swnreg_idr_mutex);
+		return reg;
+	} else if (PTR_ERR(reg) != -EEXIST) {
+		mutex_unlock(&cifs_swnreg_idr_mutex);
+		return reg;
+	}
+
+	reg = kmalloc(sizeof(struct cifs_swn_reg), GFP_ATOMIC);
+	if (reg == NULL) {
+		mutex_unlock(&cifs_swnreg_idr_mutex);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	kref_init(&reg->ref_count);
+
+	reg->id = idr_alloc(&cifs_swnreg_idr, reg, 1, 0, GFP_ATOMIC);
+	if (reg->id < 0) {
+		cifs_dbg(FYI, "%s: failed to allocate registration id\n", __func__);
+		ret = reg->id;
+		goto fail;
+	}
+
+	reg->net_name = extract_hostname(tcon->treeName);
+	if (IS_ERR(reg->net_name)) {
+		ret = PTR_ERR(reg->net_name);
+		cifs_dbg(VFS, "%s: failed to extract host name from target: %d\n", __func__, ret);
+		goto fail_idr;
+	}
+
+	reg->share_name = extract_sharename(tcon->treeName);
+	if (IS_ERR(reg->share_name)) {
+		ret = PTR_ERR(reg->share_name);
+		cifs_dbg(VFS, "%s: failed to extract share name from target: %d\n", __func__, ret);
+		goto fail_net_name;
+	}
+
+	reg->net_name_notify = true;
+	reg->share_name_notify = true;
+	reg->ip_notify = (tcon->capabilities & SMB2_SHARE_CAP_SCALEOUT);
+
+	reg->tcon = tcon;
+
+	mutex_unlock(&cifs_swnreg_idr_mutex);
+
+	return reg;
+
+fail_net_name:
+	kfree(reg->net_name);
+fail_idr:
+	idr_remove(&cifs_swnreg_idr, reg->id);
+fail:
+	kfree(reg);
+	mutex_unlock(&cifs_swnreg_idr_mutex);
+	return ERR_PTR(ret);
+}
+
+static void cifs_swn_reg_release(struct kref *ref)
+{
+	struct cifs_swn_reg *swnreg = container_of(ref, struct cifs_swn_reg, ref_count);
+	int ret;
+
+	ret = cifs_swn_send_unregister_message(swnreg);
+	if (ret < 0)
+		cifs_dbg(VFS, "%s: Failed to send unregister message: %d\n", __func__, ret);
+
+	idr_remove(&cifs_swnreg_idr, swnreg->id);
+	kfree(swnreg->net_name);
+	kfree(swnreg->share_name);
+	kfree(swnreg);
+}
+
+static void cifs_put_swn_reg(struct cifs_swn_reg *swnreg)
+{
+	mutex_lock(&cifs_swnreg_idr_mutex);
+	kref_put(&swnreg->ref_count, cifs_swn_reg_release);
+	mutex_unlock(&cifs_swnreg_idr_mutex);
+}
+
+static int cifs_swn_resource_state_changed(struct cifs_swn_reg *swnreg, const char *name, int state)
+{
+	int i;
+
+	switch (state) {
+	case CIFS_SWN_RESOURCE_STATE_UNAVAILABLE:
+		cifs_dbg(FYI, "%s: resource name '%s' become unavailable\n", __func__, name);
+		for (i = 0; i < swnreg->tcon->ses->chan_count; i++) {
+			spin_lock(&GlobalMid_Lock);
+			if (swnreg->tcon->ses->chans[i].server->tcpStatus != CifsExiting)
+				swnreg->tcon->ses->chans[i].server->tcpStatus = CifsNeedReconnect;
+			spin_unlock(&GlobalMid_Lock);
+		}
+		break;
+	case CIFS_SWN_RESOURCE_STATE_AVAILABLE:
+		cifs_dbg(FYI, "%s: resource name '%s' become available\n", __func__, name);
+		for (i = 0; i < swnreg->tcon->ses->chan_count; i++) {
+			spin_lock(&GlobalMid_Lock);
+			if (swnreg->tcon->ses->chans[i].server->tcpStatus != CifsExiting)
+				swnreg->tcon->ses->chans[i].server->tcpStatus = CifsNeedReconnect;
+			spin_unlock(&GlobalMid_Lock);
+		}
+		break;
+	case CIFS_SWN_RESOURCE_STATE_UNKNOWN:
+		cifs_dbg(FYI, "%s: resource name '%s' changed to unknown state\n", __func__, name);
+		break;
+	}
+	return 0;
+}
+
+static bool cifs_sockaddr_equal(struct sockaddr_storage *addr1, struct sockaddr_storage *addr2)
+{
+	if (addr1->ss_family != addr2->ss_family)
+		return false;
+
+	if (addr1->ss_family == AF_INET) {
+		return (memcmp(&((const struct sockaddr_in *)addr1)->sin_addr,
+				&((const struct sockaddr_in *)addr2)->sin_addr,
+				sizeof(struct in_addr)) == 0);
+	}
+
+	if (addr1->ss_family == AF_INET6) {
+		return (memcmp(&((const struct sockaddr_in6 *)addr1)->sin6_addr,
+				&((const struct sockaddr_in6 *)addr2)->sin6_addr,
+				sizeof(struct in6_addr)) == 0);
+	}
+
+	return false;
+}
+
+static int cifs_swn_store_swn_addr(const struct sockaddr_storage *new,
+				   const struct sockaddr_storage *old,
+				   struct sockaddr_storage *dst)
+{
+	__be16 port;
+
+	if (old->ss_family == AF_INET) {
+		struct sockaddr_in *ipv4 = (struct sockaddr_in *)old;
+
+		port = ipv4->sin_port;
+	}
+
+	if (old->ss_family == AF_INET6) {
+		struct sockaddr_in6 *ipv6 = (struct sockaddr_in6 *)old;
+
+		port = ipv6->sin6_port;
+	}
+
+	if (new->ss_family == AF_INET) {
+		struct sockaddr_in *ipv4 = (struct sockaddr_in *)new;
+
+		ipv4->sin_port = port;
+	}
+
+	if (new->ss_family == AF_INET6) {
+		struct sockaddr_in6 *ipv6 = (struct sockaddr_in6 *)new;
+
+		ipv6->sin6_port = port;
+	}
+
+	*dst = *new;
+
+	return 0;
+}
+
+static int cifs_swn_reconnect(struct cifs_tcon *tcon, struct sockaddr_storage *addr)
+{
+	int ret = 0;
+
+	/* Store the reconnect address */
+	mutex_lock(&tcon->ses->server->srv_mutex);
+	if (cifs_sockaddr_equal(&tcon->ses->server->dstaddr, addr))
+		goto unlock;
+
+	ret = cifs_swn_store_swn_addr(addr, &tcon->ses->server->dstaddr,
+				      &tcon->ses->server->swn_dstaddr);
+	if (ret < 0) {
+		cifs_dbg(VFS, "%s: failed to store address: %d\n", __func__, ret);
+		goto unlock;
+	}
+	tcon->ses->server->use_swn_dstaddr = true;
+
+	/*
+	 * Unregister to stop receiving notifications for the old IP address.
+	 */
+	ret = cifs_swn_unregister(tcon);
+	if (ret < 0) {
+		cifs_dbg(VFS, "%s: Failed to unregister for witness notifications: %d\n",
+			 __func__, ret);
+		goto unlock;
+	}
+
+	/*
+	 * And register to receive notifications for the new IP address now that we have
+	 * stored the new address.
+	 */
+	ret = cifs_swn_register(tcon);
+	if (ret < 0) {
+		cifs_dbg(VFS, "%s: Failed to register for witness notifications: %d\n",
+			 __func__, ret);
+		goto unlock;
+	}
+
+	spin_lock(&GlobalMid_Lock);
+	if (tcon->ses->server->tcpStatus != CifsExiting)
+		tcon->ses->server->tcpStatus = CifsNeedReconnect;
+	spin_unlock(&GlobalMid_Lock);
+
+unlock:
+	mutex_unlock(&tcon->ses->server->srv_mutex);
+
+	return ret;
+}
+
+static int cifs_swn_client_move(struct cifs_swn_reg *swnreg, struct sockaddr_storage *addr)
+{
+	struct sockaddr_in *ipv4 = (struct sockaddr_in *)addr;
+	struct sockaddr_in6 *ipv6 = (struct sockaddr_in6 *)addr;
+
+	if (addr->ss_family == AF_INET)
+		cifs_dbg(FYI, "%s: move to %pI4\n", __func__, &ipv4->sin_addr);
+	else if (addr->ss_family == AF_INET6)
+		cifs_dbg(FYI, "%s: move to %pI6\n", __func__, &ipv6->sin6_addr);
+
+	return cifs_swn_reconnect(swnreg->tcon, addr);
+}
+
+int cifs_swn_notify(struct sk_buff *skb, struct genl_info *info)
+{
+	struct cifs_swn_reg *swnreg;
+	char name[256];
+	int type;
+
+	if (info->attrs[CIFS_GENL_ATTR_SWN_REGISTRATION_ID]) {
+		int swnreg_id;
+
+		swnreg_id = nla_get_u32(info->attrs[CIFS_GENL_ATTR_SWN_REGISTRATION_ID]);
+		mutex_lock(&cifs_swnreg_idr_mutex);
+		swnreg = idr_find(&cifs_swnreg_idr, swnreg_id);
+		mutex_unlock(&cifs_swnreg_idr_mutex);
+		if (swnreg == NULL) {
+			cifs_dbg(FYI, "%s: registration id %d not found\n", __func__, swnreg_id);
+			return -EINVAL;
+		}
+	} else {
+		cifs_dbg(FYI, "%s: missing registration id attribute\n", __func__);
+		return -EINVAL;
+	}
+
+	if (info->attrs[CIFS_GENL_ATTR_SWN_NOTIFICATION_TYPE]) {
+		type = nla_get_u32(info->attrs[CIFS_GENL_ATTR_SWN_NOTIFICATION_TYPE]);
+	} else {
+		cifs_dbg(FYI, "%s: missing notification type attribute\n", __func__);
+		return -EINVAL;
+	}
+
+	switch (type) {
+	case CIFS_SWN_NOTIFICATION_RESOURCE_CHANGE: {
+		int state;
+
+		if (info->attrs[CIFS_GENL_ATTR_SWN_RESOURCE_NAME]) {
+			nla_strscpy(name, info->attrs[CIFS_GENL_ATTR_SWN_RESOURCE_NAME],
+					sizeof(name));
+		} else {
+			cifs_dbg(FYI, "%s: missing resource name attribute\n", __func__);
+			return -EINVAL;
+		}
+		if (info->attrs[CIFS_GENL_ATTR_SWN_RESOURCE_STATE]) {
+			state = nla_get_u32(info->attrs[CIFS_GENL_ATTR_SWN_RESOURCE_STATE]);
+		} else {
+			cifs_dbg(FYI, "%s: missing resource state attribute\n", __func__);
+			return -EINVAL;
+		}
+		return cifs_swn_resource_state_changed(swnreg, name, state);
+	}
+	case CIFS_SWN_NOTIFICATION_CLIENT_MOVE: {
+		struct sockaddr_storage addr;
+
+		if (info->attrs[CIFS_GENL_ATTR_SWN_IP]) {
+			nla_memcpy(&addr, info->attrs[CIFS_GENL_ATTR_SWN_IP], sizeof(addr));
+		} else {
+			cifs_dbg(FYI, "%s: missing IP address attribute\n", __func__);
+			return -EINVAL;
+		}
+		return cifs_swn_client_move(swnreg, &addr);
+	}
+	default:
+		cifs_dbg(FYI, "%s: unknown notification type %d\n", __func__, type);
+		break;
+	}
+
+	return 0;
+}
+
+int cifs_swn_register(struct cifs_tcon *tcon)
+{
+	struct cifs_swn_reg *swnreg;
+	int ret;
+
+	swnreg = cifs_get_swn_reg(tcon);
+	if (IS_ERR(swnreg))
+		return PTR_ERR(swnreg);
+
+	ret = cifs_swn_send_register_message(swnreg);
+	if (ret < 0) {
+		cifs_dbg(VFS, "%s: Failed to send swn register message: %d\n", __func__, ret);
+		/* Do not put the swnreg or return error, the echo task will retry */
+	}
+
+	return 0;
+}
+
+int cifs_swn_unregister(struct cifs_tcon *tcon)
+{
+	struct cifs_swn_reg *swnreg;
+
+	mutex_lock(&cifs_swnreg_idr_mutex);
+
+	swnreg = cifs_find_swn_reg(tcon);
+	if (IS_ERR(swnreg)) {
+		mutex_unlock(&cifs_swnreg_idr_mutex);
+		return PTR_ERR(swnreg);
+	}
+
+	mutex_unlock(&cifs_swnreg_idr_mutex);
+
+	cifs_put_swn_reg(swnreg);
+
+	return 0;
+}
+
+void cifs_swn_dump(struct seq_file *m)
+{
+	struct cifs_swn_reg *swnreg;
+	struct sockaddr_in *sa;
+	struct sockaddr_in6 *sa6;
+	int id;
+
+	seq_puts(m, "Witness registrations:");
+
+	mutex_lock(&cifs_swnreg_idr_mutex);
+	idr_for_each_entry(&cifs_swnreg_idr, swnreg, id) {
+		seq_printf(m, "\nId: %u Refs: %u Network name: '%s'%s Share name: '%s'%s Ip address: ",
+				id, kref_read(&swnreg->ref_count),
+				swnreg->net_name, swnreg->net_name_notify ? "(y)" : "(n)",
+				swnreg->share_name, swnreg->share_name_notify ? "(y)" : "(n)");
+		switch (swnreg->tcon->ses->server->dstaddr.ss_family) {
+		case AF_INET:
+			sa = (struct sockaddr_in *) &swnreg->tcon->ses->server->dstaddr;
+			seq_printf(m, "%pI4", &sa->sin_addr.s_addr);
+			break;
+		case AF_INET6:
+			sa6 = (struct sockaddr_in6 *) &swnreg->tcon->ses->server->dstaddr;
+			seq_printf(m, "%pI6", &sa6->sin6_addr.s6_addr);
+			if (sa6->sin6_scope_id)
+				seq_printf(m, "%%%u", sa6->sin6_scope_id);
+			break;
+		default:
+			seq_puts(m, "(unknown)");
+		}
+		seq_printf(m, "%s", swnreg->ip_notify ? "(y)" : "(n)");
+	}
+	mutex_unlock(&cifs_swnreg_idr_mutex);
+	seq_puts(m, "\n");
+}
+
+void cifs_swn_check(void)
+{
+	struct cifs_swn_reg *swnreg;
+	int id;
+	int ret;
+
+	mutex_lock(&cifs_swnreg_idr_mutex);
+	idr_for_each_entry(&cifs_swnreg_idr, swnreg, id) {
+		ret = cifs_swn_send_register_message(swnreg);
+		if (ret < 0)
+			cifs_dbg(FYI, "%s: Failed to send register message: %d\n", __func__, ret);
+	}
+	mutex_unlock(&cifs_swnreg_idr_mutex);
+}
diff --git a/fs/cifs/cifs_swn.h b/fs/cifs/cifs_swn.h
new file mode 100644
index 000000000000..236ecd4959d5
--- /dev/null
+++ b/fs/cifs/cifs_swn.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Witness Service client for CIFS
+ *
+ * Copyright (c) 2020 Samuel Cabrero <scabrero@suse.de>
+ */
+
+#ifndef _CIFS_SWN_H
+#define _CIFS_SWN_H
+
+struct cifs_tcon;
+struct sk_buff;
+struct genl_info;
+
+extern int cifs_swn_register(struct cifs_tcon *tcon);
+
+extern int cifs_swn_unregister(struct cifs_tcon *tcon);
+
+extern int cifs_swn_notify(struct sk_buff *skb, struct genl_info *info);
+
+extern void cifs_swn_dump(struct seq_file *m);
+
+extern void cifs_swn_check(void);
+
+#endif /* _CIFS_SWN_H */
diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c
index ef4784e72b1d..562913e2b3f2 100644
--- a/fs/cifs/cifsacl.c
+++ b/fs/cifs/cifsacl.c
@@ -32,6 +32,7 @@
 #include "cifsacl.h"
 #include "cifsproto.h"
 #include "cifs_debug.h"
+#include "fs_context.h"
 
 /* security id for everyone/world system group */
 static const struct cifs_sid sid_everyone = {
@@ -346,8 +347,8 @@ sid_to_id(struct cifs_sb_info *cifs_sb, struct cifs_sid *psid,
 	struct key *sidkey;
 	char *sidstr;
 	const struct cred *saved_cred;
-	kuid_t fuid = cifs_sb->mnt_uid;
-	kgid_t fgid = cifs_sb->mnt_gid;
+	kuid_t fuid = cifs_sb->ctx->linux_uid;
+	kgid_t fgid = cifs_sb->ctx->linux_gid;
 
 	/*
 	 * If we have too many subauthorities, then something is really wrong.
@@ -448,7 +449,7 @@ out_revert_creds:
 
 	/*
 	 * Note that we return 0 here unconditionally. If the mapping
-	 * fails then we just fall back to using the mnt_uid/mnt_gid.
+	 * fails then we just fall back to using the ctx->linux_uid/linux_gid.
 	 */
 got_valid_id:
 	rc = 0;
@@ -557,30 +558,37 @@ static void copy_sec_desc(const struct cifs_ntsd *pntsd,
    bits to set can be: S_IRWXU, S_IRWXG or S_IRWXO ie 00700 or 00070 or 00007
 */
 static void access_flags_to_mode(__le32 ace_flags, int type, umode_t *pmode,
-				 umode_t *pbits_to_set)
+				 umode_t *pdenied, umode_t mask)
 {
 	__u32 flags = le32_to_cpu(ace_flags);
-	/* the order of ACEs is important.  The canonical order is to begin with
-	   DENY entries followed by ALLOW, otherwise an allow entry could be
-	   encountered first, making the subsequent deny entry like "dead code"
-	   which would be superflous since Windows stops when a match is made
-	   for the operation you are trying to perform for your user */
-
-	/* For deny ACEs we change the mask so that subsequent allow access
-	   control entries do not turn on the bits we are denying */
+	/*
+	 * Do not assume "preferred" or "canonical" order.
+	 * The first DENY or ALLOW ACE which matches perfectly is
+	 * the permission to be used. Once allowed or denied, same
+	 * permission in later ACEs do not matter.
+	 */
+
+	/* If not already allowed, deny these bits */
 	if (type == ACCESS_DENIED) {
-		if (flags & GENERIC_ALL)
-			*pbits_to_set &= ~S_IRWXUGO;
-
-		if ((flags & GENERIC_WRITE) ||
-			((flags & FILE_WRITE_RIGHTS) == FILE_WRITE_RIGHTS))
-			*pbits_to_set &= ~S_IWUGO;
-		if ((flags & GENERIC_READ) ||
-			((flags & FILE_READ_RIGHTS) == FILE_READ_RIGHTS))
-			*pbits_to_set &= ~S_IRUGO;
-		if ((flags & GENERIC_EXECUTE) ||
-			((flags & FILE_EXEC_RIGHTS) == FILE_EXEC_RIGHTS))
-			*pbits_to_set &= ~S_IXUGO;
+		if (flags & GENERIC_ALL &&
+				!(*pmode & mask & 0777))
+			*pdenied |= mask & 0777;
+
+		if (((flags & GENERIC_WRITE) ||
+				((flags & FILE_WRITE_RIGHTS) == FILE_WRITE_RIGHTS)) &&
+				!(*pmode & mask & 0222))
+			*pdenied |= mask & 0222;
+
+		if (((flags & GENERIC_READ) ||
+				((flags & FILE_READ_RIGHTS) == FILE_READ_RIGHTS)) &&
+				!(*pmode & mask & 0444))
+			*pdenied |= mask & 0444;
+
+		if (((flags & GENERIC_EXECUTE) ||
+				((flags & FILE_EXEC_RIGHTS) == FILE_EXEC_RIGHTS)) &&
+				!(*pmode & mask & 0111))
+			*pdenied |= mask & 0111;
+
 		return;
 	} else if (type != ACCESS_ALLOWED) {
 		cifs_dbg(VFS, "unknown access control type %d\n", type);
@@ -588,20 +596,38 @@ static void access_flags_to_mode(__le32 ace_flags, int type, umode_t *pmode,
 	}
 	/* else ACCESS_ALLOWED type */
 
-	if (flags & GENERIC_ALL) {
-		*pmode |= (S_IRWXUGO & (*pbits_to_set));
+	if ((flags & GENERIC_ALL) &&
+			!(*pdenied & mask & 0777)) {
+		*pmode |= mask & 0777;
 		cifs_dbg(NOISY, "all perms\n");
 		return;
 	}
-	if ((flags & GENERIC_WRITE) ||
-			((flags & FILE_WRITE_RIGHTS) == FILE_WRITE_RIGHTS))
-		*pmode |= (S_IWUGO & (*pbits_to_set));
-	if ((flags & GENERIC_READ) ||
-			((flags & FILE_READ_RIGHTS) == FILE_READ_RIGHTS))
-		*pmode |= (S_IRUGO & (*pbits_to_set));
-	if ((flags & GENERIC_EXECUTE) ||
-			((flags & FILE_EXEC_RIGHTS) == FILE_EXEC_RIGHTS))
-		*pmode |= (S_IXUGO & (*pbits_to_set));
+
+	if (((flags & GENERIC_WRITE) ||
+			((flags & FILE_WRITE_RIGHTS) == FILE_WRITE_RIGHTS)) &&
+			!(*pdenied & mask & 0222))
+		*pmode |= mask & 0222;
+
+	if (((flags & GENERIC_READ) ||
+			((flags & FILE_READ_RIGHTS) == FILE_READ_RIGHTS)) &&
+			!(*pdenied & mask & 0444))
+		*pmode |= mask & 0444;
+
+	if (((flags & GENERIC_EXECUTE) ||
+			((flags & FILE_EXEC_RIGHTS) == FILE_EXEC_RIGHTS)) &&
+			!(*pdenied & mask & 0111))
+		*pmode |= mask & 0111;
+
+	/* If DELETE_CHILD is set only on an owner ACE, set sticky bit */
+	if (flags & FILE_DELETE_CHILD) {
+		if (mask == ACL_OWNER_MASK) {
+			if (!(*pdenied & 01000))
+				*pmode |= 01000;
+		} else if (!(*pdenied & 01000)) {
+			*pmode &= ~01000;
+			*pdenied |= 01000;
+		}
+	}
 
 	cifs_dbg(NOISY, "access flags 0x%x mode now %04o\n", flags, *pmode);
 	return;
@@ -638,17 +664,26 @@ static void mode_to_access_flags(umode_t mode, umode_t bits_to_use,
 }
 
 static __u16 fill_ace_for_sid(struct cifs_ace *pntace,
-			const struct cifs_sid *psid, __u64 nmode, umode_t bits)
+			const struct cifs_sid *psid, __u64 nmode,
+			umode_t bits, __u8 access_type,
+			bool allow_delete_child)
 {
 	int i;
 	__u16 size = 0;
 	__u32 access_req = 0;
 
-	pntace->type = ACCESS_ALLOWED;
+	pntace->type = access_type;
 	pntace->flags = 0x0;
 	mode_to_access_flags(nmode, bits, &access_req);
-	if (!access_req)
+
+	if (access_type == ACCESS_ALLOWED && allow_delete_child)
+		access_req |= FILE_DELETE_CHILD;
+
+	if (access_type == ACCESS_ALLOWED && !access_req)
 		access_req = SET_MINIMUM_RIGHTS;
+	else if (access_type == ACCESS_DENIED)
+		access_req &= ~SET_MINIMUM_RIGHTS;
+
 	pntace->access_req = cpu_to_le32(access_req);
 
 	pntace->sid.revision = psid->revision;
@@ -716,7 +751,7 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl,
 	if (!pdacl) {
 		/* no DACL in the security descriptor, set
 		   all the permissions for user/group/other */
-		fattr->cf_mode |= S_IRWXUGO;
+		fattr->cf_mode |= 0777;
 		return;
 	}
 
@@ -733,16 +768,14 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl,
 	/* reset rwx permissions for user/group/other.
 	   Also, if num_aces is 0 i.e. DACL has no ACEs,
 	   user/group/other have no permissions */
-	fattr->cf_mode &= ~(S_IRWXUGO);
+	fattr->cf_mode &= ~(0777);
 
 	acl_base = (char *)pdacl;
 	acl_size = sizeof(struct cifs_acl);
 
 	num_aces = le32_to_cpu(pdacl->num_aces);
 	if (num_aces > 0) {
-		umode_t user_mask = S_IRWXU;
-		umode_t group_mask = S_IRWXG;
-		umode_t other_mask = S_IRWXU | S_IRWXG | S_IRWXO;
+		umode_t denied_mode = 0;
 
 		if (num_aces > ULONG_MAX / sizeof(struct cifs_ace *))
 			return;
@@ -768,26 +801,28 @@ static void parse_dacl(struct cifs_acl *pdacl, char *end_of_acl,
 				fattr->cf_mode |=
 					le32_to_cpu(ppace[i]->sid.sub_auth[2]);
 				break;
-			} else if (compare_sids(&(ppace[i]->sid), pownersid) == 0)
-				access_flags_to_mode(ppace[i]->access_req,
-						     ppace[i]->type,
-						     &fattr->cf_mode,
-						     &user_mask);
-			else if (compare_sids(&(ppace[i]->sid), pgrpsid) == 0)
-				access_flags_to_mode(ppace[i]->access_req,
-						     ppace[i]->type,
-						     &fattr->cf_mode,
-						     &group_mask);
-			else if (compare_sids(&(ppace[i]->sid), &sid_everyone) == 0)
-				access_flags_to_mode(ppace[i]->access_req,
-						     ppace[i]->type,
-						     &fattr->cf_mode,
-						     &other_mask);
-			else if (compare_sids(&(ppace[i]->sid), &sid_authusers) == 0)
-				access_flags_to_mode(ppace[i]->access_req,
-						     ppace[i]->type,
-						     &fattr->cf_mode,
-						     &other_mask);
+			} else {
+				if (compare_sids(&(ppace[i]->sid), pownersid) == 0) {
+					access_flags_to_mode(ppace[i]->access_req,
+							ppace[i]->type,
+							&fattr->cf_mode,
+							&denied_mode,
+							ACL_OWNER_MASK);
+				} else if (compare_sids(&(ppace[i]->sid), pgrpsid) == 0) {
+					access_flags_to_mode(ppace[i]->access_req,
+							ppace[i]->type,
+							&fattr->cf_mode,
+							&denied_mode,
+							ACL_GROUP_MASK);
+				} else if ((compare_sids(&(ppace[i]->sid), &sid_everyone) == 0) ||
+						(compare_sids(&(ppace[i]->sid), &sid_authusers) == 0)) {
+					access_flags_to_mode(ppace[i]->access_req,
+							ppace[i]->type,
+							&fattr->cf_mode,
+							&denied_mode,
+							ACL_EVERYONE_MASK);
+				}
+			}
 
 
 /*			memcpy((void *)(&(cifscred->aces[i])),
@@ -873,32 +908,91 @@ unsigned int setup_special_user_owner_ACE(struct cifs_ace *pntace)
 }
 
 static int set_chmod_dacl(struct cifs_acl *pndacl, struct cifs_sid *pownersid,
-			struct cifs_sid *pgrpsid, __u64 nmode, bool modefromsid)
+			struct cifs_sid *pgrpsid, __u64 *pnmode, bool modefromsid)
 {
 	u16 size = 0;
 	u32 num_aces = 0;
 	struct cifs_acl *pnndacl;
+	__u64 nmode;
+	__u64 user_mode;
+	__u64 group_mode;
+	__u64 other_mode;
+	__u64 deny_user_mode = 0;
+	__u64 deny_group_mode = 0;
+	bool sticky_set = false;
 
 	pnndacl = (struct cifs_acl *)((char *)pndacl + sizeof(struct cifs_acl));
 
+	nmode = *pnmode;
+
 	if (modefromsid) {
 		struct cifs_ace *pntace =
 			(struct cifs_ace *)((char *)pnndacl + size);
 
 		size += setup_special_mode_ACE(pntace, nmode);
 		num_aces++;
+		goto set_size;
 	}
 
+	/*
+	 * We'll try to keep the mode as requested by the user.
+	 * But in cases where we cannot meaningfully convert that
+	 * into ACL, return back the updated mode, so that it is
+	 * updated in the inode.
+	 */
+
+	if (!memcmp(pownersid, pgrpsid, sizeof(struct cifs_sid))) {
+		/*
+		 * Case when owner and group SIDs are the same.
+		 * Set the more restrictive of the two modes.
+		 */
+		user_mode = nmode & (nmode << 3) & 0700;
+		group_mode = nmode & (nmode >> 3) & 0070;
+	} else {
+		user_mode = nmode & 0700;
+		group_mode = nmode & 0070;
+	}
+
+	other_mode = nmode & 0007;
+
+	/* We need DENY ACE when the perm is more restrictive than the next sets. */
+	deny_user_mode = ~(user_mode) & ((group_mode << 3) | (other_mode << 6)) & 0700;
+	deny_group_mode = ~(group_mode) & (other_mode << 3) & 0070;
+
+	*pnmode = user_mode | group_mode | other_mode | (nmode & ~0777);
+
+	/* This tells if we should allow delete child for group and everyone. */
+	if (nmode & 01000)
+		sticky_set = true;
+
+	if (deny_user_mode) {
+		size += fill_ace_for_sid((struct cifs_ace *)((char *)pnndacl + size),
+				pownersid, deny_user_mode, 0700, ACCESS_DENIED, false);
+		num_aces++;
+	}
+	/* Group DENY ACE does not conflict with owner ALLOW ACE. Keep in preferred order*/
+	if (deny_group_mode && !(deny_group_mode & (user_mode >> 3))) {
+		size += fill_ace_for_sid((struct cifs_ace *)((char *)pnndacl + size),
+				pgrpsid, deny_group_mode, 0070, ACCESS_DENIED, false);
+		num_aces++;
+	}
 	size += fill_ace_for_sid((struct cifs_ace *) ((char *)pnndacl + size),
-					pownersid, nmode, S_IRWXU);
+			pownersid, user_mode, 0700, ACCESS_ALLOWED, true);
 	num_aces++;
+	/* Group DENY ACE conflicts with owner ALLOW ACE. So keep it after. */
+	if (deny_group_mode && (deny_group_mode & (user_mode >> 3))) {
+		size += fill_ace_for_sid((struct cifs_ace *)((char *)pnndacl + size),
+				pgrpsid, deny_group_mode, 0070, ACCESS_DENIED, false);
+		num_aces++;
+	}
 	size += fill_ace_for_sid((struct cifs_ace *)((char *)pnndacl + size),
-					pgrpsid, nmode, S_IRWXG);
+			pgrpsid, group_mode, 0070, ACCESS_ALLOWED, !sticky_set);
 	num_aces++;
 	size += fill_ace_for_sid((struct cifs_ace *)((char *)pnndacl + size),
-					 &sid_everyone, nmode, S_IRWXO);
+			&sid_everyone, other_mode, 0007, ACCESS_ALLOWED, !sticky_set);
 	num_aces++;
 
+set_size:
 	pndacl->num_aces = cpu_to_le32(num_aces);
 	pndacl->size = cpu_to_le16(size + sizeof(struct cifs_acl));
 
@@ -1000,7 +1094,7 @@ static int parse_sec_desc(struct cifs_sb_info *cifs_sb,
 
 /* Convert permission bits from mode to equivalent CIFS ACL */
 static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
-	__u32 secdesclen, __u64 nmode, kuid_t uid, kgid_t gid,
+	__u32 secdesclen, __u64 *pnmode, kuid_t uid, kgid_t gid,
 	bool mode_from_sid, bool id_from_sid, int *aclflag)
 {
 	int rc = 0;
@@ -1012,7 +1106,7 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
 	struct cifs_acl *dacl_ptr = NULL;  /* no need for SACL ptr */
 	struct cifs_acl *ndacl_ptr = NULL; /* no need for SACL ptr */
 
-	if (nmode != NO_CHANGE_64) { /* chmod */
+	if (pnmode && *pnmode != NO_CHANGE_64) { /* chmod */
 		owner_sid_ptr = (struct cifs_sid *)((char *)pntsd +
 				le32_to_cpu(pntsd->osidoffset));
 		group_sid_ptr = (struct cifs_sid *)((char *)pntsd +
@@ -1026,7 +1120,7 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
 		ndacl_ptr->num_aces = 0;
 
 		rc = set_chmod_dacl(ndacl_ptr, owner_sid_ptr, group_sid_ptr,
-				    nmode, mode_from_sid);
+				    pnmode, mode_from_sid);
 		sidsoffset = ndacloffset + le16_to_cpu(ndacl_ptr->size);
 		/* copy sec desc control portion & owner and group sids */
 		copy_sec_desc(pntsd, pnntsd, sidsoffset);
@@ -1101,7 +1195,8 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
 }
 
 struct cifs_ntsd *get_cifs_acl_by_fid(struct cifs_sb_info *cifs_sb,
-		const struct cifs_fid *cifsfid, u32 *pacllen)
+				      const struct cifs_fid *cifsfid, u32 *pacllen,
+				      u32 __maybe_unused unused)
 {
 	struct cifs_ntsd *pntsd = NULL;
 	unsigned int xid;
@@ -1169,7 +1264,7 @@ static struct cifs_ntsd *get_cifs_acl_by_path(struct cifs_sb_info *cifs_sb,
 /* Retrieve an ACL from the server */
 struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *cifs_sb,
 				      struct inode *inode, const char *path,
-				      u32 *pacllen)
+			       u32 *pacllen, u32 info)
 {
 	struct cifs_ntsd *pntsd = NULL;
 	struct cifsFileInfo *open_file = NULL;
@@ -1179,7 +1274,7 @@ struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *cifs_sb,
 	if (!open_file)
 		return get_cifs_acl_by_path(cifs_sb, path, pacllen);
 
-	pntsd = get_cifs_acl_by_fid(cifs_sb, &open_file->fid, pacllen);
+	pntsd = get_cifs_acl_by_fid(cifs_sb, &open_file->fid, pacllen, info);
 	cifsFileInfo_put(open_file);
 	return pntsd;
 }
@@ -1244,6 +1339,7 @@ cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr,
 	int rc = 0;
 	struct tcon_link *tlink = cifs_sb_tlink(cifs_sb);
 	struct smb_version_operations *ops;
+	const u32 info = 0;
 
 	cifs_dbg(NOISY, "converting ACL to mode for %s\n", path);
 
@@ -1253,9 +1349,9 @@ cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr,
 	ops = tlink_tcon(tlink)->ses->server->ops;
 
 	if (pfid && (ops->get_acl_by_fid))
-		pntsd = ops->get_acl_by_fid(cifs_sb, pfid, &acllen);
+		pntsd = ops->get_acl_by_fid(cifs_sb, pfid, &acllen, info);
 	else if (ops->get_acl)
-		pntsd = ops->get_acl(cifs_sb, inode, path, &acllen);
+		pntsd = ops->get_acl(cifs_sb, inode, path, &acllen, info);
 	else {
 		cifs_put_tlink(tlink);
 		return -EOPNOTSUPP;
@@ -1282,7 +1378,7 @@ cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb, struct cifs_fattr *fattr,
 
 /* Convert mode bits to an ACL so we can update the ACL on the server */
 int
-id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 nmode,
+id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 *pnmode,
 			kuid_t uid, kgid_t gid)
 {
 	int rc = 0;
@@ -1294,6 +1390,7 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 nmode,
 	struct tcon_link *tlink = cifs_sb_tlink(cifs_sb);
 	struct smb_version_operations *ops;
 	bool mode_from_sid, id_from_sid;
+	const u32 info = 0;
 
 	if (IS_ERR(tlink))
 		return PTR_ERR(tlink);
@@ -1309,7 +1406,7 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 nmode,
 		return -EOPNOTSUPP;
 	}
 
-	pntsd = ops->get_acl(cifs_sb, inode, path, &secdesclen);
+	pntsd = ops->get_acl(cifs_sb, inode, path, &secdesclen, info);
 	if (IS_ERR(pntsd)) {
 		rc = PTR_ERR(pntsd);
 		cifs_dbg(VFS, "%s: error %d getting sec desc\n", __func__, rc);
@@ -1341,7 +1438,7 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 nmode,
 	else
 		id_from_sid = false;
 
-	rc = build_sec_desc(pntsd, pnntsd, secdesclen, nmode, uid, gid,
+	rc = build_sec_desc(pntsd, pnntsd, secdesclen, pnmode, uid, gid,
 			    mode_from_sid, id_from_sid, &aclflag);
 
 	cifs_dbg(NOISY, "build_sec_desc rc: %d\n", rc);
diff --git a/fs/cifs/cifsacl.h b/fs/cifs/cifsacl.h
index 45665ff87b64..ff7fd0862e28 100644
--- a/fs/cifs/cifsacl.h
+++ b/fs/cifs/cifsacl.h
@@ -30,6 +30,10 @@
 #define WRITE_BIT       0x2
 #define EXEC_BIT        0x1
 
+#define ACL_OWNER_MASK 0700
+#define ACL_GROUP_MASK 0770
+#define ACL_EVERYONE_MASK 0777
+
 #define UBITSHIFT	6
 #define GBITSHIFT	3
 
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index 9daa256f69d4..51d53e4bdf6b 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -661,6 +661,11 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp)
 	unsigned char *tiblob = NULL; /* target info blob */
 	__le64 rsp_timestamp;
 
+	if (nls_cp == NULL) {
+		cifs_dbg(VFS, "%s called with nls_cp==NULL\n", __func__);
+		return -EINVAL;
+	}
+
 	if (ses->server->negflavor == CIFS_NEGFLAVOR_EXTENDED) {
 		if (!ses->domainName) {
 			if (ses->domainAuto) {
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 472cb7777e3e..ce0d0037fd0a 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -55,6 +55,10 @@
 #ifdef CONFIG_CIFS_DFS_UPCALL
 #include "dfs_cache.h"
 #endif
+#ifdef CONFIG_CIFS_SWN_UPCALL
+#include "netlink.h"
+#endif
+#include "fs_context.h"
 
 /*
  * DOS dates from 1980/1/1 through 2107/12/31
@@ -214,7 +218,7 @@ cifs_read_super(struct super_block *sb)
 	if (rc)
 		goto out_no_root;
 	/* tune readahead according to rsize */
-	sb->s_bdi->ra_pages = cifs_sb->rsize / PAGE_SIZE;
+	sb->s_bdi->ra_pages = cifs_sb->ctx->rsize / PAGE_SIZE;
 
 	sb->s_blocksize = CIFS_MAX_MSGSIZE;
 	sb->s_blocksize_bits = 14;	/* default 2**14 = CIFS_MAX_MSGSIZE */
@@ -458,16 +462,23 @@ cifs_show_cache_flavor(struct seq_file *s, struct cifs_sb_info *cifs_sb)
 		seq_puts(s, "loose");
 }
 
-static void
-cifs_show_nls(struct seq_file *s, struct nls_table *cur)
+/*
+ * cifs_show_devname() is used so we show the mount device name with correct
+ * format (e.g. forward slashes vs. back slashes) in /proc/mounts
+ */
+static int cifs_show_devname(struct seq_file *m, struct dentry *root)
 {
-	struct nls_table *def;
-
-	/* Display iocharset= option if it's not default charset */
-	def = load_nls_default();
-	if (def != cur)
-		seq_printf(s, ",iocharset=%s", cur->charset);
-	unload_nls(def);
+	struct cifs_sb_info *cifs_sb = CIFS_SB(root->d_sb);
+	char *devname = kstrdup(cifs_sb->ctx->UNC, GFP_KERNEL);
+
+	if (devname == NULL)
+		seq_puts(m, "none");
+	else {
+		convert_delimiter(devname, '/');
+		seq_puts(m, devname);
+		kfree(devname);
+	}
+	return 0;
 }
 
 /*
@@ -489,7 +500,7 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
 
 	if (tcon->no_lease)
 		seq_puts(s, ",nolease");
-	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)
+	if (cifs_sb->ctx->multiuser)
 		seq_puts(s, ",multiuser");
 	else if (tcon->ses->user_name)
 		seq_show_option(s, "username", tcon->ses->user_name);
@@ -514,14 +525,14 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
 	}
 
 	seq_printf(s, ",uid=%u",
-		   from_kuid_munged(&init_user_ns, cifs_sb->mnt_uid));
+		   from_kuid_munged(&init_user_ns, cifs_sb->ctx->linux_uid));
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID)
 		seq_puts(s, ",forceuid");
 	else
 		seq_puts(s, ",noforceuid");
 
 	seq_printf(s, ",gid=%u",
-		   from_kgid_munged(&init_user_ns, cifs_sb->mnt_gid));
+		   from_kgid_munged(&init_user_ns, cifs_sb->ctx->linux_gid));
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID)
 		seq_puts(s, ",forcegid");
 	else
@@ -531,11 +542,10 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
 
 	if (!tcon->unix_ext)
 		seq_printf(s, ",file_mode=0%ho,dir_mode=0%ho",
-					   cifs_sb->mnt_file_mode,
-					   cifs_sb->mnt_dir_mode);
-
-	cifs_show_nls(s, cifs_sb->local_nls);
-
+					   cifs_sb->ctx->file_mode,
+					   cifs_sb->ctx->dir_mode);
+	if (cifs_sb->ctx->iocharset)
+		seq_printf(s, ",iocharset=%s", cifs_sb->ctx->iocharset);
 	if (tcon->seal)
 		seq_puts(s, ",seal");
 	else if (tcon->ses->server->ignore_signature)
@@ -605,15 +615,15 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPUID)
 		seq_printf(s, ",backupuid=%u",
 			   from_kuid_munged(&init_user_ns,
-					    cifs_sb->mnt_backupuid));
+					    cifs_sb->ctx->backupuid));
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPGID)
 		seq_printf(s, ",backupgid=%u",
 			   from_kgid_munged(&init_user_ns,
-					    cifs_sb->mnt_backupgid));
+					    cifs_sb->ctx->backupgid));
 
-	seq_printf(s, ",rsize=%u", cifs_sb->rsize);
-	seq_printf(s, ",wsize=%u", cifs_sb->wsize);
-	seq_printf(s, ",bsize=%u", cifs_sb->bsize);
+	seq_printf(s, ",rsize=%u", cifs_sb->ctx->rsize);
+	seq_printf(s, ",wsize=%u", cifs_sb->ctx->wsize);
+	seq_printf(s, ",bsize=%u", cifs_sb->ctx->bsize);
 	if (tcon->ses->server->min_offload)
 		seq_printf(s, ",esize=%u", tcon->ses->server->min_offload);
 	seq_printf(s, ",echo_interval=%lu",
@@ -628,12 +638,17 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
 	if (tcon->handle_timeout)
 		seq_printf(s, ",handletimeout=%u", tcon->handle_timeout);
 	/* convert actimeo and display it in seconds */
-	seq_printf(s, ",actimeo=%lu", cifs_sb->actimeo / HZ);
+	seq_printf(s, ",actimeo=%lu", cifs_sb->ctx->actimeo / HZ);
 
 	if (tcon->ses->chan_max > 1)
 		seq_printf(s, ",multichannel,max_channels=%zu",
 			   tcon->ses->chan_max);
 
+#ifdef CONFIG_CIFS_SWN_UPCALL
+	if (tcon->use_witness)
+		seq_puts(s, ",witness");
+#endif
+
 	return 0;
 }
 
@@ -681,13 +696,6 @@ static int cifs_show_stats(struct seq_file *s, struct dentry *root)
 }
 #endif
 
-static int cifs_remount(struct super_block *sb, int *flags, char *data)
-{
-	sync_filesystem(sb);
-	*flags |= SB_NODIRATIME;
-	return 0;
-}
-
 static int cifs_drop_inode(struct inode *inode)
 {
 	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
@@ -703,13 +711,14 @@ static const struct super_operations cifs_super_ops = {
 	.free_inode = cifs_free_inode,
 	.drop_inode	= cifs_drop_inode,
 	.evict_inode	= cifs_evict_inode,
+/*	.show_path	= cifs_show_path, */ /* Would we ever need show path? */
+	.show_devname   = cifs_show_devname,
 /*	.delete_inode	= cifs_delete_inode,  */  /* Do not need above
 	function unless later we add lazy close of inodes or unless the
 	kernel forgets to call us with the same number of releases (closes)
 	as opens */
 	.show_options = cifs_show_options,
 	.umount_begin   = cifs_umount_begin,
-	.remount_fs = cifs_remount,
 #ifdef CONFIG_CIFS_STATS2
 	.show_stats = cifs_show_stats,
 #endif
@@ -720,7 +729,7 @@ static const struct super_operations cifs_super_ops = {
  * Return dentry with refcount + 1 on success and NULL otherwise.
  */
 static struct dentry *
-cifs_get_root(struct smb_vol *vol, struct super_block *sb)
+cifs_get_root(struct smb3_fs_context *ctx, struct super_block *sb)
 {
 	struct dentry *dentry;
 	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
@@ -731,7 +740,7 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb)
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH)
 		return dget(sb->s_root);
 
-	full_path = cifs_build_path_to_root(vol, cifs_sb,
+	full_path = cifs_build_path_to_root(ctx, cifs_sb,
 				cifs_sb_master_tcon(cifs_sb), 0);
 	if (full_path == NULL)
 		return ERR_PTR(-ENOMEM);
@@ -777,14 +786,13 @@ static int cifs_set_super(struct super_block *sb, void *data)
 	return set_anon_super(sb, NULL);
 }
 
-static struct dentry *
+struct dentry *
 cifs_smb3_do_mount(struct file_system_type *fs_type,
-	      int flags, const char *dev_name, void *data, bool is_smb3)
+	      int flags, struct smb3_fs_context *old_ctx)
 {
 	int rc;
 	struct super_block *sb;
-	struct cifs_sb_info *cifs_sb;
-	struct smb_vol *volume_info;
+	struct cifs_sb_info *cifs_sb = NULL;
 	struct cifs_mnt_data mnt_data;
 	struct dentry *root;
 
@@ -793,42 +801,49 @@ cifs_smb3_do_mount(struct file_system_type *fs_type,
 	 *	If CIFS_DEBUG && cifs_FYI
 	 */
 	if (cifsFYI)
-		cifs_dbg(FYI, "Devname: %s flags: %d\n", dev_name, flags);
+		cifs_dbg(FYI, "Devname: %s flags: %d\n", old_ctx->UNC, flags);
 	else
-		cifs_info("Attempting to mount %s\n", dev_name);
-
-	volume_info = cifs_get_volume_info((char *)data, dev_name, is_smb3);
-	if (IS_ERR(volume_info))
-		return ERR_CAST(volume_info);
+		cifs_info("Attempting to mount %s\n", old_ctx->UNC);
 
 	cifs_sb = kzalloc(sizeof(struct cifs_sb_info), GFP_KERNEL);
 	if (cifs_sb == NULL) {
 		root = ERR_PTR(-ENOMEM);
-		goto out_nls;
+		goto out;
 	}
 
-	cifs_sb->mountdata = kstrndup(data, PAGE_SIZE, GFP_KERNEL);
-	if (cifs_sb->mountdata == NULL) {
+	cifs_sb->ctx = kzalloc(sizeof(struct smb3_fs_context), GFP_KERNEL);
+	if (!cifs_sb->ctx) {
 		root = ERR_PTR(-ENOMEM);
-		goto out_free;
+		goto out;
+	}
+	rc = smb3_fs_context_dup(cifs_sb->ctx, old_ctx);
+	if (rc) {
+		root = ERR_PTR(rc);
+		goto out;
+	}
+
+	rc = cifs_setup_volume_info(cifs_sb->ctx);
+	if (rc) {
+		root = ERR_PTR(rc);
+		goto out;
 	}
 
-	rc = cifs_setup_cifs_sb(volume_info, cifs_sb);
+	rc = cifs_setup_cifs_sb(cifs_sb);
 	if (rc) {
 		root = ERR_PTR(rc);
-		goto out_free;
+		goto out;
 	}
 
-	rc = cifs_mount(cifs_sb, volume_info);
+	rc = cifs_mount(cifs_sb, cifs_sb->ctx);
 	if (rc) {
 		if (!(flags & SB_SILENT))
 			cifs_dbg(VFS, "cifs_mount failed w/return code = %d\n",
 				 rc);
 		root = ERR_PTR(rc);
-		goto out_free;
+		goto out;
 	}
 
-	mnt_data.vol = volume_info;
+	mnt_data.ctx = cifs_sb->ctx;
 	mnt_data.cifs_sb = cifs_sb;
 	mnt_data.flags = flags;
 
@@ -839,12 +854,14 @@ cifs_smb3_do_mount(struct file_system_type *fs_type,
 	if (IS_ERR(sb)) {
 		root = ERR_CAST(sb);
 		cifs_umount(cifs_sb);
+		cifs_sb = NULL;
 		goto out;
 	}
 
 	if (sb->s_root) {
 		cifs_dbg(FYI, "Use existing superblock\n");
 		cifs_umount(cifs_sb);
+		cifs_sb = NULL;
 	} else {
 		rc = cifs_read_super(sb);
 		if (rc) {
@@ -855,41 +872,24 @@ cifs_smb3_do_mount(struct file_system_type *fs_type,
 		sb->s_flags |= SB_ACTIVE;
 	}
 
-	root = cifs_get_root(volume_info, sb);
+	root = cifs_get_root(cifs_sb ? cifs_sb->ctx : old_ctx, sb);
 	if (IS_ERR(root))
 		goto out_super;
 
 	cifs_dbg(FYI, "dentry root is: %p\n", root);
-	goto out;
+	return root;
 
 out_super:
 	deactivate_locked_super(sb);
 out:
-	cifs_cleanup_volume_info(volume_info);
+	if (cifs_sb) {
+		kfree(cifs_sb->prepath);
+		smb3_cleanup_fs_context(cifs_sb->ctx);
+		kfree(cifs_sb);
+	}
 	return root;
-
-out_free:
-	kfree(cifs_sb->prepath);
-	kfree(cifs_sb->mountdata);
-	kfree(cifs_sb);
-out_nls:
-	unload_nls(volume_info->local_nls);
-	goto out;
 }
 
-static struct dentry *
-smb3_do_mount(struct file_system_type *fs_type,
-	      int flags, const char *dev_name, void *data)
-{
-	return cifs_smb3_do_mount(fs_type, flags, dev_name, data, true);
-}
-
-static struct dentry *
-cifs_do_mount(struct file_system_type *fs_type,
-	      int flags, const char *dev_name, void *data)
-{
-	return cifs_smb3_do_mount(fs_type, flags, dev_name, data, false);
-}
 
 static ssize_t
 cifs_loose_read_iter(struct kiocb *iocb, struct iov_iter *iter)
@@ -1026,7 +1026,8 @@ cifs_setlease(struct file *file, long arg, struct file_lock **lease, void **priv
 struct file_system_type cifs_fs_type = {
 	.owner = THIS_MODULE,
 	.name = "cifs",
-	.mount = cifs_do_mount,
+	.init_fs_context = smb3_init_fs_context,
+	.parameters = smb3_fs_parameters,
 	.kill_sb = cifs_kill_sb,
 	.fs_flags = FS_RENAME_DOES_D_MOVE,
 };
@@ -1035,7 +1036,8 @@ MODULE_ALIAS_FS("cifs");
 static struct file_system_type smb3_fs_type = {
 	.owner = THIS_MODULE,
 	.name = "smb3",
-	.mount = smb3_do_mount,
+	.init_fs_context = smb3_init_fs_context,
+	.parameters = smb3_fs_parameters,
 	.kill_sb = cifs_kill_sb,
 	.fs_flags = FS_RENAME_DOES_D_MOVE,
 };
@@ -1617,10 +1619,15 @@ init_cifs(void)
 	if (rc)
 		goto out_destroy_dfs_cache;
 #endif /* CONFIG_CIFS_UPCALL */
+#ifdef CONFIG_CIFS_SWN_UPCALL
+	rc = cifs_genl_init();
+	if (rc)
+		goto out_register_key_type;
+#endif /* CONFIG_CIFS_SWN_UPCALL */
 
 	rc = init_cifs_idmap();
 	if (rc)
-		goto out_register_key_type;
+		goto out_cifs_swn_init;
 
 	rc = register_filesystem(&cifs_fs_type);
 	if (rc)
@@ -1636,7 +1643,11 @@ init_cifs(void)
 
 out_init_cifs_idmap:
 	exit_cifs_idmap();
+out_cifs_swn_init:
+#ifdef CONFIG_CIFS_SWN_UPCALL
+	cifs_genl_exit();
 out_register_key_type:
+#endif
 #ifdef CONFIG_CIFS_UPCALL
 	exit_cifs_spnego();
 out_destroy_dfs_cache:
@@ -1673,6 +1684,9 @@ exit_cifs(void)
 	unregister_filesystem(&smb3_fs_type);
 	cifs_dfs_release_automount_timer();
 	exit_cifs_idmap();
+#ifdef CONFIG_CIFS_SWN_UPCALL
+	cifs_genl_exit();
+#endif
 #ifdef CONFIG_CIFS_UPCALL
 	exit_cifs_spnego();
 #endif
diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h
index 905d03863721..2307bb0f6147 100644
--- a/fs/cifs/cifsfs.h
+++ b/fs/cifs/cifsfs.h
@@ -152,9 +152,13 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
 extern void cifs_setsize(struct inode *inode, loff_t offset);
 extern int cifs_truncate_page(struct address_space *mapping, loff_t from);
 
+struct smb3_fs_context;
+extern struct dentry *cifs_smb3_do_mount(struct file_system_type *fs_type,
+					 int flags, struct smb3_fs_context *ctx);
+
 #ifdef CONFIG_CIFS_NFSD_EXPORT
 extern const struct export_operations cifs_export_ops;
 #endif /* CONFIG_CIFS_NFSD_EXPORT */
 
-#define CIFS_VERSION   "2.29"
+#define CIFS_VERSION   "2.30"
 #endif				/* _CIFSFS_H */
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 484ec2d8c5c9..50fcb65920e8 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -202,7 +202,7 @@ struct cifs_ses;
 struct cifs_tcon;
 struct dfs_info3_param;
 struct cifs_fattr;
-struct smb_vol;
+struct smb3_fs_context;
 struct cifs_fid;
 struct cifs_readdata;
 struct cifs_writedata;
@@ -268,9 +268,9 @@ struct smb_version_operations {
 	/* negotiate to the server */
 	int (*negotiate)(const unsigned int, struct cifs_ses *);
 	/* set negotiated write size */
-	unsigned int (*negotiate_wsize)(struct cifs_tcon *, struct smb_vol *);
+	unsigned int (*negotiate_wsize)(struct cifs_tcon *tcon, struct smb3_fs_context *ctx);
 	/* set negotiated read size */
-	unsigned int (*negotiate_rsize)(struct cifs_tcon *, struct smb_vol *);
+	unsigned int (*negotiate_rsize)(struct cifs_tcon *tcon, struct smb3_fs_context *ctx);
 	/* setup smb sessionn */
 	int (*sess_setup)(const unsigned int, struct cifs_ses *,
 			  const struct nls_table *);
@@ -456,9 +456,9 @@ struct smb_version_operations {
 			const char *, const void *, const __u16,
 			const struct nls_table *, struct cifs_sb_info *);
 	struct cifs_ntsd * (*get_acl)(struct cifs_sb_info *, struct inode *,
-			const char *, u32 *);
+			const char *, u32 *, u32);
 	struct cifs_ntsd * (*get_acl_by_fid)(struct cifs_sb_info *,
-			const struct cifs_fid *, u32 *);
+			const struct cifs_fid *, u32 *, u32);
 	int (*set_acl)(struct cifs_ntsd *, __u32, struct inode *, const char *,
 			int);
 	/* writepages retry size */
@@ -530,97 +530,6 @@ struct smb_version_values {
 #define HEADER_SIZE(server) (server->vals->header_size)
 #define MAX_HEADER_SIZE(server) (server->vals->max_header_size)
 
-struct smb_vol {
-	char *username;
-	char *password;
-	char *domainname;
-	char *UNC;
-	char *iocharset;  /* local code page for mapping to and from Unicode */
-	char source_rfc1001_name[RFC1001_NAME_LEN_WITH_NULL]; /* clnt nb name */
-	char target_rfc1001_name[RFC1001_NAME_LEN_WITH_NULL]; /* srvr nb name */
-	kuid_t cred_uid;
-	kuid_t linux_uid;
-	kgid_t linux_gid;
-	kuid_t backupuid;
-	kgid_t backupgid;
-	umode_t file_mode;
-	umode_t dir_mode;
-	enum securityEnum sectype; /* sectype requested via mnt opts */
-	bool sign; /* was signing requested via mnt opts? */
-	bool ignore_signature:1;
-	bool retry:1;
-	bool intr:1;
-	bool setuids:1;
-	bool setuidfromacl:1;
-	bool override_uid:1;
-	bool override_gid:1;
-	bool dynperm:1;
-	bool noperm:1;
-	bool nodelete:1;
-	bool mode_ace:1;
-	bool no_psx_acl:1; /* set if posix acl support should be disabled */
-	bool cifs_acl:1;
-	bool backupuid_specified; /* mount option  backupuid  is specified */
-	bool backupgid_specified; /* mount option  backupgid  is specified */
-	bool no_xattr:1;   /* set if xattr (EA) support should be disabled*/
-	bool server_ino:1; /* use inode numbers from server ie UniqueId */
-	bool direct_io:1;
-	bool strict_io:1; /* strict cache behavior */
-	bool cache_ro:1;
-	bool cache_rw:1;
-	bool remap:1;      /* set to remap seven reserved chars in filenames */
-	bool sfu_remap:1;  /* remap seven reserved chars ala SFU */
-	bool posix_paths:1; /* unset to not ask for posix pathnames. */
-	bool no_linux_ext:1;
-	bool linux_ext:1;
-	bool sfu_emul:1;
-	bool nullauth:1;   /* attempt to authenticate with null user */
-	bool nocase:1;     /* request case insensitive filenames */
-	bool nobrl:1;      /* disable sending byte range locks to srv */
-	bool nohandlecache:1; /* disable caching dir handles if srvr probs */
-	bool mand_lock:1;  /* send mandatory not posix byte range lock reqs */
-	bool seal:1;       /* request transport encryption on share */
-	bool nodfs:1;      /* Do not request DFS, even if available */
-	bool local_lease:1; /* check leases only on local system, not remote */
-	bool noblocksnd:1;
-	bool noautotune:1;
-	bool nostrictsync:1; /* do not force expensive SMBflush on every sync */
-	bool no_lease:1;     /* disable requesting leases */
-	bool fsc:1;	/* enable fscache */
-	bool mfsymlinks:1; /* use Minshall+French Symlinks */
-	bool multiuser:1;
-	bool rwpidforward:1; /* pid forward for read/write operations */
-	bool nosharesock:1;
-	bool persistent:1;
-	bool nopersistent:1;
-	bool resilient:1; /* noresilient not required since not fored for CA */
-	bool domainauto:1;
-	bool rdma:1;
-	bool multichannel:1;
-	bool use_client_guid:1;
-	/* reuse existing guid for multichannel */
-	u8 client_guid[SMB2_CLIENT_GUID_SIZE];
-	unsigned int bsize;
-	unsigned int rsize;
-	unsigned int wsize;
-	unsigned int min_offload;
-	bool sockopt_tcp_nodelay:1;
-	unsigned long actimeo; /* attribute cache timeout (jiffies) */
-	struct smb_version_operations *ops;
-	struct smb_version_values *vals;
-	char *prepath;
-	struct sockaddr_storage dstaddr; /* destination address */
-	struct sockaddr_storage srcaddr; /* allow binding to a local IP */
-	struct nls_table *local_nls;
-	unsigned int echo_interval; /* echo interval in secs */
-	__u64 snapshot_time; /* needed for timewarp tokens */
-	__u32 handle_timeout; /* persistent and durable handle timeout in ms */
-	unsigned int max_credits; /* smb3 max_credits 10 < credits < 60000 */
-	unsigned int max_channels;
-	__u16 compression; /* compression algorithm 0xFFFF default 0=disabled */
-	bool rootfs:1; /* if it's a SMB root file system */
-};
-
 /**
  * CIFS superblock mount flags (mnt_cifs_flags) to consider when
  * trying to reuse existing superblock for a new mount
@@ -649,7 +558,7 @@ struct smb_vol {
 
 struct cifs_mnt_data {
 	struct cifs_sb_info *cifs_sb;
-	struct smb_vol *vol;
+	struct smb3_fs_context *ctx;
 	int flags;
 };
 
@@ -778,6 +687,10 @@ struct TCP_Server_Info {
 	int nr_targets;
 	bool noblockcnt; /* use non-blocking connect() */
 	bool is_channel; /* if a session channel */
+#ifdef CONFIG_CIFS_SWN_UPCALL
+	bool use_swn_dstaddr;
+	struct sockaddr_storage swn_dstaddr;
+#endif
 };
 
 struct cifs_credits {
@@ -1177,6 +1090,9 @@ struct cifs_tcon {
 	int remap:2;
 	struct list_head ulist; /* cache update list */
 #endif
+#ifdef CONFIG_CIFS_SWN_UPCALL
+	bool use_witness:1; /* use witness protocol */
+#endif
 };
 
 /*
diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h
index 593d826820c3..64fe5a47b5e8 100644
--- a/fs/cifs/cifspdu.h
+++ b/fs/cifs/cifspdu.h
@@ -240,6 +240,8 @@
 #define SYNCHRONIZE           0x00100000  /* The file handle can waited on to */
 					  /* synchronize with the completion  */
 					  /* of an input/output request       */
+#define SYSTEM_SECURITY       0x01000000  /* The system access control list   */
+					  /* can be read and changed          */
 #define GENERIC_ALL           0x10000000
 #define GENERIC_EXECUTE       0x20000000
 #define GENERIC_WRITE         0x40000000
@@ -262,7 +264,7 @@
 				| WRITE_OWNER | SYNCHRONIZE)
 #define SET_FILE_WRITE_RIGHTS (FILE_WRITE_DATA | FILE_APPEND_DATA \
 				| FILE_READ_EA | FILE_WRITE_EA \
-				| FILE_DELETE_CHILD | FILE_READ_ATTRIBUTES \
+				| FILE_READ_ATTRIBUTES \
 				| FILE_WRITE_ATTRIBUTES \
 				| DELETE | READ_CONTROL | WRITE_DAC \
 				| WRITE_OWNER | SYNCHRONIZE)
diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h
index 24c6f36177ba..340ff81ee87b 100644
--- a/fs/cifs/cifsproto.h
+++ b/fs/cifs/cifsproto.h
@@ -27,8 +27,8 @@
 #endif
 
 struct statfs;
-struct smb_vol;
 struct smb_rqst;
+struct smb3_fs_context;
 
 /*
  *****************************************************************
@@ -72,14 +72,13 @@ extern void exit_cifs_spnego(void);
 extern char *build_path_from_dentry(struct dentry *);
 extern char *build_path_from_dentry_optional_prefix(struct dentry *direntry,
 						    bool prefix);
-extern char *cifs_build_path_to_root(struct smb_vol *vol,
+extern char *cifs_build_path_to_root(struct smb3_fs_context *ctx,
 				     struct cifs_sb_info *cifs_sb,
 				     struct cifs_tcon *tcon,
 				     int add_treename);
 extern char *build_wildcard_path_from_dentry(struct dentry *direntry);
 extern char *cifs_compose_mount_options(const char *sb_mountdata,
-		const char *fullpath, const struct dfs_info3_param *ref,
-		char **devname);
+		const char *fullpath, const struct dfs_info3_param *ref);
 /* extern void renew_parental_timestamps(struct dentry *direntry);*/
 extern struct mid_q_entry *AllocMidQEntry(const struct smb_hdr *smb_buffer,
 					struct TCP_Server_Info *server);
@@ -89,6 +88,7 @@ extern void cifs_mid_q_entry_release(struct mid_q_entry *midEntry);
 extern void cifs_wake_up_task(struct mid_q_entry *mid);
 extern int cifs_handle_standard(struct TCP_Server_Info *server,
 				struct mid_q_entry *mid);
+extern int smb3_parse_devname(const char *devname, struct smb3_fs_context *ctx);
 extern bool cifs_match_ipaddr(struct sockaddr *srcaddr, struct sockaddr *rhs);
 extern int cifs_discard_remaining_data(struct TCP_Server_Info *server);
 extern int cifs_call_async(struct TCP_Server_Info *server,
@@ -215,12 +215,12 @@ extern int cifs_acl_to_fattr(struct cifs_sb_info *cifs_sb,
 			      struct cifs_fattr *fattr, struct inode *inode,
 			      bool get_mode_from_special_sid,
 			      const char *path, const struct cifs_fid *pfid);
-extern int id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64,
-					kuid_t, kgid_t);
+extern int id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 *pnmode,
+					kuid_t uid, kgid_t gid);
 extern struct cifs_ntsd *get_cifs_acl(struct cifs_sb_info *, struct inode *,
-					const char *, u32 *);
+				      const char *, u32 *, u32);
 extern struct cifs_ntsd *get_cifs_acl_by_fid(struct cifs_sb_info *,
-						const struct cifs_fid *, u32 *);
+				const struct cifs_fid *, u32 *, u32);
 extern int set_cifs_acl(struct cifs_ntsd *, __u32, struct inode *,
 				const char *, int);
 extern unsigned int setup_authusers_ACE(struct cifs_ace *pace);
@@ -234,13 +234,9 @@ extern int cifs_read_page_from_socket(struct TCP_Server_Info *server,
 					struct page *page,
 					unsigned int page_offset,
 					unsigned int to_read);
-extern int cifs_setup_cifs_sb(struct smb_vol *pvolume_info,
-			       struct cifs_sb_info *cifs_sb);
+extern int cifs_setup_cifs_sb(struct cifs_sb_info *cifs_sb);
 extern int cifs_match_super(struct super_block *, void *);
-extern void cifs_cleanup_volume_info(struct smb_vol *pvolume_info);
-extern struct smb_vol *cifs_get_volume_info(char *mount_data,
-					    const char *devname, bool is_smb3);
-extern int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *vol);
+extern int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx);
 extern void cifs_umount(struct cifs_sb_info *);
 extern void cifs_mark_open_files_invalid(struct cifs_tcon *tcon);
 extern void cifs_reopen_persistent_handles(struct cifs_tcon *tcon);
@@ -256,7 +252,7 @@ extern void cifs_add_pending_open_locked(struct cifs_fid *fid,
 					 struct tcon_link *tlink,
 					 struct cifs_pending_open *open);
 extern void cifs_del_pending_open(struct cifs_pending_open *open);
-extern struct TCP_Server_Info *cifs_get_tcp_session(struct smb_vol *vol);
+extern struct TCP_Server_Info *cifs_get_tcp_session(struct smb3_fs_context *ctx);
 extern void cifs_put_tcp_session(struct TCP_Server_Info *server,
 				 int from_reconnect);
 extern void cifs_put_tcon(struct cifs_tcon *tcon);
@@ -332,7 +328,7 @@ extern int parse_dfs_referrals(struct get_dfs_referral_rsp *rsp, u32 rsp_size,
 			       const char *searchName, bool is_unicode);
 extern void reset_cifs_unix_caps(unsigned int xid, struct cifs_tcon *tcon,
 				 struct cifs_sb_info *cifs_sb,
-				 struct smb_vol *vol);
+				 struct smb3_fs_context *ctx);
 extern int CIFSSMBQFSInfo(const unsigned int xid, struct cifs_tcon *tcon,
 			struct kstatfs *FSData);
 extern int SMBOldQFSInfo(const unsigned int xid, struct cifs_tcon *tcon,
@@ -553,18 +549,15 @@ extern int SMBencrypt(unsigned char *passwd, const unsigned char *c8,
 			unsigned char *p24);
 
 extern int
-cifs_setup_volume_info(struct smb_vol *volume_info, char *mount_data,
-		       const char *devname, bool is_smb3);
-extern void
-cifs_cleanup_volume_info_contents(struct smb_vol *volume_info);
+cifs_setup_volume_info(struct smb3_fs_context *ctx);
 
 extern struct TCP_Server_Info *
-cifs_find_tcp_session(struct smb_vol *vol);
+cifs_find_tcp_session(struct smb3_fs_context *ctx);
 
 extern void cifs_put_smb_ses(struct cifs_ses *ses);
 
 extern struct cifs_ses *
-cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info);
+cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx);
 
 void cifs_readdata_release(struct kref *refcount);
 int cifs_async_readv(struct cifs_readdata *rdata);
@@ -604,9 +597,7 @@ extern void rqst_page_get_length(struct smb_rqst *rqst, unsigned int page,
 				unsigned int *len, unsigned int *offset);
 struct cifs_chan *
 cifs_ses_find_chan(struct cifs_ses *ses, struct TCP_Server_Info *server);
-int cifs_try_adding_channels(struct cifs_ses *ses);
-int cifs_ses_add_channel(struct cifs_ses *ses,
-				struct cifs_server_iface *iface);
+int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses);
 bool is_server_using_iface(struct TCP_Server_Info *server,
 			   struct cifs_server_iface *iface);
 bool is_ses_using_iface(struct cifs_ses *ses, struct cifs_server_iface *iface);
@@ -620,6 +611,8 @@ int smb2_parse_query_directory(struct cifs_tcon *tcon, struct kvec *rsp_iov,
 struct super_block *cifs_get_tcp_super(struct TCP_Server_Info *server);
 void cifs_put_tcp_super(struct super_block *sb);
 int update_super_prepath(struct cifs_tcon *tcon, char *prefix);
+char *extract_hostname(const char *unc);
+char *extract_sharename(const char *unc);
 
 #ifdef CONFIG_CIFS_DFS_UPCALL
 static inline int get_dfs_path(const unsigned int xid, struct cifs_ses *ses,
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 44f9cce57099..b9df85506938 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -62,6 +62,9 @@
 #include "dfs_cache.h"
 #endif
 #include "fs_context.h"
+#ifdef CONFIG_CIFS_SWN_UPCALL
+#include "cifs_swn.h"
+#endif
 
 extern mempool_t *cifs_req_poolp;
 extern bool disable_legacy_dialects;
@@ -73,218 +76,10 @@ extern bool disable_legacy_dialects;
 /* Drop the connection to not overload the server */
 #define NUM_STATUS_IO_TIMEOUT   5
 
-enum {
-	/* Mount options that take no arguments */
-	Opt_user_xattr, Opt_nouser_xattr,
-	Opt_forceuid, Opt_noforceuid,
-	Opt_forcegid, Opt_noforcegid,
-	Opt_noblocksend, Opt_noautotune, Opt_nolease,
-	Opt_hard, Opt_soft, Opt_perm, Opt_noperm, Opt_nodelete,
-	Opt_mapposix, Opt_nomapposix,
-	Opt_mapchars, Opt_nomapchars, Opt_sfu,
-	Opt_nosfu, Opt_nodfs, Opt_posixpaths,
-	Opt_noposixpaths, Opt_nounix, Opt_unix,
-	Opt_nocase,
-	Opt_brl, Opt_nobrl,
-	Opt_handlecache, Opt_nohandlecache,
-	Opt_forcemandatorylock, Opt_setuidfromacl, Opt_setuids,
-	Opt_nosetuids, Opt_dynperm, Opt_nodynperm,
-	Opt_nohard, Opt_nosoft,
-	Opt_nointr, Opt_intr,
-	Opt_nostrictsync, Opt_strictsync,
-	Opt_serverino, Opt_noserverino,
-	Opt_rwpidforward, Opt_cifsacl, Opt_nocifsacl,
-	Opt_acl, Opt_noacl, Opt_locallease,
-	Opt_sign, Opt_ignore_signature, Opt_seal, Opt_noac,
-	Opt_fsc, Opt_mfsymlinks,
-	Opt_multiuser, Opt_sloppy, Opt_nosharesock,
-	Opt_persistent, Opt_nopersistent,
-	Opt_resilient, Opt_noresilient,
-	Opt_domainauto, Opt_rdma, Opt_modesid, Opt_rootfs,
-	Opt_multichannel, Opt_nomultichannel,
-	Opt_compress,
-
-	/* Mount options which take numeric value */
-	Opt_backupuid, Opt_backupgid, Opt_uid,
-	Opt_cruid, Opt_gid, Opt_file_mode,
-	Opt_dirmode, Opt_port,
-	Opt_min_enc_offload,
-	Opt_blocksize, Opt_rsize, Opt_wsize, Opt_actimeo,
-	Opt_echo_interval, Opt_max_credits, Opt_handletimeout,
-	Opt_snapshot, Opt_max_channels,
-
-	/* Mount options which take string value */
-	Opt_user, Opt_pass, Opt_ip,
-	Opt_domain, Opt_srcaddr, Opt_iocharset,
-	Opt_netbiosname, Opt_servern,
-	Opt_ver, Opt_vers, Opt_sec, Opt_cache,
-
-	/* Mount options to be ignored */
-	Opt_ignore,
-
-	/* Options which could be blank */
-	Opt_blank_pass,
-	Opt_blank_user,
-	Opt_blank_ip,
-
-	Opt_err
-};
-
-static const match_table_t cifs_mount_option_tokens = {
-
-	{ Opt_user_xattr, "user_xattr" },
-	{ Opt_nouser_xattr, "nouser_xattr" },
-	{ Opt_forceuid, "forceuid" },
-	{ Opt_noforceuid, "noforceuid" },
-	{ Opt_forcegid, "forcegid" },
-	{ Opt_noforcegid, "noforcegid" },
-	{ Opt_noblocksend, "noblocksend" },
-	{ Opt_noautotune, "noautotune" },
-	{ Opt_nolease, "nolease" },
-	{ Opt_hard, "hard" },
-	{ Opt_soft, "soft" },
-	{ Opt_perm, "perm" },
-	{ Opt_noperm, "noperm" },
-	{ Opt_nodelete, "nodelete" },
-	{ Opt_mapchars, "mapchars" }, /* SFU style */
-	{ Opt_nomapchars, "nomapchars" },
-	{ Opt_mapposix, "mapposix" }, /* SFM style */
-	{ Opt_nomapposix, "nomapposix" },
-	{ Opt_sfu, "sfu" },
-	{ Opt_nosfu, "nosfu" },
-	{ Opt_nodfs, "nodfs" },
-	{ Opt_posixpaths, "posixpaths" },
-	{ Opt_noposixpaths, "noposixpaths" },
-	{ Opt_nounix, "nounix" },
-	{ Opt_nounix, "nolinux" },
-	{ Opt_nounix, "noposix" },
-	{ Opt_unix, "unix" },
-	{ Opt_unix, "linux" },
-	{ Opt_unix, "posix" },
-	{ Opt_nocase, "nocase" },
-	{ Opt_nocase, "ignorecase" },
-	{ Opt_brl, "brl" },
-	{ Opt_nobrl, "nobrl" },
-	{ Opt_handlecache, "handlecache" },
-	{ Opt_nohandlecache, "nohandlecache" },
-	{ Opt_nobrl, "nolock" },
-	{ Opt_forcemandatorylock, "forcemandatorylock" },
-	{ Opt_forcemandatorylock, "forcemand" },
-	{ Opt_setuids, "setuids" },
-	{ Opt_nosetuids, "nosetuids" },
-	{ Opt_setuidfromacl, "idsfromsid" },
-	{ Opt_dynperm, "dynperm" },
-	{ Opt_nodynperm, "nodynperm" },
-	{ Opt_nohard, "nohard" },
-	{ Opt_nosoft, "nosoft" },
-	{ Opt_nointr, "nointr" },
-	{ Opt_intr, "intr" },
-	{ Opt_nostrictsync, "nostrictsync" },
-	{ Opt_strictsync, "strictsync" },
-	{ Opt_serverino, "serverino" },
-	{ Opt_noserverino, "noserverino" },
-	{ Opt_rwpidforward, "rwpidforward" },
-	{ Opt_modesid, "modefromsid" },
-	{ Opt_cifsacl, "cifsacl" },
-	{ Opt_nocifsacl, "nocifsacl" },
-	{ Opt_acl, "acl" },
-	{ Opt_noacl, "noacl" },
-	{ Opt_locallease, "locallease" },
-	{ Opt_sign, "sign" },
-	{ Opt_ignore_signature, "signloosely" },
-	{ Opt_seal, "seal" },
-	{ Opt_noac, "noac" },
-	{ Opt_fsc, "fsc" },
-	{ Opt_mfsymlinks, "mfsymlinks" },
-	{ Opt_multiuser, "multiuser" },
-	{ Opt_sloppy, "sloppy" },
-	{ Opt_nosharesock, "nosharesock" },
-	{ Opt_persistent, "persistenthandles"},
-	{ Opt_nopersistent, "nopersistenthandles"},
-	{ Opt_resilient, "resilienthandles"},
-	{ Opt_noresilient, "noresilienthandles"},
-	{ Opt_domainauto, "domainauto"},
-	{ Opt_rdma, "rdma"},
-	{ Opt_multichannel, "multichannel" },
-	{ Opt_nomultichannel, "nomultichannel" },
-
-	{ Opt_backupuid, "backupuid=%s" },
-	{ Opt_backupgid, "backupgid=%s" },
-	{ Opt_uid, "uid=%s" },
-	{ Opt_cruid, "cruid=%s" },
-	{ Opt_gid, "gid=%s" },
-	{ Opt_file_mode, "file_mode=%s" },
-	{ Opt_dirmode, "dirmode=%s" },
-	{ Opt_dirmode, "dir_mode=%s" },
-	{ Opt_port, "port=%s" },
-	{ Opt_min_enc_offload, "esize=%s" },
-	{ Opt_blocksize, "bsize=%s" },
-	{ Opt_rsize, "rsize=%s" },
-	{ Opt_wsize, "wsize=%s" },
-	{ Opt_actimeo, "actimeo=%s" },
-	{ Opt_handletimeout, "handletimeout=%s" },
-	{ Opt_echo_interval, "echo_interval=%s" },
-	{ Opt_max_credits, "max_credits=%s" },
-	{ Opt_snapshot, "snapshot=%s" },
-	{ Opt_max_channels, "max_channels=%s" },
-	{ Opt_compress, "compress=%s" },
-
-	{ Opt_blank_user, "user=" },
-	{ Opt_blank_user, "username=" },
-	{ Opt_user, "user=%s" },
-	{ Opt_user, "username=%s" },
-	{ Opt_blank_pass, "pass=" },
-	{ Opt_blank_pass, "password=" },
-	{ Opt_pass, "pass=%s" },
-	{ Opt_pass, "password=%s" },
-	{ Opt_blank_ip, "ip=" },
-	{ Opt_blank_ip, "addr=" },
-	{ Opt_ip, "ip=%s" },
-	{ Opt_ip, "addr=%s" },
-	{ Opt_ignore, "unc=%s" },
-	{ Opt_ignore, "target=%s" },
-	{ Opt_ignore, "path=%s" },
-	{ Opt_domain, "dom=%s" },
-	{ Opt_domain, "domain=%s" },
-	{ Opt_domain, "workgroup=%s" },
-	{ Opt_srcaddr, "srcaddr=%s" },
-	{ Opt_ignore, "prefixpath=%s" },
-	{ Opt_iocharset, "iocharset=%s" },
-	{ Opt_netbiosname, "netbiosname=%s" },
-	{ Opt_servern, "servern=%s" },
-	{ Opt_ver, "ver=%s" },
-	{ Opt_vers, "vers=%s" },
-	{ Opt_sec, "sec=%s" },
-	{ Opt_cache, "cache=%s" },
-
-	{ Opt_ignore, "cred" },
-	{ Opt_ignore, "credentials" },
-	{ Opt_ignore, "cred=%s" },
-	{ Opt_ignore, "credentials=%s" },
-	{ Opt_ignore, "guest" },
-	{ Opt_ignore, "rw" },
-	{ Opt_ignore, "ro" },
-	{ Opt_ignore, "suid" },
-	{ Opt_ignore, "nosuid" },
-	{ Opt_ignore, "exec" },
-	{ Opt_ignore, "noexec" },
-	{ Opt_ignore, "nodev" },
-	{ Opt_ignore, "noauto" },
-	{ Opt_ignore, "dev" },
-	{ Opt_ignore, "mand" },
-	{ Opt_ignore, "nomand" },
-	{ Opt_ignore, "relatime" },
-	{ Opt_ignore, "_netdev" },
-	{ Opt_rootfs, "rootfs" },
-
-	{ Opt_err, NULL }
-};
-
 static int ip_connect(struct TCP_Server_Info *server);
 static int generic_ip_connect(struct TCP_Server_Info *server);
 static void tlink_rb_insert(struct rb_root *root, struct tcon_link *new_tlink);
 static void cifs_prune_tlinks(struct work_struct *work);
-static char *extract_hostname(const char *unc);
 
 /*
  * Resolve hostname and set ip addr in tcp ses. Useful for hostnames that may
@@ -293,7 +88,7 @@ static char *extract_hostname(const char *unc);
  * This should be called with server->srv_mutex held.
  */
 #ifdef CONFIG_CIFS_DFS_UPCALL
-static int reconn_set_ipaddr(struct TCP_Server_Info *server)
+static int reconn_set_ipaddr_from_hostname(struct TCP_Server_Info *server)
 {
 	int rc;
 	int len;
@@ -328,14 +123,7 @@ static int reconn_set_ipaddr(struct TCP_Server_Info *server)
 
 	return !rc ? -1 : 0;
 }
-#else
-static inline int reconn_set_ipaddr(struct TCP_Server_Info *server)
-{
-	return 0;
-}
-#endif
 
-#ifdef CONFIG_CIFS_DFS_UPCALL
 /* These functions must be called with server->srv_mutex held */
 static void reconn_set_next_dfs_target(struct TCP_Server_Info *server,
 				       struct cifs_sb_info *cifs_sb,
@@ -343,6 +131,7 @@ static void reconn_set_next_dfs_target(struct TCP_Server_Info *server,
 				       struct dfs_cache_tgt_iterator **tgt_it)
 {
 	const char *name;
+	int rc;
 
 	if (!cifs_sb || !cifs_sb->origin_fullpath)
 		return;
@@ -366,6 +155,13 @@ static void reconn_set_next_dfs_target(struct TCP_Server_Info *server,
 		cifs_dbg(FYI,
 			 "%s: failed to extract hostname from target: %ld\n",
 			 __func__, PTR_ERR(server->hostname));
+		return;
+	}
+
+	rc = reconn_set_ipaddr_from_hostname(server);
+	if (rc) {
+		cifs_dbg(FYI, "%s: failed to resolve hostname: %d\n",
+			 __func__, rc);
 	}
 }
 
@@ -517,19 +313,25 @@ cifs_reconnect(struct TCP_Server_Info *server)
 		try_to_freeze();
 
 		mutex_lock(&server->srv_mutex);
+
+#ifdef CONFIG_CIFS_SWN_UPCALL
+		if (server->use_swn_dstaddr) {
+			server->dstaddr = server->swn_dstaddr;
+		} else {
+#endif
+
 #ifdef CONFIG_CIFS_DFS_UPCALL
-		/*
-		 * Set up next DFS target server (if any) for reconnect. If DFS
-		 * feature is disabled, then we will retry last server we
-		 * connected to before.
-		 */
-		reconn_set_next_dfs_target(server, cifs_sb, &tgt_list, &tgt_it);
+			/*
+			 * Set up next DFS target server (if any) for reconnect. If DFS
+			 * feature is disabled, then we will retry last server we
+			 * connected to before.
+			 */
+			reconn_set_next_dfs_target(server, cifs_sb, &tgt_list, &tgt_it);
 #endif
-		rc = reconn_set_ipaddr(server);
-		if (rc) {
-			cifs_dbg(FYI, "%s: failed to resolve hostname: %d\n",
-				 __func__, rc);
+
+#ifdef CONFIG_CIFS_SWN_UPCALL
 		}
+#endif
 
 		if (cifs_rdma_enabled(server))
 			rc = smbd_reconnect(server);
@@ -546,6 +348,9 @@ cifs_reconnect(struct TCP_Server_Info *server)
 			if (server->tcpStatus != CifsExiting)
 				server->tcpStatus = CifsNeedNegotiate;
 			spin_unlock(&GlobalMid_Lock);
+#ifdef CONFIG_CIFS_SWN_UPCALL
+			server->use_swn_dstaddr = false;
+#endif
 			mutex_unlock(&server->srv_mutex);
 		}
 	} while (server->tcpStatus == CifsNeedReconnect);
@@ -610,6 +415,11 @@ cifs_echo_request(struct work_struct *work)
 		cifs_dbg(FYI, "Unable to send echo request to server: %s\n",
 			 server->hostname);
 
+#ifdef CONFIG_CIFS_SWN_UPCALL
+	/* Check witness registrations */
+	cifs_swn_check();
+#endif
+
 requeue_echo:
 	queue_delayed_work(cifsiod_wq, &server->echo, server->echo_interval);
 }
@@ -1036,6 +846,7 @@ static void
 smb2_add_credits_from_hdr(char *buffer, struct TCP_Server_Info *server)
 {
 	struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)buffer;
+	int scredits = server->credits;
 
 	/*
 	 * SMB1 does not use credits.
@@ -1048,6 +859,13 @@ smb2_add_credits_from_hdr(char *buffer, struct TCP_Server_Info *server)
 		server->credits += le16_to_cpu(shdr->CreditRequest);
 		spin_unlock(&server->req_lock);
 		wake_up(&server->request_q);
+
+		trace_smb3_add_credits(server->CurrentMid,
+				server->hostname, scredits,
+				le16_to_cpu(shdr->CreditRequest));
+		cifs_server_dbg(FYI, "%s: added %u credits total=%d\n",
+				__func__, le16_to_cpu(shdr->CreditRequest),
+				scredits);
 	}
 }
 
@@ -1232,1051 +1050,9 @@ next_pdu:
 	module_put_and_exit(0);
 }
 
-/* extract the host portion of the UNC string */
-static char *
-extract_hostname(const char *unc)
-{
-	const char *src;
-	char *dst, *delim;
-	unsigned int len;
-
-	/* skip double chars at beginning of string */
-	/* BB: check validity of these bytes? */
-	if (strlen(unc) < 3)
-		return ERR_PTR(-EINVAL);
-	for (src = unc; *src && *src == '\\'; src++)
-		;
-	if (!*src)
-		return ERR_PTR(-EINVAL);
-
-	/* delimiter between hostname and sharename is always '\\' now */
-	delim = strchr(src, '\\');
-	if (!delim)
-		return ERR_PTR(-EINVAL);
-
-	len = delim - src;
-	dst = kmalloc((len + 1), GFP_KERNEL);
-	if (dst == NULL)
-		return ERR_PTR(-ENOMEM);
-
-	memcpy(dst, src, len);
-	dst[len] = '\0';
-
-	return dst;
-}
-
-static int get_option_ul(substring_t args[], unsigned long *option)
-{
-	int rc;
-	char *string;
-
-	string = match_strdup(args);
-	if (string == NULL)
-		return -ENOMEM;
-	rc = kstrtoul(string, 0, option);
-	kfree(string);
-
-	return rc;
-}
-
-static int get_option_uid(substring_t args[], kuid_t *result)
-{
-	unsigned long value;
-	kuid_t uid;
-	int rc;
-
-	rc = get_option_ul(args, &value);
-	if (rc)
-		return rc;
-
-	uid = make_kuid(current_user_ns(), value);
-	if (!uid_valid(uid))
-		return -EINVAL;
-
-	*result = uid;
-	return 0;
-}
-
-static int get_option_gid(substring_t args[], kgid_t *result)
-{
-	unsigned long value;
-	kgid_t gid;
-	int rc;
-
-	rc = get_option_ul(args, &value);
-	if (rc)
-		return rc;
-
-	gid = make_kgid(current_user_ns(), value);
-	if (!gid_valid(gid))
-		return -EINVAL;
-
-	*result = gid;
-	return 0;
-}
-
-/*
- * Parse a devname into substrings and populate the vol->UNC and vol->prepath
- * fields with the result. Returns 0 on success and an error otherwise.
- */
-static int
-cifs_parse_devname(const char *devname, struct smb_vol *vol)
-{
-	char *pos;
-	const char *delims = "/\\";
-	size_t len;
-
-	if (unlikely(!devname || !*devname)) {
-		cifs_dbg(VFS, "Device name not specified\n");
-		return -EINVAL;
-	}
-
-	/* make sure we have a valid UNC double delimiter prefix */
-	len = strspn(devname, delims);
-	if (len != 2)
-		return -EINVAL;
-
-	/* find delimiter between host and sharename */
-	pos = strpbrk(devname + 2, delims);
-	if (!pos)
-		return -EINVAL;
-
-	/* skip past delimiter */
-	++pos;
-
-	/* now go until next delimiter or end of string */
-	len = strcspn(pos, delims);
-
-	/* move "pos" up to delimiter or NULL */
-	pos += len;
-	vol->UNC = kstrndup(devname, pos - devname, GFP_KERNEL);
-	if (!vol->UNC)
-		return -ENOMEM;
-
-	convert_delimiter(vol->UNC, '\\');
-
-	/* skip any delimiter */
-	if (*pos == '/' || *pos == '\\')
-		pos++;
-
-	/* If pos is NULL then no prepath */
-	if (!*pos)
-		return 0;
-
-	vol->prepath = kstrdup(pos, GFP_KERNEL);
-	if (!vol->prepath)
-		return -ENOMEM;
-
-	return 0;
-}
-
-static int
-cifs_parse_mount_options(const char *mountdata, const char *devname,
-			 struct smb_vol *vol, bool is_smb3)
-{
-	char *data, *end;
-	char *mountdata_copy = NULL, *options;
-	unsigned int  temp_len, i, j;
-	char separator[2];
-	short int override_uid = -1;
-	short int override_gid = -1;
-	bool uid_specified = false;
-	bool gid_specified = false;
-	bool sloppy = false;
-	char *invalid = NULL;
-	char *nodename = utsname()->nodename;
-	char *string = NULL;
-	char *tmp_end, *value;
-	char delim;
-	bool got_ip = false;
-	bool got_version = false;
-	unsigned short port = 0;
-	struct sockaddr *dstaddr = (struct sockaddr *)&vol->dstaddr;
-
-	separator[0] = ',';
-	separator[1] = 0;
-	delim = separator[0];
-
-	/* ensure we always start with zeroed-out smb_vol */
-	memset(vol, 0, sizeof(*vol));
-
-	/*
-	 * does not have to be perfect mapping since field is
-	 * informational, only used for servers that do not support
-	 * port 445 and it can be overridden at mount time
-	 */
-	memset(vol->source_rfc1001_name, 0x20, RFC1001_NAME_LEN);
-	for (i = 0; i < strnlen(nodename, RFC1001_NAME_LEN); i++)
-		vol->source_rfc1001_name[i] = toupper(nodename[i]);
-
-	vol->source_rfc1001_name[RFC1001_NAME_LEN] = 0;
-	/* null target name indicates to use *SMBSERVR default called name
-	   if we end up sending RFC1001 session initialize */
-	vol->target_rfc1001_name[0] = 0;
-	vol->cred_uid = current_uid();
-	vol->linux_uid = current_uid();
-	vol->linux_gid = current_gid();
-	vol->bsize = 1024 * 1024; /* can improve cp performance significantly */
-	/*
-	 * default to SFM style remapping of seven reserved characters
-	 * unless user overrides it or we negotiate CIFS POSIX where
-	 * it is unnecessary.  Can not simultaneously use more than one mapping
-	 * since then readdir could list files that open could not open
-	 */
-	vol->remap = true;
-
-	/* default to only allowing write access to owner of the mount */
-	vol->dir_mode = vol->file_mode = S_IRUGO | S_IXUGO | S_IWUSR;
-
-	/* vol->retry default is 0 (i.e. "soft" limited retry not hard retry) */
-	/* default is always to request posix paths. */
-	vol->posix_paths = 1;
-	/* default to using server inode numbers where available */
-	vol->server_ino = 1;
-
-	/* default is to use strict cifs caching semantics */
-	vol->strict_io = true;
-
-	vol->actimeo = CIFS_DEF_ACTIMEO;
-
-	/* Most clients set timeout to 0, allows server to use its default */
-	vol->handle_timeout = 0; /* See MS-SMB2 spec section 2.2.14.2.12 */
-
-	/* offer SMB2.1 and later (SMB3 etc). Secure and widely accepted */
-	vol->ops = &smb30_operations;
-	vol->vals = &smbdefault_values;
-
-	vol->echo_interval = SMB_ECHO_INTERVAL_DEFAULT;
-
-	/* default to no multichannel (single server connection) */
-	vol->multichannel = false;
-	vol->max_channels = 1;
-
-	if (!mountdata)
-		goto cifs_parse_mount_err;
-
-	mountdata_copy = kstrndup(mountdata, PAGE_SIZE, GFP_KERNEL);
-	if (!mountdata_copy)
-		goto cifs_parse_mount_err;
-
-	options = mountdata_copy;
-	end = options + strlen(options);
-
-	if (strncmp(options, "sep=", 4) == 0) {
-		if (options[4] != 0) {
-			separator[0] = options[4];
-			options += 5;
-		} else {
-			cifs_dbg(FYI, "Null separator not allowed\n");
-		}
-	}
-	vol->backupuid_specified = false; /* no backup intent for a user */
-	vol->backupgid_specified = false; /* no backup intent for a group */
-
-	switch (cifs_parse_devname(devname, vol)) {
-	case 0:
-		break;
-	case -ENOMEM:
-		cifs_dbg(VFS, "Unable to allocate memory for devname\n");
-		goto cifs_parse_mount_err;
-	case -EINVAL:
-		cifs_dbg(VFS, "Malformed UNC in devname\n");
-		goto cifs_parse_mount_err;
-	default:
-		cifs_dbg(VFS, "Unknown error parsing devname\n");
-		goto cifs_parse_mount_err;
-	}
-
-	while ((data = strsep(&options, separator)) != NULL) {
-		substring_t args[MAX_OPT_ARGS];
-		unsigned long option;
-		int token;
-
-		if (!*data)
-			continue;
-
-		token = match_token(data, cifs_mount_option_tokens, args);
-
-		switch (token) {
-
-		/* Ingnore the following */
-		case Opt_ignore:
-			break;
-
-		/* Boolean values */
-		case Opt_user_xattr:
-			vol->no_xattr = 0;
-			break;
-		case Opt_nouser_xattr:
-			vol->no_xattr = 1;
-			break;
-		case Opt_forceuid:
-			override_uid = 1;
-			break;
-		case Opt_noforceuid:
-			override_uid = 0;
-			break;
-		case Opt_forcegid:
-			override_gid = 1;
-			break;
-		case Opt_noforcegid:
-			override_gid = 0;
-			break;
-		case Opt_noblocksend:
-			vol->noblocksnd = 1;
-			break;
-		case Opt_noautotune:
-			vol->noautotune = 1;
-			break;
-		case Opt_nolease:
-			vol->no_lease = 1;
-			break;
-		case Opt_hard:
-			vol->retry = 1;
-			break;
-		case Opt_soft:
-			vol->retry = 0;
-			break;
-		case Opt_perm:
-			vol->noperm = 0;
-			break;
-		case Opt_noperm:
-			vol->noperm = 1;
-			break;
-		case Opt_nodelete:
-			vol->nodelete = 1;
-			break;
-		case Opt_mapchars:
-			vol->sfu_remap = true;
-			vol->remap = false; /* disable SFM mapping */
-			break;
-		case Opt_nomapchars:
-			vol->sfu_remap = false;
-			break;
-		case Opt_mapposix:
-			vol->remap = true;
-			vol->sfu_remap = false; /* disable SFU mapping */
-			break;
-		case Opt_nomapposix:
-			vol->remap = false;
-			break;
-		case Opt_sfu:
-			vol->sfu_emul = 1;
-			break;
-		case Opt_nosfu:
-			vol->sfu_emul = 0;
-			break;
-		case Opt_nodfs:
-			vol->nodfs = 1;
-			break;
-		case Opt_rootfs:
-#ifdef CONFIG_CIFS_ROOT
-			vol->rootfs = true;
-#endif
-			break;
-		case Opt_posixpaths:
-			vol->posix_paths = 1;
-			break;
-		case Opt_noposixpaths:
-			vol->posix_paths = 0;
-			break;
-		case Opt_nounix:
-			if (vol->linux_ext)
-				cifs_dbg(VFS,
-					"conflicting unix mount options\n");
-			vol->no_linux_ext = 1;
-			break;
-		case Opt_unix:
-			if (vol->no_linux_ext)
-				cifs_dbg(VFS,
-					"conflicting unix mount options\n");
-			vol->linux_ext = 1;
-			break;
-		case Opt_nocase:
-			vol->nocase = 1;
-			break;
-		case Opt_brl:
-			vol->nobrl =  0;
-			break;
-		case Opt_nobrl:
-			vol->nobrl =  1;
-			/*
-			 * turn off mandatory locking in mode
-			 * if remote locking is turned off since the
-			 * local vfs will do advisory
-			 */
-			if (vol->file_mode ==
-				(S_IALLUGO & ~(S_ISUID | S_IXGRP)))
-				vol->file_mode = S_IALLUGO;
-			break;
-		case Opt_nohandlecache:
-			vol->nohandlecache = 1;
-			break;
-		case Opt_handlecache:
-			vol->nohandlecache = 0;
-			break;
-		case Opt_forcemandatorylock:
-			vol->mand_lock = 1;
-			break;
-		case Opt_setuids:
-			vol->setuids = 1;
-			break;
-		case Opt_nosetuids:
-			vol->setuids = 0;
-			break;
-		case Opt_setuidfromacl:
-			vol->setuidfromacl = 1;
-			break;
-		case Opt_dynperm:
-			vol->dynperm = true;
-			break;
-		case Opt_nodynperm:
-			vol->dynperm = false;
-			break;
-		case Opt_nohard:
-			vol->retry = 0;
-			break;
-		case Opt_nosoft:
-			vol->retry = 1;
-			break;
-		case Opt_nointr:
-			vol->intr = 0;
-			break;
-		case Opt_intr:
-			vol->intr = 1;
-			break;
-		case Opt_nostrictsync:
-			vol->nostrictsync = 1;
-			break;
-		case Opt_strictsync:
-			vol->nostrictsync = 0;
-			break;
-		case Opt_serverino:
-			vol->server_ino = 1;
-			break;
-		case Opt_noserverino:
-			vol->server_ino = 0;
-			break;
-		case Opt_rwpidforward:
-			vol->rwpidforward = 1;
-			break;
-		case Opt_modesid:
-			vol->mode_ace = 1;
-			break;
-		case Opt_cifsacl:
-			vol->cifs_acl = 1;
-			break;
-		case Opt_nocifsacl:
-			vol->cifs_acl = 0;
-			break;
-		case Opt_acl:
-			vol->no_psx_acl = 0;
-			break;
-		case Opt_noacl:
-			vol->no_psx_acl = 1;
-			break;
-		case Opt_locallease:
-			vol->local_lease = 1;
-			break;
-		case Opt_sign:
-			vol->sign = true;
-			break;
-		case Opt_ignore_signature:
-			vol->sign = true;
-			vol->ignore_signature = true;
-			break;
-		case Opt_seal:
-			/* we do not do the following in secFlags because seal
-			 * is a per tree connection (mount) not a per socket
-			 * or per-smb connection option in the protocol
-			 * vol->secFlg |= CIFSSEC_MUST_SEAL;
-			 */
-			vol->seal = 1;
-			break;
-		case Opt_noac:
-			pr_warn("Mount option noac not supported. Instead set /proc/fs/cifs/LookupCacheEnabled to 0\n");
-			break;
-		case Opt_fsc:
-#ifndef CONFIG_CIFS_FSCACHE
-			cifs_dbg(VFS, "FS-Cache support needs CONFIG_CIFS_FSCACHE kernel config option set\n");
-			goto cifs_parse_mount_err;
-#endif
-			vol->fsc = true;
-			break;
-		case Opt_mfsymlinks:
-			vol->mfsymlinks = true;
-			break;
-		case Opt_multiuser:
-			vol->multiuser = true;
-			break;
-		case Opt_sloppy:
-			sloppy = true;
-			break;
-		case Opt_nosharesock:
-			vol->nosharesock = true;
-			break;
-		case Opt_nopersistent:
-			vol->nopersistent = true;
-			if (vol->persistent) {
-				cifs_dbg(VFS,
-				  "persistenthandles mount options conflict\n");
-				goto cifs_parse_mount_err;
-			}
-			break;
-		case Opt_persistent:
-			vol->persistent = true;
-			if ((vol->nopersistent) || (vol->resilient)) {
-				cifs_dbg(VFS,
-				  "persistenthandles mount options conflict\n");
-				goto cifs_parse_mount_err;
-			}
-			break;
-		case Opt_resilient:
-			vol->resilient = true;
-			if (vol->persistent) {
-				cifs_dbg(VFS,
-				  "persistenthandles mount options conflict\n");
-				goto cifs_parse_mount_err;
-			}
-			break;
-		case Opt_noresilient:
-			vol->resilient = false; /* already the default */
-			break;
-		case Opt_domainauto:
-			vol->domainauto = true;
-			break;
-		case Opt_rdma:
-			vol->rdma = true;
-			break;
-		case Opt_multichannel:
-			vol->multichannel = true;
-			/* if number of channels not specified, default to 2 */
-			if (vol->max_channels < 2)
-				vol->max_channels = 2;
-			break;
-		case Opt_nomultichannel:
-			vol->multichannel = false;
-			vol->max_channels = 1;
-			break;
-		case Opt_compress:
-			vol->compression = UNKNOWN_TYPE;
-			cifs_dbg(VFS,
-				"SMB3 compression support is experimental\n");
-			break;
-
-		/* Numeric Values */
-		case Opt_backupuid:
-			if (get_option_uid(args, &vol->backupuid)) {
-				cifs_dbg(VFS, "%s: Invalid backupuid value\n",
-					 __func__);
-				goto cifs_parse_mount_err;
-			}
-			vol->backupuid_specified = true;
-			break;
-		case Opt_backupgid:
-			if (get_option_gid(args, &vol->backupgid)) {
-				cifs_dbg(VFS, "%s: Invalid backupgid value\n",
-					 __func__);
-				goto cifs_parse_mount_err;
-			}
-			vol->backupgid_specified = true;
-			break;
-		case Opt_uid:
-			if (get_option_uid(args, &vol->linux_uid)) {
-				cifs_dbg(VFS, "%s: Invalid uid value\n",
-					 __func__);
-				goto cifs_parse_mount_err;
-			}
-			uid_specified = true;
-			break;
-		case Opt_cruid:
-			if (get_option_uid(args, &vol->cred_uid)) {
-				cifs_dbg(VFS, "%s: Invalid cruid value\n",
-					 __func__);
-				goto cifs_parse_mount_err;
-			}
-			break;
-		case Opt_gid:
-			if (get_option_gid(args, &vol->linux_gid)) {
-				cifs_dbg(VFS, "%s: Invalid gid value\n",
-					 __func__);
-				goto cifs_parse_mount_err;
-			}
-			gid_specified = true;
-			break;
-		case Opt_file_mode:
-			if (get_option_ul(args, &option)) {
-				cifs_dbg(VFS, "%s: Invalid file_mode value\n",
-					 __func__);
-				goto cifs_parse_mount_err;
-			}
-			vol->file_mode = option;
-			break;
-		case Opt_dirmode:
-			if (get_option_ul(args, &option)) {
-				cifs_dbg(VFS, "%s: Invalid dir_mode value\n",
-					 __func__);
-				goto cifs_parse_mount_err;
-			}
-			vol->dir_mode = option;
-			break;
-		case Opt_port:
-			if (get_option_ul(args, &option) ||
-			    option > USHRT_MAX) {
-				cifs_dbg(VFS, "%s: Invalid port value\n",
-					 __func__);
-				goto cifs_parse_mount_err;
-			}
-			port = (unsigned short)option;
-			break;
-		case Opt_min_enc_offload:
-			if (get_option_ul(args, &option)) {
-				cifs_dbg(VFS, "Invalid minimum encrypted read offload size (esize)\n");
-				goto cifs_parse_mount_err;
-			}
-			vol->min_offload = option;
-			break;
-		case Opt_blocksize:
-			if (get_option_ul(args, &option)) {
-				cifs_dbg(VFS, "%s: Invalid blocksize value\n",
-					__func__);
-				goto cifs_parse_mount_err;
-			}
-			/*
-			 * inode blocksize realistically should never need to be
-			 * less than 16K or greater than 16M and default is 1MB.
-			 * Note that small inode block sizes (e.g. 64K) can lead
-			 * to very poor performance of common tools like cp and scp
-			 */
-			if ((option < CIFS_MAX_MSGSIZE) ||
-			   (option > (4 * SMB3_DEFAULT_IOSIZE))) {
-				cifs_dbg(VFS, "%s: Invalid blocksize\n",
-					__func__);
-				goto cifs_parse_mount_err;
-			}
-			vol->bsize = option;
-			break;
-		case Opt_rsize:
-			if (get_option_ul(args, &option)) {
-				cifs_dbg(VFS, "%s: Invalid rsize value\n",
-					 __func__);
-				goto cifs_parse_mount_err;
-			}
-			vol->rsize = option;
-			break;
-		case Opt_wsize:
-			if (get_option_ul(args, &option)) {
-				cifs_dbg(VFS, "%s: Invalid wsize value\n",
-					 __func__);
-				goto cifs_parse_mount_err;
-			}
-			vol->wsize = option;
-			break;
-		case Opt_actimeo:
-			if (get_option_ul(args, &option)) {
-				cifs_dbg(VFS, "%s: Invalid actimeo value\n",
-					 __func__);
-				goto cifs_parse_mount_err;
-			}
-			vol->actimeo = HZ * option;
-			if (vol->actimeo > CIFS_MAX_ACTIMEO) {
-				cifs_dbg(VFS, "attribute cache timeout too large\n");
-				goto cifs_parse_mount_err;
-			}
-			break;
-		case Opt_handletimeout:
-			if (get_option_ul(args, &option)) {
-				cifs_dbg(VFS, "%s: Invalid handletimeout value\n",
-					 __func__);
-				goto cifs_parse_mount_err;
-			}
-			vol->handle_timeout = option;
-			if (vol->handle_timeout > SMB3_MAX_HANDLE_TIMEOUT) {
-				cifs_dbg(VFS, "Invalid handle cache timeout, longer than 16 minutes\n");
-				goto cifs_parse_mount_err;
-			}
-			break;
-		case Opt_echo_interval:
-			if (get_option_ul(args, &option)) {
-				cifs_dbg(VFS, "%s: Invalid echo interval value\n",
-					 __func__);
-				goto cifs_parse_mount_err;
-			}
-			vol->echo_interval = option;
-			break;
-		case Opt_snapshot:
-			if (get_option_ul(args, &option)) {
-				cifs_dbg(VFS, "%s: Invalid snapshot time\n",
-					 __func__);
-				goto cifs_parse_mount_err;
-			}
-			vol->snapshot_time = option;
-			break;
-		case Opt_max_credits:
-			if (get_option_ul(args, &option) || (option < 20) ||
-			    (option > 60000)) {
-				cifs_dbg(VFS, "%s: Invalid max_credits value\n",
-					 __func__);
-				goto cifs_parse_mount_err;
-			}
-			vol->max_credits = option;
-			break;
-		case Opt_max_channels:
-			if (get_option_ul(args, &option) || option < 1 ||
-				option > CIFS_MAX_CHANNELS) {
-				cifs_dbg(VFS, "%s: Invalid max_channels value, needs to be 1-%d\n",
-					 __func__, CIFS_MAX_CHANNELS);
-				goto cifs_parse_mount_err;
-			}
-			vol->max_channels = option;
-			break;
-
-		/* String Arguments */
-
-		case Opt_blank_user:
-			/* null user, ie. anonymous authentication */
-			vol->nullauth = 1;
-			vol->username = NULL;
-			break;
-		case Opt_user:
-			string = match_strdup(args);
-			if (string == NULL)
-				goto out_nomem;
-
-			if (strnlen(string, CIFS_MAX_USERNAME_LEN) >
-							CIFS_MAX_USERNAME_LEN) {
-				pr_warn("username too long\n");
-				goto cifs_parse_mount_err;
-			}
-
-			kfree(vol->username);
-			vol->username = kstrdup(string, GFP_KERNEL);
-			if (!vol->username)
-				goto cifs_parse_mount_err;
-			break;
-		case Opt_blank_pass:
-			/* passwords have to be handled differently
-			 * to allow the character used for deliminator
-			 * to be passed within them
-			 */
-
-			/*
-			 * Check if this is a case where the  password
-			 * starts with a delimiter
-			 */
-			tmp_end = strchr(data, '=');
-			tmp_end++;
-			if (!(tmp_end < end && tmp_end[1] == delim)) {
-				/* No it is not. Set the password to NULL */
-				kfree_sensitive(vol->password);
-				vol->password = NULL;
-				break;
-			}
-			fallthrough;	/* to Opt_pass below */
-		case Opt_pass:
-			/* Obtain the value string */
-			value = strchr(data, '=');
-			value++;
-
-			/* Set tmp_end to end of the string */
-			tmp_end = (char *) value + strlen(value);
-
-			/* Check if following character is the deliminator
-			 * If yes, we have encountered a double deliminator
-			 * reset the NULL character to the deliminator
-			 */
-			if (tmp_end < end && tmp_end[1] == delim) {
-				tmp_end[0] = delim;
-
-				/* Keep iterating until we get to a single
-				 * deliminator OR the end
-				 */
-				while ((tmp_end = strchr(tmp_end, delim))
-					!= NULL && (tmp_end[1] == delim)) {
-						tmp_end = (char *) &tmp_end[2];
-				}
-
-				/* Reset var options to point to next element */
-				if (tmp_end) {
-					tmp_end[0] = '\0';
-					options = (char *) &tmp_end[1];
-				} else
-					/* Reached the end of the mount option
-					 * string */
-					options = end;
-			}
-
-			kfree_sensitive(vol->password);
-			/* Now build new password string */
-			temp_len = strlen(value);
-			vol->password = kzalloc(temp_len+1, GFP_KERNEL);
-			if (vol->password == NULL) {
-				pr_warn("no memory for password\n");
-				goto cifs_parse_mount_err;
-			}
-
-			for (i = 0, j = 0; i < temp_len; i++, j++) {
-				vol->password[j] = value[i];
-				if ((value[i] == delim) &&
-				     value[i+1] == delim)
-					/* skip the second deliminator */
-					i++;
-			}
-			vol->password[j] = '\0';
-			break;
-		case Opt_blank_ip:
-			/* FIXME: should this be an error instead? */
-			got_ip = false;
-			break;
-		case Opt_ip:
-			string = match_strdup(args);
-			if (string == NULL)
-				goto out_nomem;
-
-			if (!cifs_convert_address(dstaddr, string,
-					strlen(string))) {
-				pr_err("bad ip= option (%s)\n", string);
-				goto cifs_parse_mount_err;
-			}
-			got_ip = true;
-			break;
-		case Opt_domain:
-			string = match_strdup(args);
-			if (string == NULL)
-				goto out_nomem;
-
-			if (strnlen(string, CIFS_MAX_DOMAINNAME_LEN)
-					== CIFS_MAX_DOMAINNAME_LEN) {
-				pr_warn("domain name too long\n");
-				goto cifs_parse_mount_err;
-			}
-
-			kfree(vol->domainname);
-			vol->domainname = kstrdup(string, GFP_KERNEL);
-			if (!vol->domainname) {
-				pr_warn("no memory for domainname\n");
-				goto cifs_parse_mount_err;
-			}
-			cifs_dbg(FYI, "Domain name set\n");
-			break;
-		case Opt_srcaddr:
-			string = match_strdup(args);
-			if (string == NULL)
-				goto out_nomem;
-
-			if (!cifs_convert_address(
-					(struct sockaddr *)&vol->srcaddr,
-					string, strlen(string))) {
-				pr_warn("Could not parse srcaddr: %s\n",
-					string);
-				goto cifs_parse_mount_err;
-			}
-			break;
-		case Opt_iocharset:
-			string = match_strdup(args);
-			if (string == NULL)
-				goto out_nomem;
-
-			if (strnlen(string, 1024) >= 65) {
-				pr_warn("iocharset name too long\n");
-				goto cifs_parse_mount_err;
-			}
-
-			 if (strncasecmp(string, "default", 7) != 0) {
-				kfree(vol->iocharset);
-				vol->iocharset = kstrdup(string,
-							 GFP_KERNEL);
-				if (!vol->iocharset) {
-					pr_warn("no memory for charset\n");
-					goto cifs_parse_mount_err;
-				}
-			}
-			/* if iocharset not set then load_nls_default
-			 * is used by caller
-			 */
-			 cifs_dbg(FYI, "iocharset set to %s\n", string);
-			break;
-		case Opt_netbiosname:
-			string = match_strdup(args);
-			if (string == NULL)
-				goto out_nomem;
-
-			memset(vol->source_rfc1001_name, 0x20,
-				RFC1001_NAME_LEN);
-			/*
-			 * FIXME: are there cases in which a comma can
-			 * be valid in workstation netbios name (and
-			 * need special handling)?
-			 */
-			for (i = 0; i < RFC1001_NAME_LEN; i++) {
-				/* don't ucase netbiosname for user */
-				if (string[i] == 0)
-					break;
-				vol->source_rfc1001_name[i] = string[i];
-			}
-			/* The string has 16th byte zero still from
-			 * set at top of the function
-			 */
-			if (i == RFC1001_NAME_LEN && string[i] != 0)
-				pr_warn("netbiosname longer than 15 truncated\n");
-			break;
-		case Opt_servern:
-			/* servernetbiosname specified override *SMBSERVER */
-			string = match_strdup(args);
-			if (string == NULL)
-				goto out_nomem;
-
-			/* last byte, type, is 0x20 for servr type */
-			memset(vol->target_rfc1001_name, 0x20,
-				RFC1001_NAME_LEN_WITH_NULL);
-
-			/* BB are there cases in which a comma can be
-			   valid in this workstation netbios name
-			   (and need special handling)? */
-
-			/* user or mount helper must uppercase the
-			   netbios name */
-			for (i = 0; i < 15; i++) {
-				if (string[i] == 0)
-					break;
-				vol->target_rfc1001_name[i] = string[i];
-			}
-			/* The string has 16th byte zero still from
-			   set at top of the function  */
-			if (i == RFC1001_NAME_LEN && string[i] != 0)
-				pr_warn("server netbiosname longer than 15 truncated\n");
-			break;
-		case Opt_ver:
-			/* version of mount userspace tools, not dialect */
-			string = match_strdup(args);
-			if (string == NULL)
-				goto out_nomem;
-
-			/* If interface changes in mount.cifs bump to new ver */
-			if (strncasecmp(string, "1", 1) == 0) {
-				if (strlen(string) > 1) {
-					pr_warn("Bad mount helper ver=%s. Did you want SMB1 (CIFS) dialect and mean to type vers=1.0 instead?\n",
-						string);
-					goto cifs_parse_mount_err;
-				}
-				/* This is the default */
-				break;
-			}
-			/* For all other value, error */
-			pr_warn("Invalid mount helper version specified\n");
-			goto cifs_parse_mount_err;
-		case Opt_vers:
-			/* protocol version (dialect) */
-			string = match_strdup(args);
-			if (string == NULL)
-				goto out_nomem;
-
-			if (cifs_parse_smb_version(string, vol, is_smb3) != 0)
-				goto cifs_parse_mount_err;
-			got_version = true;
-			break;
-		case Opt_sec:
-			string = match_strdup(args);
-			if (string == NULL)
-				goto out_nomem;
-
-			if (cifs_parse_security_flavors(string, vol) != 0)
-				goto cifs_parse_mount_err;
-			break;
-		case Opt_cache:
-			string = match_strdup(args);
-			if (string == NULL)
-				goto out_nomem;
-
-			if (cifs_parse_cache_flavor(string, vol) != 0)
-				goto cifs_parse_mount_err;
-			break;
-		default:
-			/*
-			 * An option we don't recognize. Save it off for later
-			 * if we haven't already found one
-			 */
-			if (!invalid)
-				invalid = data;
-			break;
-		}
-		/* Free up any allocated string */
-		kfree(string);
-		string = NULL;
-	}
-
-	if (!sloppy && invalid) {
-		pr_err("Unknown mount option \"%s\"\n", invalid);
-		goto cifs_parse_mount_err;
-	}
-
-	if (vol->rdma && vol->vals->protocol_id < SMB30_PROT_ID) {
-		cifs_dbg(VFS, "SMB Direct requires Version >=3.0\n");
-		goto cifs_parse_mount_err;
-	}
-
-#ifndef CONFIG_KEYS
-	/* Muliuser mounts require CONFIG_KEYS support */
-	if (vol->multiuser) {
-		cifs_dbg(VFS, "Multiuser mounts require kernels with CONFIG_KEYS enabled\n");
-		goto cifs_parse_mount_err;
-	}
-#endif
-	if (!vol->UNC) {
-		cifs_dbg(VFS, "CIFS mount error: No usable UNC path provided in device string!\n");
-		goto cifs_parse_mount_err;
-	}
-
-	/* make sure UNC has a share name */
-	if (!strchr(vol->UNC + 3, '\\')) {
-		cifs_dbg(VFS, "Malformed UNC. Unable to find share name.\n");
-		goto cifs_parse_mount_err;
-	}
-
-	if (!got_ip) {
-		int len;
-		const char *slash;
-
-		/* No ip= option specified? Try to get it from UNC */
-		/* Use the address part of the UNC. */
-		slash = strchr(&vol->UNC[2], '\\');
-		len = slash - &vol->UNC[2];
-		if (!cifs_convert_address(dstaddr, &vol->UNC[2], len)) {
-			pr_err("Unable to determine destination address\n");
-			goto cifs_parse_mount_err;
-		}
-	}
-
-	/* set the port that we got earlier */
-	cifs_set_port(dstaddr, port);
-
-	if (uid_specified)
-		vol->override_uid = override_uid;
-	else if (override_uid == 1)
-		pr_notice("ignoring forceuid mount option specified with no uid= option\n");
-
-	if (gid_specified)
-		vol->override_gid = override_gid;
-	else if (override_gid == 1)
-		pr_notice("ignoring forcegid mount option specified with no gid= option\n");
-
-	if (got_version == false)
-		pr_warn_once("No dialect specified on mount. Default has changed to a more secure dialect, SMB2.1 or later (e.g. SMB3.1.1), from CIFS (SMB1). To use the less secure SMB1 dialect to access old servers which do not support SMB3.1.1 (or even SMB3 or SMB2.1) specify vers=1.0 on mount.\n");
-
-	kfree(mountdata_copy);
-	return 0;
-
-out_nomem:
-	pr_warn("Could not allocate temporary buffer\n");
-cifs_parse_mount_err:
-	kfree(string);
-	kfree(mountdata_copy);
-	return 1;
-}
-
-/** Returns true if srcaddr isn't specified and rhs isn't
- * specified, or if srcaddr is specified and
- * matches the IP address of the rhs argument.
+/**
+ * Returns true if srcaddr isn't specified and rhs isn't specified, or
+ * if srcaddr is specified and matches the IP address of the rhs argument
  */
 bool
 cifs_match_ipaddr(struct sockaddr *srcaddr, struct sockaddr *rhs)
@@ -2377,14 +1153,14 @@ match_address(struct TCP_Server_Info *server, struct sockaddr *addr,
 }
 
 static bool
-match_security(struct TCP_Server_Info *server, struct smb_vol *vol)
+match_security(struct TCP_Server_Info *server, struct smb3_fs_context *ctx)
 {
 	/*
-	 * The select_sectype function should either return the vol->sectype
+	 * The select_sectype function should either return the ctx->sectype
 	 * that was specified, or "Unspecified" if that sectype was not
 	 * compatible with the given NEGOTIATE request.
 	 */
-	if (server->ops->select_sectype(server, vol->sectype)
+	if (server->ops->select_sectype(server, ctx->sectype)
 	     == Unspecified)
 		return false;
 
@@ -2393,60 +1169,60 @@ match_security(struct TCP_Server_Info *server, struct smb_vol *vol)
 	 * global_secflags at this point since if MUST_SIGN is set then
 	 * the server->sign had better be too.
 	 */
-	if (vol->sign && !server->sign)
+	if (ctx->sign && !server->sign)
 		return false;
 
 	return true;
 }
 
-static int match_server(struct TCP_Server_Info *server, struct smb_vol *vol)
+static int match_server(struct TCP_Server_Info *server, struct smb3_fs_context *ctx)
 {
-	struct sockaddr *addr = (struct sockaddr *)&vol->dstaddr;
+	struct sockaddr *addr = (struct sockaddr *)&ctx->dstaddr;
 
-	if (vol->nosharesock)
+	if (ctx->nosharesock)
 		return 0;
 
 	/* If multidialect negotiation see if existing sessions match one */
-	if (strcmp(vol->vals->version_string, SMB3ANY_VERSION_STRING) == 0) {
+	if (strcmp(ctx->vals->version_string, SMB3ANY_VERSION_STRING) == 0) {
 		if (server->vals->protocol_id < SMB30_PROT_ID)
 			return 0;
-	} else if (strcmp(vol->vals->version_string,
+	} else if (strcmp(ctx->vals->version_string,
 		   SMBDEFAULT_VERSION_STRING) == 0) {
 		if (server->vals->protocol_id < SMB21_PROT_ID)
 			return 0;
-	} else if ((server->vals != vol->vals) || (server->ops != vol->ops))
+	} else if ((server->vals != ctx->vals) || (server->ops != ctx->ops))
 		return 0;
 
 	if (!net_eq(cifs_net_ns(server), current->nsproxy->net_ns))
 		return 0;
 
 	if (!match_address(server, addr,
-			   (struct sockaddr *)&vol->srcaddr))
+			   (struct sockaddr *)&ctx->srcaddr))
 		return 0;
 
 	if (!match_port(server, addr))
 		return 0;
 
-	if (!match_security(server, vol))
+	if (!match_security(server, ctx))
 		return 0;
 
-	if (server->echo_interval != vol->echo_interval * HZ)
+	if (server->echo_interval != ctx->echo_interval * HZ)
 		return 0;
 
-	if (server->rdma != vol->rdma)
+	if (server->rdma != ctx->rdma)
 		return 0;
 
-	if (server->ignore_signature != vol->ignore_signature)
+	if (server->ignore_signature != ctx->ignore_signature)
 		return 0;
 
-	if (server->min_offload != vol->min_offload)
+	if (server->min_offload != ctx->min_offload)
 		return 0;
 
 	return 1;
 }
 
 struct TCP_Server_Info *
-cifs_find_tcp_session(struct smb_vol *vol)
+cifs_find_tcp_session(struct smb3_fs_context *ctx)
 {
 	struct TCP_Server_Info *server;
 
@@ -2456,7 +1232,7 @@ cifs_find_tcp_session(struct smb_vol *vol)
 		 * Skip ses channels since they're only handled in lower layers
 		 * (e.g. cifs_send_recv).
 		 */
-		if (server->is_channel || !match_server(server, vol))
+		if (server->is_channel || !match_server(server, ctx))
 			continue;
 
 		++server->srv_count;
@@ -2514,15 +1290,15 @@ cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect)
 }
 
 struct TCP_Server_Info *
-cifs_get_tcp_session(struct smb_vol *volume_info)
+cifs_get_tcp_session(struct smb3_fs_context *ctx)
 {
 	struct TCP_Server_Info *tcp_ses = NULL;
 	int rc;
 
-	cifs_dbg(FYI, "UNC: %s\n", volume_info->UNC);
+	cifs_dbg(FYI, "UNC: %s\n", ctx->UNC);
 
 	/* see if we already have a matching tcp_ses */
-	tcp_ses = cifs_find_tcp_session(volume_info);
+	tcp_ses = cifs_find_tcp_session(ctx);
 	if (tcp_ses)
 		return tcp_ses;
 
@@ -2532,20 +1308,20 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
 		goto out_err;
 	}
 
-	tcp_ses->ops = volume_info->ops;
-	tcp_ses->vals = volume_info->vals;
+	tcp_ses->ops = ctx->ops;
+	tcp_ses->vals = ctx->vals;
 	cifs_set_net_ns(tcp_ses, get_net(current->nsproxy->net_ns));
-	tcp_ses->hostname = extract_hostname(volume_info->UNC);
+	tcp_ses->hostname = extract_hostname(ctx->UNC);
 	if (IS_ERR(tcp_ses->hostname)) {
 		rc = PTR_ERR(tcp_ses->hostname);
 		goto out_err_crypto_release;
 	}
 
-	tcp_ses->noblockcnt = volume_info->rootfs;
-	tcp_ses->noblocksnd = volume_info->noblocksnd || volume_info->rootfs;
-	tcp_ses->noautotune = volume_info->noautotune;
-	tcp_ses->tcp_nodelay = volume_info->sockopt_tcp_nodelay;
-	tcp_ses->rdma = volume_info->rdma;
+	tcp_ses->noblockcnt = ctx->rootfs;
+	tcp_ses->noblocksnd = ctx->noblocksnd || ctx->rootfs;
+	tcp_ses->noautotune = ctx->noautotune;
+	tcp_ses->tcp_nodelay = ctx->sockopt_tcp_nodelay;
+	tcp_ses->rdma = ctx->rdma;
 	tcp_ses->in_flight = 0;
 	tcp_ses->max_in_flight = 0;
 	tcp_ses->credits = 1;
@@ -2554,26 +1330,26 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
 	INIT_LIST_HEAD(&tcp_ses->pending_mid_q);
 	mutex_init(&tcp_ses->srv_mutex);
 	memcpy(tcp_ses->workstation_RFC1001_name,
-		volume_info->source_rfc1001_name, RFC1001_NAME_LEN_WITH_NULL);
+		ctx->source_rfc1001_name, RFC1001_NAME_LEN_WITH_NULL);
 	memcpy(tcp_ses->server_RFC1001_name,
-		volume_info->target_rfc1001_name, RFC1001_NAME_LEN_WITH_NULL);
+		ctx->target_rfc1001_name, RFC1001_NAME_LEN_WITH_NULL);
 	tcp_ses->session_estab = false;
 	tcp_ses->sequence_number = 0;
 	tcp_ses->reconnect_instance = 1;
 	tcp_ses->lstrp = jiffies;
-	tcp_ses->compress_algorithm = cpu_to_le16(volume_info->compression);
+	tcp_ses->compress_algorithm = cpu_to_le16(ctx->compression);
 	spin_lock_init(&tcp_ses->req_lock);
 	INIT_LIST_HEAD(&tcp_ses->tcp_ses_list);
 	INIT_LIST_HEAD(&tcp_ses->smb_ses_list);
 	INIT_DELAYED_WORK(&tcp_ses->echo, cifs_echo_request);
 	INIT_DELAYED_WORK(&tcp_ses->reconnect, smb2_reconnect_server);
 	mutex_init(&tcp_ses->reconnect_mutex);
-	memcpy(&tcp_ses->srcaddr, &volume_info->srcaddr,
+	memcpy(&tcp_ses->srcaddr, &ctx->srcaddr,
 	       sizeof(tcp_ses->srcaddr));
-	memcpy(&tcp_ses->dstaddr, &volume_info->dstaddr,
+	memcpy(&tcp_ses->dstaddr, &ctx->dstaddr,
 		sizeof(tcp_ses->dstaddr));
-	if (volume_info->use_client_guid)
-		memcpy(tcp_ses->client_guid, volume_info->client_guid,
+	if (ctx->use_client_guid)
+		memcpy(tcp_ses->client_guid, ctx->client_guid,
 		       SMB2_CLIENT_GUID_SIZE);
 	else
 		generate_random_uuid(tcp_ses->client_guid);
@@ -2585,9 +1361,9 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
 	tcp_ses->tcpStatus = CifsNew;
 	++tcp_ses->srv_count;
 
-	if (volume_info->echo_interval >= SMB_ECHO_INTERVAL_MIN &&
-		volume_info->echo_interval <= SMB_ECHO_INTERVAL_MAX)
-		tcp_ses->echo_interval = volume_info->echo_interval * HZ;
+	if (ctx->echo_interval >= SMB_ECHO_INTERVAL_MIN &&
+		ctx->echo_interval <= SMB_ECHO_INTERVAL_MAX)
+		tcp_ses->echo_interval = ctx->echo_interval * HZ;
 	else
 		tcp_ses->echo_interval = SMB_ECHO_INTERVAL_DEFAULT * HZ;
 	if (tcp_ses->rdma) {
@@ -2597,7 +1373,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info)
 		goto out_err_crypto_release;
 #endif
 		tcp_ses->smbd_conn = smbd_get_connection(
-			tcp_ses, (struct sockaddr *)&volume_info->dstaddr);
+			tcp_ses, (struct sockaddr *)&ctx->dstaddr);
 		if (tcp_ses->smbd_conn) {
 			cifs_dbg(VFS, "RDMA transport established\n");
 			rc = 0;
@@ -2626,11 +1402,11 @@ smbd_connected:
 		module_put(THIS_MODULE);
 		goto out_err_crypto_release;
 	}
-	tcp_ses->min_offload = volume_info->min_offload;
+	tcp_ses->min_offload = ctx->min_offload;
 	tcp_ses->tcpStatus = CifsNeedNegotiate;
 
 	tcp_ses->nr_targets = 1;
-	tcp_ses->ignore_signature = volume_info->ignore_signature;
+	tcp_ses->ignore_signature = ctx->ignore_signature;
 	/* thread spawned, put it on the list */
 	spin_lock(&cifs_tcp_ses_lock);
 	list_add(&tcp_ses->tcp_ses_list, &cifs_tcp_ses_list);
@@ -2659,41 +1435,41 @@ out_err:
 	return ERR_PTR(rc);
 }
 
-static int match_session(struct cifs_ses *ses, struct smb_vol *vol)
+static int match_session(struct cifs_ses *ses, struct smb3_fs_context *ctx)
 {
-	if (vol->sectype != Unspecified &&
-	    vol->sectype != ses->sectype)
+	if (ctx->sectype != Unspecified &&
+	    ctx->sectype != ses->sectype)
 		return 0;
 
 	/*
 	 * If an existing session is limited to less channels than
 	 * requested, it should not be reused
 	 */
-	if (ses->chan_max < vol->max_channels)
+	if (ses->chan_max < ctx->max_channels)
 		return 0;
 
 	switch (ses->sectype) {
 	case Kerberos:
-		if (!uid_eq(vol->cred_uid, ses->cred_uid))
+		if (!uid_eq(ctx->cred_uid, ses->cred_uid))
 			return 0;
 		break;
 	default:
 		/* NULL username means anonymous session */
 		if (ses->user_name == NULL) {
-			if (!vol->nullauth)
+			if (!ctx->nullauth)
 				return 0;
 			break;
 		}
 
 		/* anything else takes username/password */
 		if (strncmp(ses->user_name,
-			    vol->username ? vol->username : "",
+			    ctx->username ? ctx->username : "",
 			    CIFS_MAX_USERNAME_LEN))
 			return 0;
-		if ((vol->username && strlen(vol->username) != 0) &&
+		if ((ctx->username && strlen(ctx->username) != 0) &&
 		    ses->password != NULL &&
 		    strncmp(ses->password,
-			    vol->password ? vol->password : "",
+			    ctx->password ? ctx->password : "",
 			    CIFS_MAX_PASSWORD_LEN))
 			return 0;
 	}
@@ -2707,11 +1483,10 @@ static int match_session(struct cifs_ses *ses, struct smb_vol *vol)
  * tcon_ipc. The IPC tcon has the same lifetime as the session.
  */
 static int
-cifs_setup_ipc(struct cifs_ses *ses, struct smb_vol *volume_info)
+cifs_setup_ipc(struct cifs_ses *ses, struct smb3_fs_context *ctx)
 {
 	int rc = 0, xid;
 	struct cifs_tcon *tcon;
-	struct nls_table *nls_codepage;
 	char unc[SERVER_NAME_LENGTH + sizeof("//x/IPC$")] = {0};
 	bool seal = false;
 	struct TCP_Server_Info *server = ses->server;
@@ -2720,7 +1495,7 @@ cifs_setup_ipc(struct cifs_ses *ses, struct smb_vol *volume_info)
 	 * If the mount request that resulted in the creation of the
 	 * session requires encryption, force IPC to be encrypted too.
 	 */
-	if (volume_info->seal) {
+	if (ctx->seal) {
 		if (server->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION)
 			seal = true;
 		else {
@@ -2736,14 +1511,11 @@ cifs_setup_ipc(struct cifs_ses *ses, struct smb_vol *volume_info)
 
 	scnprintf(unc, sizeof(unc), "\\\\%s\\IPC$", server->hostname);
 
-	/* cannot fail */
-	nls_codepage = load_nls_default();
-
 	xid = get_xid();
 	tcon->ses = ses;
 	tcon->ipc = true;
 	tcon->seal = seal;
-	rc = server->ops->tree_connect(xid, ses, unc, tcon, nls_codepage);
+	rc = server->ops->tree_connect(xid, ses, unc, tcon, ctx->local_nls);
 	free_xid(xid);
 
 	if (rc) {
@@ -2756,7 +1528,6 @@ cifs_setup_ipc(struct cifs_ses *ses, struct smb_vol *volume_info)
 
 	ses->tcon_ipc = tcon;
 out:
-	unload_nls(nls_codepage);
 	return rc;
 }
 
@@ -2789,7 +1560,7 @@ cifs_free_ipc(struct cifs_ses *ses)
 }
 
 static struct cifs_ses *
-cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb_vol *vol)
+cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx)
 {
 	struct cifs_ses *ses;
 
@@ -2797,7 +1568,7 @@ cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb_vol *vol)
 	list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) {
 		if (ses->status == CifsExiting)
 			continue;
-		if (!match_session(ses, vol))
+		if (!match_session(ses, ctx))
 			continue;
 		++ses->ses_count;
 		spin_unlock(&cifs_tcp_ses_lock);
@@ -2861,7 +1632,7 @@ void cifs_put_smb_ses(struct cifs_ses *ses)
 
 /* Populate username and pw fields from keyring if possible */
 static int
-cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses)
+cifs_set_cifscreds(struct smb3_fs_context *ctx, struct cifs_ses *ses)
 {
 	int rc = 0;
 	int is_domain = 0;
@@ -2941,32 +1712,32 @@ cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses)
 		goto out_key_put;
 	}
 
-	vol->username = kstrndup(payload, len, GFP_KERNEL);
-	if (!vol->username) {
+	ctx->username = kstrndup(payload, len, GFP_KERNEL);
+	if (!ctx->username) {
 		cifs_dbg(FYI, "Unable to allocate %zd bytes for username\n",
 			 len);
 		rc = -ENOMEM;
 		goto out_key_put;
 	}
-	cifs_dbg(FYI, "%s: username=%s\n", __func__, vol->username);
+	cifs_dbg(FYI, "%s: username=%s\n", __func__, ctx->username);
 
 	len = key->datalen - (len + 1);
 	if (len > CIFS_MAX_PASSWORD_LEN || len <= 0) {
 		cifs_dbg(FYI, "Bad len for password search (len=%zd)\n", len);
 		rc = -EINVAL;
-		kfree(vol->username);
-		vol->username = NULL;
+		kfree(ctx->username);
+		ctx->username = NULL;
 		goto out_key_put;
 	}
 
 	++delim;
-	vol->password = kstrndup(delim, len, GFP_KERNEL);
-	if (!vol->password) {
+	ctx->password = kstrndup(delim, len, GFP_KERNEL);
+	if (!ctx->password) {
 		cifs_dbg(FYI, "Unable to allocate %zd bytes for password\n",
 			 len);
 		rc = -ENOMEM;
-		kfree(vol->username);
-		vol->username = NULL;
+		kfree(ctx->username);
+		ctx->username = NULL;
 		goto out_key_put;
 	}
 
@@ -2975,17 +1746,17 @@ cifs_set_cifscreds(struct smb_vol *vol, struct cifs_ses *ses)
 	 * for the request.
 	 */
 	if (is_domain && ses->domainName) {
-		vol->domainname = kstrndup(ses->domainName,
+		ctx->domainname = kstrndup(ses->domainName,
 					   strlen(ses->domainName),
 					   GFP_KERNEL);
-		if (!vol->domainname) {
+		if (!ctx->domainname) {
 			cifs_dbg(FYI, "Unable to allocate %zd bytes for domain\n",
 				 len);
 			rc = -ENOMEM;
-			kfree(vol->username);
-			vol->username = NULL;
-			kfree_sensitive(vol->password);
-			vol->password = NULL;
+			kfree(ctx->username);
+			ctx->username = NULL;
+			kfree_sensitive(ctx->password);
+			ctx->password = NULL;
 			goto out_key_put;
 		}
 	}
@@ -3000,7 +1771,7 @@ out_err:
 }
 #else /* ! CONFIG_KEYS */
 static inline int
-cifs_set_cifscreds(struct smb_vol *vol __attribute__((unused)),
+cifs_set_cifscreds(struct smb3_fs_context *ctx __attribute__((unused)),
 		   struct cifs_ses *ses __attribute__((unused)))
 {
 	return -ENOSYS;
@@ -3008,14 +1779,14 @@ cifs_set_cifscreds(struct smb_vol *vol __attribute__((unused)),
 #endif /* CONFIG_KEYS */
 
 /**
- * cifs_get_smb_ses - get a session matching @volume_info data from @server
+ * cifs_get_smb_ses - get a session matching @ctx data from @server
  *
  * This function assumes it is being called from cifs_mount() where we
  * already got a server reference (server refcount +1). See
  * cifs_get_tcon() for refcount explanations.
  */
 struct cifs_ses *
-cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
+cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx)
 {
 	int rc = -ENOMEM;
 	unsigned int xid;
@@ -3025,7 +1796,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
 
 	xid = get_xid();
 
-	ses = cifs_find_smb_ses(server, volume_info);
+	ses = cifs_find_smb_ses(server, ctx);
 	if (ses) {
 		cifs_dbg(FYI, "Existing smb sess found (status=%d)\n",
 			 ses->status);
@@ -3042,7 +1813,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
 		if (ses->need_reconnect) {
 			cifs_dbg(FYI, "Session needs reconnect\n");
 			rc = cifs_setup_session(xid, ses,
-						volume_info->local_nls);
+						ctx->local_nls);
 			if (rc) {
 				mutex_unlock(&ses->session_mutex);
 				/* problem -- put our reference */
@@ -3071,40 +1842,40 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
 	else
 		sprintf(ses->serverName, "%pI4", &addr->sin_addr);
 
-	if (volume_info->username) {
-		ses->user_name = kstrdup(volume_info->username, GFP_KERNEL);
+	if (ctx->username) {
+		ses->user_name = kstrdup(ctx->username, GFP_KERNEL);
 		if (!ses->user_name)
 			goto get_ses_fail;
 	}
 
-	/* volume_info->password freed at unmount */
-	if (volume_info->password) {
-		ses->password = kstrdup(volume_info->password, GFP_KERNEL);
+	/* ctx->password freed at unmount */
+	if (ctx->password) {
+		ses->password = kstrdup(ctx->password, GFP_KERNEL);
 		if (!ses->password)
 			goto get_ses_fail;
 	}
-	if (volume_info->domainname) {
-		ses->domainName = kstrdup(volume_info->domainname, GFP_KERNEL);
+	if (ctx->domainname) {
+		ses->domainName = kstrdup(ctx->domainname, GFP_KERNEL);
 		if (!ses->domainName)
 			goto get_ses_fail;
 	}
-	if (volume_info->domainauto)
-		ses->domainAuto = volume_info->domainauto;
-	ses->cred_uid = volume_info->cred_uid;
-	ses->linux_uid = volume_info->linux_uid;
+	if (ctx->domainauto)
+		ses->domainAuto = ctx->domainauto;
+	ses->cred_uid = ctx->cred_uid;
+	ses->linux_uid = ctx->linux_uid;
 
-	ses->sectype = volume_info->sectype;
-	ses->sign = volume_info->sign;
+	ses->sectype = ctx->sectype;
+	ses->sign = ctx->sign;
 	mutex_lock(&ses->session_mutex);
 
 	/* add server as first channel */
 	ses->chans[0].server = server;
 	ses->chan_count = 1;
-	ses->chan_max = volume_info->multichannel ? volume_info->max_channels:1;
+	ses->chan_max = ctx->multichannel ? ctx->max_channels:1;
 
 	rc = cifs_negotiate_protocol(xid, ses);
 	if (!rc)
-		rc = cifs_setup_session(xid, ses, volume_info->local_nls);
+		rc = cifs_setup_session(xid, ses, ctx->local_nls);
 
 	/* each channel uses a different signing key */
 	memcpy(ses->chans[0].signkey, ses->smb3signingkey,
@@ -3121,7 +1892,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb_vol *volume_info)
 
 	free_xid(xid);
 
-	cifs_setup_ipc(ses, volume_info);
+	cifs_setup_ipc(ses, ctx);
 
 	return ses;
 
@@ -3131,27 +1902,27 @@ get_ses_fail:
 	return ERR_PTR(rc);
 }
 
-static int match_tcon(struct cifs_tcon *tcon, struct smb_vol *volume_info)
+static int match_tcon(struct cifs_tcon *tcon, struct smb3_fs_context *ctx)
 {
 	if (tcon->tidStatus == CifsExiting)
 		return 0;
-	if (strncmp(tcon->treeName, volume_info->UNC, MAX_TREE_SIZE))
+	if (strncmp(tcon->treeName, ctx->UNC, MAX_TREE_SIZE))
 		return 0;
-	if (tcon->seal != volume_info->seal)
+	if (tcon->seal != ctx->seal)
 		return 0;
-	if (tcon->snapshot_time != volume_info->snapshot_time)
+	if (tcon->snapshot_time != ctx->snapshot_time)
 		return 0;
-	if (tcon->handle_timeout != volume_info->handle_timeout)
+	if (tcon->handle_timeout != ctx->handle_timeout)
 		return 0;
-	if (tcon->no_lease != volume_info->no_lease)
+	if (tcon->no_lease != ctx->no_lease)
 		return 0;
-	if (tcon->nodelete != volume_info->nodelete)
+	if (tcon->nodelete != ctx->nodelete)
 		return 0;
 	return 1;
 }
 
 static struct cifs_tcon *
-cifs_find_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
+cifs_find_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx)
 {
 	struct list_head *tmp;
 	struct cifs_tcon *tcon;
@@ -3163,7 +1934,7 @@ cifs_find_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
 		if (tcon->dfs_path)
 			continue;
 #endif
-		if (!match_tcon(tcon, volume_info))
+		if (!match_tcon(tcon, ctx))
 			continue;
 		++tcon->tc_count;
 		spin_unlock(&cifs_tcp_ses_lock);
@@ -3194,6 +1965,18 @@ cifs_put_tcon(struct cifs_tcon *tcon)
 		return;
 	}
 
+#ifdef CONFIG_CIFS_SWN_UPCALL
+	if (tcon->use_witness) {
+		int rc;
+
+		rc = cifs_swn_unregister(tcon);
+		if (rc < 0) {
+			cifs_dbg(VFS, "%s: Failed to unregister for witness notifications: %d\n",
+					__func__, rc);
+		}
+	}
+#endif
+
 	list_del_init(&tcon->tcon_list);
 	spin_unlock(&cifs_tcp_ses_lock);
 
@@ -3208,7 +1991,7 @@ cifs_put_tcon(struct cifs_tcon *tcon)
 }
 
 /**
- * cifs_get_tcon - get a tcon matching @volume_info data from @ses
+ * cifs_get_tcon - get a tcon matching @ctx data from @ses
  *
  * - tcon refcount is the number of mount points using the tcon.
  * - ses refcount is the number of tcon using the session.
@@ -3228,12 +2011,12 @@ cifs_put_tcon(struct cifs_tcon *tcon)
  *    decrement the ses refcount.
  */
 static struct cifs_tcon *
-cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
+cifs_get_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx)
 {
 	int rc, xid;
 	struct cifs_tcon *tcon;
 
-	tcon = cifs_find_tcon(ses, volume_info);
+	tcon = cifs_find_tcon(ses, ctx);
 	if (tcon) {
 		/*
 		 * tcon has refcount already incremented but we need to
@@ -3255,36 +2038,36 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
 		goto out_fail;
 	}
 
-	if (volume_info->snapshot_time) {
+	if (ctx->snapshot_time) {
 		if (ses->server->vals->protocol_id == 0) {
 			cifs_dbg(VFS,
 			     "Use SMB2 or later for snapshot mount option\n");
 			rc = -EOPNOTSUPP;
 			goto out_fail;
 		} else
-			tcon->snapshot_time = volume_info->snapshot_time;
+			tcon->snapshot_time = ctx->snapshot_time;
 	}
 
-	if (volume_info->handle_timeout) {
+	if (ctx->handle_timeout) {
 		if (ses->server->vals->protocol_id == 0) {
 			cifs_dbg(VFS,
 			     "Use SMB2.1 or later for handle timeout option\n");
 			rc = -EOPNOTSUPP;
 			goto out_fail;
 		} else
-			tcon->handle_timeout = volume_info->handle_timeout;
+			tcon->handle_timeout = ctx->handle_timeout;
 	}
 
 	tcon->ses = ses;
-	if (volume_info->password) {
-		tcon->password = kstrdup(volume_info->password, GFP_KERNEL);
+	if (ctx->password) {
+		tcon->password = kstrdup(ctx->password, GFP_KERNEL);
 		if (!tcon->password) {
 			rc = -ENOMEM;
 			goto out_fail;
 		}
 	}
 
-	if (volume_info->seal) {
+	if (ctx->seal) {
 		if (ses->server->vals->protocol_id == 0) {
 			cifs_dbg(VFS,
 				 "SMB3 or later required for encryption\n");
@@ -3300,7 +2083,7 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
 		}
 	}
 
-	if (volume_info->linux_ext) {
+	if (ctx->linux_ext) {
 		if (ses->server->posix_ext_supported) {
 			tcon->posix_extensions = true;
 			pr_warn_once("SMB3.11 POSIX Extensions are experimental\n");
@@ -3316,8 +2099,8 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
 	 * SetFS as we do on SessSetup and reconnect?
 	 */
 	xid = get_xid();
-	rc = ses->server->ops->tree_connect(xid, ses, volume_info->UNC, tcon,
-					    volume_info->local_nls);
+	rc = ses->server->ops->tree_connect(xid, ses, ctx->UNC, tcon,
+					    ctx->local_nls);
 	free_xid(xid);
 	cifs_dbg(FYI, "Tcon rc = %d\n", rc);
 	if (rc)
@@ -3325,7 +2108,7 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
 
 	tcon->use_persistent = false;
 	/* check if SMB2 or later, CIFS does not support persistent handles */
-	if (volume_info->persistent) {
+	if (ctx->persistent) {
 		if (ses->server->vals->protocol_id == 0) {
 			cifs_dbg(VFS,
 			     "SMB3 or later required for persistent handles\n");
@@ -3342,10 +2125,10 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
 		}
 	} else if ((tcon->capabilities & SMB2_SHARE_CAP_CONTINUOUS_AVAILABILITY)
 	     && (ses->server->capabilities & SMB2_GLOBAL_CAP_PERSISTENT_HANDLES)
-	     && (volume_info->nopersistent == false)) {
+	     && (ctx->nopersistent == false)) {
 		cifs_dbg(FYI, "enabling persistent handles\n");
 		tcon->use_persistent = true;
-	} else if (volume_info->resilient) {
+	} else if (ctx->resilient) {
 		if (ses->server->vals->protocol_id == 0) {
 			cifs_dbg(VFS,
 			     "SMB2.1 or later required for resilient handles\n");
@@ -3354,23 +2137,52 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
 		}
 		tcon->use_resilient = true;
 	}
+#ifdef CONFIG_CIFS_SWN_UPCALL
+	tcon->use_witness = false;
+	if (ctx->witness) {
+		if (ses->server->vals->protocol_id >= SMB30_PROT_ID) {
+			if (tcon->capabilities & SMB2_SHARE_CAP_CLUSTER) {
+				/*
+				 * Set witness in use flag in first place
+				 * to retry registration in the echo task
+				 */
+				tcon->use_witness = true;
+				/* And try to register immediately */
+				rc = cifs_swn_register(tcon);
+				if (rc < 0) {
+					cifs_dbg(VFS, "Failed to register for witness notifications: %d\n", rc);
+					goto out_fail;
+				}
+			} else {
+				/* TODO: try to extend for non-cluster uses (eg multichannel) */
+				cifs_dbg(VFS, "witness requested on mount but no CLUSTER capability on share\n");
+				rc = -EOPNOTSUPP;
+				goto out_fail;
+			}
+		} else {
+			cifs_dbg(VFS, "SMB3 or later required for witness option\n");
+			rc = -EOPNOTSUPP;
+			goto out_fail;
+		}
+	}
+#endif
 
 	/* If the user really knows what they are doing they can override */
 	if (tcon->share_flags & SMB2_SHAREFLAG_NO_CACHING) {
-		if (volume_info->cache_ro)
+		if (ctx->cache_ro)
 			cifs_dbg(VFS, "cache=ro requested on mount but NO_CACHING flag set on share\n");
-		else if (volume_info->cache_rw)
+		else if (ctx->cache_rw)
 			cifs_dbg(VFS, "cache=singleclient requested on mount but NO_CACHING flag set on share\n");
 	}
 
-	if (volume_info->no_lease) {
+	if (ctx->no_lease) {
 		if (ses->server->vals->protocol_id == 0) {
 			cifs_dbg(VFS,
 				"SMB2 or later required for nolease option\n");
 			rc = -EOPNOTSUPP;
 			goto out_fail;
 		} else
-			tcon->no_lease = volume_info->no_lease;
+			tcon->no_lease = ctx->no_lease;
 	}
 
 	/*
@@ -3378,14 +2190,14 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb_vol *volume_info)
 	 * resources mounted more than once to the same server share the last
 	 * value passed in for the retry flag is used.
 	 */
-	tcon->retry = volume_info->retry;
-	tcon->nocase = volume_info->nocase;
+	tcon->retry = ctx->retry;
+	tcon->nocase = ctx->nocase;
 	if (ses->server->capabilities & SMB2_GLOBAL_CAP_DIRECTORY_LEASING)
-		tcon->nohandlecache = volume_info->nohandlecache;
+		tcon->nohandlecache = ctx->nohandlecache;
 	else
 		tcon->nohandlecache = 1;
-	tcon->nodelete = volume_info->nodelete;
-	tcon->local_lease = volume_info->local_lease;
+	tcon->nodelete = ctx->nodelete;
+	tcon->local_lease = ctx->local_lease;
 	INIT_LIST_HEAD(&tcon->pending_opens);
 
 	spin_lock(&cifs_tcp_ses_lock);
@@ -3440,23 +2252,24 @@ compare_mount_options(struct super_block *sb, struct cifs_mnt_data *mnt_data)
 	 * We want to share sb only if we don't specify an r/wsize or
 	 * specified r/wsize is greater than or equal to existing one.
 	 */
-	if (new->wsize && new->wsize < old->wsize)
+	if (new->ctx->wsize && new->ctx->wsize < old->ctx->wsize)
 		return 0;
 
-	if (new->rsize && new->rsize < old->rsize)
+	if (new->ctx->rsize && new->ctx->rsize < old->ctx->rsize)
 		return 0;
 
-	if (!uid_eq(old->mnt_uid, new->mnt_uid) || !gid_eq(old->mnt_gid, new->mnt_gid))
+	if (!uid_eq(old->ctx->linux_uid, new->ctx->linux_uid) ||
+	    !gid_eq(old->ctx->linux_gid, new->ctx->linux_gid))
 		return 0;
 
-	if (old->mnt_file_mode != new->mnt_file_mode ||
-	    old->mnt_dir_mode != new->mnt_dir_mode)
+	if (old->ctx->file_mode != new->ctx->file_mode ||
+	    old->ctx->dir_mode != new->ctx->dir_mode)
 		return 0;
 
 	if (strcmp(old->local_nls->charset, new->local_nls->charset))
 		return 0;
 
-	if (old->actimeo != new->actimeo)
+	if (old->ctx->actimeo != new->ctx->actimeo)
 		return 0;
 
 	return 1;
@@ -3484,7 +2297,7 @@ int
 cifs_match_super(struct super_block *sb, void *data)
 {
 	struct cifs_mnt_data *mnt_data = (struct cifs_mnt_data *)data;
-	struct smb_vol *volume_info;
+	struct smb3_fs_context *ctx;
 	struct cifs_sb_info *cifs_sb;
 	struct TCP_Server_Info *tcp_srv;
 	struct cifs_ses *ses;
@@ -3503,11 +2316,11 @@ cifs_match_super(struct super_block *sb, void *data)
 	ses = tcon->ses;
 	tcp_srv = ses->server;
 
-	volume_info = mnt_data->vol;
+	ctx = mnt_data->ctx;
 
-	if (!match_server(tcp_srv, volume_info) ||
-	    !match_session(ses, volume_info) ||
-	    !match_tcon(tcon, volume_info) ||
+	if (!match_server(tcp_srv, ctx) ||
+	    !match_session(ses, ctx) ||
+	    !match_tcon(tcon, ctx) ||
 	    !match_prepath(sb, mnt_data)) {
 		rc = 0;
 		goto out;
@@ -3792,9 +2605,10 @@ ip_connect(struct TCP_Server_Info *server)
 }
 
 void reset_cifs_unix_caps(unsigned int xid, struct cifs_tcon *tcon,
-			  struct cifs_sb_info *cifs_sb, struct smb_vol *vol_info)
+			  struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
 {
-	/* if we are reconnecting then should we check to see if
+	/*
+	 * If we are reconnecting then should we check to see if
 	 * any requested capabilities changed locally e.g. via
 	 * remount but we can not do much about it here
 	 * if they have (even if we could detect it by the following)
@@ -3802,15 +2616,16 @@ void reset_cifs_unix_caps(unsigned int xid, struct cifs_tcon *tcon,
 	 * or if we change to make all sb to same share the same
 	 * sb as NFS - then we only have one backpointer to sb.
 	 * What if we wanted to mount the server share twice once with
-	 * and once without posixacls or posix paths? */
+	 * and once without posixacls or posix paths?
+	 */
 	__u64 saved_cap = le64_to_cpu(tcon->fsUnixInfo.Capability);
 
-	if (vol_info && vol_info->no_linux_ext) {
+	if (ctx && ctx->no_linux_ext) {
 		tcon->fsUnixInfo.Capability = 0;
 		tcon->unix_ext = 0; /* Unix Extensions disabled */
 		cifs_dbg(FYI, "Linux protocol extensions disabled\n");
 		return;
-	} else if (vol_info)
+	} else if (ctx)
 		tcon->unix_ext = 1; /* Unix Extensions supported */
 
 	if (tcon->unix_ext == 0) {
@@ -3821,11 +2636,15 @@ void reset_cifs_unix_caps(unsigned int xid, struct cifs_tcon *tcon,
 	if (!CIFSSMBQFSUnixInfo(xid, tcon)) {
 		__u64 cap = le64_to_cpu(tcon->fsUnixInfo.Capability);
 		cifs_dbg(FYI, "unix caps which server supports %lld\n", cap);
-		/* check for reconnect case in which we do not
-		   want to change the mount behavior if we can avoid it */
-		if (vol_info == NULL) {
-			/* turn off POSIX ACL and PATHNAMES if not set
-			   originally at mount time */
+		/*
+		 * check for reconnect case in which we do not
+		 * want to change the mount behavior if we can avoid it
+		 */
+		if (ctx == NULL) {
+			/*
+			 * turn off POSIX ACL and PATHNAMES if not set
+			 * originally at mount time
+			 */
 			if ((saved_cap & CIFS_UNIX_POSIX_ACL_CAP) == 0)
 				cap &= ~CIFS_UNIX_POSIX_ACL_CAP;
 			if ((saved_cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) == 0) {
@@ -3842,7 +2661,7 @@ void reset_cifs_unix_caps(unsigned int xid, struct cifs_tcon *tcon,
 			cifs_dbg(VFS, "per-share encryption not supported yet\n");
 
 		cap &= CIFS_UNIX_CAP_MASK;
-		if (vol_info && vol_info->no_psx_acl)
+		if (ctx && ctx->no_psx_acl)
 			cap &= ~CIFS_UNIX_POSIX_ACL_CAP;
 		else if (CIFS_UNIX_POSIX_ACL_CAP & cap) {
 			cifs_dbg(FYI, "negotiated posix acl support\n");
@@ -3851,7 +2670,7 @@ void reset_cifs_unix_caps(unsigned int xid, struct cifs_tcon *tcon,
 					CIFS_MOUNT_POSIXACL;
 		}
 
-		if (vol_info && vol_info->posix_paths == 0)
+		if (ctx && ctx->posix_paths == 0)
 			cap &= ~CIFS_UNIX_POSIX_PATHNAMES_CAP;
 		else if (cap & CIFS_UNIX_POSIX_PATHNAMES_CAP) {
 			cifs_dbg(FYI, "negotiate posix pathnames\n");
@@ -3882,129 +2701,59 @@ void reset_cifs_unix_caps(unsigned int xid, struct cifs_tcon *tcon,
 			cifs_dbg(FYI, "mandatory transport encryption cap\n");
 #endif /* CIFS_DEBUG2 */
 		if (CIFSSMBSetFSUnixInfo(xid, tcon, cap)) {
-			if (vol_info == NULL) {
+			if (ctx == NULL)
 				cifs_dbg(FYI, "resetting capabilities failed\n");
-			} else
+			else
 				cifs_dbg(VFS, "Negotiating Unix capabilities with the server failed. Consider mounting with the Unix Extensions disabled if problems are found by specifying the nounix mount option.\n");
 
 		}
 	}
 }
 
-int cifs_setup_cifs_sb(struct smb_vol *pvolume_info,
-			struct cifs_sb_info *cifs_sb)
+int cifs_setup_cifs_sb(struct cifs_sb_info *cifs_sb)
 {
+	struct smb3_fs_context *ctx = cifs_sb->ctx;
+
 	INIT_DELAYED_WORK(&cifs_sb->prune_tlinks, cifs_prune_tlinks);
 
 	spin_lock_init(&cifs_sb->tlink_tree_lock);
 	cifs_sb->tlink_tree = RB_ROOT;
 
-	cifs_sb->bsize = pvolume_info->bsize;
-	/*
-	 * Temporarily set r/wsize for matching superblock. If we end up using
-	 * new sb then client will later negotiate it downward if needed.
-	 */
-	cifs_sb->rsize = pvolume_info->rsize;
-	cifs_sb->wsize = pvolume_info->wsize;
-
-	cifs_sb->mnt_uid = pvolume_info->linux_uid;
-	cifs_sb->mnt_gid = pvolume_info->linux_gid;
-	cifs_sb->mnt_file_mode = pvolume_info->file_mode;
-	cifs_sb->mnt_dir_mode = pvolume_info->dir_mode;
 	cifs_dbg(FYI, "file mode: %04ho  dir mode: %04ho\n",
-		 cifs_sb->mnt_file_mode, cifs_sb->mnt_dir_mode);
-
-	cifs_sb->actimeo = pvolume_info->actimeo;
-	cifs_sb->local_nls = pvolume_info->local_nls;
-
-	if (pvolume_info->nodfs)
-		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_DFS;
-	if (pvolume_info->noperm)
-		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_PERM;
-	if (pvolume_info->setuids)
-		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_SET_UID;
-	if (pvolume_info->setuidfromacl)
-		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_UID_FROM_ACL;
-	if (pvolume_info->server_ino)
-		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_SERVER_INUM;
-	if (pvolume_info->remap)
-		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_MAP_SFM_CHR;
-	if (pvolume_info->sfu_remap)
-		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_MAP_SPECIAL_CHR;
-	if (pvolume_info->no_xattr)
-		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_XATTR;
-	if (pvolume_info->sfu_emul)
-		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_UNX_EMUL;
-	if (pvolume_info->nobrl)
-		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_BRL;
-	if (pvolume_info->nohandlecache)
-		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_HANDLE_CACHE;
-	if (pvolume_info->nostrictsync)
-		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NOSSYNC;
-	if (pvolume_info->mand_lock)
-		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NOPOSIXBRL;
-	if (pvolume_info->rwpidforward)
-		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_RWPIDFORWARD;
-	if (pvolume_info->mode_ace)
-		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_MODE_FROM_SID;
-	if (pvolume_info->cifs_acl)
-		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_CIFS_ACL;
-	if (pvolume_info->backupuid_specified) {
-		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_CIFS_BACKUPUID;
-		cifs_sb->mnt_backupuid = pvolume_info->backupuid;
-	}
-	if (pvolume_info->backupgid_specified) {
-		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_CIFS_BACKUPGID;
-		cifs_sb->mnt_backupgid = pvolume_info->backupgid;
-	}
-	if (pvolume_info->override_uid)
-		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_OVERR_UID;
-	if (pvolume_info->override_gid)
-		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_OVERR_GID;
-	if (pvolume_info->dynperm)
-		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DYNPERM;
-	if (pvolume_info->fsc)
-		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_FSCACHE;
-	if (pvolume_info->multiuser)
-		cifs_sb->mnt_cifs_flags |= (CIFS_MOUNT_MULTIUSER |
-					    CIFS_MOUNT_NO_PERM);
-	if (pvolume_info->strict_io)
-		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_STRICT_IO;
-	if (pvolume_info->direct_io) {
-		cifs_dbg(FYI, "mounting share using direct i/o\n");
-		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DIRECT_IO;
+		 ctx->file_mode, ctx->dir_mode);
+
+	/* this is needed for ASCII cp to Unicode converts */
+	if (ctx->iocharset == NULL) {
+		/* load_nls_default cannot return null */
+		cifs_sb->local_nls = load_nls_default();
+	} else {
+		cifs_sb->local_nls = load_nls(ctx->iocharset);
+		if (cifs_sb->local_nls == NULL) {
+			cifs_dbg(VFS, "CIFS mount error: iocharset %s not found\n",
+				 ctx->iocharset);
+			return -ELIBACC;
+		}
 	}
-	if (pvolume_info->cache_ro) {
+	ctx->local_nls = cifs_sb->local_nls;
+
+	smb3_update_mnt_flags(cifs_sb);
+
+	if (ctx->direct_io)
+		cifs_dbg(FYI, "mounting share using direct i/o\n");
+	if (ctx->cache_ro) {
 		cifs_dbg(VFS, "mounting share with read only caching. Ensure that the share will not be modified while in use.\n");
 		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_RO_CACHE;
-	} else if (pvolume_info->cache_rw) {
+	} else if (ctx->cache_rw) {
 		cifs_dbg(VFS, "mounting share in single client RW caching mode. Ensure that no other systems will be accessing the share.\n");
 		cifs_sb->mnt_cifs_flags |= (CIFS_MOUNT_RO_CACHE |
 					    CIFS_MOUNT_RW_CACHE);
 	}
-	if (pvolume_info->mfsymlinks) {
-		if (pvolume_info->sfu_emul) {
-			/*
-			 * Our SFU ("Services for Unix" emulation does not allow
-			 * creating symlinks but does allow reading existing SFU
-			 * symlinks (it does allow both creating and reading SFU
-			 * style mknod and FIFOs though). When "mfsymlinks" and
-			 * "sfu" are both enabled at the same time, it allows
-			 * reading both types of symlinks, but will only create
-			 * them with mfsymlinks format. This allows better
-			 * Apple compatibility (probably better for Samba too)
-			 * while still recognizing old Windows style symlinks.
-			 */
-			cifs_dbg(VFS, "mount options mfsymlinks and sfu both enabled\n");
-		}
-		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_MF_SYMLINKS;
-	}
 
-	if ((pvolume_info->cifs_acl) && (pvolume_info->dynperm))
+	if ((ctx->cifs_acl) && (ctx->dynperm))
 		cifs_dbg(VFS, "mount option dynperm ignored if cifsacl mount option supported\n");
 
-	if (pvolume_info->prepath) {
-		cifs_sb->prepath = kstrdup(pvolume_info->prepath, GFP_KERNEL);
+	if (ctx->prepath) {
+		cifs_sb->prepath = kstrdup(ctx->prepath, GFP_KERNEL);
 		if (cifs_sb->prepath == NULL)
 			return -ENOMEM;
 	}
@@ -4012,26 +2761,6 @@ int cifs_setup_cifs_sb(struct smb_vol *pvolume_info,
 	return 0;
 }
 
-void
-cifs_cleanup_volume_info_contents(struct smb_vol *volume_info)
-{
-	kfree(volume_info->username);
-	kfree_sensitive(volume_info->password);
-	kfree(volume_info->UNC);
-	kfree(volume_info->domainname);
-	kfree(volume_info->iocharset);
-	kfree(volume_info->prepath);
-}
-
-void
-cifs_cleanup_volume_info(struct smb_vol *volume_info)
-{
-	if (!volume_info)
-		return;
-	cifs_cleanup_volume_info_contents(volume_info);
-	kfree(volume_info);
-}
-
 /* Release all succeed connections */
 static inline void mount_put_conns(struct cifs_sb_info *cifs_sb,
 				   unsigned int xid,
@@ -4051,7 +2780,7 @@ static inline void mount_put_conns(struct cifs_sb_info *cifs_sb,
 }
 
 /* Get connections for tcp, ses and tcon */
-static int mount_get_conns(struct smb_vol *vol, struct cifs_sb_info *cifs_sb,
+static int mount_get_conns(struct smb3_fs_context *ctx, struct cifs_sb_info *cifs_sb,
 			   unsigned int *xid,
 			   struct TCP_Server_Info **nserver,
 			   struct cifs_ses **nses, struct cifs_tcon **ntcon)
@@ -4068,7 +2797,7 @@ static int mount_get_conns(struct smb_vol *vol, struct cifs_sb_info *cifs_sb,
 	*xid = get_xid();
 
 	/* get a reference to a tcp session */
-	server = cifs_get_tcp_session(vol);
+	server = cifs_get_tcp_session(ctx);
 	if (IS_ERR(server)) {
 		rc = PTR_ERR(server);
 		return rc;
@@ -4076,13 +2805,13 @@ static int mount_get_conns(struct smb_vol *vol, struct cifs_sb_info *cifs_sb,
 
 	*nserver = server;
 
-	if ((vol->max_credits < 20) || (vol->max_credits > 60000))
+	if ((ctx->max_credits < 20) || (ctx->max_credits > 60000))
 		server->max_credits = SMB2_MAX_CREDITS_AVAILABLE;
 	else
-		server->max_credits = vol->max_credits;
+		server->max_credits = ctx->max_credits;
 
 	/* get a reference to a SMB session */
-	ses = cifs_get_smb_ses(server, vol);
+	ses = cifs_get_smb_ses(server, ctx);
 	if (IS_ERR(ses)) {
 		rc = PTR_ERR(ses);
 		return rc;
@@ -4090,14 +2819,14 @@ static int mount_get_conns(struct smb_vol *vol, struct cifs_sb_info *cifs_sb,
 
 	*nses = ses;
 
-	if ((vol->persistent == true) && (!(ses->server->capabilities &
+	if ((ctx->persistent == true) && (!(ses->server->capabilities &
 					    SMB2_GLOBAL_CAP_PERSISTENT_HANDLES))) {
 		cifs_server_dbg(VFS, "persistent handles not supported by server\n");
 		return -EOPNOTSUPP;
 	}
 
 	/* search for existing tcon to this server share */
-	tcon = cifs_get_tcon(ses, vol);
+	tcon = cifs_get_tcon(ses, ctx);
 	if (IS_ERR(tcon)) {
 		rc = PTR_ERR(tcon);
 		return rc;
@@ -4115,7 +2844,7 @@ static int mount_get_conns(struct smb_vol *vol, struct cifs_sb_info *cifs_sb,
 		 * reset of caps checks mount to see if unix extensions disabled
 		 * for just this mount.
 		 */
-		reset_cifs_unix_caps(*xid, tcon, cifs_sb, vol);
+		reset_cifs_unix_caps(*xid, tcon, cifs_sb, ctx);
 		if ((tcon->ses->server->tcpStatus == CifsNeedReconnect) &&
 		    (le64_to_cpu(tcon->fsUnixInfo.Capability) &
 		     CIFS_UNIX_TRANSPORT_ENCRYPTION_MANDATORY_CAP))
@@ -4137,8 +2866,17 @@ static int mount_get_conns(struct smb_vol *vol, struct cifs_sb_info *cifs_sb,
 		}
 	}
 
-	cifs_sb->wsize = server->ops->negotiate_wsize(tcon, vol);
-	cifs_sb->rsize = server->ops->negotiate_rsize(tcon, vol);
+	/*
+	 * Clamp the rsize/wsize mount arguments if they are too big for the server
+	 * and set the rsize/wsize to the negotiated values if not passed in by
+	 * the user on mount
+	 */
+	if ((cifs_sb->ctx->wsize == 0) ||
+	    (cifs_sb->ctx->wsize > server->ops->negotiate_wsize(tcon, ctx)))
+		cifs_sb->ctx->wsize = server->ops->negotiate_wsize(tcon, ctx);
+	if ((cifs_sb->ctx->rsize == 0) ||
+	    (cifs_sb->ctx->rsize > server->ops->negotiate_rsize(tcon, ctx)))
+		cifs_sb->ctx->rsize = server->ops->negotiate_rsize(tcon, ctx);
 
 	return 0;
 }
@@ -4175,13 +2913,13 @@ static int mount_setup_tlink(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses,
  * exiting connection (tcon)
  */
 static char *
-build_unc_path_to_root(const struct smb_vol *vol,
+build_unc_path_to_root(const struct smb3_fs_context *ctx,
 		       const struct cifs_sb_info *cifs_sb, bool useppath)
 {
 	char *full_path, *pos;
-	unsigned int pplen = useppath && vol->prepath ?
-		strlen(vol->prepath) + 1 : 0;
-	unsigned int unc_len = strnlen(vol->UNC, MAX_TREE_SIZE + 1);
+	unsigned int pplen = useppath && ctx->prepath ?
+		strlen(ctx->prepath) + 1 : 0;
+	unsigned int unc_len = strnlen(ctx->UNC, MAX_TREE_SIZE + 1);
 
 	if (unc_len > MAX_TREE_SIZE)
 		return ERR_PTR(-EINVAL);
@@ -4190,12 +2928,12 @@ build_unc_path_to_root(const struct smb_vol *vol,
 	if (full_path == NULL)
 		return ERR_PTR(-ENOMEM);
 
-	memcpy(full_path, vol->UNC, unc_len);
+	memcpy(full_path, ctx->UNC, unc_len);
 	pos = full_path + unc_len;
 
 	if (pplen) {
 		*pos = CIFS_DIR_SEP(cifs_sb);
-		memcpy(pos + 1, vol->prepath, pplen);
+		memcpy(pos + 1, ctx->prepath, pplen);
 		pos += pplen;
 	}
 
@@ -4208,8 +2946,7 @@ build_unc_path_to_root(const struct smb_vol *vol,
 /**
  * expand_dfs_referral - Perform a dfs referral query and update the cifs_sb
  *
- *
- * If a referral is found, cifs_sb->mountdata will be (re-)allocated
+ * If a referral is found, cifs_sb->ctx->mount_options will be (re-)allocated
  * to a string containing updated options for the submount.  Otherwise it
  * will be left untouched.
  *
@@ -4218,7 +2955,7 @@ build_unc_path_to_root(const struct smb_vol *vol,
  */
 static int
 expand_dfs_referral(const unsigned int xid, struct cifs_ses *ses,
-		    struct smb_vol *volume_info, struct cifs_sb_info *cifs_sb,
+		    struct smb3_fs_context *ctx, struct cifs_sb_info *cifs_sb,
 		    char *ref_path)
 {
 	int rc;
@@ -4228,31 +2965,26 @@ expand_dfs_referral(const unsigned int xid, struct cifs_ses *ses,
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS)
 		return -EREMOTE;
 
-	full_path = build_unc_path_to_root(volume_info, cifs_sb, true);
+	full_path = build_unc_path_to_root(ctx, cifs_sb, true);
 	if (IS_ERR(full_path))
 		return PTR_ERR(full_path);
 
 	rc = dfs_cache_find(xid, ses, cifs_sb->local_nls, cifs_remap(cifs_sb),
 			    ref_path, &referral, NULL);
 	if (!rc) {
-		char *fake_devname = NULL;
-
-		mdata = cifs_compose_mount_options(cifs_sb->mountdata,
-						   full_path + 1, &referral,
-						   &fake_devname);
+		mdata = cifs_compose_mount_options(cifs_sb->ctx->mount_options,
+						   full_path + 1, &referral);
 		free_dfs_info_param(&referral);
 
 		if (IS_ERR(mdata)) {
 			rc = PTR_ERR(mdata);
 			mdata = NULL;
 		} else {
-			cifs_cleanup_volume_info_contents(volume_info);
-			rc = cifs_setup_volume_info(volume_info, mdata,
-						    fake_devname, false);
+			smb3_cleanup_fs_context_contents(ctx);
+			rc = cifs_setup_volume_info(ctx);
 		}
-		kfree(fake_devname);
-		kfree(cifs_sb->mountdata);
-		cifs_sb->mountdata = mdata;
+		kfree(cifs_sb->ctx->mount_options);
+		cifs_sb->ctx->mount_options = mdata;
 	}
 	kfree(full_path);
 	return rc;
@@ -4270,7 +3002,7 @@ static inline int get_next_dfs_tgt(const char *path,
 }
 
 static int update_vol_info(const struct dfs_cache_tgt_iterator *tgt_it,
-			   struct smb_vol *fake_vol, struct smb_vol *vol)
+			   struct smb3_fs_context *fake_ctx, struct smb3_fs_context *ctx)
 {
 	const char *tgt = dfs_cache_get_tgt_name(tgt_it);
 	int len = strlen(tgt) + 2;
@@ -4281,29 +3013,29 @@ static int update_vol_info(const struct dfs_cache_tgt_iterator *tgt_it,
 		return -ENOMEM;
 	scnprintf(new_unc, len, "\\%s", tgt);
 
-	kfree(vol->UNC);
-	vol->UNC = new_unc;
+	kfree(ctx->UNC);
+	ctx->UNC = new_unc;
 
-	if (fake_vol->prepath) {
-		kfree(vol->prepath);
-		vol->prepath = fake_vol->prepath;
-		fake_vol->prepath = NULL;
+	if (fake_ctx->prepath) {
+		kfree(ctx->prepath);
+		ctx->prepath = fake_ctx->prepath;
+		fake_ctx->prepath = NULL;
 	}
-	memcpy(&vol->dstaddr, &fake_vol->dstaddr, sizeof(vol->dstaddr));
+	memcpy(&ctx->dstaddr, &fake_ctx->dstaddr, sizeof(ctx->dstaddr));
 
 	return 0;
 }
 
 static int setup_dfs_tgt_conn(const char *path, const char *full_path,
 			      const struct dfs_cache_tgt_iterator *tgt_it,
-			      struct cifs_sb_info *cifs_sb, struct smb_vol *vol, unsigned int *xid,
-			      struct TCP_Server_Info **server, struct cifs_ses **ses,
-			      struct cifs_tcon **tcon)
+			      struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx,
+			      unsigned int *xid, struct TCP_Server_Info **server,
+			      struct cifs_ses **ses, struct cifs_tcon **tcon)
 {
 	int rc;
 	struct dfs_info3_param ref = {0};
-	char *mdata = NULL, *fake_devname = NULL;
-	struct smb_vol fake_vol = {NULL};
+	char *mdata = NULL;
+	struct smb3_fs_context fake_ctx = {NULL};
 
 	cifs_dbg(FYI, "%s: dfs path: %s\n", __func__, path);
 
@@ -4311,45 +3043,43 @@ static int setup_dfs_tgt_conn(const char *path, const char *full_path,
 	if (rc)
 		return rc;
 
-	mdata = cifs_compose_mount_options(cifs_sb->mountdata, full_path + 1, &ref, &fake_devname);
+	mdata = cifs_compose_mount_options(cifs_sb->ctx->mount_options,
+					   full_path + 1, &ref);
 	free_dfs_info_param(&ref);
 
 	if (IS_ERR(mdata)) {
 		rc = PTR_ERR(mdata);
 		mdata = NULL;
-	} else {
-		cifs_dbg(FYI, "%s: fake_devname: %s\n", __func__, fake_devname);
-		rc = cifs_setup_volume_info(&fake_vol, mdata, fake_devname,
-					    false);
-	}
+	} else
+		rc = cifs_setup_volume_info(&fake_ctx);
+
 	kfree(mdata);
-	kfree(fake_devname);
 
 	if (!rc) {
 		/*
-		 * We use a 'fake_vol' here because we need pass it down to the
+		 * We use a 'fake_ctx' here because we need pass it down to the
 		 * mount_{get,put} functions to test connection against new DFS
 		 * targets.
 		 */
 		mount_put_conns(cifs_sb, *xid, *server, *ses, *tcon);
-		rc = mount_get_conns(&fake_vol, cifs_sb, xid, server, ses,
+		rc = mount_get_conns(&fake_ctx, cifs_sb, xid, server, ses,
 				     tcon);
 		if (!rc || (*server && *ses)) {
 			/*
 			 * We were able to connect to new target server.
-			 * Update current volume info with new target server.
+			 * Update current context with new target server.
 			 */
-			rc = update_vol_info(tgt_it, &fake_vol, vol);
+			rc = update_vol_info(tgt_it, &fake_ctx, ctx);
 		}
 	}
-	cifs_cleanup_volume_info_contents(&fake_vol);
+	smb3_cleanup_fs_context_contents(&fake_ctx);
 	return rc;
 }
 
 static int do_dfs_failover(const char *path, const char *full_path, struct cifs_sb_info *cifs_sb,
-			   struct smb_vol *vol, struct cifs_ses *root_ses, unsigned int *xid,
-			   struct TCP_Server_Info **server, struct cifs_ses **ses,
-			   struct cifs_tcon **tcon)
+			   struct smb3_fs_context *ctx, struct cifs_ses *root_ses,
+			   unsigned int *xid, struct TCP_Server_Info **server,
+			   struct cifs_ses **ses, struct cifs_tcon **tcon)
 {
 	int rc;
 	struct dfs_cache_tgt_list tgt_list;
@@ -4368,7 +3098,7 @@ static int do_dfs_failover(const char *path, const char *full_path, struct cifs_
 		if (rc)
 			break;
 		/* Connect to next DFS target */
-		rc = setup_dfs_tgt_conn(path, full_path, tgt_it, cifs_sb, vol, xid, server, ses,
+		rc = setup_dfs_tgt_conn(path, full_path, tgt_it, cifs_sb, ctx, xid, server, ses,
 					tcon);
 		if (!rc || (*server && *ses))
 			break;
@@ -4388,22 +3118,21 @@ static int do_dfs_failover(const char *path, const char *full_path, struct cifs_
 }
 #endif
 
+/* TODO: all callers to this are broken. We are not parsing mount_options here
+ * we should pass a clone of the original context?
+ */
 int
-cifs_setup_volume_info(struct smb_vol *volume_info, char *mount_data,
-			const char *devname, bool is_smb3)
+cifs_setup_volume_info(struct smb3_fs_context *ctx)
 {
 	int rc = 0;
 
-	if (cifs_parse_mount_options(mount_data, devname, volume_info, is_smb3))
-		return -EINVAL;
-
-	if (volume_info->nullauth) {
+	if (ctx->nullauth) {
 		cifs_dbg(FYI, "Anonymous login\n");
-		kfree(volume_info->username);
-		volume_info->username = NULL;
-	} else if (volume_info->username) {
+		kfree(ctx->username);
+		ctx->username = NULL;
+	} else if (ctx->username) {
 		/* BB fixme parse for domain name here */
-		cifs_dbg(FYI, "Username: %s\n", volume_info->username);
+		cifs_dbg(FYI, "Username: %s\n", ctx->username);
 	} else {
 		cifs_dbg(VFS, "No username specified\n");
 	/* In userspace mount helper we can get user name from alternate
@@ -4411,41 +3140,9 @@ cifs_setup_volume_info(struct smb_vol *volume_info, char *mount_data,
 		return -EINVAL;
 	}
 
-	/* this is needed for ASCII cp to Unicode converts */
-	if (volume_info->iocharset == NULL) {
-		/* load_nls_default cannot return null */
-		volume_info->local_nls = load_nls_default();
-	} else {
-		volume_info->local_nls = load_nls(volume_info->iocharset);
-		if (volume_info->local_nls == NULL) {
-			cifs_dbg(VFS, "CIFS mount error: iocharset %s not found\n",
-				 volume_info->iocharset);
-			return -ELIBACC;
-		}
-	}
-
 	return rc;
 }
 
-struct smb_vol *
-cifs_get_volume_info(char *mount_data, const char *devname, bool is_smb3)
-{
-	int rc;
-	struct smb_vol *volume_info;
-
-	volume_info = kmalloc(sizeof(struct smb_vol), GFP_KERNEL);
-	if (!volume_info)
-		return ERR_PTR(-ENOMEM);
-
-	rc = cifs_setup_volume_info(volume_info, mount_data, devname, is_smb3);
-	if (rc) {
-		cifs_cleanup_volume_info(volume_info);
-		volume_info = ERR_PTR(rc);
-	}
-
-	return volume_info;
-}
-
 static int
 cifs_are_all_path_components_accessible(struct TCP_Server_Info *server,
 					unsigned int xid,
@@ -4497,7 +3194,7 @@ cifs_are_all_path_components_accessible(struct TCP_Server_Info *server,
  * Check if path is remote (e.g. a DFS share). Return -EREMOTE if it is,
  * otherwise 0.
  */
-static int is_path_remote(struct cifs_sb_info *cifs_sb, struct smb_vol *vol,
+static int is_path_remote(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx,
 			  const unsigned int xid,
 			  struct TCP_Server_Info *server,
 			  struct cifs_tcon *tcon)
@@ -4511,7 +3208,7 @@ static int is_path_remote(struct cifs_sb_info *cifs_sb, struct smb_vol *vol,
 	/*
 	 * cifs_build_path_to_root works only when we have a valid tcon
 	 */
-	full_path = cifs_build_path_to_root(vol, cifs_sb, tcon,
+	full_path = cifs_build_path_to_root(ctx, cifs_sb, tcon,
 					    tcon->Flags & SMB_SHARE_IS_IN_DFS);
 	if (full_path == NULL)
 		return -ENOMEM;
@@ -4560,7 +3257,7 @@ static void put_root_ses(struct cifs_ses *ses)
 }
 
 /* Check if a path component is remote and then update @dfs_path accordingly */
-static int check_dfs_prepath(struct cifs_sb_info *cifs_sb, struct smb_vol *vol,
+static int check_dfs_prepath(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx,
 			     const unsigned int xid, struct TCP_Server_Info *server,
 			     struct cifs_tcon *tcon, char **dfs_path)
 {
@@ -4571,7 +3268,7 @@ static int check_dfs_prepath(struct cifs_sb_info *cifs_sb, struct smb_vol *vol,
 	int added_treename = tcon->Flags & SMB_SHARE_IS_IN_DFS;
 	int skip = added_treename;
 
-	path = cifs_build_path_to_root(vol, cifs_sb, tcon, added_treename);
+	path = cifs_build_path_to_root(ctx, cifs_sb, tcon, added_treename);
 	if (!path)
 		return -ENOMEM;
 
@@ -4602,17 +3299,17 @@ static int check_dfs_prepath(struct cifs_sb_info *cifs_sb, struct smb_vol *vol,
 		*s = 0;
 		rc = server->ops->is_path_accessible(xid, tcon, cifs_sb, path);
 		if (rc && rc == -EREMOTE) {
-			struct smb_vol v = {NULL};
+			struct smb3_fs_context v = {NULL};
 			/* if @path contains a tree name, skip it in the prefix path */
 			if (added_treename) {
-				rc = cifs_parse_devname(path, &v);
+				rc = smb3_parse_devname(path, &v);
 				if (rc)
 					break;
 				rc = -EREMOTE;
 				npath = build_unc_path_to_root(&v, cifs_sb, true);
-				cifs_cleanup_volume_info_contents(&v);
+				smb3_cleanup_fs_context_contents(&v);
 			} else {
-				v.UNC = vol->UNC;
+				v.UNC = ctx->UNC;
 				v.prepath = path + 1;
 				npath = build_unc_path_to_root(&v, cifs_sb, true);
 			}
@@ -4630,7 +3327,7 @@ static int check_dfs_prepath(struct cifs_sb_info *cifs_sb, struct smb_vol *vol,
 	return rc;
 }
 
-int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *vol)
+int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
 {
 	int rc = 0;
 	unsigned int xid;
@@ -4642,7 +3339,7 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *vol)
 	char *oldmnt = NULL;
 	char *mntdata = NULL;
 
-	rc = mount_get_conns(vol, cifs_sb, &xid, &server, &ses, &tcon);
+	rc = mount_get_conns(ctx, cifs_sb, &xid, &server, &ses, &tcon);
 	/*
 	 * Unconditionally try to get an DFS referral (even cached) to determine whether it is an
 	 * DFS mount.
@@ -4650,26 +3347,27 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *vol)
 	 * Skip prefix path to provide support for DFS referrals from w2k8 servers which don't seem
 	 * to respond with PATH_NOT_COVERED to requests that include the prefix.
 	 */
-	if (dfs_cache_find(xid, ses, cifs_sb->local_nls, cifs_remap(cifs_sb), vol->UNC + 1, NULL,
+	if (dfs_cache_find(xid, ses, cifs_sb->local_nls, cifs_remap(cifs_sb), ctx->UNC + 1, NULL,
 			   NULL)) {
 		/* No DFS referral was returned.  Looks like a regular share. */
 		if (rc)
 			goto error;
 		/* Check if it is fully accessible and then mount it */
-		rc = is_path_remote(cifs_sb, vol, xid, server, tcon);
+		rc = is_path_remote(cifs_sb, ctx, xid, server, tcon);
 		if (!rc)
 			goto out;
 		if (rc != -EREMOTE)
 			goto error;
 	}
 	/* Save mount options */
-	mntdata = kstrndup(cifs_sb->mountdata, strlen(cifs_sb->mountdata), GFP_KERNEL);
+	mntdata = kstrndup(cifs_sb->ctx->mount_options,
+			   strlen(cifs_sb->ctx->mount_options), GFP_KERNEL);
 	if (!mntdata) {
 		rc = -ENOMEM;
 		goto error;
 	}
 	/* Get path of DFS root */
-	ref_path = build_unc_path_to_root(vol, cifs_sb, false);
+	ref_path = build_unc_path_to_root(ctx, cifs_sb, false);
 	if (IS_ERR(ref_path)) {
 		rc = PTR_ERR(ref_path);
 		ref_path = NULL;
@@ -4680,25 +3378,25 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *vol)
 	do {
 		/* Save full path of last DFS path we used to resolve final target server */
 		kfree(full_path);
-		full_path = build_unc_path_to_root(vol, cifs_sb, !!count);
+		full_path = build_unc_path_to_root(ctx, cifs_sb, !!count);
 		if (IS_ERR(full_path)) {
 			rc = PTR_ERR(full_path);
 			full_path = NULL;
 			break;
 		}
 		/* Chase referral */
-		oldmnt = cifs_sb->mountdata;
-		rc = expand_dfs_referral(xid, root_ses, vol, cifs_sb, ref_path + 1);
+		oldmnt = cifs_sb->ctx->mount_options;
+		rc = expand_dfs_referral(xid, root_ses, ctx, cifs_sb, ref_path + 1);
 		if (rc)
 			break;
 		/* Connect to new DFS target only if we were redirected */
-		if (oldmnt != cifs_sb->mountdata) {
+		if (oldmnt != cifs_sb->ctx->mount_options) {
 			mount_put_conns(cifs_sb, xid, server, ses, tcon);
-			rc = mount_get_conns(vol, cifs_sb, &xid, &server, &ses, &tcon);
+			rc = mount_get_conns(ctx, cifs_sb, &xid, &server, &ses, &tcon);
 		}
 		if (rc && !server && !ses) {
 			/* Failed to connect. Try to connect to other targets in the referral. */
-			rc = do_dfs_failover(ref_path + 1, full_path, cifs_sb, vol, root_ses, &xid,
+			rc = do_dfs_failover(ref_path + 1, full_path, cifs_sb, ctx, root_ses, &xid,
 					     &server, &ses, &tcon);
 		}
 		if (rc == -EACCES || rc == -EOPNOTSUPP || !server || !ses)
@@ -4711,7 +3409,7 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *vol)
 			set_root_ses(cifs_sb, ses, &root_ses);
 		}
 		/* Check for remaining path components and then continue chasing them (-EREMOTE) */
-		rc = check_dfs_prepath(cifs_sb, vol, xid, server, tcon, &ref_path);
+		rc = check_dfs_prepath(cifs_sb, ctx, xid, server, tcon, &ref_path);
 		/* Prevent recursion on broken link referrals */
 		if (rc == -EREMOTE && ++count > MAX_NESTED_LINKS)
 			rc = -ELOOP;
@@ -4742,8 +3440,8 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *vol)
 	tcon->remap = cifs_remap(cifs_sb);
 	spin_unlock(&cifs_tcp_ses_lock);
 
-	/* Add original volume information for DFS cache to be used when refreshing referrals */
-	rc = dfs_cache_add_vol(mntdata, vol, cifs_sb->origin_fullpath);
+	/* Add original context for DFS cache to be used when refreshing referrals */
+	rc = dfs_cache_add_vol(mntdata, ctx, cifs_sb->origin_fullpath);
 	if (rc)
 		goto error;
 	/*
@@ -4758,12 +3456,12 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *vol)
 	 */
 	cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH;
 	kfree(cifs_sb->prepath);
-	cifs_sb->prepath = vol->prepath;
-	vol->prepath = NULL;
+	cifs_sb->prepath = ctx->prepath;
+	ctx->prepath = NULL;
 
 out:
 	free_xid(xid);
-	cifs_try_adding_channels(ses);
+	cifs_try_adding_channels(cifs_sb, ses);
 	return mount_setup_tlink(cifs_sb, ses, tcon);
 
 error:
@@ -4776,7 +3474,7 @@ error:
 	return rc;
 }
 #else
-int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *vol)
+int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
 {
 	int rc = 0;
 	unsigned int xid;
@@ -4784,12 +3482,12 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *vol)
 	struct cifs_tcon *tcon;
 	struct TCP_Server_Info *server;
 
-	rc = mount_get_conns(vol, cifs_sb, &xid, &server, &ses, &tcon);
+	rc = mount_get_conns(ctx, cifs_sb, &xid, &server, &ses, &tcon);
 	if (rc)
 		goto error;
 
 	if (tcon) {
-		rc = is_path_remote(cifs_sb, vol, xid, server, tcon);
+		rc = is_path_remote(cifs_sb, ctx, xid, server, tcon);
 		if (rc == -EREMOTE)
 			rc = -EOPNOTSUPP;
 		if (rc)
@@ -4970,9 +3668,11 @@ CIFSTCon(const unsigned int xid, struct cifs_ses *ses,
 
 static void delayed_free(struct rcu_head *p)
 {
-	struct cifs_sb_info *sbi = container_of(p, struct cifs_sb_info, rcu);
-	unload_nls(sbi->local_nls);
-	kfree(sbi);
+	struct cifs_sb_info *cifs_sb = container_of(p, struct cifs_sb_info, rcu);
+
+	unload_nls(cifs_sb->local_nls);
+	smb3_cleanup_fs_context(cifs_sb->ctx);
+	kfree(cifs_sb);
 }
 
 void
@@ -4997,7 +3697,6 @@ cifs_umount(struct cifs_sb_info *cifs_sb)
 	}
 	spin_unlock(&cifs_sb->tlink_tree_lock);
 
-	kfree(cifs_sb->mountdata);
 	kfree(cifs_sb->prepath);
 #ifdef CONFIG_CIFS_DFS_UPCALL
 	dfs_cache_del_vol(cifs_sb->origin_fullpath);
@@ -5066,15 +3765,15 @@ cifs_setup_session(const unsigned int xid, struct cifs_ses *ses,
 }
 
 static int
-cifs_set_vol_auth(struct smb_vol *vol, struct cifs_ses *ses)
+cifs_set_vol_auth(struct smb3_fs_context *ctx, struct cifs_ses *ses)
 {
-	vol->sectype = ses->sectype;
+	ctx->sectype = ses->sectype;
 
 	/* krb5 is special, since we don't need username or pw */
-	if (vol->sectype == Kerberos)
+	if (ctx->sectype == Kerberos)
 		return 0;
 
-	return cifs_set_cifscreds(vol, ses);
+	return cifs_set_cifscreds(ctx, ses);
 }
 
 static struct cifs_tcon *
@@ -5084,31 +3783,34 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
 	struct cifs_tcon *master_tcon = cifs_sb_master_tcon(cifs_sb);
 	struct cifs_ses *ses;
 	struct cifs_tcon *tcon = NULL;
-	struct smb_vol *vol_info;
+	struct smb3_fs_context *ctx;
 
-	vol_info = kzalloc(sizeof(*vol_info), GFP_KERNEL);
-	if (vol_info == NULL)
+	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
+	if (ctx == NULL)
 		return ERR_PTR(-ENOMEM);
 
-	vol_info->local_nls = cifs_sb->local_nls;
-	vol_info->linux_uid = fsuid;
-	vol_info->cred_uid = fsuid;
-	vol_info->UNC = master_tcon->treeName;
-	vol_info->retry = master_tcon->retry;
-	vol_info->nocase = master_tcon->nocase;
-	vol_info->nohandlecache = master_tcon->nohandlecache;
-	vol_info->local_lease = master_tcon->local_lease;
-	vol_info->no_lease = master_tcon->no_lease;
-	vol_info->resilient = master_tcon->use_resilient;
-	vol_info->persistent = master_tcon->use_persistent;
-	vol_info->handle_timeout = master_tcon->handle_timeout;
-	vol_info->no_linux_ext = !master_tcon->unix_ext;
-	vol_info->linux_ext = master_tcon->posix_extensions;
-	vol_info->sectype = master_tcon->ses->sectype;
-	vol_info->sign = master_tcon->ses->sign;
-	vol_info->seal = master_tcon->seal;
-
-	rc = cifs_set_vol_auth(vol_info, master_tcon->ses);
+	ctx->local_nls = cifs_sb->local_nls;
+	ctx->linux_uid = fsuid;
+	ctx->cred_uid = fsuid;
+	ctx->UNC = master_tcon->treeName;
+	ctx->retry = master_tcon->retry;
+	ctx->nocase = master_tcon->nocase;
+	ctx->nohandlecache = master_tcon->nohandlecache;
+	ctx->local_lease = master_tcon->local_lease;
+	ctx->no_lease = master_tcon->no_lease;
+	ctx->resilient = master_tcon->use_resilient;
+	ctx->persistent = master_tcon->use_persistent;
+	ctx->handle_timeout = master_tcon->handle_timeout;
+	ctx->no_linux_ext = !master_tcon->unix_ext;
+	ctx->linux_ext = master_tcon->posix_extensions;
+	ctx->sectype = master_tcon->ses->sectype;
+	ctx->sign = master_tcon->ses->sign;
+	ctx->seal = master_tcon->seal;
+#ifdef CONFIG_CIFS_SWN_UPCALL
+	ctx->witness = master_tcon->use_witness;
+#endif
+
+	rc = cifs_set_vol_auth(ctx, master_tcon->ses);
 	if (rc) {
 		tcon = ERR_PTR(rc);
 		goto out;
@@ -5119,26 +3821,26 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid)
 	++master_tcon->ses->server->srv_count;
 	spin_unlock(&cifs_tcp_ses_lock);
 
-	ses = cifs_get_smb_ses(master_tcon->ses->server, vol_info);
+	ses = cifs_get_smb_ses(master_tcon->ses->server, ctx);
 	if (IS_ERR(ses)) {
 		tcon = (struct cifs_tcon *)ses;
 		cifs_put_tcp_session(master_tcon->ses->server, 0);
 		goto out;
 	}
 
-	tcon = cifs_get_tcon(ses, vol_info);
+	tcon = cifs_get_tcon(ses, ctx);
 	if (IS_ERR(tcon)) {
 		cifs_put_smb_ses(ses);
 		goto out;
 	}
 
 	if (cap_unix(ses))
-		reset_cifs_unix_caps(0, tcon, NULL, vol_info);
+		reset_cifs_unix_caps(0, tcon, NULL, ctx);
 
 out:
-	kfree(vol_info->username);
-	kfree_sensitive(vol_info->password);
-	kfree(vol_info);
+	kfree(ctx->username);
+	kfree_sensitive(ctx->password);
+	kfree(ctx);
 
 	return tcon;
 }
diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c
index 6ee849698962..6ad6ba5f6ebe 100644
--- a/fs/cifs/dfs_cache.c
+++ b/fs/cifs/dfs_cache.c
@@ -18,6 +18,7 @@
 #include "cifs_debug.h"
 #include "cifs_unicode.h"
 #include "smb2glob.h"
+#include "fs_context.h"
 
 #include "dfs_cache.h"
 
@@ -48,8 +49,8 @@ struct cache_entry {
 
 struct vol_info {
 	char *fullpath;
-	spinlock_t smb_vol_lock;
-	struct smb_vol smb_vol;
+	spinlock_t ctx_lock;
+	struct smb3_fs_context ctx;
 	char *mntdata;
 	struct list_head list;
 	struct list_head rlist;
@@ -586,7 +587,7 @@ static void __vol_release(struct vol_info *vi)
 {
 	kfree(vi->fullpath);
 	kfree(vi->mntdata);
-	cifs_cleanup_volume_info_contents(&vi->smb_vol);
+	smb3_cleanup_fs_context_contents(&vi->ctx);
 	kfree(vi);
 }
 
@@ -1140,80 +1141,22 @@ out_unlock:
 	return rc;
 }
 
-static int dup_vol(struct smb_vol *vol, struct smb_vol *new)
-{
-	memcpy(new, vol, sizeof(*new));
-
-	if (vol->username) {
-		new->username = kstrndup(vol->username, strlen(vol->username),
-					 GFP_KERNEL);
-		if (!new->username)
-			return -ENOMEM;
-	}
-	if (vol->password) {
-		new->password = kstrndup(vol->password, strlen(vol->password),
-					 GFP_KERNEL);
-		if (!new->password)
-			goto err_free_username;
-	}
-	if (vol->UNC) {
-		cifs_dbg(FYI, "%s: vol->UNC: %s\n", __func__, vol->UNC);
-		new->UNC = kstrndup(vol->UNC, strlen(vol->UNC), GFP_KERNEL);
-		if (!new->UNC)
-			goto err_free_password;
-	}
-	if (vol->domainname) {
-		new->domainname = kstrndup(vol->domainname,
-					   strlen(vol->domainname), GFP_KERNEL);
-		if (!new->domainname)
-			goto err_free_unc;
-	}
-	if (vol->iocharset) {
-		new->iocharset = kstrndup(vol->iocharset,
-					  strlen(vol->iocharset), GFP_KERNEL);
-		if (!new->iocharset)
-			goto err_free_domainname;
-	}
-	if (vol->prepath) {
-		cifs_dbg(FYI, "%s: vol->prepath: %s\n", __func__, vol->prepath);
-		new->prepath = kstrndup(vol->prepath, strlen(vol->prepath),
-					GFP_KERNEL);
-		if (!new->prepath)
-			goto err_free_iocharset;
-	}
-
-	return 0;
-
-err_free_iocharset:
-	kfree(new->iocharset);
-err_free_domainname:
-	kfree(new->domainname);
-err_free_unc:
-	kfree(new->UNC);
-err_free_password:
-	kfree_sensitive(new->password);
-err_free_username:
-	kfree(new->username);
-	kfree(new);
-	return -ENOMEM;
-}
-
 /**
- * dfs_cache_add_vol - add a cifs volume during mount() that will be handled by
+ * dfs_cache_add_vol - add a cifs context during mount() that will be handled by
  * DFS cache refresh worker.
  *
  * @mntdata: mount data.
- * @vol: cifs volume.
+ * @ctx: cifs context.
  * @fullpath: origin full path.
  *
- * Return zero if volume was set up correctly, otherwise non-zero.
+ * Return zero if context was set up correctly, otherwise non-zero.
  */
-int dfs_cache_add_vol(char *mntdata, struct smb_vol *vol, const char *fullpath)
+int dfs_cache_add_vol(char *mntdata, struct smb3_fs_context *ctx, const char *fullpath)
 {
 	int rc;
 	struct vol_info *vi;
 
-	if (!vol || !fullpath || !mntdata)
+	if (!ctx || !fullpath || !mntdata)
 		return -EINVAL;
 
 	cifs_dbg(FYI, "%s: fullpath: %s\n", __func__, fullpath);
@@ -1228,12 +1171,12 @@ int dfs_cache_add_vol(char *mntdata, struct smb_vol *vol, const char *fullpath)
 		goto err_free_vi;
 	}
 
-	rc = dup_vol(vol, &vi->smb_vol);
+	rc = smb3_fs_context_dup(&vi->ctx, ctx);
 	if (rc)
 		goto err_free_fullpath;
 
 	vi->mntdata = mntdata;
-	spin_lock_init(&vi->smb_vol_lock);
+	spin_lock_init(&vi->ctx_lock);
 	kref_init(&vi->refcnt);
 
 	spin_lock(&vol_list_lock);
@@ -1289,10 +1232,10 @@ int dfs_cache_update_vol(const char *fullpath, struct TCP_Server_Info *server)
 	spin_unlock(&vol_list_lock);
 
 	cifs_dbg(FYI, "%s: updating volume info\n", __func__);
-	spin_lock(&vi->smb_vol_lock);
-	memcpy(&vi->smb_vol.dstaddr, &server->dstaddr,
-	       sizeof(vi->smb_vol.dstaddr));
-	spin_unlock(&vi->smb_vol_lock);
+	spin_lock(&vi->ctx_lock);
+	memcpy(&vi->ctx.dstaddr, &server->dstaddr,
+	       sizeof(vi->ctx.dstaddr));
+	spin_unlock(&vi->ctx_lock);
 
 	kref_put(&vi->refcnt, vol_release);
 
@@ -1445,11 +1388,11 @@ static inline void put_tcp_server(struct TCP_Server_Info *server)
 	cifs_put_tcp_session(server, 0);
 }
 
-static struct TCP_Server_Info *get_tcp_server(struct smb_vol *vol)
+static struct TCP_Server_Info *get_tcp_server(struct smb3_fs_context *ctx)
 {
 	struct TCP_Server_Info *server;
 
-	server = cifs_find_tcp_session(vol);
+	server = cifs_find_tcp_session(ctx);
 	if (IS_ERR_OR_NULL(server))
 		return NULL;
 
@@ -1473,10 +1416,10 @@ static struct cifs_ses *find_root_ses(struct vol_info *vi,
 	int rc;
 	struct cache_entry *ce;
 	struct dfs_info3_param ref = {0};
-	char *mdata = NULL, *devname = NULL;
+	char *mdata = NULL;
 	struct TCP_Server_Info *server;
 	struct cifs_ses *ses;
-	struct smb_vol vol = {NULL};
+	struct smb3_fs_context ctx = {NULL};
 
 	rpath = get_dfs_root(path);
 	if (IS_ERR(rpath))
@@ -1500,8 +1443,7 @@ static struct cifs_ses *find_root_ses(struct vol_info *vi,
 
 	up_read(&htable_rw_lock);
 
-	mdata = cifs_compose_mount_options(vi->mntdata, rpath, &ref,
-					   &devname);
+	mdata = cifs_compose_mount_options(vi->mntdata, rpath, &ref);
 	free_dfs_info_param(&ref);
 
 	if (IS_ERR(mdata)) {
@@ -1510,24 +1452,23 @@ static struct cifs_ses *find_root_ses(struct vol_info *vi,
 		goto out;
 	}
 
-	rc = cifs_setup_volume_info(&vol, mdata, devname, false);
-	kfree(devname);
+	rc = cifs_setup_volume_info(&ctx);
 
 	if (rc) {
 		ses = ERR_PTR(rc);
 		goto out;
 	}
 
-	server = get_tcp_server(&vol);
+	server = get_tcp_server(&ctx);
 	if (!server) {
 		ses = ERR_PTR(-EHOSTDOWN);
 		goto out;
 	}
 
-	ses = cifs_get_smb_ses(server, &vol);
+	ses = cifs_get_smb_ses(server, &ctx);
 
 out:
-	cifs_cleanup_volume_info_contents(&vol);
+	smb3_cleanup_fs_context_contents(&ctx);
 	kfree(mdata);
 	kfree(rpath);
 
@@ -1619,7 +1560,7 @@ static void refresh_cache_worker(struct work_struct *work)
 	 */
 	spin_lock(&vol_list_lock);
 	list_for_each_entry(vi, &vol_list, list) {
-		server = get_tcp_server(&vi->smb_vol);
+		server = get_tcp_server(&vi->ctx);
 		if (!server)
 			continue;
 
@@ -1631,9 +1572,9 @@ static void refresh_cache_worker(struct work_struct *work)
 
 	/* Walk through all TCONs and refresh any expired cache entry */
 	list_for_each_entry_safe(vi, nvi, &vols, rlist) {
-		spin_lock(&vi->smb_vol_lock);
-		server = get_tcp_server(&vi->smb_vol);
-		spin_unlock(&vi->smb_vol_lock);
+		spin_lock(&vi->ctx_lock);
+		server = get_tcp_server(&vi->ctx);
+		spin_unlock(&vi->ctx_lock);
 
 		if (!server)
 			goto next_vol;
diff --git a/fs/cifs/dfs_cache.h b/fs/cifs/dfs_cache.h
index 3d7c05194536..1afc4f590c47 100644
--- a/fs/cifs/dfs_cache.h
+++ b/fs/cifs/dfs_cache.h
@@ -44,7 +44,7 @@ dfs_cache_noreq_update_tgthint(const char *path,
 extern int dfs_cache_get_tgt_referral(const char *path,
 				      const struct dfs_cache_tgt_iterator *it,
 				      struct dfs_info3_param *ref);
-extern int dfs_cache_add_vol(char *mntdata, struct smb_vol *vol,
+extern int dfs_cache_add_vol(char *mntdata, struct smb3_fs_context *ctx,
 			const char *fullpath);
 extern int dfs_cache_update_vol(const char *fullpath,
 				struct TCP_Server_Info *server);
diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c
index 398c1eef7190..68900f1629bf 100644
--- a/fs/cifs/dir.c
+++ b/fs/cifs/dir.c
@@ -33,6 +33,7 @@
 #include "cifs_debug.h"
 #include "cifs_fs_sb.h"
 #include "cifs_unicode.h"
+#include "fs_context.h"
 
 static void
 renew_parental_timestamps(struct dentry *direntry)
@@ -46,10 +47,10 @@ renew_parental_timestamps(struct dentry *direntry)
 }
 
 char *
-cifs_build_path_to_root(struct smb_vol *vol, struct cifs_sb_info *cifs_sb,
+cifs_build_path_to_root(struct smb3_fs_context *ctx, struct cifs_sb_info *cifs_sb,
 			struct cifs_tcon *tcon, int add_treename)
 {
-	int pplen = vol->prepath ? strlen(vol->prepath) + 1 : 0;
+	int pplen = ctx->prepath ? strlen(ctx->prepath) + 1 : 0;
 	int dfsplen;
 	char *full_path = NULL;
 
@@ -71,7 +72,7 @@ cifs_build_path_to_root(struct smb_vol *vol, struct cifs_sb_info *cifs_sb,
 	if (dfsplen)
 		memcpy(full_path, tcon->treeName, dfsplen);
 	full_path[dfsplen] = CIFS_DIR_SEP(cifs_sb);
-	memcpy(full_path + dfsplen + 1, vol->prepath, pplen);
+	memcpy(full_path + dfsplen + 1, ctx->prepath, pplen);
 	convert_delimiter(full_path, CIFS_DIR_SEP(cifs_sb));
 	return full_path;
 }
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index be46fab4c96d..6d001905c8e5 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -44,6 +44,7 @@
 #include "cifs_fs_sb.h"
 #include "fscache.h"
 #include "smbdirect.h"
+#include "fs_context.h"
 
 static inline int cifs_convert_flags(unsigned int flags)
 {
@@ -416,6 +417,8 @@ static void cifsFileInfo_put_work(struct work_struct *work)
  * cifsFileInfo_put - release a reference of file priv data
  *
  * Always potentially wait for oplock handler. See _cifsFileInfo_put().
+ *
+ * @cifs_file:	cifs/smb3 specific info (eg refcounts) for an open file
  */
 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
 {
@@ -431,8 +434,11 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
  *
  * If @wait_for_oplock_handler is true and we are releasing the last
  * reference, wait for any running oplock break handler of the file
- * and cancel any pending one. If calling this function from the
- * oplock break handler, you need to pass false.
+ * and cancel any pending one.
+ *
+ * @cifs_file:	cifs/smb3 specific info (eg refcounts) for an open file
+ * @wait_oplock_handler: must be false if called from oplock_break_handler
+ * @offload:	not offloaded on close and oplock breaks
  *
  */
 void _cifsFileInfo_put(struct cifsFileInfo *cifs_file,
@@ -566,7 +572,7 @@ int cifs_open(struct inode *inode, struct file *file)
 				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
 		/* can not refresh inode info since size could be stale */
 		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
-				cifs_sb->mnt_file_mode /* ignored */,
+				cifs_sb->ctx->file_mode /* ignored */,
 				file->f_flags, &oplock, &fid.netfid, xid);
 		if (rc == 0) {
 			cifs_dbg(FYI, "posix open succeeded\n");
@@ -735,7 +741,7 @@ cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
 						~(O_CREAT | O_EXCL | O_TRUNC);
 
 		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
-				     cifs_sb->mnt_file_mode /* ignored */,
+				     cifs_sb->ctx->file_mode /* ignored */,
 				     oflags, &oplock, &cfile->fid.netfid, xid);
 		if (rc == 0) {
 			cifs_dbg(FYI, "posix reopen succeeded\n");
@@ -2330,7 +2336,7 @@ static int cifs_writepages(struct address_space *mapping,
 	 * If wsize is smaller than the page cache size, default to writing
 	 * one page at a time via cifs_writepage
 	 */
-	if (cifs_sb->wsize < PAGE_SIZE)
+	if (cifs_sb->ctx->wsize < PAGE_SIZE)
 		return generic_writepages(mapping, wbc);
 
 	xid = get_xid();
@@ -2363,7 +2369,7 @@ retry:
 		if (rc)
 			get_file_rc = rc;
 
-		rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
+		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
 						   &wsize, credits);
 		if (rc != 0) {
 			done = true;
@@ -2905,7 +2911,7 @@ cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
 				break;
 		}
 
-		rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
+		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->wsize,
 						   &wsize, credits);
 		if (rc)
 			break;
@@ -3636,7 +3642,7 @@ cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
 				break;
 		}
 
-		rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
+		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
 						   &rsize, credits);
 		if (rc)
 			break;
@@ -4022,7 +4028,7 @@ cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
 	cifs_sb = CIFS_FILE_SB(file);
 
 	/* FIXME: set up handlers for larger reads and/or convert to async */
-	rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
+	rsize = min_t(unsigned int, cifs_sb->ctx->rsize, CIFSMaxBufSize);
 
 	if (file->private_data == NULL) {
 		rc = -EBADF;
@@ -4407,7 +4413,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
 				break;
 		}
 
-		rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
+		rc = server->ops->wait_mtu_credits(server, cifs_sb->ctx->rsize,
 						   &rsize, credits);
 		if (rc)
 			break;
diff --git a/fs/cifs/fs_context.c b/fs/cifs/fs_context.c
index ad6c2fed4055..0afccbbed2e6 100644
--- a/fs/cifs/fs_context.c
+++ b/fs/cifs/fs_context.c
@@ -6,8 +6,32 @@
  *              David Howells <dhowells@redhat.com>
  */
 
+/*
+#include <linux/module.h>
+#include <linux/nsproxy.h>
+#include <linux/slab.h>
+#include <linux/magic.h>
+#include <linux/security.h>
+#include <net/net_namespace.h>
+*/
+
+#include <linux/ctype.h>
+#include <linux/fs_context.h>
+#include <linux/fs_parser.h>
+#include <linux/fs.h>
+#include <linux/mount.h>
+#include <linux/parser.h>
+#include <linux/utsname.h>
+#include "cifsfs.h"
+#include "cifspdu.h"
 #include "cifsglob.h"
+#include "cifsproto.h"
+#include "cifs_unicode.h"
 #include "cifs_debug.h"
+#include "cifs_fs_sb.h"
+#include "ntlmssp.h"
+#include "nterr.h"
+#include "rfc1002pdu.h"
 #include "fs_context.h"
 
 static const match_table_t cifs_smb_version_tokens = {
@@ -24,77 +48,6 @@ static const match_table_t cifs_smb_version_tokens = {
 	{ Smb_version_err, NULL }
 };
 
-int
-cifs_parse_smb_version(char *value, struct smb_vol *vol, bool is_smb3)
-{
-	substring_t args[MAX_OPT_ARGS];
-
-	switch (match_token(value, cifs_smb_version_tokens, args)) {
-#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
-	case Smb_1:
-		if (disable_legacy_dialects) {
-			cifs_dbg(VFS, "mount with legacy dialect disabled\n");
-			return 1;
-		}
-		if (is_smb3) {
-			cifs_dbg(VFS, "vers=1.0 (cifs) not permitted when mounting with smb3\n");
-			return 1;
-		}
-		cifs_dbg(VFS, "Use of the less secure dialect vers=1.0 is not recommended unless required for access to very old servers\n");
-		vol->ops = &smb1_operations;
-		vol->vals = &smb1_values;
-		break;
-	case Smb_20:
-		if (disable_legacy_dialects) {
-			cifs_dbg(VFS, "mount with legacy dialect disabled\n");
-			return 1;
-		}
-		if (is_smb3) {
-			cifs_dbg(VFS, "vers=2.0 not permitted when mounting with smb3\n");
-			return 1;
-		}
-		vol->ops = &smb20_operations;
-		vol->vals = &smb20_values;
-		break;
-#else
-	case Smb_1:
-		cifs_dbg(VFS, "vers=1.0 (cifs) mount not permitted when legacy dialects disabled\n");
-		return 1;
-	case Smb_20:
-		cifs_dbg(VFS, "vers=2.0 mount not permitted when legacy dialects disabled\n");
-		return 1;
-#endif /* CIFS_ALLOW_INSECURE_LEGACY */
-	case Smb_21:
-		vol->ops = &smb21_operations;
-		vol->vals = &smb21_values;
-		break;
-	case Smb_30:
-		vol->ops = &smb30_operations;
-		vol->vals = &smb30_values;
-		break;
-	case Smb_302:
-		vol->ops = &smb30_operations; /* currently identical with 3.0 */
-		vol->vals = &smb302_values;
-		break;
-	case Smb_311:
-		vol->ops = &smb311_operations;
-		vol->vals = &smb311_values;
-		break;
-	case Smb_3any:
-		vol->ops = &smb30_operations; /* currently identical with 3.0 */
-		vol->vals = &smb3any_values;
-		break;
-	case Smb_default:
-		vol->ops = &smb30_operations; /* currently identical with 3.0 */
-		vol->vals = &smbdefault_values;
-		break;
-	default:
-		cifs_dbg(VFS, "Unknown vers= option specified: %s\n", value);
-		return 1;
-	}
-	return 0;
-}
-
 static const match_table_t cifs_secflavor_tokens = {
 	{ Opt_sec_krb5, "krb5" },
 	{ Opt_sec_krb5i, "krb5i" },
@@ -112,7 +65,123 @@ static const match_table_t cifs_secflavor_tokens = {
 	{ Opt_sec_err, NULL }
 };
 
-int cifs_parse_security_flavors(char *value, struct smb_vol *vol)
+const struct fs_parameter_spec smb3_fs_parameters[] = {
+	/* Mount options that take no arguments */
+	fsparam_flag_no("user_xattr", Opt_user_xattr),
+	fsparam_flag_no("forceuid", Opt_forceuid),
+	fsparam_flag_no("multichannel", Opt_multichannel),
+	fsparam_flag_no("forcegid", Opt_forcegid),
+	fsparam_flag("noblocksend", Opt_noblocksend),
+	fsparam_flag("noautotune", Opt_noautotune),
+	fsparam_flag("nolease", Opt_nolease),
+	fsparam_flag_no("hard", Opt_hard),
+	fsparam_flag_no("soft", Opt_soft),
+	fsparam_flag_no("perm", Opt_perm),
+	fsparam_flag("nodelete", Opt_nodelete),
+	fsparam_flag_no("mapposix", Opt_mapposix),
+	fsparam_flag("mapchars", Opt_mapchars),
+	fsparam_flag("nomapchars", Opt_nomapchars),
+	fsparam_flag_no("sfu", Opt_sfu),
+	fsparam_flag("nodfs", Opt_nodfs),
+	fsparam_flag_no("posixpaths", Opt_posixpaths),
+	fsparam_flag_no("unix", Opt_unix),
+	fsparam_flag_no("linux", Opt_unix),
+	fsparam_flag_no("posix", Opt_unix),
+	fsparam_flag("nocase", Opt_nocase),
+	fsparam_flag("ignorecase", Opt_nocase),
+	fsparam_flag_no("brl", Opt_brl),
+	fsparam_flag_no("handlecache", Opt_handlecache),
+	fsparam_flag("forcemandatorylock", Opt_forcemandatorylock),
+	fsparam_flag("forcemand", Opt_forcemandatorylock),
+	fsparam_flag("setuidfromacl", Opt_setuidfromacl),
+	fsparam_flag("idsfromsid", Opt_setuidfromacl),
+	fsparam_flag_no("setuids", Opt_setuids),
+	fsparam_flag_no("dynperm", Opt_dynperm),
+	fsparam_flag_no("intr", Opt_intr),
+	fsparam_flag_no("strictsync", Opt_strictsync),
+	fsparam_flag_no("serverino", Opt_serverino),
+	fsparam_flag("rwpidforward", Opt_rwpidforward),
+	fsparam_flag("cifsacl", Opt_cifsacl),
+	fsparam_flag_no("acl", Opt_acl),
+	fsparam_flag("locallease", Opt_locallease),
+	fsparam_flag("sign", Opt_sign),
+	fsparam_flag("ignore_signature", Opt_ignore_signature),
+	fsparam_flag("signloosely", Opt_ignore_signature),
+	fsparam_flag("seal", Opt_seal),
+	fsparam_flag("noac", Opt_noac),
+	fsparam_flag("fsc", Opt_fsc),
+	fsparam_flag("mfsymlinks", Opt_mfsymlinks),
+	fsparam_flag("multiuser", Opt_multiuser),
+	fsparam_flag("sloppy", Opt_sloppy),
+	fsparam_flag("nosharesock", Opt_nosharesock),
+	fsparam_flag_no("persistenthandles", Opt_persistent),
+	fsparam_flag_no("resilienthandles", Opt_resilient),
+	fsparam_flag("domainauto", Opt_domainauto),
+	fsparam_flag("rdma", Opt_rdma),
+	fsparam_flag("modesid", Opt_modesid),
+	fsparam_flag("modefromsid", Opt_modesid),
+	fsparam_flag("rootfs", Opt_rootfs),
+	fsparam_flag("compress", Opt_compress),
+	fsparam_flag("witness", Opt_witness),
+
+	/* Mount options which take numeric value */
+	fsparam_u32("backupuid", Opt_backupuid),
+	fsparam_u32("backupgid", Opt_backupgid),
+	fsparam_u32("uid", Opt_uid),
+	fsparam_u32("cruid", Opt_cruid),
+	fsparam_u32("gid", Opt_gid),
+	fsparam_u32("file_mode", Opt_file_mode),
+	fsparam_u32("dirmode", Opt_dirmode),
+	fsparam_u32("dir_mode", Opt_dirmode),
+	fsparam_u32("port", Opt_port),
+	fsparam_u32("min_enc_offload", Opt_min_enc_offload),
+	fsparam_u32("esize", Opt_min_enc_offload),
+	fsparam_u32("bsize", Opt_blocksize),
+	fsparam_u32("rsize", Opt_rsize),
+	fsparam_u32("wsize", Opt_wsize),
+	fsparam_u32("actimeo", Opt_actimeo),
+	fsparam_u32("echo_interval", Opt_echo_interval),
+	fsparam_u32("max_credits", Opt_max_credits),
+	fsparam_u32("handletimeout", Opt_handletimeout),
+	fsparam_u32("snapshot", Opt_snapshot),
+	fsparam_u32("max_channels", Opt_max_channels),
+
+	/* Mount options which take string value */
+	fsparam_string("source", Opt_source),
+	fsparam_string("unc", Opt_source),
+	fsparam_string("user", Opt_user),
+	fsparam_string("username", Opt_user),
+	fsparam_string("pass", Opt_pass),
+	fsparam_string("password", Opt_pass),
+	fsparam_string("ip", Opt_ip),
+	fsparam_string("addr", Opt_ip),
+	fsparam_string("domain", Opt_domain),
+	fsparam_string("dom", Opt_domain),
+	fsparam_string("srcaddr", Opt_srcaddr),
+	fsparam_string("iocharset", Opt_iocharset),
+	fsparam_string("netbiosname", Opt_netbiosname),
+	fsparam_string("servern", Opt_servern),
+	fsparam_string("ver", Opt_ver),
+	fsparam_string("vers", Opt_vers),
+	fsparam_string("sec", Opt_sec),
+	fsparam_string("cache", Opt_cache),
+
+	/* Arguments that should be ignored */
+	fsparam_flag("guest", Opt_ignore),
+	fsparam_flag("noatime", Opt_ignore),
+	fsparam_flag("relatime", Opt_ignore),
+	fsparam_flag("_netdev", Opt_ignore),
+	fsparam_flag_no("suid", Opt_ignore),
+	fsparam_flag_no("exec", Opt_ignore),
+	fsparam_flag_no("dev", Opt_ignore),
+	fsparam_flag_no("mand", Opt_ignore),
+	fsparam_string("cred", Opt_ignore),
+	fsparam_string("credentials", Opt_ignore),
+	{}
+};
+
+int
+cifs_parse_security_flavors(char *value, struct smb3_fs_context *ctx)
 {
 
 	substring_t args[MAX_OPT_ARGS];
@@ -121,44 +190,44 @@ int cifs_parse_security_flavors(char *value, struct smb_vol *vol)
 	 * With mount options, the last one should win. Reset any existing
 	 * settings back to default.
 	 */
-	vol->sectype = Unspecified;
-	vol->sign = false;
+	ctx->sectype = Unspecified;
+	ctx->sign = false;
 
 	switch (match_token(value, cifs_secflavor_tokens, args)) {
 	case Opt_sec_krb5p:
 		cifs_dbg(VFS, "sec=krb5p is not supported!\n");
 		return 1;
 	case Opt_sec_krb5i:
-		vol->sign = true;
+		ctx->sign = true;
 		fallthrough;
 	case Opt_sec_krb5:
-		vol->sectype = Kerberos;
+		ctx->sectype = Kerberos;
 		break;
 	case Opt_sec_ntlmsspi:
-		vol->sign = true;
+		ctx->sign = true;
 		fallthrough;
 	case Opt_sec_ntlmssp:
-		vol->sectype = RawNTLMSSP;
+		ctx->sectype = RawNTLMSSP;
 		break;
 	case Opt_sec_ntlmi:
-		vol->sign = true;
+		ctx->sign = true;
 		fallthrough;
 	case Opt_ntlm:
-		vol->sectype = NTLM;
+		ctx->sectype = NTLM;
 		break;
 	case Opt_sec_ntlmv2i:
-		vol->sign = true;
+		ctx->sign = true;
 		fallthrough;
 	case Opt_sec_ntlmv2:
-		vol->sectype = NTLMv2;
+		ctx->sectype = NTLMv2;
 		break;
 #ifdef CONFIG_CIFS_WEAK_PW_HASH
 	case Opt_sec_lanman:
-		vol->sectype = LANMAN;
+		ctx->sectype = LANMAN;
 		break;
 #endif
 	case Opt_sec_none:
-		vol->nullauth = 1;
+		ctx->nullauth = 1;
 		break;
 	default:
 		cifs_dbg(VFS, "bad security option: %s\n", value);
@@ -178,40 +247,40 @@ static const match_table_t cifs_cacheflavor_tokens = {
 };
 
 int
-cifs_parse_cache_flavor(char *value, struct smb_vol *vol)
+cifs_parse_cache_flavor(char *value, struct smb3_fs_context *ctx)
 {
 	substring_t args[MAX_OPT_ARGS];
 
 	switch (match_token(value, cifs_cacheflavor_tokens, args)) {
 	case Opt_cache_loose:
-		vol->direct_io = false;
-		vol->strict_io = false;
-		vol->cache_ro = false;
-		vol->cache_rw = false;
+		ctx->direct_io = false;
+		ctx->strict_io = false;
+		ctx->cache_ro = false;
+		ctx->cache_rw = false;
 		break;
 	case Opt_cache_strict:
-		vol->direct_io = false;
-		vol->strict_io = true;
-		vol->cache_ro = false;
-		vol->cache_rw = false;
+		ctx->direct_io = false;
+		ctx->strict_io = true;
+		ctx->cache_ro = false;
+		ctx->cache_rw = false;
 		break;
 	case Opt_cache_none:
-		vol->direct_io = true;
-		vol->strict_io = false;
-		vol->cache_ro = false;
-		vol->cache_rw = false;
+		ctx->direct_io = true;
+		ctx->strict_io = false;
+		ctx->cache_ro = false;
+		ctx->cache_rw = false;
 		break;
 	case Opt_cache_ro:
-		vol->direct_io = false;
-		vol->strict_io = false;
-		vol->cache_ro = true;
-		vol->cache_rw = false;
+		ctx->direct_io = false;
+		ctx->strict_io = false;
+		ctx->cache_ro = true;
+		ctx->cache_rw = false;
 		break;
 	case Opt_cache_rw:
-		vol->direct_io = false;
-		vol->strict_io = false;
-		vol->cache_ro = false;
-		vol->cache_rw = true;
+		ctx->direct_io = false;
+		ctx->strict_io = false;
+		ctx->cache_ro = false;
+		ctx->cache_rw = true;
 		break;
 	default:
 		cifs_dbg(VFS, "bad cache= option: %s\n", value);
@@ -219,3 +288,1253 @@ cifs_parse_cache_flavor(char *value, struct smb_vol *vol)
 	}
 	return 0;
 }
+
+#define DUP_CTX_STR(field)						\
+do {									\
+	if (ctx->field) {						\
+		new_ctx->field = kstrdup(ctx->field, GFP_ATOMIC);	\
+		if (new_ctx->field == NULL) {				\
+			smb3_cleanup_fs_context_contents(new_ctx);	\
+			return -ENOMEM;					\
+		}							\
+	}								\
+} while (0)
+
+int
+smb3_fs_context_dup(struct smb3_fs_context *new_ctx, struct smb3_fs_context *ctx)
+{
+	int rc = 0;
+
+	memcpy(new_ctx, ctx, sizeof(*ctx));
+	new_ctx->prepath = NULL;
+	new_ctx->mount_options = NULL;
+	new_ctx->nodename = NULL;
+	new_ctx->username = NULL;
+	new_ctx->password = NULL;
+	new_ctx->domainname = NULL;
+	new_ctx->UNC = NULL;
+	new_ctx->iocharset = NULL;
+
+	/*
+	 * Make sure to stay in sync with smb3_cleanup_fs_context_contents()
+	 */
+	DUP_CTX_STR(prepath);
+	DUP_CTX_STR(mount_options);
+	DUP_CTX_STR(username);
+	DUP_CTX_STR(password);
+	DUP_CTX_STR(UNC);
+	DUP_CTX_STR(domainname);
+	DUP_CTX_STR(nodename);
+	DUP_CTX_STR(iocharset);
+
+	return rc;
+}
+
+static int
+cifs_parse_smb_version(char *value, struct smb3_fs_context *ctx, bool is_smb3)
+{
+	substring_t args[MAX_OPT_ARGS];
+
+	switch (match_token(value, cifs_smb_version_tokens, args)) {
+#ifdef CONFIG_CIFS_ALLOW_INSECURE_LEGACY
+	case Smb_1:
+		if (disable_legacy_dialects) {
+			cifs_dbg(VFS, "mount with legacy dialect disabled\n");
+			return 1;
+		}
+		if (is_smb3) {
+			cifs_dbg(VFS, "vers=1.0 (cifs) not permitted when mounting with smb3\n");
+			return 1;
+		}
+		cifs_dbg(VFS, "Use of the less secure dialect vers=1.0 is not recommended unless required for access to very old servers\n");
+		ctx->ops = &smb1_operations;
+		ctx->vals = &smb1_values;
+		break;
+	case Smb_20:
+		if (disable_legacy_dialects) {
+			cifs_dbg(VFS, "mount with legacy dialect disabled\n");
+			return 1;
+		}
+		if (is_smb3) {
+			cifs_dbg(VFS, "vers=2.0 not permitted when mounting with smb3\n");
+			return 1;
+		}
+		ctx->ops = &smb20_operations;
+		ctx->vals = &smb20_values;
+		break;
+#else
+	case Smb_1:
+		cifs_dbg(VFS, "vers=1.0 (cifs) mount not permitted when legacy dialects disabled\n");
+		return 1;
+	case Smb_20:
+		cifs_dbg(VFS, "vers=2.0 mount not permitted when legacy dialects disabled\n");
+		return 1;
+#endif /* CIFS_ALLOW_INSECURE_LEGACY */
+	case Smb_21:
+		ctx->ops = &smb21_operations;
+		ctx->vals = &smb21_values;
+		break;
+	case Smb_30:
+		ctx->ops = &smb30_operations;
+		ctx->vals = &smb30_values;
+		break;
+	case Smb_302:
+		ctx->ops = &smb30_operations; /* currently identical with 3.0 */
+		ctx->vals = &smb302_values;
+		break;
+	case Smb_311:
+		ctx->ops = &smb311_operations;
+		ctx->vals = &smb311_values;
+		break;
+	case Smb_3any:
+		ctx->ops = &smb30_operations; /* currently identical with 3.0 */
+		ctx->vals = &smb3any_values;
+		break;
+	case Smb_default:
+		ctx->ops = &smb30_operations; /* currently identical with 3.0 */
+		ctx->vals = &smbdefault_values;
+		break;
+	default:
+		cifs_dbg(VFS, "Unknown vers= option specified: %s\n", value);
+		return 1;
+	}
+	return 0;
+}
+
+/*
+ * Parse a devname into substrings and populate the ctx->UNC and ctx->prepath
+ * fields with the result. Returns 0 on success and an error otherwise
+ * (e.g. ENOMEM or EINVAL)
+ */
+int
+smb3_parse_devname(const char *devname, struct smb3_fs_context *ctx)
+{
+	char *pos;
+	const char *delims = "/\\";
+	size_t len;
+
+	if (unlikely(!devname || !*devname)) {
+		cifs_dbg(VFS, "Device name not specified\n");
+		return -EINVAL;
+	}
+
+	/* make sure we have a valid UNC double delimiter prefix */
+	len = strspn(devname, delims);
+	if (len != 2)
+		return -EINVAL;
+
+	/* find delimiter between host and sharename */
+	pos = strpbrk(devname + 2, delims);
+	if (!pos)
+		return -EINVAL;
+
+	/* skip past delimiter */
+	++pos;
+
+	/* now go until next delimiter or end of string */
+	len = strcspn(pos, delims);
+
+	/* move "pos" up to delimiter or NULL */
+	pos += len;
+	ctx->UNC = kstrndup(devname, pos - devname, GFP_KERNEL);
+	if (!ctx->UNC)
+		return -ENOMEM;
+
+	convert_delimiter(ctx->UNC, '\\');
+
+	/* skip any delimiter */
+	if (*pos == '/' || *pos == '\\')
+		pos++;
+
+	/* If pos is NULL then no prepath */
+	if (!*pos)
+		return 0;
+
+	ctx->prepath = kstrdup(pos, GFP_KERNEL);
+	if (!ctx->prepath)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void smb3_fs_context_free(struct fs_context *fc);
+static int smb3_fs_context_parse_param(struct fs_context *fc,
+				       struct fs_parameter *param);
+static int smb3_fs_context_parse_monolithic(struct fs_context *fc,
+					    void *data);
+static int smb3_get_tree(struct fs_context *fc);
+static int smb3_reconfigure(struct fs_context *fc);
+
+static const struct fs_context_operations smb3_fs_context_ops = {
+	.free			= smb3_fs_context_free,
+	.parse_param		= smb3_fs_context_parse_param,
+	.parse_monolithic	= smb3_fs_context_parse_monolithic,
+	.get_tree		= smb3_get_tree,
+	.reconfigure		= smb3_reconfigure,
+};
+
+/*
+ * Parse a monolithic block of data from sys_mount().
+ * smb3_fs_context_parse_monolithic - Parse key[=val][,key[=val]]* mount data
+ * @ctx: The superblock configuration to fill in.
+ * @data: The data to parse
+ *
+ * Parse a blob of data that's in key[=val][,key[=val]]* form.  This can be
+ * called from the ->monolithic_mount_data() fs_context operation.
+ *
+ * Returns 0 on success or the error returned by the ->parse_option() fs_context
+ * operation on failure.
+ */
+static int smb3_fs_context_parse_monolithic(struct fs_context *fc,
+					   void *data)
+{
+	struct smb3_fs_context *ctx = smb3_fc2context(fc);
+	char *options = data, *key;
+	int ret = 0;
+
+	if (!options)
+		return 0;
+
+	ctx->mount_options = kstrdup(data, GFP_KERNEL);
+	if (ctx->mount_options == NULL)
+		return -ENOMEM;
+
+	ret = security_sb_eat_lsm_opts(options, &fc->security);
+	if (ret)
+		return ret;
+
+	/* BB Need to add support for sep= here TBD */
+	while ((key = strsep(&options, ",")) != NULL) {
+		if (*key) {
+			size_t v_len = 0;
+			char *value = strchr(key, '=');
+
+			if (value) {
+				if (value == key)
+					continue;
+				*value++ = 0;
+				v_len = strlen(value);
+			}
+			ret = vfs_parse_fs_string(fc, key, value, v_len);
+			if (ret < 0)
+				break;
+		}
+	}
+
+	return ret;
+}
+
+/*
+ * Validate the preparsed information in the config.
+ */
+static int smb3_fs_context_validate(struct fs_context *fc)
+{
+	struct smb3_fs_context *ctx = smb3_fc2context(fc);
+
+	if (ctx->rdma && ctx->vals->protocol_id < SMB30_PROT_ID) {
+		cifs_dbg(VFS, "SMB Direct requires Version >=3.0\n");
+		return -1;
+	}
+
+#ifndef CONFIG_KEYS
+	/* Muliuser mounts require CONFIG_KEYS support */
+	if (ctx->multiuser) {
+		cifs_dbg(VFS, "Multiuser mounts require kernels with CONFIG_KEYS enabled\n");
+		return -1;
+	}
+#endif
+
+	if (ctx->got_version == false)
+		pr_warn_once("No dialect specified on mount. Default has changed to a more secure dialect, SMB2.1 or later (e.g. SMB3.1.1), from CIFS (SMB1). To use the less secure SMB1 dialect to access old servers which do not support SMB3.1.1 (or even SMB3 or SMB2.1) specify vers=1.0 on mount.\n");
+
+
+	if (!ctx->UNC) {
+		cifs_dbg(VFS, "CIFS mount error: No usable UNC path provided in device string!\n");
+		return -1;
+	}
+
+	/* make sure UNC has a share name */
+	if (strlen(ctx->UNC) < 3 || !strchr(ctx->UNC + 3, '\\')) {
+		cifs_dbg(VFS, "Malformed UNC. Unable to find share name.\n");
+		return -1;
+	}
+
+	if (!ctx->got_ip) {
+		int len;
+		const char *slash;
+
+		/* No ip= option specified? Try to get it from UNC */
+		/* Use the address part of the UNC. */
+		slash = strchr(&ctx->UNC[2], '\\');
+		len = slash - &ctx->UNC[2];
+		if (!cifs_convert_address((struct sockaddr *)&ctx->dstaddr,
+					  &ctx->UNC[2], len)) {
+			pr_err("Unable to determine destination address\n");
+			return -1;
+		}
+	}
+
+	/* set the port that we got earlier */
+	cifs_set_port((struct sockaddr *)&ctx->dstaddr, ctx->port);
+
+	if (ctx->override_uid && !ctx->uid_specified) {
+		ctx->override_uid = 0;
+		pr_notice("ignoring forceuid mount option specified with no uid= option\n");
+	}
+
+	if (ctx->override_gid && !ctx->gid_specified) {
+		ctx->override_gid = 0;
+		pr_notice("ignoring forcegid mount option specified with no gid= option\n");
+	}
+
+	return 0;
+}
+
+static int smb3_get_tree_common(struct fs_context *fc)
+{
+	struct smb3_fs_context *ctx = smb3_fc2context(fc);
+	struct dentry *root;
+	int rc = 0;
+
+	root = cifs_smb3_do_mount(fc->fs_type, 0, ctx);
+	if (IS_ERR(root))
+		return PTR_ERR(root);
+
+	fc->root = root;
+
+	return rc;
+}
+
+/*
+ * Create an SMB3 superblock from the parameters passed.
+ */
+static int smb3_get_tree(struct fs_context *fc)
+{
+	int err = smb3_fs_context_validate(fc);
+
+	if (err)
+		return err;
+	return smb3_get_tree_common(fc);
+}
+
+static void smb3_fs_context_free(struct fs_context *fc)
+{
+	struct smb3_fs_context *ctx = smb3_fc2context(fc);
+
+	smb3_cleanup_fs_context(ctx);
+}
+
+/*
+ * Compare the old and new proposed context during reconfigure
+ * and check if the changes are compatible.
+ */
+static int smb3_verify_reconfigure_ctx(struct smb3_fs_context *new_ctx,
+				       struct smb3_fs_context *old_ctx)
+{
+	if (new_ctx->posix_paths != old_ctx->posix_paths) {
+		cifs_dbg(VFS, "can not change posixpaths during remount\n");
+		return -EINVAL;
+	}
+	if (new_ctx->sectype != old_ctx->sectype) {
+		cifs_dbg(VFS, "can not change sec during remount\n");
+		return -EINVAL;
+	}
+	if (new_ctx->multiuser != old_ctx->multiuser) {
+		cifs_dbg(VFS, "can not change multiuser during remount\n");
+		return -EINVAL;
+	}
+	if (new_ctx->UNC &&
+	    (!old_ctx->UNC || strcmp(new_ctx->UNC, old_ctx->UNC))) {
+		cifs_dbg(VFS, "can not change UNC during remount\n");
+		return -EINVAL;
+	}
+	if (new_ctx->username &&
+	    (!old_ctx->username || strcmp(new_ctx->username, old_ctx->username))) {
+		cifs_dbg(VFS, "can not change username during remount\n");
+		return -EINVAL;
+	}
+	if (new_ctx->password &&
+	    (!old_ctx->password || strcmp(new_ctx->password, old_ctx->password))) {
+		cifs_dbg(VFS, "can not change password during remount\n");
+		return -EINVAL;
+	}
+	if (new_ctx->domainname &&
+	    (!old_ctx->domainname || strcmp(new_ctx->domainname, old_ctx->domainname))) {
+		cifs_dbg(VFS, "can not change domainname during remount\n");
+		return -EINVAL;
+	}
+	if (new_ctx->nodename &&
+	    (!old_ctx->nodename || strcmp(new_ctx->nodename, old_ctx->nodename))) {
+		cifs_dbg(VFS, "can not change nodename during remount\n");
+		return -EINVAL;
+	}
+	if (new_ctx->iocharset &&
+	    (!old_ctx->iocharset || strcmp(new_ctx->iocharset, old_ctx->iocharset))) {
+		cifs_dbg(VFS, "can not change iocharset during remount\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+#define STEAL_STRING(cifs_sb, ctx, field)				\
+do {									\
+	kfree(ctx->field);						\
+	ctx->field = cifs_sb->ctx->field;				\
+	cifs_sb->ctx->field = NULL;					\
+} while (0)
+
+static int smb3_reconfigure(struct fs_context *fc)
+{
+	struct smb3_fs_context *ctx = smb3_fc2context(fc);
+	struct dentry *root = fc->root;
+	struct cifs_sb_info *cifs_sb = CIFS_SB(root->d_sb);
+	int rc;
+
+	rc = smb3_verify_reconfigure_ctx(ctx, cifs_sb->ctx);
+	if (rc)
+		return rc;
+
+	/*
+	 * We can not change UNC/username/password/domainname/nodename/iocharset
+	 * during reconnect so ignore what we have in the new context and
+	 * just use what we already have in cifs_sb->ctx.
+	 */
+	STEAL_STRING(cifs_sb, ctx, UNC);
+	STEAL_STRING(cifs_sb, ctx, username);
+	STEAL_STRING(cifs_sb, ctx, password);
+	STEAL_STRING(cifs_sb, ctx, domainname);
+	STEAL_STRING(cifs_sb, ctx, nodename);
+	STEAL_STRING(cifs_sb, ctx, iocharset);
+
+	/* if rsize or wsize not passed in on remount, use previous values */
+	if (ctx->rsize == 0)
+		ctx->rsize = cifs_sb->ctx->rsize;
+	if (ctx->wsize == 0)
+		ctx->wsize = cifs_sb->ctx->wsize;
+
+
+	smb3_cleanup_fs_context_contents(cifs_sb->ctx);
+	rc = smb3_fs_context_dup(cifs_sb->ctx, ctx);
+	smb3_update_mnt_flags(cifs_sb);
+
+	return rc;
+}
+
+static int smb3_fs_context_parse_param(struct fs_context *fc,
+				      struct fs_parameter *param)
+{
+	struct fs_parse_result result;
+	struct smb3_fs_context *ctx = smb3_fc2context(fc);
+	int i, opt;
+	bool is_smb3 = !strcmp(fc->fs_type->name, "smb3");
+	bool skip_parsing = false;
+
+	cifs_dbg(FYI, "CIFS: parsing cifs mount option '%s'\n", param->key);
+
+	/*
+	 * fs_parse can not handle string options with an empty value so
+	 * we will need special handling of them.
+	 */
+	if (param->type == fs_value_is_string && param->string[0] == 0) {
+		if (!strcmp("pass", param->key) || !strcmp("password", param->key)) {
+			skip_parsing = true;
+			opt = Opt_pass;
+		} else if (!strcmp("user", param->key) || !strcmp("username", param->key)) {
+			skip_parsing = true;
+			opt = Opt_user;
+		}
+	}
+
+	if (!skip_parsing) {
+		opt = fs_parse(fc, smb3_fs_parameters, param, &result);
+		if (opt < 0)
+			return ctx->sloppy ? 1 : opt;
+	}
+
+	switch (opt) {
+	case Opt_compress:
+		ctx->compression = UNKNOWN_TYPE;
+		cifs_dbg(VFS,
+			"SMB3 compression support is experimental\n");
+		break;
+	case Opt_nodfs:
+		ctx->nodfs = 1;
+		break;
+	case Opt_hard:
+		if (result.negated)
+			ctx->retry = 0;
+		else
+			ctx->retry = 1;
+		break;
+	case Opt_soft:
+		if (result.negated)
+			ctx->retry = 1;
+		else
+			ctx->retry = 0;
+		break;
+	case Opt_mapposix:
+		if (result.negated)
+			ctx->remap = false;
+		else {
+			ctx->remap = true;
+			ctx->sfu_remap = false; /* disable SFU mapping */
+		}
+		break;
+	case Opt_user_xattr:
+		if (result.negated)
+			ctx->no_xattr = 1;
+		else
+			ctx->no_xattr = 0;
+		break;
+	case Opt_forceuid:
+		if (result.negated)
+			ctx->override_uid = 0;
+		else
+			ctx->override_uid = 1;
+		break;
+	case Opt_forcegid:
+		if (result.negated)
+			ctx->override_gid = 0;
+		else
+			ctx->override_gid = 1;
+		break;
+	case Opt_perm:
+		if (result.negated)
+			ctx->noperm = 1;
+		else
+			ctx->noperm = 0;
+		break;
+	case Opt_dynperm:
+		if (result.negated)
+			ctx->dynperm = 0;
+		else
+			ctx->dynperm = 1;
+		break;
+	case Opt_sfu:
+		if (result.negated)
+			ctx->sfu_emul = 0;
+		else
+			ctx->sfu_emul = 1;
+		break;
+	case Opt_noblocksend:
+		ctx->noblocksnd = 1;
+		break;
+	case Opt_noautotune:
+		ctx->noautotune = 1;
+		break;
+	case Opt_nolease:
+		ctx->no_lease = 1;
+		break;
+	case Opt_nodelete:
+		ctx->nodelete = 1;
+		break;
+	case Opt_multichannel:
+		if (result.negated) {
+			ctx->multichannel = false;
+			ctx->max_channels = 1;
+		} else {
+			ctx->multichannel = true;
+			/* if number of channels not specified, default to 2 */
+			if (ctx->max_channels < 2)
+				ctx->max_channels = 2;
+		}
+		break;
+	case Opt_uid:
+		ctx->linux_uid.val = result.uint_32;
+		ctx->uid_specified = true;
+		break;
+	case Opt_cruid:
+		ctx->cred_uid.val = result.uint_32;
+		break;
+	case Opt_backupgid:
+		ctx->backupgid.val = result.uint_32;
+		ctx->backupgid_specified = true;
+		break;
+	case Opt_gid:
+		ctx->linux_gid.val = result.uint_32;
+		ctx->gid_specified = true;
+		break;
+	case Opt_port:
+		ctx->port = result.uint_32;
+		break;
+	case Opt_file_mode:
+		ctx->file_mode = result.uint_32;
+		break;
+	case Opt_dirmode:
+		ctx->dir_mode = result.uint_32;
+		break;
+	case Opt_min_enc_offload:
+		ctx->min_offload = result.uint_32;
+		break;
+	case Opt_blocksize:
+		/*
+		 * inode blocksize realistically should never need to be
+		 * less than 16K or greater than 16M and default is 1MB.
+		 * Note that small inode block sizes (e.g. 64K) can lead
+		 * to very poor performance of common tools like cp and scp
+		 */
+		if ((result.uint_32 < CIFS_MAX_MSGSIZE) ||
+		   (result.uint_32 > (4 * SMB3_DEFAULT_IOSIZE))) {
+			cifs_dbg(VFS, "%s: Invalid blocksize\n",
+				__func__);
+			goto cifs_parse_mount_err;
+		}
+		ctx->bsize = result.uint_32;
+		ctx->got_bsize = true;
+		break;
+	case Opt_rsize:
+		ctx->rsize = result.uint_32;
+		ctx->got_rsize = true;
+		break;
+	case Opt_wsize:
+		ctx->wsize = result.uint_32;
+		ctx->got_wsize = true;
+		break;
+	case Opt_actimeo:
+		ctx->actimeo = HZ * result.uint_32;
+		if (ctx->actimeo > CIFS_MAX_ACTIMEO) {
+			cifs_dbg(VFS, "attribute cache timeout too large\n");
+			goto cifs_parse_mount_err;
+		}
+		break;
+	case Opt_echo_interval:
+		ctx->echo_interval = result.uint_32;
+		break;
+	case Opt_snapshot:
+		ctx->snapshot_time = result.uint_32;
+		break;
+	case Opt_max_credits:
+		if (result.uint_32 < 20 || result.uint_32 > 60000) {
+			cifs_dbg(VFS, "%s: Invalid max_credits value\n",
+				 __func__);
+			goto cifs_parse_mount_err;
+		}
+		ctx->max_credits = result.uint_32;
+		break;
+	case Opt_max_channels:
+		if (result.uint_32 < 1 || result.uint_32 > CIFS_MAX_CHANNELS) {
+			cifs_dbg(VFS, "%s: Invalid max_channels value, needs to be 1-%d\n",
+				 __func__, CIFS_MAX_CHANNELS);
+			goto cifs_parse_mount_err;
+		}
+		ctx->max_channels = result.uint_32;
+		break;
+	case Opt_handletimeout:
+		ctx->handle_timeout = result.uint_32;
+		if (ctx->handle_timeout > SMB3_MAX_HANDLE_TIMEOUT) {
+			cifs_dbg(VFS, "Invalid handle cache timeout, longer than 16 minutes\n");
+			goto cifs_parse_mount_err;
+		}
+		break;
+	case Opt_source:
+		kfree(ctx->UNC);
+		ctx->UNC = NULL;
+		switch (smb3_parse_devname(param->string, ctx)) {
+		case 0:
+			break;
+		case -ENOMEM:
+			cifs_dbg(VFS, "Unable to allocate memory for devname\n");
+			goto cifs_parse_mount_err;
+		case -EINVAL:
+			cifs_dbg(VFS, "Malformed UNC in devname\n");
+			goto cifs_parse_mount_err;
+		default:
+			cifs_dbg(VFS, "Unknown error parsing devname\n");
+			goto cifs_parse_mount_err;
+		}
+		fc->source = kstrdup(param->string, GFP_KERNEL);
+		if (fc->source == NULL) {
+			cifs_dbg(VFS, "OOM when copying UNC string\n");
+			goto cifs_parse_mount_err;
+		}
+		break;
+	case Opt_user:
+		kfree(ctx->username);
+		ctx->username = NULL;
+		if (strlen(param->string) == 0) {
+			/* null user, ie. anonymous authentication */
+			ctx->nullauth = 1;
+			break;
+		}
+
+		if (strnlen(param->string, CIFS_MAX_USERNAME_LEN) >
+		    CIFS_MAX_USERNAME_LEN) {
+			pr_warn("username too long\n");
+			goto cifs_parse_mount_err;
+		}
+		ctx->username = kstrdup(param->string, GFP_KERNEL);
+		if (ctx->username == NULL) {
+			cifs_dbg(VFS, "OOM when copying username string\n");
+			goto cifs_parse_mount_err;
+		}
+		break;
+	case Opt_pass:
+		kfree(ctx->password);
+		ctx->password = NULL;
+		if (strlen(param->string) == 0)
+			break;
+
+		ctx->password = kstrdup(param->string, GFP_KERNEL);
+		if (ctx->password == NULL) {
+			cifs_dbg(VFS, "OOM when copying password string\n");
+			goto cifs_parse_mount_err;
+		}
+		break;
+	case Opt_ip:
+		if (strlen(param->string) == 0) {
+			ctx->got_ip = false;
+			break;
+		}
+		if (!cifs_convert_address((struct sockaddr *)&ctx->dstaddr,
+					  param->string,
+					  strlen(param->string))) {
+			pr_err("bad ip= option (%s)\n", param->string);
+			goto cifs_parse_mount_err;
+		}
+		ctx->got_ip = true;
+		break;
+	case Opt_domain:
+		if (strnlen(param->string, CIFS_MAX_DOMAINNAME_LEN)
+				== CIFS_MAX_DOMAINNAME_LEN) {
+			pr_warn("domain name too long\n");
+			goto cifs_parse_mount_err;
+		}
+
+		kfree(ctx->domainname);
+		ctx->domainname = kstrdup(param->string, GFP_KERNEL);
+		if (ctx->domainname == NULL) {
+			cifs_dbg(VFS, "OOM when copying domainname string\n");
+			goto cifs_parse_mount_err;
+		}
+		cifs_dbg(FYI, "Domain name set\n");
+		break;
+	case Opt_srcaddr:
+		if (!cifs_convert_address(
+				(struct sockaddr *)&ctx->srcaddr,
+				param->string, strlen(param->string))) {
+			pr_warn("Could not parse srcaddr: %s\n",
+				param->string);
+			goto cifs_parse_mount_err;
+		}
+		break;
+	case Opt_iocharset:
+		if (strnlen(param->string, 1024) >= 65) {
+			pr_warn("iocharset name too long\n");
+			goto cifs_parse_mount_err;
+		}
+
+		if (strncasecmp(param->string, "default", 7) != 0) {
+			kfree(ctx->iocharset);
+			ctx->iocharset = kstrdup(param->string, GFP_KERNEL);
+			if (ctx->iocharset == NULL) {
+				cifs_dbg(VFS, "OOM when copying iocharset string\n");
+				goto cifs_parse_mount_err;
+			}
+		}
+		/* if iocharset not set then load_nls_default
+		 * is used by caller
+		 */
+		 cifs_dbg(FYI, "iocharset set to %s\n", ctx->iocharset);
+		break;
+	case Opt_netbiosname:
+		memset(ctx->source_rfc1001_name, 0x20,
+			RFC1001_NAME_LEN);
+		/*
+		 * FIXME: are there cases in which a comma can
+		 * be valid in workstation netbios name (and
+		 * need special handling)?
+		 */
+		for (i = 0; i < RFC1001_NAME_LEN; i++) {
+			/* don't ucase netbiosname for user */
+			if (param->string[i] == 0)
+				break;
+			ctx->source_rfc1001_name[i] = param->string[i];
+		}
+		/* The string has 16th byte zero still from
+		 * set at top of the function
+		 */
+		if (i == RFC1001_NAME_LEN && param->string[i] != 0)
+			pr_warn("netbiosname longer than 15 truncated\n");
+		break;
+	case Opt_servern:
+		/* last byte, type, is 0x20 for servr type */
+		memset(ctx->target_rfc1001_name, 0x20,
+			RFC1001_NAME_LEN_WITH_NULL);
+		/*
+		 * BB are there cases in which a comma can be valid in this
+		 * workstation netbios name (and need special handling)?
+		 */
+
+		/* user or mount helper must uppercase the netbios name */
+		for (i = 0; i < 15; i++) {
+			if (param->string[i] == 0)
+				break;
+			ctx->target_rfc1001_name[i] = param->string[i];
+		}
+
+		/* The string has 16th byte zero still from set at top of function */
+		if (i == RFC1001_NAME_LEN && param->string[i] != 0)
+			pr_warn("server netbiosname longer than 15 truncated\n");
+		break;
+	case Opt_ver:
+		/* version of mount userspace tools, not dialect */
+		/* If interface changes in mount.cifs bump to new ver */
+		if (strncasecmp(param->string, "1", 1) == 0) {
+			if (strlen(param->string) > 1) {
+				pr_warn("Bad mount helper ver=%s. Did you want SMB1 (CIFS) dialect and mean to type vers=1.0 instead?\n",
+					param->string);
+				goto cifs_parse_mount_err;
+			}
+			/* This is the default */
+			break;
+		}
+		/* For all other value, error */
+		pr_warn("Invalid mount helper version specified\n");
+		goto cifs_parse_mount_err;
+	case Opt_vers:
+		/* protocol version (dialect) */
+		if (cifs_parse_smb_version(param->string, ctx, is_smb3) != 0)
+			goto cifs_parse_mount_err;
+		ctx->got_version = true;
+		break;
+	case Opt_sec:
+		if (cifs_parse_security_flavors(param->string, ctx) != 0)
+			goto cifs_parse_mount_err;
+		break;
+	case Opt_cache:
+		if (cifs_parse_cache_flavor(param->string, ctx) != 0)
+			goto cifs_parse_mount_err;
+		break;
+	case Opt_witness:
+#ifndef CONFIG_CIFS_SWN_UPCALL
+		cifs_dbg(VFS, "Witness support needs CONFIG_CIFS_SWN_UPCALL config option\n");
+			goto cifs_parse_mount_err;
+#endif
+		ctx->witness = true;
+		pr_warn_once("Witness protocol support is experimental\n");
+		break;
+	case Opt_rootfs:
+#ifdef CONFIG_CIFS_ROOT
+		ctx->rootfs = true;
+#endif
+		break;
+	case Opt_posixpaths:
+		if (result.negated)
+			ctx->posix_paths = 0;
+		else
+			ctx->posix_paths = 1;
+		break;
+	case Opt_unix:
+		if (result.negated)
+			ctx->linux_ext = 0;
+		else
+			ctx->no_linux_ext = 1;
+		break;
+	case Opt_nocase:
+		ctx->nocase = 1;
+		break;
+	case Opt_brl:
+		if (result.negated) {
+			/*
+			 * turn off mandatory locking in mode
+			 * if remote locking is turned off since the
+			 * local vfs will do advisory
+			 */
+			if (ctx->file_mode ==
+				(S_IALLUGO & ~(S_ISUID | S_IXGRP)))
+				ctx->file_mode = S_IALLUGO;
+			ctx->nobrl =  1;
+		} else
+			ctx->nobrl =  0;
+		break;
+	case Opt_handlecache:
+		if (result.negated)
+			ctx->nohandlecache = 1;
+		else
+			ctx->nohandlecache = 0;
+		break;
+	case Opt_forcemandatorylock:
+		ctx->mand_lock = 1;
+		break;
+	case Opt_setuids:
+		ctx->setuids = result.negated;
+		break;
+	case Opt_intr:
+		ctx->intr = !result.negated;
+		break;
+	case Opt_setuidfromacl:
+		ctx->setuidfromacl = 1;
+		break;
+	case Opt_strictsync:
+		ctx->nostrictsync = result.negated;
+		break;
+	case Opt_serverino:
+		ctx->server_ino = !result.negated;
+		break;
+	case Opt_rwpidforward:
+		ctx->rwpidforward = 1;
+		break;
+	case Opt_modesid:
+		ctx->mode_ace = 1;
+		break;
+	case Opt_cifsacl:
+		ctx->cifs_acl = !result.negated;
+		break;
+	case Opt_acl:
+		ctx->no_psx_acl = result.negated;
+		break;
+	case Opt_locallease:
+		ctx->local_lease = 1;
+		break;
+	case Opt_sign:
+		ctx->sign = true;
+		break;
+	case Opt_ignore_signature:
+		ctx->sign = true;
+		ctx->ignore_signature = true;
+		break;
+	case Opt_seal:
+		/* we do not do the following in secFlags because seal
+		 * is a per tree connection (mount) not a per socket
+		 * or per-smb connection option in the protocol
+		 * vol->secFlg |= CIFSSEC_MUST_SEAL;
+		 */
+		ctx->seal = 1;
+		break;
+	case Opt_noac:
+		pr_warn("Mount option noac not supported. Instead set /proc/fs/cifs/LookupCacheEnabled to 0\n");
+		break;
+	case Opt_fsc:
+#ifndef CONFIG_CIFS_FSCACHE
+		cifs_dbg(VFS, "FS-Cache support needs CONFIG_CIFS_FSCACHE kernel config option set\n");
+		goto cifs_parse_mount_err;
+#endif
+		ctx->fsc = true;
+		break;
+	case Opt_mfsymlinks:
+		ctx->mfsymlinks = true;
+		break;
+	case Opt_multiuser:
+		ctx->multiuser = true;
+		break;
+	case Opt_sloppy:
+		ctx->sloppy = true;
+		break;
+	case Opt_nosharesock:
+		ctx->nosharesock = true;
+		break;
+	case Opt_persistent:
+		if (result.negated) {
+			ctx->nopersistent = true;
+			if (ctx->persistent) {
+				cifs_dbg(VFS,
+				  "persistenthandles mount options conflict\n");
+				goto cifs_parse_mount_err;
+			}
+		} else {
+			ctx->persistent = true;
+			if ((ctx->nopersistent) || (ctx->resilient)) {
+				cifs_dbg(VFS,
+				  "persistenthandles mount options conflict\n");
+				goto cifs_parse_mount_err;
+			}
+		}
+		break;
+	case Opt_resilient:
+		if (result.negated) {
+			ctx->resilient = false; /* already the default */
+		} else {
+			ctx->resilient = true;
+			if (ctx->persistent) {
+				cifs_dbg(VFS,
+				  "persistenthandles mount options conflict\n");
+				goto cifs_parse_mount_err;
+			}
+		}
+		break;
+	case Opt_domainauto:
+		ctx->domainauto = true;
+		break;
+	case Opt_rdma:
+		ctx->rdma = true;
+		break;
+	}
+	/* case Opt_ignore: - is ignored as expected ... */
+
+	return 0;
+
+ cifs_parse_mount_err:
+	return 1;
+}
+
+int smb3_init_fs_context(struct fs_context *fc)
+{
+	struct smb3_fs_context *ctx;
+	char *nodename = utsname()->nodename;
+	int i;
+
+	ctx = kzalloc(sizeof(struct smb3_fs_context), GFP_KERNEL);
+	if (unlikely(!ctx))
+		return -ENOMEM;
+
+	/*
+	 * does not have to be perfect mapping since field is
+	 * informational, only used for servers that do not support
+	 * port 445 and it can be overridden at mount time
+	 */
+	memset(ctx->source_rfc1001_name, 0x20, RFC1001_NAME_LEN);
+	for (i = 0; i < strnlen(nodename, RFC1001_NAME_LEN); i++)
+		ctx->source_rfc1001_name[i] = toupper(nodename[i]);
+
+	ctx->source_rfc1001_name[RFC1001_NAME_LEN] = 0;
+	/*
+	 * null target name indicates to use *SMBSERVR default called name
+	 *  if we end up sending RFC1001 session initialize
+	 */
+	ctx->target_rfc1001_name[0] = 0;
+	ctx->cred_uid = current_uid();
+	ctx->linux_uid = current_uid();
+	ctx->linux_gid = current_gid();
+	ctx->bsize = 1024 * 1024; /* can improve cp performance significantly */
+
+	/*
+	 * default to SFM style remapping of seven reserved characters
+	 * unless user overrides it or we negotiate CIFS POSIX where
+	 * it is unnecessary.  Can not simultaneously use more than one mapping
+	 * since then readdir could list files that open could not open
+	 */
+	ctx->remap = true;
+
+	/* default to only allowing write access to owner of the mount */
+	ctx->dir_mode = ctx->file_mode = S_IRUGO | S_IXUGO | S_IWUSR;
+
+	/* ctx->retry default is 0 (i.e. "soft" limited retry not hard retry) */
+	/* default is always to request posix paths. */
+	ctx->posix_paths = 1;
+	/* default to using server inode numbers where available */
+	ctx->server_ino = 1;
+
+	/* default is to use strict cifs caching semantics */
+	ctx->strict_io = true;
+
+	ctx->actimeo = CIFS_DEF_ACTIMEO;
+
+	/* Most clients set timeout to 0, allows server to use its default */
+	ctx->handle_timeout = 0; /* See MS-SMB2 spec section 2.2.14.2.12 */
+
+	/* offer SMB2.1 and later (SMB3 etc). Secure and widely accepted */
+	ctx->ops = &smb30_operations;
+	ctx->vals = &smbdefault_values;
+
+	ctx->echo_interval = SMB_ECHO_INTERVAL_DEFAULT;
+
+	/* default to no multichannel (single server connection) */
+	ctx->multichannel = false;
+	ctx->max_channels = 1;
+
+	ctx->backupuid_specified = false; /* no backup intent for a user */
+	ctx->backupgid_specified = false; /* no backup intent for a group */
+
+/*
+ *	short int override_uid = -1;
+ *	short int override_gid = -1;
+ *	char *nodename = strdup(utsname()->nodename);
+ *	struct sockaddr *dstaddr = (struct sockaddr *)&vol->dstaddr;
+ */
+
+	fc->fs_private = ctx;
+	fc->ops = &smb3_fs_context_ops;
+	return 0;
+}
+
+void
+smb3_cleanup_fs_context_contents(struct smb3_fs_context *ctx)
+{
+	if (ctx == NULL)
+		return;
+
+	/*
+	 * Make sure this stays in sync with smb3_fs_context_dup()
+	 */
+	kfree(ctx->mount_options);
+	ctx->mount_options = NULL;
+	kfree(ctx->username);
+	ctx->username = NULL;
+	kfree_sensitive(ctx->password);
+	ctx->password = NULL;
+	kfree(ctx->UNC);
+	ctx->UNC = NULL;
+	kfree(ctx->domainname);
+	ctx->domainname = NULL;
+	kfree(ctx->nodename);
+	ctx->nodename = NULL;
+	kfree(ctx->iocharset);
+	ctx->iocharset = NULL;
+	kfree(ctx->prepath);
+	ctx->prepath = NULL;
+}
+
+void
+smb3_cleanup_fs_context(struct smb3_fs_context *ctx)
+{
+	if (!ctx)
+		return;
+	smb3_cleanup_fs_context_contents(ctx);
+	kfree(ctx);
+}
+
+void smb3_update_mnt_flags(struct cifs_sb_info *cifs_sb)
+{
+	struct smb3_fs_context *ctx = cifs_sb->ctx;
+
+	if (ctx->nodfs)
+		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_DFS;
+	else
+		cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_NO_DFS;
+
+	if (ctx->noperm)
+		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_PERM;
+	else
+		cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_NO_PERM;
+
+	if (ctx->setuids)
+		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_SET_UID;
+	else
+		cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_SET_UID;
+
+	if (ctx->setuidfromacl)
+		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_UID_FROM_ACL;
+	else
+		cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_UID_FROM_ACL;
+
+	if (ctx->server_ino)
+		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_SERVER_INUM;
+	else
+		cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_SERVER_INUM;
+
+	if (ctx->remap)
+		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_MAP_SFM_CHR;
+	else
+		cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_MAP_SFM_CHR;
+
+	if (ctx->sfu_remap)
+		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_MAP_SPECIAL_CHR;
+	else
+		cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_MAP_SPECIAL_CHR;
+
+	if (ctx->no_xattr)
+		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_XATTR;
+	else
+		cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_NO_XATTR;
+
+	if (ctx->sfu_emul)
+		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_UNX_EMUL;
+	else
+		cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_UNX_EMUL;
+
+	if (ctx->nobrl)
+		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_BRL;
+	else
+		cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_NO_BRL;
+
+	if (ctx->nohandlecache)
+		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NO_HANDLE_CACHE;
+	else
+		cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_NO_HANDLE_CACHE;
+
+	if (ctx->nostrictsync)
+		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NOSSYNC;
+	else
+		cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_NOSSYNC;
+
+	if (ctx->mand_lock)
+		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_NOPOSIXBRL;
+	else
+		cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_NOPOSIXBRL;
+
+	if (ctx->rwpidforward)
+		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_RWPIDFORWARD;
+	else
+		cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_RWPIDFORWARD;
+
+	if (ctx->mode_ace)
+		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_MODE_FROM_SID;
+	else
+		cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_MODE_FROM_SID;
+
+	if (ctx->cifs_acl)
+		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_CIFS_ACL;
+	else
+		cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_CIFS_ACL;
+
+	if (ctx->backupuid_specified)
+		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_CIFS_BACKUPUID;
+	else
+		cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_CIFS_BACKUPUID;
+
+	if (ctx->backupgid_specified)
+		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_CIFS_BACKUPGID;
+	else
+		cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_CIFS_BACKUPGID;
+
+	if (ctx->override_uid)
+		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_OVERR_UID;
+	else
+		cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_OVERR_UID;
+
+	if (ctx->override_gid)
+		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_OVERR_GID;
+	else
+		cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_OVERR_GID;
+
+	if (ctx->dynperm)
+		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DYNPERM;
+	else
+		cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_DYNPERM;
+
+	if (ctx->fsc)
+		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_FSCACHE;
+	else
+		cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_FSCACHE;
+
+	if (ctx->multiuser)
+		cifs_sb->mnt_cifs_flags |= (CIFS_MOUNT_MULTIUSER |
+					    CIFS_MOUNT_NO_PERM);
+	else
+		cifs_sb->mnt_cifs_flags &= ~(CIFS_MOUNT_MULTIUSER |
+					     CIFS_MOUNT_NO_PERM);
+
+	if (ctx->strict_io)
+		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_STRICT_IO;
+	else
+		cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_STRICT_IO;
+
+	if (ctx->direct_io)
+		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_DIRECT_IO;
+	else
+		cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_DIRECT_IO;
+
+	if (ctx->mfsymlinks)
+		cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_MF_SYMLINKS;
+	else
+		cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_MF_SYMLINKS;
+	if (ctx->mfsymlinks) {
+		if (ctx->sfu_emul) {
+			/*
+			 * Our SFU ("Services for Unix" emulation does not allow
+			 * creating symlinks but does allow reading existing SFU
+			 * symlinks (it does allow both creating and reading SFU
+			 * style mknod and FIFOs though). When "mfsymlinks" and
+			 * "sfu" are both enabled at the same time, it allows
+			 * reading both types of symlinks, but will only create
+			 * them with mfsymlinks format. This allows better
+			 * Apple compatibility (probably better for Samba too)
+			 * while still recognizing old Windows style symlinks.
+			 */
+			cifs_dbg(VFS, "mount options mfsymlinks and sfu both enabled\n");
+		}
+	}
+
+	return;
+}
diff --git a/fs/cifs/fs_context.h b/fs/cifs/fs_context.h
index 886208a1b0ef..3358b33abcd0 100644
--- a/fs/cifs/fs_context.h
+++ b/fs/cifs/fs_context.h
@@ -9,8 +9,11 @@
 #ifndef _FS_CONTEXT_H
 #define _FS_CONTEXT_H
 
-#include <linux/parser.h>
 #include "cifsglob.h"
+#include <linux/parser.h>
+#include <linux/fs_parser.h>
+
+#define cifs_invalf(fc, fmt, ...) invalf(fc, fmt, ## __VA_ARGS__)
 
 enum smb_version {
 	Smb_1 = 1,
@@ -24,8 +27,6 @@ enum smb_version {
 	Smb_version_err
 };
 
-int cifs_parse_smb_version(char *value, struct smb_vol *vol, bool is_smb3);
-
 enum {
 	Opt_cache_loose,
 	Opt_cache_strict,
@@ -35,8 +36,6 @@ enum {
 	Opt_cache_err
 };
 
-int cifs_parse_cache_flavor(char *value, struct smb_vol *vol);
-
 enum cifs_sec_param {
 	Opt_sec_krb5,
 	Opt_sec_krb5i,
@@ -53,6 +52,220 @@ enum cifs_sec_param {
 	Opt_sec_err
 };
 
-int cifs_parse_security_flavors(char *value, struct smb_vol *vol);
+enum cifs_param {
+	/* Mount options that take no arguments */
+	Opt_user_xattr,
+	Opt_forceuid,
+	Opt_forcegid,
+	Opt_noblocksend,
+	Opt_noautotune,
+	Opt_nolease,
+	Opt_hard,
+	Opt_soft,
+	Opt_perm,
+	Opt_nodelete,
+	Opt_mapposix,
+	Opt_mapchars,
+	Opt_nomapchars,
+	Opt_sfu,
+	Opt_nodfs,
+	Opt_posixpaths,
+	Opt_unix,
+	Opt_nocase,
+	Opt_brl,
+	Opt_handlecache,
+	Opt_forcemandatorylock,
+	Opt_setuidfromacl,
+	Opt_setuids,
+	Opt_dynperm,
+	Opt_intr,
+	Opt_strictsync,
+	Opt_serverino,
+	Opt_rwpidforward,
+	Opt_cifsacl,
+	Opt_acl,
+	Opt_locallease,
+	Opt_sign,
+	Opt_ignore_signature,
+	Opt_seal,
+	Opt_noac,
+	Opt_fsc,
+	Opt_mfsymlinks,
+	Opt_multiuser,
+	Opt_sloppy,
+	Opt_nosharesock,
+	Opt_persistent,
+	Opt_resilient,
+	Opt_domainauto,
+	Opt_rdma,
+	Opt_modesid,
+	Opt_rootfs,
+	Opt_multichannel,
+	Opt_compress,
+	Opt_witness,
+
+	/* Mount options which take numeric value */
+	Opt_backupuid,
+	Opt_backupgid,
+	Opt_uid,
+	Opt_cruid,
+	Opt_gid,
+	Opt_port,
+	Opt_file_mode,
+	Opt_dirmode,
+	Opt_min_enc_offload,
+	Opt_blocksize,
+	Opt_rsize,
+	Opt_wsize,
+	Opt_actimeo,
+	Opt_echo_interval,
+	Opt_max_credits,
+	Opt_snapshot,
+	Opt_max_channels,
+	Opt_handletimeout,
+
+	/* Mount options which take string value */
+	Opt_source,
+	Opt_user,
+	Opt_pass,
+	Opt_ip,
+	Opt_domain,
+	Opt_srcaddr,
+	Opt_iocharset,
+	Opt_netbiosname,
+	Opt_servern,
+	Opt_ver,
+	Opt_vers,
+	Opt_sec,
+	Opt_cache,
+
+	/* Mount options to be ignored */
+	Opt_ignore,
+
+	Opt_err
+};
+
+struct smb3_fs_context {
+	bool uid_specified;
+	bool gid_specified;
+	bool sloppy;
+	bool got_ip;
+	bool got_version;
+	bool got_rsize;
+	bool got_wsize;
+	bool got_bsize;
+	unsigned short port;
+
+	char *username;
+	char *password;
+	char *domainname;
+	char *UNC;
+	char *nodename;
+	char *iocharset;  /* local code page for mapping to and from Unicode */
+	char source_rfc1001_name[RFC1001_NAME_LEN_WITH_NULL]; /* clnt nb name */
+	char target_rfc1001_name[RFC1001_NAME_LEN_WITH_NULL]; /* srvr nb name */
+	kuid_t cred_uid;
+	kuid_t linux_uid;
+	kgid_t linux_gid;
+	kuid_t backupuid;
+	kgid_t backupgid;
+	umode_t file_mode;
+	umode_t dir_mode;
+	enum securityEnum sectype; /* sectype requested via mnt opts */
+	bool sign; /* was signing requested via mnt opts? */
+	bool ignore_signature:1;
+	bool retry:1;
+	bool intr:1;
+	bool setuids:1;
+	bool setuidfromacl:1;
+	bool override_uid:1;
+	bool override_gid:1;
+	bool dynperm:1;
+	bool noperm:1;
+	bool nodelete:1;
+	bool mode_ace:1;
+	bool no_psx_acl:1; /* set if posix acl support should be disabled */
+	bool cifs_acl:1;
+	bool backupuid_specified; /* mount option  backupuid  is specified */
+	bool backupgid_specified; /* mount option  backupgid  is specified */
+	bool no_xattr:1;   /* set if xattr (EA) support should be disabled*/
+	bool server_ino:1; /* use inode numbers from server ie UniqueId */
+	bool direct_io:1;
+	bool strict_io:1; /* strict cache behavior */
+	bool cache_ro:1;
+	bool cache_rw:1;
+	bool remap:1;      /* set to remap seven reserved chars in filenames */
+	bool sfu_remap:1;  /* remap seven reserved chars ala SFU */
+	bool posix_paths:1; /* unset to not ask for posix pathnames. */
+	bool no_linux_ext:1;
+	bool linux_ext:1;
+	bool sfu_emul:1;
+	bool nullauth:1;   /* attempt to authenticate with null user */
+	bool nocase:1;     /* request case insensitive filenames */
+	bool nobrl:1;      /* disable sending byte range locks to srv */
+	bool nohandlecache:1; /* disable caching dir handles if srvr probs */
+	bool mand_lock:1;  /* send mandatory not posix byte range lock reqs */
+	bool seal:1;       /* request transport encryption on share */
+	bool nodfs:1;      /* Do not request DFS, even if available */
+	bool local_lease:1; /* check leases only on local system, not remote */
+	bool noblocksnd:1;
+	bool noautotune:1;
+	bool nostrictsync:1; /* do not force expensive SMBflush on every sync */
+	bool no_lease:1;     /* disable requesting leases */
+	bool fsc:1;	/* enable fscache */
+	bool mfsymlinks:1; /* use Minshall+French Symlinks */
+	bool multiuser:1;
+	bool rwpidforward:1; /* pid forward for read/write operations */
+	bool nosharesock:1;
+	bool persistent:1;
+	bool nopersistent:1;
+	bool resilient:1; /* noresilient not required since not fored for CA */
+	bool domainauto:1;
+	bool rdma:1;
+	bool multichannel:1;
+	bool use_client_guid:1;
+	/* reuse existing guid for multichannel */
+	u8 client_guid[SMB2_CLIENT_GUID_SIZE];
+	unsigned int bsize;
+	unsigned int rsize;
+	unsigned int wsize;
+	unsigned int min_offload;
+	bool sockopt_tcp_nodelay:1;
+	unsigned long actimeo; /* attribute cache timeout (jiffies) */
+	struct smb_version_operations *ops;
+	struct smb_version_values *vals;
+	char *prepath;
+	struct sockaddr_storage dstaddr; /* destination address */
+	struct sockaddr_storage srcaddr; /* allow binding to a local IP */
+	struct nls_table *local_nls; /* This is a copy of the pointer in cifs_sb */
+	unsigned int echo_interval; /* echo interval in secs */
+	__u64 snapshot_time; /* needed for timewarp tokens */
+	__u32 handle_timeout; /* persistent and durable handle timeout in ms */
+	unsigned int max_credits; /* smb3 max_credits 10 < credits < 60000 */
+	unsigned int max_channels;
+	__u16 compression; /* compression algorithm 0xFFFF default 0=disabled */
+	bool rootfs:1; /* if it's a SMB root file system */
+	bool witness:1; /* use witness protocol */
+
+	char *mount_options;
+};
+
+extern const struct fs_parameter_spec smb3_fs_parameters[];
+
+extern int cifs_parse_cache_flavor(char *value,
+				   struct smb3_fs_context *ctx);
+extern int cifs_parse_security_flavors(char *value,
+				       struct smb3_fs_context *ctx);
+extern int smb3_init_fs_context(struct fs_context *fc);
+extern void smb3_cleanup_fs_context_contents(struct smb3_fs_context *ctx);
+extern void smb3_cleanup_fs_context(struct smb3_fs_context *ctx);
+
+static inline struct smb3_fs_context *smb3_fc2context(const struct fs_context *fc)
+{
+	return fc->fs_private;
+}
+
+extern int smb3_fs_context_dup(struct smb3_fs_context *new_ctx, struct smb3_fs_context *ctx);
+extern void smb3_update_mnt_flags(struct cifs_sb_info *cifs_sb);
 
 #endif
diff --git a/fs/cifs/fscache.c b/fs/cifs/fscache.c
index da688185403c..20d24af33ee2 100644
--- a/fs/cifs/fscache.c
+++ b/fs/cifs/fscache.c
@@ -22,6 +22,7 @@
 #include "cifsglob.h"
 #include "cifs_debug.h"
 #include "cifs_fs_sb.h"
+#include "cifsproto.h"
 
 /*
  * Key layout of CIFS server cache index object
diff --git a/fs/cifs/fscache.h b/fs/cifs/fscache.h
index 1091633d2adb..e811f2dd7619 100644
--- a/fs/cifs/fscache.h
+++ b/fs/cifs/fscache.h
@@ -57,7 +57,6 @@ extern const struct fscache_cookie_def cifs_fscache_inode_object_def;
 
 extern int cifs_fscache_register(void);
 extern void cifs_fscache_unregister(void);
-extern char *extract_sharename(const char *);
 
 /*
  * fscache.c
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index 9ee5f304592f..a83b3a8ffaac 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -37,6 +37,7 @@
 #include "cifs_fs_sb.h"
 #include "cifs_unicode.h"
 #include "fscache.h"
+#include "fs_context.h"
 
 
 static void cifs_set_ops(struct inode *inode)
@@ -294,7 +295,7 @@ cifs_unix_basic_to_fattr(struct cifs_fattr *fattr, FILE_UNIX_BASIC_INFO *info,
 		break;
 	}
 
-	fattr->cf_uid = cifs_sb->mnt_uid;
+	fattr->cf_uid = cifs_sb->ctx->linux_uid;
 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID)) {
 		u64 id = le64_to_cpu(info->Uid);
 		if (id < ((uid_t)-1)) {
@@ -304,7 +305,7 @@ cifs_unix_basic_to_fattr(struct cifs_fattr *fattr, FILE_UNIX_BASIC_INFO *info,
 		}
 	}
 	
-	fattr->cf_gid = cifs_sb->mnt_gid;
+	fattr->cf_gid = cifs_sb->ctx->linux_gid;
 	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID)) {
 		u64 id = le64_to_cpu(info->Gid);
 		if (id < ((gid_t)-1)) {
@@ -333,8 +334,8 @@ cifs_create_dfs_fattr(struct cifs_fattr *fattr, struct super_block *sb)
 
 	memset(fattr, 0, sizeof(*fattr));
 	fattr->cf_mode = S_IFDIR | S_IXUGO | S_IRWXU;
-	fattr->cf_uid = cifs_sb->mnt_uid;
-	fattr->cf_gid = cifs_sb->mnt_gid;
+	fattr->cf_uid = cifs_sb->ctx->linux_uid;
+	fattr->cf_gid = cifs_sb->ctx->linux_gid;
 	ktime_get_coarse_real_ts64(&fattr->cf_mtime);
 	fattr->cf_atime = fattr->cf_ctime = fattr->cf_mtime;
 	fattr->cf_nlink = 2;
@@ -644,8 +645,8 @@ smb311_posix_info_to_fattr(struct cifs_fattr *fattr, struct smb311_posix_qinfo *
 	}
 	/* else if reparse point ... TODO: add support for FIFO and blk dev; special file types */
 
-	fattr->cf_uid = cifs_sb->mnt_uid; /* TODO: map uid and gid from SID */
-	fattr->cf_gid = cifs_sb->mnt_gid;
+	fattr->cf_uid = cifs_sb->ctx->linux_uid; /* TODO: map uid and gid from SID */
+	fattr->cf_gid = cifs_sb->ctx->linux_gid;
 
 	cifs_dbg(FYI, "POSIX query info: mode 0x%x uniqueid 0x%llx nlink %d\n",
 		fattr->cf_mode, fattr->cf_uniqueid, fattr->cf_nlink);
@@ -685,25 +686,25 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info,
 
 	fattr->cf_nlink = le32_to_cpu(info->NumberOfLinks);
 	if (reparse_tag == IO_REPARSE_TAG_LX_SYMLINK) {
-		fattr->cf_mode |= S_IFLNK | cifs_sb->mnt_file_mode;
+		fattr->cf_mode |= S_IFLNK | cifs_sb->ctx->file_mode;
 		fattr->cf_dtype = DT_LNK;
 	} else if (reparse_tag == IO_REPARSE_TAG_LX_FIFO) {
-		fattr->cf_mode |= S_IFIFO | cifs_sb->mnt_file_mode;
+		fattr->cf_mode |= S_IFIFO | cifs_sb->ctx->file_mode;
 		fattr->cf_dtype = DT_FIFO;
 	} else if (reparse_tag == IO_REPARSE_TAG_AF_UNIX) {
-		fattr->cf_mode |= S_IFSOCK | cifs_sb->mnt_file_mode;
+		fattr->cf_mode |= S_IFSOCK | cifs_sb->ctx->file_mode;
 		fattr->cf_dtype = DT_SOCK;
 	} else if (reparse_tag == IO_REPARSE_TAG_LX_CHR) {
-		fattr->cf_mode |= S_IFCHR | cifs_sb->mnt_file_mode;
+		fattr->cf_mode |= S_IFCHR | cifs_sb->ctx->file_mode;
 		fattr->cf_dtype = DT_CHR;
 	} else if (reparse_tag == IO_REPARSE_TAG_LX_BLK) {
-		fattr->cf_mode |= S_IFBLK | cifs_sb->mnt_file_mode;
+		fattr->cf_mode |= S_IFBLK | cifs_sb->ctx->file_mode;
 		fattr->cf_dtype = DT_BLK;
 	} else if (symlink) { /* TODO add more reparse tag checks */
 		fattr->cf_mode = S_IFLNK;
 		fattr->cf_dtype = DT_LNK;
 	} else if (fattr->cf_cifsattrs & ATTR_DIRECTORY) {
-		fattr->cf_mode = S_IFDIR | cifs_sb->mnt_dir_mode;
+		fattr->cf_mode = S_IFDIR | cifs_sb->ctx->dir_mode;
 		fattr->cf_dtype = DT_DIR;
 		/*
 		 * Server can return wrong NumberOfLinks value for directories
@@ -712,7 +713,7 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info,
 		if (!tcon->unix_ext)
 			fattr->cf_flags |= CIFS_FATTR_UNKNOWN_NLINK;
 	} else {
-		fattr->cf_mode = S_IFREG | cifs_sb->mnt_file_mode;
+		fattr->cf_mode = S_IFREG | cifs_sb->ctx->file_mode;
 		fattr->cf_dtype = DT_REG;
 
 		/* clear write bits if ATTR_READONLY is set */
@@ -731,8 +732,8 @@ cifs_all_info_to_fattr(struct cifs_fattr *fattr, FILE_ALL_INFO *info,
 		}
 	}
 
-	fattr->cf_uid = cifs_sb->mnt_uid;
-	fattr->cf_gid = cifs_sb->mnt_gid;
+	fattr->cf_uid = cifs_sb->ctx->linux_uid;
+	fattr->cf_gid = cifs_sb->ctx->linux_gid;
 }
 
 static int
@@ -771,6 +772,7 @@ cifs_get_file_info(struct file *filp)
 		 */
 		rc = 0;
 		CIFS_I(inode)->time = 0;
+		goto cgfi_exit;
 	default:
 		goto cgfi_exit;
 	}
@@ -803,11 +805,15 @@ static __u64 simple_hashstr(const char *str)
  * cifs_backup_query_path_info - SMB1 fallback code to get ino
  *
  * Fallback code to get file metadata when we don't have access to
- * @full_path (EACCES) and have backup creds.
+ * full_path (EACCES) and have backup creds.
  *
- * @data will be set to search info result buffer
- * @resp_buf will be set to cifs resp buf and needs to be freed with
- * cifs_buf_release() when done with @data.
+ * @xid:	transaction id used to identify original request in logs
+ * @tcon:	information about the server share we have mounted
+ * @sb:	the superblock stores info such as disk space available
+ * @full_path:	name of the file we are getting the metadata for
+ * @resp_buf:	will be set to cifs resp buf and needs to be freed with
+ * 		cifs_buf_release() when done with @data
+ * @data:	will be set to search info result buffer
  */
 static int
 cifs_backup_query_path_info(int xid,
@@ -1386,8 +1392,8 @@ iget_no_retry:
 		set_nlink(inode, 2);
 		inode->i_op = &cifs_ipc_inode_ops;
 		inode->i_fop = &simple_dir_operations;
-		inode->i_uid = cifs_sb->mnt_uid;
-		inode->i_gid = cifs_sb->mnt_gid;
+		inode->i_uid = cifs_sb->ctx->linux_uid;
+		inode->i_gid = cifs_sb->ctx->linux_gid;
 		spin_unlock(&inode->i_lock);
 	} else if (rc) {
 		iget_failed(inode);
@@ -2192,11 +2198,11 @@ cifs_inode_needs_reval(struct inode *inode)
 	if (!lookupCacheEnabled)
 		return true;
 
-	if (!cifs_sb->actimeo)
+	if (!cifs_sb->ctx->actimeo)
 		return true;
 
 	if (!time_in_range(jiffies, cifs_i->time,
-				cifs_i->time + cifs_sb->actimeo))
+				cifs_i->time + cifs_sb->ctx->actimeo))
 		return true;
 
 	/* hardlinked files w/ noserverino get "special" treatment */
@@ -2228,7 +2234,9 @@ cifs_invalidate_mapping(struct inode *inode)
 
 /**
  * cifs_wait_bit_killable - helper for functions that are sleeping on bit locks
- * @word: long word containing the bit lock
+ *
+ * @key:	currently unused
+ * @mode:	the task state to sleep in
  */
 static int
 cifs_wait_bit_killable(struct wait_bit_key *key, int mode)
@@ -2401,7 +2409,7 @@ int cifs_getattr(const struct path *path, struct kstat *stat,
 	}
 
 	generic_fillattr(inode, stat);
-	stat->blksize = cifs_sb->bsize;
+	stat->blksize = cifs_sb->ctx->bsize;
 	stat->ino = CIFS_I(inode)->uniqueid;
 
 	/* old CIFS Unix Extensions doesn't return create time */
@@ -2812,7 +2820,8 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
 	if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) ||
 	    (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MODE_FROM_SID)) {
 		if (uid_valid(uid) || gid_valid(gid)) {
-			rc = id_mode_to_cifs_acl(inode, full_path, NO_CHANGE_64,
+			mode = NO_CHANGE_64;
+			rc = id_mode_to_cifs_acl(inode, full_path, &mode,
 							uid, gid);
 			if (rc) {
 				cifs_dbg(FYI, "%s: Setting id failed with error: %d\n",
@@ -2833,13 +2842,20 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
 		rc = 0;
 		if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_ACL) ||
 		    (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MODE_FROM_SID)) {
-			rc = id_mode_to_cifs_acl(inode, full_path, mode,
+			rc = id_mode_to_cifs_acl(inode, full_path, &mode,
 						INVALID_UID, INVALID_GID);
 			if (rc) {
 				cifs_dbg(FYI, "%s: Setting ACL failed with error: %d\n",
 					 __func__, rc);
 				goto cifs_setattr_exit;
 			}
+
+			/*
+			 * In case of CIFS_MOUNT_CIFS_ACL, we cannot support all modes.
+			 * Pick up the actual mode bits that were set.
+			 */
+			if (mode != attrs->ia_mode)
+				attrs->ia_mode = mode;
 		} else
 		if (((mode & S_IWUGO) == 0) &&
 		    (cifsInode->cifsAttrs & ATTR_READONLY) == 0) {
@@ -2862,10 +2878,10 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
 				attrs->ia_mode &= ~(S_IALLUGO);
 				if (S_ISDIR(inode->i_mode))
 					attrs->ia_mode |=
-						cifs_sb->mnt_dir_mode;
+						cifs_sb->ctx->dir_mode;
 				else
 					attrs->ia_mode |=
-						cifs_sb->mnt_file_mode;
+						cifs_sb->ctx->file_mode;
 			}
 		} else if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DYNPERM)) {
 			/* ignore mode change - ATTR_READONLY hasn't changed */
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 1c14cf01dbef..82e176720ca6 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -35,6 +35,7 @@
 #ifdef CONFIG_CIFS_DFS_UPCALL
 #include "dns_resolve.h"
 #endif
+#include "fs_context.h"
 
 extern mempool_t *cifs_sm_req_poolp;
 extern mempool_t *cifs_req_poolp;
@@ -632,11 +633,11 @@ bool
 backup_cred(struct cifs_sb_info *cifs_sb)
 {
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPUID) {
-		if (uid_eq(cifs_sb->mnt_backupuid, current_fsuid()))
+		if (uid_eq(cifs_sb->ctx->backupuid, current_fsuid()))
 			return true;
 	}
 	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPGID) {
-		if (in_group_p(cifs_sb->mnt_backupgid))
+		if (in_group_p(cifs_sb->ctx->backupgid))
 			return true;
 	}
 
diff --git a/fs/cifs/netlink.c b/fs/cifs/netlink.c
new file mode 100644
index 000000000000..5aaabe4cc0a7
--- /dev/null
+++ b/fs/cifs/netlink.c
@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Netlink routines for CIFS
+ *
+ * Copyright (c) 2020 Samuel Cabrero <scabrero@suse.de>
+ */
+
+#include <net/genetlink.h>
+#include <uapi/linux/cifs/cifs_netlink.h>
+
+#include "netlink.h"
+#include "cifsglob.h"
+#include "cifs_debug.h"
+#include "cifs_swn.h"
+
+static const struct nla_policy cifs_genl_policy[CIFS_GENL_ATTR_MAX + 1] = {
+	[CIFS_GENL_ATTR_SWN_REGISTRATION_ID]	= { .type = NLA_U32 },
+	[CIFS_GENL_ATTR_SWN_NET_NAME]		= { .type = NLA_STRING },
+	[CIFS_GENL_ATTR_SWN_SHARE_NAME]		= { .type = NLA_STRING },
+	[CIFS_GENL_ATTR_SWN_IP]			= { .len = sizeof(struct sockaddr_storage) },
+	[CIFS_GENL_ATTR_SWN_NET_NAME_NOTIFY]	= { .type = NLA_FLAG },
+	[CIFS_GENL_ATTR_SWN_SHARE_NAME_NOTIFY]	= { .type = NLA_FLAG },
+	[CIFS_GENL_ATTR_SWN_IP_NOTIFY]		= { .type = NLA_FLAG },
+	[CIFS_GENL_ATTR_SWN_KRB_AUTH]		= { .type = NLA_FLAG },
+	[CIFS_GENL_ATTR_SWN_USER_NAME]		= { .type = NLA_STRING },
+	[CIFS_GENL_ATTR_SWN_PASSWORD]		= { .type = NLA_STRING },
+	[CIFS_GENL_ATTR_SWN_DOMAIN_NAME]	= { .type = NLA_STRING },
+	[CIFS_GENL_ATTR_SWN_NOTIFICATION_TYPE]	= { .type = NLA_U32 },
+	[CIFS_GENL_ATTR_SWN_RESOURCE_STATE]	= { .type = NLA_U32 },
+	[CIFS_GENL_ATTR_SWN_RESOURCE_NAME]	= { .type = NLA_STRING},
+};
+
+static struct genl_ops cifs_genl_ops[] = {
+	{
+		.cmd = CIFS_GENL_CMD_SWN_NOTIFY,
+		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.doit = cifs_swn_notify,
+	},
+};
+
+static const struct genl_multicast_group cifs_genl_mcgrps[] = {
+	[CIFS_GENL_MCGRP_SWN] = { .name = CIFS_GENL_MCGRP_SWN_NAME },
+};
+
+struct genl_family cifs_genl_family = {
+	.name		= CIFS_GENL_NAME,
+	.version	= CIFS_GENL_VERSION,
+	.hdrsize	= 0,
+	.maxattr	= CIFS_GENL_ATTR_MAX,
+	.module		= THIS_MODULE,
+	.policy		= cifs_genl_policy,
+	.ops		= cifs_genl_ops,
+	.n_ops		= ARRAY_SIZE(cifs_genl_ops),
+	.mcgrps		= cifs_genl_mcgrps,
+	.n_mcgrps	= ARRAY_SIZE(cifs_genl_mcgrps),
+};
+
+/**
+ * cifs_genl_init - Register generic netlink family
+ *
+ * Return zero if initialized successfully, otherwise non-zero.
+ */
+int cifs_genl_init(void)
+{
+	int ret;
+
+	ret = genl_register_family(&cifs_genl_family);
+	if (ret < 0) {
+		cifs_dbg(VFS, "%s: failed to register netlink family\n",
+				__func__);
+		return ret;
+	}
+
+	return 0;
+}
+
+/**
+ * cifs_genl_exit - Unregister generic netlink family
+ */
+void cifs_genl_exit(void)
+{
+	int ret;
+
+	ret = genl_unregister_family(&cifs_genl_family);
+	if (ret < 0) {
+		cifs_dbg(VFS, "%s: failed to unregister netlink family\n",
+				__func__);
+	}
+}
diff --git a/fs/cifs/netlink.h b/fs/cifs/netlink.h
new file mode 100644
index 000000000000..e2fa8ed24c54
--- /dev/null
+++ b/fs/cifs/netlink.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Netlink routines for CIFS
+ *
+ * Copyright (c) 2020 Samuel Cabrero <scabrero@suse.de>
+ */
+
+#ifndef _CIFS_NETLINK_H
+#define _CIFS_NETLINK_H
+
+extern struct genl_family cifs_genl_family;
+
+extern int cifs_genl_init(void);
+extern void cifs_genl_exit(void);
+
+#endif /* _CIFS_NETLINK_H */
diff --git a/fs/cifs/readdir.c b/fs/cifs/readdir.c
index 799be3a5d25e..80bf4c6f4c7b 100644
--- a/fs/cifs/readdir.c
+++ b/fs/cifs/readdir.c
@@ -33,6 +33,7 @@
 #include "cifs_fs_sb.h"
 #include "cifsfs.h"
 #include "smb2proto.h"
+#include "fs_context.h"
 
 /*
  * To be safe - for UCS to UTF-8 with strings loaded with the rare long
@@ -165,8 +166,8 @@ static bool reparse_file_needs_reval(const struct cifs_fattr *fattr)
 static void
 cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb)
 {
-	fattr->cf_uid = cifs_sb->mnt_uid;
-	fattr->cf_gid = cifs_sb->mnt_gid;
+	fattr->cf_uid = cifs_sb->ctx->linux_uid;
+	fattr->cf_gid = cifs_sb->ctx->linux_gid;
 
 	/*
 	 * The IO_REPARSE_TAG_LX_ tags originally were used by WSL but they
@@ -177,25 +178,25 @@ cifs_fill_common_info(struct cifs_fattr *fattr, struct cifs_sb_info *cifs_sb)
 	 * reasonably map some of them to directories vs. files vs. symlinks
 	 */
 	if (fattr->cf_cifsattrs & ATTR_DIRECTORY) {
-		fattr->cf_mode = S_IFDIR | cifs_sb->mnt_dir_mode;
+		fattr->cf_mode = S_IFDIR | cifs_sb->ctx->dir_mode;
 		fattr->cf_dtype = DT_DIR;
 	} else if (fattr->cf_cifstag == IO_REPARSE_TAG_LX_SYMLINK) {
-		fattr->cf_mode |= S_IFLNK | cifs_sb->mnt_file_mode;
+		fattr->cf_mode |= S_IFLNK | cifs_sb->ctx->file_mode;
 		fattr->cf_dtype = DT_LNK;
 	} else if (fattr->cf_cifstag == IO_REPARSE_TAG_LX_FIFO) {
-		fattr->cf_mode |= S_IFIFO | cifs_sb->mnt_file_mode;
+		fattr->cf_mode |= S_IFIFO | cifs_sb->ctx->file_mode;
 		fattr->cf_dtype = DT_FIFO;
 	} else if (fattr->cf_cifstag == IO_REPARSE_TAG_AF_UNIX) {
-		fattr->cf_mode |= S_IFSOCK | cifs_sb->mnt_file_mode;
+		fattr->cf_mode |= S_IFSOCK | cifs_sb->ctx->file_mode;
 		fattr->cf_dtype = DT_SOCK;
 	} else if (fattr->cf_cifstag == IO_REPARSE_TAG_LX_CHR) {
-		fattr->cf_mode |= S_IFCHR | cifs_sb->mnt_file_mode;
+		fattr->cf_mode |= S_IFCHR | cifs_sb->ctx->file_mode;
 		fattr->cf_dtype = DT_CHR;
 	} else if (fattr->cf_cifstag == IO_REPARSE_TAG_LX_BLK) {
-		fattr->cf_mode |= S_IFBLK | cifs_sb->mnt_file_mode;
+		fattr->cf_mode |= S_IFBLK | cifs_sb->ctx->file_mode;
 		fattr->cf_dtype = DT_BLK;
 	} else { /* TODO: should we mark some other reparse points (like DFSR) as directories? */
-		fattr->cf_mode = S_IFREG | cifs_sb->mnt_file_mode;
+		fattr->cf_mode = S_IFREG | cifs_sb->ctx->file_mode;
 		fattr->cf_dtype = DT_REG;
 	}
 
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index de564368a887..213465718fa8 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -32,6 +32,11 @@
 #include <linux/slab.h>
 #include "cifs_spnego.h"
 #include "smb2proto.h"
+#include "fs_context.h"
+
+static int
+cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses,
+		     struct cifs_server_iface *iface);
 
 bool
 is_server_using_iface(struct TCP_Server_Info *server,
@@ -70,7 +75,7 @@ bool is_ses_using_iface(struct cifs_ses *ses, struct cifs_server_iface *iface)
 }
 
 /* returns number of channels added */
-int cifs_try_adding_channels(struct cifs_ses *ses)
+int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses)
 {
 	int old_chan_count = ses->chan_count;
 	int left = ses->chan_max - ses->chan_count;
@@ -133,7 +138,7 @@ int cifs_try_adding_channels(struct cifs_ses *ses)
 			continue;
 		}
 
-		rc = cifs_ses_add_channel(ses, iface);
+		rc = cifs_ses_add_channel(cifs_sb, ses, iface);
 		if (rc) {
 			cifs_dbg(FYI, "failed to open extra channel on iface#%d rc=%d\n",
 				 i, rc);
@@ -166,11 +171,12 @@ cifs_ses_find_chan(struct cifs_ses *ses, struct TCP_Server_Info *server)
 	return NULL;
 }
 
-int
-cifs_ses_add_channel(struct cifs_ses *ses, struct cifs_server_iface *iface)
+static int
+cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses,
+		     struct cifs_server_iface *iface)
 {
 	struct cifs_chan *chan;
-	struct smb_vol vol = {NULL};
+	struct smb3_fs_context ctx = {NULL};
 	static const char unc_fmt[] = "\\%s\\foo";
 	char unc[sizeof(unc_fmt)+SERVER_NAME_LEN_WITH_NULL] = {0};
 	struct sockaddr_in *ipv4 = (struct sockaddr_in *)&iface->sockaddr;
@@ -188,67 +194,62 @@ cifs_ses_add_channel(struct cifs_ses *ses, struct cifs_server_iface *iface)
 			 &ipv6->sin6_addr);
 
 	/*
-	 * Setup a smb_vol with mostly the same info as the existing
+	 * Setup a ctx with mostly the same info as the existing
 	 * session and overwrite it with the requested iface data.
 	 *
 	 * We need to setup at least the fields used for negprot and
 	 * sesssetup.
 	 *
-	 * We only need the volume here, so we can reuse memory from
+	 * We only need the ctx here, so we can reuse memory from
 	 * the session and server without caring about memory
 	 * management.
 	 */
 
 	/* Always make new connection for now (TODO?) */
-	vol.nosharesock = true;
+	ctx.nosharesock = true;
 
 	/* Auth */
-	vol.domainauto = ses->domainAuto;
-	vol.domainname = ses->domainName;
-	vol.username = ses->user_name;
-	vol.password = ses->password;
-	vol.sectype = ses->sectype;
-	vol.sign = ses->sign;
+	ctx.domainauto = ses->domainAuto;
+	ctx.domainname = ses->domainName;
+	ctx.username = ses->user_name;
+	ctx.password = ses->password;
+	ctx.sectype = ses->sectype;
+	ctx.sign = ses->sign;
 
 	/* UNC and paths */
 	/* XXX: Use ses->server->hostname? */
 	sprintf(unc, unc_fmt, ses->serverName);
-	vol.UNC = unc;
-	vol.prepath = "";
+	ctx.UNC = unc;
+	ctx.prepath = "";
 
 	/* Reuse same version as master connection */
-	vol.vals = ses->server->vals;
-	vol.ops = ses->server->ops;
+	ctx.vals = ses->server->vals;
+	ctx.ops = ses->server->ops;
 
-	vol.noblocksnd = ses->server->noblocksnd;
-	vol.noautotune = ses->server->noautotune;
-	vol.sockopt_tcp_nodelay = ses->server->tcp_nodelay;
-	vol.echo_interval = ses->server->echo_interval / HZ;
+	ctx.noblocksnd = ses->server->noblocksnd;
+	ctx.noautotune = ses->server->noautotune;
+	ctx.sockopt_tcp_nodelay = ses->server->tcp_nodelay;
+	ctx.echo_interval = ses->server->echo_interval / HZ;
 
 	/*
 	 * This will be used for encoding/decoding user/domain/pw
 	 * during sess setup auth.
-	 *
-	 * XXX: We use the default for simplicity but the proper way
-	 * would be to use the one that ses used, which is not
-	 * stored. This might break when dealing with non-ascii
-	 * strings.
 	 */
-	vol.local_nls = load_nls_default();
+	ctx.local_nls = cifs_sb->local_nls;
 
 	/* Use RDMA if possible */
-	vol.rdma = iface->rdma_capable;
-	memcpy(&vol.dstaddr, &iface->sockaddr, sizeof(struct sockaddr_storage));
+	ctx.rdma = iface->rdma_capable;
+	memcpy(&ctx.dstaddr, &iface->sockaddr, sizeof(struct sockaddr_storage));
 
 	/* reuse master con client guid */
-	memcpy(&vol.client_guid, ses->server->client_guid,
+	memcpy(&ctx.client_guid, ses->server->client_guid,
 	       SMB2_CLIENT_GUID_SIZE);
-	vol.use_client_guid = true;
+	ctx.use_client_guid = true;
 
 	mutex_lock(&ses->session_mutex);
 
 	chan = ses->binding_chan = &ses->chans[ses->chan_count];
-	chan->server = cifs_get_tcp_session(&vol);
+	chan->server = cifs_get_tcp_session(&ctx);
 	if (IS_ERR(chan->server)) {
 		rc = PTR_ERR(chan->server);
 		chan->server = NULL;
@@ -274,7 +275,7 @@ cifs_ses_add_channel(struct cifs_ses *ses, struct cifs_server_iface *iface)
 	if (rc)
 		goto out;
 
-	rc = cifs_setup_session(xid, ses, vol.local_nls);
+	rc = cifs_setup_session(xid, ses, cifs_sb->local_nls);
 	if (rc)
 		goto out;
 
@@ -297,7 +298,6 @@ out:
 
 	if (rc && chan->server)
 		cifs_put_tcp_session(chan->server, 0);
-	unload_nls(vol.local_nls);
 
 	return rc;
 }
@@ -812,6 +812,7 @@ cifs_select_sectype(struct TCP_Server_Info *server, enum securityEnum requested)
 				return NTLMv2;
 			if (global_secflags & CIFSSEC_MAY_NTLM)
 				return NTLM;
+			break;
 		default:
 			break;
 		}
diff --git a/fs/cifs/smb1ops.c b/fs/cifs/smb1ops.c
index 80287c26cfac..e31b939e628c 100644
--- a/fs/cifs/smb1ops.c
+++ b/fs/cifs/smb1ops.c
@@ -12,6 +12,7 @@
 #include "cifs_debug.h"
 #include "cifspdu.h"
 #include "cifs_unicode.h"
+#include "fs_context.h"
 
 /*
  * An NT cancel request header looks just like the original request except:
@@ -428,15 +429,15 @@ cifs_negotiate(const unsigned int xid, struct cifs_ses *ses)
 }
 
 static unsigned int
-cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
+cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx)
 {
 	__u64 unix_cap = le64_to_cpu(tcon->fsUnixInfo.Capability);
 	struct TCP_Server_Info *server = tcon->ses->server;
 	unsigned int wsize;
 
 	/* start with specified wsize, or default */
-	if (volume_info->wsize)
-		wsize = volume_info->wsize;
+	if (ctx->wsize)
+		wsize = ctx->wsize;
 	else if (tcon->unix_ext && (unix_cap & CIFS_UNIX_LARGE_WRITE_CAP))
 		wsize = CIFS_DEFAULT_IOSIZE;
 	else
@@ -463,7 +464,7 @@ cifs_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
 }
 
 static unsigned int
-cifs_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
+cifs_negotiate_rsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx)
 {
 	__u64 unix_cap = le64_to_cpu(tcon->fsUnixInfo.Capability);
 	struct TCP_Server_Info *server = tcon->ses->server;
@@ -488,7 +489,7 @@ cifs_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
 	else
 		defsize = server->maxBuf - sizeof(READ_RSP);
 
-	rsize = volume_info->rsize ? volume_info->rsize : defsize;
+	rsize = ctx->rsize ? ctx->rsize : defsize;
 
 	/*
 	 * no CAP_LARGE_READ_X? Then MS-CIFS states that we must limit this to
@@ -1005,7 +1006,7 @@ cifs_is_read_op(__u32 oplock)
 static unsigned int
 cifs_wp_retry_size(struct inode *inode)
 {
-	return CIFS_SB(inode->i_sb)->wsize;
+	return CIFS_SB(inode->i_sb)->ctx->wsize;
 }
 
 static bool
diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c
index d88e2683626e..60d4bd1eae2b 100644
--- a/fs/cifs/smb2misc.c
+++ b/fs/cifs/smb2misc.c
@@ -94,6 +94,8 @@ static const __le16 smb2_rsp_struct_sizes[NUMBER_OF_SMB2_COMMANDS] = {
 	/* SMB2_OPLOCK_BREAK */ cpu_to_le16(24)
 };
 
+#define SMB311_NEGPROT_BASE_SIZE (sizeof(struct smb2_sync_hdr) + sizeof(struct smb2_negotiate_rsp))
+
 static __u32 get_neg_ctxt_len(struct smb2_sync_hdr *hdr, __u32 len,
 			      __u32 non_ctxlen)
 {
@@ -107,13 +109,28 @@ static __u32 get_neg_ctxt_len(struct smb2_sync_hdr *hdr, __u32 len,
 	   (pneg_rsp->DialectRevision != cpu_to_le16(SMB311_PROT_ID)))
 		return 0;
 
-	/* Make sure that negotiate contexts start after gss security blob */
+	/*
+	 * if SPNEGO blob present (ie the RFC2478 GSS info which indicates
+	 * which security mechanisms the server supports) make sure that
+	 * the negotiate contexts start after it
+	 */
 	nc_offset = le32_to_cpu(pneg_rsp->NegotiateContextOffset);
-	if (nc_offset < non_ctxlen) {
-		pr_warn_once("Invalid negotiate context offset\n");
+	/*
+	 * non_ctxlen is at least shdr->StructureSize + pdu->StructureSize2
+	 * and the latter is 1 byte bigger than the fix-sized area of the
+	 * NEGOTIATE response
+	 */
+	if (nc_offset + 1 < non_ctxlen) {
+		pr_warn_once("Invalid negotiate context offset %d\n", nc_offset);
 		return 0;
-	}
-	size_of_pad_before_neg_ctxts = nc_offset - non_ctxlen;
+	} else if (nc_offset + 1 == non_ctxlen) {
+		cifs_dbg(FYI, "no SPNEGO security blob in negprot rsp\n");
+		size_of_pad_before_neg_ctxts = 0;
+	} else if (non_ctxlen == SMB311_NEGPROT_BASE_SIZE)
+		/* has padding, but no SPNEGO blob */
+		size_of_pad_before_neg_ctxts = nc_offset - non_ctxlen + 1;
+	else
+		size_of_pad_before_neg_ctxts = nc_offset - non_ctxlen;
 
 	/* Verify that at least minimal negotiate contexts fit within frame */
 	if (len < nc_offset + (neg_count * sizeof(struct smb2_neg_context))) {
@@ -859,6 +876,10 @@ smb2_handle_cancelled_mid(char *buffer, struct TCP_Server_Info *server)
  *
  * Assumes @iov does not contain the rfc1002 length and iov[0] has the
  * SMB2 header.
+ *
+ * @ses:	server session structure
+ * @iov:	array containing the SMB request we will send to the server
+ * @nvec:	number of array entries for the iov
  */
 int
 smb311_update_preauth_hash(struct cifs_ses *ses, struct kvec *iov, int nvec)
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index 3d914d7d0d11..f19274857292 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -24,6 +24,7 @@
 #include "smb2glob.h"
 #include "cifs_ioctl.h"
 #include "smbdirect.h"
+#include "fs_context.h"
 
 /* Change credits for different ops and return the total number of credits */
 static int
@@ -99,9 +100,10 @@ smb2_add_credits(struct TCP_Server_Info *server,
 	spin_unlock(&server->req_lock);
 	wake_up(&server->request_q);
 
-	if (reconnect_detected)
+	if (reconnect_detected) {
 		cifs_dbg(FYI, "trying to put %d credits from the old server instance %d\n",
 			 add, instance);
+	}
 
 	if (server->tcpStatus == CifsNeedReconnect
 	    || server->tcpStatus == CifsExiting)
@@ -123,7 +125,7 @@ smb2_add_credits(struct TCP_Server_Info *server,
 	default:
 		trace_smb3_add_credits(server->CurrentMid,
 			server->hostname, rc, add);
-		cifs_dbg(FYI, "add %u credits total=%d\n", add, rc);
+		cifs_dbg(FYI, "%s: added %u credits total=%d\n", __func__, add, rc);
 	}
 }
 
@@ -135,6 +137,11 @@ smb2_set_credits(struct TCP_Server_Info *server, const int val)
 	if (val == 1)
 		server->reconnect_instance++;
 	spin_unlock(&server->req_lock);
+
+	trace_smb3_set_credits(server->CurrentMid,
+			server->hostname, val, val);
+	cifs_dbg(FYI, "%s: set %u credits\n", __func__, val);
+
 	/* don't log while holding the lock */
 	if (val == 1)
 		cifs_dbg(FYI, "set credits to 1 due to smb2 reconnect\n");
@@ -201,6 +208,7 @@ smb2_wait_mtu_credits(struct TCP_Server_Info *server, unsigned int size,
 				DIV_ROUND_UP(*num, SMB2_MAX_BUFFER_SIZE);
 			credits->instance = server->reconnect_instance;
 			server->credits -= credits->value;
+			scredits = server->credits;
 			server->in_flight++;
 			if (server->in_flight > server->max_in_flight)
 				server->max_in_flight = server->in_flight;
@@ -208,6 +216,12 @@ smb2_wait_mtu_credits(struct TCP_Server_Info *server, unsigned int size,
 		}
 	}
 	spin_unlock(&server->req_lock);
+
+	trace_smb3_add_credits(server->CurrentMid,
+			server->hostname, scredits, -(credits->value));
+	cifs_dbg(FYI, "%s: removed %u credits total=%d\n",
+			__func__, credits->value, scredits);
+
 	return rc;
 }
 
@@ -217,13 +231,17 @@ smb2_adjust_credits(struct TCP_Server_Info *server,
 		    const unsigned int payload_size)
 {
 	int new_val = DIV_ROUND_UP(payload_size, SMB2_MAX_BUFFER_SIZE);
+	int scredits;
 
 	if (!credits->value || credits->value == new_val)
 		return 0;
 
 	if (credits->value < new_val) {
-		WARN_ONCE(1, "request has less credits (%d) than required (%d)",
-			  credits->value, new_val);
+		trace_smb3_too_many_credits(server->CurrentMid,
+				server->hostname, 0, credits->value - new_val);
+		cifs_server_dbg(VFS, "request has less credits (%d) than required (%d)",
+				credits->value, new_val);
+
 		return -ENOTSUPP;
 	}
 
@@ -231,15 +249,24 @@ smb2_adjust_credits(struct TCP_Server_Info *server,
 
 	if (server->reconnect_instance != credits->instance) {
 		spin_unlock(&server->req_lock);
+		trace_smb3_reconnect_detected(server->CurrentMid,
+			server->hostname, 0, 0);
 		cifs_server_dbg(VFS, "trying to return %d credits to old session\n",
 			 credits->value - new_val);
 		return -EAGAIN;
 	}
 
 	server->credits += credits->value - new_val;
+	scredits = server->credits;
 	spin_unlock(&server->req_lock);
 	wake_up(&server->request_q);
 	credits->value = new_val;
+
+	trace_smb3_add_credits(server->CurrentMid,
+			server->hostname, scredits, credits->value - new_val);
+	cifs_dbg(FYI, "%s: adjust added %u credits total=%d\n",
+			__func__, credits->value - new_val, scredits);
+
 	return 0;
 }
 
@@ -339,13 +366,13 @@ smb2_negotiate(const unsigned int xid, struct cifs_ses *ses)
 }
 
 static unsigned int
-smb2_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
+smb2_negotiate_wsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx)
 {
 	struct TCP_Server_Info *server = tcon->ses->server;
 	unsigned int wsize;
 
 	/* start with specified wsize, or default */
-	wsize = volume_info->wsize ? volume_info->wsize : CIFS_DEFAULT_IOSIZE;
+	wsize = ctx->wsize ? ctx->wsize : CIFS_DEFAULT_IOSIZE;
 	wsize = min_t(unsigned int, wsize, server->max_write);
 	if (!(server->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU))
 		wsize = min_t(unsigned int, wsize, SMB2_MAX_BUFFER_SIZE);
@@ -354,13 +381,13 @@ smb2_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
 }
 
 static unsigned int
-smb3_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
+smb3_negotiate_wsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx)
 {
 	struct TCP_Server_Info *server = tcon->ses->server;
 	unsigned int wsize;
 
 	/* start with specified wsize, or default */
-	wsize = volume_info->wsize ? volume_info->wsize : SMB3_DEFAULT_IOSIZE;
+	wsize = ctx->wsize ? ctx->wsize : SMB3_DEFAULT_IOSIZE;
 	wsize = min_t(unsigned int, wsize, server->max_write);
 #ifdef CONFIG_CIFS_SMB_DIRECT
 	if (server->rdma) {
@@ -386,13 +413,13 @@ smb3_negotiate_wsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
 }
 
 static unsigned int
-smb2_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
+smb2_negotiate_rsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx)
 {
 	struct TCP_Server_Info *server = tcon->ses->server;
 	unsigned int rsize;
 
 	/* start with specified rsize, or default */
-	rsize = volume_info->rsize ? volume_info->rsize : CIFS_DEFAULT_IOSIZE;
+	rsize = ctx->rsize ? ctx->rsize : CIFS_DEFAULT_IOSIZE;
 	rsize = min_t(unsigned int, rsize, server->max_read);
 
 	if (!(server->capabilities & SMB2_GLOBAL_CAP_LARGE_MTU))
@@ -402,13 +429,13 @@ smb2_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
 }
 
 static unsigned int
-smb3_negotiate_rsize(struct cifs_tcon *tcon, struct smb_vol *volume_info)
+smb3_negotiate_rsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx)
 {
 	struct TCP_Server_Info *server = tcon->ses->server;
 	unsigned int rsize;
 
 	/* start with specified rsize, or default */
-	rsize = volume_info->rsize ? volume_info->rsize : SMB3_DEFAULT_IOSIZE;
+	rsize = ctx->rsize ? ctx->rsize : SMB3_DEFAULT_IOSIZE;
 	rsize = min_t(unsigned int, rsize, server->max_read);
 #ifdef CONFIG_CIFS_SMB_DIRECT
 	if (server->rdma) {
@@ -477,7 +504,8 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf,
 		goto out;
 	}
 
-	if (bytes_left || p->Next)
+	/* Azure rounds the buffer size up 8, to a 16 byte boundary */
+	if ((bytes_left > 8) || p->Next)
 		cifs_dbg(VFS, "%s: incomplete interface info\n", __func__);
 
 
@@ -2341,6 +2369,7 @@ static bool
 smb2_is_status_pending(char *buf, struct TCP_Server_Info *server)
 {
 	struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)buf;
+	int scredits;
 
 	if (shdr->Status != STATUS_PENDING)
 		return false;
@@ -2348,8 +2377,14 @@ smb2_is_status_pending(char *buf, struct TCP_Server_Info *server)
 	if (shdr->CreditRequest) {
 		spin_lock(&server->req_lock);
 		server->credits += le16_to_cpu(shdr->CreditRequest);
+		scredits = server->credits;
 		spin_unlock(&server->req_lock);
 		wake_up(&server->request_q);
+
+		trace_smb3_add_credits(server->CurrentMid,
+				server->hostname, scredits, le16_to_cpu(shdr->CreditRequest));
+		cifs_dbg(FYI, "%s: status pending add %u credits total=%d\n",
+				__func__, le16_to_cpu(shdr->CreditRequest), scredits);
 	}
 
 	return true;
@@ -3179,7 +3214,7 @@ smb2_query_reparse_tag(const unsigned int xid, struct cifs_tcon *tcon,
 
 static struct cifs_ntsd *
 get_smb2_acl_by_fid(struct cifs_sb_info *cifs_sb,
-		const struct cifs_fid *cifsfid, u32 *pacllen)
+		    const struct cifs_fid *cifsfid, u32 *pacllen, u32 info)
 {
 	struct cifs_ntsd *pntsd = NULL;
 	unsigned int xid;
@@ -3193,7 +3228,8 @@ get_smb2_acl_by_fid(struct cifs_sb_info *cifs_sb,
 	cifs_dbg(FYI, "trying to get acl\n");
 
 	rc = SMB2_query_acl(xid, tlink_tcon(tlink), cifsfid->persistent_fid,
-			    cifsfid->volatile_fid, (void **)&pntsd, pacllen);
+			    cifsfid->volatile_fid, (void **)&pntsd, pacllen,
+			    info);
 	free_xid(xid);
 
 	cifs_put_tlink(tlink);
@@ -3207,7 +3243,7 @@ get_smb2_acl_by_fid(struct cifs_sb_info *cifs_sb,
 
 static struct cifs_ntsd *
 get_smb2_acl_by_path(struct cifs_sb_info *cifs_sb,
-		const char *path, u32 *pacllen)
+		     const char *path, u32 *pacllen, u32 info)
 {
 	struct cifs_ntsd *pntsd = NULL;
 	u8 oplock = SMB2_OPLOCK_LEVEL_NONE;
@@ -3245,12 +3281,16 @@ get_smb2_acl_by_path(struct cifs_sb_info *cifs_sb,
 	oparms.fid = &fid;
 	oparms.reconnect = false;
 
+	if (info & SACL_SECINFO)
+		oparms.desired_access |= SYSTEM_SECURITY;
+
 	rc = SMB2_open(xid, &oparms, utf16_path, &oplock, NULL, NULL, NULL,
 		       NULL);
 	kfree(utf16_path);
 	if (!rc) {
 		rc = SMB2_query_acl(xid, tlink_tcon(tlink), fid.persistent_fid,
-			    fid.volatile_fid, (void **)&pntsd, pacllen);
+				    fid.volatile_fid, (void **)&pntsd, pacllen,
+				    info);
 		SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid);
 	}
 
@@ -3284,10 +3324,12 @@ set_smb2_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
 	tcon = tlink_tcon(tlink);
 	xid = get_xid();
 
-	if (aclflag == CIFS_ACL_OWNER || aclflag == CIFS_ACL_GROUP)
-		access_flags = WRITE_OWNER;
-	else
-		access_flags = WRITE_DAC;
+	if (aclflag & CIFS_ACL_OWNER || aclflag & CIFS_ACL_GROUP)
+		access_flags |= WRITE_OWNER;
+	if (aclflag & CIFS_ACL_SACL)
+		access_flags |= SYSTEM_SECURITY;
+	if (aclflag & CIFS_ACL_DACL)
+		access_flags |= WRITE_DAC;
 
 	utf16_path = cifs_convert_path_to_utf16(path, cifs_sb);
 	if (!utf16_path) {
@@ -3321,18 +3363,18 @@ set_smb2_acl(struct cifs_ntsd *pnntsd, __u32 acllen,
 /* Retrieve an ACL from the server */
 static struct cifs_ntsd *
 get_smb2_acl(struct cifs_sb_info *cifs_sb,
-				      struct inode *inode, const char *path,
-				      u32 *pacllen)
+	     struct inode *inode, const char *path,
+	     u32 *pacllen, u32 info)
 {
 	struct cifs_ntsd *pntsd = NULL;
 	struct cifsFileInfo *open_file = NULL;
 
-	if (inode)
+	if (inode && !(info & SACL_SECINFO))
 		open_file = find_readable_file(CIFS_I(inode), true);
-	if (!open_file)
-		return get_smb2_acl_by_path(cifs_sb, path, pacllen);
+	if (!open_file || (info & SACL_SECINFO))
+		return get_smb2_acl_by_path(cifs_sb, path, pacllen, info);
 
-	pntsd = get_smb2_acl_by_fid(cifs_sb, &open_file->fid, pacllen);
+	pntsd = get_smb2_acl_by_fid(cifs_sb, &open_file->fid, pacllen, info);
 	cifsFileInfo_put(open_file);
 	return pntsd;
 }
@@ -3949,7 +3991,7 @@ smb3_parse_lease_buf(void *buf, unsigned int *epoch, char *lease_key)
 static unsigned int
 smb2_wp_retry_size(struct inode *inode)
 {
-	return min_t(unsigned int, CIFS_SB(inode->i_sb)->wsize,
+	return min_t(unsigned int, CIFS_SB(inode->i_sb)->ctx->wsize,
 		     SMB2_MAX_BUFFER_SIZE);
 }
 
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index acb72705062d..067eb44c7baa 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -427,8 +427,8 @@ build_preauth_ctxt(struct smb2_preauth_neg_context *pneg_ctxt)
 	pneg_ctxt->ContextType = SMB2_PREAUTH_INTEGRITY_CAPABILITIES;
 	pneg_ctxt->DataLength = cpu_to_le16(38);
 	pneg_ctxt->HashAlgorithmCount = cpu_to_le16(1);
-	pneg_ctxt->SaltLength = cpu_to_le16(SMB311_SALT_SIZE);
-	get_random_bytes(pneg_ctxt->Salt, SMB311_SALT_SIZE);
+	pneg_ctxt->SaltLength = cpu_to_le16(SMB311_LINUX_CLIENT_SALT_SIZE);
+	get_random_bytes(pneg_ctxt->Salt, SMB311_LINUX_CLIENT_SALT_SIZE);
 	pneg_ctxt->HashAlgorithms = SMB2_PREAUTH_INTEGRITY_SHA512;
 }
 
@@ -566,6 +566,9 @@ static void decode_preauth_context(struct smb2_preauth_neg_context *ctxt)
 	if (len < MIN_PREAUTH_CTXT_DATA_LEN) {
 		pr_warn_once("server sent bad preauth context\n");
 		return;
+	} else if (len < MIN_PREAUTH_CTXT_DATA_LEN + le16_to_cpu(ctxt->SaltLength)) {
+		pr_warn_once("server sent invalid SaltLength\n");
+		return;
 	}
 	if (le16_to_cpu(ctxt->HashAlgorithmCount) != 1)
 		pr_warn_once("Invalid SMB3 hash algorithm count\n");
@@ -3476,10 +3479,11 @@ SMB311_posix_query_info(const unsigned int xid, struct cifs_tcon *tcon,
 
 int
 SMB2_query_acl(const unsigned int xid, struct cifs_tcon *tcon,
-		u64 persistent_fid, u64 volatile_fid,
-		void **data, u32 *plen)
+	       u64 persistent_fid, u64 volatile_fid,
+	       void **data, u32 *plen, u32 extra_info)
 {
-	__u32 additional_info = OWNER_SECINFO | GROUP_SECINFO | DACL_SECINFO;
+	__u32 additional_info = OWNER_SECINFO | GROUP_SECINFO | DACL_SECINFO |
+				extra_info;
 	*plen = 0;
 
 	return query_info(xid, tcon, persistent_fid, volatile_fid,
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index fa57b03ca98c..204a622b89ed 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -333,12 +333,20 @@ struct smb2_neg_context {
 	/* Followed by array of data */
 } __packed;
 
-#define SMB311_SALT_SIZE			32
+#define SMB311_LINUX_CLIENT_SALT_SIZE			32
 /* Hash Algorithm Types */
 #define SMB2_PREAUTH_INTEGRITY_SHA512	cpu_to_le16(0x0001)
 #define SMB2_PREAUTH_HASH_SIZE 64
 
-#define MIN_PREAUTH_CTXT_DATA_LEN	(SMB311_SALT_SIZE + 6)
+/*
+ * SaltLength that the server send can be zero, so the only three required
+ * fields (all __le16) end up six bytes total, so the minimum context data len
+ * in the response is six bytes which accounts for
+ *
+ *      HashAlgorithmCount, SaltLength, and 1 HashAlgorithm.
+ */
+#define MIN_PREAUTH_CTXT_DATA_LEN 6
+
 struct smb2_preauth_neg_context {
 	__le16	ContextType; /* 1 */
 	__le16	DataLength;
@@ -346,7 +354,7 @@ struct smb2_preauth_neg_context {
 	__le16	HashAlgorithmCount; /* 1 */
 	__le16	SaltLength;
 	__le16	HashAlgorithms; /* HashAlgorithms[0] since only one defined */
-	__u8	Salt[SMB311_SALT_SIZE];
+	__u8	Salt[SMB311_LINUX_CLIENT_SALT_SIZE];
 } __packed;
 
 /* Encryption Algorithms Ciphers */
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index d4110447ee3a..9565e27681a5 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -200,8 +200,8 @@ extern int SMB2_query_info_init(struct cifs_tcon *tcon,
 				size_t input_len, void *input);
 extern void SMB2_query_info_free(struct smb_rqst *rqst);
 extern int SMB2_query_acl(const unsigned int xid, struct cifs_tcon *tcon,
-			   u64 persistent_file_id, u64 volatile_file_id,
-			   void **data, unsigned int *plen);
+			  u64 persistent_file_id, u64 volatile_file_id,
+			  void **data, unsigned int *plen, u32 info);
 extern int SMB2_get_srv_num(const unsigned int xid, struct cifs_tcon *tcon,
 			    u64 persistent_fid, u64 volatile_fid,
 			    __le64 *uniqueid);
diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c
index b029ed31ef91..10dfe5006792 100644
--- a/fs/cifs/smbdirect.c
+++ b/fs/cifs/smbdirect.c
@@ -246,6 +246,7 @@ smbd_qp_async_error_upcall(struct ib_event *event, void *context)
 	case IB_EVENT_CQ_ERR:
 	case IB_EVENT_QP_FATAL:
 		smbd_disconnect_rdma_connection(info);
+		break;
 
 	default:
 		break;
diff --git a/fs/cifs/trace.h b/fs/cifs/trace.h
index 90e0fab69bb8..c3d1a584f251 100644
--- a/fs/cifs/trace.h
+++ b/fs/cifs/trace.h
@@ -909,8 +909,12 @@ DEFINE_EVENT(smb3_credit_class, smb3_##name,  \
 	TP_ARGS(currmid, hostname, credits, credits_to_add))
 
 DEFINE_SMB3_CREDIT_EVENT(reconnect_with_invalid_credits);
+DEFINE_SMB3_CREDIT_EVENT(reconnect_detected);
 DEFINE_SMB3_CREDIT_EVENT(credit_timeout);
+DEFINE_SMB3_CREDIT_EVENT(insufficient_credits);
+DEFINE_SMB3_CREDIT_EVENT(too_many_credits);
 DEFINE_SMB3_CREDIT_EVENT(add_credits);
+DEFINE_SMB3_CREDIT_EVENT(set_credits);
 
 #endif /* _CIFS_TRACE_H */
 
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index 36b2ece43403..e9abb41aa89b 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -527,6 +527,7 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits,
 	int *credits;
 	int optype;
 	long int t;
+	int scredits = server->credits;
 
 	if (timeout < 0)
 		t = MAX_JIFFY_OFFSET;
@@ -624,12 +625,18 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits,
 			/* update # of requests on the wire to server */
 			if ((flags & CIFS_TIMEOUT_MASK) != CIFS_BLOCKING_OP) {
 				*credits -= num_credits;
+				scredits = *credits;
 				server->in_flight += num_credits;
 				if (server->in_flight > server->max_in_flight)
 					server->max_in_flight = server->in_flight;
 				*instance = server->reconnect_instance;
 			}
 			spin_unlock(&server->req_lock);
+
+			trace_smb3_add_credits(server->CurrentMid,
+					server->hostname, scredits, -(num_credits));
+			cifs_dbg(FYI, "%s: remove %u credits total=%d\n",
+					__func__, num_credits, scredits);
 			break;
 		}
 	}
@@ -649,10 +656,14 @@ wait_for_compound_request(struct TCP_Server_Info *server, int num,
 			  const int flags, unsigned int *instance)
 {
 	int *credits;
+	int scredits, sin_flight;
 
 	credits = server->ops->get_credits_field(server, flags & CIFS_OP_MASK);
 
 	spin_lock(&server->req_lock);
+	scredits = *credits;
+	sin_flight = server->in_flight;
+
 	if (*credits < num) {
 		/*
 		 * Return immediately if not too many requests in flight since
@@ -660,6 +671,10 @@ wait_for_compound_request(struct TCP_Server_Info *server, int num,
 		 */
 		if (server->in_flight < num - *credits) {
 			spin_unlock(&server->req_lock);
+			trace_smb3_insufficient_credits(server->CurrentMid,
+					server->hostname, scredits, sin_flight);
+			cifs_dbg(FYI, "%s: %d requests in flight, needed %d total=%d\n",
+					__func__, sin_flight, num, scredits);
 			return -ENOTSUPP;
 		}
 	}
diff --git a/fs/cifs/unc.c b/fs/cifs/unc.c
new file mode 100644
index 000000000000..394aa00cea40
--- /dev/null
+++ b/fs/cifs/unc.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *   Copyright (C) 2020, Microsoft Corporation.
+ *
+ *   Author(s): Steve French <stfrench@microsoft.com>
+ *              Suresh Jayaraman <sjayaraman@suse.de>
+ *              Jeff Layton <jlayton@kernel.org>
+ */
+
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/inet.h>
+#include <linux/ctype.h>
+#include "cifsglob.h"
+#include "cifsproto.h"
+
+/* extract the host portion of the UNC string */
+char *extract_hostname(const char *unc)
+{
+	const char *src;
+	char *dst, *delim;
+	unsigned int len;
+
+	/* skip double chars at beginning of string */
+	/* BB: check validity of these bytes? */
+	if (strlen(unc) < 3)
+		return ERR_PTR(-EINVAL);
+	for (src = unc; *src && *src == '\\'; src++)
+		;
+	if (!*src)
+		return ERR_PTR(-EINVAL);
+
+	/* delimiter between hostname and sharename is always '\\' now */
+	delim = strchr(src, '\\');
+	if (!delim)
+		return ERR_PTR(-EINVAL);
+
+	len = delim - src;
+	dst = kmalloc((len + 1), GFP_KERNEL);
+	if (dst == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	memcpy(dst, src, len);
+	dst[len] = '\0';
+
+	return dst;
+}
+
+char *extract_sharename(const char *unc)
+{
+	const char *src;
+	char *delim, *dst;
+	int len;
+
+	/* skip double chars at the beginning */
+	src = unc + 2;
+
+	/* share name is always preceded by '\\' now */
+	delim = strchr(src, '\\');
+	if (!delim)
+		return ERR_PTR(-EINVAL);
+	delim++;
+	len = strlen(delim);
+
+	/* caller has to free the memory */
+	dst = kstrndup(delim, len, GFP_KERNEL);
+	if (!dst)
+		return ERR_PTR(-ENOMEM);
+
+	return dst;
+}
diff --git a/fs/cifs/xattr.c b/fs/cifs/xattr.c
index b8299173ea7e..6b658a1172ef 100644
--- a/fs/cifs/xattr.c
+++ b/fs/cifs/xattr.c
@@ -34,6 +34,7 @@
 #define MAX_EA_VALUE_SIZE CIFSMaxBufSize
 #define CIFS_XATTR_CIFS_ACL "system.cifs_acl" /* DACL only */
 #define CIFS_XATTR_CIFS_NTSD "system.cifs_ntsd" /* owner plus DACL */
+#define CIFS_XATTR_CIFS_NTSD_FULL "system.cifs_ntsd_full" /* owner/DACL/SACL */
 #define CIFS_XATTR_ATTRIB "cifs.dosattrib"  /* full name: user.cifs.dosattrib */
 #define CIFS_XATTR_CREATETIME "cifs.creationtime"  /* user.cifs.creationtime */
 /*
@@ -43,12 +44,13 @@
  */
 #define SMB3_XATTR_CIFS_ACL "system.smb3_acl" /* DACL only */
 #define SMB3_XATTR_CIFS_NTSD "system.smb3_ntsd" /* owner plus DACL */
+#define SMB3_XATTR_CIFS_NTSD_FULL "system.smb3_ntsd_full" /* owner/DACL/SACL */
 #define SMB3_XATTR_ATTRIB "smb3.dosattrib"  /* full name: user.smb3.dosattrib */
 #define SMB3_XATTR_CREATETIME "smb3.creationtime"  /* user.smb3.creationtime */
 /* BB need to add server (Samba e.g) support for security and trusted prefix */
 
 enum { XATTR_USER, XATTR_CIFS_ACL, XATTR_ACL_ACCESS, XATTR_ACL_DEFAULT,
-	XATTR_CIFS_NTSD };
+	XATTR_CIFS_NTSD, XATTR_CIFS_NTSD_FULL };
 
 static int cifs_attrib_set(unsigned int xid, struct cifs_tcon *pTcon,
 			   struct inode *inode, char *full_path,
@@ -164,7 +166,8 @@ static int cifs_xattr_set(const struct xattr_handler *handler,
 		break;
 
 	case XATTR_CIFS_ACL:
-	case XATTR_CIFS_NTSD: {
+	case XATTR_CIFS_NTSD:
+	case XATTR_CIFS_NTSD_FULL: {
 		struct cifs_ntsd *pacl;
 
 		if (!value)
@@ -174,23 +177,27 @@ static int cifs_xattr_set(const struct xattr_handler *handler,
 			rc = -ENOMEM;
 		} else {
 			memcpy(pacl, value, size);
-			if (value &&
-			    pTcon->ses->server->ops->set_acl) {
+			if (pTcon->ses->server->ops->set_acl) {
+				int aclflags = 0;
 				rc = 0;
-				if (handler->flags == XATTR_CIFS_NTSD) {
-					/* set owner and DACL */
-					rc = pTcon->ses->server->ops->set_acl(
-							pacl, size, inode,
-							full_path,
-							CIFS_ACL_OWNER);
-				}
-				if (rc == 0) {
-					/* set DACL */
-					rc = pTcon->ses->server->ops->set_acl(
-							pacl, size, inode,
-							full_path,
-							CIFS_ACL_DACL);
+
+				switch (handler->flags) {
+				case XATTR_CIFS_NTSD_FULL:
+					aclflags = (CIFS_ACL_OWNER |
+						    CIFS_ACL_DACL |
+						    CIFS_ACL_SACL);
+					break;
+				case XATTR_CIFS_NTSD:
+					aclflags = (CIFS_ACL_OWNER |
+						    CIFS_ACL_DACL);
+					break;
+				case XATTR_CIFS_ACL:
+				default:
+					aclflags = CIFS_ACL_DACL;
 				}
+
+				rc = pTcon->ses->server->ops->set_acl(pacl,
+					size, inode, full_path, aclflags);
 			} else {
 				rc = -EOPNOTSUPP;
 			}
@@ -327,16 +334,25 @@ static int cifs_xattr_get(const struct xattr_handler *handler,
 		break;
 
 	case XATTR_CIFS_ACL:
-	case XATTR_CIFS_NTSD: {
-		/* the whole ntsd is fetched regardless */
-		u32 acllen;
+	case XATTR_CIFS_NTSD:
+	case XATTR_CIFS_NTSD_FULL: {
+		/*
+		 * fetch owner, DACL, and SACL if asked for full descriptor,
+		 * fetch owner and DACL otherwise
+		 */
+		u32 acllen, extra_info;
 		struct cifs_ntsd *pacl;
 
 		if (pTcon->ses->server->ops->get_acl == NULL)
 			goto out; /* rc already EOPNOTSUPP */
 
+		if (handler->flags == XATTR_CIFS_NTSD_FULL) {
+			extra_info = SACL_SECINFO;
+		} else {
+			extra_info = 0;
+		}
 		pacl = pTcon->ses->server->ops->get_acl(cifs_sb,
-				inode, full_path, &acllen);
+				inode, full_path, &acllen, extra_info);
 		if (IS_ERR(pacl)) {
 			rc = PTR_ERR(pacl);
 			cifs_dbg(VFS, "%s: error %zd getting sec desc\n",
@@ -486,6 +502,27 @@ static const struct xattr_handler smb3_ntsd_xattr_handler = {
 	.set = cifs_xattr_set,
 };
 
+static const struct xattr_handler cifs_cifs_ntsd_full_xattr_handler = {
+	.name = CIFS_XATTR_CIFS_NTSD_FULL,
+	.flags = XATTR_CIFS_NTSD_FULL,
+	.get = cifs_xattr_get,
+	.set = cifs_xattr_set,
+};
+
+/*
+ * Although this is just an alias for the above, need to move away from
+ * confusing users and using the 20 year old term 'cifs' when it is no
+ * longer secure and was replaced by SMB2/SMB3 a long time ago, and
+ * SMB3 and later are highly secure.
+ */
+static const struct xattr_handler smb3_ntsd_full_xattr_handler = {
+	.name = SMB3_XATTR_CIFS_NTSD_FULL,
+	.flags = XATTR_CIFS_NTSD_FULL,
+	.get = cifs_xattr_get,
+	.set = cifs_xattr_set,
+};
+
+
 static const struct xattr_handler cifs_posix_acl_access_xattr_handler = {
 	.name = XATTR_NAME_POSIX_ACL_ACCESS,
 	.flags = XATTR_ACL_ACCESS,
@@ -507,6 +544,8 @@ const struct xattr_handler *cifs_xattr_handlers[] = {
 	&smb3_acl_xattr_handler, /* alias for above since avoiding "cifs" */
 	&cifs_cifs_ntsd_xattr_handler,
 	&smb3_ntsd_xattr_handler, /* alias for above since avoiding "cifs" */
+	&cifs_cifs_ntsd_full_xattr_handler,
+	&smb3_ntsd_full_xattr_handler, /* alias for above since avoiding "cifs" */
 	&cifs_posix_acl_access_xattr_handler,
 	&cifs_posix_acl_default_xattr_handler,
 	NULL
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index b0983e2a4e2c..b839dd1b459f 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -267,6 +267,7 @@ static void configfs_remove_dirent(struct dentry *dentry)
  *	configfs_create_dir - create a directory for an config_item.
  *	@item:		config_itemwe're creating directory for.
  *	@dentry:	config_item's dentry.
+ *	@frag:		config_item's fragment.
  *
  *	Note: user-created entries won't be allowed under this new directory
  *	until it is validated by configfs_dir_set_ready()
diff --git a/fs/crypto/fname.c b/fs/crypto/fname.c
index 3414d35ccbb6..6ca7d16593ff 100644
--- a/fs/crypto/fname.c
+++ b/fs/crypto/fname.c
@@ -574,7 +574,3 @@ int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags)
 	return valid;
 }
 EXPORT_SYMBOL_GPL(fscrypt_d_revalidate);
-
-const struct dentry_operations fscrypt_d_ops = {
-	.d_revalidate = fscrypt_d_revalidate,
-};
diff --git a/fs/crypto/fscrypt_private.h b/fs/crypto/fscrypt_private.h
index f0bed6b06fa6..3fa965eb3336 100644
--- a/fs/crypto/fscrypt_private.h
+++ b/fs/crypto/fscrypt_private.h
@@ -297,7 +297,6 @@ int fscrypt_fname_encrypt(const struct inode *inode, const struct qstr *iname,
 bool fscrypt_fname_encrypted_size(const union fscrypt_policy *policy,
 				  u32 orig_len, u32 max_len,
 				  u32 *encrypted_len_ret);
-extern const struct dentry_operations fscrypt_d_ops;
 
 /* hkdf.c */
 
diff --git a/fs/crypto/hooks.c b/fs/crypto/hooks.c
index 79570e0e8e61..a73b0376e6f3 100644
--- a/fs/crypto/hooks.c
+++ b/fs/crypto/hooks.c
@@ -108,7 +108,6 @@ int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry,
 		spin_lock(&dentry->d_lock);
 		dentry->d_flags |= DCACHE_NOKEY_NAME;
 		spin_unlock(&dentry->d_lock);
-		d_set_d_op(dentry, &fscrypt_d_ops);
 	}
 	return err;
 }
diff --git a/fs/eventfd.c b/fs/eventfd.c
index df466ef81ddd..e265b6dd4f34 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -182,11 +182,14 @@ static __poll_t eventfd_poll(struct file *file, poll_table *wait)
 	return events;
 }
 
-static void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt)
+void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt)
 {
+	lockdep_assert_held(&ctx->wqh.lock);
+
 	*cnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count;
 	ctx->count -= *cnt;
 }
+EXPORT_SYMBOL_GPL(eventfd_ctx_do_read);
 
 /**
  * eventfd_ctx_remove_wait_queue - Read the current counter and removes wait queue.
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index 10b81e69db74..a829af074eb5 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -389,19 +389,24 @@ static bool ep_busy_loop_end(void *p, unsigned long start_time)
  *
  * we must do our busy polling with irqs enabled
  */
-static void ep_busy_loop(struct eventpoll *ep, int nonblock)
+static bool ep_busy_loop(struct eventpoll *ep, int nonblock)
 {
 	unsigned int napi_id = READ_ONCE(ep->napi_id);
 
-	if ((napi_id >= MIN_NAPI_ID) && net_busy_loop_on())
+	if ((napi_id >= MIN_NAPI_ID) && net_busy_loop_on()) {
 		napi_busy_loop(napi_id, nonblock ? NULL : ep_busy_loop_end, ep, false,
 			       BUSY_POLL_BUDGET);
-}
-
-static inline void ep_reset_busy_poll_napi_id(struct eventpoll *ep)
-{
-	if (ep->napi_id)
+		if (ep_events_available(ep))
+			return true;
+		/*
+		 * Busy poll timed out.  Drop NAPI ID for now, we can add
+		 * it back in when we have moved a socket with a valid NAPI
+		 * ID onto the ready list.
+		 */
 		ep->napi_id = 0;
+		return false;
+	}
+	return false;
 }
 
 /*
@@ -441,12 +446,9 @@ static inline void ep_set_busy_poll_napi_id(struct epitem *epi)
 
 #else
 
-static inline void ep_busy_loop(struct eventpoll *ep, int nonblock)
-{
-}
-
-static inline void ep_reset_busy_poll_napi_id(struct eventpoll *ep)
+static inline bool ep_busy_loop(struct eventpoll *ep, int nonblock)
 {
+	return false;
 }
 
 static inline void ep_set_busy_poll_napi_id(struct epitem *epi)
@@ -1625,6 +1627,14 @@ static int ep_send_events(struct eventpoll *ep,
 	poll_table pt;
 	int res = 0;
 
+	/*
+	 * Always short-circuit for fatal signals to allow threads to make a
+	 * timely exit without the chance of finding more events available and
+	 * fetching repeatedly.
+	 */
+	if (fatal_signal_pending(current))
+		return -EINTR;
+
 	init_poll_funcptr(&pt, NULL);
 
 	mutex_lock(&ep->mtx);
@@ -1702,15 +1712,25 @@ static int ep_send_events(struct eventpoll *ep,
 	return res;
 }
 
-static inline struct timespec64 ep_set_mstimeout(long ms)
+static struct timespec64 *ep_timeout_to_timespec(struct timespec64 *to, long ms)
 {
-	struct timespec64 now, ts = {
-		.tv_sec = ms / MSEC_PER_SEC,
-		.tv_nsec = NSEC_PER_MSEC * (ms % MSEC_PER_SEC),
-	};
+	struct timespec64 now;
+
+	if (ms < 0)
+		return NULL;
+
+	if (!ms) {
+		to->tv_sec = 0;
+		to->tv_nsec = 0;
+		return to;
+	}
+
+	to->tv_sec = ms / MSEC_PER_SEC;
+	to->tv_nsec = NSEC_PER_MSEC * (ms % MSEC_PER_SEC);
 
 	ktime_get_ts64(&now);
-	return timespec64_add_safe(now, ts);
+	*to = timespec64_add_safe(now, *to);
+	return to;
 }
 
 /**
@@ -1722,8 +1742,8 @@ static inline struct timespec64 ep_set_mstimeout(long ms)
  *          stored.
  * @maxevents: Size (in terms of number of events) of the caller event buffer.
  * @timeout: Maximum timeout for the ready events fetch operation, in
- *           milliseconds. If the @timeout is zero, the function will not block,
- *           while if the @timeout is less than zero, the function will block
+ *           timespec. If the timeout is zero, the function will not block,
+ *           while if the @timeout ptr is NULL, the function will block
  *           until at least one event has been retrieved (or an error
  *           occurred).
  *
@@ -1731,55 +1751,59 @@ static inline struct timespec64 ep_set_mstimeout(long ms)
  *          error code, in case of error.
  */
 static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
-		   int maxevents, long timeout)
+		   int maxevents, struct timespec64 *timeout)
 {
-	int res = 0, eavail, timed_out = 0;
+	int res, eavail, timed_out = 0;
 	u64 slack = 0;
 	wait_queue_entry_t wait;
 	ktime_t expires, *to = NULL;
 
 	lockdep_assert_irqs_enabled();
 
-	if (timeout > 0) {
-		struct timespec64 end_time = ep_set_mstimeout(timeout);
-
-		slack = select_estimate_accuracy(&end_time);
+	if (timeout && (timeout->tv_sec | timeout->tv_nsec)) {
+		slack = select_estimate_accuracy(timeout);
 		to = &expires;
-		*to = timespec64_to_ktime(end_time);
-	} else if (timeout == 0) {
+		*to = timespec64_to_ktime(*timeout);
+	} else if (timeout) {
 		/*
 		 * Avoid the unnecessary trip to the wait queue loop, if the
-		 * caller specified a non blocking operation. We still need
-		 * lock because we could race and not see an epi being added
-		 * to the ready list while in irq callback. Thus incorrectly
-		 * returning 0 back to userspace.
+		 * caller specified a non blocking operation.
 		 */
 		timed_out = 1;
-
-		write_lock_irq(&ep->lock);
-		eavail = ep_events_available(ep);
-		write_unlock_irq(&ep->lock);
-
-		goto send_events;
 	}
 
-fetch_events:
+	/*
+	 * This call is racy: We may or may not see events that are being added
+	 * to the ready list under the lock (e.g., in IRQ callbacks). For, cases
+	 * with a non-zero timeout, this thread will check the ready list under
+	 * lock and will added to the wait queue.  For, cases with a zero
+	 * timeout, the user by definition should not care and will have to
+	 * recheck again.
+	 */
+	eavail = ep_events_available(ep);
+
+	while (1) {
+		if (eavail) {
+			/*
+			 * Try to transfer events to user space. In case we get
+			 * 0 events and there's still timeout left over, we go
+			 * trying again in search of more luck.
+			 */
+			res = ep_send_events(ep, events, maxevents);
+			if (res)
+				return res;
+		}
 
-	if (!ep_events_available(ep))
-		ep_busy_loop(ep, timed_out);
+		if (timed_out)
+			return 0;
 
-	eavail = ep_events_available(ep);
-	if (eavail)
-		goto send_events;
+		eavail = ep_busy_loop(ep, timed_out);
+		if (eavail)
+			continue;
 
-	/*
-	 * Busy poll timed out.  Drop NAPI ID for now, we can add
-	 * it back in when we have moved a socket with a valid NAPI
-	 * ID onto the ready list.
-	 */
-	ep_reset_busy_poll_napi_id(ep);
+		if (signal_pending(current))
+			return -EINTR;
 
-	do {
 		/*
 		 * Internally init_wait() uses autoremove_wake_function(),
 		 * thus wait entry is removed from the wait queue on each
@@ -1809,55 +1833,38 @@ fetch_events:
 		 * important.
 		 */
 		eavail = ep_events_available(ep);
-		if (!eavail) {
-			if (signal_pending(current))
-				res = -EINTR;
-			else
-				__add_wait_queue_exclusive(&ep->wq, &wait);
-		}
-		write_unlock_irq(&ep->lock);
-
-		if (eavail || res)
-			break;
-
-		if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS)) {
-			timed_out = 1;
-			break;
-		}
-
-		/* We were woken up, thus go and try to harvest some events */
-		eavail = 1;
-
-	} while (0);
+		if (!eavail)
+			__add_wait_queue_exclusive(&ep->wq, &wait);
 
-	__set_current_state(TASK_RUNNING);
-
-	if (!list_empty_careful(&wait.entry)) {
-		write_lock_irq(&ep->lock);
-		__remove_wait_queue(&ep->wq, &wait);
 		write_unlock_irq(&ep->lock);
-	}
 
-send_events:
-	if (fatal_signal_pending(current)) {
+		if (!eavail)
+			timed_out = !schedule_hrtimeout_range(to, slack,
+							      HRTIMER_MODE_ABS);
+		__set_current_state(TASK_RUNNING);
+
 		/*
-		 * Always short-circuit for fatal signals to allow
-		 * threads to make a timely exit without the chance of
-		 * finding more events available and fetching
-		 * repeatedly.
+		 * We were woken up, thus go and try to harvest some events.
+		 * If timed out and still on the wait queue, recheck eavail
+		 * carefully under lock, below.
 		 */
-		res = -EINTR;
-	}
-	/*
-	 * Try to transfer events to user space. In case we get 0 events and
-	 * there's still timeout left over, we go trying again in search of
-	 * more luck.
-	 */
-	if (!res && eavail &&
-	    !(res = ep_send_events(ep, events, maxevents)) && !timed_out)
-		goto fetch_events;
+		eavail = 1;
 
-	return res;
+		if (!list_empty_careful(&wait.entry)) {
+			write_lock_irq(&ep->lock);
+			/*
+			 * If the thread timed out and is not on the wait queue,
+			 * it means that the thread was woken up after its
+			 * timeout expired before it could reacquire the lock.
+			 * Thus, when wait.entry is empty, it needs to harvest
+			 * events.
+			 */
+			if (timed_out)
+				eavail = list_empty(&wait.entry);
+			__remove_wait_queue(&ep->wq, &wait);
+			write_unlock_irq(&ep->lock);
+		}
+	}
 }
 
 /**
@@ -2176,7 +2183,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd,
  * part of the user space epoll_wait(2).
  */
 static int do_epoll_wait(int epfd, struct epoll_event __user *events,
-			 int maxevents, int timeout)
+			 int maxevents, struct timespec64 *to)
 {
 	int error;
 	struct fd f;
@@ -2210,7 +2217,7 @@ static int do_epoll_wait(int epfd, struct epoll_event __user *events,
 	ep = f.file->private_data;
 
 	/* Time to fish for events ... */
-	error = ep_poll(ep, events, maxevents, timeout);
+	error = ep_poll(ep, events, maxevents, to);
 
 error_fput:
 	fdput(f);
@@ -2220,16 +2227,19 @@ error_fput:
 SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events,
 		int, maxevents, int, timeout)
 {
-	return do_epoll_wait(epfd, events, maxevents, timeout);
+	struct timespec64 to;
+
+	return do_epoll_wait(epfd, events, maxevents,
+			     ep_timeout_to_timespec(&to, timeout));
 }
 
 /*
  * Implement the event wait interface for the eventpoll file. It is the kernel
  * part of the user space epoll_pwait(2).
  */
-SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events,
-		int, maxevents, int, timeout, const sigset_t __user *, sigmask,
-		size_t, sigsetsize)
+static int do_epoll_pwait(int epfd, struct epoll_event __user *events,
+			  int maxevents, struct timespec64 *to,
+			  const sigset_t __user *sigmask, size_t sigsetsize)
 {
 	int error;
 
@@ -2241,18 +2251,47 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events,
 	if (error)
 		return error;
 
-	error = do_epoll_wait(epfd, events, maxevents, timeout);
+	error = do_epoll_wait(epfd, events, maxevents, to);
+
 	restore_saved_sigmask_unless(error == -EINTR);
 
 	return error;
 }
 
+SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events,
+		int, maxevents, int, timeout, const sigset_t __user *, sigmask,
+		size_t, sigsetsize)
+{
+	struct timespec64 to;
+
+	return do_epoll_pwait(epfd, events, maxevents,
+			      ep_timeout_to_timespec(&to, timeout),
+			      sigmask, sigsetsize);
+}
+
+SYSCALL_DEFINE6(epoll_pwait2, int, epfd, struct epoll_event __user *, events,
+		int, maxevents, const struct __kernel_timespec __user *, timeout,
+		const sigset_t __user *, sigmask, size_t, sigsetsize)
+{
+	struct timespec64 ts, *to = NULL;
+
+	if (timeout) {
+		if (get_timespec64(&ts, timeout))
+			return -EFAULT;
+		to = &ts;
+		if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
+			return -EINVAL;
+	}
+
+	return do_epoll_pwait(epfd, events, maxevents, to,
+			      sigmask, sigsetsize);
+}
+
 #ifdef CONFIG_COMPAT
-COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd,
-			struct epoll_event __user *, events,
-			int, maxevents, int, timeout,
-			const compat_sigset_t __user *, sigmask,
-			compat_size_t, sigsetsize)
+static int do_compat_epoll_pwait(int epfd, struct epoll_event __user *events,
+				 int maxevents, struct timespec64 *timeout,
+				 const compat_sigset_t __user *sigmask,
+				 compat_size_t sigsetsize)
 {
 	long err;
 
@@ -2265,10 +2304,46 @@ COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd,
 		return err;
 
 	err = do_epoll_wait(epfd, events, maxevents, timeout);
+
 	restore_saved_sigmask_unless(err == -EINTR);
 
 	return err;
 }
+
+COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd,
+		       struct epoll_event __user *, events,
+		       int, maxevents, int, timeout,
+		       const compat_sigset_t __user *, sigmask,
+		       compat_size_t, sigsetsize)
+{
+	struct timespec64 to;
+
+	return do_compat_epoll_pwait(epfd, events, maxevents,
+				     ep_timeout_to_timespec(&to, timeout),
+				     sigmask, sigsetsize);
+}
+
+COMPAT_SYSCALL_DEFINE6(epoll_pwait2, int, epfd,
+		       struct epoll_event __user *, events,
+		       int, maxevents,
+		       const struct __kernel_timespec __user *, timeout,
+		       const compat_sigset_t __user *, sigmask,
+		       compat_size_t, sigsetsize)
+{
+	struct timespec64 ts, *to = NULL;
+
+	if (timeout) {
+		if (get_timespec64(&ts, timeout))
+			return -EFAULT;
+		to = &ts;
+		if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec))
+			return -EINVAL;
+	}
+
+	return do_compat_epoll_pwait(epfd, events, maxevents, to,
+				     sigmask, sigsetsize);
+}
+
 #endif
 
 static int __init eventpoll_init(void)
diff --git a/fs/exfat/nls.c b/fs/exfat/nls.c
index 675d0e7058c5..314d5407a1be 100644
--- a/fs/exfat/nls.c
+++ b/fs/exfat/nls.c
@@ -659,7 +659,7 @@ static int exfat_load_upcase_table(struct super_block *sb,
 	unsigned char skip = false;
 	unsigned short *upcase_table;
 
-	upcase_table = kcalloc(UTBL_COUNT, sizeof(unsigned short), GFP_KERNEL);
+	upcase_table = kvcalloc(UTBL_COUNT, sizeof(unsigned short), GFP_KERNEL);
 	if (!upcase_table)
 		return -ENOMEM;
 
@@ -715,7 +715,7 @@ static int exfat_load_default_upcase_table(struct super_block *sb)
 	unsigned short uni = 0, *upcase_table;
 	unsigned int index = 0;
 
-	upcase_table = kcalloc(UTBL_COUNT, sizeof(unsigned short), GFP_KERNEL);
+	upcase_table = kvcalloc(UTBL_COUNT, sizeof(unsigned short), GFP_KERNEL);
 	if (!upcase_table)
 		return -ENOMEM;
 
@@ -803,5 +803,5 @@ load_default:
 
 void exfat_free_upcase_table(struct exfat_sb_info *sbi)
 {
-	kfree(sbi->vol_utbl);
+	kvfree(sbi->vol_utbl);
 }
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 70355ab6740e..14aa45316ad2 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -66,12 +66,6 @@ static inline unsigned ext2_chunk_size(struct inode *inode)
 	return inode->i_sb->s_blocksize;
 }
 
-static inline void ext2_put_page(struct page *page)
-{
-	kunmap(page);
-	put_page(page);
-}
-
 /*
  * Return the offset into page `page_nr' of the last valid
  * byte in that page, plus one.
@@ -336,6 +330,8 @@ ext2_readdir(struct file *file, struct dir_context *ctx)
  * returns the page in which the entry was found (as a parameter - res_page),
  * and the entry itself. Page is returned mapped and unlocked.
  * Entry is guaranteed to be valid.
+ *
+ * On Success ext2_put_page() should be called on *res_page.
  */
 struct ext2_dir_entry_2 *ext2_find_entry (struct inode *dir,
 			const struct qstr *child, struct page **res_page)
@@ -401,6 +397,12 @@ found:
 	return de;
 }
 
+/**
+ * Return the '..' directory entry and the page in which the entry was found
+ * (as a parameter - p).
+ *
+ * On Success ext2_put_page() should be called on *p.
+ */
 struct ext2_dir_entry_2 * ext2_dotdot (struct inode *dir, struct page **p)
 {
 	struct page *page = ext2_get_page(dir, 0, 0);
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 5136b7289e8d..2a4175fbaf5e 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -16,6 +16,8 @@
 #include <linux/blockgroup_lock.h>
 #include <linux/percpu_counter.h>
 #include <linux/rbtree.h>
+#include <linux/mm.h>
+#include <linux/highmem.h>
 
 /* XXX Here for now... not interested in restructing headers JUST now */
 
@@ -745,6 +747,11 @@ extern int ext2_delete_entry (struct ext2_dir_entry_2 *, struct page *);
 extern int ext2_empty_dir (struct inode *);
 extern struct ext2_dir_entry_2 * ext2_dotdot (struct inode *, struct page **);
 extern void ext2_set_link(struct inode *, struct ext2_dir_entry_2 *, struct page *, struct inode *, int);
+static inline void ext2_put_page(struct page *page)
+{
+	kunmap(page);
+	put_page(page);
+}
 
 /* ialloc.c */
 extern struct inode * ext2_new_inode (struct inode *, umode_t, const struct qstr *);
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c
index 11c5c6fe75bb..78c417d3c898 100644
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -1256,6 +1256,7 @@ do_indirects:
 				mark_inode_dirty(inode);
 				ext2_free_branches(inode, &nr, &nr+1, 3);
 			}
+			break;
 		case EXT2_TIND_BLOCK:
 			;
 	}
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 5bf2c145643b..ea980f1e2e99 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -389,23 +389,18 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
 	if (dir_de) {
 		if (old_dir != new_dir)
 			ext2_set_link(old_inode, dir_de, dir_page, new_dir, 0);
-		else {
-			kunmap(dir_page);
-			put_page(dir_page);
-		}
+		else
+			ext2_put_page(dir_page);
 		inode_dec_link_count(old_dir);
 	}
 	return 0;
 
 
 out_dir:
-	if (dir_de) {
-		kunmap(dir_page);
-		put_page(dir_page);
-	}
+	if (dir_de)
+		ext2_put_page(dir_page);
 out_old:
-	kunmap(old_page);
-	put_page(old_page);
+	ext2_put_page(old_page);
 out:
 	return err;
 }
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 09f1fe676972..6c4753277916 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1070,7 +1070,7 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
 					/ EXT2_BLOCKS_PER_GROUP(sb)) + 1;
 	db_count = (sbi->s_groups_count + EXT2_DESC_PER_BLOCK(sb) - 1) /
 		   EXT2_DESC_PER_BLOCK(sb);
-	sbi->s_group_desc = kmalloc_array (db_count,
+	sbi->s_group_desc = kmalloc_array(db_count,
 					   sizeof(struct buffer_head *),
 					   GFP_KERNEL);
 	if (sbi->s_group_desc == NULL) {
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index c6d16353326a..5ed870614c8d 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -657,10 +657,3 @@ const struct file_operations ext4_dir_operations = {
 	.fsync		= ext4_sync_file,
 	.release	= ext4_release_dir,
 };
-
-#ifdef CONFIG_UNICODE
-const struct dentry_operations ext4_dentry_ops = {
-	.d_hash = generic_ci_d_hash,
-	.d_compare = generic_ci_d_compare,
-};
-#endif
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 65ecaf96d0a4..c64ea8f59ea7 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -3381,10 +3381,6 @@ static inline void ext4_unlock_group(struct super_block *sb,
 /* dir.c */
 extern const struct file_operations ext4_dir_operations;
 
-#ifdef CONFIG_UNICODE
-extern const struct dentry_operations ext4_dentry_ops;
-#endif
-
 /* file.c */
 extern const struct inode_operations ext4_file_inode_operations;
 extern const struct file_operations ext4_file_operations;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 5fa8436cd5fa..326fe402e495 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1608,6 +1608,7 @@ static struct buffer_head *ext4_lookup_entry(struct inode *dir,
 	struct buffer_head *bh;
 
 	err = ext4_fname_prepare_lookup(dir, dentry, &fname);
+	generic_set_encrypted_ci_d_ops(dentry);
 	if (err == -ENOENT)
 		return NULL;
 	if (err)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index a2ec60fa8811..830c196ec069 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -4963,11 +4963,6 @@ no_journal:
 		goto failed_mount4;
 	}
 
-#ifdef CONFIG_UNICODE
-	if (sb->s_encoding)
-		sb->s_d_op = &ext4_dentry_ops;
-#endif
-
 	sb->s_root = d_make_root(root);
 	if (!sb->s_root) {
 		ext4_msg(sb, KERN_ERR, "get root dentry failed");
diff --git a/fs/f2fs/acl.c b/fs/f2fs/acl.c
index 306413589827..1e5e9b1136ee 100644
--- a/fs/f2fs/acl.c
+++ b/fs/f2fs/acl.c
@@ -384,7 +384,7 @@ int f2fs_init_acl(struct inode *inode, struct inode *dir, struct page *ipage,
 							struct page *dpage)
 {
 	struct posix_acl *default_acl = NULL, *acl = NULL;
-	int error = 0;
+	int error;
 
 	error = f2fs_acl_create(dir, &inode->i_mode, &default_acl, &acl, dpage);
 	if (error)
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 54a1905af052..897edb7c951a 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -37,7 +37,7 @@ void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io)
 struct page *f2fs_grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
 {
 	struct address_space *mapping = META_MAPPING(sbi);
-	struct page *page = NULL;
+	struct page *page;
 repeat:
 	page = f2fs_grab_cache_page(mapping, index, false);
 	if (!page) {
@@ -348,13 +348,13 @@ static int f2fs_write_meta_pages(struct address_space *mapping,
 		goto skip_write;
 
 	/* if locked failed, cp will flush dirty pages instead */
-	if (!mutex_trylock(&sbi->cp_mutex))
+	if (!down_write_trylock(&sbi->cp_global_sem))
 		goto skip_write;
 
 	trace_f2fs_writepages(mapping->host, wbc, META);
 	diff = nr_pages_to_write(sbi, META, wbc);
 	written = f2fs_sync_meta_pages(sbi, META, wbc->nr_to_write, FS_META_IO);
-	mutex_unlock(&sbi->cp_mutex);
+	up_write(&sbi->cp_global_sem);
 	wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff);
 	return 0;
 
@@ -1385,6 +1385,26 @@ static void commit_checkpoint(struct f2fs_sb_info *sbi,
 	f2fs_submit_merged_write(sbi, META_FLUSH);
 }
 
+static inline u64 get_sectors_written(struct block_device *bdev)
+{
+	return (u64)part_stat_read(bdev, sectors[STAT_WRITE]);
+}
+
+u64 f2fs_get_sectors_written(struct f2fs_sb_info *sbi)
+{
+	if (f2fs_is_multi_device(sbi)) {
+		u64 sectors = 0;
+		int i;
+
+		for (i = 0; i < sbi->s_ndevs; i++)
+			sectors += get_sectors_written(FDEV(i).bdev);
+
+		return sectors;
+	}
+
+	return get_sectors_written(sbi->sb->s_bdev);
+}
+
 static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 {
 	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
@@ -1488,8 +1508,9 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 	start_blk += data_sum_blocks;
 
 	/* Record write statistics in the hot node summary */
-	kbytes_written = sbi->kbytes_written + BD_PART_WRITTEN(sbi);
-
+	kbytes_written = sbi->kbytes_written;
+	kbytes_written += (f2fs_get_sectors_written(sbi) -
+				sbi->sectors_written_start) >> 1;
 	seg_i->journal->info.kbytes_written = cpu_to_le64(kbytes_written);
 
 	if (__remain_node_summaries(cpc->reason)) {
@@ -1569,7 +1590,7 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 		f2fs_warn(sbi, "Start checkpoint disabled!");
 	}
 	if (cpc->reason != CP_RESIZE)
-		mutex_lock(&sbi->cp_mutex);
+		down_write(&sbi->cp_global_sem);
 
 	if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) &&
 		((cpc->reason & CP_FASTBOOT) || (cpc->reason & CP_SYNC) ||
@@ -1597,7 +1618,7 @@ int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 			goto out;
 		}
 
-		if (NM_I(sbi)->dirty_nat_cnt == 0 &&
+		if (NM_I(sbi)->nat_cnt[DIRTY_NAT] == 0 &&
 				SIT_I(sbi)->dirty_sentries == 0 &&
 				prefree_segments(sbi) == 0) {
 			f2fs_flush_sit_entries(sbi, cpc);
@@ -1644,7 +1665,7 @@ stop:
 	trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint");
 out:
 	if (cpc->reason != CP_RESIZE)
-		mutex_unlock(&sbi->cp_mutex);
+		up_write(&sbi->cp_global_sem);
 	return err;
 }
 
diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c
index 14262e0f1cd6..4bcbacfe3325 100644
--- a/fs/f2fs/compress.c
+++ b/fs/f2fs/compress.c
@@ -602,6 +602,7 @@ static int f2fs_compress_pages(struct compress_ctx *cc)
 				f2fs_cops[fi->i_compress_algorithm];
 	unsigned int max_len, new_nr_cpages;
 	struct page **new_cpages;
+	u32 chksum = 0;
 	int i, ret;
 
 	trace_f2fs_compress_pages_start(cc->inode, cc->cluster_idx,
@@ -655,6 +656,11 @@ static int f2fs_compress_pages(struct compress_ctx *cc)
 
 	cc->cbuf->clen = cpu_to_le32(cc->clen);
 
+	if (fi->i_compress_flag & 1 << COMPRESS_CHKSUM)
+		chksum = f2fs_crc32(F2FS_I_SB(cc->inode),
+					cc->cbuf->cdata, cc->clen);
+	cc->cbuf->chksum = cpu_to_le32(chksum);
+
 	for (i = 0; i < COMPRESS_DATA_RESERVED_SIZE; i++)
 		cc->cbuf->reserved[i] = cpu_to_le32(0);
 
@@ -790,6 +796,22 @@ void f2fs_decompress_pages(struct bio *bio, struct page *page, bool verity)
 
 	ret = cops->decompress_pages(dic);
 
+	if (!ret && (fi->i_compress_flag & 1 << COMPRESS_CHKSUM)) {
+		u32 provided = le32_to_cpu(dic->cbuf->chksum);
+		u32 calculated = f2fs_crc32(sbi, dic->cbuf->cdata, dic->clen);
+
+		if (provided != calculated) {
+			if (!is_inode_flag_set(dic->inode, FI_COMPRESS_CORRUPT)) {
+				set_inode_flag(dic->inode, FI_COMPRESS_CORRUPT);
+				printk_ratelimited(
+					"%sF2FS-fs (%s): checksum invalid, nid = %lu, %x vs %x",
+					KERN_INFO, sbi->sb->s_id, dic->inode->i_ino,
+					provided, calculated);
+			}
+			set_sbi_flag(sbi, SBI_NEED_FSCK);
+		}
+	}
+
 out_vunmap_cbuf:
 	vm_unmap_ram(dic->cbuf, dic->nr_cpages);
 out_vunmap_rbuf:
@@ -798,8 +820,6 @@ destroy_decompress_ctx:
 	if (cops->destroy_decompress_ctx)
 		cops->destroy_decompress_ctx(dic);
 out_free_dic:
-	if (verity)
-		atomic_set(&dic->pending_pages, dic->nr_cpages);
 	if (!verity)
 		f2fs_decompress_end_io(dic->rpages, dic->cluster_size,
 								ret, false);
@@ -921,7 +941,7 @@ int f2fs_is_compressed_cluster(struct inode *inode, pgoff_t index)
 
 static bool cluster_may_compress(struct compress_ctx *cc)
 {
-	if (!f2fs_compressed_file(cc->inode))
+	if (!f2fs_need_compress_data(cc->inode))
 		return false;
 	if (f2fs_is_atomic_file(cc->inode))
 		return false;
diff --git a/fs/f2fs/compress.h b/fs/f2fs/compress.h
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/fs/f2fs/compress.h
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index be4da52604ed..aa34d620bec9 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -202,7 +202,7 @@ static void f2fs_verify_bio(struct bio *bio)
 		dic = (struct decompress_io_ctx *)page_private(page);
 
 		if (dic) {
-			if (atomic_dec_return(&dic->pending_pages))
+			if (atomic_dec_return(&dic->verity_pages))
 				continue;
 			f2fs_verify_pages(dic->rpages,
 						dic->cluster_size);
@@ -736,6 +736,9 @@ int f2fs_submit_page_bio(struct f2fs_io_info *fio)
 static bool page_is_mergeable(struct f2fs_sb_info *sbi, struct bio *bio,
 				block_t last_blkaddr, block_t cur_blkaddr)
 {
+	if (unlikely(sbi->max_io_bytes &&
+			bio->bi_iter.bi_size >= sbi->max_io_bytes))
+		return false;
 	if (last_blkaddr + 1 != cur_blkaddr)
 		return false;
 	return __same_bdev(sbi, cur_blkaddr, bio);
@@ -1027,7 +1030,8 @@ static inline bool f2fs_need_verity(const struct inode *inode, pgoff_t idx)
 
 static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
 				      unsigned nr_pages, unsigned op_flag,
-				      pgoff_t first_idx, bool for_write)
+				      pgoff_t first_idx, bool for_write,
+				      bool for_verity)
 {
 	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
 	struct bio *bio;
@@ -1049,7 +1053,7 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
 		post_read_steps |= 1 << STEP_DECRYPT;
 	if (f2fs_compressed_file(inode))
 		post_read_steps |= 1 << STEP_DECOMPRESS_NOWQ;
-	if (f2fs_need_verity(inode, first_idx))
+	if (for_verity && f2fs_need_verity(inode, first_idx))
 		post_read_steps |= 1 << STEP_VERITY;
 
 	if (post_read_steps) {
@@ -1079,7 +1083,7 @@ static int f2fs_submit_page_read(struct inode *inode, struct page *page,
 	struct bio *bio;
 
 	bio = f2fs_grab_read_bio(inode, blkaddr, 1, op_flags,
-					page->index, for_write);
+					page->index, for_write, true);
 	if (IS_ERR(bio))
 		return PTR_ERR(bio);
 
@@ -1750,6 +1754,16 @@ bool f2fs_overwrite_io(struct inode *inode, loff_t pos, size_t len)
 	return true;
 }
 
+static inline u64 bytes_to_blks(struct inode *inode, u64 bytes)
+{
+	return (bytes >> inode->i_blkbits);
+}
+
+static inline u64 blks_to_bytes(struct inode *inode, u64 blks)
+{
+	return (blks << inode->i_blkbits);
+}
+
 static int __get_data_block(struct inode *inode, sector_t iblock,
 			struct buffer_head *bh, int create, int flag,
 			pgoff_t *next_pgofs, int seg_type, bool may_write)
@@ -1758,7 +1772,7 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
 	int err;
 
 	map.m_lblk = iblock;
-	map.m_len = bh->b_size >> inode->i_blkbits;
+	map.m_len = bytes_to_blks(inode, bh->b_size);
 	map.m_next_pgofs = next_pgofs;
 	map.m_next_extent = NULL;
 	map.m_seg_type = seg_type;
@@ -1768,20 +1782,11 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
 	if (!err) {
 		map_bh(bh, inode->i_sb, map.m_pblk);
 		bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags;
-		bh->b_size = (u64)map.m_len << inode->i_blkbits;
+		bh->b_size = blks_to_bytes(inode, map.m_len);
 	}
 	return err;
 }
 
-static int get_data_block(struct inode *inode, sector_t iblock,
-			struct buffer_head *bh_result, int create, int flag,
-			pgoff_t *next_pgofs)
-{
-	return __get_data_block(inode, iblock, bh_result, create,
-							flag, next_pgofs,
-							NO_CHECK_TYPE, create);
-}
-
 static int get_data_block_dio_write(struct inode *inode, sector_t iblock,
 			struct buffer_head *bh_result, int create)
 {
@@ -1800,24 +1805,6 @@ static int get_data_block_dio(struct inode *inode, sector_t iblock,
 				false);
 }
 
-static int get_data_block_bmap(struct inode *inode, sector_t iblock,
-			struct buffer_head *bh_result, int create)
-{
-	return __get_data_block(inode, iblock, bh_result, create,
-						F2FS_GET_BLOCK_BMAP, NULL,
-						NO_CHECK_TYPE, create);
-}
-
-static inline sector_t logical_to_blk(struct inode *inode, loff_t offset)
-{
-	return (offset >> inode->i_blkbits);
-}
-
-static inline loff_t blk_to_logical(struct inode *inode, sector_t blk)
-{
-	return (blk << inode->i_blkbits);
-}
-
 static int f2fs_xattr_fiemap(struct inode *inode,
 				struct fiemap_extent_info *fieinfo)
 {
@@ -1843,7 +1830,7 @@ static int f2fs_xattr_fiemap(struct inode *inode,
 			return err;
 		}
 
-		phys = (__u64)blk_to_logical(inode, ni.blk_addr);
+		phys = blks_to_bytes(inode, ni.blk_addr);
 		offset = offsetof(struct f2fs_inode, i_addr) +
 					sizeof(__le32) * (DEF_ADDRS_PER_INODE -
 					get_inline_xattr_addrs(inode));
@@ -1875,7 +1862,7 @@ static int f2fs_xattr_fiemap(struct inode *inode,
 			return err;
 		}
 
-		phys = (__u64)blk_to_logical(inode, ni.blk_addr);
+		phys = blks_to_bytes(inode, ni.blk_addr);
 		len = inode->i_sb->s_blocksize;
 
 		f2fs_put_page(page, 1);
@@ -1913,7 +1900,7 @@ static loff_t max_inode_blocks(struct inode *inode)
 int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		u64 start, u64 len)
 {
-	struct buffer_head map_bh;
+	struct f2fs_map_blocks map;
 	sector_t start_blk, last_blk;
 	pgoff_t next_pgofs;
 	u64 logical = 0, phys = 0, size = 0;
@@ -1945,29 +1932,31 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 			goto out;
 	}
 
-	if (logical_to_blk(inode, len) == 0)
-		len = blk_to_logical(inode, 1);
+	if (bytes_to_blks(inode, len) == 0)
+		len = blks_to_bytes(inode, 1);
 
-	start_blk = logical_to_blk(inode, start);
-	last_blk = logical_to_blk(inode, start + len - 1);
+	start_blk = bytes_to_blks(inode, start);
+	last_blk = bytes_to_blks(inode, start + len - 1);
 
 next:
-	memset(&map_bh, 0, sizeof(struct buffer_head));
-	map_bh.b_size = len;
+	memset(&map, 0, sizeof(map));
+	map.m_lblk = start_blk;
+	map.m_len = bytes_to_blks(inode, len);
+	map.m_next_pgofs = &next_pgofs;
+	map.m_seg_type = NO_CHECK_TYPE;
 
 	if (compr_cluster)
-		map_bh.b_size = blk_to_logical(inode, cluster_size - 1);
+		map.m_len = cluster_size - 1;
 
-	ret = get_data_block(inode, start_blk, &map_bh, 0,
-					F2FS_GET_BLOCK_FIEMAP, &next_pgofs);
+	ret = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_FIEMAP);
 	if (ret)
 		goto out;
 
 	/* HOLE */
-	if (!buffer_mapped(&map_bh)) {
+	if (!(map.m_flags & F2FS_MAP_FLAGS)) {
 		start_blk = next_pgofs;
 
-		if (blk_to_logical(inode, start_blk) < blk_to_logical(inode,
+		if (blks_to_bytes(inode, start_blk) < blks_to_bytes(inode,
 						max_inode_blocks(inode)))
 			goto prep_next;
 
@@ -1993,9 +1982,9 @@ next:
 		compr_cluster = false;
 
 
-		logical = blk_to_logical(inode, start_blk - 1);
-		phys = blk_to_logical(inode, map_bh.b_blocknr);
-		size = blk_to_logical(inode, cluster_size);
+		logical = blks_to_bytes(inode, start_blk - 1);
+		phys = blks_to_bytes(inode, map.m_pblk);
+		size = blks_to_bytes(inode, cluster_size);
 
 		flags |= FIEMAP_EXTENT_ENCODED;
 
@@ -2007,20 +1996,20 @@ next:
 		goto prep_next;
 	}
 
-	if (map_bh.b_blocknr == COMPRESS_ADDR) {
+	if (map.m_pblk == COMPRESS_ADDR) {
 		compr_cluster = true;
 		start_blk++;
 		goto prep_next;
 	}
 
-	logical = blk_to_logical(inode, start_blk);
-	phys = blk_to_logical(inode, map_bh.b_blocknr);
-	size = map_bh.b_size;
+	logical = blks_to_bytes(inode, start_blk);
+	phys = blks_to_bytes(inode, map.m_pblk);
+	size = blks_to_bytes(inode, map.m_len);
 	flags = 0;
-	if (buffer_unwritten(&map_bh))
+	if (map.m_flags & F2FS_MAP_UNWRITTEN)
 		flags = FIEMAP_EXTENT_UNWRITTEN;
 
-	start_blk += logical_to_blk(inode, size);
+	start_blk += bytes_to_blks(inode, size);
 
 prep_next:
 	cond_resched();
@@ -2053,8 +2042,7 @@ static int f2fs_read_single_page(struct inode *inode, struct page *page,
 					bool is_readahead)
 {
 	struct bio *bio = *bio_ret;
-	const unsigned blkbits = inode->i_blkbits;
-	const unsigned blocksize = 1 << blkbits;
+	const unsigned blocksize = blks_to_bytes(inode, 1);
 	sector_t block_in_file;
 	sector_t last_block;
 	sector_t last_block_in_file;
@@ -2063,8 +2051,8 @@ static int f2fs_read_single_page(struct inode *inode, struct page *page,
 
 	block_in_file = (sector_t)page_index(page);
 	last_block = block_in_file + nr_pages;
-	last_block_in_file = (f2fs_readpage_limit(inode) + blocksize - 1) >>
-							blkbits;
+	last_block_in_file = bytes_to_blks(inode,
+			f2fs_readpage_limit(inode) + blocksize - 1);
 	if (last_block > last_block_in_file)
 		last_block = last_block_in_file;
 
@@ -2133,7 +2121,7 @@ submit_and_realloc:
 	if (bio == NULL) {
 		bio = f2fs_grab_read_bio(inode, block_nr, nr_pages,
 				is_readahead ? REQ_RAHEAD : 0, page->index,
-				false);
+				false, true);
 		if (IS_ERR(bio)) {
 			ret = PTR_ERR(bio);
 			bio = NULL;
@@ -2177,16 +2165,17 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
 	struct bio *bio = *bio_ret;
 	unsigned int start_idx = cc->cluster_idx << cc->log_cluster_size;
 	sector_t last_block_in_file;
-	const unsigned blkbits = inode->i_blkbits;
-	const unsigned blocksize = 1 << blkbits;
+	const unsigned blocksize = blks_to_bytes(inode, 1);
 	struct decompress_io_ctx *dic = NULL;
+	struct bio_post_read_ctx *ctx;
+	bool for_verity = false;
 	int i;
 	int ret = 0;
 
 	f2fs_bug_on(sbi, f2fs_cluster_is_empty(cc));
 
-	last_block_in_file = (f2fs_readpage_limit(inode) +
-					blocksize - 1) >> blkbits;
+	last_block_in_file = bytes_to_blks(inode,
+			f2fs_readpage_limit(inode) + blocksize - 1);
 
 	/* get rid of pages beyond EOF */
 	for (i = 0; i < cc->cluster_size; i++) {
@@ -2245,10 +2234,29 @@ int f2fs_read_multi_pages(struct compress_ctx *cc, struct bio **bio_ret,
 		goto out_put_dnode;
 	}
 
+	/*
+	 * It's possible to enable fsverity on the fly when handling a cluster,
+	 * which requires complicated error handling. Instead of adding more
+	 * complexity, let's give a rule where end_io post-processes fsverity
+	 * per cluster. In order to do that, we need to submit bio, if previous
+	 * bio sets a different post-process policy.
+	 */
+	if (fsverity_active(cc->inode)) {
+		atomic_set(&dic->verity_pages, cc->nr_cpages);
+		for_verity = true;
+
+		if (bio) {
+			ctx = bio->bi_private;
+			if (!(ctx->enabled_steps & (1 << STEP_VERITY))) {
+				__submit_bio(sbi, bio, DATA);
+				bio = NULL;
+			}
+		}
+	}
+
 	for (i = 0; i < dic->nr_cpages; i++) {
 		struct page *page = dic->cpages[i];
 		block_t blkaddr;
-		struct bio_post_read_ctx *ctx;
 
 		blkaddr = data_blkaddr(dn.inode, dn.node_page,
 						dn.ofs_in_node + i + 1);
@@ -2264,17 +2272,31 @@ submit_and_realloc:
 		if (!bio) {
 			bio = f2fs_grab_read_bio(inode, blkaddr, nr_pages,
 					is_readahead ? REQ_RAHEAD : 0,
-					page->index, for_write);
+					page->index, for_write, for_verity);
 			if (IS_ERR(bio)) {
+				unsigned int remained = dic->nr_cpages - i;
+				bool release = false;
+
 				ret = PTR_ERR(bio);
 				dic->failed = true;
-				if (!atomic_sub_return(dic->nr_cpages - i,
-							&dic->pending_pages)) {
+
+				if (for_verity) {
+					if (!atomic_sub_return(remained,
+						&dic->verity_pages))
+						release = true;
+				} else {
+					if (!atomic_sub_return(remained,
+						&dic->pending_pages))
+						release = true;
+				}
+
+				if (release) {
 					f2fs_decompress_end_io(dic->rpages,
-							cc->cluster_size, true,
-							false);
+						cc->cluster_size, true,
+						false);
 					f2fs_free_dic(dic);
 				}
+
 				f2fs_put_dnode(&dn);
 				*bio_ret = NULL;
 				return ret;
@@ -3164,7 +3186,7 @@ static inline bool __should_serialize_io(struct inode *inode,
 	if (IS_NOQUOTA(inode))
 		return false;
 
-	if (f2fs_compressed_file(inode))
+	if (f2fs_need_compress_data(inode))
 		return true;
 	if (wbc->sync_mode != WB_SYNC_ALL)
 		return true;
@@ -3799,9 +3821,6 @@ static sector_t f2fs_bmap_compress(struct inode *inode, sector_t block)
 static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
 {
 	struct inode *inode = mapping->host;
-	struct buffer_head tmp = {
-		.b_size = i_blocksize(inode),
-	};
 	sector_t blknr = 0;
 
 	if (f2fs_has_inline_data(inode))
@@ -3818,8 +3837,16 @@ static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
 	if (f2fs_compressed_file(inode)) {
 		blknr = f2fs_bmap_compress(inode, block);
 	} else {
-		if (!get_data_block_bmap(inode, block, &tmp, 0))
-			blknr = tmp.b_blocknr;
+		struct f2fs_map_blocks map;
+
+		memset(&map, 0, sizeof(map));
+		map.m_lblk = block;
+		map.m_len = 1;
+		map.m_next_pgofs = NULL;
+		map.m_seg_type = NO_CHECK_TYPE;
+
+		if (!f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_BMAP))
+			blknr = map.m_pblk;
 	}
 out:
 	trace_f2fs_bmap(inode, block, blknr);
@@ -3895,7 +3922,7 @@ static int check_swap_activate_fast(struct swap_info_struct *sis,
 	sector_t highest_pblock = 0;
 	int nr_extents = 0;
 	unsigned long nr_pblocks;
-	unsigned long len;
+	u64 len;
 	int ret;
 
 	/*
@@ -3903,29 +3930,31 @@ static int check_swap_activate_fast(struct swap_info_struct *sis,
 	 * to be very smart.
 	 */
 	cur_lblock = 0;
-	last_lblock = logical_to_blk(inode, i_size_read(inode));
+	last_lblock = bytes_to_blks(inode, i_size_read(inode));
 	len = i_size_read(inode);
 
 	while (cur_lblock <= last_lblock && cur_lblock < sis->max) {
-		struct buffer_head map_bh;
+		struct f2fs_map_blocks map;
 		pgoff_t next_pgofs;
 
 		cond_resched();
 
-		memset(&map_bh, 0, sizeof(struct buffer_head));
-		map_bh.b_size = len - cur_lblock;
+		memset(&map, 0, sizeof(map));
+		map.m_lblk = cur_lblock;
+		map.m_len = bytes_to_blks(inode, len) - cur_lblock;
+		map.m_next_pgofs = &next_pgofs;
+		map.m_seg_type = NO_CHECK_TYPE;
 
-		ret = get_data_block(inode, cur_lblock, &map_bh, 0,
-					F2FS_GET_BLOCK_FIEMAP, &next_pgofs);
+		ret = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_FIEMAP);
 		if (ret)
 			goto err_out;
 
 		/* hole */
-		if (!buffer_mapped(&map_bh))
+		if (!(map.m_flags & F2FS_MAP_FLAGS))
 			goto err_out;
 
-		pblock = map_bh.b_blocknr;
-		nr_pblocks = logical_to_blk(inode, map_bh.b_size);
+		pblock = map.m_pblk;
+		nr_pblocks = map.m_len;
 
 		if (cur_lblock + nr_pblocks >= sis->max)
 			nr_pblocks = sis->max - cur_lblock;
@@ -3968,7 +3997,6 @@ static int check_swap_activate(struct swap_info_struct *sis,
 	struct inode *inode = mapping->host;
 	unsigned blocks_per_page;
 	unsigned long page_no;
-	unsigned blkbits;
 	sector_t probe_block;
 	sector_t last_block;
 	sector_t lowest_block = -1;
@@ -3979,8 +4007,7 @@ static int check_swap_activate(struct swap_info_struct *sis,
 	if (PAGE_SIZE == F2FS_BLKSIZE)
 		return check_swap_activate_fast(sis, swap_file, span);
 
-	blkbits = inode->i_blkbits;
-	blocks_per_page = PAGE_SIZE >> blkbits;
+	blocks_per_page = bytes_to_blks(inode, PAGE_SIZE);
 
 	/*
 	 * Map all the blocks into the extent list.  This code doesn't try
@@ -3988,7 +4015,7 @@ static int check_swap_activate(struct swap_info_struct *sis,
 	 */
 	probe_block = 0;
 	page_no = 0;
-	last_block = i_size_read(inode) >> blkbits;
+	last_block = bytes_to_blks(inode, i_size_read(inode));
 	while ((probe_block + blocks_per_page) <= last_block &&
 			page_no < sis->max) {
 		unsigned block_in_page;
@@ -4028,7 +4055,7 @@ static int check_swap_activate(struct swap_info_struct *sis,
 			}
 		}
 
-		first_block >>= (PAGE_SHIFT - blkbits);
+		first_block >>= (PAGE_SHIFT - inode->i_blkbits);
 		if (page_no) {	/* exclude the header page */
 			if (first_block < lowest_block)
 				lowest_block = first_block;
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index a8357fd4f5fa..197c914119da 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -145,8 +145,8 @@ static void update_general_status(struct f2fs_sb_info *sbi)
 		si->node_pages = NODE_MAPPING(sbi)->nrpages;
 	if (sbi->meta_inode)
 		si->meta_pages = META_MAPPING(sbi)->nrpages;
-	si->nats = NM_I(sbi)->nat_cnt;
-	si->dirty_nats = NM_I(sbi)->dirty_nat_cnt;
+	si->nats = NM_I(sbi)->nat_cnt[TOTAL_NAT];
+	si->dirty_nats = NM_I(sbi)->nat_cnt[DIRTY_NAT];
 	si->sits = MAIN_SEGS(sbi);
 	si->dirty_sits = SIT_I(sbi)->dirty_sentries;
 	si->free_nids = NM_I(sbi)->nid_cnt[FREE_NID];
@@ -278,9 +278,10 @@ get_cache:
 	si->cache_mem += (NM_I(sbi)->nid_cnt[FREE_NID] +
 				NM_I(sbi)->nid_cnt[PREALLOC_NID]) *
 				sizeof(struct free_nid);
-	si->cache_mem += NM_I(sbi)->nat_cnt * sizeof(struct nat_entry);
-	si->cache_mem += NM_I(sbi)->dirty_nat_cnt *
-					sizeof(struct nat_entry_set);
+	si->cache_mem += NM_I(sbi)->nat_cnt[TOTAL_NAT] *
+				sizeof(struct nat_entry);
+	si->cache_mem += NM_I(sbi)->nat_cnt[DIRTY_NAT] *
+				sizeof(struct nat_entry_set);
 	si->cache_mem += si->inmem_pages * sizeof(struct inmem_pages);
 	for (i = 0; i < MAX_INO_ENTRY; i++)
 		si->cache_mem += sbi->im[i].ino_num * sizeof(struct ino_entry);
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 049500f1e764..e6270a867be1 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -5,6 +5,7 @@
  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  *             http://www.samsung.com/
  */
+#include <asm/unaligned.h>
 #include <linux/fs.h>
 #include <linux/f2fs_fs.h>
 #include <linux/sched/signal.h>
@@ -206,30 +207,55 @@ static struct f2fs_dir_entry *find_in_block(struct inode *dir,
 /*
  * Test whether a case-insensitive directory entry matches the filename
  * being searched for.
+ *
+ * Returns 1 for a match, 0 for no match, and -errno on an error.
  */
-static bool f2fs_match_ci_name(const struct inode *dir, const struct qstr *name,
+static int f2fs_match_ci_name(const struct inode *dir, const struct qstr *name,
 			       const u8 *de_name, u32 de_name_len)
 {
 	const struct super_block *sb = dir->i_sb;
 	const struct unicode_map *um = sb->s_encoding;
+	struct fscrypt_str decrypted_name = FSTR_INIT(NULL, de_name_len);
 	struct qstr entry = QSTR_INIT(de_name, de_name_len);
 	int res;
 
+	if (IS_ENCRYPTED(dir)) {
+		const struct fscrypt_str encrypted_name =
+			FSTR_INIT((u8 *)de_name, de_name_len);
+
+		if (WARN_ON_ONCE(!fscrypt_has_encryption_key(dir)))
+			return -EINVAL;
+
+		decrypted_name.name = kmalloc(de_name_len, GFP_KERNEL);
+		if (!decrypted_name.name)
+			return -ENOMEM;
+		res = fscrypt_fname_disk_to_usr(dir, 0, 0, &encrypted_name,
+						&decrypted_name);
+		if (res < 0)
+			goto out;
+		entry.name = decrypted_name.name;
+		entry.len = decrypted_name.len;
+	}
+
 	res = utf8_strncasecmp_folded(um, name, &entry);
-	if (res < 0) {
-		/*
-		 * In strict mode, ignore invalid names.  In non-strict mode,
-		 * fall back to treating them as opaque byte sequences.
-		 */
-		if (sb_has_strict_encoding(sb) || name->len != entry.len)
-			return false;
-		return !memcmp(name->name, entry.name, name->len);
+	/*
+	 * In strict mode, ignore invalid names.  In non-strict mode,
+	 * fall back to treating them as opaque byte sequences.
+	 */
+	if (res < 0 && !sb_has_strict_encoding(sb)) {
+		res = name->len == entry.len &&
+				memcmp(name->name, entry.name, name->len) == 0;
+	} else {
+		/* utf8_strncasecmp_folded returns 0 on match */
+		res = (res == 0);
 	}
-	return res == 0;
+out:
+	kfree(decrypted_name.name);
+	return res;
 }
 #endif /* CONFIG_UNICODE */
 
-static inline bool f2fs_match_name(const struct inode *dir,
+static inline int f2fs_match_name(const struct inode *dir,
 				   const struct f2fs_filename *fname,
 				   const u8 *de_name, u32 de_name_len)
 {
@@ -256,6 +282,7 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
 	struct f2fs_dir_entry *de;
 	unsigned long bit_pos = 0;
 	int max_len = 0;
+	int res = 0;
 
 	if (max_slots)
 		*max_slots = 0;
@@ -273,10 +300,15 @@ struct f2fs_dir_entry *f2fs_find_target_dentry(const struct f2fs_dentry_ptr *d,
 			continue;
 		}
 
-		if (de->hash_code == fname->hash &&
-		    f2fs_match_name(d->inode, fname, d->filename[bit_pos],
-				    le16_to_cpu(de->name_len)))
-			goto found;
+		if (de->hash_code == fname->hash) {
+			res = f2fs_match_name(d->inode, fname,
+					      d->filename[bit_pos],
+					      le16_to_cpu(de->name_len));
+			if (res < 0)
+				return ERR_PTR(res);
+			if (res)
+				goto found;
+		}
 
 		if (max_slots && max_len > *max_slots)
 			*max_slots = max_len;
@@ -326,7 +358,11 @@ static struct f2fs_dir_entry *find_in_level(struct inode *dir,
 		}
 
 		de = find_in_block(dir, dentry_page, fname, &max_slots);
-		if (de) {
+		if (IS_ERR(de)) {
+			*res_page = ERR_CAST(de);
+			de = NULL;
+			break;
+		} else if (de) {
 			*res_page = dentry_page;
 			break;
 		}
@@ -448,17 +484,39 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
 	f2fs_put_page(page, 1);
 }
 
-static void init_dent_inode(const struct f2fs_filename *fname,
+static void init_dent_inode(struct inode *dir, struct inode *inode,
+			    const struct f2fs_filename *fname,
 			    struct page *ipage)
 {
 	struct f2fs_inode *ri;
 
+	if (!fname) /* tmpfile case? */
+		return;
+
 	f2fs_wait_on_page_writeback(ipage, NODE, true, true);
 
 	/* copy name info. to this inode page */
 	ri = F2FS_INODE(ipage);
 	ri->i_namelen = cpu_to_le32(fname->disk_name.len);
 	memcpy(ri->i_name, fname->disk_name.name, fname->disk_name.len);
+	if (IS_ENCRYPTED(dir)) {
+		file_set_enc_name(inode);
+		/*
+		 * Roll-forward recovery doesn't have encryption keys available,
+		 * so it can't compute the dirhash for encrypted+casefolded
+		 * filenames.  Append it to i_name if possible.  Else, disable
+		 * roll-forward recovery of the dentry (i.e., make fsync'ing the
+		 * file force a checkpoint) by setting LOST_PINO.
+		 */
+		if (IS_CASEFOLDED(dir)) {
+			if (fname->disk_name.len + sizeof(f2fs_hash_t) <=
+			    F2FS_NAME_LEN)
+				put_unaligned(fname->hash, (f2fs_hash_t *)
+					&ri->i_name[fname->disk_name.len]);
+			else
+				file_lost_pino(inode);
+		}
+	}
 	set_page_dirty(ipage);
 }
 
@@ -541,11 +599,7 @@ struct page *f2fs_init_inode_metadata(struct inode *inode, struct inode *dir,
 			return page;
 	}
 
-	if (fname) {
-		init_dent_inode(fname, page);
-		if (IS_ENCRYPTED(dir))
-			file_set_enc_name(inode);
-	}
+	init_dent_inode(dir, inode, fname, page);
 
 	/*
 	 * This file should be checkpointed during fsync.
@@ -1091,10 +1145,3 @@ const struct file_operations f2fs_dir_operations = {
 	.compat_ioctl   = f2fs_compat_ioctl,
 #endif
 };
-
-#ifdef CONFIG_UNICODE
-const struct dentry_operations f2fs_dentry_ops = {
-	.d_hash = generic_ci_d_hash,
-	.d_compare = generic_ci_d_compare,
-};
-#endif
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index 9f793923dabe..bb11759191dc 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -33,10 +33,8 @@
 #else
 #define f2fs_bug_on(sbi, condition)					\
 	do {								\
-		if (unlikely(condition)) {				\
-			WARN_ON(1);					\
+		if (WARN_ON(condition))					\
 			set_sbi_flag(sbi, SBI_NEED_FSCK);		\
-		}							\
 	} while (0)
 #endif
 
@@ -147,8 +145,10 @@ struct f2fs_mount_info {
 
 	/* For compression */
 	unsigned char compress_algorithm;	/* algorithm type */
-	unsigned compress_log_size;		/* cluster log size */
+	unsigned char compress_log_size;	/* cluster log size */
+	bool compress_chksum;			/* compressed data chksum */
 	unsigned char compress_ext_cnt;		/* extension count */
+	int compress_mode;			/* compression mode */
 	unsigned char extensions[COMPRESS_EXT_NUM][F2FS_EXTENSION_LEN];	/* extensions */
 };
 
@@ -402,85 +402,6 @@ static inline bool __has_cursum_space(struct f2fs_journal *journal,
 	return size <= MAX_SIT_JENTRIES(journal);
 }
 
-/*
- * f2fs-specific ioctl commands
- */
-#define F2FS_IOCTL_MAGIC		0xf5
-#define F2FS_IOC_START_ATOMIC_WRITE	_IO(F2FS_IOCTL_MAGIC, 1)
-#define F2FS_IOC_COMMIT_ATOMIC_WRITE	_IO(F2FS_IOCTL_MAGIC, 2)
-#define F2FS_IOC_START_VOLATILE_WRITE	_IO(F2FS_IOCTL_MAGIC, 3)
-#define F2FS_IOC_RELEASE_VOLATILE_WRITE	_IO(F2FS_IOCTL_MAGIC, 4)
-#define F2FS_IOC_ABORT_VOLATILE_WRITE	_IO(F2FS_IOCTL_MAGIC, 5)
-#define F2FS_IOC_GARBAGE_COLLECT	_IOW(F2FS_IOCTL_MAGIC, 6, __u32)
-#define F2FS_IOC_WRITE_CHECKPOINT	_IO(F2FS_IOCTL_MAGIC, 7)
-#define F2FS_IOC_DEFRAGMENT		_IOWR(F2FS_IOCTL_MAGIC, 8,	\
-						struct f2fs_defragment)
-#define F2FS_IOC_MOVE_RANGE		_IOWR(F2FS_IOCTL_MAGIC, 9,	\
-						struct f2fs_move_range)
-#define F2FS_IOC_FLUSH_DEVICE		_IOW(F2FS_IOCTL_MAGIC, 10,	\
-						struct f2fs_flush_device)
-#define F2FS_IOC_GARBAGE_COLLECT_RANGE	_IOW(F2FS_IOCTL_MAGIC, 11,	\
-						struct f2fs_gc_range)
-#define F2FS_IOC_GET_FEATURES		_IOR(F2FS_IOCTL_MAGIC, 12, __u32)
-#define F2FS_IOC_SET_PIN_FILE		_IOW(F2FS_IOCTL_MAGIC, 13, __u32)
-#define F2FS_IOC_GET_PIN_FILE		_IOR(F2FS_IOCTL_MAGIC, 14, __u32)
-#define F2FS_IOC_PRECACHE_EXTENTS	_IO(F2FS_IOCTL_MAGIC, 15)
-#define F2FS_IOC_RESIZE_FS		_IOW(F2FS_IOCTL_MAGIC, 16, __u64)
-#define F2FS_IOC_GET_COMPRESS_BLOCKS	_IOR(F2FS_IOCTL_MAGIC, 17, __u64)
-#define F2FS_IOC_RELEASE_COMPRESS_BLOCKS				\
-					_IOR(F2FS_IOCTL_MAGIC, 18, __u64)
-#define F2FS_IOC_RESERVE_COMPRESS_BLOCKS				\
-					_IOR(F2FS_IOCTL_MAGIC, 19, __u64)
-#define F2FS_IOC_SEC_TRIM_FILE		_IOW(F2FS_IOCTL_MAGIC, 20,	\
-						struct f2fs_sectrim_range)
-
-/*
- * should be same as XFS_IOC_GOINGDOWN.
- * Flags for going down operation used by FS_IOC_GOINGDOWN
- */
-#define F2FS_IOC_SHUTDOWN	_IOR('X', 125, __u32)	/* Shutdown */
-#define F2FS_GOING_DOWN_FULLSYNC	0x0	/* going down with full sync */
-#define F2FS_GOING_DOWN_METASYNC	0x1	/* going down with metadata */
-#define F2FS_GOING_DOWN_NOSYNC		0x2	/* going down */
-#define F2FS_GOING_DOWN_METAFLUSH	0x3	/* going down with meta flush */
-#define F2FS_GOING_DOWN_NEED_FSCK	0x4	/* going down to trigger fsck */
-
-/*
- * Flags used by F2FS_IOC_SEC_TRIM_FILE
- */
-#define F2FS_TRIM_FILE_DISCARD		0x1	/* send discard command */
-#define F2FS_TRIM_FILE_ZEROOUT		0x2	/* zero out */
-#define F2FS_TRIM_FILE_MASK		0x3
-
-struct f2fs_gc_range {
-	u32 sync;
-	u64 start;
-	u64 len;
-};
-
-struct f2fs_defragment {
-	u64 start;
-	u64 len;
-};
-
-struct f2fs_move_range {
-	u32 dst_fd;		/* destination fd */
-	u64 pos_in;		/* start position in src_fd */
-	u64 pos_out;		/* start position in dst_fd */
-	u64 len;		/* size to move */
-};
-
-struct f2fs_flush_device {
-	u32 dev_num;		/* device number to flush */
-	u32 segments;		/* # of segments to flush */
-};
-
-struct f2fs_sectrim_range {
-	u64 start;
-	u64 len;
-	u64 flags;
-};
-
 /* for inline stuff */
 #define DEF_INLINE_RESERVED_SIZE	1
 static inline int get_extra_isize(struct inode *inode);
@@ -533,9 +454,11 @@ struct f2fs_filename {
 #ifdef CONFIG_UNICODE
 	/*
 	 * For casefolded directories: the casefolded name, but it's left NULL
-	 * if the original name is not valid Unicode or if the filesystem is
-	 * doing an internal operation where usr_fname is also NULL.  In these
-	 * cases we fall back to treating the name as an opaque byte sequence.
+	 * if the original name is not valid Unicode, if the directory is both
+	 * casefolded and encrypted and its encryption key is unavailable, or if
+	 * the filesystem is doing an internal operation where usr_fname is also
+	 * NULL.  In all these cases we fall back to treating the name as an
+	 * opaque byte sequence.
 	 */
 	struct fscrypt_str cf_name;
 #endif
@@ -753,7 +676,9 @@ enum {
 	FI_ATOMIC_REVOKE_REQUEST, /* request to drop atomic data */
 	FI_VERITY_IN_PROGRESS,	/* building fs-verity Merkle tree */
 	FI_COMPRESSED_FILE,	/* indicate file's data can be compressed */
+	FI_COMPRESS_CORRUPT,	/* indicate compressed cluster is corrupted */
 	FI_MMAP_FILE,		/* indicate file was mmapped */
+	FI_ENABLE_COMPRESS,	/* enable compression in "user" compression mode */
 	FI_MAX,			/* max flag, never be used */
 };
 
@@ -810,6 +735,7 @@ struct f2fs_inode_info {
 	atomic_t i_compr_blocks;		/* # of compressed blocks */
 	unsigned char i_compress_algorithm;	/* algorithm type */
 	unsigned char i_log_cluster_size;	/* log of cluster size */
+	unsigned short i_compress_flag;		/* compress flag */
 	unsigned int i_cluster_size;		/* cluster size */
 };
 
@@ -894,6 +820,13 @@ enum nid_state {
 	MAX_NID_STATE,
 };
 
+enum nat_state {
+	TOTAL_NAT,
+	DIRTY_NAT,
+	RECLAIMABLE_NAT,
+	MAX_NAT_STATE,
+};
+
 struct f2fs_nm_info {
 	block_t nat_blkaddr;		/* base disk address of NAT */
 	nid_t max_nid;			/* maximum possible node ids */
@@ -909,8 +842,7 @@ struct f2fs_nm_info {
 	struct rw_semaphore nat_tree_lock;	/* protect nat_tree_lock */
 	struct list_head nat_entries;	/* cached nat entry list (clean) */
 	spinlock_t nat_list_lock;	/* protect clean nat entry list */
-	unsigned int nat_cnt;		/* the # of cached nat entries */
-	unsigned int dirty_nat_cnt;	/* total num of nat entries in set */
+	unsigned int nat_cnt[MAX_NAT_STATE]; /* the # of cached nat entries */
 	unsigned int nat_blocks;	/* # of nat blocks */
 
 	/* free node ids management */
@@ -1320,6 +1252,18 @@ enum fsync_mode {
 	FSYNC_MODE_NOBARRIER,	/* fsync behaves nobarrier based on posix */
 };
 
+enum {
+	COMPR_MODE_FS,		/*
+				 * automatically compress compression
+				 * enabled files
+				 */
+	COMPR_MODE_USER,	/*
+				 * automatical compression is disabled.
+				 * user can control the file compression
+				 * using ioctls
+				 */
+};
+
 /*
  * this value is set in page as a private data which indicate that
  * the page is atomically written, and it is in inmem_pages list.
@@ -1349,9 +1293,15 @@ enum compress_algorithm_type {
 	COMPRESS_MAX,
 };
 
-#define COMPRESS_DATA_RESERVED_SIZE		5
+enum compress_flag {
+	COMPRESS_CHKSUM,
+	COMPRESS_MAX_FLAG,
+};
+
+#define COMPRESS_DATA_RESERVED_SIZE		4
 struct compress_data {
 	__le32 clen;			/* compressed data size */
+	__le32 chksum;			/* compressed data chksum */
 	__le32 reserved[COMPRESS_DATA_RESERVED_SIZE];	/* reserved */
 	u8 cdata[];			/* compressed data */
 };
@@ -1404,6 +1354,7 @@ struct decompress_io_ctx {
 	size_t rlen;			/* valid data length in rbuf */
 	size_t clen;			/* valid data length in cbuf */
 	atomic_t pending_pages;		/* in-flight compressed page count */
+	atomic_t verity_pages;		/* in-flight page count for verity */
 	bool failed;			/* indicate IO error during decompression */
 	void *private;			/* payload buffer for specified decompression algorithm */
 	void *private2;			/* extra payload buffer */
@@ -1446,7 +1397,7 @@ struct f2fs_sb_info {
 	int cur_cp_pack;			/* remain current cp pack */
 	spinlock_t cp_lock;			/* for flag in ckpt */
 	struct inode *meta_inode;		/* cache meta blocks */
-	struct mutex cp_mutex;			/* checkpoint procedure lock */
+	struct rw_semaphore cp_global_sem;	/* checkpoint procedure lock */
 	struct rw_semaphore cp_rwsem;		/* blocking FS operations */
 	struct rw_semaphore node_write;		/* locking node writes */
 	struct rw_semaphore node_change;	/* locking node change */
@@ -1496,6 +1447,7 @@ struct f2fs_sb_info {
 	loff_t max_file_blocks;			/* max block index of file */
 	int dir_level;				/* directory level */
 	int readdir_ra;				/* readahead inode in readdir */
+	u64 max_io_bytes;			/* max io bytes to merge IOs */
 
 	block_t user_block_count;		/* # of user blocks */
 	block_t total_valid_block_count;	/* # of valid blocks */
@@ -1671,13 +1623,6 @@ static inline bool f2fs_is_multi_device(struct f2fs_sb_info *sbi)
 	return sbi->s_ndevs > 1;
 }
 
-/* For write statistics. Suppose sector size is 512 bytes,
- * and the return value is in kbytes. s is of struct f2fs_sb_info.
- */
-#define BD_PART_WRITTEN(s)						 \
-	(((u64)part_stat_read((s)->sb->s_bdev, sectors[STAT_WRITE]) -   \
-		(s)->sectors_written_start) >> 1)
-
 static inline void f2fs_update_time(struct f2fs_sb_info *sbi, int type)
 {
 	unsigned long now = jiffies;
@@ -2477,24 +2422,31 @@ static inline void *f2fs_kmem_cache_alloc(struct kmem_cache *cachep,
 	return entry;
 }
 
-static inline bool is_idle(struct f2fs_sb_info *sbi, int type)
+static inline bool is_inflight_io(struct f2fs_sb_info *sbi, int type)
 {
-	if (sbi->gc_mode == GC_URGENT_HIGH)
-		return true;
-
 	if (get_pages(sbi, F2FS_RD_DATA) || get_pages(sbi, F2FS_RD_NODE) ||
 		get_pages(sbi, F2FS_RD_META) || get_pages(sbi, F2FS_WB_DATA) ||
 		get_pages(sbi, F2FS_WB_CP_DATA) ||
 		get_pages(sbi, F2FS_DIO_READ) ||
 		get_pages(sbi, F2FS_DIO_WRITE))
-		return false;
+		return true;
 
 	if (type != DISCARD_TIME && SM_I(sbi) && SM_I(sbi)->dcc_info &&
 			atomic_read(&SM_I(sbi)->dcc_info->queued_discard))
-		return false;
+		return true;
 
 	if (SM_I(sbi) && SM_I(sbi)->fcc_info &&
 			atomic_read(&SM_I(sbi)->fcc_info->queued_flush))
+		return true;
+	return false;
+}
+
+static inline bool is_idle(struct f2fs_sb_info *sbi, int type)
+{
+	if (sbi->gc_mode == GC_URGENT_HIGH)
+		return true;
+
+	if (is_inflight_io(sbi, type))
 		return false;
 
 	if (sbi->gc_mode == GC_URGENT_LOW &&
@@ -2829,6 +2781,22 @@ static inline int f2fs_compressed_file(struct inode *inode)
 		is_inode_flag_set(inode, FI_COMPRESSED_FILE);
 }
 
+static inline bool f2fs_need_compress_data(struct inode *inode)
+{
+	int compress_mode = F2FS_OPTION(F2FS_I_SB(inode)).compress_mode;
+
+	if (!f2fs_compressed_file(inode))
+		return false;
+
+	if (compress_mode == COMPR_MODE_FS)
+		return true;
+	else if (compress_mode == COMPR_MODE_USER &&
+			is_inode_flag_set(inode, FI_ENABLE_COMPRESS))
+		return true;
+
+	return false;
+}
+
 static inline unsigned int addrs_per_inode(struct inode *inode)
 {
 	unsigned int addrs = CUR_ADDRS_PER_INODE(inode) -
@@ -3445,6 +3413,7 @@ void f2fs_update_dirty_page(struct inode *inode, struct page *page);
 void f2fs_remove_dirty_inode(struct inode *inode);
 int f2fs_sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type);
 void f2fs_wait_on_all_pages(struct f2fs_sb_info *sbi, int type);
+u64 f2fs_get_sectors_written(struct f2fs_sb_info *sbi);
 int f2fs_write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc);
 void f2fs_init_ino_entry_info(struct f2fs_sb_info *sbi);
 int __init f2fs_create_checkpoint_caches(void);
@@ -3769,9 +3738,6 @@ static inline void f2fs_update_sit_info(struct f2fs_sb_info *sbi) {}
 #endif
 
 extern const struct file_operations f2fs_dir_operations;
-#ifdef CONFIG_UNICODE
-extern const struct dentry_operations f2fs_dentry_ops;
-#endif
 extern const struct file_operations f2fs_file_operations;
 extern const struct inode_operations f2fs_file_inode_operations;
 extern const struct address_space_operations f2fs_dblock_aops;
@@ -3963,6 +3929,9 @@ static inline void set_compress_context(struct inode *inode)
 			F2FS_OPTION(sbi).compress_algorithm;
 	F2FS_I(inode)->i_log_cluster_size =
 			F2FS_OPTION(sbi).compress_log_size;
+	F2FS_I(inode)->i_compress_flag =
+			F2FS_OPTION(sbi).compress_chksum ?
+				1 << COMPRESS_CHKSUM : 0;
 	F2FS_I(inode)->i_cluster_size =
 			1 << F2FS_I(inode)->i_log_cluster_size;
 	F2FS_I(inode)->i_flags |= F2FS_COMPR_FL;
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index a9fc482a0e60..f585545277d7 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -31,6 +31,7 @@
 #include "gc.h"
 #include "trace.h"
 #include <trace/events/f2fs.h>
+#include <uapi/linux/f2fs.h>
 
 static vm_fault_t f2fs_filemap_fault(struct vm_fault *vmf)
 {
@@ -412,9 +413,14 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
 		goto fail;
 
 	/* handle inline data case */
-	if (f2fs_has_inline_data(inode) && whence == SEEK_HOLE) {
-		data_ofs = isize;
-		goto found;
+	if (f2fs_has_inline_data(inode)) {
+		if (whence == SEEK_HOLE) {
+			data_ofs = isize;
+			goto found;
+		} else if (whence == SEEK_DATA) {
+			data_ofs = offset;
+			goto found;
+		}
 	}
 
 	pgofs = (pgoff_t)(offset >> PAGE_SHIFT);
@@ -2470,26 +2476,19 @@ out:
 	return ret;
 }
 
-static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg)
+static int __f2fs_ioc_gc_range(struct file *filp, struct f2fs_gc_range *range)
 {
-	struct inode *inode = file_inode(filp);
-	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
-	struct f2fs_gc_range range;
+	struct f2fs_sb_info *sbi = F2FS_I_SB(file_inode(filp));
 	u64 end;
 	int ret;
 
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
-
-	if (copy_from_user(&range, (struct f2fs_gc_range __user *)arg,
-							sizeof(range)))
-		return -EFAULT;
-
 	if (f2fs_readonly(sbi->sb))
 		return -EROFS;
 
-	end = range.start + range.len;
-	if (end < range.start || range.start < MAIN_BLKADDR(sbi) ||
+	end = range->start + range->len;
+	if (end < range->start || range->start < MAIN_BLKADDR(sbi) ||
 					end >= MAX_BLKADDR(sbi))
 		return -EINVAL;
 
@@ -2498,7 +2497,7 @@ static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg)
 		return ret;
 
 do_more:
-	if (!range.sync) {
+	if (!range->sync) {
 		if (!down_write_trylock(&sbi->gc_lock)) {
 			ret = -EBUSY;
 			goto out;
@@ -2507,20 +2506,30 @@ do_more:
 		down_write(&sbi->gc_lock);
 	}
 
-	ret = f2fs_gc(sbi, range.sync, true, GET_SEGNO(sbi, range.start));
+	ret = f2fs_gc(sbi, range->sync, true, GET_SEGNO(sbi, range->start));
 	if (ret) {
 		if (ret == -EBUSY)
 			ret = -EAGAIN;
 		goto out;
 	}
-	range.start += BLKS_PER_SEC(sbi);
-	if (range.start <= end)
+	range->start += BLKS_PER_SEC(sbi);
+	if (range->start <= end)
 		goto do_more;
 out:
 	mnt_drop_write_file(filp);
 	return ret;
 }
 
+static int f2fs_ioc_gc_range(struct file *filp, unsigned long arg)
+{
+	struct f2fs_gc_range range;
+
+	if (copy_from_user(&range, (struct f2fs_gc_range __user *)arg,
+							sizeof(range)))
+		return -EFAULT;
+	return __f2fs_ioc_gc_range(filp, &range);
+}
+
 static int f2fs_ioc_write_checkpoint(struct file *filp, unsigned long arg)
 {
 	struct inode *inode = file_inode(filp);
@@ -2857,9 +2866,9 @@ out:
 	return ret;
 }
 
-static int f2fs_ioc_move_range(struct file *filp, unsigned long arg)
+static int __f2fs_ioc_move_range(struct file *filp,
+				struct f2fs_move_range *range)
 {
-	struct f2fs_move_range range;
 	struct fd dst;
 	int err;
 
@@ -2867,11 +2876,7 @@ static int f2fs_ioc_move_range(struct file *filp, unsigned long arg)
 			!(filp->f_mode & FMODE_WRITE))
 		return -EBADF;
 
-	if (copy_from_user(&range, (struct f2fs_move_range __user *)arg,
-							sizeof(range)))
-		return -EFAULT;
-
-	dst = fdget(range.dst_fd);
+	dst = fdget(range->dst_fd);
 	if (!dst.file)
 		return -EBADF;
 
@@ -2884,21 +2889,25 @@ static int f2fs_ioc_move_range(struct file *filp, unsigned long arg)
 	if (err)
 		goto err_out;
 
-	err = f2fs_move_file_range(filp, range.pos_in, dst.file,
-					range.pos_out, range.len);
+	err = f2fs_move_file_range(filp, range->pos_in, dst.file,
+					range->pos_out, range->len);
 
 	mnt_drop_write_file(filp);
-	if (err)
-		goto err_out;
-
-	if (copy_to_user((struct f2fs_move_range __user *)arg,
-						&range, sizeof(range)))
-		err = -EFAULT;
 err_out:
 	fdput(dst);
 	return err;
 }
 
+static int f2fs_ioc_move_range(struct file *filp, unsigned long arg)
+{
+	struct f2fs_move_range range;
+
+	if (copy_from_user(&range, (struct f2fs_move_range __user *)arg,
+							sizeof(range)))
+		return -EFAULT;
+	return __f2fs_ioc_move_range(filp, &range);
+}
+
 static int f2fs_ioc_flush_device(struct file *filp, unsigned long arg)
 {
 	struct inode *inode = file_inode(filp);
@@ -3935,13 +3944,265 @@ err:
 	return ret;
 }
 
-long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+static int f2fs_ioc_get_compress_option(struct file *filp, unsigned long arg)
 {
-	if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp)))))
-		return -EIO;
-	if (!f2fs_is_checkpoint_ready(F2FS_I_SB(file_inode(filp))))
-		return -ENOSPC;
+	struct inode *inode = file_inode(filp);
+	struct f2fs_comp_option option;
+
+	if (!f2fs_sb_has_compression(F2FS_I_SB(inode)))
+		return -EOPNOTSUPP;
+
+	inode_lock_shared(inode);
+
+	if (!f2fs_compressed_file(inode)) {
+		inode_unlock_shared(inode);
+		return -ENODATA;
+	}
+
+	option.algorithm = F2FS_I(inode)->i_compress_algorithm;
+	option.log_cluster_size = F2FS_I(inode)->i_log_cluster_size;
+
+	inode_unlock_shared(inode);
+
+	if (copy_to_user((struct f2fs_comp_option __user *)arg, &option,
+				sizeof(option)))
+		return -EFAULT;
+
+	return 0;
+}
+
+static int f2fs_ioc_set_compress_option(struct file *filp, unsigned long arg)
+{
+	struct inode *inode = file_inode(filp);
+	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+	struct f2fs_comp_option option;
+	int ret = 0;
 
+	if (!f2fs_sb_has_compression(sbi))
+		return -EOPNOTSUPP;
+
+	if (!(filp->f_mode & FMODE_WRITE))
+		return -EBADF;
+
+	if (copy_from_user(&option, (struct f2fs_comp_option __user *)arg,
+				sizeof(option)))
+		return -EFAULT;
+
+	if (!f2fs_compressed_file(inode) ||
+			option.log_cluster_size < MIN_COMPRESS_LOG_SIZE ||
+			option.log_cluster_size > MAX_COMPRESS_LOG_SIZE ||
+			option.algorithm >= COMPRESS_MAX)
+		return -EINVAL;
+
+	file_start_write(filp);
+	inode_lock(inode);
+
+	if (f2fs_is_mmap_file(inode) || get_dirty_pages(inode)) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	if (inode->i_size != 0) {
+		ret = -EFBIG;
+		goto out;
+	}
+
+	F2FS_I(inode)->i_compress_algorithm = option.algorithm;
+	F2FS_I(inode)->i_log_cluster_size = option.log_cluster_size;
+	F2FS_I(inode)->i_cluster_size = 1 << option.log_cluster_size;
+	f2fs_mark_inode_dirty_sync(inode, true);
+
+	if (!f2fs_is_compress_backend_ready(inode))
+		f2fs_warn(sbi, "compression algorithm is successfully set, "
+			"but current kernel doesn't support this algorithm.");
+out:
+	inode_unlock(inode);
+	file_end_write(filp);
+
+	return ret;
+}
+
+static int redirty_blocks(struct inode *inode, pgoff_t page_idx, int len)
+{
+	DEFINE_READAHEAD(ractl, NULL, inode->i_mapping, page_idx);
+	struct address_space *mapping = inode->i_mapping;
+	struct page *page;
+	pgoff_t redirty_idx = page_idx;
+	int i, page_len = 0, ret = 0;
+
+	page_cache_ra_unbounded(&ractl, len, 0);
+
+	for (i = 0; i < len; i++, page_idx++) {
+		page = read_cache_page(mapping, page_idx, NULL, NULL);
+		if (IS_ERR(page)) {
+			ret = PTR_ERR(page);
+			break;
+		}
+		page_len++;
+	}
+
+	for (i = 0; i < page_len; i++, redirty_idx++) {
+		page = find_lock_page(mapping, redirty_idx);
+		if (!page)
+			ret = -ENOENT;
+		set_page_dirty(page);
+		f2fs_put_page(page, 1);
+		f2fs_put_page(page, 0);
+	}
+
+	return ret;
+}
+
+static int f2fs_ioc_decompress_file(struct file *filp, unsigned long arg)
+{
+	struct inode *inode = file_inode(filp);
+	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+	struct f2fs_inode_info *fi = F2FS_I(inode);
+	pgoff_t page_idx = 0, last_idx;
+	unsigned int blk_per_seg = sbi->blocks_per_seg;
+	int cluster_size = F2FS_I(inode)->i_cluster_size;
+	int count, ret;
+
+	if (!f2fs_sb_has_compression(sbi) ||
+			F2FS_OPTION(sbi).compress_mode != COMPR_MODE_USER)
+		return -EOPNOTSUPP;
+
+	if (!(filp->f_mode & FMODE_WRITE))
+		return -EBADF;
+
+	if (!f2fs_compressed_file(inode))
+		return -EINVAL;
+
+	f2fs_balance_fs(F2FS_I_SB(inode), true);
+
+	file_start_write(filp);
+	inode_lock(inode);
+
+	if (!f2fs_is_compress_backend_ready(inode)) {
+		ret = -EOPNOTSUPP;
+		goto out;
+	}
+
+	if (f2fs_is_mmap_file(inode)) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
+	if (ret)
+		goto out;
+
+	if (!atomic_read(&fi->i_compr_blocks))
+		goto out;
+
+	last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
+
+	count = last_idx - page_idx;
+	while (count) {
+		int len = min(cluster_size, count);
+
+		ret = redirty_blocks(inode, page_idx, len);
+		if (ret < 0)
+			break;
+
+		if (get_dirty_pages(inode) >= blk_per_seg)
+			filemap_fdatawrite(inode->i_mapping);
+
+		count -= len;
+		page_idx += len;
+	}
+
+	if (!ret)
+		ret = filemap_write_and_wait_range(inode->i_mapping, 0,
+							LLONG_MAX);
+
+	if (ret)
+		f2fs_warn(sbi, "%s: The file might be partially decompressed "
+				"(errno=%d). Please delete the file.\n",
+				__func__, ret);
+out:
+	inode_unlock(inode);
+	file_end_write(filp);
+
+	return ret;
+}
+
+static int f2fs_ioc_compress_file(struct file *filp, unsigned long arg)
+{
+	struct inode *inode = file_inode(filp);
+	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+	pgoff_t page_idx = 0, last_idx;
+	unsigned int blk_per_seg = sbi->blocks_per_seg;
+	int cluster_size = F2FS_I(inode)->i_cluster_size;
+	int count, ret;
+
+	if (!f2fs_sb_has_compression(sbi) ||
+			F2FS_OPTION(sbi).compress_mode != COMPR_MODE_USER)
+		return -EOPNOTSUPP;
+
+	if (!(filp->f_mode & FMODE_WRITE))
+		return -EBADF;
+
+	if (!f2fs_compressed_file(inode))
+		return -EINVAL;
+
+	f2fs_balance_fs(F2FS_I_SB(inode), true);
+
+	file_start_write(filp);
+	inode_lock(inode);
+
+	if (!f2fs_is_compress_backend_ready(inode)) {
+		ret = -EOPNOTSUPP;
+		goto out;
+	}
+
+	if (f2fs_is_mmap_file(inode)) {
+		ret = -EBUSY;
+		goto out;
+	}
+
+	ret = filemap_write_and_wait_range(inode->i_mapping, 0, LLONG_MAX);
+	if (ret)
+		goto out;
+
+	set_inode_flag(inode, FI_ENABLE_COMPRESS);
+
+	last_idx = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
+
+	count = last_idx - page_idx;
+	while (count) {
+		int len = min(cluster_size, count);
+
+		ret = redirty_blocks(inode, page_idx, len);
+		if (ret < 0)
+			break;
+
+		if (get_dirty_pages(inode) >= blk_per_seg)
+			filemap_fdatawrite(inode->i_mapping);
+
+		count -= len;
+		page_idx += len;
+	}
+
+	if (!ret)
+		ret = filemap_write_and_wait_range(inode->i_mapping, 0,
+							LLONG_MAX);
+
+	clear_inode_flag(inode, FI_ENABLE_COMPRESS);
+
+	if (ret)
+		f2fs_warn(sbi, "%s: The file might be partially compressed "
+				"(errno=%d). Please delete the file.\n",
+				__func__, ret);
+out:
+	inode_unlock(inode);
+	file_end_write(filp);
+
+	return ret;
+}
+
+static long __f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
 	switch (cmd) {
 	case FS_IOC_GETFLAGS:
 		return f2fs_ioc_getflags(filp, arg);
@@ -4023,11 +4284,29 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 		return f2fs_reserve_compress_blocks(filp, arg);
 	case F2FS_IOC_SEC_TRIM_FILE:
 		return f2fs_sec_trim_file(filp, arg);
+	case F2FS_IOC_GET_COMPRESS_OPTION:
+		return f2fs_ioc_get_compress_option(filp, arg);
+	case F2FS_IOC_SET_COMPRESS_OPTION:
+		return f2fs_ioc_set_compress_option(filp, arg);
+	case F2FS_IOC_DECOMPRESS_FILE:
+		return f2fs_ioc_decompress_file(filp, arg);
+	case F2FS_IOC_COMPRESS_FILE:
+		return f2fs_ioc_compress_file(filp, arg);
 	default:
 		return -ENOTTY;
 	}
 }
 
+long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
+{
+	if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(filp)))))
+		return -EIO;
+	if (!f2fs_is_checkpoint_ready(F2FS_I_SB(file_inode(filp))))
+		return -ENOSPC;
+
+	return __f2fs_ioctl(filp, cmd, arg);
+}
+
 static ssize_t f2fs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
 {
 	struct file *file = iocb->ki_filp;
@@ -4144,8 +4423,63 @@ out:
 }
 
 #ifdef CONFIG_COMPAT
+struct compat_f2fs_gc_range {
+	u32 sync;
+	compat_u64 start;
+	compat_u64 len;
+};
+#define F2FS_IOC32_GARBAGE_COLLECT_RANGE	_IOW(F2FS_IOCTL_MAGIC, 11,\
+						struct compat_f2fs_gc_range)
+
+static int f2fs_compat_ioc_gc_range(struct file *file, unsigned long arg)
+{
+	struct compat_f2fs_gc_range __user *urange;
+	struct f2fs_gc_range range;
+	int err;
+
+	urange = compat_ptr(arg);
+	err = get_user(range.sync, &urange->sync);
+	err |= get_user(range.start, &urange->start);
+	err |= get_user(range.len, &urange->len);
+	if (err)
+		return -EFAULT;
+
+	return __f2fs_ioc_gc_range(file, &range);
+}
+
+struct compat_f2fs_move_range {
+	u32 dst_fd;
+	compat_u64 pos_in;
+	compat_u64 pos_out;
+	compat_u64 len;
+};
+#define F2FS_IOC32_MOVE_RANGE		_IOWR(F2FS_IOCTL_MAGIC, 9,	\
+					struct compat_f2fs_move_range)
+
+static int f2fs_compat_ioc_move_range(struct file *file, unsigned long arg)
+{
+	struct compat_f2fs_move_range __user *urange;
+	struct f2fs_move_range range;
+	int err;
+
+	urange = compat_ptr(arg);
+	err = get_user(range.dst_fd, &urange->dst_fd);
+	err |= get_user(range.pos_in, &urange->pos_in);
+	err |= get_user(range.pos_out, &urange->pos_out);
+	err |= get_user(range.len, &urange->len);
+	if (err)
+		return -EFAULT;
+
+	return __f2fs_ioc_move_range(file, &range);
+}
+
 long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
+	if (unlikely(f2fs_cp_error(F2FS_I_SB(file_inode(file)))))
+		return -EIO;
+	if (!f2fs_is_checkpoint_ready(F2FS_I_SB(file_inode(file))))
+		return -ENOSPC;
+
 	switch (cmd) {
 	case FS_IOC32_GETFLAGS:
 		cmd = FS_IOC_GETFLAGS;
@@ -4156,6 +4490,10 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	case FS_IOC32_GETVERSION:
 		cmd = FS_IOC_GETVERSION;
 		break;
+	case F2FS_IOC32_GARBAGE_COLLECT_RANGE:
+		return f2fs_compat_ioc_gc_range(file, arg);
+	case F2FS_IOC32_MOVE_RANGE:
+		return f2fs_compat_ioc_move_range(file, arg);
 	case F2FS_IOC_START_ATOMIC_WRITE:
 	case F2FS_IOC_COMMIT_ATOMIC_WRITE:
 	case F2FS_IOC_START_VOLATILE_WRITE:
@@ -4173,10 +4511,8 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	case FS_IOC_GET_ENCRYPTION_KEY_STATUS:
 	case FS_IOC_GET_ENCRYPTION_NONCE:
 	case F2FS_IOC_GARBAGE_COLLECT:
-	case F2FS_IOC_GARBAGE_COLLECT_RANGE:
 	case F2FS_IOC_WRITE_CHECKPOINT:
 	case F2FS_IOC_DEFRAGMENT:
-	case F2FS_IOC_MOVE_RANGE:
 	case F2FS_IOC_FLUSH_DEVICE:
 	case F2FS_IOC_GET_FEATURES:
 	case FS_IOC_FSGETXATTR:
@@ -4193,11 +4529,15 @@ long f2fs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 	case F2FS_IOC_RELEASE_COMPRESS_BLOCKS:
 	case F2FS_IOC_RESERVE_COMPRESS_BLOCKS:
 	case F2FS_IOC_SEC_TRIM_FILE:
+	case F2FS_IOC_GET_COMPRESS_OPTION:
+	case F2FS_IOC_SET_COMPRESS_OPTION:
+	case F2FS_IOC_DECOMPRESS_FILE:
+	case F2FS_IOC_COMPRESS_FILE:
 		break;
 	default:
 		return -ENOIOCTLCMD;
 	}
-	return f2fs_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
+	return __f2fs_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
 }
 #endif
 
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 05641a1e36cc..3ef84e6ded41 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -1986,7 +1986,7 @@ int f2fs_resize_fs(struct f2fs_sb_info *sbi, __u64 block_count)
 
 	freeze_super(sbi->sb);
 	down_write(&sbi->gc_lock);
-	mutex_lock(&sbi->cp_mutex);
+	down_write(&sbi->cp_global_sem);
 
 	spin_lock(&sbi->stat_lock);
 	if (shrunk_blocks + valid_user_blocks(sbi) +
@@ -2031,7 +2031,7 @@ recover_out:
 		spin_unlock(&sbi->stat_lock);
 	}
 out_err:
-	mutex_unlock(&sbi->cp_mutex);
+	up_write(&sbi->cp_global_sem);
 	up_write(&sbi->gc_lock);
 	thaw_super(sbi->sb);
 	clear_sbi_flag(sbi, SBI_IS_RESIZEFS);
diff --git a/fs/f2fs/hash.c b/fs/f2fs/hash.c
index de841aaf3c43..e3beac546c63 100644
--- a/fs/f2fs/hash.c
+++ b/fs/f2fs/hash.c
@@ -111,7 +111,9 @@ void f2fs_hash_filename(const struct inode *dir, struct f2fs_filename *fname)
 		 * If the casefolded name is provided, hash it instead of the
 		 * on-disk name.  If the casefolded name is *not* provided, that
 		 * should only be because the name wasn't valid Unicode, so fall
-		 * back to treating the name as an opaque byte sequence.
+		 * back to treating the name as an opaque byte sequence.  Note
+		 * that to handle encrypted directories, the fallback must use
+		 * usr_fname (plaintext) rather than disk_name (ciphertext).
 		 */
 		WARN_ON_ONCE(!fname->usr_fname->name);
 		if (fname->cf_name.name) {
@@ -121,6 +123,13 @@ void f2fs_hash_filename(const struct inode *dir, struct f2fs_filename *fname)
 			name = fname->usr_fname->name;
 			len = fname->usr_fname->len;
 		}
+		if (IS_ENCRYPTED(dir)) {
+			struct qstr tmp = QSTR_INIT(name, len);
+
+			fname->hash =
+				cpu_to_le32(fscrypt_fname_siphash(dir, &tmp));
+			return;
+		}
 	}
 #endif
 	fname->hash = cpu_to_le32(TEA_hash_name(name, len));
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index 70384e31788d..806ebabf5870 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -188,7 +188,8 @@ int f2fs_convert_inline_inode(struct inode *inode)
 	struct page *ipage, *page;
 	int err = 0;
 
-	if (!f2fs_has_inline_data(inode))
+	if (!f2fs_has_inline_data(inode) ||
+			f2fs_hw_is_readonly(sbi) || f2fs_readonly(sbi->sb))
 		return 0;
 
 	page = f2fs_grab_cache_page(inode->i_mapping, 0, false);
@@ -266,7 +267,7 @@ int f2fs_recover_inline_data(struct inode *inode, struct page *npage)
 	 * [prev.] [next] of inline_data flag
 	 *    o       o  -> recover inline_data
 	 *    o       x  -> remove inline_data, and then recover data blocks
-	 *    x       o  -> remove inline_data, and then recover inline_data
+	 *    x       o  -> remove data blocks, and then recover inline_data
 	 *    x       x  -> recover data blocks
 	 */
 	if (IS_INODE(npage))
@@ -298,6 +299,7 @@ process_inline:
 		if (IS_ERR(ipage))
 			return PTR_ERR(ipage);
 		f2fs_truncate_inline_inode(inode, ipage, 0);
+		stat_dec_inline_inode(inode);
 		clear_inode_flag(inode, FI_INLINE_DATA);
 		f2fs_put_page(ipage, 1);
 	} else if (ri && (ri->i_inline & F2FS_INLINE_DATA)) {
@@ -306,6 +308,7 @@ process_inline:
 		ret = f2fs_truncate_blocks(inode, 0, false);
 		if (ret)
 			return ret;
+		stat_inc_inline_inode(inode);
 		goto process_inline;
 	}
 	return 0;
@@ -332,6 +335,10 @@ struct f2fs_dir_entry *f2fs_find_in_inline_dir(struct inode *dir,
 	make_dentry_ptr_inline(dir, &d, inline_dentry);
 	de = f2fs_find_target_dentry(&d, fname, NULL);
 	unlock_page(ipage);
+	if (IS_ERR(de)) {
+		*res_page = ERR_CAST(de);
+		de = NULL;
+	}
 	if (de)
 		*res_page = ipage;
 	else
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 657db2fb6739..349d9cb933ee 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -456,6 +456,7 @@ static int do_read_inode(struct inode *inode)
 					le64_to_cpu(ri->i_compr_blocks));
 			fi->i_compress_algorithm = ri->i_compress_algorithm;
 			fi->i_log_cluster_size = ri->i_log_cluster_size;
+			fi->i_compress_flag = le16_to_cpu(ri->i_compress_flag);
 			fi->i_cluster_size = 1 << fi->i_log_cluster_size;
 			set_inode_flag(inode, FI_COMPRESSED_FILE);
 		}
@@ -634,6 +635,8 @@ void f2fs_update_inode(struct inode *inode, struct page *node_page)
 					&F2FS_I(inode)->i_compr_blocks));
 			ri->i_compress_algorithm =
 				F2FS_I(inode)->i_compress_algorithm;
+			ri->i_compress_flag =
+				cpu_to_le16(F2FS_I(inode)->i_compress_flag);
 			ri->i_log_cluster_size =
 				F2FS_I(inode)->i_log_cluster_size;
 		}
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 8fa37d1434de..6edb1ab579a1 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -497,6 +497,7 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
 	}
 
 	err = f2fs_prepare_lookup(dir, dentry, &fname);
+	generic_set_encrypted_ci_d_ops(dentry);
 	if (err == -ENOENT)
 		goto out_splice;
 	if (err)
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index d5d8ce077f29..3a24423ac65f 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -62,8 +62,8 @@ bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type)
 				sizeof(struct free_nid)) >> PAGE_SHIFT;
 		res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
 	} else if (type == NAT_ENTRIES) {
-		mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >>
-							PAGE_SHIFT;
+		mem_size = (nm_i->nat_cnt[TOTAL_NAT] *
+				sizeof(struct nat_entry)) >> PAGE_SHIFT;
 		res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
 		if (excess_cached_nats(sbi))
 			res = false;
@@ -109,7 +109,7 @@ static void clear_node_page_dirty(struct page *page)
 
 static struct page *get_current_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
 {
-	return f2fs_get_meta_page(sbi, current_nat_addr(sbi, nid));
+	return f2fs_get_meta_page_retry(sbi, current_nat_addr(sbi, nid));
 }
 
 static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
@@ -177,7 +177,8 @@ static struct nat_entry *__init_nat_entry(struct f2fs_nm_info *nm_i,
 	list_add_tail(&ne->list, &nm_i->nat_entries);
 	spin_unlock(&nm_i->nat_list_lock);
 
-	nm_i->nat_cnt++;
+	nm_i->nat_cnt[TOTAL_NAT]++;
+	nm_i->nat_cnt[RECLAIMABLE_NAT]++;
 	return ne;
 }
 
@@ -207,7 +208,8 @@ static unsigned int __gang_lookup_nat_cache(struct f2fs_nm_info *nm_i,
 static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e)
 {
 	radix_tree_delete(&nm_i->nat_root, nat_get_nid(e));
-	nm_i->nat_cnt--;
+	nm_i->nat_cnt[TOTAL_NAT]--;
+	nm_i->nat_cnt[RECLAIMABLE_NAT]--;
 	__free_nat_entry(e);
 }
 
@@ -253,7 +255,8 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i,
 	if (get_nat_flag(ne, IS_DIRTY))
 		goto refresh_list;
 
-	nm_i->dirty_nat_cnt++;
+	nm_i->nat_cnt[DIRTY_NAT]++;
+	nm_i->nat_cnt[RECLAIMABLE_NAT]--;
 	set_nat_flag(ne, IS_DIRTY, true);
 refresh_list:
 	spin_lock(&nm_i->nat_list_lock);
@@ -273,7 +276,8 @@ static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i,
 
 	set_nat_flag(ne, IS_DIRTY, false);
 	set->entry_cnt--;
-	nm_i->dirty_nat_cnt--;
+	nm_i->nat_cnt[DIRTY_NAT]--;
+	nm_i->nat_cnt[RECLAIMABLE_NAT]++;
 }
 
 static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i,
@@ -2590,9 +2594,15 @@ int f2fs_recover_inline_xattr(struct inode *inode, struct page *page)
 
 	ri = F2FS_INODE(page);
 	if (ri->i_inline & F2FS_INLINE_XATTR) {
-		set_inode_flag(inode, FI_INLINE_XATTR);
+		if (!f2fs_has_inline_xattr(inode)) {
+			set_inode_flag(inode, FI_INLINE_XATTR);
+			stat_inc_inline_xattr(inode);
+		}
 	} else {
-		clear_inode_flag(inode, FI_INLINE_XATTR);
+		if (f2fs_has_inline_xattr(inode)) {
+			stat_dec_inline_xattr(inode);
+			clear_inode_flag(inode, FI_INLINE_XATTR);
+		}
 		goto update_inode;
 	}
 
@@ -2944,14 +2954,17 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 	LIST_HEAD(sets);
 	int err = 0;
 
-	/* during unmount, let's flush nat_bits before checking dirty_nat_cnt */
+	/*
+	 * during unmount, let's flush nat_bits before checking
+	 * nat_cnt[DIRTY_NAT].
+	 */
 	if (enabled_nat_bits(sbi, cpc)) {
 		down_write(&nm_i->nat_tree_lock);
 		remove_nats_in_journal(sbi);
 		up_write(&nm_i->nat_tree_lock);
 	}
 
-	if (!nm_i->dirty_nat_cnt)
+	if (!nm_i->nat_cnt[DIRTY_NAT])
 		return 0;
 
 	down_write(&nm_i->nat_tree_lock);
@@ -2962,7 +2975,8 @@ int f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
 	 * into nat entry set.
 	 */
 	if (enabled_nat_bits(sbi, cpc) ||
-		!__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
+		!__has_cursum_space(journal,
+			nm_i->nat_cnt[DIRTY_NAT], NAT_JOURNAL))
 		remove_nats_in_journal(sbi);
 
 	while ((found = __gang_lookup_nat_set(nm_i,
@@ -3086,7 +3100,6 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
 						F2FS_RESERVED_NODE_NUM;
 	nm_i->nid_cnt[FREE_NID] = 0;
 	nm_i->nid_cnt[PREALLOC_NID] = 0;
-	nm_i->nat_cnt = 0;
 	nm_i->ram_thresh = DEF_RAM_THRESHOLD;
 	nm_i->ra_nid_pages = DEF_RA_NID_PAGES;
 	nm_i->dirty_nats_ratio = DEF_DIRTY_NAT_RATIO_THRESHOLD;
@@ -3220,7 +3233,7 @@ void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi)
 			__del_from_nat_cache(nm_i, natvec[idx]);
 		}
 	}
-	f2fs_bug_on(sbi, nm_i->nat_cnt);
+	f2fs_bug_on(sbi, nm_i->nat_cnt[TOTAL_NAT]);
 
 	/* destroy nat set cache */
 	nid = 0;
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index 69e5859e993c..f84541b57acb 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -126,13 +126,13 @@ static inline void raw_nat_from_node_info(struct f2fs_nat_entry *raw_ne,
 
 static inline bool excess_dirty_nats(struct f2fs_sb_info *sbi)
 {
-	return NM_I(sbi)->dirty_nat_cnt >= NM_I(sbi)->max_nid *
+	return NM_I(sbi)->nat_cnt[DIRTY_NAT] >= NM_I(sbi)->max_nid *
 					NM_I(sbi)->dirty_nats_ratio / 100;
 }
 
 static inline bool excess_cached_nats(struct f2fs_sb_info *sbi)
 {
-	return NM_I(sbi)->nat_cnt >= DEF_NAT_CACHE_THRESHOLD;
+	return NM_I(sbi)->nat_cnt[TOTAL_NAT] >= DEF_NAT_CACHE_THRESHOLD;
 }
 
 static inline bool excess_dirty_nodes(struct f2fs_sb_info *sbi)
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 4f12ade6410a..da75d5d52f0a 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -5,6 +5,7 @@
  * Copyright (c) 2012 Samsung Electronics Co., Ltd.
  *             http://www.samsung.com/
  */
+#include <asm/unaligned.h>
 #include <linux/fs.h>
 #include <linux/f2fs_fs.h>
 #include "f2fs.h"
@@ -128,7 +129,16 @@ static int init_recovered_filename(const struct inode *dir,
 	}
 
 	/* Compute the hash of the filename */
-	if (IS_CASEFOLDED(dir)) {
+	if (IS_ENCRYPTED(dir) && IS_CASEFOLDED(dir)) {
+		/*
+		 * In this case the hash isn't computable without the key, so it
+		 * was saved on-disk.
+		 */
+		if (fname->disk_name.len + sizeof(f2fs_hash_t) > F2FS_NAME_LEN)
+			return -EINVAL;
+		fname->hash = get_unaligned((f2fs_hash_t *)
+				&raw_inode->i_name[fname->disk_name.len]);
+	} else if (IS_CASEFOLDED(dir)) {
 		err = f2fs_init_casefolded_name(dir, fname);
 		if (err)
 			return err;
@@ -789,7 +799,7 @@ int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
 	INIT_LIST_HEAD(&dir_list);
 
 	/* prevent checkpoint */
-	mutex_lock(&sbi->cp_mutex);
+	down_write(&sbi->cp_global_sem);
 
 	/* step #1: find fsynced inode numbers */
 	err = find_fsync_dnodes(sbi, &inode_list, check_only);
@@ -840,7 +850,7 @@ skip:
 	if (!err)
 		clear_sbi_flag(sbi, SBI_POR_DOING);
 
-	mutex_unlock(&sbi->cp_mutex);
+	up_write(&sbi->cp_global_sem);
 
 	/* let's drop all the directory inodes for clean checkpoint */
 	destroy_fsync_dnodes(&dir_list, err);
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 1596502f7375..deca74cb17df 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -529,31 +529,38 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi, bool from_bg)
 	else
 		f2fs_build_free_nids(sbi, false, false);
 
-	if (!is_idle(sbi, REQ_TIME) &&
-		(!excess_dirty_nats(sbi) && !excess_dirty_nodes(sbi)))
+	if (excess_dirty_nats(sbi) || excess_dirty_nodes(sbi) ||
+		excess_prefree_segs(sbi))
+		goto do_sync;
+
+	/* there is background inflight IO or foreground operation recently */
+	if (is_inflight_io(sbi, REQ_TIME) ||
+		(!f2fs_time_over(sbi, REQ_TIME) && rwsem_is_locked(&sbi->cp_rwsem)))
 		return;
 
+	/* exceed periodical checkpoint timeout threshold */
+	if (f2fs_time_over(sbi, CP_TIME))
+		goto do_sync;
+
 	/* checkpoint is the only way to shrink partial cached entries */
-	if (!f2fs_available_free_memory(sbi, NAT_ENTRIES) ||
-			!f2fs_available_free_memory(sbi, INO_ENTRIES) ||
-			excess_prefree_segs(sbi) ||
-			excess_dirty_nats(sbi) ||
-			excess_dirty_nodes(sbi) ||
-			f2fs_time_over(sbi, CP_TIME)) {
-		if (test_opt(sbi, DATA_FLUSH) && from_bg) {
-			struct blk_plug plug;
-
-			mutex_lock(&sbi->flush_lock);
-
-			blk_start_plug(&plug);
-			f2fs_sync_dirty_inodes(sbi, FILE_INODE);
-			blk_finish_plug(&plug);
+	if (f2fs_available_free_memory(sbi, NAT_ENTRIES) ||
+		f2fs_available_free_memory(sbi, INO_ENTRIES))
+		return;
 
-			mutex_unlock(&sbi->flush_lock);
-		}
-		f2fs_sync_fs(sbi->sb, true);
-		stat_inc_bg_cp_count(sbi->stat_info);
+do_sync:
+	if (test_opt(sbi, DATA_FLUSH) && from_bg) {
+		struct blk_plug plug;
+
+		mutex_lock(&sbi->flush_lock);
+
+		blk_start_plug(&plug);
+		f2fs_sync_dirty_inodes(sbi, FILE_INODE);
+		blk_finish_plug(&plug);
+
+		mutex_unlock(&sbi->flush_lock);
 	}
+	f2fs_sync_fs(sbi->sb, true);
+	stat_inc_bg_cp_count(sbi->stat_info);
 }
 
 static int __submit_flush_wait(struct f2fs_sb_info *sbi,
@@ -3254,7 +3261,7 @@ static int __get_segment_type_6(struct f2fs_io_info *fio)
 			else
 				return CURSEG_COLD_DATA;
 		}
-		if (file_is_cold(inode) || f2fs_compressed_file(inode))
+		if (file_is_cold(inode) || f2fs_need_compress_data(inode))
 			return CURSEG_COLD_DATA;
 		if (file_is_hot(inode) ||
 				is_inode_flag_set(inode, FI_HOT_DATA) ||
@@ -4544,7 +4551,7 @@ static void init_dirty_segmap(struct f2fs_sb_info *sbi)
 		return;
 
 	mutex_lock(&dirty_i->seglist_lock);
-	for (segno = 0; segno < MAIN_SECS(sbi); segno += blks_per_sec) {
+	for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
 		valid_blocks = get_valid_blocks(sbi, segno, true);
 		secno = GET_SEC_FROM_SEG(sbi, segno);
 
diff --git a/fs/f2fs/shrinker.c b/fs/f2fs/shrinker.c
index d66de5999a26..dd3c3c7a90ec 100644
--- a/fs/f2fs/shrinker.c
+++ b/fs/f2fs/shrinker.c
@@ -18,9 +18,7 @@ static unsigned int shrinker_run_no;
 
 static unsigned long __count_nat_entries(struct f2fs_sb_info *sbi)
 {
-	long count = NM_I(sbi)->nat_cnt - NM_I(sbi)->dirty_nat_cnt;
-
-	return count > 0 ? count : 0;
+	return NM_I(sbi)->nat_cnt[RECLAIMABLE_NAT];
 }
 
 static unsigned long __count_free_nids(struct f2fs_sb_info *sbi)
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index af9f449da64b..b4a07fe62d1a 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -146,6 +146,8 @@ enum {
 	Opt_compress_algorithm,
 	Opt_compress_log_size,
 	Opt_compress_extension,
+	Opt_compress_chksum,
+	Opt_compress_mode,
 	Opt_atgc,
 	Opt_err,
 };
@@ -214,6 +216,8 @@ static match_table_t f2fs_tokens = {
 	{Opt_compress_algorithm, "compress_algorithm=%s"},
 	{Opt_compress_log_size, "compress_log_size=%u"},
 	{Opt_compress_extension, "compress_extension=%s"},
+	{Opt_compress_chksum, "compress_chksum"},
+	{Opt_compress_mode, "compress_mode=%s"},
 	{Opt_atgc, "atgc"},
 	{Opt_err, NULL},
 };
@@ -934,10 +938,29 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
 			F2FS_OPTION(sbi).compress_ext_cnt++;
 			kfree(name);
 			break;
+		case Opt_compress_chksum:
+			F2FS_OPTION(sbi).compress_chksum = true;
+			break;
+		case Opt_compress_mode:
+			name = match_strdup(&args[0]);
+			if (!name)
+				return -ENOMEM;
+			if (!strcmp(name, "fs")) {
+				F2FS_OPTION(sbi).compress_mode = COMPR_MODE_FS;
+			} else if (!strcmp(name, "user")) {
+				F2FS_OPTION(sbi).compress_mode = COMPR_MODE_USER;
+			} else {
+				kfree(name);
+				return -EINVAL;
+			}
+			kfree(name);
+			break;
 #else
 		case Opt_compress_algorithm:
 		case Opt_compress_log_size:
 		case Opt_compress_extension:
+		case Opt_compress_chksum:
+		case Opt_compress_mode:
 			f2fs_info(sbi, "compression options not supported");
 			break;
 #endif
@@ -1523,6 +1546,14 @@ static inline void f2fs_show_compress_options(struct seq_file *seq,
 		seq_printf(seq, ",compress_extension=%s",
 			F2FS_OPTION(sbi).extensions[i]);
 	}
+
+	if (F2FS_OPTION(sbi).compress_chksum)
+		seq_puts(seq, ",compress_chksum");
+
+	if (F2FS_OPTION(sbi).compress_mode == COMPR_MODE_FS)
+		seq_printf(seq, ",compress_mode=%s", "fs");
+	else if (F2FS_OPTION(sbi).compress_mode == COMPR_MODE_USER)
+		seq_printf(seq, ",compress_mode=%s", "user");
 }
 
 static int f2fs_show_options(struct seq_file *seq, struct dentry *root)
@@ -1672,6 +1703,7 @@ static void default_options(struct f2fs_sb_info *sbi)
 	F2FS_OPTION(sbi).compress_algorithm = COMPRESS_LZ4;
 	F2FS_OPTION(sbi).compress_log_size = MIN_COMPRESS_LOG_SIZE;
 	F2FS_OPTION(sbi).compress_ext_cnt = 0;
+	F2FS_OPTION(sbi).compress_mode = COMPR_MODE_FS;
 	F2FS_OPTION(sbi).bggc_mode = BGGC_MODE_ON;
 
 	sbi->sb->s_flags &= ~SB_INLINECRYPT;
@@ -1904,7 +1936,6 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
 
 	if (*flags & SB_RDONLY ||
 		F2FS_OPTION(sbi).whint_mode != org_mount_opt.whint_mode) {
-		writeback_inodes_sb(sb, WB_REASON_SYNC);
 		sync_inodes_sb(sb);
 
 		set_sbi_flag(sbi, SBI_IS_DIRTY);
@@ -2744,7 +2775,6 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
 	block_t total_sections, blocks_per_seg;
 	struct f2fs_super_block *raw_super = (struct f2fs_super_block *)
 					(bh->b_data + F2FS_SUPER_OFFSET);
-	unsigned int blocksize;
 	size_t crc_offset = 0;
 	__u32 crc = 0;
 
@@ -2770,18 +2800,11 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
 		}
 	}
 
-	/* Currently, support only 4KB page cache size */
-	if (F2FS_BLKSIZE != PAGE_SIZE) {
-		f2fs_info(sbi, "Invalid page_cache_size (%lu), supports only 4KB",
-			  PAGE_SIZE);
-		return -EFSCORRUPTED;
-	}
-
 	/* Currently, support only 4KB block size */
-	blocksize = 1 << le32_to_cpu(raw_super->log_blocksize);
-	if (blocksize != F2FS_BLKSIZE) {
-		f2fs_info(sbi, "Invalid blocksize (%u), supports only 4KB",
-			  blocksize);
+	if (le32_to_cpu(raw_super->log_blocksize) != F2FS_BLKSIZE_BITS) {
+		f2fs_info(sbi, "Invalid log_blocksize (%u), supports only %u",
+			  le32_to_cpu(raw_super->log_blocksize),
+			  F2FS_BLKSIZE_BITS);
 		return -EFSCORRUPTED;
 	}
 
@@ -3071,9 +3094,9 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
 	sbi->total_node_count =
 		(le32_to_cpu(raw_super->segment_count_nat) / 2)
 			* sbi->blocks_per_seg * NAT_ENTRY_PER_BLOCK;
-	sbi->root_ino_num = le32_to_cpu(raw_super->root_ino);
-	sbi->node_ino_num = le32_to_cpu(raw_super->node_ino);
-	sbi->meta_ino_num = le32_to_cpu(raw_super->meta_ino);
+	F2FS_ROOT_INO(sbi) = le32_to_cpu(raw_super->root_ino);
+	F2FS_NODE_INO(sbi) = le32_to_cpu(raw_super->node_ino);
+	F2FS_META_INO(sbi) = le32_to_cpu(raw_super->meta_ino);
 	sbi->cur_victim_sec = NULL_SECNO;
 	sbi->next_victim_seg[BG_GC] = NULL_SEGNO;
 	sbi->next_victim_seg[FG_GC] = NULL_SEGNO;
@@ -3399,12 +3422,6 @@ static int f2fs_setup_casefold(struct f2fs_sb_info *sbi)
 		struct unicode_map *encoding;
 		__u16 encoding_flags;
 
-		if (f2fs_sb_has_encrypt(sbi)) {
-			f2fs_err(sbi,
-				"Can't mount with encoding and encryption");
-			return -EINVAL;
-		}
-
 		if (f2fs_sb_read_encoding(sbi->raw_super, &encoding_info,
 					  &encoding_flags)) {
 			f2fs_err(sbi,
@@ -3427,7 +3444,6 @@ static int f2fs_setup_casefold(struct f2fs_sb_info *sbi)
 
 		sbi->sb->s_encoding = encoding;
 		sbi->sb->s_encoding_flags = encoding_flags;
-		sbi->sb->s_d_op = &f2fs_dentry_ops;
 	}
 #else
 	if (f2fs_sb_has_casefold(sbi)) {
@@ -3559,7 +3575,7 @@ try_onemore:
 	sbi->valid_super_block = valid_super_block;
 	init_rwsem(&sbi->gc_lock);
 	mutex_init(&sbi->writepages);
-	mutex_init(&sbi->cp_mutex);
+	init_rwsem(&sbi->cp_global_sem);
 	init_rwsem(&sbi->node_write);
 	init_rwsem(&sbi->node_change);
 
@@ -3700,8 +3716,7 @@ try_onemore:
 	}
 
 	/* For write statistics */
-	sbi->sectors_written_start =
-		(u64)part_stat_read(sb->s_bdev, sectors[STAT_WRITE]);
+	sbi->sectors_written_start = f2fs_get_sectors_written(sbi);
 
 	/* Read accumulated write IO statistics if exists */
 	seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE);
@@ -3916,6 +3931,7 @@ free_bio_info:
 
 #ifdef CONFIG_UNICODE
 	utf8_unload(sb->s_encoding);
+	sb->s_encoding = NULL;
 #endif
 free_options:
 #ifdef CONFIG_QUOTA
diff --git a/fs/f2fs/sysfs.c b/fs/f2fs/sysfs.c
index 24e876e849c5..30bae57428d1 100644
--- a/fs/f2fs/sysfs.c
+++ b/fs/f2fs/sysfs.c
@@ -92,7 +92,8 @@ static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a,
 {
 	return sprintf(buf, "%llu\n",
 			(unsigned long long)(sbi->kbytes_written +
-			BD_PART_WRITTEN(sbi)));
+			((f2fs_get_sectors_written(sbi) -
+				sbi->sectors_written_start) >> 1)));
 }
 
 static ssize_t features_show(struct f2fs_attr *a,
@@ -557,6 +558,7 @@ F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info,
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_enable, iostat_enable);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, iostat_period_ms, iostat_period_ms);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, readdir_ra, readdir_ra);
+F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_io_bytes, max_io_bytes);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, gc_pin_file_thresh, gc_pin_file_threshold);
 F2FS_RW_ATTR(F2FS_SBI, f2fs_super_block, extension_list, extension_list);
 #ifdef CONFIG_F2FS_FAULT_INJECTION
@@ -641,6 +643,7 @@ static struct attribute *f2fs_attrs[] = {
 	ATTR_LIST(iostat_enable),
 	ATTR_LIST(iostat_period_ms),
 	ATTR_LIST(readdir_ra),
+	ATTR_LIST(max_io_bytes),
 	ATTR_LIST(gc_pin_file_thresh),
 	ATTR_LIST(extension_list),
 #ifdef CONFIG_F2FS_FAULT_INJECTION
diff --git a/fs/file.c b/fs/file.c
index 8434e0afecc7..c0b60961c672 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -694,8 +694,10 @@ int __close_range(unsigned fd, unsigned max_fd, unsigned int flags)
 		 * If the requested range is greater than the current maximum,
 		 * we're closing everything so only copy all file descriptors
 		 * beneath the lowest file descriptor.
+		 * If the caller requested all fds to be made cloexec copy all
+		 * of the file descriptors since they still want to use them.
 		 */
-		if (max_fd >= cur_max)
+		if (!(flags & CLOSE_RANGE_CLOEXEC) && (max_fd >= cur_max))
 			max_unshare_fds = fd;
 
 		ret = unshare_fd(CLONE_FILES, max_unshare_fds, &fds);
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index e6005c78bfa9..acfb55834af2 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -2321,10 +2321,6 @@ void __mark_inode_dirty(struct inode *inode, int flags)
 
 			wb = locked_inode_to_wb_and_lock_list(inode);
 
-			WARN((wb->bdi->capabilities & BDI_CAP_WRITEBACK) &&
-			     !test_bit(WB_registered, &wb->state),
-			     "bdi-%s not registered\n", bdi_dev_name(wb->bdi));
-
 			inode->dirtied_when = jiffies;
 			if (dirtytime)
 				inode->dirtied_time_when = jiffies;
diff --git a/fs/fuse/acl.c b/fs/fuse/acl.c
index 5a48cee6d7d3..f529075a2ce8 100644
--- a/fs/fuse/acl.c
+++ b/fs/fuse/acl.c
@@ -19,6 +19,9 @@ struct posix_acl *fuse_get_acl(struct inode *inode, int type)
 	void *value = NULL;
 	struct posix_acl *acl;
 
+	if (fuse_is_bad(inode))
+		return ERR_PTR(-EIO);
+
 	if (!fc->posix_acl || fc->no_getxattr)
 		return NULL;
 
@@ -53,6 +56,9 @@ int fuse_set_acl(struct inode *inode, struct posix_acl *acl, int type)
 	const char *name;
 	int ret;
 
+	if (fuse_is_bad(inode))
+		return -EIO;
+
 	if (!fc->posix_acl || fc->no_setxattr)
 		return -EOPNOTSUPP;
 
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index ff7dbeb16f88..78f9f209078c 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -202,10 +202,10 @@ static int fuse_dentry_revalidate(struct dentry *entry, unsigned int flags)
 	int ret;
 
 	inode = d_inode_rcu(entry);
-	if (inode && is_bad_inode(inode))
+	if (inode && fuse_is_bad(inode))
 		goto invalid;
 	else if (time_before64(fuse_dentry_time(entry), get_jiffies_64()) ||
-		 (flags & LOOKUP_REVAL)) {
+		 (flags & (LOOKUP_EXCL | LOOKUP_REVAL))) {
 		struct fuse_entry_out outarg;
 		FUSE_ARGS(args);
 		struct fuse_forget_link *forget;
@@ -328,12 +328,11 @@ static struct vfsmount *fuse_dentry_automount(struct path *path)
 	if (!fm)
 		goto out_put_fsc;
 
-	refcount_set(&fm->count, 1);
 	fsc->s_fs_info = fm;
 	sb = sget_fc(fsc, NULL, set_anon_super_fc);
 	if (IS_ERR(sb)) {
 		err = PTR_ERR(sb);
-		fuse_mount_put(fm);
+		kfree(fm);
 		goto out_put_fsc;
 	}
 	fm->fc = fuse_conn_get(fc);
@@ -463,6 +462,9 @@ static struct dentry *fuse_lookup(struct inode *dir, struct dentry *entry,
 	bool outarg_valid = true;
 	bool locked;
 
+	if (fuse_is_bad(dir))
+		return ERR_PTR(-EIO);
+
 	locked = fuse_lock_inode(dir);
 	err = fuse_lookup_name(dir->i_sb, get_node_id(dir), &entry->d_name,
 			       &outarg, &inode);
@@ -542,6 +544,12 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry,
 	inarg.flags = flags;
 	inarg.mode = mode;
 	inarg.umask = current_umask();
+
+	if (fm->fc->handle_killpriv_v2 && (flags & O_TRUNC) &&
+	    !(flags & O_EXCL) && !capable(CAP_FSETID)) {
+		inarg.open_flags |= FUSE_OPEN_KILL_SUIDGID;
+	}
+
 	args.opcode = FUSE_CREATE;
 	args.nodeid = get_node_id(dir);
 	args.in_numargs = 2;
@@ -606,6 +614,9 @@ static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
 	struct fuse_conn *fc = get_fuse_conn(dir);
 	struct dentry *res = NULL;
 
+	if (fuse_is_bad(dir))
+		return -EIO;
+
 	if (d_in_lookup(entry)) {
 		res = fuse_lookup(dir, entry, 0);
 		if (IS_ERR(res))
@@ -654,6 +665,9 @@ static int create_new_entry(struct fuse_mount *fm, struct fuse_args *args,
 	int err;
 	struct fuse_forget_link *forget;
 
+	if (fuse_is_bad(dir))
+		return -EIO;
+
 	forget = fuse_alloc_forget();
 	if (!forget)
 		return -ENOMEM;
@@ -781,6 +795,9 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry)
 	struct fuse_mount *fm = get_fuse_mount(dir);
 	FUSE_ARGS(args);
 
+	if (fuse_is_bad(dir))
+		return -EIO;
+
 	args.opcode = FUSE_UNLINK;
 	args.nodeid = get_node_id(dir);
 	args.in_numargs = 1;
@@ -817,6 +834,9 @@ static int fuse_rmdir(struct inode *dir, struct dentry *entry)
 	struct fuse_mount *fm = get_fuse_mount(dir);
 	FUSE_ARGS(args);
 
+	if (fuse_is_bad(dir))
+		return -EIO;
+
 	args.opcode = FUSE_RMDIR;
 	args.nodeid = get_node_id(dir);
 	args.in_numargs = 1;
@@ -895,6 +915,9 @@ static int fuse_rename2(struct inode *olddir, struct dentry *oldent,
 	struct fuse_conn *fc = get_fuse_conn(olddir);
 	int err;
 
+	if (fuse_is_bad(olddir))
+		return -EIO;
+
 	if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
 		return -EINVAL;
 
@@ -1030,7 +1053,7 @@ static int fuse_do_getattr(struct inode *inode, struct kstat *stat,
 	if (!err) {
 		if (fuse_invalid_attr(&outarg.attr) ||
 		    (inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
-			make_bad_inode(inode);
+			fuse_make_bad(inode);
 			err = -EIO;
 		} else {
 			fuse_change_attributes(inode, &outarg.attr,
@@ -1232,6 +1255,9 @@ static int fuse_permission(struct inode *inode, int mask)
 	bool refreshed = false;
 	int err = 0;
 
+	if (fuse_is_bad(inode))
+		return -EIO;
+
 	if (!fuse_allow_current_process(fc))
 		return -EACCES;
 
@@ -1327,7 +1353,7 @@ static const char *fuse_get_link(struct dentry *dentry, struct inode *inode,
 	int err;
 
 	err = -EIO;
-	if (is_bad_inode(inode))
+	if (fuse_is_bad(inode))
 		goto out_err;
 
 	if (fc->cache_symlinks)
@@ -1375,7 +1401,7 @@ static int fuse_dir_fsync(struct file *file, loff_t start, loff_t end,
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	int err;
 
-	if (is_bad_inode(inode))
+	if (fuse_is_bad(inode))
 		return -EIO;
 
 	if (fc->no_fsyncdir)
@@ -1649,10 +1675,20 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
 		inarg.valid |= FATTR_FH;
 		inarg.fh = ff->fh;
 	}
+
+	/* Kill suid/sgid for non-directory chown unconditionally */
+	if (fc->handle_killpriv_v2 && !S_ISDIR(inode->i_mode) &&
+	    attr->ia_valid & (ATTR_UID | ATTR_GID))
+		inarg.valid |= FATTR_KILL_SUIDGID;
+
 	if (attr->ia_valid & ATTR_SIZE) {
 		/* For mandatory locking in truncate */
 		inarg.valid |= FATTR_LOCKOWNER;
 		inarg.lock_owner = fuse_lock_owner_id(fc, current->files);
+
+		/* Kill suid/sgid for truncate only if no CAP_FSETID */
+		if (fc->handle_killpriv_v2 && !capable(CAP_FSETID))
+			inarg.valid |= FATTR_KILL_SUIDGID;
 	}
 	fuse_setattr_fill(fc, &args, inode, &inarg, &outarg);
 	err = fuse_simple_request(fm, &args);
@@ -1664,7 +1700,7 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
 
 	if (fuse_invalid_attr(&outarg.attr) ||
 	    (inode->i_mode ^ outarg.attr.mode) & S_IFMT) {
-		make_bad_inode(inode);
+		fuse_make_bad(inode);
 		err = -EIO;
 		goto error;
 	}
@@ -1727,6 +1763,9 @@ static int fuse_setattr(struct dentry *entry, struct iattr *attr)
 	struct file *file = (attr->ia_valid & ATTR_FILE) ? attr->ia_file : NULL;
 	int ret;
 
+	if (fuse_is_bad(inode))
+		return -EIO;
+
 	if (!fuse_allow_current_process(get_fuse_conn(inode)))
 		return -EACCES;
 
@@ -1740,7 +1779,7 @@ static int fuse_setattr(struct dentry *entry, struct iattr *attr)
 		 *
 		 * This should be done on write(), truncate() and chown().
 		 */
-		if (!fc->handle_killpriv) {
+		if (!fc->handle_killpriv && !fc->handle_killpriv_v2) {
 			/*
 			 * ia_mode calculation may have used stale i_mode.
 			 * Refresh and recalculate.
@@ -1785,6 +1824,9 @@ static int fuse_getattr(const struct path *path, struct kstat *stat,
 	struct inode *inode = d_inode(path->dentry);
 	struct fuse_conn *fc = get_fuse_conn(inode);
 
+	if (fuse_is_bad(inode))
+		return -EIO;
+
 	if (!fuse_allow_current_process(fc)) {
 		if (!request_mask) {
 			/*
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index c03034e8c152..8cccecb55fb8 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -42,6 +42,12 @@ static int fuse_send_open(struct fuse_mount *fm, u64 nodeid, struct file *file,
 	inarg.flags = file->f_flags & ~(O_CREAT | O_EXCL | O_NOCTTY);
 	if (!fm->fc->atomic_o_trunc)
 		inarg.flags &= ~O_TRUNC;
+
+	if (fm->fc->handle_killpriv_v2 &&
+	    (inarg.flags & O_TRUNC) && !capable(CAP_FSETID)) {
+		inarg.open_flags |= FUSE_OPEN_KILL_SUIDGID;
+	}
+
 	args.opcode = opcode;
 	args.nodeid = nodeid;
 	args.in_numargs = 1;
@@ -226,6 +232,9 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir)
 	bool dax_truncate = (file->f_flags & O_TRUNC) &&
 			  fc->atomic_o_trunc && FUSE_IS_DAX(inode);
 
+	if (fuse_is_bad(inode))
+		return -EIO;
+
 	err = generic_file_open(inode, file);
 	if (err)
 		return err;
@@ -463,7 +472,7 @@ static int fuse_flush(struct file *file, fl_owner_t id)
 	FUSE_ARGS(args);
 	int err;
 
-	if (is_bad_inode(inode))
+	if (fuse_is_bad(inode))
 		return -EIO;
 
 	err = write_inode_now(inode, 1);
@@ -535,7 +544,7 @@ static int fuse_fsync(struct file *file, loff_t start, loff_t end,
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	int err;
 
-	if (is_bad_inode(inode))
+	if (fuse_is_bad(inode))
 		return -EIO;
 
 	inode_lock(inode);
@@ -859,7 +868,7 @@ static int fuse_readpage(struct file *file, struct page *page)
 	int err;
 
 	err = -EIO;
-	if (is_bad_inode(inode))
+	if (fuse_is_bad(inode))
 		goto out;
 
 	err = fuse_do_readpage(file, page);
@@ -952,7 +961,7 @@ static void fuse_readahead(struct readahead_control *rac)
 	struct fuse_conn *fc = get_fuse_conn(inode);
 	unsigned int i, max_pages, nr_pages = 0;
 
-	if (is_bad_inode(inode))
+	if (fuse_is_bad(inode))
 		return;
 
 	max_pages = min_t(unsigned int, fc->max_pages,
@@ -1097,6 +1106,8 @@ static ssize_t fuse_send_write_pages(struct fuse_io_args *ia,
 
 	fuse_write_args_fill(ia, ff, pos, count);
 	ia->write.in.flags = fuse_write_flags(iocb);
+	if (fm->fc->handle_killpriv_v2 && !capable(CAP_FSETID))
+		ia->write.in.write_flags |= FUSE_WRITE_KILL_SUIDGID;
 
 	err = fuse_simple_request(fm, &ap->args);
 	if (!err && ia->write.out.size > count)
@@ -1260,17 +1271,24 @@ static ssize_t fuse_cache_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	ssize_t written_buffered = 0;
 	struct inode *inode = mapping->host;
 	ssize_t err;
+	struct fuse_conn *fc = get_fuse_conn(inode);
 	loff_t endbyte = 0;
 
-	if (get_fuse_conn(inode)->writeback_cache) {
+	if (fc->writeback_cache) {
 		/* Update size (EOF optimization) and mode (SUID clearing) */
 		err = fuse_update_attributes(mapping->host, file);
 		if (err)
 			return err;
 
+		if (fc->handle_killpriv_v2 &&
+		    should_remove_suid(file_dentry(file))) {
+			goto writethrough;
+		}
+
 		return generic_file_write_iter(iocb, from);
 	}
 
+writethrough:
 	inode_lock(inode);
 
 	/* We can write back this queue in page reclaim */
@@ -1451,7 +1469,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
 
 		if (write) {
 			if (!capable(CAP_FSETID))
-				ia->write.in.write_flags |= FUSE_WRITE_KILL_PRIV;
+				ia->write.in.write_flags |= FUSE_WRITE_KILL_SUIDGID;
 
 			nres = fuse_send_write(ia, pos, nbytes, owner);
 		} else {
@@ -1555,7 +1573,7 @@ static ssize_t fuse_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
 	struct fuse_file *ff = file->private_data;
 	struct inode *inode = file_inode(file);
 
-	if (is_bad_inode(inode))
+	if (fuse_is_bad(inode))
 		return -EIO;
 
 	if (FUSE_IS_DAX(inode))
@@ -1573,7 +1591,7 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
 	struct fuse_file *ff = file->private_data;
 	struct inode *inode = file_inode(file);
 
-	if (is_bad_inode(inode))
+	if (fuse_is_bad(inode))
 		return -EIO;
 
 	if (FUSE_IS_DAX(inode))
@@ -2172,7 +2190,7 @@ static int fuse_writepages(struct address_space *mapping,
 	int err;
 
 	err = -EIO;
-	if (is_bad_inode(inode))
+	if (fuse_is_bad(inode))
 		goto out;
 
 	data.inode = inode;
@@ -2281,6 +2299,9 @@ static int fuse_launder_page(struct page *page)
 	int err = 0;
 	if (clear_page_dirty_for_io(page)) {
 		struct inode *inode = page->mapping->host;
+
+		/* Serialize with pending writeback for the same page */
+		fuse_wait_on_page_writeback(inode, page->index);
 		err = fuse_writepage_locked(page);
 		if (!err)
 			fuse_wait_on_page_writeback(inode, page->index);
@@ -2954,7 +2975,7 @@ long fuse_ioctl_common(struct file *file, unsigned int cmd,
 	if (!fuse_allow_current_process(fc))
 		return -EACCES;
 
-	if (is_bad_inode(inode))
+	if (fuse_is_bad(inode))
 		return -EIO;
 
 	return fuse_do_ioctl(file, cmd, arg, flags);
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index d51598017d13..7c4b8cb93f9f 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -172,6 +172,8 @@ enum {
 	FUSE_I_INIT_RDPLUS,
 	/** An operation changing file size is in progress  */
 	FUSE_I_SIZE_UNSTABLE,
+	/* Bad inode */
+	FUSE_I_BAD,
 };
 
 struct fuse_conn;
@@ -636,6 +638,14 @@ struct fuse_conn {
 	unsigned int legacy_opts_show:1;
 
 	/*
+	 * fs kills suid/sgid/cap on write/chown/trunc. suid is killed on
+	 * write/trunc only if caller did not have CAP_FSETID.  sgid is killed
+	 * on write/truncate only if caller did not have CAP_FSETID as well as
+	 * file has group execute permission.
+	 */
+	unsigned handle_killpriv_v2:1;
+
+	/*
 	 * The following bitfields are only for optimization purposes
 	 * and hence races in setting them will not cause malfunction
 	 */
@@ -801,9 +811,6 @@ struct fuse_mount {
 	/* Underlying (potentially shared) connection to the FUSE server */
 	struct fuse_conn *fc;
 
-	/* Refcount */
-	refcount_t count;
-
 	/*
 	 * Super block for this connection (fc->killsb must be held when
 	 * accessing this).
@@ -821,9 +828,7 @@ static inline struct fuse_mount *get_fuse_mount_super(struct super_block *sb)
 
 static inline struct fuse_conn *get_fuse_conn_super(struct super_block *sb)
 {
-	struct fuse_mount *fm = get_fuse_mount_super(sb);
-
-	return fm ? fm->fc : NULL;
+	return get_fuse_mount_super(sb)->fc;
 }
 
 static inline struct fuse_mount *get_fuse_mount(struct inode *inode)
@@ -833,9 +838,7 @@ static inline struct fuse_mount *get_fuse_mount(struct inode *inode)
 
 static inline struct fuse_conn *get_fuse_conn(struct inode *inode)
 {
-	struct fuse_mount *fm = get_fuse_mount(inode);
-
-	return fm ? fm->fc : NULL;
+	return get_fuse_mount_super(inode->i_sb)->fc;
 }
 
 static inline struct fuse_inode *get_fuse_inode(struct inode *inode)
@@ -858,6 +861,16 @@ static inline u64 fuse_get_attr_version(struct fuse_conn *fc)
 	return atomic64_read(&fc->attr_version);
 }
 
+static inline void fuse_make_bad(struct inode *inode)
+{
+	set_bit(FUSE_I_BAD, &get_fuse_inode(inode)->state);
+}
+
+static inline bool fuse_is_bad(struct inode *inode)
+{
+	return unlikely(test_bit(FUSE_I_BAD, &get_fuse_inode(inode)->state));
+}
+
 /** Device operations */
 extern const struct file_operations fuse_dev_operations;
 
@@ -1024,16 +1037,6 @@ void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm,
  */
 void fuse_conn_put(struct fuse_conn *fc);
 
-/**
- * Acquire reference to fuse_mount
- */
-struct fuse_mount *fuse_mount_get(struct fuse_mount *fm);
-
-/**
- * Release reference to fuse_mount
- */
-void fuse_mount_put(struct fuse_mount *fm);
-
 struct fuse_dev *fuse_dev_alloc_install(struct fuse_conn *fc);
 struct fuse_dev *fuse_dev_alloc(void);
 void fuse_dev_install(struct fuse_dev *fud, struct fuse_conn *fc);
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 1a47afc95f80..b0e18b470e91 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -132,7 +132,7 @@ static void fuse_evict_inode(struct inode *inode)
 			fi->forget = NULL;
 		}
 	}
-	if (S_ISREG(inode->i_mode) && !is_bad_inode(inode)) {
+	if (S_ISREG(inode->i_mode) && !fuse_is_bad(inode)) {
 		WARN_ON(!list_empty(&fi->write_files));
 		WARN_ON(!list_empty(&fi->queued_writes));
 	}
@@ -204,6 +204,16 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr,
 		inode->i_mode &= ~S_ISVTX;
 
 	fi->orig_ino = attr->ino;
+
+	/*
+	 * We are refreshing inode data and it is possible that another
+	 * client set suid/sgid or security.capability xattr. So clear
+	 * S_NOSEC. Ideally, we could have cleared it only if suid/sgid
+	 * was set or if security.capability xattr was set. But we don't
+	 * know if security.capability has been set or not. So clear it
+	 * anyway. Its less efficient but should be safe.
+	 */
+	inode->i_flags &= ~S_NOSEC;
 }
 
 void fuse_change_attributes(struct inode *inode, struct fuse_attr *attr,
@@ -342,7 +352,7 @@ retry:
 		unlock_new_inode(inode);
 	} else if ((inode->i_mode ^ attr->mode) & S_IFMT) {
 		/* Inode has changed type, any I/O on the old should fail */
-		make_bad_inode(inode);
+		fuse_make_bad(inode);
 		iput(inode);
 		goto retry;
 	}
@@ -452,7 +462,8 @@ static void fuse_put_super(struct super_block *sb)
 {
 	struct fuse_mount *fm = get_fuse_mount_super(sb);
 
-	fuse_mount_put(fm);
+	fuse_conn_put(fm->fc);
+	kfree(fm);
 }
 
 static void convert_fuse_statfs(struct kstatfs *stbuf, struct fuse_kstatfs *attr)
@@ -705,7 +716,6 @@ void fuse_conn_init(struct fuse_conn *fc, struct fuse_mount *fm,
 	INIT_LIST_HEAD(&fc->mounts);
 	list_add(&fm->fc_entry, &fc->mounts);
 	fm->fc = fc;
-	refcount_set(&fm->count, 1);
 }
 EXPORT_SYMBOL_GPL(fuse_conn_init);
 
@@ -732,23 +742,6 @@ struct fuse_conn *fuse_conn_get(struct fuse_conn *fc)
 }
 EXPORT_SYMBOL_GPL(fuse_conn_get);
 
-void fuse_mount_put(struct fuse_mount *fm)
-{
-	if (refcount_dec_and_test(&fm->count)) {
-		if (fm->fc)
-			fuse_conn_put(fm->fc);
-		kfree(fm);
-	}
-}
-EXPORT_SYMBOL_GPL(fuse_mount_put);
-
-struct fuse_mount *fuse_mount_get(struct fuse_mount *fm)
-{
-	refcount_inc(&fm->count);
-	return fm;
-}
-EXPORT_SYMBOL_GPL(fuse_mount_get);
-
 static struct inode *fuse_get_root_inode(struct super_block *sb, unsigned mode)
 {
 	struct fuse_attr attr;
@@ -1055,6 +1048,10 @@ static void process_init_reply(struct fuse_mount *fm, struct fuse_args *args,
 			    !fuse_dax_check_alignment(fc, arg->map_alignment)) {
 				ok = false;
 			}
+			if (arg->flags & FUSE_HANDLE_KILLPRIV_V2) {
+				fc->handle_killpriv_v2 = 1;
+				fm->sb->s_flags |= SB_NOSEC;
+			}
 		} else {
 			ra_pages = fc->max_read / PAGE_SIZE;
 			fc->no_lock = 1;
@@ -1097,7 +1094,8 @@ void fuse_send_init(struct fuse_mount *fm)
 		FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT |
 		FUSE_PARALLEL_DIROPS | FUSE_HANDLE_KILLPRIV | FUSE_POSIX_ACL |
 		FUSE_ABORT_ERROR | FUSE_MAX_PAGES | FUSE_CACHE_SYMLINKS |
-		FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA;
+		FUSE_NO_OPENDIR_SUPPORT | FUSE_EXPLICIT_INVAL_DATA |
+		FUSE_HANDLE_KILLPRIV_V2;
 #ifdef CONFIG_FUSE_DAX
 	if (fm->fc->dax)
 		ia->in.flags |= FUSE_MAP_ALIGNMENT;
@@ -1465,7 +1463,8 @@ static int fuse_fill_super(struct super_block *sb, struct fs_context *fsc)
 	return 0;
 
  err_put_conn:
-	fuse_mount_put(fm);
+	fuse_conn_put(fc);
+	kfree(fm);
 	sb->s_fs_info = NULL;
  err_fput:
 	fput(file);
@@ -1557,7 +1556,7 @@ void fuse_conn_destroy(struct fuse_mount *fm)
 }
 EXPORT_SYMBOL_GPL(fuse_conn_destroy);
 
-static void fuse_kill_sb_anon(struct super_block *sb)
+static void fuse_sb_destroy(struct super_block *sb)
 {
 	struct fuse_mount *fm = get_fuse_mount_super(sb);
 	bool last;
@@ -1567,6 +1566,11 @@ static void fuse_kill_sb_anon(struct super_block *sb)
 		if (last)
 			fuse_conn_destroy(fm);
 	}
+}
+
+static void fuse_kill_sb_anon(struct super_block *sb)
+{
+	fuse_sb_destroy(sb);
 	kill_anon_super(sb);
 }
 
@@ -1583,14 +1587,7 @@ MODULE_ALIAS_FS("fuse");
 #ifdef CONFIG_BLOCK
 static void fuse_kill_sb_blk(struct super_block *sb)
 {
-	struct fuse_mount *fm = get_fuse_mount_super(sb);
-	bool last;
-
-	if (fm) {
-		last = fuse_mount_remove(fm);
-		if (last)
-			fuse_conn_destroy(fm);
-	}
+	fuse_sb_destroy(sb);
 	kill_block_super(sb);
 }
 
diff --git a/fs/fuse/readdir.c b/fs/fuse/readdir.c
index 3b5e91045871..3441ffa740f3 100644
--- a/fs/fuse/readdir.c
+++ b/fs/fuse/readdir.c
@@ -207,7 +207,7 @@ retry:
 			dput(dentry);
 			goto retry;
 		}
-		if (is_bad_inode(inode)) {
+		if (fuse_is_bad(inode)) {
 			dput(dentry);
 			return -EIO;
 		}
@@ -568,7 +568,7 @@ int fuse_readdir(struct file *file, struct dir_context *ctx)
 	struct inode *inode = file_inode(file);
 	int err;
 
-	if (is_bad_inode(inode))
+	if (fuse_is_bad(inode))
 		return -EIO;
 
 	mutex_lock(&ff->readdir.lock);
diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index 21a9e534417c..8868ac31a3c0 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -1402,18 +1402,6 @@ static int virtio_fs_test_super(struct super_block *sb,
 	return fsc_fm->fc->iq.priv == sb_fm->fc->iq.priv;
 }
 
-static int virtio_fs_set_super(struct super_block *sb,
-			       struct fs_context *fsc)
-{
-	int err;
-
-	err = get_anon_bdev(&sb->s_dev);
-	if (!err)
-		fuse_mount_get(fsc->s_fs_info);
-
-	return err;
-}
-
 static int virtio_fs_get_tree(struct fs_context *fsc)
 {
 	struct virtio_fs *fs;
@@ -1432,22 +1420,14 @@ static int virtio_fs_get_tree(struct fs_context *fsc)
 		return -EINVAL;
 	}
 
+	err = -ENOMEM;
 	fc = kzalloc(sizeof(struct fuse_conn), GFP_KERNEL);
-	if (!fc) {
-		mutex_lock(&virtio_fs_mutex);
-		virtio_fs_put(fs);
-		mutex_unlock(&virtio_fs_mutex);
-		return -ENOMEM;
-	}
+	if (!fc)
+		goto out_err;
 
 	fm = kzalloc(sizeof(struct fuse_mount), GFP_KERNEL);
-	if (!fm) {
-		mutex_lock(&virtio_fs_mutex);
-		virtio_fs_put(fs);
-		mutex_unlock(&virtio_fs_mutex);
-		kfree(fc);
-		return -ENOMEM;
-	}
+	if (!fm)
+		goto out_err;
 
 	fuse_conn_init(fc, fm, get_user_ns(current_user_ns()),
 		       &virtio_fs_fiq_ops, fs);
@@ -1456,14 +1436,20 @@ static int virtio_fs_get_tree(struct fs_context *fsc)
 	fc->auto_submounts = true;
 
 	fsc->s_fs_info = fm;
-	sb = sget_fc(fsc, virtio_fs_test_super, virtio_fs_set_super);
-	fuse_mount_put(fm);
+	sb = sget_fc(fsc, virtio_fs_test_super, set_anon_super_fc);
+	if (fsc->s_fs_info) {
+		fuse_conn_put(fc);
+		kfree(fm);
+	}
 	if (IS_ERR(sb))
 		return PTR_ERR(sb);
 
 	if (!sb->s_root) {
 		err = virtio_fs_fill_super(sb, fsc);
 		if (err) {
+			fuse_conn_put(fc);
+			kfree(fm);
+			sb->s_fs_info = NULL;
 			deactivate_locked_super(sb);
 			return err;
 		}
@@ -1474,6 +1460,13 @@ static int virtio_fs_get_tree(struct fs_context *fsc)
 	WARN_ON(fsc->root);
 	fsc->root = dget(sb->s_root);
 	return 0;
+
+out_err:
+	kfree(fc);
+	mutex_lock(&virtio_fs_mutex);
+	virtio_fs_put(fs);
+	mutex_unlock(&virtio_fs_mutex);
+	return err;
 }
 
 static const struct fs_context_operations virtio_fs_context_ops = {
diff --git a/fs/fuse/xattr.c b/fs/fuse/xattr.c
index 371bdcbc7233..cdea18de94f7 100644
--- a/fs/fuse/xattr.c
+++ b/fs/fuse/xattr.c
@@ -113,6 +113,9 @@ ssize_t fuse_listxattr(struct dentry *entry, char *list, size_t size)
 	struct fuse_getxattr_out outarg;
 	ssize_t ret;
 
+	if (fuse_is_bad(inode))
+		return -EIO;
+
 	if (!fuse_allow_current_process(fm->fc))
 		return -EACCES;
 
@@ -178,6 +181,9 @@ static int fuse_xattr_get(const struct xattr_handler *handler,
 			 struct dentry *dentry, struct inode *inode,
 			 const char *name, void *value, size_t size)
 {
+	if (fuse_is_bad(inode))
+		return -EIO;
+
 	return fuse_getxattr(inode, name, value, size);
 }
 
@@ -186,6 +192,9 @@ static int fuse_xattr_set(const struct xattr_handler *handler,
 			  const char *name, const void *value, size_t size,
 			  int flags)
 {
+	if (fuse_is_bad(inode))
+		return -EIO;
+
 	if (!value)
 		return fuse_removexattr(inode, name);
 
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index 35a6fd103761..d87a5bc3607b 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -857,12 +857,6 @@ static void delete_work_func(struct work_struct *work)
 	clear_bit(GLF_PENDING_DELETE, &gl->gl_flags);
 	spin_unlock(&gl->gl_lockref.lock);
 
-	/* If someone's using this glock to create a new dinode, the block must
-	   have been freed by another node, then re-used, in which case our
-	   iopen callback is too late after the fact. Ignore it. */
-	if (test_bit(GLF_INODE_CREATING, &gl->gl_flags))
-		goto out;
-
 	if (test_bit(GLF_DEMOTE, &gl->gl_flags)) {
 		/*
 		 * If we can evict the inode, give the remote node trying to
@@ -2112,8 +2106,6 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl)
 		*p++ = 'o';
 	if (test_bit(GLF_BLOCKING, gflags))
 		*p++ = 'b';
-	if (test_bit(GLF_INODE_CREATING, gflags))
-		*p++ = 'c';
 	if (test_bit(GLF_PENDING_DELETE, gflags))
 		*p++ = 'P';
 	if (test_bit(GLF_FREEING, gflags))
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index f8858d995b24..8e1ab8ed4abc 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -348,7 +348,6 @@ enum {
 	GLF_LRU				= 13,
 	GLF_OBJECT			= 14, /* Used only for tracing */
 	GLF_BLOCKING			= 15,
-	GLF_INODE_CREATING		= 16, /* Inode creation occurring */
 	GLF_PENDING_DELETE		= 17,
 	GLF_FREEING			= 18, /* Wait for glock to be freed */
 };
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 65ae4fc28ede..c1b77e8d6b1c 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -36,6 +36,10 @@
 #include "super.h"
 #include "glops.h"
 
+static const struct inode_operations gfs2_file_iops;
+static const struct inode_operations gfs2_dir_iops;
+static const struct inode_operations gfs2_symlink_iops;
+
 static int iget_test(struct inode *inode, void *opaque)
 {
 	u64 no_addr = *(u64 *)opaque;
@@ -605,7 +609,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 	struct inode *inode = NULL;
 	struct gfs2_inode *dip = GFS2_I(dir), *ip;
 	struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
-	struct gfs2_glock *io_gl = NULL;
+	struct gfs2_glock *io_gl;
 	int error, free_vfs_inode = 1;
 	u32 aflags = 0;
 	unsigned blocks = 1;
@@ -746,8 +750,6 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 	init_dinode(dip, ip, symname);
 	gfs2_trans_end(sdp);
 
-	BUG_ON(test_and_set_bit(GLF_INODE_CREATING, &io_gl->gl_flags));
-
 	error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh);
 	if (error)
 		goto fail_gunlock2;
@@ -793,7 +795,6 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
 	gfs2_glock_dq_uninit(ghs);
 	gfs2_qa_put(ip);
 	gfs2_glock_dq_uninit(ghs + 1);
-	clear_bit(GLF_INODE_CREATING, &io_gl->gl_flags);
 	gfs2_glock_put(io_gl);
 	gfs2_qa_put(dip);
 	return error;
@@ -802,7 +803,6 @@ fail_gunlock3:
 	glock_clear_object(io_gl, ip);
 	gfs2_glock_dq_uninit(&ip->i_iopen_gh);
 fail_gunlock2:
-	clear_bit(GLF_INODE_CREATING, &io_gl->gl_flags);
 	glock_clear_object(io_gl, ip);
 	gfs2_glock_put(io_gl);
 fail_free_inode:
@@ -2136,7 +2136,7 @@ static int gfs2_update_time(struct inode *inode, struct timespec64 *time,
 	return generic_update_time(inode, time, flags);
 }
 
-const struct inode_operations gfs2_file_iops = {
+static const struct inode_operations gfs2_file_iops = {
 	.permission = gfs2_permission,
 	.setattr = gfs2_setattr,
 	.getattr = gfs2_getattr,
@@ -2147,7 +2147,7 @@ const struct inode_operations gfs2_file_iops = {
 	.update_time = gfs2_update_time,
 };
 
-const struct inode_operations gfs2_dir_iops = {
+static const struct inode_operations gfs2_dir_iops = {
 	.create = gfs2_create,
 	.lookup = gfs2_lookup,
 	.link = gfs2_link,
@@ -2168,7 +2168,7 @@ const struct inode_operations gfs2_dir_iops = {
 	.atomic_open = gfs2_atomic_open,
 };
 
-const struct inode_operations gfs2_symlink_iops = {
+static const struct inode_operations gfs2_symlink_iops = {
 	.get_link = gfs2_get_link,
 	.permission = gfs2_permission,
 	.setattr = gfs2_setattr,
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index b52ecf4ffe63..8073b8d2c7fa 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -107,9 +107,6 @@ extern int gfs2_open_common(struct inode *inode, struct file *file);
 extern loff_t gfs2_seek_data(struct file *file, loff_t offset);
 extern loff_t gfs2_seek_hole(struct file *file, loff_t offset);
 
-extern const struct inode_operations gfs2_file_iops;
-extern const struct inode_operations gfs2_dir_iops;
-extern const struct inode_operations gfs2_symlink_iops;
 extern const struct file_operations gfs2_file_fops_nolock;
 extern const struct file_operations gfs2_dir_fops_nolock;
 
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index b3d951ab8068..2f56acc41c04 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -353,7 +353,6 @@ int gfs2_statfs_sync(struct super_block *sb, int type)
 	struct buffer_head *m_bh, *l_bh;
 	int error;
 
-	sb_start_write(sb);
 	error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, GL_NOCACHE,
 				   &gh);
 	if (error)
@@ -392,7 +391,6 @@ out_bh:
 out_unlock:
 	gfs2_glock_dq_uninit(&gh);
 out:
-	sb_end_write(sb);
 	return error;
 }
 
diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c
index 0fba3bf64189..a374397f4273 100644
--- a/fs/gfs2/util.c
+++ b/fs/gfs2/util.c
@@ -137,7 +137,7 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp)
 	gfs2_glock_dq(&sdp->sd_jinode_gh);
 	if (test_bit(SDF_FS_FROZEN, &sdp->sd_flags)) {
 		/* Make sure gfs2_unfreeze works if partially-frozen */
-		flush_workqueue(gfs2_freeze_wq);
+		flush_work(&sdp->sd_freeze_work);
 		atomic_set(&sdp->sd_freeze_state, SFS_FROZEN);
 		thaw_super(sdp->sd_vfs);
 	} else {
diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h
index d7562981b3a0..a4443dd8a94b 100644
--- a/fs/gfs2/util.h
+++ b/fs/gfs2/util.h
@@ -151,7 +151,7 @@ extern int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
 			       bool verbose);
 
 #define gfs2_io_error(sdp) \
-gfs2_io_error_i((sdp), __func__, __FILE__, __LINE__);
+gfs2_io_error_i((sdp), __func__, __FILE__, __LINE__)
 
 
 void gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh,
@@ -159,10 +159,10 @@ void gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh,
 			bool withdraw);
 
 #define gfs2_io_error_bh_wd(sdp, bh) \
-gfs2_io_error_bh_i((sdp), (bh), __func__, __FILE__, __LINE__, true);
+gfs2_io_error_bh_i((sdp), (bh), __func__, __FILE__, __LINE__, true)
 
 #define gfs2_io_error_bh(sdp, bh) \
-gfs2_io_error_bh_i((sdp), (bh), __func__, __FILE__, __LINE__, false);
+gfs2_io_error_bh_i((sdp), (bh), __func__, __FILE__, __LINE__, false)
 
 
 extern struct kmem_cache *gfs2_glock_cachep;
diff --git a/fs/jffs2/debug.h b/fs/jffs2/debug.h
index 4fd9be4cbc98..40e203b6e5c1 100644
--- a/fs/jffs2/debug.h
+++ b/fs/jffs2/debug.h
@@ -13,6 +13,7 @@
 #ifndef _JFFS2_DEBUG_H_
 #define _JFFS2_DEBUG_H_
 
+#include <linux/printk.h>
 #include <linux/sched.h>
 
 #ifndef CONFIG_JFFS2_FS_DEBUG
@@ -99,73 +100,73 @@ do {						\
 #ifdef JFFS2_DBG_READINODE_MESSAGES
 #define dbg_readinode(fmt, ...)	JFFS2_DEBUG(fmt, ##__VA_ARGS__)
 #else
-#define dbg_readinode(fmt, ...)
+#define dbg_readinode(fmt, ...)	no_printk(fmt, ##__VA_ARGS__)
 #endif
 #ifdef JFFS2_DBG_READINODE2_MESSAGES
 #define dbg_readinode2(fmt, ...)	JFFS2_DEBUG(fmt, ##__VA_ARGS__)
 #else
-#define dbg_readinode2(fmt, ...)
+#define dbg_readinode2(fmt, ...)	no_printk(fmt, ##__VA_ARGS__)
 #endif
 
 /* Fragtree build debugging messages */
 #ifdef JFFS2_DBG_FRAGTREE_MESSAGES
 #define dbg_fragtree(fmt, ...)	JFFS2_DEBUG(fmt, ##__VA_ARGS__)
 #else
-#define dbg_fragtree(fmt, ...)
+#define dbg_fragtree(fmt, ...)	no_printk(fmt, ##__VA_ARGS__)
 #endif
 #ifdef JFFS2_DBG_FRAGTREE2_MESSAGES
 #define dbg_fragtree2(fmt, ...)	JFFS2_DEBUG(fmt, ##__VA_ARGS__)
 #else
-#define dbg_fragtree2(fmt, ...)
+#define dbg_fragtree2(fmt, ...)	no_printk(fmt, ##__VA_ARGS__)
 #endif
 
 /* Directory entry list manilulation debugging messages */
 #ifdef JFFS2_DBG_DENTLIST_MESSAGES
 #define dbg_dentlist(fmt, ...)	JFFS2_DEBUG(fmt, ##__VA_ARGS__)
 #else
-#define dbg_dentlist(fmt, ...)
+#define dbg_dentlist(fmt, ...)	no_printk(fmt, ##__VA_ARGS__)
 #endif
 
 /* Print the messages about manipulating node_refs */
 #ifdef JFFS2_DBG_NODEREF_MESSAGES
 #define dbg_noderef(fmt, ...)	JFFS2_DEBUG(fmt, ##__VA_ARGS__)
 #else
-#define dbg_noderef(fmt, ...)
+#define dbg_noderef(fmt, ...)	no_printk(fmt, ##__VA_ARGS__)
 #endif
 
 /* Manipulations with the list of inodes (JFFS2 inocache) */
 #ifdef JFFS2_DBG_INOCACHE_MESSAGES
 #define dbg_inocache(fmt, ...)	JFFS2_DEBUG(fmt, ##__VA_ARGS__)
 #else
-#define dbg_inocache(fmt, ...)
+#define dbg_inocache(fmt, ...)	no_printk(fmt, ##__VA_ARGS__)
 #endif
 
 /* Summary debugging messages */
 #ifdef JFFS2_DBG_SUMMARY_MESSAGES
 #define dbg_summary(fmt, ...)	JFFS2_DEBUG(fmt, ##__VA_ARGS__)
 #else
-#define dbg_summary(fmt, ...)
+#define dbg_summary(fmt, ...)	no_printk(fmt, ##__VA_ARGS__)
 #endif
 
 /* File system build messages */
 #ifdef JFFS2_DBG_FSBUILD_MESSAGES
 #define dbg_fsbuild(fmt, ...)	JFFS2_DEBUG(fmt, ##__VA_ARGS__)
 #else
-#define dbg_fsbuild(fmt, ...)
+#define dbg_fsbuild(fmt, ...)	no_printk(fmt, ##__VA_ARGS__)
 #endif
 
 /* Watch the object allocations */
 #ifdef JFFS2_DBG_MEMALLOC_MESSAGES
 #define dbg_memalloc(fmt, ...)	JFFS2_DEBUG(fmt, ##__VA_ARGS__)
 #else
-#define dbg_memalloc(fmt, ...)
+#define dbg_memalloc(fmt, ...)	no_printk(fmt, ##__VA_ARGS__)
 #endif
 
 /* Watch the XATTR subsystem */
 #ifdef JFFS2_DBG_XATTR_MESSAGES
 #define dbg_xattr(fmt, ...)  JFFS2_DEBUG(fmt, ##__VA_ARGS__)
 #else
-#define dbg_xattr(fmt, ...)
+#define dbg_xattr(fmt, ...)  no_printk(fmt, ##__VA_ARGS__)
 #endif 
 
 /* "Sanity" checks */
diff --git a/fs/jffs2/jffs2_fs_sb.h b/fs/jffs2/jffs2_fs_sb.h
index 778275f48a87..5a7091746f68 100644
--- a/fs/jffs2/jffs2_fs_sb.h
+++ b/fs/jffs2/jffs2_fs_sb.h
@@ -38,6 +38,7 @@ struct jffs2_mount_opts {
 	 * users. This is implemented simply by means of not allowing the
 	 * latter users to write to the file system if the amount if the
 	 * available space is less then 'rp_size'. */
+	bool set_rp_size;
 	unsigned int rp_size;
 };
 
diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h
index 8ff4d1a1e774..2e98fa277dab 100644
--- a/fs/jffs2/nodelist.h
+++ b/fs/jffs2/nodelist.h
@@ -349,14 +349,14 @@ static inline struct jffs2_node_frag *frag_last(struct rb_root *root)
 #define frag_parent(frag) rb_entry(rb_parent(&(frag)->rb), struct jffs2_node_frag, rb)
 #define frag_left(frag) rb_entry((frag)->rb.rb_left, struct jffs2_node_frag, rb)
 #define frag_right(frag) rb_entry((frag)->rb.rb_right, struct jffs2_node_frag, rb)
-#define frag_erase(frag, list) rb_erase(&frag->rb, list);
+#define frag_erase(frag, list) rb_erase(&frag->rb, list)
 
 #define tn_next(tn) rb_entry(rb_next(&(tn)->rb), struct jffs2_tmp_dnode_info, rb)
 #define tn_prev(tn) rb_entry(rb_prev(&(tn)->rb), struct jffs2_tmp_dnode_info, rb)
 #define tn_parent(tn) rb_entry(rb_parent(&(tn)->rb), struct jffs2_tmp_dnode_info, rb)
 #define tn_left(tn) rb_entry((tn)->rb.rb_left, struct jffs2_tmp_dnode_info, rb)
 #define tn_right(tn) rb_entry((tn)->rb.rb_right, struct jffs2_tmp_dnode_info, rb)
-#define tn_erase(tn, list) rb_erase(&tn->rb, list);
+#define tn_erase(tn, list) rb_erase(&tn->rb, list)
 #define tn_last(list) rb_entry(rb_last(list), struct jffs2_tmp_dnode_info, rb)
 #define tn_first(list) rb_entry(rb_first(list), struct jffs2_tmp_dnode_info, rb)
 
diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c
index 2f6f0b140c05..03b4f99614be 100644
--- a/fs/jffs2/readinode.c
+++ b/fs/jffs2/readinode.c
@@ -672,6 +672,22 @@ static inline int read_direntry(struct jffs2_sb_info *c, struct jffs2_raw_node_r
 			jffs2_free_full_dirent(fd);
 			return -EIO;
 		}
+
+#ifdef CONFIG_JFFS2_SUMMARY
+		/*
+		 * we use CONFIG_JFFS2_SUMMARY because without it, we
+		 * have checked it while mounting
+		 */
+		crc = crc32(0, fd->name, rd->nsize);
+		if (unlikely(crc != je32_to_cpu(rd->name_crc))) {
+			JFFS2_NOTICE("name CRC failed on dirent node at"
+			   "%#08x: read %#08x,calculated %#08x\n",
+			   ref_offset(ref), je32_to_cpu(rd->node_crc), crc);
+			jffs2_mark_node_obsolete(c, ref);
+			jffs2_free_full_dirent(fd);
+			return 0;
+		}
+#endif
 	}
 
 	fd->nhash = full_name_hash(NULL, fd->name, rd->nsize);
diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c
index 05d7878dfad1..81ca58c10b72 100644
--- a/fs/jffs2/super.c
+++ b/fs/jffs2/super.c
@@ -88,7 +88,7 @@ static int jffs2_show_options(struct seq_file *s, struct dentry *root)
 
 	if (opts->override_compr)
 		seq_printf(s, ",compr=%s", jffs2_compr_name(opts->compr));
-	if (opts->rp_size)
+	if (opts->set_rp_size)
 		seq_printf(s, ",rp_size=%u", opts->rp_size / 1024);
 
 	return 0;
@@ -202,11 +202,8 @@ static int jffs2_parse_param(struct fs_context *fc, struct fs_parameter *param)
 	case Opt_rp_size:
 		if (result.uint_32 > UINT_MAX / 1024)
 			return invalf(fc, "jffs2: rp_size unrepresentable");
-		opt = result.uint_32 * 1024;
-		if (opt > c->mtd->size)
-			return invalf(fc, "jffs2: Too large reserve pool specified, max is %llu KB",
-				      c->mtd->size / 1024);
-		c->mount_opts.rp_size = opt;
+		c->mount_opts.rp_size = result.uint_32 * 1024;
+		c->mount_opts.set_rp_size = true;
 		break;
 	default:
 		return -EINVAL;
@@ -215,11 +212,30 @@ static int jffs2_parse_param(struct fs_context *fc, struct fs_parameter *param)
 	return 0;
 }
 
+static inline void jffs2_update_mount_opts(struct fs_context *fc)
+{
+	struct jffs2_sb_info *new_c = fc->s_fs_info;
+	struct jffs2_sb_info *c = JFFS2_SB_INFO(fc->root->d_sb);
+
+	mutex_lock(&c->alloc_sem);
+	if (new_c->mount_opts.override_compr) {
+		c->mount_opts.override_compr = new_c->mount_opts.override_compr;
+		c->mount_opts.compr = new_c->mount_opts.compr;
+	}
+	if (new_c->mount_opts.set_rp_size) {
+		c->mount_opts.set_rp_size = new_c->mount_opts.set_rp_size;
+		c->mount_opts.rp_size = new_c->mount_opts.rp_size;
+	}
+	mutex_unlock(&c->alloc_sem);
+}
+
 static int jffs2_reconfigure(struct fs_context *fc)
 {
 	struct super_block *sb = fc->root->d_sb;
 
 	sync_filesystem(sb);
+	jffs2_update_mount_opts(fc);
+
 	return jffs2_do_remount_fs(sb, fc);
 }
 
@@ -249,6 +265,10 @@ static int jffs2_fill_super(struct super_block *sb, struct fs_context *fc)
 	c->mtd = sb->s_mtd;
 	c->os_priv = sb;
 
+	if (c->mount_opts.rp_size > c->mtd->size)
+		return invalf(fc, "jffs2: Too large reserve pool specified, max is %llu KB",
+			      c->mtd->size / 1024);
+
 	/* Initialize JFFS2 superblock locks, the further initialization will
 	 * be done later */
 	mutex_init(&c->alloc_sem);
diff --git a/fs/libfs.c b/fs/libfs.c
index 7124c2e8df2f..d1c3bade9f30 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -1451,4 +1451,74 @@ int generic_ci_d_hash(const struct dentry *dentry, struct qstr *str)
 	return 0;
 }
 EXPORT_SYMBOL(generic_ci_d_hash);
+
+static const struct dentry_operations generic_ci_dentry_ops = {
+	.d_hash = generic_ci_d_hash,
+	.d_compare = generic_ci_d_compare,
+};
+#endif
+
+#ifdef CONFIG_FS_ENCRYPTION
+static const struct dentry_operations generic_encrypted_dentry_ops = {
+	.d_revalidate = fscrypt_d_revalidate,
+};
+#endif
+
+#if defined(CONFIG_FS_ENCRYPTION) && defined(CONFIG_UNICODE)
+static const struct dentry_operations generic_encrypted_ci_dentry_ops = {
+	.d_hash = generic_ci_d_hash,
+	.d_compare = generic_ci_d_compare,
+	.d_revalidate = fscrypt_d_revalidate,
+};
+#endif
+
+/**
+ * generic_set_encrypted_ci_d_ops - helper for setting d_ops for given dentry
+ * @dentry:	dentry to set ops on
+ *
+ * Casefolded directories need d_hash and d_compare set, so that the dentries
+ * contained in them are handled case-insensitively.  Note that these operations
+ * are needed on the parent directory rather than on the dentries in it, and
+ * while the casefolding flag can be toggled on and off on an empty directory,
+ * dentry_operations can't be changed later.  As a result, if the filesystem has
+ * casefolding support enabled at all, we have to give all dentries the
+ * casefolding operations even if their inode doesn't have the casefolding flag
+ * currently (and thus the casefolding ops would be no-ops for now).
+ *
+ * Encryption works differently in that the only dentry operation it needs is
+ * d_revalidate, which it only needs on dentries that have the no-key name flag.
+ * The no-key flag can't be set "later", so we don't have to worry about that.
+ *
+ * Finally, to maximize compatibility with overlayfs (which isn't compatible
+ * with certain dentry operations) and to avoid taking an unnecessary
+ * performance hit, we use custom dentry_operations for each possible
+ * combination rather than always installing all operations.
+ */
+void generic_set_encrypted_ci_d_ops(struct dentry *dentry)
+{
+#ifdef CONFIG_FS_ENCRYPTION
+	bool needs_encrypt_ops = dentry->d_flags & DCACHE_NOKEY_NAME;
+#endif
+#ifdef CONFIG_UNICODE
+	bool needs_ci_ops = dentry->d_sb->s_encoding;
+#endif
+#if defined(CONFIG_FS_ENCRYPTION) && defined(CONFIG_UNICODE)
+	if (needs_encrypt_ops && needs_ci_ops) {
+		d_set_d_op(dentry, &generic_encrypted_ci_dentry_ops);
+		return;
+	}
 #endif
+#ifdef CONFIG_FS_ENCRYPTION
+	if (needs_encrypt_ops) {
+		d_set_d_op(dentry, &generic_encrypted_dentry_ops);
+		return;
+	}
+#endif
+#ifdef CONFIG_UNICODE
+	if (needs_ci_ops) {
+		d_set_d_op(dentry, &generic_ci_dentry_ops);
+		return;
+	}
+#endif
+}
+EXPORT_SYMBOL(generic_set_encrypted_ci_d_ops);
diff --git a/fs/lockd/host.c b/fs/lockd/host.c
index 0afb6d59bad0..f802223e71ab 100644
--- a/fs/lockd/host.c
+++ b/fs/lockd/host.c
@@ -163,7 +163,7 @@ static struct nlm_host *nlm_alloc_host(struct nlm_lookup_host_info *ni,
 	host->h_nsmhandle  = nsm;
 	host->h_addrbuf    = nsm->sm_addrbuf;
 	host->net	   = ni->net;
-	host->h_cred	   = get_cred(ni->cred),
+	host->h_cred	   = get_cred(ni->cred);
 	strlcpy(host->nodename, utsname()->nodename, sizeof(host->nodename));
 
 out:
@@ -439,12 +439,7 @@ nlm_bind_host(struct nlm_host *host)
 	 * RPC rebind is required
 	 */
 	if ((clnt = host->h_rpcclnt) != NULL) {
-		if (time_after_eq(jiffies, host->h_nextrebind)) {
-			rpc_force_rebind(clnt);
-			host->h_nextrebind = jiffies + NLM_HOST_REBIND;
-			dprintk("lockd: next rebind in %lu jiffies\n",
-					host->h_nextrebind - jiffies);
-		}
+		nlm_rebind_host(host);
 	} else {
 		unsigned long increment = nlmsvc_timeout;
 		struct rpc_timeout timeparms = {
@@ -494,13 +489,20 @@ nlm_bind_host(struct nlm_host *host)
 	return clnt;
 }
 
-/*
- * Force a portmap lookup of the remote lockd port
+/**
+ * nlm_rebind_host - If needed, force a portmap lookup of the peer's lockd port
+ * @host: NLM host handle for peer
+ *
+ * This is not needed when using a connection-oriented protocol, such as TCP.
+ * The existing autobind mechanism is sufficient to force a rebind when
+ * required, e.g. on connection state transitions.
  */
 void
 nlm_rebind_host(struct nlm_host *host)
 {
-	dprintk("lockd: rebind host %s\n", host->h_name);
+	if (host->h_proto != IPPROTO_UDP)
+		return;
+
 	if (host->h_rpcclnt && time_after_eq(jiffies, host->h_nextrebind)) {
 		rpc_force_rebind(host->h_rpcclnt);
 		host->h_nextrebind = jiffies + NLM_HOST_REBIND;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 4b8cc93913f7..ff5c4d0d6d13 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -571,7 +571,7 @@ static int nfs_start_lockd(struct nfs_server *server)
 					1 : 0,
 		.net		= clp->cl_net,
 		.nlmclnt_ops 	= clp->cl_nfs_mod->rpc_ops->nlmclnt_ops,
-		.cred		= current_cred(),
+		.cred		= server->cred,
 	};
 
 	if (nlm_init.nfs_version > 3)
@@ -781,8 +781,8 @@ static void nfs_server_set_fsinfo(struct nfs_server *server,
 	server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL);
 
 	server->dtsize = nfs_block_size(fsinfo->dtpref, NULL);
-	if (server->dtsize > PAGE_SIZE * NFS_MAX_READDIR_PAGES)
-		server->dtsize = PAGE_SIZE * NFS_MAX_READDIR_PAGES;
+	if (server->dtsize > NFS_MAX_FILE_IO_SIZE)
+		server->dtsize = NFS_MAX_FILE_IO_SIZE;
 	if (server->dtsize > server->rsize)
 		server->dtsize = server->rsize;
 
@@ -985,7 +985,7 @@ struct nfs_server *nfs_create_server(struct fs_context *fc)
 	if (!server)
 		return ERR_PTR(-ENOMEM);
 
-	server->cred = get_cred(current_cred());
+	server->cred = get_cred(fc->cred);
 
 	error = -ENOMEM;
 	fattr = nfs_alloc_fattr();
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 8a24fe20dccf..ef827ae193d2 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -68,7 +68,7 @@ const struct address_space_operations nfs_dir_aops = {
 	.freepage = nfs_readdir_clear_array,
 };
 
-static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir, const struct cred *cred)
+static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir)
 {
 	struct nfs_inode *nfsi = NFS_I(dir);
 	struct nfs_open_dir_context *ctx;
@@ -78,7 +78,6 @@ static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir
 		ctx->attr_gencount = nfsi->attr_gencount;
 		ctx->dir_cookie = 0;
 		ctx->dup_cookie = 0;
-		ctx->cred = get_cred(cred);
 		spin_lock(&dir->i_lock);
 		if (list_empty(&nfsi->open_files) &&
 		    (nfsi->cache_validity & NFS_INO_DATA_INVAL_DEFER))
@@ -96,7 +95,6 @@ static void put_nfs_open_dir_context(struct inode *dir, struct nfs_open_dir_cont
 	spin_lock(&dir->i_lock);
 	list_del(&ctx->list);
 	spin_unlock(&dir->i_lock);
-	put_cred(ctx->cred);
 	kfree(ctx);
 }
 
@@ -113,7 +111,7 @@ nfs_opendir(struct inode *inode, struct file *filp)
 
 	nfs_inc_stats(inode, NFSIOS_VFSOPEN);
 
-	ctx = alloc_nfs_open_dir_context(inode, current_cred());
+	ctx = alloc_nfs_open_dir_context(inode);
 	if (IS_ERR(ctx)) {
 		res = PTR_ERR(ctx);
 		goto out;
@@ -133,43 +131,55 @@ nfs_closedir(struct inode *inode, struct file *filp)
 struct nfs_cache_array_entry {
 	u64 cookie;
 	u64 ino;
-	struct qstr string;
+	const char *name;
+	unsigned int name_len;
 	unsigned char d_type;
 };
 
 struct nfs_cache_array {
-	int size;
-	int eof_index;
 	u64 last_cookie;
+	unsigned int size;
+	unsigned char page_full : 1,
+		      page_is_eof : 1,
+		      cookies_are_ordered : 1;
 	struct nfs_cache_array_entry array[];
 };
 
-typedef struct {
+struct nfs_readdir_descriptor {
 	struct file	*file;
 	struct page	*page;
 	struct dir_context *ctx;
-	unsigned long	page_index;
-	u64		*dir_cookie;
+	pgoff_t		page_index;
+	u64		dir_cookie;
 	u64		last_cookie;
+	u64		dup_cookie;
 	loff_t		current_index;
 	loff_t		prev_index;
 
+	__be32		verf[NFS_DIR_VERIFIER_SIZE];
 	unsigned long	dir_verifier;
 	unsigned long	timestamp;
 	unsigned long	gencount;
+	unsigned long	attr_gencount;
 	unsigned int	cache_entry_index;
+	signed char duped;
 	bool plus;
 	bool eof;
-} nfs_readdir_descriptor_t;
+};
 
-static
-void nfs_readdir_init_array(struct page *page)
+static void nfs_readdir_array_init(struct nfs_cache_array *array)
+{
+	memset(array, 0, sizeof(struct nfs_cache_array));
+}
+
+static void nfs_readdir_page_init_array(struct page *page, u64 last_cookie)
 {
 	struct nfs_cache_array *array;
 
 	array = kmap_atomic(page);
-	memset(array, 0, sizeof(struct nfs_cache_array));
-	array->eof_index = -1;
+	nfs_readdir_array_init(array);
+	array->last_cookie = last_cookie;
+	array->cookies_are_ordered = 1;
 	kunmap_atomic(array);
 }
 
@@ -184,61 +194,177 @@ void nfs_readdir_clear_array(struct page *page)
 
 	array = kmap_atomic(page);
 	for (i = 0; i < array->size; i++)
-		kfree(array->array[i].string.name);
-	array->size = 0;
+		kfree(array->array[i].name);
+	nfs_readdir_array_init(array);
 	kunmap_atomic(array);
 }
 
+static struct page *
+nfs_readdir_page_array_alloc(u64 last_cookie, gfp_t gfp_flags)
+{
+	struct page *page = alloc_page(gfp_flags);
+	if (page)
+		nfs_readdir_page_init_array(page, last_cookie);
+	return page;
+}
+
+static void nfs_readdir_page_array_free(struct page *page)
+{
+	if (page) {
+		nfs_readdir_clear_array(page);
+		put_page(page);
+	}
+}
+
+static void nfs_readdir_array_set_eof(struct nfs_cache_array *array)
+{
+	array->page_is_eof = 1;
+	array->page_full = 1;
+}
+
+static bool nfs_readdir_array_is_full(struct nfs_cache_array *array)
+{
+	return array->page_full;
+}
+
 /*
  * the caller is responsible for freeing qstr.name
  * when called by nfs_readdir_add_to_array, the strings will be freed in
  * nfs_clear_readdir_array()
  */
-static
-int nfs_readdir_make_qstr(struct qstr *string, const char *name, unsigned int len)
+static const char *nfs_readdir_copy_name(const char *name, unsigned int len)
 {
-	string->len = len;
-	string->name = kmemdup_nul(name, len, GFP_KERNEL);
-	if (string->name == NULL)
-		return -ENOMEM;
+	const char *ret = kmemdup_nul(name, len, GFP_KERNEL);
+
 	/*
 	 * Avoid a kmemleak false positive. The pointer to the name is stored
 	 * in a page cache page which kmemleak does not scan.
 	 */
-	kmemleak_not_leak(string->name);
-	string->hash = full_name_hash(NULL, name, len);
+	if (ret != NULL)
+		kmemleak_not_leak(ret);
+	return ret;
+}
+
+/*
+ * Check that the next array entry lies entirely within the page bounds
+ */
+static int nfs_readdir_array_can_expand(struct nfs_cache_array *array)
+{
+	struct nfs_cache_array_entry *cache_entry;
+
+	if (array->page_full)
+		return -ENOSPC;
+	cache_entry = &array->array[array->size + 1];
+	if ((char *)cache_entry - (char *)array > PAGE_SIZE) {
+		array->page_full = 1;
+		return -ENOSPC;
+	}
 	return 0;
 }
 
 static
 int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page)
 {
-	struct nfs_cache_array *array = kmap(page);
+	struct nfs_cache_array *array;
 	struct nfs_cache_array_entry *cache_entry;
+	const char *name;
 	int ret;
 
-	cache_entry = &array->array[array->size];
+	name = nfs_readdir_copy_name(entry->name, entry->len);
+	if (!name)
+		return -ENOMEM;
 
-	/* Check that this entry lies within the page bounds */
-	ret = -ENOSPC;
-	if ((char *)&cache_entry[1] - (char *)page_address(page) > PAGE_SIZE)
+	array = kmap_atomic(page);
+	ret = nfs_readdir_array_can_expand(array);
+	if (ret) {
+		kfree(name);
 		goto out;
+	}
 
+	cache_entry = &array->array[array->size];
 	cache_entry->cookie = entry->prev_cookie;
 	cache_entry->ino = entry->ino;
 	cache_entry->d_type = entry->d_type;
-	ret = nfs_readdir_make_qstr(&cache_entry->string, entry->name, entry->len);
-	if (ret)
-		goto out;
+	cache_entry->name_len = entry->len;
+	cache_entry->name = name;
 	array->last_cookie = entry->cookie;
+	if (array->last_cookie <= cache_entry->cookie)
+		array->cookies_are_ordered = 0;
 	array->size++;
 	if (entry->eof != 0)
-		array->eof_index = array->size;
+		nfs_readdir_array_set_eof(array);
 out:
-	kunmap(page);
+	kunmap_atomic(array);
+	return ret;
+}
+
+static struct page *nfs_readdir_page_get_locked(struct address_space *mapping,
+						pgoff_t index, u64 last_cookie)
+{
+	struct page *page;
+
+	page = grab_cache_page(mapping, index);
+	if (page && !PageUptodate(page)) {
+		nfs_readdir_page_init_array(page, last_cookie);
+		if (invalidate_inode_pages2_range(mapping, index + 1, -1) < 0)
+			nfs_zap_mapping(mapping->host, mapping);
+		SetPageUptodate(page);
+	}
+
+	return page;
+}
+
+static u64 nfs_readdir_page_last_cookie(struct page *page)
+{
+	struct nfs_cache_array *array;
+	u64 ret;
+
+	array = kmap_atomic(page);
+	ret = array->last_cookie;
+	kunmap_atomic(array);
 	return ret;
 }
 
+static bool nfs_readdir_page_needs_filling(struct page *page)
+{
+	struct nfs_cache_array *array;
+	bool ret;
+
+	array = kmap_atomic(page);
+	ret = !nfs_readdir_array_is_full(array);
+	kunmap_atomic(array);
+	return ret;
+}
+
+static void nfs_readdir_page_set_eof(struct page *page)
+{
+	struct nfs_cache_array *array;
+
+	array = kmap_atomic(page);
+	nfs_readdir_array_set_eof(array);
+	kunmap_atomic(array);
+}
+
+static void nfs_readdir_page_unlock_and_put(struct page *page)
+{
+	unlock_page(page);
+	put_page(page);
+}
+
+static struct page *nfs_readdir_page_get_next(struct address_space *mapping,
+					      pgoff_t index, u64 cookie)
+{
+	struct page *page;
+
+	page = nfs_readdir_page_get_locked(mapping, index, cookie);
+	if (page) {
+		if (nfs_readdir_page_last_cookie(page) == cookie)
+			return page;
+		nfs_readdir_page_unlock_and_put(page);
+	}
+	return NULL;
+}
+
 static inline
 int is_32bit_api(void)
 {
@@ -258,8 +384,8 @@ bool nfs_readdir_use_cookie(const struct file *filp)
 	return true;
 }
 
-static
-int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc)
+static int nfs_readdir_search_for_pos(struct nfs_cache_array *array,
+				      struct nfs_readdir_descriptor *desc)
 {
 	loff_t diff = desc->ctx->pos - desc->current_index;
 	unsigned int index;
@@ -267,13 +393,13 @@ int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descri
 	if (diff < 0)
 		goto out_eof;
 	if (diff >= array->size) {
-		if (array->eof_index >= 0)
+		if (array->page_is_eof)
 			goto out_eof;
 		return -EAGAIN;
 	}
 
 	index = (unsigned int)diff;
-	*desc->dir_cookie = array->array[index].cookie;
+	desc->dir_cookie = array->array[index].cookie;
 	desc->cache_entry_index = index;
 	return 0;
 out_eof:
@@ -290,41 +416,55 @@ nfs_readdir_inode_mapping_valid(struct nfs_inode *nfsi)
 	return !test_bit(NFS_INO_INVALIDATING, &nfsi->flags);
 }
 
-static
-int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc)
+static bool nfs_readdir_array_cookie_in_range(struct nfs_cache_array *array,
+					      u64 cookie)
+{
+	if (!array->cookies_are_ordered)
+		return true;
+	/* Optimisation for monotonically increasing cookies */
+	if (cookie >= array->last_cookie)
+		return false;
+	if (array->size && cookie < array->array[0].cookie)
+		return false;
+	return true;
+}
+
+static int nfs_readdir_search_for_cookie(struct nfs_cache_array *array,
+					 struct nfs_readdir_descriptor *desc)
 {
 	int i;
 	loff_t new_pos;
 	int status = -EAGAIN;
 
+	if (!nfs_readdir_array_cookie_in_range(array, desc->dir_cookie))
+		goto check_eof;
+
 	for (i = 0; i < array->size; i++) {
-		if (array->array[i].cookie == *desc->dir_cookie) {
+		if (array->array[i].cookie == desc->dir_cookie) {
 			struct nfs_inode *nfsi = NFS_I(file_inode(desc->file));
-			struct nfs_open_dir_context *ctx = desc->file->private_data;
 
 			new_pos = desc->current_index + i;
-			if (ctx->attr_gencount != nfsi->attr_gencount ||
+			if (desc->attr_gencount != nfsi->attr_gencount ||
 			    !nfs_readdir_inode_mapping_valid(nfsi)) {
-				ctx->duped = 0;
-				ctx->attr_gencount = nfsi->attr_gencount;
+				desc->duped = 0;
+				desc->attr_gencount = nfsi->attr_gencount;
 			} else if (new_pos < desc->prev_index) {
-				if (ctx->duped > 0
-				    && ctx->dup_cookie == *desc->dir_cookie) {
+				if (desc->duped > 0
+				    && desc->dup_cookie == desc->dir_cookie) {
 					if (printk_ratelimit()) {
 						pr_notice("NFS: directory %pD2 contains a readdir loop."
 								"Please contact your server vendor.  "
-								"The file: %.*s has duplicate cookie %llu\n",
-								desc->file, array->array[i].string.len,
-								array->array[i].string.name, *desc->dir_cookie);
+								"The file: %s has duplicate cookie %llu\n",
+								desc->file, array->array[i].name, desc->dir_cookie);
 					}
 					status = -ELOOP;
 					goto out;
 				}
-				ctx->dup_cookie = *desc->dir_cookie;
-				ctx->duped = -1;
+				desc->dup_cookie = desc->dir_cookie;
+				desc->duped = -1;
 			}
 			if (nfs_readdir_use_cookie(desc->file))
-				desc->ctx->pos = *desc->dir_cookie;
+				desc->ctx->pos = desc->dir_cookie;
 			else
 				desc->ctx->pos = new_pos;
 			desc->prev_index = new_pos;
@@ -332,24 +472,24 @@ int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_des
 			return 0;
 		}
 	}
-	if (array->eof_index >= 0) {
+check_eof:
+	if (array->page_is_eof) {
 		status = -EBADCOOKIE;
-		if (*desc->dir_cookie == array->last_cookie)
+		if (desc->dir_cookie == array->last_cookie)
 			desc->eof = true;
 	}
 out:
 	return status;
 }
 
-static
-int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc)
+static int nfs_readdir_search_array(struct nfs_readdir_descriptor *desc)
 {
 	struct nfs_cache_array *array;
 	int status;
 
-	array = kmap(desc->page);
+	array = kmap_atomic(desc->page);
 
-	if (*desc->dir_cookie == 0)
+	if (desc->dir_cookie == 0)
 		status = nfs_readdir_search_for_pos(array, desc);
 	else
 		status = nfs_readdir_search_for_cookie(array, desc);
@@ -359,17 +499,29 @@ int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc)
 		desc->current_index += array->size;
 		desc->page_index++;
 	}
-	kunmap(desc->page);
+	kunmap_atomic(array);
 	return status;
 }
 
 /* Fill a page with xdr information before transferring to the cache page */
-static
-int nfs_readdir_xdr_filler(struct page **pages, nfs_readdir_descriptor_t *desc,
-			struct nfs_entry *entry, struct file *file, struct inode *inode)
+static int nfs_readdir_xdr_filler(struct nfs_readdir_descriptor *desc,
+				  __be32 *verf, u64 cookie,
+				  struct page **pages, size_t bufsize,
+				  __be32 *verf_res)
 {
-	struct nfs_open_dir_context *ctx = file->private_data;
-	const struct cred *cred = ctx->cred;
+	struct inode *inode = file_inode(desc->file);
+	struct nfs_readdir_arg arg = {
+		.dentry = file_dentry(desc->file),
+		.cred = desc->file->f_cred,
+		.verf = verf,
+		.cookie = cookie,
+		.pages = pages,
+		.page_len = bufsize,
+		.plus = desc->plus,
+	};
+	struct nfs_readdir_res res = {
+		.verf = verf_res,
+	};
 	unsigned long	timestamp, gencount;
 	int		error;
 
@@ -377,14 +529,13 @@ int nfs_readdir_xdr_filler(struct page **pages, nfs_readdir_descriptor_t *desc,
 	timestamp = jiffies;
 	gencount = nfs_inc_attr_generation_counter();
 	desc->dir_verifier = nfs_save_change_attribute(inode);
-	error = NFS_PROTO(inode)->readdir(file_dentry(file), cred, entry->cookie, pages,
-					  NFS_SERVER(inode)->dtsize, desc->plus);
+	error = NFS_PROTO(inode)->readdir(&arg, &res);
 	if (error < 0) {
 		/* We requested READDIRPLUS, but the server doesn't grok it */
 		if (error == -ENOTSUPP && desc->plus) {
 			NFS_SERVER(inode)->caps &= ~NFS_CAP_READDIRPLUS;
 			clear_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
-			desc->plus = false;
+			desc->plus = arg.plus = false;
 			goto again;
 		}
 		goto error;
@@ -395,7 +546,7 @@ error:
 	return error;
 }
 
-static int xdr_decode(nfs_readdir_descriptor_t *desc,
+static int xdr_decode(struct nfs_readdir_descriptor *desc,
 		      struct nfs_entry *entry, struct xdr_stream *xdr)
 {
 	struct inode *inode = file_inode(desc->file);
@@ -557,24 +708,23 @@ out:
 }
 
 /* Perform conversion from xdr to cache array */
-static
-int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *entry,
-				struct page **xdr_pages, struct page *page, unsigned int buflen)
-{
+static int nfs_readdir_page_filler(struct nfs_readdir_descriptor *desc,
+				   struct nfs_entry *entry,
+				   struct page **xdr_pages,
+				   unsigned int buflen,
+				   struct page **arrays,
+				   size_t narrays)
+{
+	struct address_space *mapping = desc->file->f_mapping;
 	struct xdr_stream stream;
 	struct xdr_buf buf;
-	struct page *scratch;
-	struct nfs_cache_array *array;
-	unsigned int count = 0;
+	struct page *scratch, *new, *page = *arrays;
 	int status;
 
 	scratch = alloc_page(GFP_KERNEL);
 	if (scratch == NULL)
 		return -ENOMEM;
 
-	if (buflen == 0)
-		goto out_nopages;
-
 	xdr_init_decode_pages(&stream, &buf, xdr_pages, buflen);
 	xdr_set_scratch_page(&stream, scratch);
 
@@ -583,209 +733,238 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
 			entry->label->len = NFS4_MAXLABELLEN;
 
 		status = xdr_decode(desc, entry, &stream);
-		if (status != 0) {
-			if (status == -EAGAIN)
-				status = 0;
+		if (status != 0)
 			break;
-		}
-
-		count++;
 
 		if (desc->plus)
 			nfs_prime_dcache(file_dentry(desc->file), entry,
 					desc->dir_verifier);
 
 		status = nfs_readdir_add_to_array(entry, page);
-		if (status != 0)
-			break;
-	} while (!entry->eof);
+		if (status != -ENOSPC)
+			continue;
 
-out_nopages:
-	if (count == 0 || (status == -EBADCOOKIE && entry->eof != 0)) {
-		array = kmap(page);
-		array->eof_index = array->size;
+		if (page->mapping != mapping) {
+			if (!--narrays)
+				break;
+			new = nfs_readdir_page_array_alloc(entry->prev_cookie,
+							   GFP_KERNEL);
+			if (!new)
+				break;
+			arrays++;
+			*arrays = page = new;
+		} else {
+			new = nfs_readdir_page_get_next(mapping,
+							page->index + 1,
+							entry->prev_cookie);
+			if (!new)
+				break;
+			if (page != *arrays)
+				nfs_readdir_page_unlock_and_put(page);
+			page = new;
+		}
+		status = nfs_readdir_add_to_array(entry, page);
+	} while (!status && !entry->eof);
+
+	switch (status) {
+	case -EBADCOOKIE:
+		if (entry->eof) {
+			nfs_readdir_page_set_eof(page);
+			status = 0;
+		}
+		break;
+	case -ENOSPC:
+	case -EAGAIN:
 		status = 0;
-		kunmap(page);
+		break;
 	}
 
+	if (page != *arrays)
+		nfs_readdir_page_unlock_and_put(page);
+
 	put_page(scratch);
 	return status;
 }
 
-static
-void nfs_readdir_free_pages(struct page **pages, unsigned int npages)
+static void nfs_readdir_free_pages(struct page **pages, size_t npages)
 {
-	unsigned int i;
-	for (i = 0; i < npages; i++)
-		put_page(pages[i]);
+	while (npages--)
+		put_page(pages[npages]);
+	kfree(pages);
 }
 
 /*
  * nfs_readdir_alloc_pages() will allocate pages that must be freed with a call
  * to nfs_readdir_free_pages()
  */
-static
-int nfs_readdir_alloc_pages(struct page **pages, unsigned int npages)
+static struct page **nfs_readdir_alloc_pages(size_t npages)
 {
-	unsigned int i;
+	struct page **pages;
+	size_t i;
 
+	pages = kmalloc_array(npages, sizeof(*pages), GFP_KERNEL);
+	if (!pages)
+		return NULL;
 	for (i = 0; i < npages; i++) {
 		struct page *page = alloc_page(GFP_KERNEL);
 		if (page == NULL)
 			goto out_freepages;
 		pages[i] = page;
 	}
-	return 0;
+	return pages;
 
 out_freepages:
 	nfs_readdir_free_pages(pages, i);
-	return -ENOMEM;
+	return NULL;
 }
 
-static
-int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page, struct inode *inode)
+static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc,
+				    __be32 *verf_arg, __be32 *verf_res,
+				    struct page **arrays, size_t narrays)
 {
-	struct page *pages[NFS_MAX_READDIR_PAGES];
-	struct nfs_entry entry;
-	struct file	*file = desc->file;
-	struct nfs_cache_array *array;
+	struct page **pages;
+	struct page *page = *arrays;
+	struct nfs_entry *entry;
+	size_t array_size;
+	struct inode *inode = file_inode(desc->file);
+	size_t dtsize = NFS_SERVER(inode)->dtsize;
 	int status = -ENOMEM;
-	unsigned int array_size = ARRAY_SIZE(pages);
-
-	nfs_readdir_init_array(page);
 
-	entry.prev_cookie = 0;
-	entry.cookie = desc->last_cookie;
-	entry.eof = 0;
-	entry.fh = nfs_alloc_fhandle();
-	entry.fattr = nfs_alloc_fattr();
-	entry.server = NFS_SERVER(inode);
-	if (entry.fh == NULL || entry.fattr == NULL)
+	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+	if (!entry)
+		return -ENOMEM;
+	entry->cookie = nfs_readdir_page_last_cookie(page);
+	entry->fh = nfs_alloc_fhandle();
+	entry->fattr = nfs_alloc_fattr();
+	entry->server = NFS_SERVER(inode);
+	if (entry->fh == NULL || entry->fattr == NULL)
 		goto out;
 
-	entry.label = nfs4_label_alloc(NFS_SERVER(inode), GFP_NOWAIT);
-	if (IS_ERR(entry.label)) {
-		status = PTR_ERR(entry.label);
+	entry->label = nfs4_label_alloc(NFS_SERVER(inode), GFP_NOWAIT);
+	if (IS_ERR(entry->label)) {
+		status = PTR_ERR(entry->label);
 		goto out;
 	}
 
-	array = kmap(page);
+	array_size = (dtsize + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	pages = nfs_readdir_alloc_pages(array_size);
+	if (!pages)
+		goto out_release_label;
 
-	status = nfs_readdir_alloc_pages(pages, array_size);
-	if (status < 0)
-		goto out_release_array;
 	do {
 		unsigned int pglen;
-		status = nfs_readdir_xdr_filler(pages, desc, &entry, file, inode);
-
+		status = nfs_readdir_xdr_filler(desc, verf_arg, entry->cookie,
+						pages, dtsize,
+						verf_res);
 		if (status < 0)
 			break;
+
 		pglen = status;
-		status = nfs_readdir_page_filler(desc, &entry, pages, page, pglen);
-		if (status < 0) {
-			if (status == -ENOSPC)
-				status = 0;
+		if (pglen == 0) {
+			nfs_readdir_page_set_eof(page);
 			break;
 		}
-	} while (array->eof_index < 0);
+
+		status = nfs_readdir_page_filler(desc, entry, pages, pglen,
+						 arrays, narrays);
+	} while (!status && nfs_readdir_page_needs_filling(page));
 
 	nfs_readdir_free_pages(pages, array_size);
-out_release_array:
-	kunmap(page);
-	nfs4_label_free(entry.label);
+out_release_label:
+	nfs4_label_free(entry->label);
 out:
-	nfs_free_fattr(entry.fattr);
-	nfs_free_fhandle(entry.fh);
+	nfs_free_fattr(entry->fattr);
+	nfs_free_fhandle(entry->fh);
+	kfree(entry);
 	return status;
 }
 
-/*
- * Now we cache directories properly, by converting xdr information
- * to an array that can be used for lookups later.  This results in
- * fewer cache pages, since we can store more information on each page.
- * We only need to convert from xdr once so future lookups are much simpler
- */
-static
-int nfs_readdir_filler(void *data, struct page* page)
+static void nfs_readdir_page_put(struct nfs_readdir_descriptor *desc)
 {
-	nfs_readdir_descriptor_t *desc = data;
-	struct inode	*inode = file_inode(desc->file);
-	int ret;
-
-	ret = nfs_readdir_xdr_to_array(desc, page, inode);
-	if (ret < 0)
-		goto error;
-	SetPageUptodate(page);
-
-	if (invalidate_inode_pages2_range(inode->i_mapping, page->index + 1, -1) < 0) {
-		/* Should never happen */
-		nfs_zap_mapping(inode, inode->i_mapping);
-	}
-	unlock_page(page);
-	return 0;
- error:
-	nfs_readdir_clear_array(page);
-	unlock_page(page);
-	return ret;
+	put_page(desc->page);
+	desc->page = NULL;
 }
 
-static
-void cache_page_release(nfs_readdir_descriptor_t *desc)
+static void
+nfs_readdir_page_unlock_and_put_cached(struct nfs_readdir_descriptor *desc)
 {
-	put_page(desc->page);
-	desc->page = NULL;
+	unlock_page(desc->page);
+	nfs_readdir_page_put(desc);
 }
 
-static
-struct page *get_cache_page(nfs_readdir_descriptor_t *desc)
+static struct page *
+nfs_readdir_page_get_cached(struct nfs_readdir_descriptor *desc)
 {
-	return read_cache_page(desc->file->f_mapping, desc->page_index,
-			nfs_readdir_filler, desc);
+	return nfs_readdir_page_get_locked(desc->file->f_mapping,
+					   desc->page_index,
+					   desc->last_cookie);
 }
 
 /*
  * Returns 0 if desc->dir_cookie was found on page desc->page_index
  * and locks the page to prevent removal from the page cache.
  */
-static
-int find_and_lock_cache_page(nfs_readdir_descriptor_t *desc)
+static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc)
 {
 	struct inode *inode = file_inode(desc->file);
 	struct nfs_inode *nfsi = NFS_I(inode);
+	__be32 verf[NFS_DIR_VERIFIER_SIZE];
 	int res;
 
-	desc->page = get_cache_page(desc);
-	if (IS_ERR(desc->page))
-		return PTR_ERR(desc->page);
-	res = lock_page_killable(desc->page);
-	if (res != 0)
-		goto error;
-	res = -EAGAIN;
-	if (desc->page->mapping != NULL) {
-		res = nfs_readdir_search_array(desc);
-		if (res == 0) {
-			nfsi->page_index = desc->page_index;
-			return 0;
+	desc->page = nfs_readdir_page_get_cached(desc);
+	if (!desc->page)
+		return -ENOMEM;
+	if (nfs_readdir_page_needs_filling(desc->page)) {
+		res = nfs_readdir_xdr_to_array(desc, nfsi->cookieverf, verf,
+					       &desc->page, 1);
+		if (res < 0) {
+			nfs_readdir_page_unlock_and_put_cached(desc);
+			if (res == -EBADCOOKIE || res == -ENOTSYNC) {
+				invalidate_inode_pages2(desc->file->f_mapping);
+				desc->page_index = 0;
+				return -EAGAIN;
+			}
+			return res;
 		}
+		memcpy(nfsi->cookieverf, verf, sizeof(nfsi->cookieverf));
 	}
-	unlock_page(desc->page);
-error:
-	cache_page_release(desc);
+	res = nfs_readdir_search_array(desc);
+	if (res == 0) {
+		nfsi->page_index = desc->page_index;
+		return 0;
+	}
+	nfs_readdir_page_unlock_and_put_cached(desc);
 	return res;
 }
 
+static bool nfs_readdir_dont_search_cache(struct nfs_readdir_descriptor *desc)
+{
+	struct address_space *mapping = desc->file->f_mapping;
+	struct inode *dir = file_inode(desc->file);
+	unsigned int dtsize = NFS_SERVER(dir)->dtsize;
+	loff_t size = i_size_read(dir);
+
+	/*
+	 * Default to uncached readdir if the page cache is empty, and
+	 * we're looking for a non-zero cookie in a large directory.
+	 */
+	return desc->dir_cookie != 0 && mapping->nrpages == 0 && size > dtsize;
+}
+
 /* Search for desc->dir_cookie from the beginning of the page cache */
-static inline
-int readdir_search_pagecache(nfs_readdir_descriptor_t *desc)
+static int readdir_search_pagecache(struct nfs_readdir_descriptor *desc)
 {
 	int res;
 
-	if (desc->page_index == 0) {
-		desc->current_index = 0;
-		desc->prev_index = 0;
-		desc->last_cookie = 0;
-	}
+	if (nfs_readdir_dont_search_cache(desc))
+		return -EBADCOOKIE;
+
 	do {
+		if (desc->page_index == 0) {
+			desc->current_index = 0;
+			desc->prev_index = 0;
+			desc->last_cookie = 0;
+		}
 		res = find_and_lock_cache_page(desc);
 	} while (res == -EAGAIN);
 	return res;
@@ -794,43 +973,41 @@ int readdir_search_pagecache(nfs_readdir_descriptor_t *desc)
 /*
  * Once we've found the start of the dirent within a page: fill 'er up...
  */
-static 
-int nfs_do_filldir(nfs_readdir_descriptor_t *desc)
+static void nfs_do_filldir(struct nfs_readdir_descriptor *desc)
 {
 	struct file	*file = desc->file;
-	int i = 0;
-	int res = 0;
-	struct nfs_cache_array *array = NULL;
-	struct nfs_open_dir_context *ctx = file->private_data;
+	struct nfs_inode *nfsi = NFS_I(file_inode(file));
+	struct nfs_cache_array *array;
+	unsigned int i = 0;
 
 	array = kmap(desc->page);
 	for (i = desc->cache_entry_index; i < array->size; i++) {
 		struct nfs_cache_array_entry *ent;
 
 		ent = &array->array[i];
-		if (!dir_emit(desc->ctx, ent->string.name, ent->string.len,
+		if (!dir_emit(desc->ctx, ent->name, ent->name_len,
 		    nfs_compat_user_ino64(ent->ino), ent->d_type)) {
 			desc->eof = true;
 			break;
 		}
+		memcpy(desc->verf, nfsi->cookieverf, sizeof(desc->verf));
 		if (i < (array->size-1))
-			*desc->dir_cookie = array->array[i+1].cookie;
+			desc->dir_cookie = array->array[i+1].cookie;
 		else
-			*desc->dir_cookie = array->last_cookie;
+			desc->dir_cookie = array->last_cookie;
 		if (nfs_readdir_use_cookie(file))
-			desc->ctx->pos = *desc->dir_cookie;
+			desc->ctx->pos = desc->dir_cookie;
 		else
 			desc->ctx->pos++;
-		if (ctx->duped != 0)
-			ctx->duped = 1;
+		if (desc->duped != 0)
+			desc->duped = 1;
 	}
-	if (array->eof_index >= 0)
+	if (array->page_is_eof)
 		desc->eof = true;
 
 	kunmap(desc->page);
-	dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n",
-			(unsigned long long)*desc->dir_cookie, res);
-	return res;
+	dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %llu\n",
+			(unsigned long long)desc->dir_cookie);
 }
 
 /*
@@ -845,40 +1022,41 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc)
  *	 we should already have a complete representation of the
  *	 directory in the page cache by the time we get here.
  */
-static inline
-int uncached_readdir(nfs_readdir_descriptor_t *desc)
+static int uncached_readdir(struct nfs_readdir_descriptor *desc)
 {
-	struct page	*page = NULL;
-	int		status;
-	struct inode *inode = file_inode(desc->file);
-	struct nfs_open_dir_context *ctx = desc->file->private_data;
+	struct page	**arrays;
+	size_t		i, sz = 512;
+	__be32		verf[NFS_DIR_VERIFIER_SIZE];
+	int		status = -ENOMEM;
 
-	dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %Lu\n",
-			(unsigned long long)*desc->dir_cookie);
+	dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %llu\n",
+			(unsigned long long)desc->dir_cookie);
 
-	page = alloc_page(GFP_HIGHUSER);
-	if (!page) {
-		status = -ENOMEM;
+	arrays = kcalloc(sz, sizeof(*arrays), GFP_KERNEL);
+	if (!arrays)
+		goto out;
+	arrays[0] = nfs_readdir_page_array_alloc(desc->dir_cookie, GFP_KERNEL);
+	if (!arrays[0])
 		goto out;
-	}
 
 	desc->page_index = 0;
-	desc->last_cookie = *desc->dir_cookie;
-	desc->page = page;
-	ctx->duped = 0;
-
-	status = nfs_readdir_xdr_to_array(desc, page, inode);
-	if (status < 0)
-		goto out_release;
-
-	status = nfs_do_filldir(desc);
-
- out_release:
-	nfs_readdir_clear_array(desc->page);
-	cache_page_release(desc);
- out:
-	dfprintk(DIRCACHE, "NFS: %s: returns %d\n",
-			__func__, status);
+	desc->last_cookie = desc->dir_cookie;
+	desc->duped = 0;
+
+	status = nfs_readdir_xdr_to_array(desc, desc->verf, verf, arrays, sz);
+
+	for (i = 0; !desc->eof && i < sz && arrays[i]; i++) {
+		desc->page = arrays[i];
+		nfs_do_filldir(desc);
+	}
+	desc->page = NULL;
+
+
+	for (i = 0; i < sz && arrays[i]; i++)
+		nfs_readdir_page_array_free(arrays[i]);
+out:
+	kfree(arrays);
+	dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __func__, status);
 	return status;
 }
 
@@ -891,14 +1069,8 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
 	struct dentry	*dentry = file_dentry(file);
 	struct inode	*inode = d_inode(dentry);
 	struct nfs_open_dir_context *dir_ctx = file->private_data;
-	nfs_readdir_descriptor_t my_desc = {
-		.file = file,
-		.ctx = ctx,
-		.dir_cookie = &dir_ctx->dir_cookie,
-		.plus = nfs_use_readdirplus(inode, ctx),
-	},
-			*desc = &my_desc;
-	int res = 0;
+	struct nfs_readdir_descriptor *desc;
+	int res;
 
 	dfprintk(FILE, "NFS: readdir(%pD2) starting at cookie %llu\n",
 			file, (long long)ctx->pos);
@@ -910,10 +1082,27 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
 	 * to either find the entry with the appropriate number or
 	 * revalidate the cookie.
 	 */
-	if (ctx->pos == 0 || nfs_attribute_cache_expired(inode))
+	if (ctx->pos == 0 || nfs_attribute_cache_expired(inode)) {
 		res = nfs_revalidate_mapping(inode, file->f_mapping);
-	if (res < 0)
+		if (res < 0)
+			goto out;
+	}
+
+	res = -ENOMEM;
+	desc = kzalloc(sizeof(*desc), GFP_KERNEL);
+	if (!desc)
 		goto out;
+	desc->file = file;
+	desc->ctx = ctx;
+	desc->plus = nfs_use_readdirplus(inode, ctx);
+
+	spin_lock(&file->f_lock);
+	desc->dir_cookie = dir_ctx->dir_cookie;
+	desc->dup_cookie = dir_ctx->dup_cookie;
+	desc->duped = dir_ctx->duped;
+	desc->attr_gencount = dir_ctx->attr_gencount;
+	memcpy(desc->verf, dir_ctx->verf, sizeof(desc->verf));
+	spin_unlock(&file->f_lock);
 
 	do {
 		res = readdir_search_pagecache(desc);
@@ -921,11 +1110,13 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
 		if (res == -EBADCOOKIE) {
 			res = 0;
 			/* This means either end of directory */
-			if (*desc->dir_cookie && !desc->eof) {
+			if (desc->dir_cookie && !desc->eof) {
 				/* Or that the server has 'lost' a cookie */
 				res = uncached_readdir(desc);
 				if (res == 0)
 					continue;
+				if (res == -EBADCOOKIE || res == -ENOTSYNC)
+					res = 0;
 			}
 			break;
 		}
@@ -940,15 +1131,21 @@ static int nfs_readdir(struct file *file, struct dir_context *ctx)
 		if (res < 0)
 			break;
 
-		res = nfs_do_filldir(desc);
-		unlock_page(desc->page);
-		cache_page_release(desc);
-		if (res < 0)
-			break;
+		nfs_do_filldir(desc);
+		nfs_readdir_page_unlock_and_put_cached(desc);
 	} while (!desc->eof);
+
+	spin_lock(&file->f_lock);
+	dir_ctx->dir_cookie = desc->dir_cookie;
+	dir_ctx->dup_cookie = desc->dup_cookie;
+	dir_ctx->duped = desc->duped;
+	dir_ctx->attr_gencount = desc->attr_gencount;
+	memcpy(dir_ctx->verf, desc->verf, sizeof(dir_ctx->verf));
+	spin_unlock(&file->f_lock);
+
+	kfree(desc);
+
 out:
-	if (res > 0)
-		res = 0;
 	dfprintk(FILE, "NFS: readdir(%pD2) returns %d\n", file, res);
 	return res;
 }
@@ -984,6 +1181,8 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence)
 			dir_ctx->dir_cookie = offset;
 		else
 			dir_ctx->dir_cookie = 0;
+		if (offset == 0)
+			memset(dir_ctx->verf, 0, sizeof(dir_ctx->verf));
 		dir_ctx->duped = 0;
 	}
 	spin_unlock(&filp->f_lock);
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 4252ce633533..872112bffcab 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -740,16 +740,12 @@ ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg,
 	struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg);
 	struct nfs4_ff_layout_mirror *mirror;
 	struct nfs4_pnfs_ds *ds;
-	bool fail_return = false;
 	u32 idx;
 
 	/* mirrors are initially sorted by efficiency */
 	for (idx = start_idx; idx < fls->mirror_array_cnt; idx++) {
-		if (idx+1 == fls->mirror_array_cnt)
-			fail_return = !check_device;
-
 		mirror = FF_LAYOUT_COMP(lseg, idx);
-		ds = nfs4_ff_layout_prepare_ds(lseg, mirror, fail_return);
+		ds = nfs4_ff_layout_prepare_ds(lseg, mirror, false);
 		if (!ds)
 			continue;
 
@@ -1056,7 +1052,7 @@ static void ff_layout_resend_pnfs_read(struct nfs_pgio_header *hdr)
 	u32 idx = hdr->pgio_mirror_idx + 1;
 	u32 new_idx = 0;
 
-	if (ff_layout_choose_any_ds_for_read(hdr->lseg, idx + 1, &new_idx))
+	if (ff_layout_choose_any_ds_for_read(hdr->lseg, idx, &new_idx))
 		ff_layout_send_layouterror(hdr->lseg);
 	else
 		pnfs_error_mark_layout_for_return(hdr->inode, hdr->lseg);
@@ -2284,7 +2280,6 @@ ff_layout_encode_netaddr(struct xdr_stream *xdr, struct nfs4_pnfs_ds_addr *da)
 	struct sockaddr *sap = (struct sockaddr *)&da->da_addr;
 	char portbuf[RPCBIND_MAXUADDRPLEN];
 	char addrbuf[RPCBIND_MAXUADDRLEN];
-	char *netid;
 	unsigned short port;
 	int len, netid_len;
 	__be32 *p;
@@ -2294,18 +2289,13 @@ ff_layout_encode_netaddr(struct xdr_stream *xdr, struct nfs4_pnfs_ds_addr *da)
 		if (ff_layout_ntop4(sap, addrbuf, sizeof(addrbuf)) == 0)
 			return;
 		port = ntohs(((struct sockaddr_in *)sap)->sin_port);
-		netid = "tcp";
-		netid_len = 3;
 		break;
 	case AF_INET6:
 		if (ff_layout_ntop6_noscopeid(sap, addrbuf, sizeof(addrbuf)) == 0)
 			return;
 		port = ntohs(((struct sockaddr_in6 *)sap)->sin6_port);
-		netid = "tcp6";
-		netid_len = 4;
 		break;
 	default:
-		/* we only support tcp and tcp6 */
 		WARN_ON_ONCE(1);
 		return;
 	}
@@ -2313,8 +2303,9 @@ ff_layout_encode_netaddr(struct xdr_stream *xdr, struct nfs4_pnfs_ds_addr *da)
 	snprintf(portbuf, sizeof(portbuf), ".%u.%u", port >> 8, port & 0xff);
 	len = strlcat(addrbuf, portbuf, sizeof(addrbuf));
 
+	netid_len = strlen(da->da_netid);
 	p = xdr_reserve_space(xdr, 4 + netid_len);
-	xdr_encode_opaque(p, netid, netid_len);
+	xdr_encode_opaque(p, da->da_netid, netid_len);
 
 	p = xdr_reserve_space(xdr, 4 + len);
 	xdr_encode_opaque(p, addrbuf, len);
diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c
index 29ec8b09a52d..06894bcdea2d 100644
--- a/fs/nfs/fs_context.c
+++ b/fs/nfs/fs_context.c
@@ -510,13 +510,12 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
 		ctx->nfs_server.protocol = XPRT_TRANSPORT_UDP;
 		break;
 	case Opt_tcp:
-		ctx->flags |= NFS_MOUNT_TCP;
-		ctx->nfs_server.protocol = XPRT_TRANSPORT_TCP;
-		break;
 	case Opt_rdma:
 		ctx->flags |= NFS_MOUNT_TCP; /* for side protocols */
-		ctx->nfs_server.protocol = XPRT_TRANSPORT_RDMA;
-		xprt_load_transport(param->key);
+		ret = xprt_find_transport_ident(param->key);
+		if (ret < 0)
+			goto out_bad_transport;
+		ctx->nfs_server.protocol = ret;
 		break;
 	case Opt_acl:
 		if (result.negated)
@@ -670,11 +669,13 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
 		case Opt_xprt_rdma:
 			/* vector side protocols to TCP */
 			ctx->flags |= NFS_MOUNT_TCP;
-			ctx->nfs_server.protocol = XPRT_TRANSPORT_RDMA;
-			xprt_load_transport(param->string);
+			ret = xprt_find_transport_ident(param->string);
+			if (ret < 0)
+				goto out_bad_transport;
+			ctx->nfs_server.protocol = ret;
 			break;
 		default:
-			return nfs_invalf(fc, "NFS: Unrecognized transport protocol");
+			goto out_bad_transport;
 		}
 
 		ctx->protofamily = protofamily;
@@ -697,7 +698,7 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
 			break;
 		case Opt_xprt_rdma: /* not used for side protocols */
 		default:
-			return nfs_invalf(fc, "NFS: Unrecognized transport protocol");
+			goto out_bad_transport;
 		}
 		ctx->mountfamily = mountfamily;
 		break;
@@ -787,6 +788,8 @@ out_invalid_address:
 	return nfs_invalf(fc, "NFS: Bad IP address specified");
 out_of_bounds:
 	return nfs_invalf(fc, "NFS: Value for '%s' out of range", param->key);
+out_bad_transport:
+	return nfs_invalf(fc, "NFS: Unrecognized transport protocol");
 }
 
 /*
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index aa6493905bbe..522aa10a1a3e 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -229,7 +229,6 @@ static void nfs_zap_caches_locked(struct inode *inode)
 	nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
 	nfsi->attrtimeo_timestamp = jiffies;
 
-	memset(NFS_I(inode)->cookieverf, 0, sizeof(NFS_I(inode)->cookieverf));
 	if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) {
 		nfs_set_cache_invalid(inode, NFS_INO_INVALID_ATTR
 					| NFS_INO_INVALID_DATA
@@ -1237,7 +1236,6 @@ EXPORT_SYMBOL_GPL(nfs_revalidate_inode);
 
 static int nfs_invalidate_mapping(struct inode *inode, struct address_space *mapping)
 {
-	struct nfs_inode *nfsi = NFS_I(inode);
 	int ret;
 
 	if (mapping->nrpages != 0) {
@@ -1250,11 +1248,6 @@ static int nfs_invalidate_mapping(struct inode *inode, struct address_space *map
 		if (ret < 0)
 			return ret;
 	}
-	if (S_ISDIR(inode->i_mode)) {
-		spin_lock(&inode->i_lock);
-		memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
-		spin_unlock(&inode->i_lock);
-	}
 	nfs_inc_stats(inode, NFSIOS_DATAINVALIDATE);
 	nfs_fscache_wait_on_invalidate(inode);
 
@@ -2180,7 +2173,7 @@ static int nfsiod_start(void)
 {
 	struct workqueue_struct *wq;
 	dprintk("RPC:       creating workqueue nfsiod\n");
-	wq = alloc_workqueue("nfsiod", WQ_MEM_RECLAIM, 0);
+	wq = alloc_workqueue("nfsiod", WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
 	if (wq == NULL)
 		return -ENOMEM;
 	nfsiod_workqueue = wq;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 6673a77884d9..b840d0a91c9d 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -56,12 +56,6 @@ static inline bool nfs_lookup_is_soft_revalidate(const struct dentry *dentry)
 #define NFS_UNSPEC_RETRANS	(UINT_MAX)
 #define NFS_UNSPEC_TIMEO	(UINT_MAX)
 
-/*
- * Maximum number of pages that readdir can use for creating
- * a vmapped array of pages.
- */
-#define NFS_MAX_READDIR_PAGES 8
-
 struct nfs_client_initdata {
 	unsigned long init_flags;
 	const char *hostname;			/* Hostname of the server */
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index f6676af37d5d..7fba7711e6b3 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -34,6 +34,7 @@
  * Declare the space requirements for NFS arguments and replies as
  * number of 32bit-words
  */
+#define NFS_pagepad_sz		(1) /* Page padding */
 #define NFS_fhandle_sz		(8)
 #define NFS_sattr_sz		(8)
 #define NFS_filename_sz		(1+(NFS2_MAXNAMLEN>>2))
@@ -56,11 +57,11 @@
 
 #define NFS_attrstat_sz		(1+NFS_fattr_sz)
 #define NFS_diropres_sz		(1+NFS_fhandle_sz+NFS_fattr_sz)
-#define NFS_readlinkres_sz	(2+1)
-#define NFS_readres_sz		(1+NFS_fattr_sz+1+1)
+#define NFS_readlinkres_sz	(2+NFS_pagepad_sz)
+#define NFS_readres_sz		(1+NFS_fattr_sz+1+NFS_pagepad_sz)
 #define NFS_writeres_sz         (NFS_attrstat_sz)
 #define NFS_stat_sz		(1)
-#define NFS_readdirres_sz	(1+1)
+#define NFS_readdirres_sz	(1+NFS_pagepad_sz)
 #define NFS_statfsres_sz	(1+NFS_info_sz)
 
 static int nfs_stat_to_errno(enum nfs_stat);
@@ -592,8 +593,8 @@ static void nfs2_xdr_enc_readlinkargs(struct rpc_rqst *req,
 	const struct nfs_readlinkargs *args = data;
 
 	encode_fhandle(xdr, args->fh);
-	rpc_prepare_reply_pages(req, args->pages, args->pgbase,
-				args->pglen, NFS_readlinkres_sz);
+	rpc_prepare_reply_pages(req, args->pages, args->pgbase, args->pglen,
+				NFS_readlinkres_sz - NFS_pagepad_sz);
 }
 
 /*
@@ -628,8 +629,8 @@ static void nfs2_xdr_enc_readargs(struct rpc_rqst *req,
 	const struct nfs_pgio_args *args = data;
 
 	encode_readargs(xdr, args);
-	rpc_prepare_reply_pages(req, args->pages, args->pgbase,
-				args->count, NFS_readres_sz);
+	rpc_prepare_reply_pages(req, args->pages, args->pgbase, args->count,
+				NFS_readres_sz - NFS_pagepad_sz);
 	req->rq_rcv_buf.flags |= XDRBUF_READ;
 }
 
@@ -786,8 +787,8 @@ static void nfs2_xdr_enc_readdirargs(struct rpc_rqst *req,
 	const struct nfs_readdirargs *args = data;
 
 	encode_readdirargs(xdr, args);
-	rpc_prepare_reply_pages(req, args->pages, 0,
-				args->count, NFS_readdirres_sz);
+	rpc_prepare_reply_pages(req, args->pages, 0, args->count,
+				NFS_readdirres_sz - NFS_pagepad_sz);
 }
 
 /*
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index 2397ceedba8a..5c4e23abc345 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -154,14 +154,14 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
 }
 
 static int
-nfs3_proc_lookup(struct inode *dir, struct dentry *dentry,
-		 struct nfs_fh *fhandle, struct nfs_fattr *fattr,
-		 struct nfs4_label *label)
+__nfs3_proc_lookup(struct inode *dir, const char *name, size_t len,
+		   struct nfs_fh *fhandle, struct nfs_fattr *fattr,
+		   unsigned short task_flags)
 {
 	struct nfs3_diropargs	arg = {
 		.fh		= NFS_FH(dir),
-		.name		= dentry->d_name.name,
-		.len		= dentry->d_name.len
+		.name		= name,
+		.len		= len
 	};
 	struct nfs3_diropres	res = {
 		.fh		= fhandle,
@@ -173,17 +173,11 @@ nfs3_proc_lookup(struct inode *dir, struct dentry *dentry,
 		.rpc_resp	= &res,
 	};
 	int			status;
-	unsigned short task_flags = 0;
-
-	/* Is this is an attribute revalidation, subject to softreval? */
-	if (nfs_lookup_is_soft_revalidate(dentry))
-		task_flags |= RPC_TASK_TIMEOUT;
 
 	res.dir_attr = nfs_alloc_fattr();
 	if (res.dir_attr == NULL)
 		return -ENOMEM;
 
-	dprintk("NFS call  lookup %pd2\n", dentry);
 	nfs_fattr_init(fattr);
 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, task_flags);
 	nfs_refresh_inode(dir, res.dir_attr);
@@ -198,6 +192,37 @@ nfs3_proc_lookup(struct inode *dir, struct dentry *dentry,
 	return status;
 }
 
+static int
+nfs3_proc_lookup(struct inode *dir, struct dentry *dentry,
+		 struct nfs_fh *fhandle, struct nfs_fattr *fattr,
+		 struct nfs4_label *label)
+{
+	unsigned short task_flags = 0;
+
+	/* Is this is an attribute revalidation, subject to softreval? */
+	if (nfs_lookup_is_soft_revalidate(dentry))
+		task_flags |= RPC_TASK_TIMEOUT;
+
+	dprintk("NFS call  lookup %pd2\n", dentry);
+	return __nfs3_proc_lookup(dir, dentry->d_name.name,
+				  dentry->d_name.len, fhandle, fattr,
+				  task_flags);
+}
+
+static int nfs3_proc_lookupp(struct inode *inode, struct nfs_fh *fhandle,
+			     struct nfs_fattr *fattr, struct nfs4_label *label)
+{
+	const char dotdot[] = "..";
+	const size_t len = strlen(dotdot);
+	unsigned short task_flags = 0;
+
+	if (NFS_SERVER(inode)->flags & NFS_MOUNT_SOFTREVAL)
+		task_flags |= RPC_TASK_TIMEOUT;
+
+	return __nfs3_proc_lookup(inode, dotdot, len, fhandle, fattr,
+				  task_flags);
+}
+
 static int nfs3_proc_access(struct inode *inode, struct nfs_access_entry *entry)
 {
 	struct nfs3_accessargs	arg = {
@@ -637,37 +662,36 @@ out:
  * Also note that this implementation handles both plain readdir and
  * readdirplus.
  */
-static int
-nfs3_proc_readdir(struct dentry *dentry, const struct cred *cred,
-		  u64 cookie, struct page **pages, unsigned int count, bool plus)
+static int nfs3_proc_readdir(struct nfs_readdir_arg *nr_arg,
+			     struct nfs_readdir_res *nr_res)
 {
-	struct inode		*dir = d_inode(dentry);
-	__be32			*verf = NFS_I(dir)->cookieverf;
+	struct inode		*dir = d_inode(nr_arg->dentry);
 	struct nfs3_readdirargs	arg = {
 		.fh		= NFS_FH(dir),
-		.cookie		= cookie,
-		.verf		= {verf[0], verf[1]},
-		.plus		= plus,
-		.count		= count,
-		.pages		= pages
+		.cookie		= nr_arg->cookie,
+		.plus		= nr_arg->plus,
+		.count		= nr_arg->page_len,
+		.pages		= nr_arg->pages
 	};
 	struct nfs3_readdirres	res = {
-		.verf		= verf,
-		.plus		= plus
+		.verf		= nr_res->verf,
+		.plus		= nr_arg->plus,
 	};
 	struct rpc_message	msg = {
 		.rpc_proc	= &nfs3_procedures[NFS3PROC_READDIR],
 		.rpc_argp	= &arg,
 		.rpc_resp	= &res,
-		.rpc_cred	= cred,
+		.rpc_cred	= nr_arg->cred,
 	};
 	int status = -ENOMEM;
 
-	if (plus)
+	if (nr_arg->plus)
 		msg.rpc_proc = &nfs3_procedures[NFS3PROC_READDIRPLUS];
+	if (arg.cookie)
+		memcpy(arg.verf, nr_arg->verf, sizeof(arg.verf));
 
-	dprintk("NFS call  readdir%s %d\n",
-			plus? "plus" : "", (unsigned int) cookie);
+	dprintk("NFS call  readdir%s %llu\n", nr_arg->plus ? "plus" : "",
+		(unsigned long long)nr_arg->cookie);
 
 	res.dir_attr = nfs_alloc_fattr();
 	if (res.dir_attr == NULL)
@@ -680,8 +704,8 @@ nfs3_proc_readdir(struct dentry *dentry, const struct cred *cred,
 
 	nfs_free_fattr(res.dir_attr);
 out:
-	dprintk("NFS reply readdir%s: %d\n",
-			plus? "plus" : "", status);
+	dprintk("NFS reply readdir%s: %d\n", nr_arg->plus ? "plus" : "",
+		status);
 	return status;
 }
 
@@ -1004,6 +1028,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
 	.getattr	= nfs3_proc_getattr,
 	.setattr	= nfs3_proc_setattr,
 	.lookup		= nfs3_proc_lookup,
+	.lookupp	= nfs3_proc_lookupp,
 	.access		= nfs3_proc_access,
 	.readlink	= nfs3_proc_readlink,
 	.create		= nfs3_proc_create,
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 69971f6c840d..ca10072644ff 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -33,6 +33,7 @@
  * Declare the space requirements for NFS arguments and replies as
  * number of 32bit-words
  */
+#define NFS3_pagepad_sz		(1) /* Page padding */
 #define NFS3_fhandle_sz		(1+16)
 #define NFS3_fh_sz		(NFS3_fhandle_sz)	/* shorthand */
 #define NFS3_sattr_sz		(15)
@@ -69,13 +70,13 @@
 #define NFS3_removeres_sz	(NFS3_setattrres_sz)
 #define NFS3_lookupres_sz	(1+NFS3_fh_sz+(2 * NFS3_post_op_attr_sz))
 #define NFS3_accessres_sz	(1+NFS3_post_op_attr_sz+1)
-#define NFS3_readlinkres_sz	(1+NFS3_post_op_attr_sz+1+1)
-#define NFS3_readres_sz		(1+NFS3_post_op_attr_sz+3+1)
+#define NFS3_readlinkres_sz	(1+NFS3_post_op_attr_sz+1+NFS3_pagepad_sz)
+#define NFS3_readres_sz		(1+NFS3_post_op_attr_sz+3+NFS3_pagepad_sz)
 #define NFS3_writeres_sz	(1+NFS3_wcc_data_sz+4)
 #define NFS3_createres_sz	(1+NFS3_fh_sz+NFS3_post_op_attr_sz+NFS3_wcc_data_sz)
 #define NFS3_renameres_sz	(1+(2 * NFS3_wcc_data_sz))
 #define NFS3_linkres_sz		(1+NFS3_post_op_attr_sz+NFS3_wcc_data_sz)
-#define NFS3_readdirres_sz	(1+NFS3_post_op_attr_sz+2+1)
+#define NFS3_readdirres_sz	(1+NFS3_post_op_attr_sz+2+NFS3_pagepad_sz)
 #define NFS3_fsstatres_sz	(1+NFS3_post_op_attr_sz+13)
 #define NFS3_fsinfores_sz	(1+NFS3_post_op_attr_sz+12)
 #define NFS3_pathconfres_sz	(1+NFS3_post_op_attr_sz+6)
@@ -85,7 +86,8 @@
 #define ACL3_setaclargs_sz	(NFS3_fh_sz+1+ \
 				XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE))
 #define ACL3_getaclres_sz	(1+NFS3_post_op_attr_sz+1+ \
-				XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE)+1)
+				XDR_QUADLEN(NFS_ACL_INLINE_BUFSIZE)+\
+				NFS3_pagepad_sz)
 #define ACL3_setaclres_sz	(1+NFS3_post_op_attr_sz)
 
 static int nfs3_stat_to_errno(enum nfs_stat);
@@ -909,8 +911,8 @@ static void nfs3_xdr_enc_readlink3args(struct rpc_rqst *req,
 	const struct nfs3_readlinkargs *args = data;
 
 	encode_nfs_fh3(xdr, args->fh);
-	rpc_prepare_reply_pages(req, args->pages, args->pgbase,
-				args->pglen, NFS3_readlinkres_sz);
+	rpc_prepare_reply_pages(req, args->pages, args->pgbase, args->pglen,
+				NFS3_readlinkres_sz - NFS3_pagepad_sz);
 }
 
 /*
@@ -939,7 +941,8 @@ static void nfs3_xdr_enc_read3args(struct rpc_rqst *req,
 				   const void *data)
 {
 	const struct nfs_pgio_args *args = data;
-	unsigned int replen = args->replen ? args->replen : NFS3_readres_sz;
+	unsigned int replen = args->replen ? args->replen :
+					     NFS3_readres_sz - NFS3_pagepad_sz;
 
 	encode_read3args(xdr, args);
 	rpc_prepare_reply_pages(req, args->pages, args->pgbase,
@@ -1239,8 +1242,8 @@ static void nfs3_xdr_enc_readdir3args(struct rpc_rqst *req,
 	const struct nfs3_readdirargs *args = data;
 
 	encode_readdir3args(xdr, args);
-	rpc_prepare_reply_pages(req, args->pages, 0,
-				args->count, NFS3_readdirres_sz);
+	rpc_prepare_reply_pages(req, args->pages, 0, args->count,
+				NFS3_readdirres_sz - NFS3_pagepad_sz);
 }
 
 /*
@@ -1281,8 +1284,8 @@ static void nfs3_xdr_enc_readdirplus3args(struct rpc_rqst *req,
 	const struct nfs3_readdirargs *args = data;
 
 	encode_readdirplus3args(xdr, args);
-	rpc_prepare_reply_pages(req, args->pages, 0,
-				args->count, NFS3_readdirres_sz);
+	rpc_prepare_reply_pages(req, args->pages, 0, args->count,
+				NFS3_readdirres_sz - NFS3_pagepad_sz);
 }
 
 /*
@@ -1328,7 +1331,7 @@ static void nfs3_xdr_enc_getacl3args(struct rpc_rqst *req,
 	if (args->mask & (NFS_ACL | NFS_DFACL)) {
 		rpc_prepare_reply_pages(req, args->pages, 0,
 					NFSACL_MAXPAGES << PAGE_SHIFT,
-					ACL3_getaclres_sz);
+					ACL3_getaclres_sz - NFS3_pagepad_sz);
 		req->rq_rcv_buf.flags |= XDRBUF_SPARSE_PAGES;
 	}
 }
@@ -1648,7 +1651,7 @@ static int nfs3_xdr_dec_read3res(struct rpc_rqst *req, struct xdr_stream *xdr,
 	result->op_status = status;
 	if (status != NFS3_OK)
 		goto out_status;
-	result->replen = 4 + ((xdr_stream_pos(xdr) - pos) >> 2);
+	result->replen = 3 + ((xdr_stream_pos(xdr) - pos) >> 2);
 	error = decode_read3resok(xdr, result);
 out:
 	return error;
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 4fc61e3d098d..f3fd935620fc 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -1173,14 +1173,12 @@ static int _nfs42_proc_setxattr(struct inode *inode, const char *name,
 }
 
 static ssize_t _nfs42_proc_getxattr(struct inode *inode, const char *name,
-				void *buf, size_t buflen)
+				void *buf, size_t buflen, struct page **pages,
+				size_t plen)
 {
 	struct nfs_server *server = NFS_SERVER(inode);
-	struct page *pages[NFS4XATTR_MAXPAGES] = {};
 	struct nfs42_getxattrargs arg = {
 		.fh		= NFS_FH(inode),
-		.xattr_pages	= pages,
-		.xattr_len	= buflen,
 		.xattr_name	= name,
 	};
 	struct nfs42_getxattrres res;
@@ -1189,7 +1187,10 @@ static ssize_t _nfs42_proc_getxattr(struct inode *inode, const char *name,
 		.rpc_argp	= &arg,
 		.rpc_resp	= &res,
 	};
-	int ret, np;
+	ssize_t ret;
+
+	arg.xattr_len = plen;
+	arg.xattr_pages = pages;
 
 	ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args,
 	    &res.seq_res, 0);
@@ -1214,10 +1215,6 @@ static ssize_t _nfs42_proc_getxattr(struct inode *inode, const char *name,
 		_copy_from_pages(buf, pages, 0, res.xattr_len);
 	}
 
-	np = DIV_ROUND_UP(res.xattr_len, PAGE_SIZE);
-	while (--np >= 0)
-		__free_page(pages[np]);
-
 	return res.xattr_len;
 }
 
@@ -1292,16 +1289,45 @@ ssize_t nfs42_proc_getxattr(struct inode *inode, const char *name,
 			      void *buf, size_t buflen)
 {
 	struct nfs4_exception exception = { };
-	ssize_t err;
+	ssize_t err, np, i;
+	struct page **pages;
 
+	np = nfs_page_array_len(0, buflen ?: XATTR_SIZE_MAX);
+	pages = kmalloc_array(np, sizeof(*pages), GFP_KERNEL);
+	if (!pages)
+		return -ENOMEM;
+
+	for (i = 0; i < np; i++) {
+		pages[i] = alloc_page(GFP_KERNEL);
+		if (!pages[i]) {
+			np = i + 1;
+			err = -ENOMEM;
+			goto out;
+		}
+	}
+
+	/*
+	 * The GETXATTR op has no length field in the call, and the
+	 * xattr data is at the end of the reply.
+	 *
+	 * There is no downside in using the page-aligned length. It will
+	 * allow receiving and caching xattrs that are too large for the
+	 * caller but still fit in the page-rounded value.
+	 */
 	do {
-		err = _nfs42_proc_getxattr(inode, name, buf, buflen);
+		err = _nfs42_proc_getxattr(inode, name, buf, buflen,
+			pages, np * PAGE_SIZE);
 		if (err >= 0)
 			break;
 		err = nfs4_handle_exception(NFS_SERVER(inode), err,
 				&exception);
 	} while (exception.retry);
 
+out:
+	while (--np >= 0)
+		__free_page(pages[np]);
+	kfree(pages);
+
 	return err;
 }
 
diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index ea7dd8cbfac9..c8bad735e4c1 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
@@ -191,7 +191,7 @@
 
 #define encode_getxattr_maxsz   (op_encode_hdr_maxsz + 1 + \
 				 nfs4_xattr_name_maxsz)
-#define decode_getxattr_maxsz   (op_decode_hdr_maxsz + 1 + 1)
+#define decode_getxattr_maxsz   (op_decode_hdr_maxsz + 1 + pagepad_maxsz)
 #define encode_setxattr_maxsz   (op_encode_hdr_maxsz + \
 				 1 + nfs4_xattr_name_maxsz + 1)
 #define decode_setxattr_maxsz   (op_decode_hdr_maxsz + decode_change_info_maxsz)
@@ -489,6 +489,12 @@ static int decode_getxattr(struct xdr_stream *xdr,
 		return -EIO;
 
 	len = be32_to_cpup(p);
+
+	/*
+	 * Only check against the page length here. The actual
+	 * requested length may be smaller, but that is only
+	 * checked against after possibly caching a valid reply.
+	 */
 	if (len > req->rq_rcv_buf.page_len)
 		return -ERANGE;
 
@@ -1019,56 +1025,80 @@ static int decode_deallocate(struct xdr_stream *xdr, struct nfs42_falloc_res *re
 	return decode_op_hdr(xdr, OP_DEALLOCATE);
 }
 
-static int decode_read_plus_data(struct xdr_stream *xdr, struct nfs_pgio_res *res,
-				 uint32_t *eof)
+static int decode_read_plus_data(struct xdr_stream *xdr,
+				 struct nfs_pgio_args *args,
+				 struct nfs_pgio_res *res)
 {
 	uint32_t count, recvd;
 	uint64_t offset;
 	__be32 *p;
 
 	p = xdr_inline_decode(xdr, 8 + 4);
-	if (unlikely(!p))
-		return -EIO;
+	if (!p)
+		return 1;
 
 	p = xdr_decode_hyper(p, &offset);
 	count = be32_to_cpup(p);
-	recvd = xdr_align_data(xdr, res->count, count);
-	res->count += recvd;
-
-	if (count > recvd) {
-		dprintk("NFS: server cheating in read reply: "
-				"count %u > recvd %u\n", count, recvd);
-		*eof = 0;
+	recvd = xdr_align_data(xdr, res->count, xdr_align_size(count));
+	if (recvd > count)
+		recvd = count;
+	if (res->count + recvd > args->count) {
+		if (args->count > res->count)
+			res->count += args->count - res->count;
 		return 1;
 	}
-
+	res->count += recvd;
+	if (count > recvd)
+		return 1;
 	return 0;
 }
 
-static int decode_read_plus_hole(struct xdr_stream *xdr, struct nfs_pgio_res *res,
-				 uint32_t *eof)
+static int decode_read_plus_hole(struct xdr_stream *xdr,
+				 struct nfs_pgio_args *args,
+				 struct nfs_pgio_res *res, uint32_t *eof)
 {
 	uint64_t offset, length, recvd;
 	__be32 *p;
 
 	p = xdr_inline_decode(xdr, 8 + 8);
-	if (unlikely(!p))
-		return -EIO;
+	if (!p)
+		return 1;
 
 	p = xdr_decode_hyper(p, &offset);
 	p = xdr_decode_hyper(p, &length);
+	if (offset != args->offset + res->count) {
+		/* Server returned an out-of-sequence extent */
+		if (offset > args->offset + res->count ||
+		    offset + length < args->offset + res->count) {
+			dprintk("NFS: server returned out of sequence extent: "
+				"offset/size = %llu/%llu != expected %llu\n",
+				(unsigned long long)offset,
+				(unsigned long long)length,
+				(unsigned long long)(args->offset +
+						     res->count));
+			return 1;
+		}
+		length -= args->offset + res->count - offset;
+	}
+	if (length + res->count > args->count) {
+		*eof = 0;
+		if (unlikely(res->count >= args->count))
+			return 1;
+		length = args->count - res->count;
+	}
 	recvd = xdr_expand_hole(xdr, res->count, length);
 	res->count += recvd;
 
-	if (recvd < length) {
-		*eof = 0;
+	if (recvd < length)
 		return 1;
-	}
 	return 0;
 }
 
 static int decode_read_plus(struct xdr_stream *xdr, struct nfs_pgio_res *res)
 {
+	struct nfs_pgio_header *hdr =
+		container_of(res, struct nfs_pgio_header, res);
+	struct nfs_pgio_args *args = &hdr->args;
 	uint32_t eof, segments, type;
 	int status, i;
 	__be32 *p;
@@ -1081,6 +1111,7 @@ static int decode_read_plus(struct xdr_stream *xdr, struct nfs_pgio_res *res)
 	if (unlikely(!p))
 		return -EIO;
 
+	res->count = 0;
 	eof = be32_to_cpup(p++);
 	segments = be32_to_cpup(p++);
 	if (segments == 0)
@@ -1088,26 +1119,31 @@ static int decode_read_plus(struct xdr_stream *xdr, struct nfs_pgio_res *res)
 
 	for (i = 0; i < segments; i++) {
 		p = xdr_inline_decode(xdr, 4);
-		if (unlikely(!p))
-			return -EIO;
+		if (!p)
+			goto early_out;
 
 		type = be32_to_cpup(p++);
 		if (type == NFS4_CONTENT_DATA)
-			status = decode_read_plus_data(xdr, res, &eof);
+			status = decode_read_plus_data(xdr, args, res);
 		else if (type == NFS4_CONTENT_HOLE)
-			status = decode_read_plus_hole(xdr, res, &eof);
+			status = decode_read_plus_hole(xdr, args, res, &eof);
 		else
 			return -EINVAL;
 
 		if (status < 0)
 			return status;
 		if (status > 0)
-			break;
+			goto early_out;
 	}
 
 out:
 	res->eof = eof;
 	return 0;
+early_out:
+	if (unlikely(!i))
+		return -EIO;
+	res->eof = 0;
+	return 0;
 }
 
 static int decode_seek(struct xdr_stream *xdr, struct nfs42_seek_res *res)
@@ -1476,18 +1512,16 @@ static void nfs4_xdr_enc_getxattr(struct rpc_rqst *req, struct xdr_stream *xdr,
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
-	size_t plen;
+	uint32_t replen;
 
 	encode_compound_hdr(xdr, req, &hdr);
 	encode_sequence(xdr, &args->seq_args, &hdr);
 	encode_putfh(xdr, args->fh, &hdr);
+	replen = hdr.replen + op_decode_hdr_maxsz + 1;
 	encode_getxattr(xdr, args->xattr_name, &hdr);
 
-	plen = args->xattr_len ? args->xattr_len : XATTR_SIZE_MAX;
-
-	rpc_prepare_reply_pages(req, args->xattr_pages, 0, plen,
-	    hdr.replen);
-	req->rq_rcv_buf.flags |= XDRBUF_SPARSE_PAGES;
+	rpc_prepare_reply_pages(req, args->xattr_pages, 0, args->xattr_len,
+				replen);
 
 	encode_nops(&hdr);
 }
@@ -1520,14 +1554,15 @@ static void nfs4_xdr_enc_listxattrs(struct rpc_rqst *req,
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
+	uint32_t replen;
 
 	encode_compound_hdr(xdr, req, &hdr);
 	encode_sequence(xdr, &args->seq_args, &hdr);
 	encode_putfh(xdr, args->fh, &hdr);
+	replen = hdr.replen + op_decode_hdr_maxsz + 2 + 1;
 	encode_listxattrs(xdr, args, &hdr);
 
-	rpc_prepare_reply_pages(req, args->xattr_pages, 0, args->count,
-	    hdr.replen);
+	rpc_prepare_reply_pages(req, args->xattr_pages, 0, args->count, replen);
 
 	encode_nops(&hdr);
 }
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index be7915c861ce..86acffe7335c 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -1153,7 +1153,7 @@ struct nfs_server *nfs4_create_server(struct fs_context *fc)
 	if (!server)
 		return ERR_PTR(-ENOMEM);
 
-	server->cred = get_cred(current_cred());
+	server->cred = get_cred(fc->cred);
 
 	auth_probe = ctx->auth_info.flavor_len < 1;
 
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index e89468678ae1..0ce04e0e5d82 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -184,6 +184,8 @@ static int nfs4_map_errors(int err)
 		return -EPROTONOSUPPORT;
 	case -NFS4ERR_FILE_OPEN:
 		return -EBUSY;
+	case -NFS4ERR_NOT_SAME:
+		return -ENOTSYNC;
 	default:
 		dprintk("%s could not handle NFSv4 error %d\n",
 				__func__, -err);
@@ -4397,6 +4399,10 @@ static int _nfs4_proc_lookupp(struct inode *inode,
 		.rpc_argp = &args,
 		.rpc_resp = &res,
 	};
+	unsigned short task_flags = 0;
+
+	if (NFS_SERVER(inode)->flags & NFS_MOUNT_SOFTREVAL)
+		task_flags |= RPC_TASK_TIMEOUT;
 
 	args.bitmask = nfs4_bitmask(server, label);
 
@@ -4404,7 +4410,7 @@ static int _nfs4_proc_lookupp(struct inode *inode,
 
 	dprintk("NFS call  lookupp ino=0x%lx\n", inode->i_ino);
 	status = nfs4_call_sync(clnt, server, &msg, &args.seq_args,
-				&res.seq_res, 0);
+				&res.seq_res, task_flags);
 	dprintk("NFS reply lookupp: %d\n", status);
 	return status;
 }
@@ -4957,35 +4963,40 @@ static int nfs4_proc_mkdir(struct inode *dir, struct dentry *dentry,
 	return err;
 }
 
-static int _nfs4_proc_readdir(struct dentry *dentry, const struct cred *cred,
-		u64 cookie, struct page **pages, unsigned int count, bool plus)
+static int _nfs4_proc_readdir(struct nfs_readdir_arg *nr_arg,
+			      struct nfs_readdir_res *nr_res)
 {
-	struct inode		*dir = d_inode(dentry);
+	struct inode		*dir = d_inode(nr_arg->dentry);
+	struct nfs_server	*server = NFS_SERVER(dir);
 	struct nfs4_readdir_arg args = {
 		.fh = NFS_FH(dir),
-		.pages = pages,
+		.pages = nr_arg->pages,
 		.pgbase = 0,
-		.count = count,
-		.bitmask = NFS_SERVER(d_inode(dentry))->attr_bitmask,
-		.plus = plus,
+		.count = nr_arg->page_len,
+		.plus = nr_arg->plus,
 	};
 	struct nfs4_readdir_res res;
 	struct rpc_message msg = {
 		.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READDIR],
 		.rpc_argp = &args,
 		.rpc_resp = &res,
-		.rpc_cred = cred,
+		.rpc_cred = nr_arg->cred,
 	};
 	int			status;
 
-	dprintk("%s: dentry = %pd2, cookie = %Lu\n", __func__,
-			dentry,
-			(unsigned long long)cookie);
-	nfs4_setup_readdir(cookie, NFS_I(dir)->cookieverf, dentry, &args);
+	dprintk("%s: dentry = %pd2, cookie = %llu\n", __func__,
+		nr_arg->dentry, (unsigned long long)nr_arg->cookie);
+	if (!(server->caps & NFS_CAP_SECURITY_LABEL))
+		args.bitmask = server->attr_bitmask_nl;
+	else
+		args.bitmask = server->attr_bitmask;
+
+	nfs4_setup_readdir(nr_arg->cookie, nr_arg->verf, nr_arg->dentry, &args);
 	res.pgbase = args.pgbase;
-	status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &msg, &args.seq_args, &res.seq_res, 0);
+	status = nfs4_call_sync(server->client, server, &msg, &args.seq_args,
+			&res.seq_res, 0);
 	if (status >= 0) {
-		memcpy(NFS_I(dir)->cookieverf, res.verifier.data, NFS4_VERIFIER_SIZE);
+		memcpy(nr_res->verf, res.verifier.data, NFS4_VERIFIER_SIZE);
 		status += args.pgbase;
 	}
 
@@ -4995,19 +5006,18 @@ static int _nfs4_proc_readdir(struct dentry *dentry, const struct cred *cred,
 	return status;
 }
 
-static int nfs4_proc_readdir(struct dentry *dentry, const struct cred *cred,
-		u64 cookie, struct page **pages, unsigned int count, bool plus)
+static int nfs4_proc_readdir(struct nfs_readdir_arg *arg,
+			     struct nfs_readdir_res *res)
 {
 	struct nfs4_exception exception = {
 		.interruptible = true,
 	};
 	int err;
 	do {
-		err = _nfs4_proc_readdir(dentry, cred, cookie,
-				pages, count, plus);
-		trace_nfs4_readdir(d_inode(dentry), err);
-		err = nfs4_handle_exception(NFS_SERVER(d_inode(dentry)), err,
-				&exception);
+		err = _nfs4_proc_readdir(arg, res);
+		trace_nfs4_readdir(d_inode(arg->dentry), err);
+		err = nfs4_handle_exception(NFS_SERVER(d_inode(arg->dentry)),
+					    err, &exception);
 	} while (exception.retry);
 	return err;
 }
@@ -5310,17 +5320,17 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr)
 }
 
 #if defined CONFIG_NFS_V4_2 && defined CONFIG_NFS_V4_2_READ_PLUS
-static void nfs42_read_plus_support(struct nfs_server *server, struct rpc_message *msg)
+static void nfs42_read_plus_support(struct nfs_pgio_header *hdr,
+				    struct rpc_message *msg)
 {
-	if (server->caps & NFS_CAP_READ_PLUS)
+	/* Note: We don't use READ_PLUS with pNFS yet */
+	if (nfs_server_capable(hdr->inode, NFS_CAP_READ_PLUS) && !hdr->ds_clp)
 		msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ_PLUS];
-	else
-		msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
 }
 #else
-static void nfs42_read_plus_support(struct nfs_server *server, struct rpc_message *msg)
+static void nfs42_read_plus_support(struct nfs_pgio_header *hdr,
+				    struct rpc_message *msg)
 {
-	msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
 }
 #endif /* CONFIG_NFS_V4_2 */
 
@@ -5330,7 +5340,8 @@ static void nfs4_proc_read_setup(struct nfs_pgio_header *hdr,
 	hdr->timestamp   = jiffies;
 	if (!hdr->pgio_done_cb)
 		hdr->pgio_done_cb = nfs4_read_done_cb;
-	nfs42_read_plus_support(NFS_SERVER(hdr->inode), msg);
+	msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
+	nfs42_read_plus_support(hdr, msg);
 	nfs4_init_sequence(&hdr->args.seq_args, &hdr->res.seq_res, 0, 0);
 }
 
@@ -9651,6 +9662,8 @@ _nfs4_proc_getdeviceinfo(struct nfs_server *server,
 	if (res.notification != args.notify_types)
 		pdev->nocache = 1;
 
+	trace_nfs4_getdeviceinfo(server, &pdev->dev_id, status);
+
 	dprintk("<-- %s status=%d\n", __func__, status);
 
 	return status;
diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h
index b996ee23f1ba..3de425f59b3a 100644
--- a/fs/nfs/nfs4session.h
+++ b/fs/nfs/nfs4session.h
@@ -34,7 +34,7 @@ enum nfs4_slot_tbl_state {
 	NFS4_SLOT_TBL_DRAINING,
 };
 
-#define SLOT_TABLE_SZ DIV_ROUND_UP(NFS4_MAX_SLOT_TABLE, 8*sizeof(long))
+#define SLOT_TABLE_SZ DIV_ROUND_UP(NFS4_MAX_SLOT_TABLE, BITS_PER_LONG)
 struct nfs4_slot_table {
 	struct nfs4_session *session;		/* Parent session */
 	struct nfs4_slot *slots;		/* seqid per slot */
diff --git a/fs/nfs/nfs4super.c b/fs/nfs/nfs4super.c
index 93f5c1678ec2..984cc42ee54d 100644
--- a/fs/nfs/nfs4super.c
+++ b/fs/nfs/nfs4super.c
@@ -67,7 +67,7 @@ static void nfs4_evict_inode(struct inode *inode)
 	nfs_inode_evict_delegation(inode);
 	/* Note that above delegreturn would trigger pnfs return-on-close */
 	pnfs_return_layout(inode);
-	pnfs_destroy_layout(NFS_I(inode));
+	pnfs_destroy_layout_final(NFS_I(inode));
 	/* First call standard NFS clear_inode() code */
 	nfs_clear_inode(inode);
 	nfs4_xattr_cache_zap(inode);
diff --git a/fs/nfs/nfs4trace.h b/fs/nfs/nfs4trace.h
index 484c1da96dea..48d761e593fb 100644
--- a/fs/nfs/nfs4trace.h
+++ b/fs/nfs/nfs4trace.h
@@ -2189,6 +2189,81 @@ DEFINE_PNFS_LAYOUT_EVENT(pnfs_mds_fallback_write_done);
 DEFINE_PNFS_LAYOUT_EVENT(pnfs_mds_fallback_read_pagelist);
 DEFINE_PNFS_LAYOUT_EVENT(pnfs_mds_fallback_write_pagelist);
 
+DECLARE_EVENT_CLASS(nfs4_deviceid_event,
+		TP_PROTO(
+			const struct nfs_client *clp,
+			const struct nfs4_deviceid *deviceid
+		),
+
+		TP_ARGS(clp, deviceid),
+
+		TP_STRUCT__entry(
+			__string(dstaddr, clp->cl_hostname)
+			__array(unsigned char, deviceid, NFS4_DEVICEID4_SIZE)
+		),
+
+		TP_fast_assign(
+			__assign_str(dstaddr, clp->cl_hostname);
+			memcpy(__entry->deviceid, deviceid->data,
+			       NFS4_DEVICEID4_SIZE);
+		),
+
+		TP_printk(
+			"deviceid=%s, dstaddr=%s",
+			__print_hex(__entry->deviceid, NFS4_DEVICEID4_SIZE),
+			__get_str(dstaddr)
+		)
+);
+#define DEFINE_PNFS_DEVICEID_EVENT(name) \
+	DEFINE_EVENT(nfs4_deviceid_event, name, \
+			TP_PROTO(const struct nfs_client *clp, \
+				const struct nfs4_deviceid *deviceid \
+			), \
+			TP_ARGS(clp, deviceid))
+DEFINE_PNFS_DEVICEID_EVENT(nfs4_deviceid_free);
+
+DECLARE_EVENT_CLASS(nfs4_deviceid_status,
+		TP_PROTO(
+			const struct nfs_server *server,
+			const struct nfs4_deviceid *deviceid,
+			int status
+		),
+
+		TP_ARGS(server, deviceid, status),
+
+		TP_STRUCT__entry(
+			__field(dev_t, dev)
+			__field(int, status)
+			__string(dstaddr, server->nfs_client->cl_hostname)
+			__array(unsigned char, deviceid, NFS4_DEVICEID4_SIZE)
+		),
+
+		TP_fast_assign(
+			__entry->dev = server->s_dev;
+			__entry->status = status;
+			__assign_str(dstaddr, server->nfs_client->cl_hostname);
+			memcpy(__entry->deviceid, deviceid->data,
+			       NFS4_DEVICEID4_SIZE);
+		),
+
+		TP_printk(
+			"dev=%02x:%02x: deviceid=%s, dstaddr=%s, status=%d",
+			MAJOR(__entry->dev), MINOR(__entry->dev),
+			__print_hex(__entry->deviceid, NFS4_DEVICEID4_SIZE),
+			__get_str(dstaddr),
+			__entry->status
+		)
+);
+#define DEFINE_PNFS_DEVICEID_STATUS(name) \
+	DEFINE_EVENT(nfs4_deviceid_status, name, \
+			TP_PROTO(const struct nfs_server *server, \
+				const struct nfs4_deviceid *deviceid, \
+				int status \
+			), \
+			TP_ARGS(server, deviceid, status))
+DEFINE_PNFS_DEVICEID_STATUS(nfs4_getdeviceinfo);
+DEFINE_PNFS_DEVICEID_STATUS(nfs4_find_deviceid);
+
 DECLARE_EVENT_CLASS(nfs4_flexfiles_io_event,
 		TP_PROTO(
 			const struct nfs_pgio_header *hdr
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 2eabe5add344..ac6b79ee9355 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -84,6 +84,7 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
 /* lock,open owner id:
  * we currently use size 2 (u64) out of (NFS4_OPAQUE_LIMIT  >> 2)
  */
+#define pagepad_maxsz		(1)
 #define open_owner_id_maxsz	(1 + 2 + 1 + 1 + 2)
 #define lock_owner_id_maxsz	(1 + 1 + 4)
 #define decode_lockowner_maxsz	(1 + XDR_QUADLEN(IDMAP_NAMESZ))
@@ -215,14 +216,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
 				 nfs4_fattr_bitmap_maxsz)
 #define encode_read_maxsz	(op_encode_hdr_maxsz + \
 				 encode_stateid_maxsz + 3)
-#define decode_read_maxsz	(op_decode_hdr_maxsz + 2 + 1)
+#define decode_read_maxsz	(op_decode_hdr_maxsz + 2 + pagepad_maxsz)
 #define encode_readdir_maxsz	(op_encode_hdr_maxsz + \
 				 2 + encode_verifier_maxsz + 5 + \
 				nfs4_label_maxsz)
 #define decode_readdir_maxsz	(op_decode_hdr_maxsz + \
-				 decode_verifier_maxsz + 1)
+				 decode_verifier_maxsz + pagepad_maxsz)
 #define encode_readlink_maxsz	(op_encode_hdr_maxsz)
-#define decode_readlink_maxsz	(op_decode_hdr_maxsz + 1 + 1)
+#define decode_readlink_maxsz	(op_decode_hdr_maxsz + 1 + pagepad_maxsz)
 #define encode_write_maxsz	(op_encode_hdr_maxsz + \
 				 encode_stateid_maxsz + 4)
 #define decode_write_maxsz	(op_decode_hdr_maxsz + \
@@ -284,14 +285,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
 #define decode_delegreturn_maxsz (op_decode_hdr_maxsz)
 #define encode_getacl_maxsz	(encode_getattr_maxsz)
 #define decode_getacl_maxsz	(op_decode_hdr_maxsz + \
-				 nfs4_fattr_bitmap_maxsz + 1 + 1)
+				 nfs4_fattr_bitmap_maxsz + 1 + pagepad_maxsz)
 #define encode_setacl_maxsz	(op_encode_hdr_maxsz + \
 				 encode_stateid_maxsz + 3)
 #define decode_setacl_maxsz	(decode_setattr_maxsz)
 #define encode_fs_locations_maxsz \
 				(encode_getattr_maxsz)
 #define decode_fs_locations_maxsz \
-				(1)
+				(pagepad_maxsz)
 #define encode_secinfo_maxsz	(op_encode_hdr_maxsz + nfs4_name_maxsz)
 #define decode_secinfo_maxsz	(op_decode_hdr_maxsz + 1 + ((NFS_MAX_SECFLAVORS * (16 + GSS_OID_MAX_LEN)) / 4))
 
@@ -393,12 +394,13 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
 				  /* devaddr4 payload is read into page */ \
 				1 /* notification bitmap length */ + \
 				1 /* notification bitmap, word 0 */ + \
-				1 /* possible XDR padding */)
+				pagepad_maxsz /* possible XDR padding */)
 #define encode_layoutget_maxsz	(op_encode_hdr_maxsz + 10 + \
 				encode_stateid_maxsz)
 #define decode_layoutget_maxsz	(op_decode_hdr_maxsz + 8 + \
 				decode_stateid_maxsz + \
-				XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE) + 1)
+				XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE) + \
+				pagepad_maxsz)
 #define encode_layoutcommit_maxsz (op_encode_hdr_maxsz +          \
 				2 /* offset */ + \
 				2 /* length */ + \
@@ -2342,7 +2344,7 @@ static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr,
 		encode_layoutget(xdr, args->lg_args, &hdr);
 		rpc_prepare_reply_pages(req, args->lg_args->layout.pages, 0,
 					args->lg_args->layout.pglen,
-					hdr.replen);
+					hdr.replen - pagepad_maxsz);
 	}
 	encode_nops(&hdr);
 }
@@ -2388,7 +2390,7 @@ static void nfs4_xdr_enc_open_noattr(struct rpc_rqst *req,
 		encode_layoutget(xdr, args->lg_args, &hdr);
 		rpc_prepare_reply_pages(req, args->lg_args->layout.pages, 0,
 					args->lg_args->layout.pglen,
-					hdr.replen);
+					hdr.replen - pagepad_maxsz);
 	}
 	encode_nops(&hdr);
 }
@@ -2499,7 +2501,7 @@ static void nfs4_xdr_enc_readlink(struct rpc_rqst *req, struct xdr_stream *xdr,
 	encode_readlink(xdr, args, req, &hdr);
 
 	rpc_prepare_reply_pages(req, args->pages, args->pgbase,
-				args->pglen, hdr.replen);
+				args->pglen, hdr.replen - pagepad_maxsz);
 	encode_nops(&hdr);
 }
 
@@ -2520,7 +2522,7 @@ static void nfs4_xdr_enc_readdir(struct rpc_rqst *req, struct xdr_stream *xdr,
 	encode_readdir(xdr, args, req, &hdr);
 
 	rpc_prepare_reply_pages(req, args->pages, args->pgbase,
-				args->count, hdr.replen);
+				args->count, hdr.replen - pagepad_maxsz);
 	encode_nops(&hdr);
 }
 
@@ -2541,7 +2543,7 @@ static void nfs4_xdr_enc_read(struct rpc_rqst *req, struct xdr_stream *xdr,
 	encode_read(xdr, args, &hdr);
 
 	rpc_prepare_reply_pages(req, args->pages, args->pgbase,
-				args->count, hdr.replen);
+				args->count, hdr.replen - pagepad_maxsz);
 	req->rq_rcv_buf.flags |= XDRBUF_READ;
 	encode_nops(&hdr);
 }
@@ -2588,7 +2590,7 @@ static void nfs4_xdr_enc_getacl(struct rpc_rqst *req, struct xdr_stream *xdr,
 			ARRAY_SIZE(nfs4_acl_bitmap), &hdr);
 
 	rpc_prepare_reply_pages(req, args->acl_pages, 0,
-				args->acl_len, replen + 1);
+				args->acl_len, replen);
 	encode_nops(&hdr);
 }
 
@@ -2810,7 +2812,7 @@ static void nfs4_xdr_enc_fs_locations(struct rpc_rqst *req,
 	}
 
 	rpc_prepare_reply_pages(req, (struct page **)&args->page, 0,
-				PAGE_SIZE, replen + 1);
+				PAGE_SIZE, replen);
 	encode_nops(&hdr);
 }
 
@@ -3009,15 +3011,19 @@ static void nfs4_xdr_enc_getdeviceinfo(struct rpc_rqst *req,
 	struct compound_hdr hdr = {
 		.minorversion = nfs4_xdr_minorversion(&args->seq_args),
 	};
+	uint32_t replen;
 
 	encode_compound_hdr(xdr, req, &hdr);
 	encode_sequence(xdr, &args->seq_args, &hdr);
+
+	replen = hdr.replen + op_decode_hdr_maxsz + 2;
+
 	encode_getdeviceinfo(xdr, args, &hdr);
 
-	/* set up reply kvec. Subtract notification bitmap max size (2)
-	 * so that notification bitmap is put in xdr_buf tail */
+	/* set up reply kvec. device_addr4 opaque data is read into the
+	 * pages */
 	rpc_prepare_reply_pages(req, args->pdev->pages, args->pdev->pgbase,
-				args->pdev->pglen, hdr.replen - 2);
+				args->pdev->pglen, replen);
 	encode_nops(&hdr);
 }
 
@@ -3039,7 +3045,7 @@ static void nfs4_xdr_enc_layoutget(struct rpc_rqst *req,
 	encode_layoutget(xdr, args, &hdr);
 
 	rpc_prepare_reply_pages(req, args->layout.pages, 0,
-				args->layout.pglen, hdr.replen);
+				args->layout.pglen, hdr.replen - pagepad_maxsz);
 	encode_nops(&hdr);
 }
 
@@ -5331,11 +5337,11 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req,
 		res->acl_len = attrlen;
 
 		/* Check for receive buffer overflow */
-		if (res->acl_len > (xdr->nwords << 2) ||
+		if (res->acl_len > xdr_stream_remaining(xdr) ||
 		    res->acl_len + res->acl_data_offset > xdr->buf->page_len) {
 			res->acl_flags |= NFS4_ACL_TRUNC;
-			dprintk("NFS: acl reply: attrlen %u > page_len %u\n",
-					attrlen, xdr->nwords << 2);
+			dprintk("NFS: acl reply: attrlen %u > page_len %zu\n",
+				attrlen, xdr_stream_remaining(xdr));
 		}
 	} else
 		status = -EOPNOTSUPP;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 0e50b9d45c32..07f59dc8cb2e 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -294,6 +294,7 @@ void
 pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo)
 {
 	struct inode *inode;
+	unsigned long i_state;
 
 	if (!lo)
 		return;
@@ -304,8 +305,12 @@ pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo)
 		if (!list_empty(&lo->plh_segs))
 			WARN_ONCE(1, "NFS: BUG unfreed layout segments.\n");
 		pnfs_detach_layout_hdr(lo);
+		i_state = inode->i_state;
 		spin_unlock(&inode->i_lock);
 		pnfs_free_layout_hdr(lo);
+		/* Notify pnfs_destroy_layout_final() that we're done */
+		if (i_state & (I_FREEING | I_CLEAR))
+			wake_up_var(lo);
 	}
 }
 
@@ -734,8 +739,7 @@ pnfs_free_lseg_list(struct list_head *free_me)
 	}
 }
 
-void
-pnfs_destroy_layout(struct nfs_inode *nfsi)
+static struct pnfs_layout_hdr *__pnfs_destroy_layout(struct nfs_inode *nfsi)
 {
 	struct pnfs_layout_hdr *lo;
 	LIST_HEAD(tmp_list);
@@ -753,9 +757,34 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
 		pnfs_put_layout_hdr(lo);
 	} else
 		spin_unlock(&nfsi->vfs_inode.i_lock);
+	return lo;
+}
+
+void pnfs_destroy_layout(struct nfs_inode *nfsi)
+{
+	__pnfs_destroy_layout(nfsi);
 }
 EXPORT_SYMBOL_GPL(pnfs_destroy_layout);
 
+static bool pnfs_layout_removed(struct nfs_inode *nfsi,
+				struct pnfs_layout_hdr *lo)
+{
+	bool ret;
+
+	spin_lock(&nfsi->vfs_inode.i_lock);
+	ret = nfsi->layout != lo;
+	spin_unlock(&nfsi->vfs_inode.i_lock);
+	return ret;
+}
+
+void pnfs_destroy_layout_final(struct nfs_inode *nfsi)
+{
+	struct pnfs_layout_hdr *lo = __pnfs_destroy_layout(nfsi);
+
+	if (lo)
+		wait_var_event(lo, pnfs_layout_removed(nfsi, lo));
+}
+
 static bool
 pnfs_layout_add_bulk_destroy_list(struct inode *inode,
 		struct list_head *layout_list)
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 2661c44c62db..bbd3de1025f2 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -51,6 +51,8 @@ struct nfs4_pnfs_ds_addr {
 	size_t			da_addrlen;
 	struct list_head	da_node;  /* nfs4_pnfs_dev_hlist dev_dslist */
 	char			*da_remotestr;	/* human readable addr+port */
+	const char		*da_netid;
+	int			da_transport;
 };
 
 struct nfs4_pnfs_ds {
@@ -266,6 +268,7 @@ struct pnfs_layout_segment *pnfs_layout_process(struct nfs4_layoutget *lgp);
 void pnfs_layoutget_free(struct nfs4_layoutget *lgp);
 void pnfs_free_lseg_list(struct list_head *tmp_list);
 void pnfs_destroy_layout(struct nfs_inode *);
+void pnfs_destroy_layout_final(struct nfs_inode *);
 void pnfs_destroy_all_layouts(struct nfs_client *);
 int pnfs_destroy_layouts_byfsid(struct nfs_client *clp,
 		struct nfs_fsid *fsid,
@@ -710,6 +713,10 @@ static inline void pnfs_destroy_layout(struct nfs_inode *nfsi)
 {
 }
 
+static inline void pnfs_destroy_layout_final(struct nfs_inode *nfsi)
+{
+}
+
 static inline struct pnfs_layout_segment *
 pnfs_get_lseg(struct pnfs_layout_segment *lseg)
 {
diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c
index 537b80d693f1..ddbbf4fcda86 100644
--- a/fs/nfs/pnfs_dev.c
+++ b/fs/nfs/pnfs_dev.c
@@ -34,6 +34,8 @@
 #include "internal.h"
 #include "pnfs.h"
 
+#include "nfs4trace.h"
+
 #define NFSDBG_FACILITY		NFSDBG_PNFS
 
 /*
@@ -192,24 +194,28 @@ nfs4_find_get_deviceid(struct nfs_server *server,
 
 	d = __nfs4_find_get_deviceid(server, id, hash);
 	if (d)
-		return d;
+		goto found;
 
 	new = nfs4_get_device_info(server, id, cred, gfp_mask);
-	if (!new)
+	if (!new) {
+		trace_nfs4_find_deviceid(server, id, -ENOENT);
 		return new;
+	}
 
 	spin_lock(&nfs4_deviceid_lock);
 	d = __nfs4_find_get_deviceid(server, id, hash);
 	if (d) {
 		spin_unlock(&nfs4_deviceid_lock);
 		server->pnfs_curr_ld->free_deviceid_node(new);
-		return d;
+	} else {
+		atomic_inc(&new->ref);
+		hlist_add_head_rcu(&new->node, &nfs4_deviceid_cache[hash]);
+		spin_unlock(&nfs4_deviceid_lock);
+		d = new;
 	}
-	hlist_add_head_rcu(&new->node, &nfs4_deviceid_cache[hash]);
-	atomic_inc(&new->ref);
-	spin_unlock(&nfs4_deviceid_lock);
-
-	return new;
+found:
+	trace_nfs4_find_deviceid(server, id, 0);
+	return d;
 }
 EXPORT_SYMBOL_GPL(nfs4_find_get_deviceid);
 
@@ -278,6 +284,7 @@ nfs4_put_deviceid_node(struct nfs4_deviceid_node *d)
 	}
 	if (!atomic_dec_and_test(&d->ref))
 		return false;
+	trace_nfs4_deviceid_free(d->nfs_client, &d->deviceid);
 	d->ld->free_deviceid_node(d);
 	return true;
 }
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index 679767ac258d..2efcfdd348a1 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -661,6 +661,21 @@ _data_server_lookup_locked(const struct list_head *dsaddrs)
 	return NULL;
 }
 
+static struct nfs4_pnfs_ds_addr *nfs4_pnfs_ds_addr_alloc(gfp_t gfp_flags)
+{
+	struct nfs4_pnfs_ds_addr *da = kzalloc(sizeof(*da), gfp_flags);
+	if (da)
+		INIT_LIST_HEAD(&da->da_node);
+	return da;
+}
+
+static void nfs4_pnfs_ds_addr_free(struct nfs4_pnfs_ds_addr *da)
+{
+	kfree(da->da_remotestr);
+	kfree(da->da_netid);
+	kfree(da);
+}
+
 static void destroy_ds(struct nfs4_pnfs_ds *ds)
 {
 	struct nfs4_pnfs_ds_addr *da;
@@ -676,8 +691,7 @@ static void destroy_ds(struct nfs4_pnfs_ds *ds)
 				      struct nfs4_pnfs_ds_addr,
 				      da_node);
 		list_del_init(&da->da_node);
-		kfree(da->da_remotestr);
-		kfree(da);
+		nfs4_pnfs_ds_addr_free(da);
 	}
 
 	kfree(ds->ds_remotestr);
@@ -854,12 +868,17 @@ static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv,
 
 		if (!IS_ERR(clp)) {
 			struct xprt_create xprt_args = {
-				.ident = XPRT_TRANSPORT_TCP,
+				.ident = da->da_transport,
 				.net = clp->cl_net,
 				.dstaddr = (struct sockaddr *)&da->da_addr,
 				.addrlen = da->da_addrlen,
 				.servername = clp->cl_hostname,
 			};
+
+			if (da->da_transport != clp->cl_proto)
+				continue;
+			if (da->da_addr.ss_family != clp->cl_addr.ss_family)
+				continue;
 			/* Add this address as an alias */
 			rpc_clnt_add_xprt(clp->cl_rpcclient, &xprt_args,
 					rpc_clnt_test_and_add_xprt, NULL);
@@ -867,7 +886,7 @@ static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv,
 		}
 		clp = get_v3_ds_connect(mds_srv,
 				(struct sockaddr *)&da->da_addr,
-				da->da_addrlen, IPPROTO_TCP,
+				da->da_addrlen, da->da_transport,
 				timeo, retrans);
 		if (IS_ERR(clp))
 			continue;
@@ -905,7 +924,7 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv,
 
 		if (!IS_ERR(clp) && clp->cl_mvops->session_trunk) {
 			struct xprt_create xprt_args = {
-				.ident = XPRT_TRANSPORT_TCP,
+				.ident = da->da_transport,
 				.net = clp->cl_net,
 				.dstaddr = (struct sockaddr *)&da->da_addr,
 				.addrlen = da->da_addrlen,
@@ -913,17 +932,21 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv,
 			};
 			struct nfs4_add_xprt_data xprtdata = {
 				.clp = clp,
-				.cred = nfs4_get_clid_cred(clp),
 			};
 			struct rpc_add_xprt_test rpcdata = {
 				.add_xprt_test = clp->cl_mvops->session_trunk,
 				.data = &xprtdata,
 			};
 
+			if (da->da_transport != clp->cl_proto)
+				continue;
+			if (da->da_addr.ss_family != clp->cl_addr.ss_family)
+				continue;
 			/**
 			* Test this address for session trunking and
 			* add as an alias
 			*/
+			xprtdata.cred = nfs4_get_clid_cred(clp),
 			rpc_clnt_add_xprt(clp->cl_rpcclient, &xprt_args,
 					  rpc_clnt_setup_test_and_add_xprt,
 					  &rpcdata);
@@ -932,8 +955,9 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv,
 		} else {
 			clp = nfs4_set_ds_client(mds_srv,
 						(struct sockaddr *)&da->da_addr,
-						da->da_addrlen, IPPROTO_TCP,
-						timeo, retrans, minor_version);
+						da->da_addrlen,
+						da->da_transport, timeo,
+						retrans, minor_version);
 			if (IS_ERR(clp))
 				continue;
 
@@ -1021,55 +1045,26 @@ nfs4_decode_mp_ds_addr(struct net *net, struct xdr_stream *xdr, gfp_t gfp_flags)
 	struct nfs4_pnfs_ds_addr *da = NULL;
 	char *buf, *portstr;
 	__be16 port;
-	int nlen, rlen;
+	ssize_t nlen, rlen;
 	int tmp[2];
-	__be32 *p;
-	char *netid, *match_netid;
-	size_t len, match_netid_len;
+	char *netid;
+	size_t len;
 	char *startsep = "";
 	char *endsep = "";
 
 
 	/* r_netid */
-	p = xdr_inline_decode(xdr, 4);
-	if (unlikely(!p))
+	nlen = xdr_stream_decode_string_dup(xdr, &netid, XDR_MAX_NETOBJ,
+					    gfp_flags);
+	if (unlikely(nlen < 0))
 		goto out_err;
-	nlen = be32_to_cpup(p++);
-
-	p = xdr_inline_decode(xdr, nlen);
-	if (unlikely(!p))
-		goto out_err;
-
-	netid = kmalloc(nlen+1, gfp_flags);
-	if (unlikely(!netid))
-		goto out_err;
-
-	netid[nlen] = '\0';
-	memcpy(netid, p, nlen);
 
 	/* r_addr: ip/ip6addr with port in dec octets - see RFC 5665 */
-	p = xdr_inline_decode(xdr, 4);
-	if (unlikely(!p))
-		goto out_free_netid;
-	rlen = be32_to_cpup(p);
-
-	p = xdr_inline_decode(xdr, rlen);
-	if (unlikely(!p))
-		goto out_free_netid;
-
 	/* port is ".ABC.DEF", 8 chars max */
-	if (rlen > INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN + 8) {
-		dprintk("%s: Invalid address, length %d\n", __func__,
-			rlen);
-		goto out_free_netid;
-	}
-	buf = kmalloc(rlen + 1, gfp_flags);
-	if (!buf) {
-		dprintk("%s: Not enough memory\n", __func__);
+	rlen = xdr_stream_decode_string_dup(xdr, &buf, INET6_ADDRSTRLEN +
+					    IPV6_SCOPE_ID_LEN + 8, gfp_flags);
+	if (unlikely(rlen < 0))
 		goto out_free_netid;
-	}
-	buf[rlen] = '\0';
-	memcpy(buf, p, rlen);
 
 	/* replace port '.' with '-' */
 	portstr = strrchr(buf, '.');
@@ -1089,12 +1084,10 @@ nfs4_decode_mp_ds_addr(struct net *net, struct xdr_stream *xdr, gfp_t gfp_flags)
 	}
 	*portstr = '\0';
 
-	da = kzalloc(sizeof(*da), gfp_flags);
+	da = nfs4_pnfs_ds_addr_alloc(gfp_flags);
 	if (unlikely(!da))
 		goto out_free_buf;
 
-	INIT_LIST_HEAD(&da->da_node);
-
 	if (!rpc_pton(net, buf, portstr-buf, (struct sockaddr *)&da->da_addr,
 		      sizeof(da->da_addr))) {
 		dprintk("%s: error parsing address %s\n", __func__, buf);
@@ -1109,15 +1102,11 @@ nfs4_decode_mp_ds_addr(struct net *net, struct xdr_stream *xdr, gfp_t gfp_flags)
 	case AF_INET:
 		((struct sockaddr_in *)&da->da_addr)->sin_port = port;
 		da->da_addrlen = sizeof(struct sockaddr_in);
-		match_netid = "tcp";
-		match_netid_len = 3;
 		break;
 
 	case AF_INET6:
 		((struct sockaddr_in6 *)&da->da_addr)->sin6_port = port;
 		da->da_addrlen = sizeof(struct sockaddr_in6);
-		match_netid = "tcp6";
-		match_netid_len = 4;
 		startsep = "[";
 		endsep = "]";
 		break;
@@ -1128,12 +1117,15 @@ nfs4_decode_mp_ds_addr(struct net *net, struct xdr_stream *xdr, gfp_t gfp_flags)
 		goto out_free_da;
 	}
 
-	if (nlen != match_netid_len || strncmp(netid, match_netid, nlen)) {
-		dprintk("%s: ERROR: r_netid \"%s\" != \"%s\"\n",
-			__func__, netid, match_netid);
+	da->da_transport = xprt_find_transport_ident(netid);
+	if (da->da_transport < 0) {
+		dprintk("%s: ERROR: unknown r_netid \"%s\"\n",
+			__func__, netid);
 		goto out_free_da;
 	}
 
+	da->da_netid = netid;
+
 	/* save human readable address */
 	len = strlen(startsep) + strlen(buf) + strlen(endsep) + 7;
 	da->da_remotestr = kzalloc(len, gfp_flags);
@@ -1145,7 +1137,6 @@ nfs4_decode_mp_ds_addr(struct net *net, struct xdr_stream *xdr, gfp_t gfp_flags)
 
 	dprintk("%s: Parsed DS addr %s\n", __func__, da->da_remotestr);
 	kfree(buf);
-	kfree(netid);
 	return da;
 
 out_free_da:
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 15c865cc837f..73ab7c59d3a7 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -499,26 +499,26 @@ nfs_proc_rmdir(struct inode *dir, const struct qstr *name)
  * sure it is syntactically correct; the entries itself are decoded
  * from nfs_readdir by calling the decode_entry function directly.
  */
-static int
-nfs_proc_readdir(struct dentry *dentry, const struct cred *cred,
-		 u64 cookie, struct page **pages, unsigned int count, bool plus)
+static int nfs_proc_readdir(struct nfs_readdir_arg *nr_arg,
+			    struct nfs_readdir_res *nr_res)
 {
-	struct inode		*dir = d_inode(dentry);
+	struct inode		*dir = d_inode(nr_arg->dentry);
 	struct nfs_readdirargs	arg = {
 		.fh		= NFS_FH(dir),
-		.cookie		= cookie,
-		.count		= count,
-		.pages		= pages,
+		.cookie		= nr_arg->cookie,
+		.count		= nr_arg->page_len,
+		.pages		= nr_arg->pages,
 	};
 	struct rpc_message	msg = {
 		.rpc_proc	= &nfs_procedures[NFSPROC_READDIR],
 		.rpc_argp	= &arg,
-		.rpc_cred	= cred,
+		.rpc_cred	= nr_arg->cred,
 	};
 	int			status;
 
-	dprintk("NFS call  readdir %d\n", (unsigned int)cookie);
+	dprintk("NFS call  readdir %llu\n", (unsigned long long)nr_arg->cookie);
 	status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+	nr_res->verf[0] = nr_res->verf[1] = 0;
 
 	nfs_invalidate_atime(dir);
 
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index d77c624c61f6..53fcbf79bdca 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -600,7 +600,7 @@ static struct notifier_block nfsd_file_lease_notifier = {
 static int
 nfsd_file_fsnotify_handle_event(struct fsnotify_mark *mark, u32 mask,
 				struct inode *inode, struct inode *dir,
-				const struct qstr *name)
+				const struct qstr *name, u32 cookie)
 {
 	trace_nfsd_file_fsnotify_handle_event(inode, mask);
 
diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c
index 5486aaca60b0..e85e13c50d6d 100644
--- a/fs/notify/dnotify/dnotify.c
+++ b/fs/notify/dnotify/dnotify.c
@@ -72,7 +72,7 @@ static void dnotify_recalc_inode_mask(struct fsnotify_mark *fsn_mark)
  */
 static int dnotify_handle_event(struct fsnotify_mark *inode_mark, u32 mask,
 				struct inode *inode, struct inode *dir,
-				const struct qstr *name)
+				const struct qstr *name, u32 cookie)
 {
 	struct dnotify_mark *dn_mark;
 	struct dnotify_struct *dn;
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 9167884a61ec..1192c9953620 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -268,12 +268,11 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group,
 			continue;
 
 		/*
-		 * If the event is for a child and this mark is on a parent not
+		 * If the event is on a child and this mark is on a parent not
 		 * watching children, don't send it!
 		 */
-		if (event_mask & FS_EVENT_ON_CHILD &&
-		    type == FSNOTIFY_OBJ_TYPE_INODE &&
-		     !(mark->mask & FS_EVENT_ON_CHILD))
+		if (type == FSNOTIFY_OBJ_TYPE_PARENT &&
+		    !(mark->mask & FS_EVENT_ON_CHILD))
 			continue;
 
 		marks_mask |= mark->mask;
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c
index 8d3ad5ef2925..30d422b8c0fc 100644
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -152,6 +152,13 @@ static bool fsnotify_event_needs_parent(struct inode *inode, struct mount *mnt,
 	if (mask & FS_ISDIR)
 		return false;
 
+	/*
+	 * All events that are possible on child can also may be reported with
+	 * parent/name info to inode/sb/mount.  Otherwise, a watching parent
+	 * could result in events reported with unexpected name info to sb/mount.
+	 */
+	BUILD_BUG_ON(FS_EVENTS_POSS_ON_CHILD & ~FS_EVENTS_POSS_TO_PARENT);
+
 	/* Did either inode/sb/mount subscribe for events with parent/name? */
 	marks_mask |= fsnotify_parent_needed_mask(inode->i_fsnotify_mask);
 	marks_mask |= fsnotify_parent_needed_mask(inode->i_sb->s_fsnotify_mask);
@@ -232,47 +239,76 @@ notify:
 }
 EXPORT_SYMBOL_GPL(__fsnotify_parent);
 
+static int fsnotify_handle_inode_event(struct fsnotify_group *group,
+				       struct fsnotify_mark *inode_mark,
+				       u32 mask, const void *data, int data_type,
+				       struct inode *dir, const struct qstr *name,
+				       u32 cookie)
+{
+	const struct path *path = fsnotify_data_path(data, data_type);
+	struct inode *inode = fsnotify_data_inode(data, data_type);
+	const struct fsnotify_ops *ops = group->ops;
+
+	if (WARN_ON_ONCE(!ops->handle_inode_event))
+		return 0;
+
+	if ((inode_mark->mask & FS_EXCL_UNLINK) &&
+	    path && d_unlinked(path->dentry))
+		return 0;
+
+	/* Check interest of this mark in case event was sent with two marks */
+	if (!(mask & inode_mark->mask & ALL_FSNOTIFY_EVENTS))
+		return 0;
+
+	return ops->handle_inode_event(inode_mark, mask, inode, dir, name, cookie);
+}
+
 static int fsnotify_handle_event(struct fsnotify_group *group, __u32 mask,
 				 const void *data, int data_type,
 				 struct inode *dir, const struct qstr *name,
 				 u32 cookie, struct fsnotify_iter_info *iter_info)
 {
 	struct fsnotify_mark *inode_mark = fsnotify_iter_inode_mark(iter_info);
-	struct fsnotify_mark *child_mark = fsnotify_iter_child_mark(iter_info);
-	struct inode *inode = fsnotify_data_inode(data, data_type);
-	const struct fsnotify_ops *ops = group->ops;
+	struct fsnotify_mark *parent_mark = fsnotify_iter_parent_mark(iter_info);
 	int ret;
 
-	if (WARN_ON_ONCE(!ops->handle_inode_event))
-		return 0;
-
 	if (WARN_ON_ONCE(fsnotify_iter_sb_mark(iter_info)) ||
 	    WARN_ON_ONCE(fsnotify_iter_vfsmount_mark(iter_info)))
 		return 0;
 
-	/*
-	 * An event can be sent on child mark iterator instead of inode mark
-	 * iterator because of other groups that have interest of this inode
-	 * and have marks on both parent and child.  We can simplify this case.
-	 */
-	if (!inode_mark) {
-		inode_mark = child_mark;
-		child_mark = NULL;
+	if (parent_mark) {
+		/*
+		 * parent_mark indicates that the parent inode is watching
+		 * children and interested in this event, which is an event
+		 * possible on child. But is *this mark* watching children and
+		 * interested in this event?
+		 */
+		if (parent_mark->mask & FS_EVENT_ON_CHILD) {
+			ret = fsnotify_handle_inode_event(group, parent_mark, mask,
+							  data, data_type, dir, name, 0);
+			if (ret)
+				return ret;
+		}
+		if (!inode_mark)
+			return 0;
+	}
+
+	if (mask & FS_EVENT_ON_CHILD) {
+		/*
+		 * Some events can be sent on both parent dir and child marks
+		 * (e.g. FS_ATTRIB).  If both parent dir and child are
+		 * watching, report the event once to parent dir with name (if
+		 * interested) and once to child without name (if interested).
+		 * The child watcher is expecting an event without a file name
+		 * and without the FS_EVENT_ON_CHILD flag.
+		 */
+		mask &= ~FS_EVENT_ON_CHILD;
 		dir = NULL;
 		name = NULL;
 	}
 
-	ret = ops->handle_inode_event(inode_mark, mask, inode, dir, name);
-	if (ret || !child_mark)
-		return ret;
-
-	/*
-	 * Some events can be sent on both parent dir and child marks
-	 * (e.g. FS_ATTRIB).  If both parent dir and child are watching,
-	 * report the event once to parent dir with name and once to child
-	 * without name.
-	 */
-	return ops->handle_inode_event(child_mark, mask, inode, NULL, NULL);
+	return fsnotify_handle_inode_event(group, inode_mark, mask, data, data_type,
+					   dir, name, cookie);
 }
 
 static int send_to_group(__u32 mask, const void *data, int data_type,
@@ -430,7 +466,7 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
 	struct fsnotify_iter_info iter_info = {};
 	struct super_block *sb;
 	struct mount *mnt = NULL;
-	struct inode *child = NULL;
+	struct inode *parent = NULL;
 	int ret = 0;
 	__u32 test_mask, marks_mask;
 
@@ -442,11 +478,10 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
 		inode = dir;
 	} else if (mask & FS_EVENT_ON_CHILD) {
 		/*
-		 * Event on child - report on TYPE_INODE to dir if it is
-		 * watching children and on TYPE_CHILD to child.
+		 * Event on child - report on TYPE_PARENT to dir if it is
+		 * watching children and on TYPE_INODE to child.
 		 */
-		child = inode;
-		inode = dir;
+		parent = dir;
 	}
 	sb = inode->i_sb;
 
@@ -460,7 +495,7 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
 	if (!sb->s_fsnotify_marks &&
 	    (!mnt || !mnt->mnt_fsnotify_marks) &&
 	    (!inode || !inode->i_fsnotify_marks) &&
-	    (!child || !child->i_fsnotify_marks))
+	    (!parent || !parent->i_fsnotify_marks))
 		return 0;
 
 	marks_mask = sb->s_fsnotify_mask;
@@ -468,8 +503,8 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
 		marks_mask |= mnt->mnt_fsnotify_mask;
 	if (inode)
 		marks_mask |= inode->i_fsnotify_mask;
-	if (child)
-		marks_mask |= child->i_fsnotify_mask;
+	if (parent)
+		marks_mask |= parent->i_fsnotify_mask;
 
 
 	/*
@@ -492,9 +527,9 @@ int fsnotify(__u32 mask, const void *data, int data_type, struct inode *dir,
 		iter_info.marks[FSNOTIFY_OBJ_TYPE_INODE] =
 			fsnotify_first_mark(&inode->i_fsnotify_marks);
 	}
-	if (child) {
-		iter_info.marks[FSNOTIFY_OBJ_TYPE_CHILD] =
-			fsnotify_first_mark(&child->i_fsnotify_marks);
+	if (parent) {
+		iter_info.marks[FSNOTIFY_OBJ_TYPE_PARENT] =
+			fsnotify_first_mark(&parent->i_fsnotify_marks);
 	}
 
 	/*
diff --git a/fs/notify/inotify/inotify.h b/fs/notify/inotify/inotify.h
index 4327d0e9c364..2007e3711916 100644
--- a/fs/notify/inotify/inotify.h
+++ b/fs/notify/inotify/inotify.h
@@ -24,11 +24,10 @@ static inline struct inotify_event_info *INOTIFY_E(struct fsnotify_event *fse)
 
 extern void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark,
 					   struct fsnotify_group *group);
-extern int inotify_handle_event(struct fsnotify_group *group, u32 mask,
-				const void *data, int data_type,
-				struct inode *dir,
-				const struct qstr *file_name, u32 cookie,
-				struct fsnotify_iter_info *iter_info);
+extern int inotify_handle_inode_event(struct fsnotify_mark *inode_mark,
+				      u32 mask, struct inode *inode,
+				      struct inode *dir,
+				      const struct qstr *name, u32 cookie);
 
 extern const struct fsnotify_ops inotify_fsnotify_ops;
 extern struct kmem_cache *inotify_inode_mark_cachep;
diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c
index 9ddcbadc98e2..1901d799909b 100644
--- a/fs/notify/inotify/inotify_fsnotify.c
+++ b/fs/notify/inotify/inotify_fsnotify.c
@@ -55,25 +55,21 @@ static int inotify_merge(struct list_head *list,
 	return event_compare(last_event, event);
 }
 
-static int inotify_one_event(struct fsnotify_group *group, u32 mask,
-			     struct fsnotify_mark *inode_mark,
-			     const struct path *path,
-			     const struct qstr *file_name, u32 cookie)
+int inotify_handle_inode_event(struct fsnotify_mark *inode_mark, u32 mask,
+			       struct inode *inode, struct inode *dir,
+			       const struct qstr *name, u32 cookie)
 {
 	struct inotify_inode_mark *i_mark;
 	struct inotify_event_info *event;
 	struct fsnotify_event *fsn_event;
+	struct fsnotify_group *group = inode_mark->group;
 	int ret;
 	int len = 0;
 	int alloc_len = sizeof(struct inotify_event_info);
 	struct mem_cgroup *old_memcg;
 
-	if ((inode_mark->mask & FS_EXCL_UNLINK) &&
-	    path && d_unlinked(path->dentry))
-		return 0;
-
-	if (file_name) {
-		len = file_name->len;
+	if (name) {
+		len = name->len;
 		alloc_len += len + 1;
 	}
 
@@ -117,7 +113,7 @@ static int inotify_one_event(struct fsnotify_group *group, u32 mask,
 	event->sync_cookie = cookie;
 	event->name_len = len;
 	if (len)
-		strcpy(event->name, file_name->name);
+		strcpy(event->name, name->name);
 
 	ret = fsnotify_add_event(group, fsn_event, inotify_merge);
 	if (ret) {
@@ -131,37 +127,6 @@ static int inotify_one_event(struct fsnotify_group *group, u32 mask,
 	return 0;
 }
 
-int inotify_handle_event(struct fsnotify_group *group, u32 mask,
-			 const void *data, int data_type, struct inode *dir,
-			 const struct qstr *file_name, u32 cookie,
-			 struct fsnotify_iter_info *iter_info)
-{
-	const struct path *path = fsnotify_data_path(data, data_type);
-	struct fsnotify_mark *inode_mark = fsnotify_iter_inode_mark(iter_info);
-	struct fsnotify_mark *child_mark = fsnotify_iter_child_mark(iter_info);
-	int ret = 0;
-
-	if (WARN_ON(fsnotify_iter_vfsmount_mark(iter_info)))
-		return 0;
-
-	/*
-	 * Some events cannot be sent on both parent and child marks
-	 * (e.g. IN_CREATE).  Those events are always sent on inode_mark.
-	 * For events that are possible on both parent and child (e.g. IN_OPEN),
-	 * event is sent on inode_mark with name if the parent is watching and
-	 * is sent on child_mark without name if child is watching.
-	 * If both parent and child are watching, report the event with child's
-	 * name here and report another event without child's name below.
-	 */
-	if (inode_mark)
-		ret = inotify_one_event(group, mask, inode_mark, path,
-					file_name, cookie);
-	if (ret || !child_mark)
-		return ret;
-
-	return inotify_one_event(group, mask, child_mark, path, NULL, 0);
-}
-
 static void inotify_freeing_mark(struct fsnotify_mark *fsn_mark, struct fsnotify_group *group)
 {
 	inotify_ignored_and_remove_idr(fsn_mark, group);
@@ -227,7 +192,7 @@ static void inotify_free_mark(struct fsnotify_mark *fsn_mark)
 }
 
 const struct fsnotify_ops inotify_fsnotify_ops = {
-	.handle_event = inotify_handle_event,
+	.handle_inode_event = inotify_handle_inode_event,
 	.free_group_priv = inotify_free_group_priv,
 	.free_event = inotify_free_event,
 	.freeing_mark = inotify_freeing_mark,
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c
index 186722ba3894..59c177011a0f 100644
--- a/fs/notify/inotify/inotify_user.c
+++ b/fs/notify/inotify/inotify_user.c
@@ -37,6 +37,15 @@
 
 #include <asm/ioctls.h>
 
+/*
+ * An inotify watch requires allocating an inotify_inode_mark structure as
+ * well as pinning the watched inode. Doubling the size of a VFS inode
+ * should be more than enough to cover the additional filesystem inode
+ * size increase.
+ */
+#define INOTIFY_WATCH_COST	(sizeof(struct inotify_inode_mark) + \
+				 2 * sizeof(struct inode))
+
 /* configurable via /proc/sys/fs/inotify/ */
 static int inotify_max_queued_events __read_mostly;
 
@@ -486,14 +495,10 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark,
 				    struct fsnotify_group *group)
 {
 	struct inotify_inode_mark *i_mark;
-	struct fsnotify_iter_info iter_info = { };
-
-	fsnotify_iter_set_report_type_mark(&iter_info, FSNOTIFY_OBJ_TYPE_INODE,
-					   fsn_mark);
 
 	/* Queue ignore event for the watch */
-	inotify_handle_event(group, FS_IN_IGNORED, NULL, FSNOTIFY_EVENT_NONE,
-			     NULL, NULL, 0, &iter_info);
+	inotify_handle_inode_event(fsn_mark, FS_IN_IGNORED, NULL, NULL, NULL,
+				   0);
 
 	i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark);
 	/* remove this mark from the idr */
@@ -801,6 +806,18 @@ out:
  */
 static int __init inotify_user_setup(void)
 {
+	unsigned long watches_max;
+	struct sysinfo si;
+
+	si_meminfo(&si);
+	/*
+	 * Allow up to 1% of addressable memory to be allocated for inotify
+	 * watches (per user) limited to the range [8192, 1048576].
+	 */
+	watches_max = (((si.totalram - si.totalhigh) / 100) << PAGE_SHIFT) /
+			INOTIFY_WATCH_COST;
+	watches_max = clamp(watches_max, 8192UL, 1048576UL);
+
 	BUILD_BUG_ON(IN_ACCESS != FS_ACCESS);
 	BUILD_BUG_ON(IN_MODIFY != FS_MODIFY);
 	BUILD_BUG_ON(IN_ATTRIB != FS_ATTRIB);
@@ -827,7 +844,7 @@ static int __init inotify_user_setup(void)
 
 	inotify_max_queued_events = 16384;
 	init_user_ns.ucount_max[UCOUNT_INOTIFY_INSTANCES] = 128;
-	init_user_ns.ucount_max[UCOUNT_INOTIFY_WATCHES] = 8192;
+	init_user_ns.ucount_max[UCOUNT_INOTIFY_WATCHES] = watches_max;
 
 	return 0;
 }
diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c
index af375e049aae..ec8ae4257975 100644
--- a/fs/orangefs/file.c
+++ b/fs/orangefs/file.c
@@ -663,6 +663,8 @@ const struct file_operations orangefs_file_operations = {
 	.unlocked_ioctl	= orangefs_ioctl,
 	.mmap		= orangefs_file_mmap,
 	.open		= generic_file_open,
+	.splice_read    = generic_file_splice_read,
+	.splice_write   = iter_file_splice_write,
 	.flush		= orangefs_flush,
 	.release	= orangefs_file_release,
 	.fsync		= orangefs_fsync,
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index 955ecd4030f0..e5b616c93e11 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -275,7 +275,8 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
 	return err;
 }
 
-struct ovl_fh *ovl_encode_real_fh(struct dentry *real, bool is_upper)
+struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct dentry *real,
+				  bool is_upper)
 {
 	struct ovl_fh *fh;
 	int fh_type, dwords;
@@ -319,7 +320,8 @@ struct ovl_fh *ovl_encode_real_fh(struct dentry *real, bool is_upper)
 	if (is_upper)
 		fh->fb.flags |= OVL_FH_FLAG_PATH_UPPER;
 	fh->fb.len = sizeof(fh->fb) + buflen;
-	fh->fb.uuid = *uuid;
+	if (ofs->config.uuid)
+		fh->fb.uuid = *uuid;
 
 	return fh;
 
@@ -328,8 +330,8 @@ out_err:
 	return ERR_PTR(err);
 }
 
-int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
-		   struct dentry *upper)
+int ovl_set_origin(struct ovl_fs *ofs, struct dentry *dentry,
+		   struct dentry *lower, struct dentry *upper)
 {
 	const struct ovl_fh *fh = NULL;
 	int err;
@@ -340,7 +342,7 @@ int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
 	 * up and a pure upper inode.
 	 */
 	if (ovl_can_decode_fh(lower->d_sb)) {
-		fh = ovl_encode_real_fh(lower, false);
+		fh = ovl_encode_real_fh(ofs, lower, false);
 		if (IS_ERR(fh))
 			return PTR_ERR(fh);
 	}
@@ -352,7 +354,8 @@ int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
 				 fh ? fh->fb.len : 0, 0);
 	kfree(fh);
 
-	return err;
+	/* Ignore -EPERM from setting "user.*" on symlink/special */
+	return err == -EPERM ? 0 : err;
 }
 
 /* Store file handle of @upper dir in @index dir entry */
@@ -362,7 +365,7 @@ static int ovl_set_upper_fh(struct ovl_fs *ofs, struct dentry *upper,
 	const struct ovl_fh *fh;
 	int err;
 
-	fh = ovl_encode_real_fh(upper, true);
+	fh = ovl_encode_real_fh(ofs, upper, true);
 	if (IS_ERR(fh))
 		return PTR_ERR(fh);
 
@@ -380,6 +383,7 @@ static int ovl_set_upper_fh(struct ovl_fs *ofs, struct dentry *upper,
 static int ovl_create_index(struct dentry *dentry, struct dentry *origin,
 			    struct dentry *upper)
 {
+	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
 	struct dentry *indexdir = ovl_indexdir(dentry->d_sb);
 	struct inode *dir = d_inode(indexdir);
 	struct dentry *index = NULL;
@@ -402,7 +406,7 @@ static int ovl_create_index(struct dentry *dentry, struct dentry *origin,
 	if (WARN_ON(ovl_test_flag(OVL_INDEX, d_inode(dentry))))
 		return -EIO;
 
-	err = ovl_get_index_name(origin, &name);
+	err = ovl_get_index_name(ofs, origin, &name);
 	if (err)
 		return err;
 
@@ -411,7 +415,7 @@ static int ovl_create_index(struct dentry *dentry, struct dentry *origin,
 	if (IS_ERR(temp))
 		goto free_name;
 
-	err = ovl_set_upper_fh(OVL_FS(dentry->d_sb), upper, temp);
+	err = ovl_set_upper_fh(ofs, upper, temp);
 	if (err)
 		goto out;
 
@@ -521,7 +525,7 @@ static int ovl_copy_up_inode(struct ovl_copy_up_ctx *c, struct dentry *temp)
 	 * hard link.
 	 */
 	if (c->origin) {
-		err = ovl_set_origin(c->dentry, c->lowerpath.dentry, temp);
+		err = ovl_set_origin(ofs, c->dentry, c->lowerpath.dentry, temp);
 		if (err)
 			return err;
 	}
@@ -700,7 +704,7 @@ out_dput:
 static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
 {
 	int err;
-	struct ovl_fs *ofs = c->dentry->d_sb->s_fs_info;
+	struct ovl_fs *ofs = OVL_FS(c->dentry->d_sb);
 	bool to_index = false;
 
 	/*
@@ -722,7 +726,7 @@ static int ovl_do_copy_up(struct ovl_copy_up_ctx *c)
 
 	if (to_index) {
 		c->destdir = ovl_indexdir(c->dentry->d_sb);
-		err = ovl_get_index_name(c->lowerpath.dentry, &c->destname);
+		err = ovl_get_index_name(ofs, c->lowerpath.dentry, &c->destname);
 		if (err)
 			return err;
 	} else if (WARN_ON(!c->parent)) {
diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c
index ed35be3fafc6..41ebf52f1bbc 100644
--- a/fs/overlayfs/export.c
+++ b/fs/overlayfs/export.c
@@ -211,7 +211,8 @@ static int ovl_check_encode_origin(struct dentry *dentry)
 	return 1;
 }
 
-static int ovl_dentry_to_fid(struct dentry *dentry, u32 *fid, int buflen)
+static int ovl_dentry_to_fid(struct ovl_fs *ofs, struct dentry *dentry,
+			     u32 *fid, int buflen)
 {
 	struct ovl_fh *fh = NULL;
 	int err, enc_lower;
@@ -226,7 +227,7 @@ static int ovl_dentry_to_fid(struct dentry *dentry, u32 *fid, int buflen)
 		goto fail;
 
 	/* Encode an upper or lower file handle */
-	fh = ovl_encode_real_fh(enc_lower ? ovl_dentry_lower(dentry) :
+	fh = ovl_encode_real_fh(ofs, enc_lower ? ovl_dentry_lower(dentry) :
 				ovl_dentry_upper(dentry), !enc_lower);
 	if (IS_ERR(fh))
 		return PTR_ERR(fh);
@@ -249,6 +250,7 @@ fail:
 static int ovl_encode_fh(struct inode *inode, u32 *fid, int *max_len,
 			 struct inode *parent)
 {
+	struct ovl_fs *ofs = OVL_FS(inode->i_sb);
 	struct dentry *dentry;
 	int bytes, buflen = *max_len << 2;
 
@@ -260,7 +262,7 @@ static int ovl_encode_fh(struct inode *inode, u32 *fid, int *max_len,
 	if (WARN_ON(!dentry))
 		return FILEID_INVALID;
 
-	bytes = ovl_dentry_to_fid(dentry, fid, buflen);
+	bytes = ovl_dentry_to_fid(ofs, dentry, fid, buflen);
 	dput(dentry);
 	if (bytes <= 0)
 		return FILEID_INVALID;
@@ -680,7 +682,7 @@ static struct dentry *ovl_upper_fh_to_d(struct super_block *sb,
 	if (!ovl_upper_mnt(ofs))
 		return ERR_PTR(-EACCES);
 
-	upper = ovl_decode_real_fh(fh, ovl_upper_mnt(ofs), true);
+	upper = ovl_decode_real_fh(ofs, fh, ovl_upper_mnt(ofs), true);
 	if (IS_ERR_OR_NULL(upper))
 		return upper;
 
diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c
index efccb7c1f9bc..bd9dd38347ae 100644
--- a/fs/overlayfs/file.c
+++ b/fs/overlayfs/file.c
@@ -53,9 +53,10 @@ static struct file *ovl_open_realfile(const struct file *file,
 	err = inode_permission(realinode, MAY_OPEN | acc_mode);
 	if (err) {
 		realfile = ERR_PTR(err);
-	} else if (!inode_owner_or_capable(realinode)) {
-		realfile = ERR_PTR(-EPERM);
 	} else {
+		if (!inode_owner_or_capable(realinode))
+			flags &= ~O_NOATIME;
+
 		realfile = open_with_fake_path(&file->f_path, flags, realinode,
 					       current_cred());
 	}
@@ -75,12 +76,6 @@ static int ovl_change_flags(struct file *file, unsigned int flags)
 	struct inode *inode = file_inode(file);
 	int err;
 
-	flags |= OVL_OPEN_FLAGS;
-
-	/* If some flag changed that cannot be changed then something's amiss */
-	if (WARN_ON((file->f_flags ^ flags) & ~OVL_SETFL_MASK))
-		return -EIO;
-
 	flags &= OVL_SETFL_MASK;
 
 	if (((flags ^ file->f_flags) & O_APPEND) && IS_APPEND(inode))
@@ -397,48 +392,6 @@ out_unlock:
 	return ret;
 }
 
-static ssize_t ovl_splice_read(struct file *in, loff_t *ppos,
-			 struct pipe_inode_info *pipe, size_t len,
-			 unsigned int flags)
-{
-	ssize_t ret;
-	struct fd real;
-	const struct cred *old_cred;
-
-	ret = ovl_real_fdget(in, &real);
-	if (ret)
-		return ret;
-
-	old_cred = ovl_override_creds(file_inode(in)->i_sb);
-	ret = generic_file_splice_read(real.file, ppos, pipe, len, flags);
-	revert_creds(old_cred);
-
-	ovl_file_accessed(in);
-	fdput(real);
-	return ret;
-}
-
-static ssize_t
-ovl_splice_write(struct pipe_inode_info *pipe, struct file *out,
-			  loff_t *ppos, size_t len, unsigned int flags)
-{
-	struct fd real;
-	const struct cred *old_cred;
-	ssize_t ret;
-
-	ret = ovl_real_fdget(out, &real);
-	if (ret)
-		return ret;
-
-	old_cred = ovl_override_creds(file_inode(out)->i_sb);
-	ret = iter_file_splice_write(pipe, real.file, ppos, len, flags);
-	revert_creds(old_cred);
-
-	ovl_file_accessed(out);
-	fdput(real);
-	return ret;
-}
-
 static int ovl_fsync(struct file *file, loff_t start, loff_t end, int datasync)
 {
 	struct fd real;
@@ -541,46 +494,31 @@ static long ovl_real_ioctl(struct file *file, unsigned int cmd,
 			   unsigned long arg)
 {
 	struct fd real;
-	const struct cred *old_cred;
 	long ret;
 
 	ret = ovl_real_fdget(file, &real);
 	if (ret)
 		return ret;
 
-	old_cred = ovl_override_creds(file_inode(file)->i_sb);
 	ret = security_file_ioctl(real.file, cmd, arg);
-	if (!ret)
+	if (!ret) {
+		/*
+		 * Don't override creds, since we currently can't safely check
+		 * permissions before doing so.
+		 */
 		ret = vfs_ioctl(real.file, cmd, arg);
-	revert_creds(old_cred);
+	}
 
 	fdput(real);
 
 	return ret;
 }
 
-static unsigned int ovl_iflags_to_fsflags(unsigned int iflags)
-{
-	unsigned int flags = 0;
-
-	if (iflags & S_SYNC)
-		flags |= FS_SYNC_FL;
-	if (iflags & S_APPEND)
-		flags |= FS_APPEND_FL;
-	if (iflags & S_IMMUTABLE)
-		flags |= FS_IMMUTABLE_FL;
-	if (iflags & S_NOATIME)
-		flags |= FS_NOATIME_FL;
-
-	return flags;
-}
-
 static long ovl_ioctl_set_flags(struct file *file, unsigned int cmd,
-				unsigned long arg, unsigned int flags)
+				unsigned long arg)
 {
 	long ret;
 	struct inode *inode = file_inode(file);
-	unsigned int oldflags;
 
 	if (!inode_owner_or_capable(inode))
 		return -EACCES;
@@ -591,10 +529,13 @@ static long ovl_ioctl_set_flags(struct file *file, unsigned int cmd,
 
 	inode_lock(inode);
 
-	/* Check the capability before cred override */
-	oldflags = ovl_iflags_to_fsflags(READ_ONCE(inode->i_flags));
-	ret = vfs_ioc_setflags_prepare(inode, oldflags, flags);
-	if (ret)
+	/*
+	 * Prevent copy up if immutable and has no CAP_LINUX_IMMUTABLE
+	 * capability.
+	 */
+	ret = -EPERM;
+	if (!ovl_has_upperdata(inode) && IS_IMMUTABLE(inode) &&
+	    !capable(CAP_LINUX_IMMUTABLE))
 		goto unlock;
 
 	ret = ovl_maybe_copy_up(file_dentry(file), O_WRONLY);
@@ -613,46 +554,6 @@ unlock:
 
 }
 
-static long ovl_ioctl_set_fsflags(struct file *file, unsigned int cmd,
-				  unsigned long arg)
-{
-	unsigned int flags;
-
-	if (get_user(flags, (int __user *) arg))
-		return -EFAULT;
-
-	return ovl_ioctl_set_flags(file, cmd, arg, flags);
-}
-
-static unsigned int ovl_fsxflags_to_fsflags(unsigned int xflags)
-{
-	unsigned int flags = 0;
-
-	if (xflags & FS_XFLAG_SYNC)
-		flags |= FS_SYNC_FL;
-	if (xflags & FS_XFLAG_APPEND)
-		flags |= FS_APPEND_FL;
-	if (xflags & FS_XFLAG_IMMUTABLE)
-		flags |= FS_IMMUTABLE_FL;
-	if (xflags & FS_XFLAG_NOATIME)
-		flags |= FS_NOATIME_FL;
-
-	return flags;
-}
-
-static long ovl_ioctl_set_fsxflags(struct file *file, unsigned int cmd,
-				   unsigned long arg)
-{
-	struct fsxattr fa;
-
-	memset(&fa, 0, sizeof(fa));
-	if (copy_from_user(&fa, (void __user *) arg, sizeof(fa)))
-		return -EFAULT;
-
-	return ovl_ioctl_set_flags(file, cmd, arg,
-				   ovl_fsxflags_to_fsflags(fa.fsx_xflags));
-}
-
 long ovl_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
 	long ret;
@@ -663,12 +564,9 @@ long ovl_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 		ret = ovl_real_ioctl(file, cmd, arg);
 		break;
 
-	case FS_IOC_SETFLAGS:
-		ret = ovl_ioctl_set_fsflags(file, cmd, arg);
-		break;
-
 	case FS_IOC_FSSETXATTR:
-		ret = ovl_ioctl_set_fsxflags(file, cmd, arg);
+	case FS_IOC_SETFLAGS:
+		ret = ovl_ioctl_set_flags(file, cmd, arg);
 		break;
 
 	default:
@@ -801,8 +699,8 @@ const struct file_operations ovl_file_operations = {
 #ifdef CONFIG_COMPAT
 	.compat_ioctl	= ovl_compat_ioctl,
 #endif
-	.splice_read    = ovl_splice_read,
-	.splice_write   = ovl_splice_write,
+	.splice_read    = generic_file_splice_read,
+	.splice_write   = iter_file_splice_write,
 
 	.copy_file_range	= ovl_copy_file_range,
 	.remap_file_range	= ovl_remap_file_range,
diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c
index b584dca845ba..d739e14c6814 100644
--- a/fs/overlayfs/inode.c
+++ b/fs/overlayfs/inode.c
@@ -329,8 +329,14 @@ static const char *ovl_get_link(struct dentry *dentry,
 
 bool ovl_is_private_xattr(struct super_block *sb, const char *name)
 {
-	return strncmp(name, OVL_XATTR_PREFIX,
-		       sizeof(OVL_XATTR_PREFIX) - 1) == 0;
+	struct ovl_fs *ofs = sb->s_fs_info;
+
+	if (ofs->config.userxattr)
+		return strncmp(name, OVL_XATTR_USER_PREFIX,
+			       sizeof(OVL_XATTR_USER_PREFIX) - 1) == 0;
+	else
+		return strncmp(name, OVL_XATTR_TRUSTED_PREFIX,
+			       sizeof(OVL_XATTR_TRUSTED_PREFIX) - 1) == 0;
 }
 
 int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name,
@@ -476,7 +482,7 @@ static int ovl_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
 		      u64 start, u64 len)
 {
 	int err;
-	struct inode *realinode = ovl_inode_real(inode);
+	struct inode *realinode = ovl_inode_realdata(inode);
 	const struct cred *old_cred;
 
 	if (!realinode->i_op->fiemap)
@@ -690,7 +696,7 @@ static void ovl_fill_inode(struct inode *inode, umode_t mode, dev_t rdev)
  * For the first, copy up case, the union nlink does not change, whether the
  * operation succeeds or fails, but the upper inode nlink may change.
  * Therefore, before copy up, we store the union nlink value relative to the
- * lower inode nlink in the index inode xattr trusted.overlay.nlink.
+ * lower inode nlink in the index inode xattr .overlay.nlink.
  *
  * For the second, upper hardlink case, the union nlink should be incremented
  * or decremented IFF the operation succeeds, aligned with nlink change of the
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index a6162c4076db..3fe05fb5d145 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -150,17 +150,22 @@ invalid:
 	goto out;
 }
 
-struct dentry *ovl_decode_real_fh(struct ovl_fh *fh, struct vfsmount *mnt,
-				  bool connected)
+struct dentry *ovl_decode_real_fh(struct ovl_fs *ofs, struct ovl_fh *fh,
+				  struct vfsmount *mnt, bool connected)
 {
 	struct dentry *real;
 	int bytes;
 
+	if (!capable(CAP_DAC_READ_SEARCH))
+		return NULL;
+
 	/*
 	 * Make sure that the stored uuid matches the uuid of the lower
 	 * layer where file handle will be decoded.
+	 * In case of uuid=off option just make sure that stored uuid is null.
 	 */
-	if (!uuid_equal(&fh->fb.uuid, &mnt->mnt_sb->s_uuid))
+	if (ofs->config.uuid ? !uuid_equal(&fh->fb.uuid, &mnt->mnt_sb->s_uuid) :
+			      !uuid_is_null(&fh->fb.uuid))
 		return NULL;
 
 	bytes = (fh->fb.len - offsetof(struct ovl_fb, fid));
@@ -354,7 +359,7 @@ int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected,
 		    ofs->layers[i].fs->bad_uuid)
 			continue;
 
-		origin = ovl_decode_real_fh(fh, ofs->layers[i].mnt,
+		origin = ovl_decode_real_fh(ofs, fh, ofs->layers[i].mnt,
 					    connected);
 		if (origin)
 			break;
@@ -450,7 +455,7 @@ int ovl_verify_set_fh(struct ovl_fs *ofs, struct dentry *dentry,
 	struct ovl_fh *fh;
 	int err;
 
-	fh = ovl_encode_real_fh(real, is_upper);
+	fh = ovl_encode_real_fh(ofs, real, is_upper);
 	err = PTR_ERR(fh);
 	if (IS_ERR(fh)) {
 		fh = NULL;
@@ -488,7 +493,7 @@ struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index)
 	if (IS_ERR_OR_NULL(fh))
 		return ERR_CAST(fh);
 
-	upper = ovl_decode_real_fh(fh, ovl_upper_mnt(ofs), true);
+	upper = ovl_decode_real_fh(ofs, fh, ovl_upper_mnt(ofs), true);
 	kfree(fh);
 
 	if (IS_ERR_OR_NULL(upper))
@@ -640,12 +645,13 @@ static int ovl_get_index_name_fh(struct ovl_fh *fh, struct qstr *name)
  * index dir was cleared. Either way, that index cannot be used to indentify
  * the overlay inode.
  */
-int ovl_get_index_name(struct dentry *origin, struct qstr *name)
+int ovl_get_index_name(struct ovl_fs *ofs, struct dentry *origin,
+		       struct qstr *name)
 {
 	struct ovl_fh *fh;
 	int err;
 
-	fh = ovl_encode_real_fh(origin, false);
+	fh = ovl_encode_real_fh(ofs, origin, false);
 	if (IS_ERR(fh))
 		return PTR_ERR(fh);
 
@@ -694,7 +700,7 @@ struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
 	bool is_dir = d_is_dir(origin);
 	int err;
 
-	err = ovl_get_index_name(origin, &name);
+	err = ovl_get_index_name(ofs, origin, &name);
 	if (err)
 		return ERR_PTR(err);
 
@@ -805,7 +811,7 @@ static int ovl_fix_origin(struct ovl_fs *ofs, struct dentry *dentry,
 	if (err)
 		return err;
 
-	err = ovl_set_origin(dentry, lower, upper);
+	err = ovl_set_origin(ofs, dentry, lower, upper);
 	if (!err)
 		err = ovl_set_impure(dentry->d_parent, upper->d_parent);
 
@@ -1003,6 +1009,8 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
 	 * Just make sure a corresponding data dentry has been found.
 	 */
 	if (d.metacopy || (uppermetacopy && !ctr)) {
+		pr_warn_ratelimited("metacopy with no lower data found - abort lookup (%pd2)\n",
+				    dentry);
 		err = -EIO;
 		goto out_put;
 	} else if (!d.is_dir && upperdentry && !ctr && origin_path) {
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index f8880aa2ba0e..b487e48c7fd4 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -22,7 +22,9 @@ enum ovl_path_type {
 #define OVL_TYPE_MERGE(type)	((type) & __OVL_PATH_MERGE)
 #define OVL_TYPE_ORIGIN(type)	((type) & __OVL_PATH_ORIGIN)
 
-#define OVL_XATTR_PREFIX XATTR_TRUSTED_PREFIX "overlay."
+#define OVL_XATTR_NAMESPACE "overlay."
+#define OVL_XATTR_TRUSTED_PREFIX XATTR_TRUSTED_PREFIX OVL_XATTR_NAMESPACE
+#define OVL_XATTR_USER_PREFIX XATTR_USER_PREFIX OVL_XATTR_NAMESPACE
 
 enum ovl_xattr {
 	OVL_XATTR_OPAQUE,
@@ -113,10 +115,10 @@ struct ovl_fh {
 #define OVL_FH_FID_OFFSET	(OVL_FH_WIRE_OFFSET + \
 				 offsetof(struct ovl_fb, fid))
 
-extern const char *ovl_xattr_table[];
+extern const char *const ovl_xattr_table[][2];
 static inline const char *ovl_xattr(struct ovl_fs *ofs, enum ovl_xattr ox)
 {
-	return ovl_xattr_table[ox];
+	return ovl_xattr_table[ox][ofs->config.userxattr];
 }
 
 static inline int ovl_do_rmdir(struct inode *dir, struct dentry *dentry)
@@ -383,8 +385,8 @@ static inline int ovl_check_fh_len(struct ovl_fh *fh, int fh_len)
 	return ovl_check_fb_len(&fh->fb, fh_len - OVL_FH_WIRE_OFFSET);
 }
 
-struct dentry *ovl_decode_real_fh(struct ovl_fh *fh, struct vfsmount *mnt,
-				  bool connected);
+struct dentry *ovl_decode_real_fh(struct ovl_fs *ofs, struct ovl_fh *fh,
+				  struct vfsmount *mnt, bool connected);
 int ovl_check_origin_fh(struct ovl_fs *ofs, struct ovl_fh *fh, bool connected,
 			struct dentry *upperdentry, struct ovl_path **stackp);
 int ovl_verify_set_fh(struct ovl_fs *ofs, struct dentry *dentry,
@@ -392,7 +394,8 @@ int ovl_verify_set_fh(struct ovl_fs *ofs, struct dentry *dentry,
 		      bool set);
 struct dentry *ovl_index_upper(struct ovl_fs *ofs, struct dentry *index);
 int ovl_verify_index(struct ovl_fs *ofs, struct dentry *index);
-int ovl_get_index_name(struct dentry *origin, struct qstr *name);
+int ovl_get_index_name(struct ovl_fs *ofs, struct dentry *origin,
+		       struct qstr *name);
 struct dentry *ovl_get_index_fh(struct ovl_fs *ofs, struct ovl_fh *fh);
 struct dentry *ovl_lookup_index(struct ovl_fs *ofs, struct dentry *upper,
 				struct dentry *origin, bool verify);
@@ -514,9 +517,10 @@ int ovl_maybe_copy_up(struct dentry *dentry, int flags);
 int ovl_copy_xattr(struct super_block *sb, struct dentry *old,
 		   struct dentry *new);
 int ovl_set_attr(struct dentry *upper, struct kstat *stat);
-struct ovl_fh *ovl_encode_real_fh(struct dentry *real, bool is_upper);
-int ovl_set_origin(struct dentry *dentry, struct dentry *lower,
-		   struct dentry *upper);
+struct ovl_fh *ovl_encode_real_fh(struct ovl_fs *ofs, struct dentry *real,
+				  bool is_upper);
+int ovl_set_origin(struct ovl_fs *ofs, struct dentry *dentry,
+		   struct dentry *lower, struct dentry *upper);
 
 /* export.c */
 extern const struct export_operations ovl_export_operations;
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index 1b5a2094df8e..fbd5e27ce66b 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -14,9 +14,11 @@ struct ovl_config {
 	bool redirect_follow;
 	const char *redirect_mode;
 	bool index;
+	bool uuid;
 	bool nfs_export;
 	int xino;
 	bool metacopy;
+	bool userxattr;
 	bool ovl_volatile;
 };
 
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 290983bcfbb3..2bd570cbe8a4 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -79,7 +79,7 @@ static void ovl_dentry_release(struct dentry *dentry)
 static struct dentry *ovl_d_real(struct dentry *dentry,
 				 const struct inode *inode)
 {
-	struct dentry *real;
+	struct dentry *real = NULL, *lower;
 
 	/* It's an overlay file */
 	if (inode && d_inode(dentry) == inode)
@@ -98,9 +98,10 @@ static struct dentry *ovl_d_real(struct dentry *dentry,
 	if (real && !inode && ovl_has_upperdata(d_inode(dentry)))
 		return real;
 
-	real = ovl_dentry_lowerdata(dentry);
-	if (!real)
+	lower = ovl_dentry_lowerdata(dentry);
+	if (!lower)
 		goto bug;
+	real = lower;
 
 	/* Handle recursion */
 	real = d_real(real, inode);
@@ -108,8 +109,10 @@ static struct dentry *ovl_d_real(struct dentry *dentry,
 	if (!inode || inode == d_inode(real))
 		return real;
 bug:
-	WARN(1, "ovl_d_real(%pd4, %s:%lu): real dentry not found\n", dentry,
-	     inode ? inode->i_sb->s_id : "NULL", inode ? inode->i_ino : 0);
+	WARN(1, "%s(%pd4, %s:%lu): real dentry (%p/%lu) not found\n",
+	     __func__, dentry, inode ? inode->i_sb->s_id : "NULL",
+	     inode ? inode->i_ino : 0, real,
+	     real && d_inode(real) ? d_inode(real)->i_ino : 0);
 	return dentry;
 }
 
@@ -356,6 +359,8 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
 		seq_printf(m, ",redirect_dir=%s", ofs->config.redirect_mode);
 	if (ofs->config.index != ovl_index_def)
 		seq_printf(m, ",index=%s", ofs->config.index ? "on" : "off");
+	if (!ofs->config.uuid)
+		seq_puts(m, ",uuid=off");
 	if (ofs->config.nfs_export != ovl_nfs_export_def)
 		seq_printf(m, ",nfs_export=%s", ofs->config.nfs_export ?
 						"on" : "off");
@@ -410,7 +415,10 @@ enum {
 	OPT_REDIRECT_DIR,
 	OPT_INDEX_ON,
 	OPT_INDEX_OFF,
+	OPT_UUID_ON,
+	OPT_UUID_OFF,
 	OPT_NFS_EXPORT_ON,
+	OPT_USERXATTR,
 	OPT_NFS_EXPORT_OFF,
 	OPT_XINO_ON,
 	OPT_XINO_OFF,
@@ -429,6 +437,9 @@ static const match_table_t ovl_tokens = {
 	{OPT_REDIRECT_DIR,		"redirect_dir=%s"},
 	{OPT_INDEX_ON,			"index=on"},
 	{OPT_INDEX_OFF,			"index=off"},
+	{OPT_USERXATTR,			"userxattr"},
+	{OPT_UUID_ON,			"uuid=on"},
+	{OPT_UUID_OFF,			"uuid=off"},
 	{OPT_NFS_EXPORT_ON,		"nfs_export=on"},
 	{OPT_NFS_EXPORT_OFF,		"nfs_export=off"},
 	{OPT_XINO_ON,			"xino=on"},
@@ -549,6 +560,14 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
 			index_opt = true;
 			break;
 
+		case OPT_UUID_ON:
+			config->uuid = true;
+			break;
+
+		case OPT_UUID_OFF:
+			config->uuid = false;
+			break;
+
 		case OPT_NFS_EXPORT_ON:
 			config->nfs_export = true;
 			nfs_export_opt = true;
@@ -585,6 +604,10 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
 			config->ovl_volatile = true;
 			break;
 
+		case OPT_USERXATTR:
+			config->userxattr = true;
+			break;
+
 		default:
 			pr_err("unrecognized mount option \"%s\" or missing value\n",
 					p);
@@ -688,6 +711,28 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
 		}
 	}
 
+
+	/* Resolve userxattr -> !redirect && !metacopy dependency */
+	if (config->userxattr) {
+		if (config->redirect_follow && redirect_opt) {
+			pr_err("conflicting options: userxattr,redirect_dir=%s\n",
+			       config->redirect_mode);
+			return -EINVAL;
+		}
+		if (config->metacopy && metacopy_opt) {
+			pr_err("conflicting options: userxattr,metacopy=on\n");
+			return -EINVAL;
+		}
+		/*
+		 * Silently disable default setting of redirect and metacopy.
+		 * This shall be the default in the future as well: these
+		 * options must be explicitly enabled if used together with
+		 * userxattr.
+		 */
+		config->redirect_dir = config->redirect_follow = false;
+		config->metacopy = false;
+	}
+
 	return 0;
 }
 
@@ -1037,8 +1082,14 @@ ovl_posix_acl_default_xattr_handler = {
 	.set = ovl_posix_acl_xattr_set,
 };
 
-static const struct xattr_handler ovl_own_xattr_handler = {
-	.prefix	= OVL_XATTR_PREFIX,
+static const struct xattr_handler ovl_own_trusted_xattr_handler = {
+	.prefix	= OVL_XATTR_TRUSTED_PREFIX,
+	.get = ovl_own_xattr_get,
+	.set = ovl_own_xattr_set,
+};
+
+static const struct xattr_handler ovl_own_user_xattr_handler = {
+	.prefix	= OVL_XATTR_USER_PREFIX,
 	.get = ovl_own_xattr_get,
 	.set = ovl_own_xattr_set,
 };
@@ -1049,12 +1100,22 @@ static const struct xattr_handler ovl_other_xattr_handler = {
 	.set = ovl_other_xattr_set,
 };
 
-static const struct xattr_handler *ovl_xattr_handlers[] = {
+static const struct xattr_handler *ovl_trusted_xattr_handlers[] = {
 #ifdef CONFIG_FS_POSIX_ACL
 	&ovl_posix_acl_access_xattr_handler,
 	&ovl_posix_acl_default_xattr_handler,
 #endif
-	&ovl_own_xattr_handler,
+	&ovl_own_trusted_xattr_handler,
+	&ovl_other_xattr_handler,
+	NULL
+};
+
+static const struct xattr_handler *ovl_user_xattr_handlers[] = {
+#ifdef CONFIG_FS_POSIX_ACL
+	&ovl_posix_acl_access_xattr_handler,
+	&ovl_posix_acl_default_xattr_handler,
+#endif
+	&ovl_own_user_xattr_handler,
 	&ovl_other_xattr_handler,
 	NULL
 };
@@ -1317,7 +1378,7 @@ static int ovl_make_workdir(struct super_block *sb, struct ovl_fs *ofs,
 		pr_warn("upper fs does not support RENAME_WHITEOUT.\n");
 
 	/*
-	 * Check if upper/work fs supports trusted.overlay.* xattr
+	 * Check if upper/work fs supports (trusted|user).overlay.* xattr
 	 */
 	err = ovl_do_setxattr(ofs, ofs->workdir, OVL_XATTR_OPAQUE, "0", 1);
 	if (err) {
@@ -1456,10 +1517,10 @@ static int ovl_get_indexdir(struct super_block *sb, struct ovl_fs *ofs,
 
 		/*
 		 * Verify upper root is exclusively associated with index dir.
-		 * Older kernels stored upper fh in "trusted.overlay.origin"
+		 * Older kernels stored upper fh in ".overlay.origin"
 		 * xattr. If that xattr exists, verify that it is a match to
 		 * upper dir file handle. In any case, verify or set xattr
-		 * "trusted.overlay.upper" to indicate that index may have
+		 * ".overlay.upper" to indicate that index may have
 		 * directory entries.
 		 */
 		if (ovl_check_origin_xattr(ofs, ofs->indexdir)) {
@@ -1877,6 +1938,7 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 	ofs->share_whiteout = true;
 
 	ofs->config.index = ovl_index_def;
+	ofs->config.uuid = true;
 	ofs->config.nfs_export = ovl_nfs_export_def;
 	ofs->config.xino = ovl_xino_def();
 	ofs->config.metacopy = ovl_metacopy_def;
@@ -1956,6 +2018,11 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 	if (!ovl_upper_mnt(ofs))
 		sb->s_flags |= SB_RDONLY;
 
+	if (!ofs->config.uuid && ofs->numfs > 1) {
+		pr_warn("The uuid=off requires a single fs for lower and upper, falling back to uuid=on.\n");
+		ofs->config.uuid = true;
+	}
+
 	if (!ovl_force_readonly(ofs) && ofs->config.index) {
 		err = ovl_get_indexdir(sb, ofs, oe, &upperpath);
 		if (err)
@@ -1991,7 +2058,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
 	cap_lower(cred->cap_effective, CAP_SYS_RESOURCE);
 
 	sb->s_magic = OVERLAYFS_SUPER_MAGIC;
-	sb->s_xattr = ovl_xattr_handlers;
+	sb->s_xattr = ofs->config.userxattr ? ovl_user_xattr_handlers :
+		ovl_trusted_xattr_handlers;
 	sb->s_fs_info = ofs;
 	sb->s_flags |= SB_POSIXACL;
 	sb->s_iflags |= SB_I_SKIP_SYNC;
@@ -2028,6 +2096,7 @@ static struct dentry *ovl_mount(struct file_system_type *fs_type, int flags,
 static struct file_system_type ovl_fs_type = {
 	.owner		= THIS_MODULE,
 	.name		= "overlay",
+	.fs_flags	= FS_USERNS_MOUNT,
 	.mount		= ovl_mount,
 	.kill_sb	= kill_anon_super,
 };
diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c
index 23f475627d07..6569031af3cd 100644
--- a/fs/overlayfs/util.c
+++ b/fs/overlayfs/util.c
@@ -50,6 +50,9 @@ const struct cred *ovl_override_creds(struct super_block *sb)
  */
 int ovl_can_decode_fh(struct super_block *sb)
 {
+	if (!capable(CAP_DAC_READ_SEARCH))
+		return 0;
+
 	if (!sb->s_export_op || !sb->s_export_op->fh_to_dentry)
 		return 0;
 
@@ -582,9 +585,10 @@ bool ovl_check_dir_xattr(struct super_block *sb, struct dentry *dentry,
 #define OVL_XATTR_METACOPY_POSTFIX	"metacopy"
 
 #define OVL_XATTR_TAB_ENTRY(x) \
-	[x] = OVL_XATTR_PREFIX x ## _POSTFIX
+	[x] = { [false] = OVL_XATTR_TRUSTED_PREFIX x ## _POSTFIX, \
+		[true] = OVL_XATTR_USER_PREFIX x ## _POSTFIX }
 
-const char *ovl_xattr_table[] = {
+const char *const ovl_xattr_table[][2] = {
 	OVL_XATTR_TAB_ENTRY(OVL_XATTR_OPAQUE),
 	OVL_XATTR_TAB_ENTRY(OVL_XATTR_REDIRECT),
 	OVL_XATTR_TAB_ENTRY(OVL_XATTR_ORIGIN),
@@ -716,6 +720,7 @@ bool ovl_need_index(struct dentry *dentry)
 /* Caller must hold OVL_I(inode)->lock */
 static void ovl_cleanup_index(struct dentry *dentry)
 {
+	struct ovl_fs *ofs = OVL_FS(dentry->d_sb);
 	struct dentry *indexdir = ovl_indexdir(dentry->d_sb);
 	struct inode *dir = indexdir->d_inode;
 	struct dentry *lowerdentry = ovl_dentry_lower(dentry);
@@ -725,7 +730,7 @@ static void ovl_cleanup_index(struct dentry *dentry)
 	struct qstr name = { };
 	int err;
 
-	err = ovl_get_index_name(lowerdentry, &name);
+	err = ovl_get_index_name(ofs, lowerdentry, &name);
 	if (err)
 		goto fail;
 
@@ -879,6 +884,13 @@ int ovl_check_metacopy_xattr(struct ovl_fs *ofs, struct dentry *dentry)
 	if (res < 0) {
 		if (res == -ENODATA || res == -EOPNOTSUPP)
 			return 0;
+		/*
+		 * getxattr on user.* may fail with EACCES in case there's no
+		 * read permission on the inode.  Not much we can do, other than
+		 * tell the caller that this is not a metacopy inode.
+		 */
+		if (ofs->config.userxattr && res == -EACCES)
+			return 0;
 		goto out;
 	}
 
diff --git a/fs/pstore/ftrace.c b/fs/pstore/ftrace.c
index 5c0450701293..5939595f0115 100644
--- a/fs/pstore/ftrace.c
+++ b/fs/pstore/ftrace.c
@@ -26,8 +26,9 @@ static u64 pstore_ftrace_stamp;
 static void notrace pstore_ftrace_call(unsigned long ip,
 				       unsigned long parent_ip,
 				       struct ftrace_ops *op,
-				       struct pt_regs *regs)
+				       struct ftrace_regs *fregs)
 {
+	int bit;
 	unsigned long flags;
 	struct pstore_ftrace_record rec = {};
 	struct pstore_record record = {
@@ -40,6 +41,10 @@ static void notrace pstore_ftrace_call(unsigned long ip,
 	if (unlikely(oops_in_progress))
 		return;
 
+	bit = ftrace_test_recursion_trylock(ip, parent_ip);
+	if (bit < 0)
+		return;
+
 	local_irq_save(flags);
 
 	rec.ip = ip;
@@ -49,6 +54,7 @@ static void notrace pstore_ftrace_call(unsigned long ip,
 	psinfo->write(&record);
 
 	local_irq_restore(flags);
+	ftrace_test_recursion_unlock(bit);
 }
 
 static struct ftrace_ops pstore_ftrace_ops __read_mostly = {
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index bb02989d92b6..4f1373463766 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -2455,7 +2455,7 @@ int dquot_resume(struct super_block *sb, int type)
 		ret = dquot_load_quota_sb(sb, cnt, dqopt->info[cnt].dqi_fmt_id,
 					  flags);
 		if (ret < 0)
-			vfs_cleanup_quota_inode(sb, type);
+			vfs_cleanup_quota_inode(sb, cnt);
 	}
 
 	return ret;
diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c
index a6f856f341dc..c5562c871c8b 100644
--- a/fs/quota/quota_tree.c
+++ b/fs/quota/quota_tree.c
@@ -62,7 +62,7 @@ static ssize_t read_blk(struct qtree_mem_dqinfo *info, uint blk, char *buf)
 
 	memset(buf, 0, info->dqi_usable_bs);
 	return sb->s_op->quota_read(sb, info->dqi_type, buf,
-	       info->dqi_usable_bs, blk << info->dqi_blocksize_bits);
+	       info->dqi_usable_bs, (loff_t)blk << info->dqi_blocksize_bits);
 }
 
 static ssize_t write_blk(struct qtree_mem_dqinfo *info, uint blk, char *buf)
@@ -71,7 +71,7 @@ static ssize_t write_blk(struct qtree_mem_dqinfo *info, uint blk, char *buf)
 	ssize_t ret;
 
 	ret = sb->s_op->quota_write(sb, info->dqi_type, buf,
-	       info->dqi_usable_bs, blk << info->dqi_blocksize_bits);
+	       info->dqi_usable_bs, (loff_t)blk << info->dqi_blocksize_bits);
 	if (ret != info->dqi_usable_bs) {
 		quota_error(sb, "dquota write failed");
 		if (ret >= 0)
@@ -284,7 +284,7 @@ static uint find_free_dqentry(struct qtree_mem_dqinfo *info,
 			    blk);
 		goto out_buf;
 	}
-	dquot->dq_off = (blk << info->dqi_blocksize_bits) +
+	dquot->dq_off = ((loff_t)blk << info->dqi_blocksize_bits) +
 			sizeof(struct qt_disk_dqdbheader) +
 			i * info->dqi_entry_size;
 	kfree(buf);
@@ -559,7 +559,7 @@ static loff_t find_block_dqentry(struct qtree_mem_dqinfo *info,
 		ret = -EIO;
 		goto out_buf;
 	} else {
-		ret = (blk << info->dqi_blocksize_bits) + sizeof(struct
+		ret = ((loff_t)blk << info->dqi_blocksize_bits) + sizeof(struct
 		  qt_disk_dqdbheader) + i * info->dqi_entry_size;
 	}
 out_buf:
diff --git a/fs/quota/quota_v2.c b/fs/quota/quota_v2.c
index e69a2bfdd81c..c21106557a37 100644
--- a/fs/quota/quota_v2.c
+++ b/fs/quota/quota_v2.c
@@ -157,6 +157,25 @@ static int v2_read_file_info(struct super_block *sb, int type)
 		qinfo->dqi_entry_size = sizeof(struct v2r1_disk_dqblk);
 		qinfo->dqi_ops = &v2r1_qtree_ops;
 	}
+	ret = -EUCLEAN;
+	/* Some sanity checks of the read headers... */
+	if ((loff_t)qinfo->dqi_blocks << qinfo->dqi_blocksize_bits >
+	    i_size_read(sb_dqopt(sb)->files[type])) {
+		quota_error(sb, "Number of blocks too big for quota file size (%llu > %llu).",
+		    (loff_t)qinfo->dqi_blocks << qinfo->dqi_blocksize_bits,
+		    i_size_read(sb_dqopt(sb)->files[type]));
+		goto out;
+	}
+	if (qinfo->dqi_free_blk >= qinfo->dqi_blocks) {
+		quota_error(sb, "Free block number too big (%u >= %u).",
+			    qinfo->dqi_free_blk, qinfo->dqi_blocks);
+		goto out;
+	}
+	if (qinfo->dqi_free_entry >= qinfo->dqi_blocks) {
+		quota_error(sb, "Block with free entry too big (%u >= %u).",
+			    qinfo->dqi_free_entry, qinfo->dqi_blocks);
+		goto out;
+	}
 	ret = 0;
 out:
 	up_read(&dqopt->dqio_sem);
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index 8bf88d690729..476a7ff49482 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -454,6 +454,12 @@ static int is_leaf(char *buf, int blocksize, struct buffer_head *bh)
 					 "(second one): %h", ih);
 			return 0;
 		}
+		if (is_direntry_le_ih(ih) && (ih_item_len(ih) < (ih_entry_count(ih) * IH_SIZE))) {
+			reiserfs_warning(NULL, "reiserfs-5093",
+					 "item entry count seems wrong %h",
+					 ih);
+			return 0;
+		}
 		prev_location = ih_location(ih);
 	}
 
diff --git a/fs/remap_range.c b/fs/remap_range.c
index e6099beefa97..77dba3a49e65 100644
--- a/fs/remap_range.c
+++ b/fs/remap_range.c
@@ -456,8 +456,16 @@ loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos,
 	if (ret)
 		return ret;
 
+	/*
+	 * This is redundant if called from vfs_dedupe_file_range(), but other
+	 * callers need it and it's not performance sesitive...
+	 */
+	ret = remap_verify_area(src_file, src_pos, len, false);
+	if (ret)
+		goto out_drop_write;
+
 	ret = remap_verify_area(dst_file, dst_pos, len, true);
-	if (ret < 0)
+	if (ret)
 		goto out_drop_write;
 
 	ret = -EPERM;
diff --git a/fs/ubifs/auth.c b/fs/ubifs/auth.c
index 0886d835f597..51a7c8c2c3f0 100644
--- a/fs/ubifs/auth.c
+++ b/fs/ubifs/auth.c
@@ -337,8 +337,10 @@ int ubifs_init_authentication(struct ubifs_info *c)
 	c->authenticated = true;
 
 	c->log_hash = ubifs_hash_get_desc(c);
-	if (IS_ERR(c->log_hash))
+	if (IS_ERR(c->log_hash)) {
+		err = PTR_ERR(c->log_hash);
 		goto out_free_hmac;
+	}
 
 	err = 0;
 
diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c
index b5cdac9b0368..c4fc1047fc07 100644
--- a/fs/ubifs/commit.c
+++ b/fs/ubifs/commit.c
@@ -701,13 +701,13 @@ out:
 
 out_dump:
 	ubifs_err(c, "dumping index node (iip=%d)", i->iip);
-	ubifs_dump_node(c, idx);
+	ubifs_dump_node(c, idx, ubifs_idx_node_sz(c, c->fanout));
 	list_del(&i->list);
 	kfree(i);
 	if (!list_empty(&list)) {
 		i = list_entry(list.prev, struct idx_node, list);
 		ubifs_err(c, "dumping parent index node");
-		ubifs_dump_node(c, &i->idx);
+		ubifs_dump_node(c, &i->idx, ubifs_idx_node_sz(c, c->fanout));
 	}
 out_free:
 	while (!list_empty(&list)) {
diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c
index ebff43f8009c..1bbb9fe661b1 100644
--- a/fs/ubifs/debug.c
+++ b/fs/ubifs/debug.c
@@ -291,9 +291,9 @@ void ubifs_dump_inode(struct ubifs_info *c, const struct inode *inode)
 	kfree(pdent);
 }
 
-void ubifs_dump_node(const struct ubifs_info *c, const void *node)
+void ubifs_dump_node(const struct ubifs_info *c, const void *node, int node_len)
 {
-	int i, n;
+	int i, n, type, safe_len, max_node_len, min_node_len;
 	union ubifs_key key;
 	const struct ubifs_ch *ch = node;
 	char key_buf[DBG_KEY_BUF_LEN];
@@ -306,10 +306,40 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node)
 		return;
 	}
 
+	/* Skip dumping unknown type node */
+	type = ch->node_type;
+	if (type < 0 || type >= UBIFS_NODE_TYPES_CNT) {
+		pr_err("node type %d was not recognized\n", type);
+		return;
+	}
+
 	spin_lock(&dbg_lock);
 	dump_ch(node);
 
-	switch (ch->node_type) {
+	if (c->ranges[type].max_len == 0) {
+		max_node_len = min_node_len = c->ranges[type].len;
+	} else {
+		max_node_len = c->ranges[type].max_len;
+		min_node_len = c->ranges[type].min_len;
+	}
+	safe_len = le32_to_cpu(ch->len);
+	safe_len = safe_len > 0 ? safe_len : 0;
+	safe_len = min3(safe_len, max_node_len, node_len);
+	if (safe_len < min_node_len) {
+		pr_err("node len(%d) is too short for %s, left %d bytes:\n",
+		       safe_len, dbg_ntype(type),
+		       safe_len > UBIFS_CH_SZ ?
+		       safe_len - (int)UBIFS_CH_SZ : 0);
+		if (safe_len > UBIFS_CH_SZ)
+			print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 32, 1,
+				       (void *)node + UBIFS_CH_SZ,
+				       safe_len - UBIFS_CH_SZ, 0);
+		goto out_unlock;
+	}
+	if (safe_len != le32_to_cpu(ch->len))
+		pr_err("\ttruncated node length      %d\n", safe_len);
+
+	switch (type) {
 	case UBIFS_PAD_NODE:
 	{
 		const struct ubifs_pad_node *pad = node;
@@ -453,7 +483,8 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node)
 		pr_err("\tnlen           %d\n", nlen);
 		pr_err("\tname           ");
 
-		if (nlen > UBIFS_MAX_NLEN)
+		if (nlen > UBIFS_MAX_NLEN ||
+		    nlen > safe_len - UBIFS_DENT_NODE_SZ)
 			pr_err("(bad name length, not printing, bad or corrupted node)");
 		else {
 			for (i = 0; i < nlen && dent->name[i]; i++)
@@ -467,7 +498,6 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node)
 	case UBIFS_DATA_NODE:
 	{
 		const struct ubifs_data_node *dn = node;
-		int dlen = le32_to_cpu(ch->len) - UBIFS_DATA_NODE_SZ;
 
 		key_read(c, &dn->key, &key);
 		pr_err("\tkey            %s\n",
@@ -475,10 +505,13 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node)
 		pr_err("\tsize           %u\n", le32_to_cpu(dn->size));
 		pr_err("\tcompr_typ      %d\n",
 		       (int)le16_to_cpu(dn->compr_type));
-		pr_err("\tdata size      %d\n", dlen);
-		pr_err("\tdata:\n");
+		pr_err("\tdata size      %u\n",
+		       le32_to_cpu(ch->len) - (unsigned int)UBIFS_DATA_NODE_SZ);
+		pr_err("\tdata (length = %d):\n",
+		       safe_len - (int)UBIFS_DATA_NODE_SZ);
 		print_hex_dump(KERN_ERR, "\t", DUMP_PREFIX_OFFSET, 32, 1,
-			       (void *)&dn->data, dlen, 0);
+			       (void *)&dn->data,
+			       safe_len - (int)UBIFS_DATA_NODE_SZ, 0);
 		break;
 	}
 	case UBIFS_TRUN_NODE:
@@ -495,13 +528,16 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node)
 	case UBIFS_IDX_NODE:
 	{
 		const struct ubifs_idx_node *idx = node;
+		int max_child_cnt = (safe_len - UBIFS_IDX_NODE_SZ) /
+				    (ubifs_idx_node_sz(c, 1) -
+				    UBIFS_IDX_NODE_SZ);
 
-		n = le16_to_cpu(idx->child_cnt);
-		pr_err("\tchild_cnt      %d\n", n);
+		n = min_t(int, le16_to_cpu(idx->child_cnt), max_child_cnt);
+		pr_err("\tchild_cnt      %d\n", (int)le16_to_cpu(idx->child_cnt));
 		pr_err("\tlevel          %d\n", (int)le16_to_cpu(idx->level));
 		pr_err("\tBranches:\n");
 
-		for (i = 0; i < n && i < c->fanout - 1; i++) {
+		for (i = 0; i < n && i < c->fanout; i++) {
 			const struct ubifs_branch *br;
 
 			br = ubifs_idx_branch(c, idx, i);
@@ -525,7 +561,7 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node)
 				le64_to_cpu(orph->cmt_no) & LLONG_MAX);
 		pr_err("\tlast node flag %llu\n",
 		       (unsigned long long)(le64_to_cpu(orph->cmt_no)) >> 63);
-		n = (le32_to_cpu(ch->len) - UBIFS_ORPH_NODE_SZ) >> 3;
+		n = (safe_len - UBIFS_ORPH_NODE_SZ) >> 3;
 		pr_err("\t%d orphan inode numbers:\n", n);
 		for (i = 0; i < n; i++)
 			pr_err("\t  ino %llu\n",
@@ -537,9 +573,10 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node)
 		break;
 	}
 	default:
-		pr_err("node type %d was not recognized\n",
-		       (int)ch->node_type);
+		pr_err("node type %d was not recognized\n", type);
 	}
+
+out_unlock:
 	spin_unlock(&dbg_lock);
 }
 
@@ -764,7 +801,7 @@ void ubifs_dump_lpt_info(struct ubifs_info *c)
 	pr_err("\tnnode_sz:      %d\n", c->nnode_sz);
 	pr_err("\tltab_sz:       %d\n", c->ltab_sz);
 	pr_err("\tlsave_sz:      %d\n", c->lsave_sz);
-	pr_err("\tbig_lpt:       %d\n", c->big_lpt);
+	pr_err("\tbig_lpt:       %u\n", c->big_lpt);
 	pr_err("\tlpt_hght:      %d\n", c->lpt_hght);
 	pr_err("\tpnode_cnt:     %d\n", c->pnode_cnt);
 	pr_err("\tnnode_cnt:     %d\n", c->nnode_cnt);
@@ -791,22 +828,6 @@ void ubifs_dump_lpt_info(struct ubifs_info *c)
 	spin_unlock(&dbg_lock);
 }
 
-void ubifs_dump_sleb(const struct ubifs_info *c,
-		     const struct ubifs_scan_leb *sleb, int offs)
-{
-	struct ubifs_scan_node *snod;
-
-	pr_err("(pid %d) start dumping scanned data from LEB %d:%d\n",
-	       current->pid, sleb->lnum, offs);
-
-	list_for_each_entry(snod, &sleb->nodes, list) {
-		cond_resched();
-		pr_err("Dumping node at LEB %d:%d len %d\n",
-		       sleb->lnum, snod->offs, snod->len);
-		ubifs_dump_node(c, snod->node);
-	}
-}
-
 void ubifs_dump_leb(const struct ubifs_info *c, int lnum)
 {
 	struct ubifs_scan_leb *sleb;
@@ -834,7 +855,7 @@ void ubifs_dump_leb(const struct ubifs_info *c, int lnum)
 		cond_resched();
 		pr_err("Dumping node at LEB %d:%d len %d\n", lnum,
 		       snod->offs, snod->len);
-		ubifs_dump_node(c, snod->node);
+		ubifs_dump_node(c, snod->node, c->leb_size - snod->offs);
 	}
 
 	pr_err("(pid %d) finish dumping LEB %d\n", current->pid, lnum);
@@ -1012,7 +1033,7 @@ void dbg_save_space_info(struct ubifs_info *c)
  *
  * This function compares current flash space information with the information
  * which was saved when the 'dbg_save_space_info()' function was called.
- * Returns zero if the information has not changed, and %-EINVAL it it has
+ * Returns zero if the information has not changed, and %-EINVAL if it has
  * changed.
  */
 int dbg_check_space_info(struct ubifs_info *c)
@@ -1212,7 +1233,7 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1,
 		ubifs_err(c, "but it should have key %s according to tnc",
 			  dbg_snprintf_key(c, &zbr1->key, key_buf,
 					   DBG_KEY_BUF_LEN));
-		ubifs_dump_node(c, dent1);
+		ubifs_dump_node(c, dent1, UBIFS_MAX_DENT_NODE_SZ);
 		goto out_free;
 	}
 
@@ -1224,7 +1245,7 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1,
 		ubifs_err(c, "but it should have key %s according to tnc",
 			  dbg_snprintf_key(c, &zbr2->key, key_buf,
 					   DBG_KEY_BUF_LEN));
-		ubifs_dump_node(c, dent2);
+		ubifs_dump_node(c, dent2, UBIFS_MAX_DENT_NODE_SZ);
 		goto out_free;
 	}
 
@@ -1243,9 +1264,9 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1,
 			  dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN));
 
 	ubifs_msg(c, "first node at %d:%d\n", zbr1->lnum, zbr1->offs);
-	ubifs_dump_node(c, dent1);
+	ubifs_dump_node(c, dent1, UBIFS_MAX_DENT_NODE_SZ);
 	ubifs_msg(c, "second node at %d:%d\n", zbr2->lnum, zbr2->offs);
-	ubifs_dump_node(c, dent2);
+	ubifs_dump_node(c, dent2, UBIFS_MAX_DENT_NODE_SZ);
 
 out_free:
 	kfree(dent2);
@@ -2110,7 +2131,7 @@ out:
 
 out_dump:
 	ubifs_msg(c, "dump of node at LEB %d:%d", zbr->lnum, zbr->offs);
-	ubifs_dump_node(c, node);
+	ubifs_dump_node(c, node, zbr->len);
 out_free:
 	kfree(node);
 	return err;
@@ -2243,7 +2264,7 @@ out_dump:
 
 	ubifs_msg(c, "dump of the inode %lu sitting in LEB %d:%d",
 		  (unsigned long)fscki->inum, zbr->lnum, zbr->offs);
-	ubifs_dump_node(c, ino);
+	ubifs_dump_node(c, ino, zbr->len);
 	kfree(ino);
 	return -EINVAL;
 }
@@ -2314,12 +2335,12 @@ int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head)
 
 		if (sa->type != UBIFS_DATA_NODE) {
 			ubifs_err(c, "bad node type %d", sa->type);
-			ubifs_dump_node(c, sa->node);
+			ubifs_dump_node(c, sa->node, c->leb_size - sa->offs);
 			return -EINVAL;
 		}
 		if (sb->type != UBIFS_DATA_NODE) {
 			ubifs_err(c, "bad node type %d", sb->type);
-			ubifs_dump_node(c, sb->node);
+			ubifs_dump_node(c, sb->node, c->leb_size - sb->offs);
 			return -EINVAL;
 		}
 
@@ -2350,8 +2371,8 @@ int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head)
 	return 0;
 
 error_dump:
-	ubifs_dump_node(c, sa->node);
-	ubifs_dump_node(c, sb->node);
+	ubifs_dump_node(c, sa->node, c->leb_size - sa->offs);
+	ubifs_dump_node(c, sb->node, c->leb_size - sb->offs);
 	return -EINVAL;
 }
 
@@ -2382,13 +2403,13 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head)
 		if (sa->type != UBIFS_INO_NODE && sa->type != UBIFS_DENT_NODE &&
 		    sa->type != UBIFS_XENT_NODE) {
 			ubifs_err(c, "bad node type %d", sa->type);
-			ubifs_dump_node(c, sa->node);
+			ubifs_dump_node(c, sa->node, c->leb_size - sa->offs);
 			return -EINVAL;
 		}
 		if (sb->type != UBIFS_INO_NODE && sb->type != UBIFS_DENT_NODE &&
 		    sb->type != UBIFS_XENT_NODE) {
 			ubifs_err(c, "bad node type %d", sb->type);
-			ubifs_dump_node(c, sb->node);
+			ubifs_dump_node(c, sb->node, c->leb_size - sb->offs);
 			return -EINVAL;
 		}
 
@@ -2438,11 +2459,10 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head)
 
 error_dump:
 	ubifs_msg(c, "dumping first node");
-	ubifs_dump_node(c, sa->node);
+	ubifs_dump_node(c, sa->node, c->leb_size - sa->offs);
 	ubifs_msg(c, "dumping second node");
-	ubifs_dump_node(c, sb->node);
+	ubifs_dump_node(c, sb->node, c->leb_size - sb->offs);
 	return -EINVAL;
-	return 0;
 }
 
 static inline int chance(unsigned int n, unsigned int out_of)
diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h
index 7763639a426b..ed966108da80 100644
--- a/fs/ubifs/debug.h
+++ b/fs/ubifs/debug.h
@@ -242,7 +242,8 @@ const char *dbg_get_key_dump(const struct ubifs_info *c,
 const char *dbg_snprintf_key(const struct ubifs_info *c,
 			     const union ubifs_key *key, char *buffer, int len);
 void ubifs_dump_inode(struct ubifs_info *c, const struct inode *inode);
-void ubifs_dump_node(const struct ubifs_info *c, const void *node);
+void ubifs_dump_node(const struct ubifs_info *c, const void *node,
+		     int node_len);
 void ubifs_dump_budget_req(const struct ubifs_budget_req *req);
 void ubifs_dump_lstats(const struct ubifs_lp_stats *lst);
 void ubifs_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi);
@@ -251,8 +252,6 @@ void ubifs_dump_lprop(const struct ubifs_info *c,
 void ubifs_dump_lprops(struct ubifs_info *c);
 void ubifs_dump_lpt_info(struct ubifs_info *c);
 void ubifs_dump_leb(const struct ubifs_info *c, int lnum);
-void ubifs_dump_sleb(const struct ubifs_info *c,
-		     const struct ubifs_scan_leb *sleb, int offs);
 void ubifs_dump_znode(const struct ubifs_info *c,
 		      const struct ubifs_znode *znode);
 void ubifs_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap,
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c
index 1f33a5598b93..9a6b8660425a 100644
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -203,6 +203,7 @@ static struct dentry *ubifs_lookup(struct inode *dir, struct dentry *dentry,
 	dbg_gen("'%pd' in dir ino %lu", dentry, dir->i_ino);
 
 	err = fscrypt_prepare_lookup(dir, dentry, &nm);
+	generic_set_encrypted_ci_d_ops(dentry);
 	if (err == -ENOENT)
 		return d_splice_alias(NULL, dentry);
 	if (err)
@@ -843,7 +844,7 @@ out_fname:
  *
  * This function checks if directory @dir is empty. Returns zero if the
  * directory is empty, %-ENOTEMPTY if it is not, and other negative error codes
- * in case of of errors.
+ * in case of errors.
  */
 int ubifs_check_dir_empty(struct inode *dir)
 {
@@ -1631,9 +1632,7 @@ const struct inode_operations ubifs_dir_inode_operations = {
 	.rename      = ubifs_rename,
 	.setattr     = ubifs_setattr,
 	.getattr     = ubifs_getattr,
-#ifdef CONFIG_UBIFS_FS_XATTR
 	.listxattr   = ubifs_listxattr,
-#endif
 	.update_time = ubifs_update_time,
 	.tmpfile     = ubifs_tmpfile,
 };
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index b77d1637bbbc..2bc7780d2963 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -92,7 +92,7 @@ static int read_block(struct inode *inode, void *addr, unsigned int block,
 dump:
 	ubifs_err(c, "bad data node (block %u, inode %lu)",
 		  block, inode->i_ino);
-	ubifs_dump_node(c, dn);
+	ubifs_dump_node(c, dn, UBIFS_MAX_DATA_NODE_SZ);
 	return -EINVAL;
 }
 
@@ -205,7 +205,7 @@ static void release_new_page_budget(struct ubifs_info *c)
  * @c: UBIFS file-system description object
  *
  * This is a helper function which releases budget corresponding to the budget
- * of changing one one page of data which already exists on the flash media.
+ * of changing one page of data which already exists on the flash media.
  */
 static void release_existing_page_budget(struct ubifs_info *c)
 {
@@ -1645,9 +1645,7 @@ const struct address_space_operations ubifs_file_address_operations = {
 const struct inode_operations ubifs_file_inode_operations = {
 	.setattr     = ubifs_setattr,
 	.getattr     = ubifs_getattr,
-#ifdef CONFIG_UBIFS_FS_XATTR
 	.listxattr   = ubifs_listxattr,
-#endif
 	.update_time = ubifs_update_time,
 };
 
@@ -1655,9 +1653,7 @@ const struct inode_operations ubifs_symlink_inode_operations = {
 	.get_link    = ubifs_get_link,
 	.setattr     = ubifs_setattr,
 	.getattr     = ubifs_getattr,
-#ifdef CONFIG_UBIFS_FS_XATTR
 	.listxattr   = ubifs_listxattr,
-#endif
 	.update_time = ubifs_update_time,
 };
 
diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c
index 7e4bfaf2871f..00b61dba62b7 100644
--- a/fs/ubifs/io.c
+++ b/fs/ubifs/io.c
@@ -198,6 +198,7 @@ int ubifs_is_mapped(const struct ubifs_info *c, int lnum)
  * ubifs_check_node - check node.
  * @c: UBIFS file-system description object
  * @buf: node to check
+ * @len: node length
  * @lnum: logical eraseblock number
  * @offs: offset within the logical eraseblock
  * @quiet: print no messages
@@ -222,10 +223,10 @@ int ubifs_is_mapped(const struct ubifs_info *c, int lnum)
  * This function returns zero in case of success and %-EUCLEAN in case of bad
  * CRC or magic.
  */
-int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
-		     int offs, int quiet, int must_chk_crc)
+int ubifs_check_node(const struct ubifs_info *c, const void *buf, int len,
+		     int lnum, int offs, int quiet, int must_chk_crc)
 {
-	int err = -EINVAL, type, node_len, dump_node = 1;
+	int err = -EINVAL, type, node_len;
 	uint32_t crc, node_crc, magic;
 	const struct ubifs_ch *ch = buf;
 
@@ -278,22 +279,10 @@ int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
 out_len:
 	if (!quiet)
 		ubifs_err(c, "bad node length %d", node_len);
-	if (type == UBIFS_DATA_NODE && node_len > UBIFS_DATA_NODE_SZ)
-		dump_node = 0;
 out:
 	if (!quiet) {
 		ubifs_err(c, "bad node at LEB %d:%d", lnum, offs);
-		if (dump_node) {
-			ubifs_dump_node(c, buf);
-		} else {
-			int safe_len = min3(node_len, c->leb_size - offs,
-				(int)UBIFS_MAX_DATA_NODE_SZ);
-			pr_err("\tprevent out-of-bounds memory access\n");
-			pr_err("\ttruncated data node length      %d\n", safe_len);
-			pr_err("\tcorrupted data node:\n");
-			print_hex_dump(KERN_ERR, "\t", DUMP_PREFIX_OFFSET, 32, 1,
-					buf, safe_len, 0);
-		}
+		ubifs_dump_node(c, buf, len);
 		dump_stack();
 	}
 	return err;
@@ -319,7 +308,7 @@ void ubifs_pad(const struct ubifs_info *c, void *buf, int pad)
 {
 	uint32_t crc;
 
-	ubifs_assert(c, pad >= 0 && !(pad & 7));
+	ubifs_assert(c, pad >= 0);
 
 	if (pad >= UBIFS_PAD_NODE_SZ) {
 		struct ubifs_ch *ch = buf;
@@ -730,7 +719,7 @@ out_timers:
 int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
 {
 	struct ubifs_info *c = wbuf->c;
-	int err, written, n, aligned_len = ALIGN(len, 8);
+	int err, n, written = 0, aligned_len = ALIGN(len, 8);
 
 	dbg_io("%d bytes (%s) to jhead %s wbuf at LEB %d:%d", len,
 	       dbg_ntype(((struct ubifs_ch *)buf)->node_type),
@@ -764,6 +753,10 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
 		 * write-buffer.
 		 */
 		memcpy(wbuf->buf + wbuf->used, buf, len);
+		if (aligned_len > len) {
+			ubifs_assert(c, aligned_len - len < 8);
+			ubifs_pad(c, wbuf->buf + wbuf->used + len, aligned_len - len);
+		}
 
 		if (aligned_len == wbuf->avail) {
 			dbg_io("flush jhead %s wbuf to LEB %d:%d",
@@ -793,8 +786,6 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
 		goto exit;
 	}
 
-	written = 0;
-
 	if (wbuf->used) {
 		/*
 		 * The node is large enough and does not fit entirely within
@@ -856,13 +847,18 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len)
 	}
 
 	spin_lock(&wbuf->lock);
-	if (aligned_len)
+	if (aligned_len) {
 		/*
 		 * And now we have what's left and what does not take whole
 		 * max. write unit, so write it to the write-buffer and we are
 		 * done.
 		 */
 		memcpy(wbuf->buf, buf + written, len);
+		if (aligned_len > len) {
+			ubifs_assert(c, aligned_len - len < 8);
+			ubifs_pad(c, wbuf->buf + len, aligned_len - len);
+		}
+	}
 
 	if (c->leb_size - wbuf->offs >= c->max_write_size)
 		wbuf->size = c->max_write_size;
@@ -890,7 +886,7 @@ exit:
 out:
 	ubifs_err(c, "cannot write %d bytes to LEB %d:%d, error %d",
 		  len, wbuf->lnum, wbuf->offs, err);
-	ubifs_dump_node(c, buf);
+	ubifs_dump_node(c, buf, written + len);
 	dump_stack();
 	ubifs_dump_leb(c, wbuf->lnum);
 	return err;
@@ -933,7 +929,7 @@ int ubifs_write_node_hmac(struct ubifs_info *c, void *buf, int len, int lnum,
 
 	err = ubifs_leb_write(c, lnum, buf, offs, buf_len);
 	if (err)
-		ubifs_dump_node(c, buf);
+		ubifs_dump_node(c, buf, len);
 
 	return err;
 }
@@ -1016,7 +1012,7 @@ int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len,
 		goto out;
 	}
 
-	err = ubifs_check_node(c, buf, lnum, offs, 0, 0);
+	err = ubifs_check_node(c, buf, len, lnum, offs, 0, 0);
 	if (err) {
 		ubifs_err(c, "expected node type %d", type);
 		return err;
@@ -1032,7 +1028,7 @@ int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len,
 
 out:
 	ubifs_err(c, "bad node at LEB %d:%d", lnum, offs);
-	ubifs_dump_node(c, buf);
+	ubifs_dump_node(c, buf, len);
 	dump_stack();
 	return -EINVAL;
 }
@@ -1046,7 +1042,7 @@ out:
  * @lnum: logical eraseblock number
  * @offs: offset within the logical eraseblock
  *
- * This function reads a node of known type and and length, checks it and
+ * This function reads a node of known type and length, checks it and
  * stores in @buf. Returns zero in case of success, %-EUCLEAN if CRC mismatched
  * and a negative error code in case of failure.
  */
@@ -1072,7 +1068,7 @@ int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len,
 		goto out;
 	}
 
-	err = ubifs_check_node(c, buf, lnum, offs, 0, 0);
+	err = ubifs_check_node(c, buf, len, lnum, offs, 0, 0);
 	if (err) {
 		ubifs_errc(c, "expected node type %d", type);
 		return err;
@@ -1090,7 +1086,7 @@ out:
 	ubifs_errc(c, "bad node at LEB %d:%d, LEB mapping status %d", lnum,
 		   offs, ubi_is_mapped(c->ubi, lnum));
 	if (!c->probing) {
-		ubifs_dump_node(c, buf);
+		ubifs_dump_node(c, buf, len);
 		dump_stack();
 	}
 	return -EINVAL;
diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c
index 091c2ad8f211..03410ae0813a 100644
--- a/fs/ubifs/journal.c
+++ b/fs/ubifs/journal.c
@@ -1559,7 +1559,8 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode,
 			if (dn_len <= 0 || dn_len > UBIFS_BLOCK_SIZE) {
 				ubifs_err(c, "bad data node (block %u, inode %lu)",
 					  blk, inode->i_ino);
-				ubifs_dump_node(c, dn);
+				ubifs_dump_node(c, dn, sz - UBIFS_INO_NODE_SZ -
+						UBIFS_TRUN_NODE_SZ);
 				goto out_free;
 			}
 
diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c
index 6e0a153b7194..778a22bf9a92 100644
--- a/fs/ubifs/lpt.c
+++ b/fs/ubifs/lpt.c
@@ -851,7 +851,7 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first,
 	dbg_lp("lsave_sz %d", c->lsave_sz);
 	dbg_lp("lsave_cnt %d", c->lsave_cnt);
 	dbg_lp("lpt_hght %d", c->lpt_hght);
-	dbg_lp("big_lpt %d", c->big_lpt);
+	dbg_lp("big_lpt %u", c->big_lpt);
 	dbg_lp("LPT root is at %d:%d", c->lpt_lnum, c->lpt_offs);
 	dbg_lp("LPT head is at %d:%d", c->nhead_lnum, c->nhead_offs);
 	dbg_lp("LPT ltab is at %d:%d", c->ltab_lnum, c->ltab_offs);
@@ -1824,7 +1824,7 @@ static int lpt_init_rd(struct ubifs_info *c)
 	dbg_lp("lsave_sz %d", c->lsave_sz);
 	dbg_lp("lsave_cnt %d", c->lsave_cnt);
 	dbg_lp("lpt_hght %d", c->lpt_hght);
-	dbg_lp("big_lpt %d", c->big_lpt);
+	dbg_lp("big_lpt %u", c->big_lpt);
 	dbg_lp("LPT root is at %d:%d", c->lpt_lnum, c->lpt_offs);
 	dbg_lp("LPT head is at %d:%d", c->nhead_lnum, c->nhead_offs);
 	dbg_lp("LPT ltab is at %d:%d", c->ltab_lnum, c->ltab_offs);
diff --git a/fs/ubifs/master.c b/fs/ubifs/master.c
index 911d0555b9f2..0df9a3dd0aaa 100644
--- a/fs/ubifs/master.c
+++ b/fs/ubifs/master.c
@@ -314,7 +314,7 @@ static int validate_master(const struct ubifs_info *c)
 
 out:
 	ubifs_err(c, "bad master node at offset %d error %d", c->mst_offs, err);
-	ubifs_dump_node(c, c->mst_node);
+	ubifs_dump_node(c, c->mst_node, c->mst_node_alsz);
 	return -EINVAL;
 }
 
@@ -392,7 +392,7 @@ int ubifs_read_master(struct ubifs_info *c)
 		if (c->leb_cnt < old_leb_cnt ||
 		    c->leb_cnt < UBIFS_MIN_LEB_CNT) {
 			ubifs_err(c, "bad leb_cnt on master node");
-			ubifs_dump_node(c, c->mst_node);
+			ubifs_dump_node(c, c->mst_node, c->mst_node_alsz);
 			return -EINVAL;
 		}
 
diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c
index 0fb61956146d..4909321d84cf 100644
--- a/fs/ubifs/orphan.c
+++ b/fs/ubifs/orphan.c
@@ -646,7 +646,8 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
 		if (snod->type != UBIFS_ORPH_NODE) {
 			ubifs_err(c, "invalid node type %d in orphan area at %d:%d",
 				  snod->type, sleb->lnum, snod->offs);
-			ubifs_dump_node(c, snod->node);
+			ubifs_dump_node(c, snod->node,
+					c->leb_size - snod->offs);
 			err = -EINVAL;
 			goto out_free;
 		}
@@ -674,7 +675,8 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb,
 			if (!first) {
 				ubifs_err(c, "out of order commit number %llu in orphan node at %d:%d",
 					  cmt_no, sleb->lnum, snod->offs);
-				ubifs_dump_node(c, snod->node);
+				ubifs_dump_node(c, snod->node,
+						c->leb_size - snod->offs);
 				err = -EINVAL;
 				goto out_free;
 			}
diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c
index f116f7b3f9e5..f0d51dd21c9e 100644
--- a/fs/ubifs/recovery.c
+++ b/fs/ubifs/recovery.c
@@ -352,11 +352,11 @@ out_free:
 	ubifs_err(c, "failed to recover master node");
 	if (mst1) {
 		ubifs_err(c, "dumping first master node");
-		ubifs_dump_node(c, mst1);
+		ubifs_dump_node(c, mst1, c->leb_size - ((void *)mst1 - buf1));
 	}
 	if (mst2) {
 		ubifs_err(c, "dumping second master node");
-		ubifs_dump_node(c, mst2);
+		ubifs_dump_node(c, mst2, c->leb_size - ((void *)mst2 - buf2));
 	}
 	vfree(buf2);
 	vfree(buf1);
@@ -469,7 +469,7 @@ static int no_more_nodes(const struct ubifs_info *c, void *buf, int len,
 	 * The area after the common header size is not empty, so the common
 	 * header must be intact. Check it.
 	 */
-	if (ubifs_check_node(c, buf, lnum, offs, 1, 0) != -EUCLEAN) {
+	if (ubifs_check_node(c, buf, len, lnum, offs, 1, 0) != -EUCLEAN) {
 		dbg_rcvry("unexpected bad common header at %d:%d", lnum, offs);
 		return 0;
 	}
diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c
index 2f8d8f4f411a..79801c9a5b87 100644
--- a/fs/ubifs/replay.c
+++ b/fs/ubifs/replay.c
@@ -574,7 +574,7 @@ static int authenticate_sleb_hash(struct ubifs_info *c, struct shash_desc *log_h
  * @c: UBIFS file-system description object
  * @sleb: the scan LEB to authenticate
  * @log_hash:
- * @is_last: if true, this is is the last LEB
+ * @is_last: if true, this is the last LEB
  *
  * This function iterates over the buds of a single LEB authenticating all buds
  * with the authentication nodes on this LEB. Authentication nodes are written
@@ -827,7 +827,7 @@ out:
 
 out_dump:
 	ubifs_err(c, "bad node is at LEB %d:%d", lnum, snod->offs);
-	ubifs_dump_node(c, snod->node);
+	ubifs_dump_node(c, snod->node, c->leb_size - snod->offs);
 	ubifs_scan_destroy(sleb);
 	return -EINVAL;
 }
@@ -1123,7 +1123,7 @@ out:
 out_dump:
 	ubifs_err(c, "log error detected while replaying the log at LEB %d:%d",
 		  lnum, offs + snod->offs);
-	ubifs_dump_node(c, snod->node);
+	ubifs_dump_node(c, snod->node, c->leb_size - snod->offs);
 	ubifs_scan_destroy(sleb);
 	return -EINVAL;
 }
diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c
index c0d3e4008d23..c160f718c288 100644
--- a/fs/ubifs/sb.c
+++ b/fs/ubifs/sb.c
@@ -503,7 +503,7 @@ static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup)
 
 failed:
 	ubifs_err(c, "bad superblock, error %d", err);
-	ubifs_dump_node(c, sup);
+	ubifs_dump_node(c, sup, ALIGN(UBIFS_SB_NODE_SZ, c->min_io_size));
 	return -EINVAL;
 }
 
diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c
index c69cdb5e65bc..84a9157dcc32 100644
--- a/fs/ubifs/scan.c
+++ b/fs/ubifs/scan.c
@@ -76,7 +76,7 @@ int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum,
 	dbg_scan("scanning %s at LEB %d:%d",
 		 dbg_ntype(ch->node_type), lnum, offs);
 
-	if (ubifs_check_node(c, buf, lnum, offs, quiet, 1))
+	if (ubifs_check_node(c, buf, len, lnum, offs, quiet, 1))
 		return SCANNED_A_CORRUPT_NODE;
 
 	if (ch->node_type == UBIFS_PAD_NODE) {
@@ -90,7 +90,7 @@ int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum,
 			if (!quiet) {
 				ubifs_err(c, "bad pad node at LEB %d:%d",
 					  lnum, offs);
-				ubifs_dump_node(c, pad);
+				ubifs_dump_node(c, pad, len);
 			}
 			return SCANNED_A_BAD_PAD_NODE;
 		}
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index cb3acfb7dd1f..138b9426c6c1 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -253,7 +253,7 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum)
 
 out_invalid:
 	ubifs_err(c, "inode %lu validation failed, error %d", inode->i_ino, err);
-	ubifs_dump_node(c, ino);
+	ubifs_dump_node(c, ino, UBIFS_MAX_INO_NODE_SZ);
 	ubifs_dump_inode(c, inode);
 	err = -EINVAL;
 out_ino:
@@ -1572,7 +1572,7 @@ static int mount_ubifs(struct ubifs_info *c)
 	dbg_gen("main area LEBs:      %d (%d - %d)",
 		c->main_lebs, c->main_first, c->leb_cnt - 1);
 	dbg_gen("index LEBs:          %d", c->lst.idx_lebs);
-	dbg_gen("total index bytes:   %lld (%lld KiB, %lld MiB)",
+	dbg_gen("total index bytes:   %llu (%llu KiB, %llu MiB)",
 		c->bi.old_idx_sz, c->bi.old_idx_sz >> 10,
 		c->bi.old_idx_sz >> 20);
 	dbg_gen("key hash type:       %d", c->key_hash_type);
@@ -2207,9 +2207,7 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent)
 	if (c->max_inode_sz > MAX_LFS_FILESIZE)
 		sb->s_maxbytes = c->max_inode_sz = MAX_LFS_FILESIZE;
 	sb->s_op = &ubifs_super_operations;
-#ifdef CONFIG_UBIFS_FS_XATTR
 	sb->s_xattr = ubifs_xattr_handlers;
-#endif
 	fscrypt_set_ops(sb, &ubifs_crypt_operations);
 
 	mutex_lock(&c->umount_mutex);
diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c
index 894f1ab14616..488f3da7a6c6 100644
--- a/fs/ubifs/tnc.c
+++ b/fs/ubifs/tnc.c
@@ -316,7 +316,7 @@ static int lnc_add(struct ubifs_info *c, struct ubifs_zbranch *zbr,
 	err = ubifs_validate_entry(c, dent);
 	if (err) {
 		dump_stack();
-		ubifs_dump_node(c, dent);
+		ubifs_dump_node(c, dent, zbr->len);
 		return err;
 	}
 
@@ -349,7 +349,7 @@ static int lnc_add_directly(struct ubifs_info *c, struct ubifs_zbranch *zbr,
 	err = ubifs_validate_entry(c, node);
 	if (err) {
 		dump_stack();
-		ubifs_dump_node(c, node);
+		ubifs_dump_node(c, node, zbr->len);
 		return err;
 	}
 
@@ -377,7 +377,7 @@ static void lnc_free(struct ubifs_zbranch *zbr)
  *
  * This function reads a "hashed" node defined by @zbr from the leaf node cache
  * (in it is there) or from the hash media, in which case the node is also
- * added to LNC. Returns zero in case of success or a negative negative error
+ * added to LNC. Returns zero in case of success or a negative error
  * code in case of failure.
  */
 static int tnc_read_hashed_node(struct ubifs_info *c, struct ubifs_zbranch *zbr,
@@ -1699,7 +1699,7 @@ static int validate_data_node(struct ubifs_info *c, void *buf,
 		goto out_err;
 	}
 
-	err = ubifs_check_node(c, buf, zbr->lnum, zbr->offs, 0, 0);
+	err = ubifs_check_node(c, buf, zbr->len, zbr->lnum, zbr->offs, 0, 0);
 	if (err) {
 		ubifs_err(c, "expected node type %d", UBIFS_DATA_NODE);
 		goto out;
@@ -1733,7 +1733,7 @@ out_err:
 	err = -EINVAL;
 out:
 	ubifs_err(c, "bad node at LEB %d:%d", zbr->lnum, zbr->offs);
-	ubifs_dump_node(c, buf);
+	ubifs_dump_node(c, buf, zbr->len);
 	dump_stack();
 	return err;
 }
diff --git a/fs/ubifs/tnc_misc.c b/fs/ubifs/tnc_misc.c
index ccaf94ea5be3..4d686e34e64d 100644
--- a/fs/ubifs/tnc_misc.c
+++ b/fs/ubifs/tnc_misc.c
@@ -390,7 +390,7 @@ static int read_znode(struct ubifs_info *c, struct ubifs_zbranch *zzbr,
 
 out_dump:
 	ubifs_err(c, "bad indexing node at LEB %d:%d, error %d", lnum, offs, err);
-	ubifs_dump_node(c, idx);
+	ubifs_dump_node(c, idx, c->max_idx_node_sz);
 	kfree(idx);
 	return -EINVAL;
 }
@@ -455,8 +455,7 @@ out:
  * @node: node is returned here
  *
  * This function reads a node defined by @zbr from the flash media. Returns
- * zero in case of success or a negative negative error code in case of
- * failure.
+ * zero in case of success or a negative error code in case of failure.
  */
 int ubifs_tnc_read_node(struct ubifs_info *c, struct ubifs_zbranch *zbr,
 			void *node)
@@ -489,7 +488,7 @@ int ubifs_tnc_read_node(struct ubifs_info *c, struct ubifs_zbranch *zbr,
 			  zbr->lnum, zbr->offs);
 		dbg_tnck(key, "looked for key ");
 		dbg_tnck(&key1, "but found node's key ");
-		ubifs_dump_node(c, node);
+		ubifs_dump_node(c, node, zbr->len);
 		return -EINVAL;
 	}
 
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index 4ffd832e3b93..fc2cdde3b549 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -1719,8 +1719,8 @@ int ubifs_write_node(struct ubifs_info *c, void *node, int len, int lnum,
 		     int offs);
 int ubifs_write_node_hmac(struct ubifs_info *c, void *buf, int len, int lnum,
 			  int offs, int hmac_offs);
-int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum,
-		     int offs, int quiet, int must_chk_crc);
+int ubifs_check_node(const struct ubifs_info *c, const void *buf, int len,
+		     int lnum, int offs, int quiet, int must_chk_crc);
 void ubifs_init_node(struct ubifs_info *c, void *buf, int len, int pad);
 void ubifs_crc_node(struct ubifs_info *c, void *buf, int len);
 void ubifs_prepare_node(struct ubifs_info *c, void *buf, int len, int pad);
@@ -2000,17 +2000,19 @@ int ubifs_getattr(const struct path *path, struct kstat *stat,
 int ubifs_check_dir_empty(struct inode *dir);
 
 /* xattr.c */
-extern const struct xattr_handler *ubifs_xattr_handlers[];
-ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size);
 int ubifs_xattr_set(struct inode *host, const char *name, const void *value,
 		    size_t size, int flags, bool check_lock);
 ssize_t ubifs_xattr_get(struct inode *host, const char *name, void *buf,
 			size_t size);
 
 #ifdef CONFIG_UBIFS_FS_XATTR
+extern const struct xattr_handler *ubifs_xattr_handlers[];
+ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size);
 void ubifs_evict_xattr_inode(struct ubifs_info *c, ino_t xattr_inum);
 int ubifs_purge_xattrs(struct inode *host);
 #else
+#define ubifs_listxattr NULL
+#define ubifs_xattr_handlers NULL
 static inline void ubifs_evict_xattr_inode(struct ubifs_info *c,
 					   ino_t xattr_inum) { }
 static inline int ubifs_purge_xattrs(struct inode *host)
diff --git a/fs/xattr.c b/fs/xattr.c
index cd7a563e8bcd..fd57153b1f61 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -276,8 +276,16 @@ vfs_setxattr(struct dentry *dentry, const char *name, const void *value,
 {
 	struct inode *inode = dentry->d_inode;
 	struct inode *delegated_inode = NULL;
+	const void  *orig_value = value;
 	int error;
 
+	if (size && strcmp(name, XATTR_NAME_CAPS) == 0) {
+		error = cap_convert_nscap(dentry, &value, size);
+		if (error < 0)
+			return error;
+		size = error;
+	}
+
 retry_deleg:
 	inode_lock(inode);
 	error = __vfs_setxattr_locked(dentry, name, value, size, flags,
@@ -289,6 +297,9 @@ retry_deleg:
 		if (!error)
 			goto retry_deleg;
 	}
+	if (value != orig_value)
+		kfree(value);
+
 	return error;
 }
 EXPORT_SYMBOL_GPL(vfs_setxattr);
@@ -537,12 +548,6 @@ setxattr(struct dentry *d, const char __user *name, const void __user *value,
 		if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) ||
 		    (strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
 			posix_acl_fix_xattr_from_user(kvalue, size);
-		else if (strcmp(kname, XATTR_NAME_CAPS) == 0) {
-			error = cap_convert_nscap(d, &kvalue, size);
-			if (error < 0)
-				goto out;
-			size = error;
-		}
 	}
 
 	error = vfs_setxattr(d, kname, kvalue, size, flags);
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 15640015be9d..7cb9f064ac64 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -690,9 +690,9 @@ xfs_alloc_read_agfl(
 	xfs_mount_t	*mp,		/* mount point structure */
 	xfs_trans_t	*tp,		/* transaction pointer */
 	xfs_agnumber_t	agno,		/* allocation group number */
-	xfs_buf_t	**bpp)		/* buffer for the ag free block array */
+	struct xfs_buf	**bpp)		/* buffer for the ag free block array */
 {
-	xfs_buf_t	*bp;		/* return value */
+	struct xfs_buf	*bp;		/* return value */
 	int		error;
 
 	ASSERT(agno != NULLAGNUMBER);
@@ -2647,12 +2647,12 @@ out_no_agbp:
 int				/* error */
 xfs_alloc_get_freelist(
 	xfs_trans_t	*tp,	/* transaction pointer */
-	xfs_buf_t	*agbp,	/* buffer containing the agf structure */
+	struct xfs_buf	*agbp,	/* buffer containing the agf structure */
 	xfs_agblock_t	*bnop,	/* block address retrieved from freelist */
 	int		btreeblk) /* destination is a AGF btree */
 {
 	struct xfs_agf	*agf = agbp->b_addr;
-	xfs_buf_t	*agflbp;/* buffer for a.g. freelist structure */
+	struct xfs_buf	*agflbp;/* buffer for a.g. freelist structure */
 	xfs_agblock_t	bno;	/* block number returned */
 	__be32		*agfl_bno;
 	int		error;
@@ -2711,7 +2711,7 @@ xfs_alloc_get_freelist(
 void
 xfs_alloc_log_agf(
 	xfs_trans_t	*tp,	/* transaction pointer */
-	xfs_buf_t	*bp,	/* buffer for a.g. freelist header */
+	struct xfs_buf	*bp,	/* buffer for a.g. freelist header */
 	int		fields)	/* mask of fields to be logged (XFS_AGF_...) */
 {
 	int	first;		/* first byte offset */
@@ -2757,7 +2757,7 @@ xfs_alloc_pagf_init(
 	xfs_agnumber_t		agno,	/* allocation group number */
 	int			flags)	/* XFS_ALLOC_FLAGS_... */
 {
-	xfs_buf_t		*bp;
+	struct xfs_buf		*bp;
 	int			error;
 
 	error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp);
@@ -2772,8 +2772,8 @@ xfs_alloc_pagf_init(
 int					/* error */
 xfs_alloc_put_freelist(
 	xfs_trans_t		*tp,	/* transaction pointer */
-	xfs_buf_t		*agbp,	/* buffer for a.g. freelist header */
-	xfs_buf_t		*agflbp,/* buffer for a.g. free block array */
+	struct xfs_buf		*agbp,	/* buffer for a.g. freelist header */
+	struct xfs_buf		*agflbp,/* buffer for a.g. free block array */
 	xfs_agblock_t		bno,	/* block being freed */
 	int			btreeblk) /* block came from a AGF btree */
 {
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index d9a692484eae..bc446418e227 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -321,7 +321,7 @@ xfs_bmap_check_leaf_extents(
 	struct xfs_ifork	*ifp = XFS_IFORK_PTR(ip, whichfork);
 	struct xfs_btree_block	*block;	/* current btree block */
 	xfs_fsblock_t		bno;	/* block # of "block" */
-	xfs_buf_t		*bp;	/* buffer for "block" */
+	struct xfs_buf		*bp;	/* buffer for "block" */
 	int			error;	/* error return value */
 	xfs_extnum_t		i=0, j;	/* index into the extents list */
 	int			level;	/* btree level, for checking */
@@ -592,7 +592,7 @@ xfs_bmap_btree_to_extents(
 	struct xfs_btree_block	*rblock = ifp->if_broot;
 	struct xfs_btree_block	*cblock;/* child btree block */
 	xfs_fsblock_t		cbno;	/* child block number */
-	xfs_buf_t		*cbp;	/* child block's buffer */
+	struct xfs_buf		*cbp;	/* child block's buffer */
 	int			error;	/* error return value */
 	__be64			*pp;	/* ptr to block address */
 	struct xfs_owner_info	oinfo;
@@ -830,7 +830,7 @@ xfs_bmap_local_to_extents(
 	int		flags;		/* logging flags returned */
 	struct xfs_ifork *ifp;		/* inode fork pointer */
 	xfs_alloc_arg_t	args;		/* allocation arguments */
-	xfs_buf_t	*bp;		/* buffer for extent block */
+	struct xfs_buf	*bp;		/* buffer for extent block */
 	struct xfs_bmbt_irec rec;
 	struct xfs_iext_cursor icur;
 
@@ -6226,23 +6226,17 @@ xfs_bmap_validate_extent(
 	struct xfs_bmbt_irec	*irec)
 {
 	struct xfs_mount	*mp = ip->i_mount;
-	xfs_fsblock_t		endfsb;
-	bool			isrt;
 
-	isrt = XFS_IS_REALTIME_INODE(ip);
-	endfsb = irec->br_startblock + irec->br_blockcount - 1;
-	if (isrt && whichfork == XFS_DATA_FORK) {
-		if (!xfs_verify_rtbno(mp, irec->br_startblock))
-			return __this_address;
-		if (!xfs_verify_rtbno(mp, endfsb))
+	if (!xfs_verify_fileext(mp, irec->br_startoff, irec->br_blockcount))
+		return __this_address;
+
+	if (XFS_IS_REALTIME_INODE(ip) && whichfork == XFS_DATA_FORK) {
+		if (!xfs_verify_rtext(mp, irec->br_startblock,
+					  irec->br_blockcount))
 			return __this_address;
 	} else {
-		if (!xfs_verify_fsbno(mp, irec->br_startblock))
-			return __this_address;
-		if (!xfs_verify_fsbno(mp, endfsb))
-			return __this_address;
-		if (XFS_FSB_TO_AGNO(mp, irec->br_startblock) !=
-		    XFS_FSB_TO_AGNO(mp, endfsb))
+		if (!xfs_verify_fsbext(mp, irec->br_startblock,
+					   irec->br_blockcount))
 			return __this_address;
 	}
 	if (irec->br_state != XFS_EXT_NORM && whichfork != XFS_DATA_FORK)
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index ecec604e6e4d..976659190d27 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -639,8 +639,6 @@ xfs_bmbt_change_owner(
 	ASSERT(XFS_IFORK_PTR(ip, whichfork)->if_format == XFS_DINODE_FMT_BTREE);
 
 	cur = xfs_bmbt_init_cursor(ip->i_mount, tp, ip, whichfork);
-	if (!cur)
-		return -ENOMEM;
 	cur->bc_ino.flags |= XFS_BTCUR_BMBT_INVALID_OWNER;
 
 	error = xfs_btree_change_owner(cur, new_owner, buffer_list);
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index 2d25bab68764..c4d7a9241dc3 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -397,7 +397,7 @@ xfs_btree_dup_cursor(
 	xfs_btree_cur_t	*cur,		/* input cursor */
 	xfs_btree_cur_t	**ncur)		/* output cursor */
 {
-	xfs_buf_t	*bp;		/* btree block's buffer pointer */
+	struct xfs_buf	*bp;		/* btree block's buffer pointer */
 	int		error;		/* error return value */
 	int		i;		/* level number of btree block */
 	xfs_mount_t	*mp;		/* mount structure for filesystem */
@@ -701,7 +701,7 @@ xfs_btree_firstrec(
 	int			level)	/* level to change */
 {
 	struct xfs_btree_block	*block;	/* generic btree block pointer */
-	xfs_buf_t		*bp;	/* buffer containing block */
+	struct xfs_buf		*bp;	/* buffer containing block */
 
 	/*
 	 * Get the block pointer for this level.
@@ -731,7 +731,7 @@ xfs_btree_lastrec(
 	int			level)	/* level to change */
 {
 	struct xfs_btree_block	*block;	/* generic btree block pointer */
-	xfs_buf_t		*bp;	/* buffer containing block */
+	struct xfs_buf		*bp;	/* buffer containing block */
 
 	/*
 	 * Get the block pointer for this level.
@@ -993,7 +993,7 @@ STATIC void
 xfs_btree_setbuf(
 	xfs_btree_cur_t		*cur,	/* btree cursor */
 	int			lev,	/* level in btree */
-	xfs_buf_t		*bp)	/* new buffer to set */
+	struct xfs_buf		*bp)	/* new buffer to set */
 {
 	struct xfs_btree_block	*b;	/* btree block */
 
@@ -1636,7 +1636,7 @@ xfs_btree_decrement(
 	int			*stat)		/* success/failure */
 {
 	struct xfs_btree_block	*block;
-	xfs_buf_t		*bp;
+	struct xfs_buf		*bp;
 	int			error;		/* error return value */
 	int			lev;
 	union xfs_btree_ptr	ptr;
@@ -4070,7 +4070,7 @@ xfs_btree_delrec(
 	 * surviving block, and log it.
 	 */
 	xfs_btree_set_numrecs(left, lrecs + rrecs);
-	xfs_btree_get_sibling(cur, right, &cptr, XFS_BB_RIGHTSIB),
+	xfs_btree_get_sibling(cur, right, &cptr, XFS_BB_RIGHTSIB);
 	xfs_btree_set_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB);
 	xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
 
diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h
index dd764da08f6f..630388b72dbe 100644
--- a/fs/xfs/libxfs/xfs_format.h
+++ b/fs/xfs/libxfs/xfs_format.h
@@ -468,11 +468,13 @@ xfs_sb_has_ro_compat_feature(
 #define XFS_SB_FEAT_INCOMPAT_SPINODES	(1 << 1)	/* sparse inode chunks */
 #define XFS_SB_FEAT_INCOMPAT_META_UUID	(1 << 2)	/* metadata UUID */
 #define XFS_SB_FEAT_INCOMPAT_BIGTIME	(1 << 3)	/* large timestamps */
+#define XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR (1 << 4)	/* needs xfs_repair */
 #define XFS_SB_FEAT_INCOMPAT_ALL \
 		(XFS_SB_FEAT_INCOMPAT_FTYPE|	\
 		 XFS_SB_FEAT_INCOMPAT_SPINODES|	\
 		 XFS_SB_FEAT_INCOMPAT_META_UUID| \
-		 XFS_SB_FEAT_INCOMPAT_BIGTIME)
+		 XFS_SB_FEAT_INCOMPAT_BIGTIME| \
+		 XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR)
 
 #define XFS_SB_FEAT_INCOMPAT_UNKNOWN	~XFS_SB_FEAT_INCOMPAT_ALL
 static inline bool
@@ -584,6 +586,12 @@ static inline bool xfs_sb_version_hasinobtcounts(struct xfs_sb *sbp)
 		(sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_INOBTCNT);
 }
 
+static inline bool xfs_sb_version_needsrepair(struct xfs_sb *sbp)
+{
+	return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 &&
+		(sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR);
+}
+
 /*
  * end of superblock version macros
  */
@@ -625,7 +633,6 @@ xfs_is_quota_inode(struct xfs_sb *sbp, xfs_ino_t ino)
 #define XFS_B_TO_FSB(mp,b)	\
 	((((uint64_t)(b)) + (mp)->m_blockmask) >> (mp)->m_sb.sb_blocklog)
 #define XFS_B_TO_FSBT(mp,b)	(((uint64_t)(b)) >> (mp)->m_sb.sb_blocklog)
-#define XFS_B_FSB_OFFSET(mp,b)	((b) & (mp)->m_blockmask)
 
 /*
  * Allocation group header
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 974e71bc4a3a..69b228fce81a 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -607,13 +607,13 @@ error:
 
 /*
  * Allocate new inodes in the allocation group specified by agbp.
- * Return 0 for success, else error code.
+ * Returns 0 if inodes were allocated in this AG; 1 if there was no space
+ * in this AG; or the usual negative error code.
  */
 STATIC int
 xfs_ialloc_ag_alloc(
 	struct xfs_trans	*tp,
-	struct xfs_buf		*agbp,
-	int			*alloc)
+	struct xfs_buf		*agbp)
 {
 	struct xfs_agi		*agi;
 	struct xfs_alloc_arg	args;
@@ -795,10 +795,9 @@ sparse_alloc:
 		allocmask = (1 << (newlen / XFS_INODES_PER_HOLEMASK_BIT)) - 1;
 	}
 
-	if (args.fsbno == NULLFSBLOCK) {
-		*alloc = 0;
-		return 0;
-	}
+	if (args.fsbno == NULLFSBLOCK)
+		return 1;
+
 	ASSERT(args.len == args.minlen);
 
 	/*
@@ -903,7 +902,6 @@ sparse_alloc:
 	 */
 	xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, (long)newlen);
 	xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, (long)newlen);
-	*alloc = 1;
 	return 0;
 }
 
@@ -1570,7 +1568,7 @@ xfs_dialloc_ag_update_inobt(
  * The caller selected an AG for us, and made sure that free inodes are
  * available.
  */
-STATIC int
+int
 xfs_dialloc_ag(
 	struct xfs_trans	*tp,
 	struct xfs_buf		*agbp,
@@ -1682,65 +1680,78 @@ error_cur:
 	return error;
 }
 
+static int
+xfs_dialloc_roll(
+	struct xfs_trans	**tpp,
+	struct xfs_buf		*agibp)
+{
+	struct xfs_trans	*tp = *tpp;
+	struct xfs_dquot_acct	*dqinfo;
+	int			error;
+
+	/*
+	 * Hold to on to the agibp across the commit so no other allocation can
+	 * come in and take the free inodes we just allocated for our caller.
+	 */
+	xfs_trans_bhold(tp, agibp);
+
+	/*
+	 * We want the quota changes to be associated with the next transaction,
+	 * NOT this one. So, detach the dqinfo from this and attach it to the
+	 * next transaction.
+	 */
+	dqinfo = tp->t_dqinfo;
+	tp->t_dqinfo = NULL;
+
+	error = xfs_trans_roll(&tp);
+
+	/* Re-attach the quota info that we detached from prev trx. */
+	tp->t_dqinfo = dqinfo;
+
+	*tpp = tp;
+	if (error)
+		return error;
+	xfs_trans_bjoin(tp, agibp);
+	return 0;
+}
+
 /*
- * Allocate an inode on disk.
- *
- * Mode is used to tell whether the new inode will need space, and whether it
- * is a directory.
+ * Select and prepare an AG for inode allocation.
  *
- * This function is designed to be called twice if it has to do an allocation
- * to make more free inodes.  On the first call, *IO_agbp should be set to NULL.
- * If an inode is available without having to performn an allocation, an inode
- * number is returned.  In this case, *IO_agbp is set to NULL.  If an allocation
- * needs to be done, xfs_dialloc returns the current AGI buffer in *IO_agbp.
- * The caller should then commit the current transaction, allocate a
- * new transaction, and call xfs_dialloc() again, passing in the previous value
- * of *IO_agbp.  IO_agbp should be held across the transactions. Since the AGI
- * buffer is locked across the two calls, the second call is guaranteed to have
- * a free inode available.
+ * Mode is used to tell whether the new inode is a directory and hence where to
+ * locate it.
  *
- * Once we successfully pick an inode its number is returned and the on-disk
- * data structures are updated.  The inode itself is not read in, since doing so
- * would break ordering constraints with xfs_reclaim.
+ * This function will ensure that the selected AG has free inodes available to
+ * allocate from. The selected AGI will be returned locked to the caller, and it
+ * will allocate more free inodes if required. If no free inodes are found or
+ * can be allocated, no AGI will be returned.
  */
 int
-xfs_dialloc(
-	struct xfs_trans	*tp,
+xfs_dialloc_select_ag(
+	struct xfs_trans	**tpp,
 	xfs_ino_t		parent,
 	umode_t			mode,
-	struct xfs_buf		**IO_agbp,
-	xfs_ino_t		*inop)
+	struct xfs_buf		**IO_agbp)
 {
-	struct xfs_mount	*mp = tp->t_mountp;
+	struct xfs_mount	*mp = (*tpp)->t_mountp;
 	struct xfs_buf		*agbp;
 	xfs_agnumber_t		agno;
 	int			error;
-	int			ialloced;
-	int			noroom = 0;
+	bool			noroom = false;
 	xfs_agnumber_t		start_agno;
 	struct xfs_perag	*pag;
 	struct xfs_ino_geometry	*igeo = M_IGEO(mp);
-	int			okalloc = 1;
+	bool			okalloc = true;
 
-	if (*IO_agbp) {
-		/*
-		 * If the caller passes in a pointer to the AGI buffer,
-		 * continue where we left off before.  In this case, we
-		 * know that the allocation group has free inodes.
-		 */
-		agbp = *IO_agbp;
-		goto out_alloc;
-	}
+	*IO_agbp = NULL;
 
 	/*
 	 * We do not have an agbp, so select an initial allocation
 	 * group for inode allocation.
 	 */
-	start_agno = xfs_ialloc_ag_select(tp, parent, mode);
-	if (start_agno == NULLAGNUMBER) {
-		*inop = NULLFSINO;
+	start_agno = xfs_ialloc_ag_select(*tpp, parent, mode);
+	if (start_agno == NULLAGNUMBER)
 		return 0;
-	}
 
 	/*
 	 * If we have already hit the ceiling of inode blocks then clear
@@ -1753,8 +1764,8 @@ xfs_dialloc(
 	if (igeo->maxicount &&
 	    percpu_counter_read_positive(&mp->m_icount) + igeo->ialloc_inos
 							> igeo->maxicount) {
-		noroom = 1;
-		okalloc = 0;
+		noroom = true;
+		okalloc = false;
 	}
 
 	/*
@@ -1771,9 +1782,9 @@ xfs_dialloc(
 		}
 
 		if (!pag->pagi_init) {
-			error = xfs_ialloc_pagi_init(mp, tp, agno);
+			error = xfs_ialloc_pagi_init(mp, *tpp, agno);
 			if (error)
-				goto out_error;
+				break;
 		}
 
 		/*
@@ -1786,64 +1797,59 @@ xfs_dialloc(
 		 * Then read in the AGI buffer and recheck with the AGI buffer
 		 * lock held.
 		 */
-		error = xfs_ialloc_read_agi(mp, tp, agno, &agbp);
+		error = xfs_ialloc_read_agi(mp, *tpp, agno, &agbp);
 		if (error)
-			goto out_error;
+			break;
 
 		if (pag->pagi_freecount) {
 			xfs_perag_put(pag);
-			goto out_alloc;
+			goto found_ag;
 		}
 
 		if (!okalloc)
 			goto nextag_relse_buffer;
 
+		error = xfs_ialloc_ag_alloc(*tpp, agbp);
+		if (error < 0) {
+			xfs_trans_brelse(*tpp, agbp);
 
-		error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced);
-		if (error) {
-			xfs_trans_brelse(tp, agbp);
-
-			if (error != -ENOSPC)
-				goto out_error;
-
-			xfs_perag_put(pag);
-			*inop = NULLFSINO;
-			return 0;
+			if (error == -ENOSPC)
+				error = 0;
+			break;
 		}
 
-		if (ialloced) {
+		if (error == 0) {
 			/*
-			 * We successfully allocated some inodes, return
-			 * the current context to the caller so that it
-			 * can commit the current transaction and call
-			 * us again where we left off.
+			 * We successfully allocated space for an inode cluster
+			 * in this AG.  Roll the transaction so that we can
+			 * allocate one of the new inodes.
 			 */
 			ASSERT(pag->pagi_freecount > 0);
 			xfs_perag_put(pag);
 
-			*IO_agbp = agbp;
-			*inop = NULLFSINO;
-			return 0;
+			error = xfs_dialloc_roll(tpp, agbp);
+			if (error) {
+				xfs_buf_relse(agbp);
+				return error;
+			}
+			goto found_ag;
 		}
 
 nextag_relse_buffer:
-		xfs_trans_brelse(tp, agbp);
+		xfs_trans_brelse(*tpp, agbp);
 nextag:
 		xfs_perag_put(pag);
 		if (++agno == mp->m_sb.sb_agcount)
 			agno = 0;
-		if (agno == start_agno) {
-			*inop = NULLFSINO;
+		if (agno == start_agno)
 			return noroom ? -ENOSPC : 0;
-		}
 	}
 
-out_alloc:
-	*IO_agbp = NULL;
-	return xfs_dialloc_ag(tp, agbp, parent, inop);
-out_error:
 	xfs_perag_put(pag);
 	return error;
+found_ag:
+	*IO_agbp = agbp;
+	return 0;
 }
 
 /*
@@ -2453,7 +2459,7 @@ out_map:
 void
 xfs_ialloc_log_agi(
 	xfs_trans_t	*tp,		/* transaction pointer */
-	xfs_buf_t	*bp,		/* allocation group header buffer */
+	struct xfs_buf	*bp,		/* allocation group header buffer */
 	int		fields)		/* bitmask of fields to log */
 {
 	int			first;		/* first byte number */
@@ -2674,7 +2680,7 @@ xfs_ialloc_pagi_init(
 	xfs_trans_t	*tp,		/* transaction pointer */
 	xfs_agnumber_t	agno)		/* allocation group number */
 {
-	xfs_buf_t	*bp = NULL;
+	struct xfs_buf	*bp = NULL;
 	int		error;
 
 	error = xfs_ialloc_read_agi(mp, tp, agno, &bp);
diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h
index 72b3468b97b1..3511086a7ae1 100644
--- a/fs/xfs/libxfs/xfs_ialloc.h
+++ b/fs/xfs/libxfs/xfs_ialloc.h
@@ -37,30 +37,26 @@ xfs_make_iptr(struct xfs_mount *mp, struct xfs_buf *b, int o)
  * Mode is used to tell whether the new inode will need space, and whether
  * it is a directory.
  *
- * To work within the constraint of one allocation per transaction,
- * xfs_dialloc() is designed to be called twice if it has to do an
- * allocation to make more free inodes.  If an inode is
- * available without an allocation, agbp would be set to the current
- * agbp and alloc_done set to false.
- * If an allocation needed to be done, agbp would be set to the
- * inode header of the allocation group and alloc_done set to true.
- * The caller should then commit the current transaction and allocate a new
- * transaction.  xfs_dialloc() should then be called again with
- * the agbp value returned from the previous call.
- *
- * Once we successfully pick an inode its number is returned and the
- * on-disk data structures are updated.  The inode itself is not read
- * in, since doing so would break ordering constraints with xfs_reclaim.
- *
- * *agbp should be set to NULL on the first call, *alloc_done set to FALSE.
+ * There are two phases to inode allocation: selecting an AG and ensuring
+ * that it contains free inodes, followed by allocating one of the free
+ * inodes. xfs_dialloc_select_ag() does the former and returns a locked AGI
+ * to the caller, ensuring that followup call to xfs_dialloc_ag() will
+ * have free inodes to allocate from. xfs_dialloc_ag() will return the inode
+ * number of the free inode we allocated.
  */
 int					/* error */
-xfs_dialloc(
-	struct xfs_trans *tp,		/* transaction pointer */
+xfs_dialloc_select_ag(
+	struct xfs_trans **tpp,		/* double pointer of transaction */
 	xfs_ino_t	parent,		/* parent inode (directory) */
 	umode_t		mode,		/* mode bits for new inode */
-	struct xfs_buf	**agbp,		/* buf for a.g. inode header */
-	xfs_ino_t	*inop);		/* inode number allocated */
+	struct xfs_buf	**IO_agbp);
+
+int
+xfs_dialloc_ag(
+	struct xfs_trans	*tp,
+	struct xfs_buf		*agbp,
+	xfs_ino_t		parent,
+	xfs_ino_t		*inop);
 
 /*
  * Free disk inode.  Carefully avoids touching the incore inode, all
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index cc919a2ee870..4c5831646bd9 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -672,11 +672,6 @@ xfs_inobt_cur(
 		return error;
 
 	cur = xfs_inobt_init_cursor(mp, tp, *agi_bpp, agno, which);
-	if (!cur) {
-		xfs_trans_brelse(tp, *agi_bpp);
-		*agi_bpp = NULL;
-		return -ENOMEM;
-	}
 	*curpp = cur;
 	return 0;
 }
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index c667c63f2cb0..4d7410e49db4 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -547,10 +547,6 @@ xfs_dinode_verify(
 	if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags & XFS_DIFLAG_REALTIME))
 		return __this_address;
 
-	/* don't let reflink and dax mix */
-	if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags2 & XFS_DIFLAG2_DAX))
-		return __this_address;
-
 	/* COW extent size hint validation */
 	fa = xfs_inode_validate_cowextsize(mp, be32_to_cpu(dip->di_cowextsize),
 			mode, flags, flags2);
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index 2076627243b0..2037b9f23069 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -1179,10 +1179,6 @@ xfs_refcount_finish_one(
 			return error;
 
 		rcur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno);
-		if (!rcur) {
-			error = -ENOMEM;
-			goto out_cur;
-		}
 		rcur->bc_ag.refc.nr_ops = nr_ops;
 		rcur->bc_ag.refc.shape_changes = shape_changes;
 	}
@@ -1217,11 +1213,6 @@ xfs_refcount_finish_one(
 		trace_xfs_refcount_finish_one_leftover(mp, agno, type,
 				bno, blockcount, new_agbno, *new_len);
 	return error;
-
-out_cur:
-	xfs_trans_brelse(tp, agbp);
-
-	return error;
 }
 
 /*
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index 2668ebe02865..10e0cf9949a2 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -2404,10 +2404,6 @@ xfs_rmap_finish_one(
 			return -EFSCORRUPTED;
 
 		rcur = xfs_rmapbt_init_cursor(mp, tp, agbp, agno);
-		if (!rcur) {
-			error = -ENOMEM;
-			goto out_cur;
-		}
 	}
 	*pcur = rcur;
 
@@ -2446,11 +2442,6 @@ xfs_rmap_finish_one(
 		error = -EFSCORRUPTED;
 	}
 	return error;
-
-out_cur:
-	xfs_trans_brelse(tp, agbp);
-
-	return error;
 }
 
 /*
diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c
index 6c1aba16113c..fe3a49575ff3 100644
--- a/fs/xfs/libxfs/xfs_rtbitmap.c
+++ b/fs/xfs/libxfs/xfs_rtbitmap.c
@@ -56,9 +56,9 @@ xfs_rtbuf_get(
 	xfs_trans_t	*tp,		/* transaction pointer */
 	xfs_rtblock_t	block,		/* block number in bitmap or summary */
 	int		issum,		/* is summary not bitmap */
-	xfs_buf_t	**bpp)		/* output: buffer for the block */
+	struct xfs_buf	**bpp)		/* output: buffer for the block */
 {
-	xfs_buf_t	*bp;		/* block buffer, result */
+	struct xfs_buf	*bp;		/* block buffer, result */
 	xfs_inode_t	*ip;		/* bitmap or summary inode */
 	xfs_bmbt_irec_t	map;
 	int		nmap = 1;
@@ -101,7 +101,7 @@ xfs_rtfind_back(
 	xfs_rtword_t	*b;		/* current word in buffer */
 	int		bit;		/* bit number in the word */
 	xfs_rtblock_t	block;		/* bitmap block number */
-	xfs_buf_t	*bp;		/* buf for the block */
+	struct xfs_buf	*bp;		/* buf for the block */
 	xfs_rtword_t	*bufp;		/* starting word in buffer */
 	int		error;		/* error value */
 	xfs_rtblock_t	firstbit;	/* first useful bit in the word */
@@ -276,7 +276,7 @@ xfs_rtfind_forw(
 	xfs_rtword_t	*b;		/* current word in buffer */
 	int		bit;		/* bit number in the word */
 	xfs_rtblock_t	block;		/* bitmap block number */
-	xfs_buf_t	*bp;		/* buf for the block */
+	struct xfs_buf	*bp;		/* buf for the block */
 	xfs_rtword_t	*bufp;		/* starting word in buffer */
 	int		error;		/* error value */
 	xfs_rtblock_t	i;		/* current bit number rel. to start */
@@ -447,11 +447,11 @@ xfs_rtmodify_summary_int(
 	int		log,		/* log2 of extent size */
 	xfs_rtblock_t	bbno,		/* bitmap block number */
 	int		delta,		/* change to make to summary info */
-	xfs_buf_t	**rbpp,		/* in/out: summary block buffer */
+	struct xfs_buf	**rbpp,		/* in/out: summary block buffer */
 	xfs_fsblock_t	*rsb,		/* in/out: summary block number */
 	xfs_suminfo_t	*sum)		/* out: summary info for this block */
 {
-	xfs_buf_t	*bp;		/* buffer for the summary block */
+	struct xfs_buf	*bp;		/* buffer for the summary block */
 	int		error;		/* error value */
 	xfs_fsblock_t	sb;		/* summary fsblock */
 	int		so;		/* index into the summary file */
@@ -517,7 +517,7 @@ xfs_rtmodify_summary(
 	int		log,		/* log2 of extent size */
 	xfs_rtblock_t	bbno,		/* bitmap block number */
 	int		delta,		/* change to make to summary info */
-	xfs_buf_t	**rbpp,		/* in/out: summary block buffer */
+	struct xfs_buf	**rbpp,		/* in/out: summary block buffer */
 	xfs_fsblock_t	*rsb)		/* in/out: summary block number */
 {
 	return xfs_rtmodify_summary_int(mp, tp, log, bbno,
@@ -539,7 +539,7 @@ xfs_rtmodify_range(
 	xfs_rtword_t	*b;		/* current word in buffer */
 	int		bit;		/* bit number in the word */
 	xfs_rtblock_t	block;		/* bitmap block number */
-	xfs_buf_t	*bp;		/* buf for the block */
+	struct xfs_buf	*bp;		/* buf for the block */
 	xfs_rtword_t	*bufp;		/* starting word in buffer */
 	int		error;		/* error value */
 	xfs_rtword_t	*first;		/* first used word in the buffer */
@@ -690,7 +690,7 @@ xfs_rtfree_range(
 	xfs_trans_t	*tp,		/* transaction pointer */
 	xfs_rtblock_t	start,		/* starting block to free */
 	xfs_extlen_t	len,		/* length to free */
-	xfs_buf_t	**rbpp,		/* in/out: summary block buffer */
+	struct xfs_buf	**rbpp,		/* in/out: summary block buffer */
 	xfs_fsblock_t	*rsb)		/* in/out: summary block number */
 {
 	xfs_rtblock_t	end;		/* end of the freed extent */
@@ -773,7 +773,7 @@ xfs_rtcheck_range(
 	xfs_rtword_t	*b;		/* current word in buffer */
 	int		bit;		/* bit number in the word */
 	xfs_rtblock_t	block;		/* bitmap block number */
-	xfs_buf_t	*bp;		/* buf for the block */
+	struct xfs_buf	*bp;		/* buf for the block */
 	xfs_rtword_t	*bufp;		/* starting word in buffer */
 	int		error;		/* error value */
 	xfs_rtblock_t	i;		/* current bit number rel. to start */
@@ -969,7 +969,7 @@ xfs_rtfree_extent(
 	int		error;		/* error value */
 	xfs_mount_t	*mp;		/* file system mount structure */
 	xfs_fsblock_t	sb;		/* summary file block number */
-	xfs_buf_t	*sumbp = NULL;	/* summary file block buffer */
+	struct xfs_buf	*sumbp = NULL;	/* summary file block buffer */
 
 	mp = tp->t_mountp;
 
diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c
index 5aeafa59ed27..bbda117e5d85 100644
--- a/fs/xfs/libxfs/xfs_sb.c
+++ b/fs/xfs/libxfs/xfs_sb.c
@@ -360,21 +360,18 @@ xfs_validate_sb_common(
 		}
 	}
 
-	if (sbp->sb_unit) {
-		if (!xfs_sb_version_hasdalign(sbp) ||
-		    sbp->sb_unit > sbp->sb_width ||
-		    (sbp->sb_width % sbp->sb_unit) != 0) {
-			xfs_notice(mp, "SB stripe unit sanity check failed");
-			return -EFSCORRUPTED;
-		}
-	} else if (xfs_sb_version_hasdalign(sbp)) {
+	/*
+	 * Either (sb_unit and !hasdalign) or (!sb_unit and hasdalign)
+	 * would imply the image is corrupted.
+	 */
+	if (!!sbp->sb_unit ^ xfs_sb_version_hasdalign(sbp)) {
 		xfs_notice(mp, "SB stripe alignment sanity check failed");
 		return -EFSCORRUPTED;
-	} else if (sbp->sb_width) {
-		xfs_notice(mp, "SB stripe width sanity check failed");
-		return -EFSCORRUPTED;
 	}
 
+	if (!xfs_validate_stripe_geometry(mp, XFS_FSB_TO_B(mp, sbp->sb_unit),
+			XFS_FSB_TO_B(mp, sbp->sb_width), 0, false))
+		return -EFSCORRUPTED;
 
 	if (xfs_sb_version_hascrc(&mp->m_sb) &&
 	    sbp->sb_blocksize < XFS_MIN_CRC_BLOCKSIZE) {
@@ -383,17 +380,6 @@ xfs_validate_sb_common(
 	}
 
 	/*
-	 * Until this is fixed only page-sized or smaller data blocks work.
-	 */
-	if (unlikely(sbp->sb_blocksize > PAGE_SIZE)) {
-		xfs_warn(mp,
-		"File system with blocksize %d bytes. "
-		"Only pagesize (%ld) or less will currently work.",
-				sbp->sb_blocksize, PAGE_SIZE);
-		return -ENOSYS;
-	}
-
-	/*
 	 * Currently only very few inode sizes are supported.
 	 */
 	switch (sbp->sb_inodesize) {
@@ -408,22 +394,6 @@ xfs_validate_sb_common(
 		return -ENOSYS;
 	}
 
-	if (xfs_sb_validate_fsb_count(sbp, sbp->sb_dblocks) ||
-	    xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) {
-		xfs_warn(mp,
-		"file system too large to be mounted on this system.");
-		return -EFBIG;
-	}
-
-	/*
-	 * Don't touch the filesystem if a user tool thinks it owns the primary
-	 * superblock.  mkfs doesn't clear the flag from secondary supers, so
-	 * we don't check them at all.
-	 */
-	if (XFS_BUF_ADDR(bp) == XFS_SB_DADDR && sbp->sb_inprogress) {
-		xfs_warn(mp, "Offline file system operation in progress!");
-		return -EFSCORRUPTED;
-	}
 	return 0;
 }
 
@@ -1233,3 +1203,61 @@ xfs_sb_get_secondary(
 	*bpp = bp;
 	return 0;
 }
+
+/*
+ * sunit, swidth, sectorsize(optional with 0) should be all in bytes,
+ * so users won't be confused by values in error messages.
+ */
+bool
+xfs_validate_stripe_geometry(
+	struct xfs_mount	*mp,
+	__s64			sunit,
+	__s64			swidth,
+	int			sectorsize,
+	bool			silent)
+{
+	if (swidth > INT_MAX) {
+		if (!silent)
+			xfs_notice(mp,
+"stripe width (%lld) is too large", swidth);
+		return false;
+	}
+
+	if (sunit > swidth) {
+		if (!silent)
+			xfs_notice(mp,
+"stripe unit (%lld) is larger than the stripe width (%lld)", sunit, swidth);
+		return false;
+	}
+
+	if (sectorsize && (int)sunit % sectorsize) {
+		if (!silent)
+			xfs_notice(mp,
+"stripe unit (%lld) must be a multiple of the sector size (%d)",
+				   sunit, sectorsize);
+		return false;
+	}
+
+	if (sunit && !swidth) {
+		if (!silent)
+			xfs_notice(mp,
+"invalid stripe unit (%lld) and stripe width of 0", sunit);
+		return false;
+	}
+
+	if (!sunit && swidth) {
+		if (!silent)
+			xfs_notice(mp,
+"invalid stripe width (%lld) and stripe unit of 0", swidth);
+		return false;
+	}
+
+	if (sunit && (int)swidth % (int)sunit) {
+		if (!silent)
+			xfs_notice(mp,
+"stripe width (%lld) must be a multiple of the stripe unit (%lld)",
+				   swidth, sunit);
+		return false;
+	}
+	return true;
+}
diff --git a/fs/xfs/libxfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h
index 92465a9a5162..f79f9dc632b6 100644
--- a/fs/xfs/libxfs/xfs_sb.h
+++ b/fs/xfs/libxfs/xfs_sb.h
@@ -42,4 +42,7 @@ extern int	xfs_sb_get_secondary(struct xfs_mount *mp,
 				struct xfs_trans *tp, xfs_agnumber_t agno,
 				struct xfs_buf **bpp);
 
+extern bool	xfs_validate_stripe_geometry(struct xfs_mount *mp,
+		__s64 sunit, __s64 swidth, int sectorsize, bool silent);
+
 #endif	/* __XFS_SB_H__ */
diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h
index c795ae47b3c9..8c61a461bf7b 100644
--- a/fs/xfs/libxfs/xfs_shared.h
+++ b/fs/xfs/libxfs/xfs_shared.h
@@ -62,7 +62,6 @@ void	xfs_log_get_max_trans_res(struct xfs_mount *mp,
 #define	XFS_TRANS_SB_DIRTY	0x02	/* superblock is modified */
 #define	XFS_TRANS_PERM_LOG_RES	0x04	/* xact took a permanent log res */
 #define	XFS_TRANS_SYNC		0x08	/* make commit synchronous */
-#define XFS_TRANS_DQ_DIRTY	0x10	/* at least one dquot in trx dirty */
 #define XFS_TRANS_RESERVE	0x20    /* OK to use reserved data blocks */
 #define XFS_TRANS_NO_WRITECOUNT 0x40	/* do not elevate SB writecount */
 #define XFS_TRANS_RES_FDBLKS	0x80	/* reserve newly freed blocks */
diff --git a/fs/xfs/libxfs/xfs_types.c b/fs/xfs/libxfs/xfs_types.c
index 4f595546a639..b254fbeaaa50 100644
--- a/fs/xfs/libxfs/xfs_types.c
+++ b/fs/xfs/libxfs/xfs_types.c
@@ -61,6 +61,29 @@ xfs_verify_fsbno(
 	return xfs_verify_agbno(mp, agno, XFS_FSB_TO_AGBNO(mp, fsbno));
 }
 
+/*
+ * Verify that a data device extent is fully contained inside the filesystem,
+ * does not cross an AG boundary, and does not point at static metadata.
+ */
+bool
+xfs_verify_fsbext(
+	struct xfs_mount	*mp,
+	xfs_fsblock_t		fsbno,
+	xfs_fsblock_t		len)
+{
+	if (fsbno + len <= fsbno)
+		return false;
+
+	if (!xfs_verify_fsbno(mp, fsbno))
+		return false;
+
+	if (!xfs_verify_fsbno(mp, fsbno + len - 1))
+		return false;
+
+	return  XFS_FSB_TO_AGNO(mp, fsbno) ==
+		XFS_FSB_TO_AGNO(mp, fsbno + len - 1);
+}
+
 /* Calculate the first and last possible inode number in an AG. */
 void
 xfs_agino_range(
@@ -175,6 +198,22 @@ xfs_verify_rtbno(
 	return rtbno < mp->m_sb.sb_rblocks;
 }
 
+/* Verify that a realtime device extent is fully contained inside the volume. */
+bool
+xfs_verify_rtext(
+	struct xfs_mount	*mp,
+	xfs_rtblock_t		rtbno,
+	xfs_rtblock_t		len)
+{
+	if (rtbno + len <= rtbno)
+		return false;
+
+	if (!xfs_verify_rtbno(mp, rtbno))
+		return false;
+
+	return xfs_verify_rtbno(mp, rtbno + len - 1);
+}
+
 /* Calculate the range of valid icount values. */
 void
 xfs_icount_range(
@@ -219,3 +258,28 @@ xfs_verify_dablk(
 
 	return dabno <= max_dablk;
 }
+
+/* Check that a file block offset does not exceed the maximum. */
+bool
+xfs_verify_fileoff(
+	struct xfs_mount	*mp,
+	xfs_fileoff_t		off)
+{
+	return off <= XFS_MAX_FILEOFF;
+}
+
+/* Check that a range of file block offsets do not exceed the maximum. */
+bool
+xfs_verify_fileext(
+	struct xfs_mount	*mp,
+	xfs_fileoff_t		off,
+	xfs_fileoff_t		len)
+{
+	if (off + len <= off)
+		return false;
+
+	if (!xfs_verify_fileoff(mp, off))
+		return false;
+
+	return xfs_verify_fileoff(mp, off + len - 1);
+}
diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h
index 397d94775440..064bd6e8c922 100644
--- a/fs/xfs/libxfs/xfs_types.h
+++ b/fs/xfs/libxfs/xfs_types.h
@@ -184,6 +184,8 @@ xfs_agblock_t xfs_ag_block_count(struct xfs_mount *mp, xfs_agnumber_t agno);
 bool xfs_verify_agbno(struct xfs_mount *mp, xfs_agnumber_t agno,
 		xfs_agblock_t agbno);
 bool xfs_verify_fsbno(struct xfs_mount *mp, xfs_fsblock_t fsbno);
+bool xfs_verify_fsbext(struct xfs_mount *mp, xfs_fsblock_t fsbno,
+		xfs_fsblock_t len);
 
 void xfs_agino_range(struct xfs_mount *mp, xfs_agnumber_t agno,
 		xfs_agino_t *first, xfs_agino_t *last);
@@ -195,9 +197,14 @@ bool xfs_verify_ino(struct xfs_mount *mp, xfs_ino_t ino);
 bool xfs_internal_inum(struct xfs_mount *mp, xfs_ino_t ino);
 bool xfs_verify_dir_ino(struct xfs_mount *mp, xfs_ino_t ino);
 bool xfs_verify_rtbno(struct xfs_mount *mp, xfs_rtblock_t rtbno);
+bool xfs_verify_rtext(struct xfs_mount *mp, xfs_rtblock_t rtbno,
+		xfs_rtblock_t len);
 bool xfs_verify_icount(struct xfs_mount *mp, unsigned long long icount);
 bool xfs_verify_dablk(struct xfs_mount *mp, xfs_fileoff_t off);
 void xfs_icount_range(struct xfs_mount *mp, unsigned long long *min,
 		unsigned long long *max);
+bool xfs_verify_fileoff(struct xfs_mount *mp, xfs_fileoff_t off);
+bool xfs_verify_fileext(struct xfs_mount *mp, xfs_fileoff_t off,
+		xfs_fileoff_t len);
 
 #endif	/* __XFS_TYPES_H__ */
diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c
index 401f71579ce6..23690f824ffa 100644
--- a/fs/xfs/scrub/agheader_repair.c
+++ b/fs/xfs/scrub/agheader_repair.c
@@ -829,8 +829,6 @@ xrep_agi_calc_from_btrees(
 
 		cur = xfs_inobt_init_cursor(mp, sc->tp, agi_bp, sc->sa.agno,
 				XFS_BTNUM_FINO);
-		if (error)
-			goto err;
 		error = xfs_btree_count_blocks(cur, &blocks);
 		if (error)
 			goto err;
diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c
index fed56d213a3f..33559c3a4bc3 100644
--- a/fs/xfs/scrub/bmap.c
+++ b/fs/xfs/scrub/bmap.c
@@ -319,7 +319,6 @@ xchk_bmap_iextent(
 	struct xfs_bmbt_irec	*irec)
 {
 	struct xfs_mount	*mp = info->sc->mp;
-	xfs_filblks_t		end;
 	int			error = 0;
 
 	/*
@@ -330,6 +329,10 @@ xchk_bmap_iextent(
 		xchk_fblock_set_corrupt(info->sc, info->whichfork,
 				irec->br_startoff);
 
+	if (!xfs_verify_fileext(mp, irec->br_startoff, irec->br_blockcount))
+		xchk_fblock_set_corrupt(info->sc, info->whichfork,
+				irec->br_startoff);
+
 	xchk_bmap_dirattr_extent(ip, info, irec);
 
 	/* There should never be a "hole" extent in either extent list. */
@@ -349,20 +352,12 @@ xchk_bmap_iextent(
 	if (irec->br_blockcount > MAXEXTLEN)
 		xchk_fblock_set_corrupt(info->sc, info->whichfork,
 				irec->br_startoff);
-	if (irec->br_startblock + irec->br_blockcount <= irec->br_startblock)
-		xchk_fblock_set_corrupt(info->sc, info->whichfork,
-				irec->br_startoff);
-	end = irec->br_startblock + irec->br_blockcount - 1;
 	if (info->is_rt &&
-	    (!xfs_verify_rtbno(mp, irec->br_startblock) ||
-	     !xfs_verify_rtbno(mp, end)))
+	    !xfs_verify_rtext(mp, irec->br_startblock, irec->br_blockcount))
 		xchk_fblock_set_corrupt(info->sc, info->whichfork,
 				irec->br_startoff);
 	if (!info->is_rt &&
-	    (!xfs_verify_fsbno(mp, irec->br_startblock) ||
-	     !xfs_verify_fsbno(mp, end) ||
-	     XFS_FSB_TO_AGNO(mp, irec->br_startblock) !=
-				XFS_FSB_TO_AGNO(mp, end)))
+	    !xfs_verify_fsbext(mp, irec->br_startblock, irec->br_blockcount))
 		xchk_fblock_set_corrupt(info->sc, info->whichfork,
 				irec->br_startoff);
 
@@ -563,10 +558,6 @@ xchk_bmap_check_ag_rmaps(
 		return error;
 
 	cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, agf, agno);
-	if (!cur) {
-		error = -ENOMEM;
-		goto out_agf;
-	}
 
 	sbcri.sc = sc;
 	sbcri.whichfork = whichfork;
@@ -575,7 +566,6 @@ xchk_bmap_check_ag_rmaps(
 		error = 0;
 
 	xfs_btree_del_cursor(cur, error);
-out_agf:
 	xfs_trans_brelse(sc->tp, agf);
 	return error;
 }
diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c
index 18876056e5e0..8ea6d4aa3f55 100644
--- a/fs/xfs/scrub/common.c
+++ b/fs/xfs/scrub/common.c
@@ -466,8 +466,6 @@ xchk_ag_btcur_init(
 		/* Set up a bnobt cursor for cross-referencing. */
 		sa->bno_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp,
 				agno, XFS_BTNUM_BNO);
-		if (!sa->bno_cur)
-			goto err;
 	}
 
 	if (sa->agf_bp &&
@@ -475,8 +473,6 @@ xchk_ag_btcur_init(
 		/* Set up a cntbt cursor for cross-referencing. */
 		sa->cnt_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp,
 				agno, XFS_BTNUM_CNT);
-		if (!sa->cnt_cur)
-			goto err;
 	}
 
 	/* Set up a inobt cursor for cross-referencing. */
@@ -484,8 +480,6 @@ xchk_ag_btcur_init(
 	    xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_INO)) {
 		sa->ino_cur = xfs_inobt_init_cursor(mp, sc->tp, sa->agi_bp,
 					agno, XFS_BTNUM_INO);
-		if (!sa->ino_cur)
-			goto err;
 	}
 
 	/* Set up a finobt cursor for cross-referencing. */
@@ -493,8 +487,6 @@ xchk_ag_btcur_init(
 	    xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_FINO)) {
 		sa->fino_cur = xfs_inobt_init_cursor(mp, sc->tp, sa->agi_bp,
 				agno, XFS_BTNUM_FINO);
-		if (!sa->fino_cur)
-			goto err;
 	}
 
 	/* Set up a rmapbt cursor for cross-referencing. */
@@ -502,8 +494,6 @@ xchk_ag_btcur_init(
 	    xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_RMAP)) {
 		sa->rmap_cur = xfs_rmapbt_init_cursor(mp, sc->tp, sa->agf_bp,
 				agno);
-		if (!sa->rmap_cur)
-			goto err;
 	}
 
 	/* Set up a refcountbt cursor for cross-referencing. */
@@ -511,13 +501,9 @@ xchk_ag_btcur_init(
 	    xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_REFC)) {
 		sa->refc_cur = xfs_refcountbt_init_cursor(mp, sc->tp,
 				sa->agf_bp, agno);
-		if (!sa->refc_cur)
-			goto err;
 	}
 
 	return 0;
-err:
-	return -ENOMEM;
 }
 
 /* Release the AG header context and btree cursors. */
diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c
index b045e95c2ea7..178b3455a170 100644
--- a/fs/xfs/scrub/dir.c
+++ b/fs/xfs/scrub/dir.c
@@ -66,8 +66,18 @@ xchk_dir_check_ftype(
 	 * eofblocks cleanup (which allocates what would be a nested
 	 * transaction), we can't use DONTCACHE here because DONTCACHE
 	 * inodes can trigger immediate inactive cleanup of the inode.
+	 *
+	 * If _iget returns -EINVAL or -ENOENT then the child inode number is
+	 * garbage and the directory is corrupt.  If the _iget returns
+	 * -EFSCORRUPTED or -EFSBADCRC then the child is corrupt which is a
+	 *  cross referencing error.  Any other error is an operational error.
 	 */
 	error = xfs_iget(mp, sdc->sc->tp, inum, 0, 0, &ip);
+	if (error == -EINVAL || error == -ENOENT) {
+		error = -EFSCORRUPTED;
+		xchk_fblock_process_error(sdc->sc, XFS_DATA_FORK, 0, &error);
+		goto out;
+	}
 	if (!xchk_fblock_xref_process_error(sdc->sc, XFS_DATA_FORK, offset,
 			&error))
 		goto out;
@@ -105,6 +115,7 @@ xchk_dir_actor(
 	struct xfs_name		xname;
 	xfs_ino_t		lookup_ino;
 	xfs_dablk_t		offset;
+	bool			checked_ftype = false;
 	int			error = 0;
 
 	sdc = container_of(dir_iter, struct xchk_dir_ctx, dir_iter);
@@ -133,6 +144,7 @@ xchk_dir_actor(
 		if (xfs_sb_version_hasftype(&mp->m_sb) && type != DT_DIR)
 			xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK,
 					offset);
+		checked_ftype = true;
 		if (ino != ip->i_ino)
 			xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK,
 					offset);
@@ -144,6 +156,7 @@ xchk_dir_actor(
 		if (xfs_sb_version_hasftype(&mp->m_sb) && type != DT_DIR)
 			xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK,
 					offset);
+		checked_ftype = true;
 		if (ip->i_ino == mp->m_sb.sb_rootino && ino != ip->i_ino)
 			xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK,
 					offset);
@@ -167,9 +180,11 @@ xchk_dir_actor(
 	}
 
 	/* Verify the file type.  This function absorbs error codes. */
-	error = xchk_dir_check_ftype(sdc, offset, lookup_ino, type);
-	if (error)
-		goto out;
+	if (!checked_ftype) {
+		error = xchk_dir_check_ftype(sdc, offset, lookup_ino, type);
+		if (error)
+			goto out;
+	}
 out:
 	/*
 	 * A negative error code returned here is supposed to cause the
diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c
index bb25ff1b770d..faf65eb5bd31 100644
--- a/fs/xfs/scrub/inode.c
+++ b/fs/xfs/scrub/inode.c
@@ -185,10 +185,6 @@ xchk_inode_flags2(
 	if ((flags & XFS_DIFLAG_REALTIME) && (flags2 & XFS_DIFLAG2_REFLINK))
 		goto bad;
 
-	/* dax and reflink make no sense, currently */
-	if ((flags2 & XFS_DIFLAG2_DAX) && (flags2 & XFS_DIFLAG2_REFLINK))
-		goto bad;
-
 	/* no bigtime iflag without the bigtime feature */
 	if (xfs_dinode_has_bigtime(dip) &&
 	    !xfs_sb_version_hasbigtime(&mp->m_sb))
diff --git a/fs/xfs/scrub/parent.c b/fs/xfs/scrub/parent.c
index 855aa8bcab64..66c35f6dfc24 100644
--- a/fs/xfs/scrub/parent.c
+++ b/fs/xfs/scrub/parent.c
@@ -164,13 +164,13 @@ xchk_parent_validate(
 	 * can't use DONTCACHE here because DONTCACHE inodes can trigger
 	 * immediate inactive cleanup of the inode.
 	 *
-	 * If _iget returns -EINVAL then the parent inode number is garbage
-	 * and the directory is corrupt.  If the _iget returns -EFSCORRUPTED
-	 * or -EFSBADCRC then the parent is corrupt which is a cross
-	 * referencing error.  Any other error is an operational error.
+	 * If _iget returns -EINVAL or -ENOENT then the parent inode number is
+	 * garbage and the directory is corrupt.  If the _iget returns
+	 * -EFSCORRUPTED or -EFSBADCRC then the parent is corrupt which is a
+	 *  cross referencing error.  Any other error is an operational error.
 	 */
 	error = xfs_iget(mp, sc->tp, dnum, XFS_IGET_UNTRUSTED, 0, &dp);
-	if (error == -EINVAL) {
+	if (error == -EINVAL || error == -ENOENT) {
 		error = -EFSCORRUPTED;
 		xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error);
 		goto out;
diff --git a/fs/xfs/scrub/rtbitmap.c b/fs/xfs/scrub/rtbitmap.c
index 76e4ffe0315b..d409ca592178 100644
--- a/fs/xfs/scrub/rtbitmap.c
+++ b/fs/xfs/scrub/rtbitmap.c
@@ -52,9 +52,7 @@ xchk_rtbitmap_rec(
 	startblock = rec->ar_startext * tp->t_mountp->m_sb.sb_rextsize;
 	blockcount = rec->ar_extcount * tp->t_mountp->m_sb.sb_rextsize;
 
-	if (startblock + blockcount <= startblock ||
-	    !xfs_verify_rtbno(sc->mp, startblock) ||
-	    !xfs_verify_rtbno(sc->mp, startblock + blockcount - 1))
+	if (!xfs_verify_rtext(sc->mp, startblock, blockcount))
 		xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0);
 	return 0;
 }
diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c
index c544951a0c07..779cb73b3d00 100644
--- a/fs/xfs/xfs_acl.c
+++ b/fs/xfs/xfs_acl.c
@@ -16,6 +16,7 @@
 #include "xfs_acl.h"
 #include "xfs_da_format.h"
 #include "xfs_da_btree.h"
+#include "xfs_trans.h"
 
 #include <linux/posix_acl_xattr.h>
 
@@ -212,21 +213,28 @@ __xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
 }
 
 static int
-xfs_set_mode(struct inode *inode, umode_t mode)
+xfs_acl_set_mode(
+	struct inode		*inode,
+	umode_t			mode)
 {
-	int error = 0;
-
-	if (mode != inode->i_mode) {
-		struct iattr iattr;
+	struct xfs_inode	*ip = XFS_I(inode);
+	struct xfs_mount	*mp = ip->i_mount;
+	struct xfs_trans	*tp;
+	int			error;
 
-		iattr.ia_valid = ATTR_MODE | ATTR_CTIME;
-		iattr.ia_mode = mode;
-		iattr.ia_ctime = current_time(inode);
+	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp);
+	if (error)
+		return error;
 
-		error = xfs_setattr_nonsize(XFS_I(inode), &iattr, XFS_ATTR_NOACL);
-	}
+	xfs_ilock(ip, XFS_ILOCK_EXCL);
+	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+	inode->i_mode = mode;
+	inode->i_ctime = current_time(inode);
+	xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
 
-	return error;
+	if (mp->m_flags & XFS_MOUNT_WSYNC)
+		xfs_trans_set_sync(tp);
+	return xfs_trans_commit(tp);
 }
 
 int
@@ -251,18 +259,14 @@ xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
 	}
 
  set_acl:
-	error =  __xfs_set_acl(inode, acl, type);
-	if (error)
-		return error;
-
 	/*
 	 * We set the mode after successfully updating the ACL xattr because the
 	 * xattr update can fail at ENOSPC and we don't want to change the mode
 	 * if the ACL update hasn't been applied.
 	 */
-	if (set_mode)
-		error = xfs_set_mode(inode, mode);
-
+	error =  __xfs_set_acl(inode, acl, type);
+	if (!error && set_mode && mode != inode->i_mode)
+		error = xfs_acl_set_mode(inode, mode);
 	return error;
 }
 
diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c
index 9e16a4d0f97c..93e4d8ae6e92 100644
--- a/fs/xfs/xfs_bmap_item.c
+++ b/fs/xfs/xfs_bmap_item.c
@@ -417,6 +417,40 @@ const struct xfs_defer_op_type xfs_bmap_update_defer_type = {
 	.cancel_item	= xfs_bmap_update_cancel_item,
 };
 
+/* Is this recovered BUI ok? */
+static inline bool
+xfs_bui_validate(
+	struct xfs_mount		*mp,
+	struct xfs_bui_log_item		*buip)
+{
+	struct xfs_map_extent		*bmap;
+
+	/* Only one mapping operation per BUI... */
+	if (buip->bui_format.bui_nextents != XFS_BUI_MAX_FAST_EXTENTS)
+		return false;
+
+	bmap = &buip->bui_format.bui_extents[0];
+
+	if (bmap->me_flags & ~XFS_BMAP_EXTENT_FLAGS)
+		return false;
+
+	switch (bmap->me_flags & XFS_BMAP_EXTENT_TYPE_MASK) {
+	case XFS_BMAP_MAP:
+	case XFS_BMAP_UNMAP:
+		break;
+	default:
+		return false;
+	}
+
+	if (!xfs_verify_ino(mp, bmap->me_owner))
+		return false;
+
+	if (!xfs_verify_fileext(mp, bmap->me_startoff, bmap->me_len))
+		return false;
+
+	return xfs_verify_fsbext(mp, bmap->me_startblock, bmap->me_len);
+}
+
 /*
  * Process a bmap update intent item that was recovered from the log.
  * We need to update some inode's bmbt.
@@ -433,47 +467,24 @@ xfs_bui_item_recover(
 	struct xfs_mount		*mp = lip->li_mountp;
 	struct xfs_map_extent		*bmap;
 	struct xfs_bud_log_item		*budp;
-	xfs_fsblock_t			startblock_fsb;
-	xfs_fsblock_t			inode_fsb;
 	xfs_filblks_t			count;
 	xfs_exntst_t			state;
 	unsigned int			bui_type;
 	int				whichfork;
 	int				error = 0;
 
-	/* Only one mapping operation per BUI... */
-	if (buip->bui_format.bui_nextents != XFS_BUI_MAX_FAST_EXTENTS)
+	if (!xfs_bui_validate(mp, buip)) {
+		XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+				&buip->bui_format, sizeof(buip->bui_format));
 		return -EFSCORRUPTED;
+	}
 
-	/*
-	 * First check the validity of the extent described by the
-	 * BUI.  If anything is bad, then toss the BUI.
-	 */
 	bmap = &buip->bui_format.bui_extents[0];
-	startblock_fsb = XFS_BB_TO_FSB(mp,
-			   XFS_FSB_TO_DADDR(mp, bmap->me_startblock));
-	inode_fsb = XFS_BB_TO_FSB(mp, XFS_FSB_TO_DADDR(mp,
-			XFS_INO_TO_FSB(mp, bmap->me_owner)));
 	state = (bmap->me_flags & XFS_BMAP_EXTENT_UNWRITTEN) ?
 			XFS_EXT_UNWRITTEN : XFS_EXT_NORM;
 	whichfork = (bmap->me_flags & XFS_BMAP_EXTENT_ATTR_FORK) ?
 			XFS_ATTR_FORK : XFS_DATA_FORK;
 	bui_type = bmap->me_flags & XFS_BMAP_EXTENT_TYPE_MASK;
-	switch (bui_type) {
-	case XFS_BMAP_MAP:
-	case XFS_BMAP_UNMAP:
-		break;
-	default:
-		return -EFSCORRUPTED;
-	}
-	if (startblock_fsb == 0 ||
-	    bmap->me_len == 0 ||
-	    inode_fsb == 0 ||
-	    startblock_fsb >= mp->m_sb.sb_dblocks ||
-	    bmap->me_len >= mp->m_sb.sb_agblocks ||
-	    inode_fsb >= mp->m_sb.sb_dblocks ||
-	    (bmap->me_flags & ~XFS_BMAP_EXTENT_FLAGS))
-		return -EFSCORRUPTED;
 
 	/* Grab the inode. */
 	error = xfs_iget(mp, NULL, bmap->me_owner, 0, 0, &ip);
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 4e4cf91f4f9f..f8400bbd6473 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -278,7 +278,7 @@ _xfs_buf_alloc(
  */
 STATIC int
 _xfs_buf_get_pages(
-	xfs_buf_t		*bp,
+	struct xfs_buf		*bp,
 	int			page_count)
 {
 	/* Make sure that we have a page list */
@@ -302,7 +302,7 @@ _xfs_buf_get_pages(
  */
 STATIC void
 _xfs_buf_free_pages(
-	xfs_buf_t	*bp)
+	struct xfs_buf	*bp)
 {
 	if (bp->b_pages != bp->b_page_array) {
 		kmem_free(bp->b_pages);
@@ -319,7 +319,7 @@ _xfs_buf_free_pages(
  */
 static void
 xfs_buf_free(
-	xfs_buf_t		*bp)
+	struct xfs_buf		*bp)
 {
 	trace_xfs_buf_free(bp, _RET_IP_);
 
@@ -352,7 +352,7 @@ xfs_buf_free(
  */
 STATIC int
 xfs_buf_allocate_memory(
-	xfs_buf_t		*bp,
+	struct xfs_buf		*bp,
 	uint			flags)
 {
 	size_t			size;
@@ -463,7 +463,7 @@ out_free_pages:
  */
 STATIC int
 _xfs_buf_map_pages(
-	xfs_buf_t		*bp,
+	struct xfs_buf		*bp,
 	uint			flags)
 {
 	ASSERT(bp->b_flags & _XBF_PAGES);
@@ -590,7 +590,7 @@ xfs_buf_find(
 	struct xfs_buf		**found_bp)
 {
 	struct xfs_perag	*pag;
-	xfs_buf_t		*bp;
+	struct xfs_buf		*bp;
 	struct xfs_buf_map	cmap = { .bm_bn = map[0].bm_bn };
 	xfs_daddr_t		eofs;
 	int			i;
@@ -762,7 +762,7 @@ found:
 
 int
 _xfs_buf_read(
-	xfs_buf_t		*bp,
+	struct xfs_buf		*bp,
 	xfs_buf_flags_t		flags)
 {
 	ASSERT(!(flags & XBF_WRITE));
@@ -1005,7 +1005,7 @@ xfs_buf_get_uncached(
  */
 void
 xfs_buf_hold(
-	xfs_buf_t		*bp)
+	struct xfs_buf		*bp)
 {
 	trace_xfs_buf_hold(bp, _RET_IP_);
 	atomic_inc(&bp->b_hold);
@@ -1017,7 +1017,7 @@ xfs_buf_hold(
  */
 void
 xfs_buf_rele(
-	xfs_buf_t		*bp)
+	struct xfs_buf		*bp)
 {
 	struct xfs_perag	*pag = bp->b_pag;
 	bool			release;
@@ -1161,7 +1161,7 @@ xfs_buf_unlock(
 
 STATIC void
 xfs_buf_wait_unpin(
-	xfs_buf_t		*bp)
+	struct xfs_buf		*bp)
 {
 	DECLARE_WAITQUEUE	(wait, current);
 
@@ -1373,7 +1373,7 @@ xfs_buf_ioend_work(
 	struct work_struct	*work)
 {
 	struct xfs_buf		*bp =
-		container_of(work, xfs_buf_t, b_ioend_work);
+		container_of(work, struct xfs_buf, b_ioend_work);
 
 	xfs_buf_ioend(bp);
 }
@@ -1388,7 +1388,7 @@ xfs_buf_ioend_async(
 
 void
 __xfs_buf_ioerror(
-	xfs_buf_t		*bp,
+	struct xfs_buf		*bp,
 	int			error,
 	xfs_failaddr_t		failaddr)
 {
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index bfd2907e7bc4..5d91a31298a4 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -124,7 +124,7 @@ struct xfs_buf_ops {
 	xfs_failaddr_t (*verify_struct)(struct xfs_buf *bp);
 };
 
-typedef struct xfs_buf {
+struct xfs_buf {
 	/*
 	 * first cacheline holds all the fields needed for an uncontended cache
 	 * hit to be fully processed. The semaphore straddles the cacheline
@@ -190,7 +190,7 @@ typedef struct xfs_buf {
 	int			b_last_error;
 
 	const struct xfs_buf_ops	*b_ops;
-} xfs_buf_t;
+};
 
 /* Finding and Reading Buffers */
 struct xfs_buf *xfs_buf_incore(struct xfs_buftarg *target,
@@ -253,16 +253,16 @@ int _xfs_buf_read(struct xfs_buf *bp, xfs_buf_flags_t flags);
 void xfs_buf_hold(struct xfs_buf *bp);
 
 /* Releasing Buffers */
-extern void xfs_buf_rele(xfs_buf_t *);
+extern void xfs_buf_rele(struct xfs_buf *);
 
 /* Locking and Unlocking Buffers */
-extern int xfs_buf_trylock(xfs_buf_t *);
-extern void xfs_buf_lock(xfs_buf_t *);
-extern void xfs_buf_unlock(xfs_buf_t *);
+extern int xfs_buf_trylock(struct xfs_buf *);
+extern void xfs_buf_lock(struct xfs_buf *);
+extern void xfs_buf_unlock(struct xfs_buf *);
 #define xfs_buf_islocked(bp) \
 	((bp)->b_sema.count <= 0)
 
-static inline void xfs_buf_relse(xfs_buf_t *bp)
+static inline void xfs_buf_relse(struct xfs_buf *bp)
 {
 	xfs_buf_unlock(bp);
 	xfs_buf_rele(bp);
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 0356f2e340a1..dc0be2a639cc 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -412,7 +412,7 @@ xfs_buf_item_unpin(
 	int			remove)
 {
 	struct xfs_buf_log_item	*bip = BUF_ITEM(lip);
-	xfs_buf_t		*bp = bip->bli_buf;
+	struct xfs_buf		*bp = bip->bli_buf;
 	int			stale = bip->bli_flags & XFS_BLI_STALE;
 	int			freed;
 
@@ -942,7 +942,7 @@ xfs_buf_item_free(
  */
 void
 xfs_buf_item_relse(
-	xfs_buf_t	*bp)
+	struct xfs_buf	*bp)
 {
 	struct xfs_buf_log_item	*bip = bp->b_log_item;
 
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 6c11bfc3d452..93223ebb3372 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -578,6 +578,15 @@ const struct xfs_defer_op_type xfs_agfl_free_defer_type = {
 	.cancel_item	= xfs_extent_free_cancel_item,
 };
 
+/* Is this recovered EFI ok? */
+static inline bool
+xfs_efi_validate_ext(
+	struct xfs_mount		*mp,
+	struct xfs_extent		*extp)
+{
+	return xfs_verify_fsbext(mp, extp->ext_start, extp->ext_len);
+}
+
 /*
  * Process an extent free intent item that was recovered from
  * the log.  We need to free the extents that it describes.
@@ -592,7 +601,6 @@ xfs_efi_item_recover(
 	struct xfs_efd_log_item		*efdp;
 	struct xfs_trans		*tp;
 	struct xfs_extent		*extp;
-	xfs_fsblock_t			startblock_fsb;
 	int				i;
 	int				error = 0;
 
@@ -602,14 +610,13 @@ xfs_efi_item_recover(
 	 * just toss the EFI.
 	 */
 	for (i = 0; i < efip->efi_format.efi_nextents; i++) {
-		extp = &efip->efi_format.efi_extents[i];
-		startblock_fsb = XFS_BB_TO_FSB(mp,
-				   XFS_FSB_TO_DADDR(mp, extp->ext_start));
-		if (startblock_fsb == 0 ||
-		    extp->ext_len == 0 ||
-		    startblock_fsb >= mp->m_sb.sb_dblocks ||
-		    extp->ext_len >= mp->m_sb.sb_agblocks)
+		if (!xfs_efi_validate_ext(mp,
+					&efip->efi_format.efi_extents[i])) {
+			XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+					&efip->efi_format,
+					sizeof(efip->efi_format));
 			return -EFSCORRUPTED;
+		}
 	}
 
 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp);
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index b7c5783a031c..959ce91a3755 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -28,7 +28,7 @@ xfs_growfs_data_private(
 	xfs_mount_t		*mp,		/* mount point for filesystem */
 	xfs_growfs_data_t	*in)		/* growfs data input struct */
 {
-	xfs_buf_t		*bp;
+	struct xfs_buf		*bp;
 	int			error;
 	xfs_agnumber_t		nagcount;
 	xfs_agnumber_t		nagimax = 0;
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 2bfbcf28b1bd..b7352bc4c815 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -761,68 +761,26 @@ xfs_inode_inherit_flags2(
 }
 
 /*
- * Allocate an inode on disk and return a copy of its in-core version.
- * The in-core inode is locked exclusively.  Set mode, nlink, and rdev
- * appropriately within the inode.  The uid and gid for the inode are
- * set according to the contents of the given cred structure.
- *
- * Use xfs_dialloc() to allocate the on-disk inode. If xfs_dialloc()
- * has a free inode available, call xfs_iget() to obtain the in-core
- * version of the allocated inode.  Finally, fill in the inode and
- * log its initial contents.  In this case, ialloc_context would be
- * set to NULL.
- *
- * If xfs_dialloc() does not have an available inode, it will replenish
- * its supply by doing an allocation. Since we can only do one
- * allocation within a transaction without deadlocks, we must commit
- * the current transaction before returning the inode itself.
- * In this case, therefore, we will set ialloc_context and return.
- * The caller should then commit the current transaction, start a new
- * transaction, and call xfs_ialloc() again to actually get the inode.
- *
- * To ensure that some other process does not grab the inode that
- * was allocated during the first call to xfs_ialloc(), this routine
- * also returns the [locked] bp pointing to the head of the freelist
- * as ialloc_context.  The caller should hold this buffer across
- * the commit and pass it back into this routine on the second call.
- *
- * If we are allocating quota inodes, we do not have a parent inode
- * to attach to or associate with (i.e. pip == NULL) because they
- * are not linked into the directory structure - they are attached
- * directly to the superblock - and so have no parent.
+ * Initialise a newly allocated inode and return the in-core inode to the
+ * caller locked exclusively.
  */
 static int
-xfs_ialloc(
-	xfs_trans_t	*tp,
-	xfs_inode_t	*pip,
-	umode_t		mode,
-	xfs_nlink_t	nlink,
-	dev_t		rdev,
-	prid_t		prid,
-	xfs_buf_t	**ialloc_context,
-	xfs_inode_t	**ipp)
-{
-	struct xfs_mount *mp = tp->t_mountp;
-	xfs_ino_t	ino;
-	xfs_inode_t	*ip;
-	uint		flags;
-	int		error;
-	struct timespec64 tv;
-	struct inode	*inode;
-
-	/*
-	 * Call the space management code to pick
-	 * the on-disk inode to be allocated.
-	 */
-	error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode,
-			    ialloc_context, &ino);
-	if (error)
-		return error;
-	if (*ialloc_context || ino == NULLFSINO) {
-		*ipp = NULL;
-		return 0;
-	}
-	ASSERT(*ialloc_context == NULL);
+xfs_init_new_inode(
+	struct xfs_trans	*tp,
+	struct xfs_inode	*pip,
+	xfs_ino_t		ino,
+	umode_t			mode,
+	xfs_nlink_t		nlink,
+	dev_t			rdev,
+	prid_t			prid,
+	struct xfs_inode	**ipp)
+{
+	struct xfs_mount	*mp = tp->t_mountp;
+	struct xfs_inode	*ip;
+	unsigned int		flags;
+	int			error;
+	struct timespec64	tv;
+	struct inode		*inode;
 
 	/*
 	 * Protect against obviously corrupt allocation btree records. Later
@@ -837,14 +795,13 @@ xfs_ialloc(
 	}
 
 	/*
-	 * Get the in-core inode with the lock held exclusively.
-	 * This is because we're setting fields here we need
-	 * to prevent others from looking at until we're done.
+	 * Get the in-core inode with the lock held exclusively to prevent
+	 * others from looking at until we're done.
 	 */
-	error = xfs_iget(mp, tp, ino, XFS_IGET_CREATE,
-			 XFS_ILOCK_EXCL, &ip);
+	error = xfs_iget(mp, tp, ino, XFS_IGET_CREATE, XFS_ILOCK_EXCL, &ip);
 	if (error)
 		return error;
+
 	ASSERT(ip != NULL);
 	inode = VFS_I(ip);
 	inode->i_mode = mode;
@@ -932,143 +889,51 @@ xfs_ialloc(
 }
 
 /*
- * Allocates a new inode from disk and return a pointer to the
- * incore copy. This routine will internally commit the current
- * transaction and allocate a new one if the Space Manager needed
- * to do an allocation to replenish the inode free-list.
- *
- * This routine is designed to be called from xfs_create and
- * xfs_create_dir.
+ * Allocates a new inode from disk and return a pointer to the incore copy. This
+ * routine will internally commit the current transaction and allocate a new one
+ * if we needed to allocate more on-disk free inodes to perform the requested
+ * operation.
  *
+ * If we are allocating quota inodes, we do not have a parent inode to attach to
+ * or associate with (i.e. dp == NULL) because they are not linked into the
+ * directory structure - they are attached directly to the superblock - and so
+ * have no parent.
  */
 int
 xfs_dir_ialloc(
-	xfs_trans_t	**tpp,		/* input: current transaction;
-					   output: may be a new transaction. */
-	xfs_inode_t	*dp,		/* directory within whose allocate
-					   the inode. */
-	umode_t		mode,
-	xfs_nlink_t	nlink,
-	dev_t		rdev,
-	prid_t		prid,		/* project id */
-	xfs_inode_t	**ipp)		/* pointer to inode; it will be
-					   locked. */
-{
-	xfs_trans_t	*tp;
-	xfs_inode_t	*ip;
-	xfs_buf_t	*ialloc_context = NULL;
-	int		code;
-	void		*dqinfo;
-	uint		tflags;
-
-	tp = *tpp;
-	ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
+	struct xfs_trans	**tpp,
+	struct xfs_inode	*dp,
+	umode_t			mode,
+	xfs_nlink_t		nlink,
+	dev_t			rdev,
+	prid_t			prid,
+	struct xfs_inode	**ipp)
+{
+	struct xfs_buf		*agibp;
+	xfs_ino_t		parent_ino = dp ? dp->i_ino : 0;
+	xfs_ino_t		ino;
+	int			error;
 
-	/*
-	 * xfs_ialloc will return a pointer to an incore inode if
-	 * the Space Manager has an available inode on the free
-	 * list. Otherwise, it will do an allocation and replenish
-	 * the freelist.  Since we can only do one allocation per
-	 * transaction without deadlocks, we will need to commit the
-	 * current transaction and start a new one.  We will then
-	 * need to call xfs_ialloc again to get the inode.
-	 *
-	 * If xfs_ialloc did an allocation to replenish the freelist,
-	 * it returns the bp containing the head of the freelist as
-	 * ialloc_context. We will hold a lock on it across the
-	 * transaction commit so that no other process can steal
-	 * the inode(s) that we've just allocated.
-	 */
-	code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, &ialloc_context,
-			&ip);
+	ASSERT((*tpp)->t_flags & XFS_TRANS_PERM_LOG_RES);
 
 	/*
-	 * Return an error if we were unable to allocate a new inode.
-	 * This should only happen if we run out of space on disk or
-	 * encounter a disk error.
+	 * Call the space management code to pick the on-disk inode to be
+	 * allocated.
 	 */
-	if (code) {
-		*ipp = NULL;
-		return code;
-	}
-	if (!ialloc_context && !ip) {
-		*ipp = NULL;
-		return -ENOSPC;
-	}
-
-	/*
-	 * If the AGI buffer is non-NULL, then we were unable to get an
-	 * inode in one operation.  We need to commit the current
-	 * transaction and call xfs_ialloc() again.  It is guaranteed
-	 * to succeed the second time.
-	 */
-	if (ialloc_context) {
-		/*
-		 * Normally, xfs_trans_commit releases all the locks.
-		 * We call bhold to hang on to the ialloc_context across
-		 * the commit.  Holding this buffer prevents any other
-		 * processes from doing any allocations in this
-		 * allocation group.
-		 */
-		xfs_trans_bhold(tp, ialloc_context);
-
-		/*
-		 * We want the quota changes to be associated with the next
-		 * transaction, NOT this one. So, detach the dqinfo from this
-		 * and attach it to the next transaction.
-		 */
-		dqinfo = NULL;
-		tflags = 0;
-		if (tp->t_dqinfo) {
-			dqinfo = (void *)tp->t_dqinfo;
-			tp->t_dqinfo = NULL;
-			tflags = tp->t_flags & XFS_TRANS_DQ_DIRTY;
-			tp->t_flags &= ~(XFS_TRANS_DQ_DIRTY);
-		}
-
-		code = xfs_trans_roll(&tp);
-
-		/*
-		 * Re-attach the quota info that we detached from prev trx.
-		 */
-		if (dqinfo) {
-			tp->t_dqinfo = dqinfo;
-			tp->t_flags |= tflags;
-		}
-
-		if (code) {
-			xfs_buf_relse(ialloc_context);
-			*tpp = tp;
-			*ipp = NULL;
-			return code;
-		}
-		xfs_trans_bjoin(tp, ialloc_context);
-
-		/*
-		 * Call ialloc again. Since we've locked out all
-		 * other allocations in this allocation group,
-		 * this call should always succeed.
-		 */
-		code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid,
-				  &ialloc_context, &ip);
-
-		/*
-		 * If we get an error at this point, return to the caller
-		 * so that the current transaction can be aborted.
-		 */
-		if (code) {
-			*tpp = tp;
-			*ipp = NULL;
-			return code;
-		}
-		ASSERT(!ialloc_context && ip);
+	error = xfs_dialloc_select_ag(tpp, parent_ino, mode, &agibp);
+	if (error)
+		return error;
 
-	}
+	if (!agibp)
+		return -ENOSPC;
 
-	*ipp = ip;
-	*tpp = tp;
+	/* Allocate an inode from the selected AG */
+	error = xfs_dialloc_ag(*tpp, agibp, parent_ino, &ino);
+	if (error)
+		return error;
+	ASSERT(ino != NULLFSINO);
 
-	return 0;
+	return xfs_init_new_inode(*tpp, dp, ino, mode, nlink, rdev, prid, ipp);
 }
 
 /*
@@ -1521,7 +1386,7 @@ xfs_itruncate_extents_flags(
 	 * the page cache can't scale that far.
 	 */
 	first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size);
-	if (first_unmap_block >= XFS_MAX_FILEOFF) {
+	if (!xfs_verify_fileoff(mp, first_unmap_block)) {
 		WARN_ON_ONCE(first_unmap_block > XFS_MAX_FILEOFF);
 		return 0;
 	}
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 751a3d1d7d84..eca333f5f715 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -407,9 +407,9 @@ void		xfs_lock_two_inodes(struct xfs_inode *ip0, uint ip0_mode,
 xfs_extlen_t	xfs_get_extsz_hint(struct xfs_inode *ip);
 xfs_extlen_t	xfs_get_cowextsz_hint(struct xfs_inode *ip);
 
-int		xfs_dir_ialloc(struct xfs_trans **, struct xfs_inode *, umode_t,
-			       xfs_nlink_t, dev_t, prid_t,
-			       struct xfs_inode **);
+int xfs_dir_ialloc(struct xfs_trans **tpp, struct xfs_inode *dp, umode_t mode,
+		   xfs_nlink_t nlink, dev_t dev, prid_t prid,
+		   struct xfs_inode **ipp);
 
 static inline int
 xfs_itruncate_extents(
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 1414ab79eacf..67c8dc9de8aa 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -206,10 +206,8 @@ xfs_generic_create(
 	xfs_finish_inode_setup(ip);
 
  out_free_acl:
-	if (default_acl)
-		posix_acl_release(default_acl);
-	if (acl)
-		posix_acl_release(acl);
+	posix_acl_release(default_acl);
+	posix_acl_release(acl);
 	return error;
 
  out_cleanup_inode:
@@ -648,11 +646,10 @@ xfs_vn_change_ok(
  * Caution: The caller of this function is responsible for calling
  * setattr_prepare() or otherwise verifying the change is fine.
  */
-int
+static int
 xfs_setattr_nonsize(
 	struct xfs_inode	*ip,
-	struct iattr		*iattr,
-	int			flags)
+	struct iattr		*iattr)
 {
 	xfs_mount_t		*mp = ip->i_mount;
 	struct inode		*inode = VFS_I(ip);
@@ -809,7 +806,7 @@ xfs_setattr_nonsize(
 	 *	     to attr_set.  No previous user of the generic
 	 * 	     Posix ACL code seems to care about this issue either.
 	 */
-	if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) {
+	if (mask & ATTR_MODE) {
 		error = posix_acl_chmod(inode, inode->i_mode);
 		if (error)
 			return error;
@@ -826,22 +823,6 @@ out_dqrele:
 	return error;
 }
 
-int
-xfs_vn_setattr_nonsize(
-	struct dentry		*dentry,
-	struct iattr		*iattr)
-{
-	struct xfs_inode	*ip = XFS_I(d_inode(dentry));
-	int error;
-
-	trace_xfs_setattr(ip);
-
-	error = xfs_vn_change_ok(dentry, iattr);
-	if (error)
-		return error;
-	return xfs_setattr_nonsize(ip, iattr, 0);
-}
-
 /*
  * Truncate file.  Must have write permission and not be a directory.
  *
@@ -881,7 +862,7 @@ xfs_setattr_size(
 		 * Use the regular setattr path to update the timestamps.
 		 */
 		iattr->ia_valid &= ~ATTR_SIZE;
-		return xfs_setattr_nonsize(ip, iattr, 0);
+		return xfs_setattr_nonsize(ip, iattr);
 	}
 
 	/*
@@ -1069,11 +1050,11 @@ xfs_vn_setattr(
 	struct dentry		*dentry,
 	struct iattr		*iattr)
 {
+	struct inode		*inode = d_inode(dentry);
+	struct xfs_inode	*ip = XFS_I(inode);
 	int			error;
 
 	if (iattr->ia_valid & ATTR_SIZE) {
-		struct inode		*inode = d_inode(dentry);
-		struct xfs_inode	*ip = XFS_I(inode);
 		uint			iolock;
 
 		xfs_ilock(ip, XFS_MMAPLOCK_EXCL);
@@ -1088,7 +1069,11 @@ xfs_vn_setattr(
 		error = xfs_vn_setattr_size(dentry, iattr);
 		xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
 	} else {
-		error = xfs_vn_setattr_nonsize(dentry, iattr);
+		trace_xfs_setattr(ip);
+
+		error = xfs_vn_change_ok(dentry, iattr);
+		if (!error)
+			error = xfs_setattr_nonsize(ip, iattr);
 	}
 
 	return error;
diff --git a/fs/xfs/xfs_iops.h b/fs/xfs/xfs_iops.h
index 4d24ff309f59..99ca745c1071 100644
--- a/fs/xfs/xfs_iops.h
+++ b/fs/xfs/xfs_iops.h
@@ -13,15 +13,7 @@ extern const struct file_operations xfs_dir_file_operations;
 
 extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size);
 
-/*
- * Internal setattr interfaces.
- */
-#define XFS_ATTR_NOACL		0x01	/* Don't call posix_acl_chmod */
-
 extern void xfs_setattr_time(struct xfs_inode *ip, struct iattr *iattr);
-extern int xfs_setattr_nonsize(struct xfs_inode *ip, struct iattr *vap,
-			       int flags);
-extern int xfs_vn_setattr_nonsize(struct dentry *dentry, struct iattr *vap);
 extern int xfs_vn_setattr_size(struct dentry *dentry, struct iattr *vap);
 
 #endif /* __XFS_IOPS_H__ */
diff --git a/fs/xfs/xfs_iwalk.c b/fs/xfs/xfs_iwalk.c
index 2a45138831e3..eae3aff9bc97 100644
--- a/fs/xfs/xfs_iwalk.c
+++ b/fs/xfs/xfs_iwalk.c
@@ -363,7 +363,7 @@ xfs_iwalk_run_callbacks(
 	/* Delete cursor but remember the last record we cached... */
 	xfs_iwalk_del_inobt(tp, curpp, agi_bpp, 0);
 	irec = &iwag->recs[iwag->nr_recs - 1];
-	ASSERT(next_agino == irec->ir_startino + XFS_INODES_PER_CHUNK);
+	ASSERT(next_agino >= irec->ir_startino + XFS_INODES_PER_CHUNK);
 
 	error = xfs_iwalk_ag_recs(iwag);
 	if (error)
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index 87886b7f77da..97f31308de03 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -2559,8 +2559,11 @@ xlog_recover_process_intents(
 		spin_unlock(&ailp->ail_lock);
 		error = lip->li_ops->iop_recover(lip, &capture_list);
 		spin_lock(&ailp->ail_lock);
-		if (error)
+		if (error) {
+			trace_xlog_intent_recovery_failed(log->l_mp, error,
+					lip->li_ops->iop_recover);
 			break;
+		}
 	}
 
 	xfs_trans_ail_cursor_done(&cur);
@@ -2628,7 +2631,7 @@ xlog_recover_clear_agi_bucket(
 {
 	xfs_trans_t	*tp;
 	xfs_agi_t	*agi;
-	xfs_buf_t	*agibp;
+	struct xfs_buf	*agibp;
 	int		offset;
 	int		error;
 
@@ -2746,7 +2749,7 @@ xlog_recover_process_iunlinks(
 	xfs_mount_t	*mp;
 	xfs_agnumber_t	agno;
 	xfs_agi_t	*agi;
-	xfs_buf_t	*agibp;
+	struct xfs_buf	*agibp;
 	xfs_agino_t	agino;
 	int		bucket;
 	int		error;
@@ -3498,8 +3501,8 @@ xlog_recover_check_summary(
 	struct xlog	*log)
 {
 	xfs_mount_t	*mp;
-	xfs_buf_t	*agfbp;
-	xfs_buf_t	*agibp;
+	struct xfs_buf	*agfbp;
+	struct xfs_buf	*agibp;
 	xfs_agnumber_t	agno;
 	uint64_t	freeblks;
 	uint64_t	itotal;
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index b2a9abee8b2b..c134eb4aeaa8 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -737,15 +737,15 @@ xfs_qm_destroy_quotainfo(
  */
 STATIC int
 xfs_qm_qino_alloc(
-	xfs_mount_t	*mp,
-	xfs_inode_t	**ip,
-	uint		flags)
+	struct xfs_mount	*mp,
+	struct xfs_inode	**ipp,
+	unsigned int		flags)
 {
-	xfs_trans_t	*tp;
-	int		error;
-	bool		need_alloc = true;
+	struct xfs_trans	*tp;
+	int			error;
+	bool			need_alloc = true;
 
-	*ip = NULL;
+	*ipp = NULL;
 	/*
 	 * With superblock that doesn't have separate pquotino, we
 	 * share an inode between gquota and pquota. If the on-disk
@@ -771,7 +771,7 @@ xfs_qm_qino_alloc(
 				return -EFSCORRUPTED;
 		}
 		if (ino != NULLFSINO) {
-			error = xfs_iget(mp, NULL, ino, 0, 0, ip);
+			error = xfs_iget(mp, NULL, ino, 0, 0, ipp);
 			if (error)
 				return error;
 			mp->m_sb.sb_gquotino = NULLFSINO;
@@ -787,7 +787,7 @@ xfs_qm_qino_alloc(
 		return error;
 
 	if (need_alloc) {
-		error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, ip);
+		error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, ipp);
 		if (error) {
 			xfs_trans_cancel(tp);
 			return error;
@@ -812,11 +812,11 @@ xfs_qm_qino_alloc(
 		mp->m_sb.sb_qflags = mp->m_qflags & XFS_ALL_QUOTA_ACCT;
 	}
 	if (flags & XFS_QMOPT_UQUOTA)
-		mp->m_sb.sb_uquotino = (*ip)->i_ino;
+		mp->m_sb.sb_uquotino = (*ipp)->i_ino;
 	else if (flags & XFS_QMOPT_GQUOTA)
-		mp->m_sb.sb_gquotino = (*ip)->i_ino;
+		mp->m_sb.sb_gquotino = (*ipp)->i_ino;
 	else
-		mp->m_sb.sb_pquotino = (*ip)->i_ino;
+		mp->m_sb.sb_pquotino = (*ipp)->i_ino;
 	spin_unlock(&mp->m_sb_lock);
 	xfs_log_sb(tp);
 
@@ -826,7 +826,7 @@ xfs_qm_qino_alloc(
 		xfs_alert(mp, "%s failed (error %d)!", __func__, error);
 	}
 	if (need_alloc)
-		xfs_finish_inode_setup(*ip);
+		xfs_finish_inode_setup(*ipp);
 	return error;
 }
 
diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c
index 7529eb63ce94..07ebccbbf4df 100644
--- a/fs/xfs/xfs_refcount_item.c
+++ b/fs/xfs/xfs_refcount_item.c
@@ -417,6 +417,31 @@ const struct xfs_defer_op_type xfs_refcount_update_defer_type = {
 	.cancel_item	= xfs_refcount_update_cancel_item,
 };
 
+/* Is this recovered CUI ok? */
+static inline bool
+xfs_cui_validate_phys(
+	struct xfs_mount		*mp,
+	struct xfs_phys_extent		*refc)
+{
+	if (!xfs_sb_version_hasreflink(&mp->m_sb))
+		return false;
+
+	if (refc->pe_flags & ~XFS_REFCOUNT_EXTENT_FLAGS)
+		return false;
+
+	switch (refc->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK) {
+	case XFS_REFCOUNT_INCREASE:
+	case XFS_REFCOUNT_DECREASE:
+	case XFS_REFCOUNT_ALLOC_COW:
+	case XFS_REFCOUNT_FREE_COW:
+		break;
+	default:
+		return false;
+	}
+
+	return xfs_verify_fsbext(mp, refc->pe_startblock, refc->pe_len);
+}
+
 /*
  * Process a refcount update intent item that was recovered from the log.
  * We need to update the refcountbt.
@@ -433,11 +458,9 @@ xfs_cui_item_recover(
 	struct xfs_trans		*tp;
 	struct xfs_btree_cur		*rcur = NULL;
 	struct xfs_mount		*mp = lip->li_mountp;
-	xfs_fsblock_t			startblock_fsb;
 	xfs_fsblock_t			new_fsb;
 	xfs_extlen_t			new_len;
 	unsigned int			refc_type;
-	bool				op_ok;
 	bool				requeue_only = false;
 	enum xfs_refcount_intent_type	type;
 	int				i;
@@ -449,26 +472,13 @@ xfs_cui_item_recover(
 	 * just toss the CUI.
 	 */
 	for (i = 0; i < cuip->cui_format.cui_nextents; i++) {
-		refc = &cuip->cui_format.cui_extents[i];
-		startblock_fsb = XFS_BB_TO_FSB(mp,
-				   XFS_FSB_TO_DADDR(mp, refc->pe_startblock));
-		switch (refc->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK) {
-		case XFS_REFCOUNT_INCREASE:
-		case XFS_REFCOUNT_DECREASE:
-		case XFS_REFCOUNT_ALLOC_COW:
-		case XFS_REFCOUNT_FREE_COW:
-			op_ok = true;
-			break;
-		default:
-			op_ok = false;
-			break;
-		}
-		if (!op_ok || startblock_fsb == 0 ||
-		    refc->pe_len == 0 ||
-		    startblock_fsb >= mp->m_sb.sb_dblocks ||
-		    refc->pe_len >= mp->m_sb.sb_agblocks ||
-		    (refc->pe_flags & ~XFS_REFCOUNT_EXTENT_FLAGS))
+		if (!xfs_cui_validate_phys(mp,
+					&cuip->cui_format.cui_extents[i])) {
+			XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+					&cuip->cui_format,
+					sizeof(cuip->cui_format));
 			return -EFSCORRUPTED;
+		}
 	}
 
 	/*
diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c
index 7adc996ca6e3..49cebd68b672 100644
--- a/fs/xfs/xfs_rmap_item.c
+++ b/fs/xfs/xfs_rmap_item.c
@@ -460,6 +460,42 @@ const struct xfs_defer_op_type xfs_rmap_update_defer_type = {
 	.cancel_item	= xfs_rmap_update_cancel_item,
 };
 
+/* Is this recovered RUI ok? */
+static inline bool
+xfs_rui_validate_map(
+	struct xfs_mount		*mp,
+	struct xfs_map_extent		*rmap)
+{
+	if (!xfs_sb_version_hasrmapbt(&mp->m_sb))
+		return false;
+
+	if (rmap->me_flags & ~XFS_RMAP_EXTENT_FLAGS)
+		return false;
+
+	switch (rmap->me_flags & XFS_RMAP_EXTENT_TYPE_MASK) {
+	case XFS_RMAP_EXTENT_MAP:
+	case XFS_RMAP_EXTENT_MAP_SHARED:
+	case XFS_RMAP_EXTENT_UNMAP:
+	case XFS_RMAP_EXTENT_UNMAP_SHARED:
+	case XFS_RMAP_EXTENT_CONVERT:
+	case XFS_RMAP_EXTENT_CONVERT_SHARED:
+	case XFS_RMAP_EXTENT_ALLOC:
+	case XFS_RMAP_EXTENT_FREE:
+		break;
+	default:
+		return false;
+	}
+
+	if (!XFS_RMAP_NON_INODE_OWNER(rmap->me_owner) &&
+	    !xfs_verify_ino(mp, rmap->me_owner))
+		return false;
+
+	if (!xfs_verify_fileext(mp, rmap->me_startoff, rmap->me_len))
+		return false;
+
+	return xfs_verify_fsbext(mp, rmap->me_startblock, rmap->me_len);
+}
+
 /*
  * Process an rmap update intent item that was recovered from the log.
  * We need to update the rmapbt.
@@ -475,10 +511,8 @@ xfs_rui_item_recover(
 	struct xfs_trans		*tp;
 	struct xfs_btree_cur		*rcur = NULL;
 	struct xfs_mount		*mp = lip->li_mountp;
-	xfs_fsblock_t			startblock_fsb;
 	enum xfs_rmap_intent_type	type;
 	xfs_exntst_t			state;
-	bool				op_ok;
 	int				i;
 	int				whichfork;
 	int				error = 0;
@@ -489,30 +523,13 @@ xfs_rui_item_recover(
 	 * just toss the RUI.
 	 */
 	for (i = 0; i < ruip->rui_format.rui_nextents; i++) {
-		rmap = &ruip->rui_format.rui_extents[i];
-		startblock_fsb = XFS_BB_TO_FSB(mp,
-				   XFS_FSB_TO_DADDR(mp, rmap->me_startblock));
-		switch (rmap->me_flags & XFS_RMAP_EXTENT_TYPE_MASK) {
-		case XFS_RMAP_EXTENT_MAP:
-		case XFS_RMAP_EXTENT_MAP_SHARED:
-		case XFS_RMAP_EXTENT_UNMAP:
-		case XFS_RMAP_EXTENT_UNMAP_SHARED:
-		case XFS_RMAP_EXTENT_CONVERT:
-		case XFS_RMAP_EXTENT_CONVERT_SHARED:
-		case XFS_RMAP_EXTENT_ALLOC:
-		case XFS_RMAP_EXTENT_FREE:
-			op_ok = true;
-			break;
-		default:
-			op_ok = false;
-			break;
-		}
-		if (!op_ok || startblock_fsb == 0 ||
-		    rmap->me_len == 0 ||
-		    startblock_fsb >= mp->m_sb.sb_dblocks ||
-		    rmap->me_len >= mp->m_sb.sb_agblocks ||
-		    (rmap->me_flags & ~XFS_RMAP_EXTENT_FLAGS))
+		if (!xfs_rui_validate_map(mp,
+					&ruip->rui_format.rui_extents[i])) {
+			XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp,
+					&ruip->rui_format,
+					sizeof(ruip->rui_format));
 			return -EFSCORRUPTED;
+		}
 	}
 
 	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate,
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index ede1baf31413..b4999fb01ff7 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -32,7 +32,7 @@ xfs_rtget_summary(
 	xfs_trans_t	*tp,		/* transaction pointer */
 	int		log,		/* log2 of extent size */
 	xfs_rtblock_t	bbno,		/* bitmap block number */
-	xfs_buf_t	**rbpp,		/* in/out: summary block buffer */
+	struct xfs_buf	**rbpp,		/* in/out: summary block buffer */
 	xfs_fsblock_t	*rsb,		/* in/out: summary block number */
 	xfs_suminfo_t	*sum)		/* out: summary info for this block */
 {
@@ -50,7 +50,7 @@ xfs_rtany_summary(
 	int		low,		/* low log2 extent size */
 	int		high,		/* high log2 extent size */
 	xfs_rtblock_t	bbno,		/* bitmap block number */
-	xfs_buf_t	**rbpp,		/* in/out: summary block buffer */
+	struct xfs_buf	**rbpp,		/* in/out: summary block buffer */
 	xfs_fsblock_t	*rsb,		/* in/out: summary block number */
 	int		*stat)		/* out: any good extents here? */
 {
@@ -104,7 +104,7 @@ xfs_rtcopy_summary(
 	xfs_trans_t	*tp)		/* transaction pointer */
 {
 	xfs_rtblock_t	bbno;		/* bitmap block number */
-	xfs_buf_t	*bp;		/* summary buffer */
+	struct xfs_buf	*bp;		/* summary buffer */
 	int		error;		/* error return value */
 	int		log;		/* summary level number (log length) */
 	xfs_suminfo_t	sum;		/* summary data */
@@ -144,7 +144,7 @@ xfs_rtallocate_range(
 	xfs_trans_t	*tp,		/* transaction pointer */
 	xfs_rtblock_t	start,		/* start block to allocate */
 	xfs_extlen_t	len,		/* length to allocate */
-	xfs_buf_t	**rbpp,		/* in/out: summary block buffer */
+	struct xfs_buf	**rbpp,		/* in/out: summary block buffer */
 	xfs_fsblock_t	*rsb)		/* in/out: summary block number */
 {
 	xfs_rtblock_t	end;		/* end of the allocated extent */
@@ -226,7 +226,7 @@ xfs_rtallocate_extent_block(
 	xfs_extlen_t	maxlen,		/* maximum length to allocate */
 	xfs_extlen_t	*len,		/* out: actual length allocated */
 	xfs_rtblock_t	*nextp,		/* out: next block to try */
-	xfs_buf_t	**rbpp,		/* in/out: summary block buffer */
+	struct xfs_buf	**rbpp,		/* in/out: summary block buffer */
 	xfs_fsblock_t	*rsb,		/* in/out: summary block number */
 	xfs_extlen_t	prod,		/* extent product factor */
 	xfs_rtblock_t	*rtblock)	/* out: start block allocated */
@@ -345,7 +345,7 @@ xfs_rtallocate_extent_exact(
 	xfs_extlen_t	minlen,		/* minimum length to allocate */
 	xfs_extlen_t	maxlen,		/* maximum length to allocate */
 	xfs_extlen_t	*len,		/* out: actual length allocated */
-	xfs_buf_t	**rbpp,		/* in/out: summary block buffer */
+	struct xfs_buf	**rbpp,		/* in/out: summary block buffer */
 	xfs_fsblock_t	*rsb,		/* in/out: summary block number */
 	xfs_extlen_t	prod,		/* extent product factor */
 	xfs_rtblock_t	*rtblock)	/* out: start block allocated */
@@ -424,7 +424,7 @@ xfs_rtallocate_extent_near(
 	xfs_extlen_t	minlen,		/* minimum length to allocate */
 	xfs_extlen_t	maxlen,		/* maximum length to allocate */
 	xfs_extlen_t	*len,		/* out: actual length allocated */
-	xfs_buf_t	**rbpp,		/* in/out: summary block buffer */
+	struct xfs_buf	**rbpp,		/* in/out: summary block buffer */
 	xfs_fsblock_t	*rsb,		/* in/out: summary block number */
 	xfs_extlen_t	prod,		/* extent product factor */
 	xfs_rtblock_t	*rtblock)	/* out: start block allocated */
@@ -626,7 +626,7 @@ xfs_rtallocate_extent_size(
 	xfs_extlen_t	minlen,		/* minimum length to allocate */
 	xfs_extlen_t	maxlen,		/* maximum length to allocate */
 	xfs_extlen_t	*len,		/* out: actual length allocated */
-	xfs_buf_t	**rbpp,		/* in/out: summary block buffer */
+	struct xfs_buf	**rbpp,		/* in/out: summary block buffer */
 	xfs_fsblock_t	*rsb,		/* in/out: summary block number */
 	xfs_extlen_t	prod,		/* extent product factor */
 	xfs_rtblock_t	*rtblock)	/* out: start block allocated */
@@ -900,7 +900,7 @@ xfs_growfs_rt(
 	xfs_growfs_rt_t	*in)		/* growfs rt input struct */
 {
 	xfs_rtblock_t	bmbno;		/* bitmap block number */
-	xfs_buf_t	*bp;		/* temporary buffer */
+	struct xfs_buf	*bp;		/* temporary buffer */
 	int		error;		/* error return value */
 	xfs_mount_t	*nmp;		/* new (fake) mount structure */
 	xfs_rfsblock_t	nrblocks;	/* new number of realtime blocks */
@@ -1151,7 +1151,7 @@ xfs_rtallocate_extent(
 	int		error;		/* error value */
 	xfs_rtblock_t	r;		/* result allocated block */
 	xfs_fsblock_t	sb;		/* summary file block number */
-	xfs_buf_t	*sumbp;		/* summary file block buffer */
+	struct xfs_buf	*sumbp;		/* summary file block buffer */
 
 	ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL));
 	ASSERT(minlen > 0 && minlen <= maxlen);
diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h
index 93e77b221355..ed885620589c 100644
--- a/fs/xfs/xfs_rtalloc.h
+++ b/fs/xfs/xfs_rtalloc.h
@@ -115,10 +115,10 @@ int xfs_rtmodify_range(struct xfs_mount *mp, struct xfs_trans *tp,
 		       xfs_rtblock_t start, xfs_extlen_t len, int val);
 int xfs_rtmodify_summary_int(struct xfs_mount *mp, struct xfs_trans *tp,
 			     int log, xfs_rtblock_t bbno, int delta,
-			     xfs_buf_t **rbpp, xfs_fsblock_t *rsb,
+			     struct xfs_buf **rbpp, xfs_fsblock_t *rsb,
 			     xfs_suminfo_t *sum);
 int xfs_rtmodify_summary(struct xfs_mount *mp, struct xfs_trans *tp, int log,
-			 xfs_rtblock_t bbno, int delta, xfs_buf_t **rbpp,
+			 xfs_rtblock_t bbno, int delta, struct xfs_buf **rbpp,
 			 xfs_fsblock_t *rsb);
 int xfs_rtfree_range(struct xfs_mount *mp, struct xfs_trans *tp,
 		     xfs_rtblock_t start, xfs_extlen_t len,
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index e3e229e52512..813be879a5e5 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -199,10 +199,12 @@ xfs_fs_show_options(
 		seq_printf(m, ",swidth=%d",
 				(int)XFS_FSB_TO_BB(mp, mp->m_swidth));
 
-	if (mp->m_qflags & (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD))
-		seq_puts(m, ",usrquota");
-	else if (mp->m_qflags & XFS_UQUOTA_ACCT)
-		seq_puts(m, ",uqnoenforce");
+	if (mp->m_qflags & XFS_UQUOTA_ACCT) {
+		if (mp->m_qflags & XFS_UQUOTA_ENFD)
+			seq_puts(m, ",usrquota");
+		else
+			seq_puts(m, ",uqnoenforce");
+	}
 
 	if (mp->m_qflags & XFS_PQUOTA_ACCT) {
 		if (mp->m_qflags & XFS_PQUOTA_ENFD)
@@ -1159,7 +1161,7 @@ suffix_kstrtoint(
  * NOTE: mp->m_super is NULL here!
  */
 static int
-xfs_fc_parse_param(
+xfs_fs_parse_param(
 	struct fs_context	*fc,
 	struct fs_parameter	*param)
 {
@@ -1317,7 +1319,7 @@ xfs_fc_parse_param(
 }
 
 static int
-xfs_fc_validate_params(
+xfs_fs_validate_params(
 	struct xfs_mount	*mp)
 {
 	/*
@@ -1386,7 +1388,7 @@ xfs_fc_validate_params(
 }
 
 static int
-xfs_fc_fill_super(
+xfs_fs_fill_super(
 	struct super_block	*sb,
 	struct fs_context	*fc)
 {
@@ -1396,7 +1398,7 @@ xfs_fc_fill_super(
 
 	mp->m_super = sb;
 
-	error = xfs_fc_validate_params(mp);
+	error = xfs_fs_validate_params(mp);
 	if (error)
 		goto out_free_names;
 
@@ -1467,6 +1469,45 @@ xfs_fc_fill_super(
 #endif
 	}
 
+	/* Filesystem claims it needs repair, so refuse the mount. */
+	if (xfs_sb_version_needsrepair(&mp->m_sb)) {
+		xfs_warn(mp, "Filesystem needs repair.  Please run xfs_repair.");
+		error = -EFSCORRUPTED;
+		goto out_free_sb;
+	}
+
+	/*
+	 * Don't touch the filesystem if a user tool thinks it owns the primary
+	 * superblock.  mkfs doesn't clear the flag from secondary supers, so
+	 * we don't check them at all.
+	 */
+	if (mp->m_sb.sb_inprogress) {
+		xfs_warn(mp, "Offline file system operation in progress!");
+		error = -EFSCORRUPTED;
+		goto out_free_sb;
+	}
+
+	/*
+	 * Until this is fixed only page-sized or smaller data blocks work.
+	 */
+	if (mp->m_sb.sb_blocksize > PAGE_SIZE) {
+		xfs_warn(mp,
+		"File system with blocksize %d bytes. "
+		"Only pagesize (%ld) or less will currently work.",
+				mp->m_sb.sb_blocksize, PAGE_SIZE);
+		error = -ENOSYS;
+		goto out_free_sb;
+	}
+
+	/* Ensure this filesystem fits in the page cache limits */
+	if (xfs_sb_validate_fsb_count(&mp->m_sb, mp->m_sb.sb_dblocks) ||
+	    xfs_sb_validate_fsb_count(&mp->m_sb, mp->m_sb.sb_rblocks)) {
+		xfs_warn(mp,
+		"file system too large to be mounted on this system.");
+		error = -EFBIG;
+		goto out_free_sb;
+	}
+
 	/*
 	 * XFS block mappings use 54 bits to store the logical block offset.
 	 * This should suffice to handle the maximum file size that the VFS
@@ -1478,7 +1519,7 @@ xfs_fc_fill_super(
 	 * Avoid integer overflow by comparing the maximum bmbt offset to the
 	 * maximum pagecache offset in units of fs blocks.
 	 */
-	if (XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE) > XFS_MAX_FILEOFF) {
+	if (!xfs_verify_fileoff(mp, XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE))) {
 		xfs_warn(mp,
 "MAX_LFS_FILESIZE block offset (%llu) exceeds extent map maximum (%llu)!",
 			 XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE),
@@ -1621,10 +1662,10 @@ xfs_fc_fill_super(
 }
 
 static int
-xfs_fc_get_tree(
+xfs_fs_get_tree(
 	struct fs_context	*fc)
 {
-	return get_tree_bdev(fc, xfs_fc_fill_super);
+	return get_tree_bdev(fc, xfs_fs_fill_super);
 }
 
 static int
@@ -1743,7 +1784,7 @@ xfs_remount_ro(
  * silently ignore all options that we can't actually change.
  */
 static int
-xfs_fc_reconfigure(
+xfs_fs_reconfigure(
 	struct fs_context *fc)
 {
 	struct xfs_mount	*mp = XFS_M(fc->root->d_sb);
@@ -1756,7 +1797,7 @@ xfs_fc_reconfigure(
 	if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5)
 		fc->sb_flags |= SB_I_VERSION;
 
-	error = xfs_fc_validate_params(new_mp);
+	error = xfs_fs_validate_params(new_mp);
 	if (error)
 		return error;
 
@@ -1793,7 +1834,7 @@ xfs_fc_reconfigure(
 	return 0;
 }
 
-static void xfs_fc_free(
+static void xfs_fs_free(
 	struct fs_context	*fc)
 {
 	struct xfs_mount	*mp = fc->s_fs_info;
@@ -1809,10 +1850,10 @@ static void xfs_fc_free(
 }
 
 static const struct fs_context_operations xfs_context_ops = {
-	.parse_param = xfs_fc_parse_param,
-	.get_tree    = xfs_fc_get_tree,
-	.reconfigure = xfs_fc_reconfigure,
-	.free        = xfs_fc_free,
+	.parse_param = xfs_fs_parse_param,
+	.get_tree    = xfs_fs_get_tree,
+	.reconfigure = xfs_fs_reconfigure,
+	.free        = xfs_fs_free,
 };
 
 static int xfs_init_fs_context(
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index 8e88a7ca387e..1f43fd7f3209 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -154,7 +154,7 @@ xfs_symlink(
 	const char		*cur_chunk;
 	int			byte_cnt;
 	int			n;
-	xfs_buf_t		*bp;
+	struct xfs_buf		*bp;
 	prid_t			prid;
 	struct xfs_dquot	*udqp = NULL;
 	struct xfs_dquot	*gdqp = NULL;
@@ -365,7 +365,7 @@ STATIC int
 xfs_inactive_symlink_rmt(
 	struct xfs_inode *ip)
 {
-	xfs_buf_t	*bp;
+	struct xfs_buf	*bp;
 	int		done;
 	int		error;
 	int		i;
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 86951652d3ed..5a263ae3d4f0 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -103,6 +103,24 @@ DEFINE_ATTR_LIST_EVENT(xfs_attr_list_notfound);
 DEFINE_ATTR_LIST_EVENT(xfs_attr_leaf_list);
 DEFINE_ATTR_LIST_EVENT(xfs_attr_node_list);
 
+TRACE_EVENT(xlog_intent_recovery_failed,
+	TP_PROTO(struct xfs_mount *mp, int error, void *function),
+	TP_ARGS(mp, error, function),
+	TP_STRUCT__entry(
+		__field(dev_t, dev)
+		__field(int, error)
+		__field(void *, function)
+	),
+	TP_fast_assign(
+		__entry->dev = mp->m_super->s_dev;
+		__entry->error = error;
+		__entry->function = function;
+	),
+	TP_printk("dev %d:%d error %d function %pS",
+		  MAJOR(__entry->dev), MINOR(__entry->dev),
+		  __entry->error, __entry->function)
+);
+
 DECLARE_EVENT_CLASS(xfs_perag_class,
 	TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount,
 		 unsigned long caller_ip),
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index c94e71f741b6..e72730f85af1 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -465,7 +465,7 @@ xfs_trans_apply_sb_deltas(
 	xfs_trans_t	*tp)
 {
 	xfs_dsb_t	*sbp;
-	xfs_buf_t	*bp;
+	struct xfs_buf	*bp;
 	int		whole = 0;
 
 	bp = xfs_trans_getsb(tp);
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 42d63b830cb9..9aced0a00003 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -121,7 +121,7 @@ xfs_trans_get_buf_map(
 	xfs_buf_flags_t		flags,
 	struct xfs_buf		**bpp)
 {
-	xfs_buf_t		*bp;
+	struct xfs_buf		*bp;
 	struct xfs_buf_log_item	*bip;
 	int			error;
 
@@ -401,7 +401,7 @@ xfs_trans_brelse(
 void
 xfs_trans_bhold(
 	xfs_trans_t		*tp,
-	xfs_buf_t		*bp)
+	struct xfs_buf		*bp)
 {
 	struct xfs_buf_log_item	*bip = bp->b_log_item;
 
@@ -422,7 +422,7 @@ xfs_trans_bhold(
 void
 xfs_trans_bhold_release(
 	xfs_trans_t		*tp,
-	xfs_buf_t		*bp)
+	struct xfs_buf		*bp)
 {
 	struct xfs_buf_log_item	*bip = bp->b_log_item;
 
@@ -538,7 +538,7 @@ xfs_trans_log_buf(
 void
 xfs_trans_binval(
 	xfs_trans_t		*tp,
-	xfs_buf_t		*bp)
+	struct xfs_buf		*bp)
 {
 	struct xfs_buf_log_item	*bip = bp->b_log_item;
 	int			i;
@@ -593,7 +593,7 @@ xfs_trans_binval(
 void
 xfs_trans_inode_buf(
 	xfs_trans_t		*tp,
-	xfs_buf_t		*bp)
+	struct xfs_buf		*bp)
 {
 	struct xfs_buf_log_item	*bip = bp->b_log_item;
 
@@ -618,7 +618,7 @@ xfs_trans_inode_buf(
 void
 xfs_trans_stale_inode_buf(
 	xfs_trans_t		*tp,
-	xfs_buf_t		*bp)
+	struct xfs_buf		*bp)
 {
 	struct xfs_buf_log_item	*bip = bp->b_log_item;
 
@@ -643,7 +643,7 @@ xfs_trans_stale_inode_buf(
 void
 xfs_trans_inode_alloc_buf(
 	xfs_trans_t		*tp,
-	xfs_buf_t		*bp)
+	struct xfs_buf		*bp)
 {
 	struct xfs_buf_log_item	*bip = bp->b_log_item;
 
@@ -737,7 +737,7 @@ xfs_trans_buf_copy_type(
 void
 xfs_trans_dquot_buf(
 	xfs_trans_t		*tp,
-	xfs_buf_t		*bp,
+	struct xfs_buf		*bp,
 	uint			type)
 {
 	struct xfs_buf_log_item	*bip = bp->b_log_item;
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
index fe45b0c3970c..28b8ac701919 100644
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -84,13 +84,6 @@ xfs_trans_dup_dqinfo(
 
 	xfs_trans_alloc_dqinfo(ntp);
 
-	/*
-	 * Because the quota blk reservation is carried forward,
-	 * it is also necessary to carry forward the DQ_DIRTY flag.
-	 */
-	if (otp->t_flags & XFS_TRANS_DQ_DIRTY)
-		ntp->t_flags |= XFS_TRANS_DQ_DIRTY;
-
 	for (j = 0; j < XFS_QM_TRANS_DQTYPES; j++) {
 		oqa = otp->t_dqinfo->dqs[j];
 		nqa = ntp->t_dqinfo->dqs[j];
@@ -143,9 +136,6 @@ xfs_trans_mod_dquot_byino(
 	    xfs_is_quota_inode(&mp->m_sb, ip->i_ino))
 		return;
 
-	if (tp->t_dqinfo == NULL)
-		xfs_trans_alloc_dqinfo(tp);
-
 	if (XFS_IS_UQUOTA_ON(mp) && ip->i_udquot)
 		(void) xfs_trans_mod_dquot(tp, ip->i_udquot, field, delta);
 	if (XFS_IS_GQUOTA_ON(mp) && ip->i_gdquot)
@@ -204,6 +194,9 @@ xfs_trans_mod_dquot(
 	ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp));
 	qtrx = NULL;
 
+	if (!delta)
+		return;
+
 	if (tp->t_dqinfo == NULL)
 		xfs_trans_alloc_dqinfo(tp);
 	/*
@@ -215,10 +208,8 @@ xfs_trans_mod_dquot(
 	if (qtrx->qt_dquot == NULL)
 		qtrx->qt_dquot = dqp;
 
-	if (delta) {
-		trace_xfs_trans_mod_dquot_before(qtrx);
-		trace_xfs_trans_mod_dquot(tp, dqp, field, delta);
-	}
+	trace_xfs_trans_mod_dquot_before(qtrx);
+	trace_xfs_trans_mod_dquot(tp, dqp, field, delta);
 
 	switch (field) {
 	/* regular disk blk reservation */
@@ -271,10 +262,7 @@ xfs_trans_mod_dquot(
 		ASSERT(0);
 	}
 
-	if (delta)
-		trace_xfs_trans_mod_dquot_after(qtrx);
-
-	tp->t_flags |= XFS_TRANS_DQ_DIRTY;
+	trace_xfs_trans_mod_dquot_after(qtrx);
 }
 
 
@@ -351,7 +339,7 @@ xfs_trans_apply_dquot_deltas(
 	int64_t			totalbdelta;
 	int64_t			totalrtbdelta;
 
-	if (!(tp->t_flags & XFS_TRANS_DQ_DIRTY))
+	if (!tp->t_dqinfo)
 		return;
 
 	ASSERT(tp->t_dqinfo);
@@ -493,7 +481,7 @@ xfs_trans_unreserve_and_mod_dquots(
 	struct xfs_dqtrx	*qtrx, *qa;
 	bool			locked;
 
-	if (!tp->t_dqinfo || !(tp->t_flags & XFS_TRANS_DQ_DIRTY))
+	if (!tp->t_dqinfo)
 		return;
 
 	for (j = 0; j < XFS_QM_TRANS_DQTYPES; j++) {
@@ -698,16 +686,10 @@ xfs_trans_dqresv(
 	 * because we don't have the luxury of a transaction envelope then.
 	 */
 	if (tp) {
-		ASSERT(tp->t_dqinfo);
 		ASSERT(flags & XFS_QMOPT_RESBLK_MASK);
-		if (nblks != 0)
-			xfs_trans_mod_dquot(tp, dqp,
-					    flags & XFS_QMOPT_RESBLK_MASK,
-					    nblks);
-		if (ninos != 0)
-			xfs_trans_mod_dquot(tp, dqp,
-					    XFS_TRANS_DQ_RES_INOS,
-					    ninos);
+		xfs_trans_mod_dquot(tp, dqp, flags & XFS_QMOPT_RESBLK_MASK,
+				    nblks);
+		xfs_trans_mod_dquot(tp, dqp, XFS_TRANS_DQ_RES_INOS, ninos);
 	}
 	ASSERT(dqp->q_blk.reserved >= dqp->q_blk.count);
 	ASSERT(dqp->q_rtb.reserved >= dqp->q_rtb.count);
@@ -752,9 +734,6 @@ xfs_trans_reserve_quota_bydquots(
 	if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp))
 		return 0;
 
-	if (tp && tp->t_dqinfo == NULL)
-		xfs_trans_alloc_dqinfo(tp);
-
 	ASSERT(flags & XFS_QMOPT_RESBLK_MASK);
 
 	if (udqp) {