author    Linus Torvalds <torvalds@linux-foundation.org>  2020-12-15 18:52:30 -0800
committer Linus Torvalds <torvalds@linux-foundation.org>  2020-12-15 18:52:30 -0800
commit    1a50ede2b3c846761a71c409f53e9121311a13c2 (patch)
tree      40f228239b077c2db87580dfd66556ad0ca7046f
parent    9867cb1fd510187d8f828540bdb48f78fceb70b3 (diff)
parent    716a8bc7f706eeef80ab42c99d9f210eda845c81 (diff)
Merge tag 'nfsd-5.11' of git://git.linux-nfs.org/projects/cel/cel-2.6
Pull nfsd updates from Chuck Lever:
 "Several substantial changes this time around:

   - Previously, exporting an NFS mount via NFSD was considered to be
     an unsupported feature. With v5.11, the community has attempted to
     make re-exporting a first-class feature of NFSD.

     This would enable the Linux in-kernel NFS server to be used as an
     intermediate cache for a remotely-located primary NFS server, for
     example, even with other NFS server implementations, like a NetApp
     filer, as the primary.

   - A short series of patches brings support for multiple RPC/RDMA
     data chunks per RPC transaction to the Linux NFS server's RPC/RDMA
     transport implementation.

     This is a part of the RPC/RDMA spec that the other premiere
     NFS/RDMA implementation (Solaris) has had for a very long time,
     and completes the implementation of RPC/RDMA version 1 in the
     Linux kernel's NFS server.

   - Long ago, NFSv4 support was introduced to NFSD using a series of C
     macros that hid dprintk's and goto's. Over time, the kernel's XDR
     implementation has been greatly improved, but these C macros have
     remained and become fallow. A series of patches in this pull
     request completely replaces those macros with the use of current
     kernel XDR infrastructure. Benefits include:

       - More robust input sanitization in NFSD's NFSv4 XDR decoders.

       - Make it easier to use common kernel library functions that use
         XDR stream APIs (for example, GSS-API).

       - Align the structure of the source code with the RFCs so it is
         easier to learn, verify, and maintain our XDR implementation.

       - Removal of more than a hundred hidden dprintk() call sites.

       - Removal of some explicit manipulation of pages to help make
         the eventual transition to xdr->bvec smoother.

   - On top of several related fixes in 5.10-rc, there are a few more
     fixes to get the Linux NFSD implementation of NFSv4.2 inter-server
     copy up to speed.

  And as usual, there is a pinch of seasoning in the form of a
  collection of unrelated minor bug fixes and clean-ups.

  Many thanks to all who contributed this time around!"

* tag 'nfsd-5.11' of git://git.linux-nfs.org/projects/cel/cel-2.6: (131 commits)
  nfsd: Record NFSv4 pre/post-op attributes as non-atomic
  nfsd: Set PF_LOCAL_THROTTLE on local filesystems only
  nfsd: Fix up nfsd to ensure that timeout errors don't result in ESTALE
  exportfs: Add a function to return the raw output from fh_to_dentry()
  nfsd: close cached files prior to a REMOVE or RENAME that would replace target
  nfsd: allow filesystems to opt out of subtree checking
  nfsd: add a new EXPORT_OP_NOWCC flag to struct export_operations
  Revert "nfsd4: support change_attr_type attribute"
  nfsd4: don't query change attribute in v2/v3 case
  nfsd: minor nfsd4_change_attribute cleanup
  nfsd: simplify nfsd4_change_info
  nfsd: only call inode_query_iversion in the I_VERSION case
  nfs_common: need lock during iterate through the list
  NFSD: Fix 5 seconds delay when doing inter server copy
  NFSD: Fix sparse warning in nfs4proc.c
  SUNRPC: Remove XDRBUF_SPARSE_PAGES flag in gss_proxy upcall
  sunrpc: clean-up cache downcall
  nfsd: Fix message level for normal termination
  NFSD: Remove macros that are no longer used
  NFSD: Replace READ* macros in nfsd4_decode_compound()
  ...
-rw-r--r--  Documentation/filesystems/nfs/exporting.rst | 52
-rw-r--r--  fs/exportfs/expfs.c | 32
-rw-r--r--  fs/nfs/blocklayout/blocklayout.c | 2
-rw-r--r--  fs/nfs/blocklayout/dev.c | 2
-rw-r--r--  fs/nfs/dir.c | 2
-rw-r--r--  fs/nfs/export.c | 3
-rw-r--r--  fs/nfs/filelayout/filelayout.c | 2
-rw-r--r--  fs/nfs/filelayout/filelayoutdev.c | 2
-rw-r--r--  fs/nfs/flexfilelayout/flexfilelayout.c | 2
-rw-r--r--  fs/nfs/flexfilelayout/flexfilelayoutdev.c | 2
-rw-r--r--  fs/nfs/nfs42xdr.c | 2
-rw-r--r--  fs/nfs/nfs4xdr.c | 6
-rw-r--r--  fs/nfs_common/grace.c | 6
-rw-r--r--  fs/nfsd/export.c | 6
-rw-r--r--  fs/nfsd/filecache.c | 1
-rw-r--r--  fs/nfsd/nfs2acl.c | 21
-rw-r--r--  fs/nfsd/nfs3acl.c | 8
-rw-r--r--  fs/nfsd/nfs3proc.c | 11
-rw-r--r--  fs/nfsd/nfs3xdr.c | 40
-rw-r--r--  fs/nfsd/nfs4proc.c | 35
-rw-r--r--  fs/nfsd/nfs4state.c | 3
-rw-r--r--  fs/nfsd/nfs4xdr.c | 2551
-rw-r--r--  fs/nfsd/nfsd.h | 9
-rw-r--r--  fs/nfsd/nfsfh.c | 34
-rw-r--r--  fs/nfsd/nfsfh.h | 24
-rw-r--r--  fs/nfsd/nfsproc.c | 25
-rw-r--r--  fs/nfsd/nfssvc.c | 50
-rw-r--r--  fs/nfsd/nfsxdr.c | 16
-rw-r--r--  fs/nfsd/trace.c | 1
-rw-r--r--  fs/nfsd/trace.h | 176
-rw-r--r--  fs/nfsd/vfs.c | 29
-rw-r--r--  fs/nfsd/xdr.h | 2
-rw-r--r--  fs/nfsd/xdr3.h | 2
-rw-r--r--  fs/nfsd/xdr4.h | 43
-rw-r--r--  include/linux/exportfs.h | 13
-rw-r--r--  include/linux/iversion.h | 13
-rw-r--r--  include/linux/nfs4.h | 8
-rw-r--r--  include/linux/sunrpc/svc.h | 22
-rw-r--r--  include/linux/sunrpc/svc_rdma.h | 36
-rw-r--r--  include/linux/sunrpc/svc_rdma_pcl.h | 128
-rw-r--r--  include/linux/sunrpc/svc_xprt.h | 4
-rw-r--r--  include/linux/sunrpc/xdr.h | 91
-rw-r--r--  include/trace/events/rpcrdma.h | 143
-rw-r--r--  include/trace/events/sunrpc.h | 24
-rw-r--r--  net/sunrpc/auth_gss/gss_rpc_upcall.c | 15
-rw-r--r--  net/sunrpc/auth_gss/gss_rpc_xdr.c | 3
-rw-r--r--  net/sunrpc/cache.c | 41
-rw-r--r--  net/sunrpc/svc.c | 16
-rw-r--r--  net/sunrpc/svc_xprt.c | 4
-rw-r--r--  net/sunrpc/svcsock.c | 8
-rw-r--r--  net/sunrpc/xdr.c | 78
-rw-r--r--  net/sunrpc/xprtrdma/Makefile | 2
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 14
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_pcl.c | 306
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 314
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_rw.c | 600
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_sendto.c | 562
-rw-r--r--  net/sunrpc/xprtrdma/svc_rdma_transport.c | 2
58 files changed, 3462 insertions, 2187 deletions
diff --git a/Documentation/filesystems/nfs/exporting.rst b/Documentation/filesystems/nfs/exporting.rst
index 33d588a01ace..0e98edd353b5 100644
--- a/Documentation/filesystems/nfs/exporting.rst
+++ b/Documentation/filesystems/nfs/exporting.rst
@@ -154,6 +154,11 @@ struct which has the following members:
to find potential names, and matches inode numbers to find the correct
match.
+ flags
+ Some filesystems may need to be handled differently than others. The
+ export_operations struct also includes a flags field that allows the
+ filesystem to communicate such information to nfsd. See the Export
+ Operations Flags section below for more explanation.
A filehandle fragment consists of an array of 1 or more 4byte words,
together with a one byte "type".
@@ -163,3 +168,50 @@ generated by encode_fh, in which case it will have been padded with
nuls. Rather, the encode_fh routine should choose a "type" which
indicates the decode_fh how much of the filehandle is valid, and how
it should be interpreted.
+
+Export Operations Flags
+-----------------------
+In addition to the operation vector pointers, struct export_operations also
+contains a "flags" field that allows the filesystem to communicate to nfsd
+that it may want to do things differently when dealing with it. The
+following flags are defined:
+
+ EXPORT_OP_NOWCC - disable NFSv3 WCC attributes on this filesystem
+ RFC 1813 recommends that servers always send weak cache consistency
+ (WCC) data to the client after each operation. The server should
+ atomically collect attributes about the inode, do an operation on it,
+ and then collect the attributes afterward. This allows the client to
+ skip issuing GETATTRs in some situations but means that the server
+ is calling vfs_getattr for almost all RPCs. On some filesystems
+ (particularly those that are clustered or networked) this is expensive
+ and atomicity is difficult to guarantee. This flag indicates to nfsd
+ that it should skip providing WCC attributes to the client in NFSv3
+ replies when doing operations on this filesystem. Consider enabling
+ this on filesystems that have an expensive ->getattr inode operation,
+ or when atomicity between pre and post operation attribute collection
+ is impossible to guarantee.
+
+ EXPORT_OP_NOSUBTREECHK - disallow subtree checking on this fs
+ Many NFS operations deal with filehandles, which the server must then
+ vet to ensure that they live inside of an exported tree. When the
+ export consists of an entire filesystem, this is trivial. nfsd can just
+ ensure that the filehandle lives on the filesystem. When only part of a
+ filesystem is exported, however, nfsd must walk the ancestors of the
+ inode to ensure that it's within an exported subtree. This is an
+ expensive operation and not all filesystems can support it properly.
+ This flag exempts the filesystem from subtree checking and causes
+ exportfs to get back an error if it tries to enable subtree checking
+ on it.
+
+ EXPORT_OP_CLOSE_BEFORE_UNLINK - always close cached files before unlinking
+ On some exportable filesystems (such as NFS) unlinking a file that
+ is still open can cause a fair bit of extra work. For instance,
+ the NFS client will do a "sillyrename" to ensure that the file
+ sticks around while it's still open. When reexporting, that open
+ file is held by nfsd, so we usually end up doing a sillyrename and
+ then immediately deleting the sillyrenamed file when the link count
+ actually goes to zero. Sometimes this delete can race
+ with other operations (for instance an rmdir of the parent directory).
+ This flag causes nfsd to close any open files for this inode _before_
+ calling into the vfs to do an unlink or a rename that would replace
+ an existing file.
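
A filesystem opts in by OR'ing these flags into its export_operations. A
minimal sketch, assuming a hypothetical examplefs that wants both the NFSv3
WCC and the subtree-checking exemptions (the examplefs_* callbacks are
placeholders; only .flags is the point here):

    /* examplefs is hypothetical; it mirrors the nfs_export_ops change below */
    static const struct export_operations examplefs_export_ops = {
            .encode_fh    = examplefs_encode_fh,
            .fh_to_dentry = examplefs_fh_to_dentry,
            .flags        = EXPORT_OP_NOWCC | EXPORT_OP_NOSUBTREECHK,
    };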
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c
index 2dd55b172d57..0106eba46d5a 100644
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -417,9 +417,11 @@ int exportfs_encode_fh(struct dentry *dentry, struct fid *fid, int *max_len,
}
EXPORT_SYMBOL_GPL(exportfs_encode_fh);
-struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
- int fh_len, int fileid_type,
- int (*acceptable)(void *, struct dentry *), void *context)
+struct dentry *
+exportfs_decode_fh_raw(struct vfsmount *mnt, struct fid *fid, int fh_len,
+ int fileid_type,
+ int (*acceptable)(void *, struct dentry *),
+ void *context)
{
const struct export_operations *nop = mnt->mnt_sb->s_export_op;
struct dentry *result, *alias;
@@ -432,10 +434,8 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
if (!nop || !nop->fh_to_dentry)
return ERR_PTR(-ESTALE);
result = nop->fh_to_dentry(mnt->mnt_sb, fid, fh_len, fileid_type);
- if (PTR_ERR(result) == -ENOMEM)
- return ERR_CAST(result);
if (IS_ERR_OR_NULL(result))
- return ERR_PTR(-ESTALE);
+ return result;
/*
* If no acceptance criteria was specified by caller, a disconnected
@@ -561,10 +561,26 @@ struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
err_result:
dput(result);
- if (err != -ENOMEM)
- err = -ESTALE;
return ERR_PTR(err);
}
+EXPORT_SYMBOL_GPL(exportfs_decode_fh_raw);
+
+struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
+ int fh_len, int fileid_type,
+ int (*acceptable)(void *, struct dentry *),
+ void *context)
+{
+ struct dentry *ret;
+
+ ret = exportfs_decode_fh_raw(mnt, fid, fh_len, fileid_type,
+ acceptable, context);
+ if (IS_ERR_OR_NULL(ret)) {
+ if (ret == ERR_PTR(-ENOMEM))
+ return ret;
+ return ERR_PTR(-ESTALE);
+ }
+ return ret;
+}
EXPORT_SYMBOL_GPL(exportfs_decode_fh);
MODULE_LICENSE("GPL");
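
With the raw/wrapper split above, a caller that cares about the underlying
failure (for example nfsd's re-export path, which must not turn a timeout
into ESTALE) can call the raw variant and apply its own policy. A minimal
sketch, assuming an acceptable() callback and context are already in hand:

    struct dentry *dentry;

    dentry = exportfs_decode_fh_raw(mnt, fid, fh_len, fileid_type,
                                    acceptable, context);
    if (IS_ERR_OR_NULL(dentry)) {
            /* pass transient errors through; everything else is stale */
            if (dentry == ERR_PTR(-ENOMEM) || dentry == ERR_PTR(-ETIMEDOUT))
                    return dentry;
            return ERR_PTR(-ESTALE);
    }
    return dentry;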
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 08108b6d2fa1..3be6836074ae 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -697,7 +697,7 @@ bl_alloc_lseg(struct pnfs_layout_hdr *lo, struct nfs4_layoutget_res *lgr,
xdr_init_decode_pages(&xdr, &buf,
lgr->layoutp->pages, lgr->layoutp->len);
- xdr_set_scratch_buffer(&xdr, page_address(scratch), PAGE_SIZE);
+ xdr_set_scratch_page(&xdr, scratch);
status = -EIO;
p = xdr_inline_decode(&xdr, 4);
diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c
index dec5880ac6de..acb1d22907da 100644
--- a/fs/nfs/blocklayout/dev.c
+++ b/fs/nfs/blocklayout/dev.c
@@ -510,7 +510,7 @@ bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
goto out;
xdr_init_decode_pages(&xdr, &buf, pdev->pages, pdev->pglen);
- xdr_set_scratch_buffer(&xdr, page_address(scratch), PAGE_SIZE);
+ xdr_set_scratch_page(&xdr, scratch);
p = xdr_inline_decode(&xdr, sizeof(__be32));
if (!p)
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 4e011adaf967..8a24fe20dccf 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -576,7 +576,7 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
goto out_nopages;
xdr_init_decode_pages(&stream, &buf, xdr_pages, buflen);
- xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
+ xdr_set_scratch_page(&stream, scratch);
do {
if (entry->label)
diff --git a/fs/nfs/export.c b/fs/nfs/export.c
index 3430d6891e89..7412bb164fa7 100644
--- a/fs/nfs/export.c
+++ b/fs/nfs/export.c
@@ -171,4 +171,7 @@ const struct export_operations nfs_export_ops = {
.encode_fh = nfs_encode_fh,
.fh_to_dentry = nfs_fh_to_dentry,
.get_parent = nfs_get_parent,
+ .flags = EXPORT_OP_NOWCC|EXPORT_OP_NOSUBTREECHK|
+ EXPORT_OP_CLOSE_BEFORE_UNLINK|EXPORT_OP_REMOTE_FS|
+ EXPORT_OP_NOATOMIC_ATTR,
};
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index 7f5aa0403e16..d158a500c25c 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -666,7 +666,7 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
return -ENOMEM;
xdr_init_decode_pages(&stream, &buf, lgr->layoutp->pages, lgr->layoutp->len);
- xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
+ xdr_set_scratch_page(&stream, scratch);
/* 20 = ufl_util (4), first_stripe_index (4), pattern_offset (8),
* num_fh (4) */
diff --git a/fs/nfs/filelayout/filelayoutdev.c b/fs/nfs/filelayout/filelayoutdev.c
index d913e818858f..86c3f7e69ec4 100644
--- a/fs/nfs/filelayout/filelayoutdev.c
+++ b/fs/nfs/filelayout/filelayoutdev.c
@@ -82,7 +82,7 @@ nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
goto out_err;
xdr_init_decode_pages(&stream, &buf, pdev->pages, pdev->pglen);
- xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
+ xdr_set_scratch_page(&stream, scratch);
/* Get the stripe count (number of stripe index) */
p = xdr_inline_decode(&stream, 4);
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 24bf5797f88a..4252ce633533 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -378,7 +378,7 @@ ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh,
xdr_init_decode_pages(&stream, &buf, lgr->layoutp->pages,
lgr->layoutp->len);
- xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
+ xdr_set_scratch_page(&stream, scratch);
/* stripe unit and mirror_array_cnt */
rc = -EIO;
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index 3eda40a320a5..c9b61b818ec1 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -69,7 +69,7 @@ nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
INIT_LIST_HEAD(&dsaddrs);
xdr_init_decode_pages(&stream, &buf, pdev->pages, pdev->pglen);
- xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
+ xdr_set_scratch_page(&stream, scratch);
/* multipath count */
p = xdr_inline_decode(&stream, 4);
diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index 8432bd6b95f0..ea7dd8cbfac9 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
@@ -1539,7 +1539,7 @@ static int nfs4_xdr_dec_listxattrs(struct rpc_rqst *rqstp,
struct compound_hdr hdr;
int status;
- xdr_set_scratch_buffer(xdr, page_address(res->scratch), PAGE_SIZE);
+ xdr_set_scratch_page(xdr, res->scratch);
status = decode_compound_hdr(xdr, &hdr);
if (status)
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index c6dbfcae7517..2eabe5add344 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -6403,10 +6403,8 @@ nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
struct compound_hdr hdr;
int status;
- if (res->acl_scratch != NULL) {
- void *p = page_address(res->acl_scratch);
- xdr_set_scratch_buffer(xdr, p, PAGE_SIZE);
- }
+ if (res->acl_scratch != NULL)
+ xdr_set_scratch_page(xdr, res->acl_scratch);
status = decode_compound_hdr(xdr, &hdr);
if (status)
goto out;
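
Every hunk above makes the same mechanical change: the caller already owns a
whole scratch page, so it hands the struct page to the XDR layer instead of
open-coding page_address() and PAGE_SIZE. The shape of a decoder using the
new helper, as a minimal sketch:

    struct xdr_stream xdr;
    struct xdr_buf buf;
    struct page *scratch;
    __be32 *p;

    scratch = alloc_page(GFP_KERNEL);
    if (!scratch)
            return -ENOMEM;
    xdr_init_decode_pages(&xdr, &buf, pages, buflen);
    xdr_set_scratch_page(&xdr, scratch);

    /* xdr_inline_decode() copies an item that straddles a page
     * boundary into the scratch page, so 'p' is always contiguous */
    p = xdr_inline_decode(&xdr, 4);

    put_page(scratch);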
diff --git a/fs/nfs_common/grace.c b/fs/nfs_common/grace.c
index b73d9dd37f73..26f2a50eceac 100644
--- a/fs/nfs_common/grace.c
+++ b/fs/nfs_common/grace.c
@@ -69,10 +69,14 @@ __state_in_grace(struct net *net, bool open)
if (!open)
return !list_empty(grace_list);
+ spin_lock(&grace_lock);
list_for_each_entry(lm, grace_list, list) {
- if (lm->block_opens)
+ if (lm->block_opens) {
+ spin_unlock(&grace_lock);
return true;
+ }
}
+ spin_unlock(&grace_lock);
return false;
}
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 21e404e7cb68..81e7bb12aca6 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -408,6 +408,12 @@ static int check_export(struct inode *inode, int *flags, unsigned char *uuid)
return -EINVAL;
}
+ if (inode->i_sb->s_export_op->flags & EXPORT_OP_NOSUBTREECHK &&
+ !(*flags & NFSEXP_NOSUBTREECHECK)) {
+ dprintk("%s: %s does not support subtree checking!\n",
+ __func__, inode->i_sb->s_type->name);
+ return -EINVAL;
+ }
return 0;
}
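
In practice this means an export of a filesystem whose export_operations set
EXPORT_OP_NOSUBTREECHK (such as an NFS re-export) must carry the
no_subtree_check option, or exportfs now gets back -EINVAL. An illustrative
/etc/exports line (the path, client spec, and fsid value are made up):

    /srv/nfs-reexport  192.168.1.0/24(rw,no_subtree_check,fsid=1000)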
diff --git a/fs/nfsd/filecache.c b/fs/nfsd/filecache.c
index 3c6c2f7d1688..d77c624c61f6 100644
--- a/fs/nfsd/filecache.c
+++ b/fs/nfsd/filecache.c
@@ -685,6 +685,7 @@ nfsd_file_cache_init(void)
if (IS_ERR(nfsd_file_fsnotify_group)) {
pr_err("nfsd: unable to create fsnotify group: %ld\n",
PTR_ERR(nfsd_file_fsnotify_group));
+ ret = PTR_ERR(nfsd_file_fsnotify_group);
nfsd_file_fsnotify_group = NULL;
goto out_notifier;
}
diff --git a/fs/nfsd/nfs2acl.c b/fs/nfsd/nfs2acl.c
index 6a900f770dd2..b0f66604532a 100644
--- a/fs/nfsd/nfs2acl.c
+++ b/fs/nfsd/nfs2acl.c
@@ -185,10 +185,6 @@ out:
/*
* XDR decode functions
*/
-static int nfsaclsvc_decode_voidarg(struct svc_rqst *rqstp, __be32 *p)
-{
- return 1;
-}
static int nfsaclsvc_decode_getaclargs(struct svc_rqst *rqstp, __be32 *p)
{
@@ -255,15 +251,6 @@ static int nfsaclsvc_decode_accessargs(struct svc_rqst *rqstp, __be32 *p)
* XDR encode functions
*/
-/*
- * There must be an encoding function for void results so svc_process
- * will work properly.
- */
-static int nfsaclsvc_encode_voidres(struct svc_rqst *rqstp, __be32 *p)
-{
- return xdr_ressize_check(rqstp, p);
-}
-
/* GETACL */
static int nfsaclsvc_encode_getaclres(struct svc_rqst *rqstp, __be32 *p)
{
@@ -378,10 +365,10 @@ struct nfsd3_voidargs { int dummy; };
static const struct svc_procedure nfsd_acl_procedures2[5] = {
[ACLPROC2_NULL] = {
.pc_func = nfsacld_proc_null,
- .pc_decode = nfsaclsvc_decode_voidarg,
- .pc_encode = nfsaclsvc_encode_voidres,
- .pc_argsize = sizeof(struct nfsd3_voidargs),
- .pc_ressize = sizeof(struct nfsd3_voidargs),
+ .pc_decode = nfssvc_decode_voidarg,
+ .pc_encode = nfssvc_encode_voidres,
+ .pc_argsize = sizeof(struct nfsd_voidargs),
+ .pc_ressize = sizeof(struct nfsd_voidres),
.pc_cachetype = RC_NOCACHE,
.pc_xdrressize = ST,
},
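
The per-version void argument/result helpers removed here (and in the
nfs3acl.c, nfs3proc.c, and nfs4proc.c hunks below) were byte-for-byte
identical, so the series consolidates them into one shared pair. Sketched
from the removed copies (the shared helpers are added elsewhere in this
series, in fs/nfsd/nfssvc.c):

    int nfssvc_decode_voidarg(struct svc_rqst *rqstp, __be32 *p)
    {
            return 1;       /* a void argument decodes trivially */
    }

    int nfssvc_encode_voidres(struct svc_rqst *rqstp, __be32 *p)
    {
            /* only the accept status is encoded by the calling service */
            return xdr_ressize_check(rqstp, p);
    }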
diff --git a/fs/nfsd/nfs3acl.c b/fs/nfsd/nfs3acl.c
index 34a394e50e1d..7c30876a31a1 100644
--- a/fs/nfsd/nfs3acl.c
+++ b/fs/nfsd/nfs3acl.c
@@ -245,10 +245,10 @@ struct nfsd3_voidargs { int dummy; };
static const struct svc_procedure nfsd_acl_procedures3[3] = {
[ACLPROC3_NULL] = {
.pc_func = nfsd3_proc_null,
- .pc_decode = nfs3svc_decode_voidarg,
- .pc_encode = nfs3svc_encode_voidres,
- .pc_argsize = sizeof(struct nfsd3_voidargs),
- .pc_ressize = sizeof(struct nfsd3_voidargs),
+ .pc_decode = nfssvc_decode_voidarg,
+ .pc_encode = nfssvc_encode_voidres,
+ .pc_argsize = sizeof(struct nfsd_voidargs),
+ .pc_ressize = sizeof(struct nfsd_voidres),
.pc_cachetype = RC_NOCACHE,
.pc_xdrressize = ST,
},
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index a633044b0dc1..76931f4f57c3 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -689,12 +689,9 @@ out:
#define nfsd3_mkdirargs nfsd3_createargs
#define nfsd3_readdirplusargs nfsd3_readdirargs
#define nfsd3_fhandleargs nfsd_fhandle
-#define nfsd3_fhandleres nfsd3_attrstat
#define nfsd3_attrstatres nfsd3_attrstat
#define nfsd3_wccstatres nfsd3_attrstat
#define nfsd3_createres nfsd3_diropres
-#define nfsd3_voidres nfsd3_voidargs
-struct nfsd3_voidargs { int dummy; };
#define ST 1 /* status*/
#define FH 17 /* filehandle with length */
@@ -705,10 +702,10 @@ struct nfsd3_voidargs { int dummy; };
static const struct svc_procedure nfsd_procedures3[22] = {
[NFS3PROC_NULL] = {
.pc_func = nfsd3_proc_null,
- .pc_decode = nfs3svc_decode_voidarg,
- .pc_encode = nfs3svc_encode_voidres,
- .pc_argsize = sizeof(struct nfsd3_voidargs),
- .pc_ressize = sizeof(struct nfsd3_voidres),
+ .pc_decode = nfssvc_decode_voidarg,
+ .pc_encode = nfssvc_encode_voidres,
+ .pc_argsize = sizeof(struct nfsd_voidargs),
+ .pc_ressize = sizeof(struct nfsd_voidres),
.pc_cachetype = RC_NOCACHE,
.pc_xdrressize = ST,
},
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 2277f83da250..821db21ba072 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -206,7 +206,7 @@ static __be32 *
encode_post_op_attr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *fhp)
{
struct dentry *dentry = fhp->fh_dentry;
- if (dentry && d_really_is_positive(dentry)) {
+ if (!fhp->fh_no_wcc && dentry && d_really_is_positive(dentry)) {
__be32 err;
struct kstat stat;
@@ -259,11 +259,11 @@ void fill_pre_wcc(struct svc_fh *fhp)
{
struct inode *inode;
struct kstat stat;
+ bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE);
__be32 err;
- if (fhp->fh_pre_saved)
+ if (fhp->fh_no_wcc || fhp->fh_pre_saved)
return;
-
inode = d_inode(fhp->fh_dentry);
err = fh_getattr(fhp, &stat);
if (err) {
@@ -272,11 +272,12 @@ void fill_pre_wcc(struct svc_fh *fhp)
stat.ctime = inode->i_ctime;
stat.size = inode->i_size;
}
+ if (v4)
+ fhp->fh_pre_change = nfsd4_change_attribute(&stat, inode);
fhp->fh_pre_mtime = stat.mtime;
fhp->fh_pre_ctime = stat.ctime;
fhp->fh_pre_size = stat.size;
- fhp->fh_pre_change = nfsd4_change_attribute(&stat, inode);
fhp->fh_pre_saved = true;
}
@@ -285,30 +286,30 @@ void fill_pre_wcc(struct svc_fh *fhp)
*/
void fill_post_wcc(struct svc_fh *fhp)
{
+ bool v4 = (fhp->fh_maxsize == NFS4_FHSIZE);
+ struct inode *inode = d_inode(fhp->fh_dentry);
__be32 err;
+ if (fhp->fh_no_wcc)
+ return;
+
if (fhp->fh_post_saved)
printk("nfsd: inode locked twice during operation.\n");
err = fh_getattr(fhp, &fhp->fh_post_attr);
- fhp->fh_post_change = nfsd4_change_attribute(&fhp->fh_post_attr,
- d_inode(fhp->fh_dentry));
if (err) {
fhp->fh_post_saved = false;
- /* Grab the ctime anyway - set_change_info might use it */
- fhp->fh_post_attr.ctime = d_inode(fhp->fh_dentry)->i_ctime;
+ fhp->fh_post_attr.ctime = inode->i_ctime;
} else
fhp->fh_post_saved = true;
+ if (v4)
+ fhp->fh_post_change =
+ nfsd4_change_attribute(&fhp->fh_post_attr, inode);
}
/*
* XDR decode functions
*/
-int
-nfs3svc_decode_voidarg(struct svc_rqst *rqstp, __be32 *p)
-{
- return 1;
-}
int
nfs3svc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p)
@@ -642,12 +643,6 @@ nfs3svc_decode_commitargs(struct svc_rqst *rqstp, __be32 *p)
* XDR encode functions
*/
-int
-nfs3svc_encode_voidres(struct svc_rqst *rqstp, __be32 *p)
-{
- return xdr_ressize_check(rqstp, p);
-}
-
/* GETATTR */
int
nfs3svc_encode_attrstat(struct svc_rqst *rqstp, __be32 *p)
@@ -707,6 +702,7 @@ int
nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p)
{
struct nfsd3_readlinkres *resp = rqstp->rq_resp;
+ struct kvec *head = rqstp->rq_res.head;
*p++ = resp->status;
p = encode_post_op_attr(rqstp, p, &resp->fh);
@@ -720,6 +716,8 @@ nfs3svc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p)
*p = 0;
rqstp->rq_res.tail[0].iov_len = 4 - (resp->len&3);
}
+ if (svc_encode_result_payload(rqstp, head->iov_len, resp->len))
+ return 0;
return 1;
} else
return xdr_ressize_check(rqstp, p);
@@ -730,6 +728,7 @@ int
nfs3svc_encode_readres(struct svc_rqst *rqstp, __be32 *p)
{
struct nfsd3_readres *resp = rqstp->rq_resp;
+ struct kvec *head = rqstp->rq_res.head;
*p++ = resp->status;
p = encode_post_op_attr(rqstp, p, &resp->fh);
@@ -746,6 +745,9 @@ nfs3svc_encode_readres(struct svc_rqst *rqstp, __be32 *p)
*p = 0;
rqstp->rq_res.tail[0].iov_len = 4 - (resp->count & 3);
}
+ if (svc_encode_result_payload(rqstp, head->iov_len,
+ resp->count))
+ return 0;
return 1;
} else
return xdr_ressize_check(rqstp, p);
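
The new svc_encode_result_payload() calls tell the transport where the
READLINK and READ payloads sit in the reply, so RPC/RDMA can place that data
directly into the client's buffers; for socket transports the hook is
expected to be a no-op. A sketch of the socket side under that assumption:

    static int svc_sock_result_payload(struct svc_rqst *rqstp,
                                       unsigned int offset,
                                       unsigned int length)
    {
            return 0;       /* TCP and UDP need no special placement */
    }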
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index e83b21778816..4727b7f03c5b 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -257,8 +257,8 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru
* in NFSv4 as in v3 except EXCLUSIVE4_1.
*/
current->fs->umask = open->op_umask;
- status = do_nfsd_create(rqstp, current_fh, open->op_fname.data,
- open->op_fname.len, &open->op_iattr,
+ status = do_nfsd_create(rqstp, current_fh, open->op_fname,
+ open->op_fnamelen, &open->op_iattr,
*resfh, open->op_createmode,
(u32 *)open->op_verf.data,
&open->op_truncate, &open->op_created);
@@ -283,7 +283,7 @@ do_open_lookup(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, stru
* a chance to an acquire a delegation if appropriate.
*/
status = nfsd_lookup(rqstp, current_fh,
- open->op_fname.data, open->op_fname.len, *resfh);
+ open->op_fname, open->op_fnamelen, *resfh);
if (status)
goto out;
status = nfsd_check_obj_isreg(*resfh);
@@ -360,7 +360,7 @@ nfsd4_open(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
bool reclaim = false;
dprintk("NFSD: nfsd4_open filename %.*s op_openowner %p\n",
- (int)open->op_fname.len, open->op_fname.data,
+ (int)open->op_fnamelen, open->op_fname,
open->op_openowner);
/* This check required by spec. */
@@ -1023,8 +1023,8 @@ nfsd4_write(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
write->wr_how_written = write->wr_stable_how;
- nvecs = svc_fill_write_vector(rqstp, write->wr_pagelist,
- &write->wr_head, write->wr_buflen);
+ nvecs = svc_fill_write_vector(rqstp, write->wr_payload.pages,
+ write->wr_payload.head, write->wr_buflen);
WARN_ON_ONCE(nvecs > ARRAY_SIZE(rqstp->rq_vec));
status = nfsd_vfs_write(rqstp, &cstate->current_fh, nf,
@@ -1425,7 +1425,7 @@ static __be32 nfsd4_do_copy(struct nfsd4_copy *copy, bool sync)
return status;
}
-static int dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst)
+static void dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst)
{
dst->cp_src_pos = src->cp_src_pos;
dst->cp_dst_pos = src->cp_dst_pos;
@@ -1444,8 +1444,6 @@ static int dup_copy_fields(struct nfsd4_copy *src, struct nfsd4_copy *dst)
memcpy(&dst->stateid, &src->stateid, sizeof(src->stateid));
memcpy(&dst->c_fh, &src->c_fh, sizeof(src->c_fh));
dst->ss_mnt = src->ss_mnt;
-
- return 0;
}
static void cleanup_async_copy(struct nfsd4_copy *copy)
@@ -1539,9 +1537,7 @@ nfsd4_copy(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
refcount_set(&async_copy->refcount, 1);
memcpy(&copy->cp_res.cb_stateid, &copy->cp_stateid,
sizeof(copy->cp_stateid));
- status = dup_copy_fields(copy, async_copy);
- if (status)
- goto out_err;
+ dup_copy_fields(copy, async_copy);
async_copy->copy_task = kthread_create(nfsd4_do_async_copy,
async_copy, "%s", "copy thread");
if (IS_ERR(async_copy->copy_task))
@@ -2276,7 +2272,7 @@ static void svcxdr_init_encode(struct svc_rqst *rqstp,
xdr->end = head->iov_base + PAGE_SIZE - rqstp->rq_auth_slack;
/* Tail and page_len should be zero at this point: */
buf->len = buf->head[0].iov_len;
- xdr->scratch.iov_len = 0;
+ xdr_reset_scratch_buffer(xdr);
xdr->page_ptr = buf->pages - 1;
buf->buflen = PAGE_SIZE * (1 + rqstp->rq_page_end - buf->pages)
- rqstp->rq_auth_slack;
@@ -3282,7 +3278,7 @@ int nfsd4_max_reply(struct svc_rqst *rqstp, struct nfsd4_op *op)
void warn_on_nonidempotent_op(struct nfsd4_op *op)
{
if (OPDESC(op)->op_flags & OP_MODIFIES_SOMETHING) {
- pr_err("unable to encode reply to nonidempotent op %d (%s)\n",
+ pr_err("unable to encode reply to nonidempotent op %u (%s)\n",
op->opnum, nfsd4_op_name(op->opnum));
WARN_ON_ONCE(1);
}
@@ -3295,16 +3291,13 @@ static const char *nfsd4_op_name(unsigned opnum)
return "unknown_operation";
}
-#define nfsd4_voidres nfsd4_voidargs
-struct nfsd4_voidargs { int dummy; };
-
static const struct svc_procedure nfsd_procedures4[2] = {
[NFSPROC4_NULL] = {
.pc_func = nfsd4_proc_null,
- .pc_decode = nfs4svc_decode_voidarg,
- .pc_encode = nfs4svc_encode_voidres,
- .pc_argsize = sizeof(struct nfsd4_voidargs),
- .pc_ressize = sizeof(struct nfsd4_voidres),
+ .pc_decode = nfssvc_decode_voidarg,
+ .pc_encode = nfssvc_encode_voidres,
+ .pc_argsize = sizeof(struct nfsd_voidargs),
+ .pc_ressize = sizeof(struct nfsd_voidres),
.pc_cachetype = RC_NOCACHE,
.pc_xdrressize = 1,
},
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index d7f27ed6b794..1d2cd6a88f61 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -769,6 +769,7 @@ static int nfs4_init_cp_state(struct nfsd_net *nn, copy_stateid_t *stid,
spin_lock(&nn->s2s_cp_lock);
new_id = idr_alloc_cyclic(&nn->s2s_cp_stateids, stid, 0, 0, GFP_NOWAIT);
stid->stid.si_opaque.so_id = new_id;
+ stid->stid.si_generation = 1;
spin_unlock(&nn->s2s_cp_lock);
idr_preload_end();
if (new_id < 0)
@@ -3066,7 +3067,7 @@ nfsd4_exchange_id(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
rpc_ntop(sa, addr_str, sizeof(addr_str));
dprintk("%s rqstp=%p exid=%p clname.len=%u clname.data=%p "
- "ip_addr=%s flags %x, spa_how %d\n",
+ "ip_addr=%s flags %x, spa_how %u\n",
__func__, rqstp, exid, exid->clname.len, exid->clname.data,
addr_str, exid->flags, exid->spa_how);
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 833a2c64dfe8..45ee6b12ce5b 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -54,6 +54,8 @@
#include "pnfs.h"
#include "filecache.h"
+#include "trace.h"
+
#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
#include <linux/security.h>
#endif
@@ -90,6 +92,8 @@ check_filename(char *str, int len)
if (len == 0)
return nfserr_inval;
+ if (len > NFS4_MAXNAMLEN)
+ return nfserr_nametoolong;
if (isdotent(str, len))
return nfserr_badname;
for (i = 0; i < len; i++)
@@ -98,122 +102,6 @@ check_filename(char *str, int len)
return 0;
}
-#define DECODE_HEAD \
- __be32 *p; \
- __be32 status
-#define DECODE_TAIL \
- status = 0; \
-out: \
- return status; \
-xdr_error: \
- dprintk("NFSD: xdr error (%s:%d)\n", \
- __FILE__, __LINE__); \
- status = nfserr_bad_xdr; \
- goto out
-
-#define READMEM(x,nbytes) do { \
- x = (char *)p; \
- p += XDR_QUADLEN(nbytes); \
-} while (0)
-#define SAVEMEM(x,nbytes) do { \
- if (!(x = (p==argp->tmp || p == argp->tmpp) ? \
- savemem(argp, p, nbytes) : \
- (char *)p)) { \
- dprintk("NFSD: xdr error (%s:%d)\n", \
- __FILE__, __LINE__); \
- goto xdr_error; \
- } \
- p += XDR_QUADLEN(nbytes); \
-} while (0)
-#define COPYMEM(x,nbytes) do { \
- memcpy((x), p, nbytes); \
- p += XDR_QUADLEN(nbytes); \
-} while (0)
-
-/* READ_BUF, read_buf(): nbytes must be <= PAGE_SIZE */
-#define READ_BUF(nbytes) do { \
- if (nbytes <= (u32)((char *)argp->end - (char *)argp->p)) { \
- p = argp->p; \
- argp->p += XDR_QUADLEN(nbytes); \
- } else if (!(p = read_buf(argp, nbytes))) { \
- dprintk("NFSD: xdr error (%s:%d)\n", \
- __FILE__, __LINE__); \
- goto xdr_error; \
- } \
-} while (0)
-
-static void next_decode_page(struct nfsd4_compoundargs *argp)
-{
- argp->p = page_address(argp->pagelist[0]);
- argp->pagelist++;
- if (argp->pagelen < PAGE_SIZE) {
- argp->end = argp->p + XDR_QUADLEN(argp->pagelen);
- argp->pagelen = 0;
- } else {
- argp->end = argp->p + (PAGE_SIZE>>2);
- argp->pagelen -= PAGE_SIZE;
- }
-}
-
-static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes)
-{
- /* We want more bytes than seem to be available.
- * Maybe we need a new page, maybe we have just run out
- */
- unsigned int avail = (char *)argp->end - (char *)argp->p;
- __be32 *p;
-
- if (argp->pagelen == 0) {
- struct kvec *vec = &argp->rqstp->rq_arg.tail[0];
-
- if (!argp->tail) {
- argp->tail = true;
- avail = vec->iov_len;
- argp->p = vec->iov_base;
- argp->end = vec->iov_base + avail;
- }
-
- if (avail < nbytes)
- return NULL;
-
- p = argp->p;
- argp->p += XDR_QUADLEN(nbytes);
- return p;
- }
-
- if (avail + argp->pagelen < nbytes)
- return NULL;
- if (avail + PAGE_SIZE < nbytes) /* need more than a page !! */
- return NULL;
- /* ok, we can do it with the current plus the next page */
- if (nbytes <= sizeof(argp->tmp))
- p = argp->tmp;
- else {
- kfree(argp->tmpp);
- p = argp->tmpp = kmalloc(nbytes, GFP_KERNEL);
- if (!p)
- return NULL;
-
- }
- /*
- * The following memcpy is safe because read_buf is always
- * called with nbytes > avail, and the two cases above both
- * guarantee p points to at least nbytes bytes.
- */
- memcpy(p, argp->p, avail);
- next_decode_page(argp);
- memcpy(((char*)p)+avail, argp->p, (nbytes - avail));
- argp->p += XDR_QUADLEN(nbytes - avail);
- return p;
-}
-
-static unsigned int compoundargs_bytes_left(struct nfsd4_compoundargs *argp)
-{
- unsigned int this = (char *)argp->end - (char *)argp->p;
-
- return this + argp->pagelen;
-}
-
static int zero_clientid(clientid_t *clid)
{
return (clid->cl_boot == 0) && (clid->cl_id == 0);
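
With the helpers above gone, every decoder that follows trades hidden
goto-based error handling for explicit returns. The idiom, as a
before-and-after sketch of a hypothetical two-word argument:

    /* Before: DECODE_HEAD/READ_BUF/DECODE_TAIL hide 'p', the bounds
     * check, the xdr_error label, and a dprintk */
    DECODE_HEAD;
    READ_BUF(8);
    args->first = be32_to_cpup(p++);
    args->second = be32_to_cpup(p++);
    DECODE_TAIL;

    /* After: each failure is an explicit nfserr_bad_xdr return */
    if (xdr_stream_decode_u32(argp->xdr, &args->first) < 0)
            return nfserr_bad_xdr;
    if (xdr_stream_decode_u32(argp->xdr, &args->second) < 0)
            return nfserr_bad_xdr;
    return nfs_ok;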
@@ -259,118 +147,243 @@ svcxdr_dupstr(struct nfsd4_compoundargs *argp, void *buf, u32 len)
return p;
}
+/*
+ * NFSv4 basic data type decoders
+ */
+
+/*
+ * This helper handles variable-length opaques which belong to protocol
+ * elements that this implementation does not support.
+ */
static __be32
-svcxdr_construct_vector(struct nfsd4_compoundargs *argp, struct kvec *head,
- struct page ***pagelist, u32 buflen)
+nfsd4_decode_ignored_string(struct nfsd4_compoundargs *argp, u32 maxlen)
{
- int avail;
- int len;
- int pages;
+ u32 len;
- /* Sorry .. no magic macros for this.. *
- * READ_BUF(write->wr_buflen);
- * SAVEMEM(write->wr_buf, write->wr_buflen);
- */
- avail = (char *)argp->end - (char *)argp->p;
- if (avail + argp->pagelen < buflen) {
- dprintk("NFSD: xdr error (%s:%d)\n",
- __FILE__, __LINE__);
+ if (xdr_stream_decode_u32(argp->xdr, &len) < 0)
+ return nfserr_bad_xdr;
+ if (maxlen && len > maxlen)
+ return nfserr_bad_xdr;
+ if (!xdr_inline_decode(argp->xdr, len))
return nfserr_bad_xdr;
- }
- head->iov_base = argp->p;
- head->iov_len = avail;
- *pagelist = argp->pagelist;
- len = XDR_QUADLEN(buflen) << 2;
- if (len >= avail) {
- len -= avail;
+ return nfs_ok;
+}
- pages = len >> PAGE_SHIFT;
- argp->pagelist += pages;
- argp->pagelen -= pages * PAGE_SIZE;
- len -= pages * PAGE_SIZE;
+static __be32
+nfsd4_decode_opaque(struct nfsd4_compoundargs *argp, struct xdr_netobj *o)
+{
+ __be32 *p;
+ u32 len;
- next_decode_page(argp);
- }
- argp->p += XDR_QUADLEN(len);
+ if (xdr_stream_decode_u32(argp->xdr, &len) < 0)
+ return nfserr_bad_xdr;
+ if (len == 0 || len > NFS4_OPAQUE_LIMIT)
+ return nfserr_bad_xdr;
+ p = xdr_inline_decode(argp->xdr, len);
+ if (!p)
+ return nfserr_bad_xdr;
+ o->data = svcxdr_tmpalloc(argp, len);
+ if (!o->data)
+ return nfserr_jukebox;
+ o->len = len;
+ memcpy(o->data, p, len);
- return 0;
+ return nfs_ok;
}
-/**
- * savemem - duplicate a chunk of memory for later processing
- * @argp: NFSv4 compound argument structure to be freed with
- * @p: pointer to be duplicated
- * @nbytes: length to be duplicated
- *
- * Returns a pointer to a copy of @nbytes bytes of memory at @p
- * that are preserved until processing of the NFSv4 compound
- * operation described by @argp finishes.
- */
-static char *savemem(struct nfsd4_compoundargs *argp, __be32 *p, int nbytes)
+static __be32
+nfsd4_decode_component4(struct nfsd4_compoundargs *argp, char **namp, u32 *lenp)
{
- void *ret;
+ __be32 *p, status;
- ret = svcxdr_tmpalloc(argp, nbytes);
- if (!ret)
- return NULL;
- memcpy(ret, p, nbytes);
- return ret;
+ if (xdr_stream_decode_u32(argp->xdr, lenp) < 0)
+ return nfserr_bad_xdr;
+ p = xdr_inline_decode(argp->xdr, *lenp);
+ if (!p)
+ return nfserr_bad_xdr;
+ status = check_filename((char *)p, *lenp);
+ if (status)
+ return status;
+ *namp = svcxdr_tmpalloc(argp, *lenp);
+ if (!*namp)
+ return nfserr_jukebox;
+ memcpy(*namp, p, *lenp);
+
+ return nfs_ok;
}
static __be32
-nfsd4_decode_time(struct nfsd4_compoundargs *argp, struct timespec64 *tv)
+nfsd4_decode_nfstime4(struct nfsd4_compoundargs *argp, struct timespec64 *tv)
{
- DECODE_HEAD;
+ __be32 *p;
- READ_BUF(12);
+ p = xdr_inline_decode(argp->xdr, XDR_UNIT * 3);
+ if (!p)
+ return nfserr_bad_xdr;
p = xdr_decode_hyper(p, &tv->tv_sec);
tv->tv_nsec = be32_to_cpup(p++);
if (tv->tv_nsec >= (u32)1000000000)
return nfserr_inval;
+ return nfs_ok;
+}
- DECODE_TAIL;
+static __be32
+nfsd4_decode_verifier4(struct nfsd4_compoundargs *argp, nfs4_verifier *verf)
+{
+ __be32 *p;
+
+ p = xdr_inline_decode(argp->xdr, NFS4_VERIFIER_SIZE);
+ if (!p)
+ return nfserr_bad_xdr;
+ memcpy(verf->data, p, sizeof(verf->data));
+ return nfs_ok;
}
+/**
+ * nfsd4_decode_bitmap4 - Decode an NFSv4 bitmap4
+ * @argp: NFSv4 compound argument structure
+ * @bmval: pointer to an array of u32's to decode into
+ * @bmlen: size of the @bmval array
+ *
+ * The server needs to return nfs_ok rather than nfserr_bad_xdr when
+ * encountering bitmaps containing bits it does not recognize. This
+ * includes bits in bitmap words past WORDn, where WORDn is the last
+ * bitmap WORD the implementation currently supports. Thus we are
+ * careful here to simply ignore bits in bitmap words that this
+ * implementation has yet to support explicitly.
+ *
+ * Return values:
+ * %nfs_ok: @bmval populated successfully
+ * %nfserr_bad_xdr: the encoded bitmap was invalid
+ */
static __be32
-nfsd4_decode_bitmap(struct nfsd4_compoundargs *argp, u32 *bmval)
+nfsd4_decode_bitmap4(struct nfsd4_compoundargs *argp, u32 *bmval, u32 bmlen)
{
- u32 bmlen;
- DECODE_HEAD;
+ u32 i, count;
+ __be32 *p;
+
+ if (xdr_stream_decode_u32(argp->xdr, &count) < 0)
+ return nfserr_bad_xdr;
+ /* request sanity */
+ if (count > 1000)
+ return nfserr_bad_xdr;
+ p = xdr_inline_decode(argp->xdr, count << 2);
+ if (!p)
+ return nfserr_bad_xdr;
+ i = 0;
+ while (i < count)
+ bmval[i++] = be32_to_cpup(p++);
+ while (i < bmlen)
+ bmval[i++] = 0;
+
+ return nfs_ok;
+}
+
+static __be32
+nfsd4_decode_nfsace4(struct nfsd4_compoundargs *argp, struct nfs4_ace *ace)
+{
+ __be32 *p, status;
+ u32 length;
+
+ if (xdr_stream_decode_u32(argp->xdr, &ace->type) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u32(argp->xdr, &ace->flag) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u32(argp->xdr, &ace->access_mask) < 0)
+ return nfserr_bad_xdr;
+
+ if (xdr_stream_decode_u32(argp->xdr, &length) < 0)
+ return nfserr_bad_xdr;
+ p = xdr_inline_decode(argp->xdr, length);
+ if (!p)
+ return nfserr_bad_xdr;
+ ace->whotype = nfs4_acl_get_whotype((char *)p, length);
+ if (ace->whotype != NFS4_ACL_WHO_NAMED)
+ status = nfs_ok;
+ else if (ace->flag & NFS4_ACE_IDENTIFIER_GROUP)
+ status = nfsd_map_name_to_gid(argp->rqstp,
+ (char *)p, length, &ace->who_gid);
+ else
+ status = nfsd_map_name_to_uid(argp->rqstp,
+ (char *)p, length, &ace->who_uid);
+
+ return status;
+}
+
+/* A counted array of nfsace4's */
+static noinline __be32
+nfsd4_decode_acl(struct nfsd4_compoundargs *argp, struct nfs4_acl **acl)
+{
+ struct nfs4_ace *ace;
+ __be32 status;
+ u32 count;
+
+ if (xdr_stream_decode_u32(argp->xdr, &count) < 0)
+ return nfserr_bad_xdr;
+
+ if (count > xdr_stream_remaining(argp->xdr) / 20)
+ /*
+ * Even with 4-byte names there wouldn't be
+ * space for that many aces; something fishy is
+ * going on:
+ */
+ return nfserr_fbig;
+
+ *acl = svcxdr_tmpalloc(argp, nfs4_acl_bytes(count));
+ if (*acl == NULL)
+ return nfserr_jukebox;
+
+ (*acl)->naces = count;
+ for (ace = (*acl)->aces; ace < (*acl)->aces + count; ace++) {
+ status = nfsd4_decode_nfsace4(argp, ace);
+ if (status)
+ return status;
+ }
+
+ return nfs_ok;
+}
- bmval[0] = 0;
- bmval[1] = 0;
- bmval[2] = 0;
+static noinline __be32
+nfsd4_decode_security_label(struct nfsd4_compoundargs *argp,
+ struct xdr_netobj *label)
+{
+ u32 lfs, pi, length;
+ __be32 *p;
- READ_BUF(4);
- bmlen = be32_to_cpup(p++);
- if (bmlen > 1000)
- goto xdr_error;
+ if (xdr_stream_decode_u32(argp->xdr, &lfs) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u32(argp->xdr, &pi) < 0)
+ return nfserr_bad_xdr;
- READ_BUF(bmlen << 2);
- if (bmlen > 0)
- bmval[0] = be32_to_cpup(p++);
- if (bmlen > 1)
- bmval[1] = be32_to_cpup(p++);
- if (bmlen > 2)
- bmval[2] = be32_to_cpup(p++);
+ if (xdr_stream_decode_u32(argp->xdr, &length) < 0)
+ return nfserr_bad_xdr;
+ if (length > NFS4_MAXLABELLEN)
+ return nfserr_badlabel;
+ p = xdr_inline_decode(argp->xdr, length);
+ if (!p)
+ return nfserr_bad_xdr;
+ label->len = length;
+ label->data = svcxdr_dupstr(argp, p, length);
+ if (!label->data)
+ return nfserr_jukebox;
- DECODE_TAIL;
+ return nfs_ok;
}
static __be32
-nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
- struct iattr *iattr, struct nfs4_acl **acl,
- struct xdr_netobj *label, int *umask)
+nfsd4_decode_fattr4(struct nfsd4_compoundargs *argp, u32 *bmval, u32 bmlen,
+ struct iattr *iattr, struct nfs4_acl **acl,
+ struct xdr_netobj *label, int *umask)
{
- int expected_len, len = 0;
- u32 dummy32;
- char *buf;
+ unsigned int starting_pos;
+ u32 attrlist4_count;
+ __be32 *p, status;
- DECODE_HEAD;
iattr->ia_valid = 0;
- if ((status = nfsd4_decode_bitmap(argp, bmval)))
- return status;
+ status = nfsd4_decode_bitmap4(argp, bmval, bmlen);
+ if (status)
+ return nfserr_bad_xdr;
if (bmval[0] & ~NFSD_WRITEABLE_ATTRS_WORD0
|| bmval[1] & ~NFSD_WRITEABLE_ATTRS_WORD1
@@ -380,96 +393,69 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
return nfserr_attrnotsupp;
}
- READ_BUF(4);
- expected_len = be32_to_cpup(p++);
+ if (xdr_stream_decode_u32(argp->xdr, &attrlist4_count) < 0)
+ return nfserr_bad_xdr;
+ starting_pos = xdr_stream_pos(argp->xdr);
if (bmval[0] & FATTR4_WORD0_SIZE) {
- READ_BUF(8);
- len += 8;
- p = xdr_decode_hyper(p, &iattr->ia_size);
+ u64 size;
+
+ if (xdr_stream_decode_u64(argp->xdr, &size) < 0)
+ return nfserr_bad_xdr;
+ iattr->ia_size = size;
iattr->ia_valid |= ATTR_SIZE;
}
if (bmval[0] & FATTR4_WORD0_ACL) {
- u32 nace;
- struct nfs4_ace *ace;
-
- READ_BUF(4); len += 4;
- nace = be32_to_cpup(p++);
-
- if (nace > compoundargs_bytes_left(argp)/20)
- /*
- * Even with 4-byte names there wouldn't be
- * space for that many aces; something fishy is
- * going on:
- */
- return nfserr_fbig;
-
- *acl = svcxdr_tmpalloc(argp, nfs4_acl_bytes(nace));
- if (*acl == NULL)
- return nfserr_jukebox;
-
- (*acl)->naces = nace;
- for (ace = (*acl)->aces; ace < (*acl)->aces + nace; ace++) {
- READ_BUF(16); len += 16;
- ace->type = be32_to_cpup(p++);
- ace->flag = be32_to_cpup(p++);
- ace->access_mask = be32_to_cpup(p++);
- dummy32 = be32_to_cpup(p++);
- READ_BUF(dummy32);
- len += XDR_QUADLEN(dummy32) << 2;
- READMEM(buf, dummy32);
- ace->whotype = nfs4_acl_get_whotype(buf, dummy32);
- status = nfs_ok;
- if (ace->whotype != NFS4_ACL_WHO_NAMED)
- ;
- else if (ace->flag & NFS4_ACE_IDENTIFIER_GROUP)
- status = nfsd_map_name_to_gid(argp->rqstp,
- buf, dummy32, &ace->who_gid);
- else
- status = nfsd_map_name_to_uid(argp->rqstp,
- buf, dummy32, &ace->who_uid);
- if (status)
- return status;
- }
+ status = nfsd4_decode_acl(argp, acl);
+ if (status)
+ return status;
} else
*acl = NULL;
if (bmval[1] & FATTR4_WORD1_MODE) {
- READ_BUF(4);
- len += 4;
- iattr->ia_mode = be32_to_cpup(p++);
+ u32 mode;
+
+ if (xdr_stream_decode_u32(argp->xdr, &mode) < 0)
+ return nfserr_bad_xdr;
+ iattr->ia_mode = mode;
iattr->ia_mode &= (S_IFMT | S_IALLUGO);
iattr->ia_valid |= ATTR_MODE;
}
if (bmval[1] & FATTR4_WORD1_OWNER) {
- READ_BUF(4);
- len += 4;
- dummy32 = be32_to_cpup(p++);
- READ_BUF(dummy32);
- len += (XDR_QUADLEN(dummy32) << 2);
- READMEM(buf, dummy32);
- if ((status = nfsd_map_name_to_uid(argp->rqstp, buf, dummy32, &iattr->ia_uid)))
+ u32 length;
+
+ if (xdr_stream_decode_u32(argp->xdr, &length) < 0)
+ return nfserr_bad_xdr;
+ p = xdr_inline_decode(argp->xdr, length);
+ if (!p)
+ return nfserr_bad_xdr;
+ status = nfsd_map_name_to_uid(argp->rqstp, (char *)p, length,
+ &iattr->ia_uid);
+ if (status)
return status;
iattr->ia_valid |= ATTR_UID;
}
if (bmval[1] & FATTR4_WORD1_OWNER_GROUP) {
- READ_BUF(4);
- len += 4;
- dummy32 = be32_to_cpup(p++);
- READ_BUF(dummy32);
- len += (XDR_QUADLEN(dummy32) << 2);
- READMEM(buf, dummy32);
- if ((status = nfsd_map_name_to_gid(argp->rqstp, buf, dummy32, &iattr->ia_gid)))
+ u32 length;
+
+ if (xdr_stream_decode_u32(argp->xdr, &length) < 0)
+ return nfserr_bad_xdr;
+ p = xdr_inline_decode(argp->xdr, length);
+ if (!p)
+ return nfserr_bad_xdr;
+ status = nfsd_map_name_to_gid(argp->rqstp, (char *)p, length,
+ &iattr->ia_gid);
+ if (status)
return status;
iattr->ia_valid |= ATTR_GID;
}
if (bmval[1] & FATTR4_WORD1_TIME_ACCESS_SET) {
- READ_BUF(4);
- len += 4;
- dummy32 = be32_to_cpup(p++);
- switch (dummy32) {
+ u32 set_it;
+
+ if (xdr_stream_decode_u32(argp->xdr, &set_it) < 0)
+ return nfserr_bad_xdr;
+ switch (set_it) {
case NFS4_SET_TO_CLIENT_TIME:
- len += 12;
- status = nfsd4_decode_time(argp, &iattr->ia_atime);
+ status = nfsd4_decode_nfstime4(argp, &iattr->ia_atime);
if (status)
return status;
iattr->ia_valid |= (ATTR_ATIME | ATTR_ATIME_SET);
@@ -478,17 +464,17 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
iattr->ia_valid |= ATTR_ATIME;
break;
default:
- goto xdr_error;
+ return nfserr_bad_xdr;
}
}
if (bmval[1] & FATTR4_WORD1_TIME_MODIFY_SET) {
- READ_BUF(4);
- len += 4;
- dummy32 = be32_to_cpup(p++);
- switch (dummy32) {
+ u32 set_it;
+
+ if (xdr_stream_decode_u32(argp->xdr, &set_it) < 0)
+ return nfserr_bad_xdr;
+ switch (set_it) {
case NFS4_SET_TO_CLIENT_TIME:
- len += 12;
- status = nfsd4_decode_time(argp, &iattr->ia_mtime);
+ status = nfsd4_decode_nfstime4(argp, &iattr->ia_mtime);
if (status)
return status;
iattr->ia_valid |= (ATTR_MTIME | ATTR_MTIME_SET);
@@ -497,222 +483,329 @@ nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval,
iattr->ia_valid |= ATTR_MTIME;
break;
default:
- goto xdr_error;
+ return nfserr_bad_xdr;
}
}
-
label->len = 0;
if (IS_ENABLED(CONFIG_NFSD_V4_SECURITY_LABEL) &&
bmval[2] & FATTR4_WORD2_SECURITY_LABEL) {
- READ_BUF(4);
- len += 4;
- dummy32 = be32_to_cpup(p++); /* lfs: we don't use it */
- READ_BUF(4);
- len += 4;
- dummy32 = be32_to_cpup(p++); /* pi: we don't use it either */
- READ_BUF(4);
- len += 4;
- dummy32 = be32_to_cpup(p++);
- READ_BUF(dummy32);
- if (dummy32 > NFS4_MAXLABELLEN)
- return nfserr_badlabel;
- len += (XDR_QUADLEN(dummy32) << 2);
- READMEM(buf, dummy32);
- label->len = dummy32;
- label->data = svcxdr_dupstr(argp, buf, dummy32);
- if (!label->data)
- return nfserr_jukebox;
+ status = nfsd4_decode_security_label(argp, label);
+ if (status)
+ return status;
}
if (bmval[2] & FATTR4_WORD2_MODE_UMASK) {
+ u32 mode, mask;
+
if (!umask)
- goto xdr_error;
- READ_BUF(8);
- len += 8;
- dummy32 = be32_to_cpup(p++);
- iattr->ia_mode = dummy32 & (S_IFMT | S_IALLUGO);
- dummy32 = be32_to_cpup(p++);
- *umask = dummy32 & S_IRWXUGO;
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u32(argp->xdr, &mode) < 0)
+ return nfserr_bad_xdr;
+ iattr->ia_mode = mode & (S_IFMT | S_IALLUGO);
+ if (xdr_stream_decode_u32(argp->xdr, &mask) < 0)
+ return nfserr_bad_xdr;
+ *umask = mask & S_IRWXUGO;
iattr->ia_valid |= ATTR_MODE;
}
- if (len != expected_len)
- goto xdr_error;
- DECODE_TAIL;
+ /* request sanity: did attrlist4 contain the expected number of words? */
+ if (attrlist4_count != xdr_stream_pos(argp->xdr) - starting_pos)
+ return nfserr_bad_xdr;
+
+ return nfs_ok;
}
static __be32
-nfsd4_decode_stateid(struct nfsd4_compoundargs *argp, stateid_t *sid)
+nfsd4_decode_stateid4(struct nfsd4_compoundargs *argp, stateid_t *sid)
{
- DECODE_HEAD;
+ __be32 *p;
- READ_BUF(sizeof(stateid_t));
+ p = xdr_inline_decode(argp->xdr, NFS4_STATEID_SIZE);
+ if (!p)
+ return nfserr_bad_xdr;
sid->si_generation = be32_to_cpup(p++);
- COPYMEM(&sid->si_opaque, sizeof(stateid_opaque_t));
+ memcpy(&sid->si_opaque, p, sizeof(sid->si_opaque));
+ return nfs_ok;
+}
+
+static __be32
+nfsd4_decode_clientid4(struct nfsd4_compoundargs *argp, clientid_t *clientid)
+{
+ __be32 *p;
- DECODE_TAIL;
+ p = xdr_inline_decode(argp->xdr, sizeof(__be64));
+ if (!p)
+ return nfserr_bad_xdr;
+ memcpy(clientid, p, sizeof(*clientid));
+ return nfs_ok;
}
static __be32
-nfsd4_decode_access(struct nfsd4_compoundargs *argp, struct nfsd4_access *access)
+nfsd4_decode_state_owner4(struct nfsd4_compoundargs *argp,
+ clientid_t *clientid, struct xdr_netobj *owner)
{
- DECODE_HEAD;
+ __be32 status;
+
+ status = nfsd4_decode_clientid4(argp, clientid);
+ if (status)
+ return status;
+ return nfsd4_decode_opaque(argp, owner);
+}
- READ_BUF(4);
- access->ac_req_access = be32_to_cpup(p++);
+#ifdef CONFIG_NFSD_PNFS
+static __be32
+nfsd4_decode_deviceid4(struct nfsd4_compoundargs *argp,
+ struct nfsd4_deviceid *devid)
+{
+ __be32 *p;
- DECODE_TAIL;
+ p = xdr_inline_decode(argp->xdr, NFS4_DEVICEID4_SIZE);
+ if (!p)
+ return nfserr_bad_xdr;
+ memcpy(devid, p, sizeof(*devid));
+ return nfs_ok;
}
-static __be32 nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_cb_sec *cbs)
+static __be32
+nfsd4_decode_layoutupdate4(struct nfsd4_compoundargs *argp,
+ struct nfsd4_layoutcommit *lcp)
{
- DECODE_HEAD;
- struct user_namespace *userns = nfsd_user_namespace(argp->rqstp);
- u32 dummy, uid, gid;
- char *machine_name;
- int i;
- int nr_secflavs;
+ if (xdr_stream_decode_u32(argp->xdr, &lcp->lc_layout_type) < 0)
+ return nfserr_bad_xdr;
+ if (lcp->lc_layout_type < LAYOUT_NFSV4_1_FILES)
+ return nfserr_bad_xdr;
+ if (lcp->lc_layout_type >= LAYOUT_TYPE_MAX)
+ return nfserr_bad_xdr;
+
+ if (xdr_stream_decode_u32(argp->xdr, &lcp->lc_up_len) < 0)
+ return nfserr_bad_xdr;
+ if (lcp->lc_up_len > 0) {
+ lcp->lc_up_layout = xdr_inline_decode(argp->xdr, lcp->lc_up_len);
+ if (!lcp->lc_up_layout)
+ return nfserr_bad_xdr;
+ }
+
+ return nfs_ok;
+}
+
+static __be32
+nfsd4_decode_layoutreturn4(struct nfsd4_compoundargs *argp,
+ struct nfsd4_layoutreturn *lrp)
+{
+ __be32 status;
+
+ if (xdr_stream_decode_u32(argp->xdr, &lrp->lr_return_type) < 0)
+ return nfserr_bad_xdr;
+ switch (lrp->lr_return_type) {
+ case RETURN_FILE:
+ if (xdr_stream_decode_u64(argp->xdr, &lrp->lr_seg.offset) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u64(argp->xdr, &lrp->lr_seg.length) < 0)
+ return nfserr_bad_xdr;
+ status = nfsd4_decode_stateid4(argp, &lrp->lr_sid);
+ if (status)
+ return status;
+ if (xdr_stream_decode_u32(argp->xdr, &lrp->lrf_body_len) < 0)
+ return nfserr_bad_xdr;
+ if (lrp->lrf_body_len > 0) {
+ lrp->lrf_body = xdr_inline_decode(argp->xdr, lrp->lrf_body_len);
+ if (!lrp->lrf_body)
+ return nfserr_bad_xdr;
+ }
+ break;
+ case RETURN_FSID:
+ case RETURN_ALL:
+ lrp->lr_seg.offset = 0;
+ lrp->lr_seg.length = NFS4_MAX_UINT64;
+ break;
+ default:
+ return nfserr_bad_xdr;
+ }
+
+ return nfs_ok;
+}
+
+#endif /* CONFIG_NFSD_PNFS */
+
+static __be32
+nfsd4_decode_sessionid4(struct nfsd4_compoundargs *argp,
+ struct nfs4_sessionid *sessionid)
+{
+ __be32 *p;
+
+ p = xdr_inline_decode(argp->xdr, NFS4_MAX_SESSIONID_LEN);
+ if (!p)
+ return nfserr_bad_xdr;
+ memcpy(sessionid->data, p, sizeof(sessionid->data));
+ return nfs_ok;
+}
+
+/* Defined in Appendix A of RFC 5531 */
+static __be32
+nfsd4_decode_authsys_parms(struct nfsd4_compoundargs *argp,
+ struct nfsd4_cb_sec *cbs)
+{
+ u32 stamp, gidcount, uid, gid;
+ __be32 *p, status;
+
+ if (xdr_stream_decode_u32(argp->xdr, &stamp) < 0)
+ return nfserr_bad_xdr;
+ /* machine name */
+ status = nfsd4_decode_ignored_string(argp, 255);
+ if (status)
+ return status;
+ if (xdr_stream_decode_u32(argp->xdr, &uid) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u32(argp->xdr, &gid) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u32(argp->xdr, &gidcount) < 0)
+ return nfserr_bad_xdr;
+ if (gidcount > 16)
+ return nfserr_bad_xdr;
+ p = xdr_inline_decode(argp->xdr, gidcount << 2);
+ if (!p)
+ return nfserr_bad_xdr;
+ if (cbs->flavor == (u32)(-1)) {
+ struct user_namespace *userns = nfsd_user_namespace(argp->rqstp);
+
+ kuid_t kuid = make_kuid(userns, uid);
+ kgid_t kgid = make_kgid(userns, gid);
+ if (uid_valid(kuid) && gid_valid(kgid)) {
+ cbs->uid = kuid;
+ cbs->gid = kgid;
+ cbs->flavor = RPC_AUTH_UNIX;
+ } else {
+ dprintk("RPC_AUTH_UNIX with invalid uid or gid, ignoring!\n");
+ }
+ }
+
+ return nfs_ok;
+}
+
+static __be32
+nfsd4_decode_gss_cb_handles4(struct nfsd4_compoundargs *argp,
+ struct nfsd4_cb_sec *cbs)
+{
+ __be32 status;
+ u32 service;
+
+ dprintk("RPC_AUTH_GSS callback secflavor not supported!\n");
+
+ if (xdr_stream_decode_u32(argp->xdr, &service) < 0)
+ return nfserr_bad_xdr;
+ if (service < RPC_GSS_SVC_NONE || service > RPC_GSS_SVC_PRIVACY)
+ return nfserr_bad_xdr;
+ /* gcbp_handle_from_server */
+ status = nfsd4_decode_ignored_string(argp, 0);
+ if (status)
+ return status;
+ /* gcbp_handle_from_client */
+ status = nfsd4_decode_ignored_string(argp, 0);
+ if (status)
+ return status;
+
+ return nfs_ok;
+}
+
+/* a counted array of callback_sec_parms4 items */
+static __be32
+nfsd4_decode_cb_sec(struct nfsd4_compoundargs *argp, struct nfsd4_cb_sec *cbs)
+{
+ u32 i, secflavor, nr_secflavs;
+ __be32 status;
/* callback_sec_params4 */
- READ_BUF(4);
- nr_secflavs = be32_to_cpup(p++);
+ if (xdr_stream_decode_u32(argp->xdr, &nr_secflavs) < 0)
+ return nfserr_bad_xdr;
if (nr_secflavs)
cbs->flavor = (u32)(-1);
else
/* Is this legal? Be generous, take it to mean AUTH_NONE: */
cbs->flavor = 0;
+
for (i = 0; i < nr_secflavs; ++i) {
- READ_BUF(4);
- dummy = be32_to_cpup(p++);
- switch (dummy) {
+ if (xdr_stream_decode_u32(argp->xdr, &secflavor) < 0)
+ return nfserr_bad_xdr;
+ switch (secflavor) {
case RPC_AUTH_NULL:
- /* Nothing to read */
+ /* void */
if (cbs->flavor == (u32)(-1))
cbs->flavor = RPC_AUTH_NULL;
break;
case RPC_AUTH_UNIX:
- READ_BUF(8);
- /* stamp */
- dummy = be32_to_cpup(p++);
-
- /* machine name */
- dummy = be32_to_cpup(p++);
- READ_BUF(dummy);
- SAVEMEM(machine_name, dummy);
-
- /* uid, gid */
- READ_BUF(8);
- uid = be32_to_cpup(p++);
- gid = be32_to_cpup(p++);
-
- /* more gids */
- READ_BUF(4);
- dummy = be32_to_cpup(p++);
- READ_BUF(dummy * 4);
- if (cbs->flavor == (u32)(-1)) {
- kuid_t kuid = make_kuid(userns, uid);
- kgid_t kgid = make_kgid(userns, gid);
- if (uid_valid(kuid) && gid_valid(kgid)) {
- cbs->uid = kuid;
- cbs->gid = kgid;
- cbs->flavor = RPC_AUTH_UNIX;
- } else {
- dprintk("RPC_AUTH_UNIX with invalid"
- "uid or gid ignoring!\n");
- }
- }
+ status = nfsd4_decode_authsys_parms(argp, cbs);
+ if (status)
+ return status;
break;
case RPC_AUTH_GSS:
- dprintk("RPC_AUTH_GSS callback secflavor "
- "not supported!\n");
- READ_BUF(8);
- /* gcbp_service */
- dummy = be32_to_cpup(p++);
- /* gcbp_handle_from_server */
- dummy = be32_to_cpup(p++);
- READ_BUF(dummy);
- p += XDR_QUADLEN(dummy);
- /* gcbp_handle_from_client */
- READ_BUF(4);
- dummy = be32_to_cpup(p++);
- READ_BUF(dummy);
+ status = nfsd4_decode_gss_cb_handles4(argp, cbs);
+ if (status)
+ return status;
break;
default:
- dprintk("Illegal callback secflavor\n");
return nfserr_inval;
}
}
- DECODE_TAIL;
-}
-static __be32 nfsd4_decode_backchannel_ctl(struct nfsd4_compoundargs *argp, struct nfsd4_backchannel_ctl *bc)
-{
- DECODE_HEAD;
+ return nfs_ok;
+}
- READ_BUF(4);
- bc->bc_cb_program = be32_to_cpup(p++);
- nfsd4_decode_cb_sec(argp, &bc->bc_cb_sec);
- DECODE_TAIL;
-}
+/*
+ * NFSv4 operation argument decoders
+ */
-static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp, struct nfsd4_bind_conn_to_session *bcts)
+static __be32
+nfsd4_decode_access(struct nfsd4_compoundargs *argp,
+ struct nfsd4_access *access)
{
- DECODE_HEAD;
-
- READ_BUF(NFS4_MAX_SESSIONID_LEN + 8);
- COPYMEM(bcts->sessionid.data, NFS4_MAX_SESSIONID_LEN);
- bcts->dir = be32_to_cpup(p++);
- /* XXX: skipping ctsa_use_conn_in_rdma_mode. Perhaps Tom Tucker
- * could help us figure out we should be using it. */
- DECODE_TAIL;
+ if (xdr_stream_decode_u32(argp->xdr, &access->ac_req_access) < 0)
+ return nfserr_bad_xdr;
+ return nfs_ok;
}
static __be32
nfsd4_decode_close(struct nfsd4_compoundargs *argp, struct nfsd4_close *close)
{
- DECODE_HEAD;
-
- READ_BUF(4);
- close->cl_seqid = be32_to_cpup(p++);
- return nfsd4_decode_stateid(argp, &close->cl_stateid);
-
- DECODE_TAIL;
+ if (xdr_stream_decode_u32(argp->xdr, &close->cl_seqid) < 0)
+ return nfserr_bad_xdr;
+ return nfsd4_decode_stateid4(argp, &close->cl_stateid);
}
static __be32
nfsd4_decode_commit(struct nfsd4_compoundargs *argp, struct nfsd4_commit *commit)
{
- DECODE_HEAD;
-
- READ_BUF(12);
- p = xdr_decode_hyper(p, &commit->co_offset);
- commit->co_count = be32_to_cpup(p++);
-
- DECODE_TAIL;
+ if (xdr_stream_decode_u64(argp->xdr, &commit->co_offset) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u32(argp->xdr, &commit->co_count) < 0)
+ return nfserr_bad_xdr;
+ return nfs_ok;
}
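
For COMMIT the arguments are fixed-width, so the wire math is easy to check against the old macro:

    COMMIT4args on the wire:
        offset    hyper (u64)    8 octets
        count     count4 (u32)   4 octets
                                 ---------
                                 12 octets, the old READ_BUF(12)
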
static __be32
nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create)
{
- DECODE_HEAD;
+ __be32 *p, status;
- READ_BUF(4);
- create->cr_type = be32_to_cpup(p++);
+ if (xdr_stream_decode_u32(argp->xdr, &create->cr_type) < 0)
+ return nfserr_bad_xdr;
switch (create->cr_type) {
case NF4LNK:
- READ_BUF(4);
- create->cr_datalen = be32_to_cpup(p++);
- READ_BUF(create->cr_datalen);
+ if (xdr_stream_decode_u32(argp->xdr, &create->cr_datalen) < 0)
+ return nfserr_bad_xdr;
+ p = xdr_inline_decode(argp->xdr, create->cr_datalen);
+ if (!p)
+ return nfserr_bad_xdr;
create->cr_data = svcxdr_dupstr(argp, p, create->cr_datalen);
if (!create->cr_data)
return nfserr_jukebox;
break;
case NF4BLK:
case NF4CHR:
- READ_BUF(8);
- create->cr_specdata1 = be32_to_cpup(p++);
- create->cr_specdata2 = be32_to_cpup(p++);
+ if (xdr_stream_decode_u32(argp->xdr, &create->cr_specdata1) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u32(argp->xdr, &create->cr_specdata2) < 0)
+ return nfserr_bad_xdr;
break;
case NF4SOCK:
case NF4FIFO:
@@ -720,151 +813,210 @@ nfsd4_decode_create(struct nfsd4_compoundargs *argp, struct nfsd4_create *create
default:
break;
}
-
- READ_BUF(4);
- create->cr_namelen = be32_to_cpup(p++);
- READ_BUF(create->cr_namelen);
- SAVEMEM(create->cr_name, create->cr_namelen);
- if ((status = check_filename(create->cr_name, create->cr_namelen)))
+ status = nfsd4_decode_component4(argp, &create->cr_name,
+ &create->cr_namelen);
+ if (status)
return status;
-
- status = nfsd4_decode_fattr(argp, create->cr_bmval, &create->cr_iattr,
- &create->cr_acl, &create->cr_label,
- &create->cr_umask);
+ status = nfsd4_decode_fattr4(argp, create->cr_bmval,
+ ARRAY_SIZE(create->cr_bmval),
+ &create->cr_iattr, &create->cr_acl,
+ &create->cr_label, &create->cr_umask);
if (status)
- goto out;
+ return status;
- DECODE_TAIL;
+ return nfs_ok;
}
static inline __be32
nfsd4_decode_delegreturn(struct nfsd4_compoundargs *argp, struct nfsd4_delegreturn *dr)
{
- return nfsd4_decode_stateid(argp, &dr->dr_stateid);
+ return nfsd4_decode_stateid4(argp, &dr->dr_stateid);
}
static inline __be32
nfsd4_decode_getattr(struct nfsd4_compoundargs *argp, struct nfsd4_getattr *getattr)
{
- return nfsd4_decode_bitmap(argp, getattr->ga_bmval);
+ return nfsd4_decode_bitmap4(argp, getattr->ga_bmval,
+ ARRAY_SIZE(getattr->ga_bmval));
}
static __be32
nfsd4_decode_link(struct nfsd4_compoundargs *argp, struct nfsd4_link *link)
{
- DECODE_HEAD;
+ return nfsd4_decode_component4(argp, &link->li_name, &link->li_namelen);
+}
- READ_BUF(4);
- link->li_namelen = be32_to_cpup(p++);
- READ_BUF(link->li_namelen);
- SAVEMEM(link->li_name, link->li_namelen);
- if ((status = check_filename(link->li_name, link->li_namelen)))
+static __be32
+nfsd4_decode_open_to_lock_owner4(struct nfsd4_compoundargs *argp,
+ struct nfsd4_lock *lock)
+{
+ __be32 status;
+
+ if (xdr_stream_decode_u32(argp->xdr, &lock->lk_new_open_seqid) < 0)
+ return nfserr_bad_xdr;
+ status = nfsd4_decode_stateid4(argp, &lock->lk_new_open_stateid);
+ if (status)
return status;
+ if (xdr_stream_decode_u32(argp->xdr, &lock->lk_new_lock_seqid) < 0)
+ return nfserr_bad_xdr;
+ return nfsd4_decode_state_owner4(argp, &lock->lk_new_clientid,
+ &lock->lk_new_owner);
+}
- DECODE_TAIL;
+static __be32
+nfsd4_decode_exist_lock_owner4(struct nfsd4_compoundargs *argp,
+ struct nfsd4_lock *lock)
+{
+ __be32 status;
+
+ status = nfsd4_decode_stateid4(argp, &lock->lk_old_lock_stateid);
+ if (status)
+ return status;
+ if (xdr_stream_decode_u32(argp->xdr, &lock->lk_old_lock_seqid) < 0)
+ return nfserr_bad_xdr;
+
+ return nfs_ok;
}
static __be32
-nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock)
+nfsd4_decode_locker4(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock)
{
- DECODE_HEAD;
+ if (xdr_stream_decode_bool(argp->xdr, &lock->lk_is_new) < 0)
+ return nfserr_bad_xdr;
+ if (lock->lk_is_new)
+ return nfsd4_decode_open_to_lock_owner4(argp, lock);
+ return nfsd4_decode_exist_lock_owner4(argp, lock);
+}
- /*
- * type, reclaim(boolean), offset, length, new_lock_owner(boolean)
- */
- READ_BUF(28);
- lock->lk_type = be32_to_cpup(p++);
+static __be32
+nfsd4_decode_lock(struct nfsd4_compoundargs *argp, struct nfsd4_lock *lock)
+{
+ if (xdr_stream_decode_u32(argp->xdr, &lock->lk_type) < 0)
+ return nfserr_bad_xdr;
if ((lock->lk_type < NFS4_READ_LT) || (lock->lk_type > NFS4_WRITEW_LT))
- goto xdr_error;
- lock->lk_reclaim = be32_to_cpup(p++);
- p = xdr_decode_hyper(p, &lock->lk_offset);
- p = xdr_decode_hyper(p, &lock->lk_length);
- lock->lk_is_new = be32_to_cpup(p++);
-
- if (lock->lk_is_new) {
- READ_BUF(4);
- lock->lk_new_open_seqid = be32_to_cpup(p++);
- status = nfsd4_decode_stateid(argp, &lock->lk_new_open_stateid);
- if (status)
- return status;
- READ_BUF(8 + sizeof(clientid_t));
- lock->lk_new_lock_seqid = be32_to_cpup(p++);
- COPYMEM(&lock->lk_new_clientid, sizeof(clientid_t));
- lock->lk_new_owner.len = be32_to_cpup(p++);
- READ_BUF(lock->lk_new_owner.len);
- READMEM(lock->lk_new_owner.data, lock->lk_new_owner.len);
- } else {
- status = nfsd4_decode_stateid(argp, &lock->lk_old_lock_stateid);
- if (status)
- return status;
- READ_BUF(4);
- lock->lk_old_lock_seqid = be32_to_cpup(p++);
- }
-
- DECODE_TAIL;
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_bool(argp->xdr, &lock->lk_reclaim) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u64(argp->xdr, &lock->lk_offset) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u64(argp->xdr, &lock->lk_length) < 0)
+ return nfserr_bad_xdr;
+ return nfsd4_decode_locker4(argp, lock);
}
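
LOCK ends in locker4, an XDR discriminated union: a boolean discriminant picks the open_to_lock_owner4 or exist_lock_owner4 arm, and the two helpers above decode exactly one of them. The shape of the idiom in the same toy cursor terms as earlier (both arm decoders here are hypothetical):

    /* Hypothetical arm decoders, one per union case. */
    static int decode_open_to_lock_owner(struct xdr_cursor *c);
    static int decode_exist_lock_owner(struct xdr_cursor *c);

    /* locker4: a boolean discriminant selects exactly one arm. */
    static int decode_locker_sketch(struct xdr_cursor *c)
    {
        uint32_t new_lock_owner;

        if (cursor_decode_u32(c, &new_lock_owner) < 0)
            return -1;
        return new_lock_owner ? decode_open_to_lock_owner(c)
                              : decode_exist_lock_owner(c);
    }
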
static __be32
nfsd4_decode_lockt(struct nfsd4_compoundargs *argp, struct nfsd4_lockt *lockt)
{
- DECODE_HEAD;
-
- READ_BUF(32);
- lockt->lt_type = be32_to_cpup(p++);
- if((lockt->lt_type < NFS4_READ_LT) || (lockt->lt_type > NFS4_WRITEW_LT))
- goto xdr_error;
- p = xdr_decode_hyper(p, &lockt->lt_offset);
- p = xdr_decode_hyper(p, &lockt->lt_length);
- COPYMEM(&lockt->lt_clientid, 8);
- lockt->lt_owner.len = be32_to_cpup(p++);
- READ_BUF(lockt->lt_owner.len);
- READMEM(lockt->lt_owner.data, lockt->lt_owner.len);
-
- DECODE_TAIL;
+ if (xdr_stream_decode_u32(argp->xdr, &lockt->lt_type) < 0)
+ return nfserr_bad_xdr;
+ if ((lockt->lt_type < NFS4_READ_LT) || (lockt->lt_type > NFS4_WRITEW_LT))
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u64(argp->xdr, &lockt->lt_offset) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u64(argp->xdr, &lockt->lt_length) < 0)
+ return nfserr_bad_xdr;
+ return nfsd4_decode_state_owner4(argp, &lockt->lt_clientid,
+ &lockt->lt_owner);
}
static __be32
nfsd4_decode_locku(struct nfsd4_compoundargs *argp, struct nfsd4_locku *locku)
{
- DECODE_HEAD;
+ __be32 status;
- READ_BUF(8);
- locku->lu_type = be32_to_cpup(p++);
+ if (xdr_stream_decode_u32(argp->xdr, &locku->lu_type) < 0)
+ return nfserr_bad_xdr;
if ((locku->lu_type < NFS4_READ_LT) || (locku->lu_type > NFS4_WRITEW_LT))
- goto xdr_error;
- locku->lu_seqid = be32_to_cpup(p++);
- status = nfsd4_decode_stateid(argp, &locku->lu_stateid);
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u32(argp->xdr, &locku->lu_seqid) < 0)
+ return nfserr_bad_xdr;
+ status = nfsd4_decode_stateid4(argp, &locku->lu_stateid);
if (status)
return status;
- READ_BUF(16);
- p = xdr_decode_hyper(p, &locku->lu_offset);
- p = xdr_decode_hyper(p, &locku->lu_length);
+ if (xdr_stream_decode_u64(argp->xdr, &locku->lu_offset) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u64(argp->xdr, &locku->lu_length) < 0)
+ return nfserr_bad_xdr;
- DECODE_TAIL;
+ return nfs_ok;
}
static __be32
nfsd4_decode_lookup(struct nfsd4_compoundargs *argp, struct nfsd4_lookup *lookup)
{
- DECODE_HEAD;
+ return nfsd4_decode_component4(argp, &lookup->lo_name, &lookup->lo_len);
+}
- READ_BUF(4);
- lookup->lo_len = be32_to_cpup(p++);
- READ_BUF(lookup->lo_len);
- SAVEMEM(lookup->lo_name, lookup->lo_len);
- if ((status = check_filename(lookup->lo_name, lookup->lo_len)))
- return status;
+static __be32
+nfsd4_decode_createhow4(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
+{
+ __be32 status;
- DECODE_TAIL;
+ if (xdr_stream_decode_u32(argp->xdr, &open->op_createmode) < 0)
+ return nfserr_bad_xdr;
+ switch (open->op_createmode) {
+ case NFS4_CREATE_UNCHECKED:
+ case NFS4_CREATE_GUARDED:
+ status = nfsd4_decode_fattr4(argp, open->op_bmval,
+ ARRAY_SIZE(open->op_bmval),
+ &open->op_iattr, &open->op_acl,
+ &open->op_label, &open->op_umask);
+ if (status)
+ return status;
+ break;
+ case NFS4_CREATE_EXCLUSIVE:
+ status = nfsd4_decode_verifier4(argp, &open->op_verf);
+ if (status)
+ return status;
+ break;
+ case NFS4_CREATE_EXCLUSIVE4_1:
+ if (argp->minorversion < 1)
+ return nfserr_bad_xdr;
+ status = nfsd4_decode_verifier4(argp, &open->op_verf);
+ if (status)
+ return status;
+ status = nfsd4_decode_fattr4(argp, open->op_bmval,
+ ARRAY_SIZE(open->op_bmval),
+ &open->op_iattr, &open->op_acl,
+ &open->op_label, &open->op_umask);
+ if (status)
+ return status;
+ break;
+ default:
+ return nfserr_bad_xdr;
+ }
+
+ return nfs_ok;
+}
+
+static __be32
+nfsd4_decode_openflag4(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
+{
+ __be32 status;
+
+ if (xdr_stream_decode_u32(argp->xdr, &open->op_create) < 0)
+ return nfserr_bad_xdr;
+ switch (open->op_create) {
+ case NFS4_OPEN_NOCREATE:
+ break;
+ case NFS4_OPEN_CREATE:
+ status = nfsd4_decode_createhow4(argp, open);
+ if (status)
+ return status;
+ break;
+ default:
+ return nfserr_bad_xdr;
+ }
+
+ return nfs_ok;
}
static __be32 nfsd4_decode_share_access(struct nfsd4_compoundargs *argp, u32 *share_access, u32 *deleg_want, u32 *deleg_when)
{
- __be32 *p;
u32 w;
- READ_BUF(4);
- w = be32_to_cpup(p++);
+ if (xdr_stream_decode_u32(argp->xdr, &w) < 0)
+ return nfserr_bad_xdr;
*share_access = w & NFS4_SHARE_ACCESS_MASK;
*deleg_want = w & NFS4_SHARE_WANT_MASK;
if (deleg_when)
@@ -907,206 +1059,153 @@ static __be32 nfsd4_decode_share_access(struct nfsd4_compoundargs *argp, u32 *sh
NFS4_SHARE_PUSH_DELEG_WHEN_UNCONTENDED):
return nfs_ok;
}
-xdr_error:
return nfserr_bad_xdr;
}
static __be32 nfsd4_decode_share_deny(struct nfsd4_compoundargs *argp, u32 *x)
{
- __be32 *p;
-
- READ_BUF(4);
- *x = be32_to_cpup(p++);
- /* Note: unlinke access bits, deny bits may be zero. */
- if (*x & ~NFS4_SHARE_DENY_BOTH)
+ if (xdr_stream_decode_u32(argp->xdr, x) < 0)
return nfserr_bad_xdr;
- return nfs_ok;
-xdr_error:
- return nfserr_bad_xdr;
-}
-
-static __be32 nfsd4_decode_opaque(struct nfsd4_compoundargs *argp, struct xdr_netobj *o)
-{
- __be32 *p;
-
- READ_BUF(4);
- o->len = be32_to_cpup(p++);
-
- if (o->len == 0 || o->len > NFS4_OPAQUE_LIMIT)
+ /* Note: unlike access bits, deny bits may be zero. */
+ if (*x & ~NFS4_SHARE_DENY_BOTH)
return nfserr_bad_xdr;
- READ_BUF(o->len);
- SAVEMEM(o->data, o->len);
return nfs_ok;
-xdr_error:
- return nfserr_bad_xdr;
}
static __be32
-nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
+nfsd4_decode_open_claim4(struct nfsd4_compoundargs *argp,
+ struct nfsd4_open *open)
{
- DECODE_HEAD;
- u32 dummy;
-
- memset(open->op_bmval, 0, sizeof(open->op_bmval));
- open->op_iattr.ia_valid = 0;
- open->op_openowner = NULL;
-
- open->op_xdr_error = 0;
- /* seqid, share_access, share_deny, clientid, ownerlen */
- READ_BUF(4);
- open->op_seqid = be32_to_cpup(p++);
- /* decode, yet ignore deleg_when until supported */
- status = nfsd4_decode_share_access(argp, &open->op_share_access,
- &open->op_deleg_want, &dummy);
- if (status)
- goto xdr_error;
- status = nfsd4_decode_share_deny(argp, &open->op_share_deny);
- if (status)
- goto xdr_error;
- READ_BUF(sizeof(clientid_t));
- COPYMEM(&open->op_clientid, sizeof(clientid_t));
- status = nfsd4_decode_opaque(argp, &open->op_owner);
- if (status)
- goto xdr_error;
- READ_BUF(4);
- open->op_create = be32_to_cpup(p++);
- switch (open->op_create) {
- case NFS4_OPEN_NOCREATE:
- break;
- case NFS4_OPEN_CREATE:
- READ_BUF(4);
- open->op_createmode = be32_to_cpup(p++);
- switch (open->op_createmode) {
- case NFS4_CREATE_UNCHECKED:
- case NFS4_CREATE_GUARDED:
- status = nfsd4_decode_fattr(argp, open->op_bmval,
- &open->op_iattr, &open->op_acl, &open->op_label,
- &open->op_umask);
- if (status)
- goto out;
- break;
- case NFS4_CREATE_EXCLUSIVE:
- READ_BUF(NFS4_VERIFIER_SIZE);
- COPYMEM(open->op_verf.data, NFS4_VERIFIER_SIZE);
- break;
- case NFS4_CREATE_EXCLUSIVE4_1:
- if (argp->minorversion < 1)
- goto xdr_error;
- READ_BUF(NFS4_VERIFIER_SIZE);
- COPYMEM(open->op_verf.data, NFS4_VERIFIER_SIZE);
- status = nfsd4_decode_fattr(argp, open->op_bmval,
- &open->op_iattr, &open->op_acl, &open->op_label,
- &open->op_umask);
- if (status)
- goto out;
- break;
- default:
- goto xdr_error;
- }
- break;
- default:
- goto xdr_error;
- }
+ __be32 status;
- /* open_claim */
- READ_BUF(4);
- open->op_claim_type = be32_to_cpup(p++);
+ if (xdr_stream_decode_u32(argp->xdr, &open->op_claim_type) < 0)
+ return nfserr_bad_xdr;
switch (open->op_claim_type) {
case NFS4_OPEN_CLAIM_NULL:
case NFS4_OPEN_CLAIM_DELEGATE_PREV:
- READ_BUF(4);
- open->op_fname.len = be32_to_cpup(p++);
- READ_BUF(open->op_fname.len);
- SAVEMEM(open->op_fname.data, open->op_fname.len);
- if ((status = check_filename(open->op_fname.data, open->op_fname.len)))
+ status = nfsd4_decode_component4(argp, &open->op_fname,
+ &open->op_fnamelen);
+ if (status)
return status;
break;
case NFS4_OPEN_CLAIM_PREVIOUS:
- READ_BUF(4);
- open->op_delegate_type = be32_to_cpup(p++);
+ if (xdr_stream_decode_u32(argp->xdr, &open->op_delegate_type) < 0)
+ return nfserr_bad_xdr;
break;
case NFS4_OPEN_CLAIM_DELEGATE_CUR:
- status = nfsd4_decode_stateid(argp, &open->op_delegate_stateid);
+ status = nfsd4_decode_stateid4(argp, &open->op_delegate_stateid);
if (status)
return status;
- READ_BUF(4);
- open->op_fname.len = be32_to_cpup(p++);
- READ_BUF(open->op_fname.len);
- SAVEMEM(open->op_fname.data, open->op_fname.len);
- if ((status = check_filename(open->op_fname.data, open->op_fname.len)))
+ status = nfsd4_decode_component4(argp, &open->op_fname,
+ &open->op_fnamelen);
+ if (status)
return status;
break;
case NFS4_OPEN_CLAIM_FH:
case NFS4_OPEN_CLAIM_DELEG_PREV_FH:
if (argp->minorversion < 1)
- goto xdr_error;
+ return nfserr_bad_xdr;
/* void */
break;
case NFS4_OPEN_CLAIM_DELEG_CUR_FH:
if (argp->minorversion < 1)
- goto xdr_error;
- status = nfsd4_decode_stateid(argp, &open->op_delegate_stateid);
+ return nfserr_bad_xdr;
+ status = nfsd4_decode_stateid4(argp, &open->op_delegate_stateid);
if (status)
return status;
break;
default:
- goto xdr_error;
+ return nfserr_bad_xdr;
}
- DECODE_TAIL;
+ return nfs_ok;
+}
+
+static __be32
+nfsd4_decode_open(struct nfsd4_compoundargs *argp, struct nfsd4_open *open)
+{
+ __be32 status;
+ u32 dummy;
+
+ memset(open->op_bmval, 0, sizeof(open->op_bmval));
+ open->op_iattr.ia_valid = 0;
+ open->op_openowner = NULL;
+
+ open->op_xdr_error = 0;
+ if (xdr_stream_decode_u32(argp->xdr, &open->op_seqid) < 0)
+ return nfserr_bad_xdr;
+ /* deleg_want is ignored */
+ status = nfsd4_decode_share_access(argp, &open->op_share_access,
+ &open->op_deleg_want, &dummy);
+ if (status)
+ return status;
+ status = nfsd4_decode_share_deny(argp, &open->op_share_deny);
+ if (status)
+ return status;
+ status = nfsd4_decode_state_owner4(argp, &open->op_clientid,
+ &open->op_owner);
+ if (status)
+ return status;
+ status = nfsd4_decode_openflag4(argp, open);
+ if (status)
+ return status;
+ return nfsd4_decode_open_claim4(argp, open);
}
static __be32
nfsd4_decode_open_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_open_confirm *open_conf)
{
- DECODE_HEAD;
+ __be32 status;
if (argp->minorversion >= 1)
return nfserr_notsupp;
- status = nfsd4_decode_stateid(argp, &open_conf->oc_req_stateid);
+ status = nfsd4_decode_stateid4(argp, &open_conf->oc_req_stateid);
if (status)
return status;
- READ_BUF(4);
- open_conf->oc_seqid = be32_to_cpup(p++);
+ if (xdr_stream_decode_u32(argp->xdr, &open_conf->oc_seqid) < 0)
+ return nfserr_bad_xdr;
- DECODE_TAIL;
+ return nfs_ok;
}
static __be32
nfsd4_decode_open_downgrade(struct nfsd4_compoundargs *argp, struct nfsd4_open_downgrade *open_down)
{
- DECODE_HEAD;
-
- status = nfsd4_decode_stateid(argp, &open_down->od_stateid);
+ __be32 status;
+
+ status = nfsd4_decode_stateid4(argp, &open_down->od_stateid);
if (status)
return status;
- READ_BUF(4);
- open_down->od_seqid = be32_to_cpup(p++);
+ if (xdr_stream_decode_u32(argp->xdr, &open_down->od_seqid) < 0)
+ return nfserr_bad_xdr;
+ /* deleg_want is ignored */
status = nfsd4_decode_share_access(argp, &open_down->od_share_access,
&open_down->od_deleg_want, NULL);
if (status)
return status;
- status = nfsd4_decode_share_deny(argp, &open_down->od_share_deny);
- if (status)
- return status;
- DECODE_TAIL;
+ return nfsd4_decode_share_deny(argp, &open_down->od_share_deny);
}
static __be32
nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, struct nfsd4_putfh *putfh)
{
- DECODE_HEAD;
+ __be32 *p;
- READ_BUF(4);
- putfh->pf_fhlen = be32_to_cpup(p++);
+ if (xdr_stream_decode_u32(argp->xdr, &putfh->pf_fhlen) < 0)
+ return nfserr_bad_xdr;
if (putfh->pf_fhlen > NFS4_FHSIZE)
- goto xdr_error;
- READ_BUF(putfh->pf_fhlen);
- SAVEMEM(putfh->pf_fhval, putfh->pf_fhlen);
+ return nfserr_bad_xdr;
+ p = xdr_inline_decode(argp->xdr, putfh->pf_fhlen);
+ if (!p)
+ return nfserr_bad_xdr;
+ putfh->pf_fhval = svcxdr_tmpalloc(argp, putfh->pf_fhlen);
+ if (!putfh->pf_fhval)
+ return nfserr_jukebox;
+ memcpy(putfh->pf_fhval, p, putfh->pf_fhlen);
- DECODE_TAIL;
+ return nfs_ok;
}
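
PUTFH shows the variable-length opaque idiom: read the length word, bound it, take an inline pointer with xdr_inline_decode(), then copy into svcxdr_tmpalloc() memory, which the kernel releases automatically when the request completes (allocation failure maps to nfserr_jukebox). A flat-buffer sketch of the same steps, with malloc() as a stand-in:

    #include <stdlib.h>

    /* Length word, bound check, copy out.  malloc() stands in for
     * svcxdr_tmpalloc(), whose memory is freed with the request. */
    static int cursor_decode_opaque(struct xdr_cursor *c, unsigned char **out,
                                    uint32_t *len, uint32_t maxlen)
    {
        if (cursor_decode_u32(c, len) < 0)
            return -1;                  /* nfserr_bad_xdr */
        if (*len > maxlen || (size_t)(c->end - c->p) < *len)
            return -1;                  /* nfserr_bad_xdr */
        *out = malloc(*len);
        if (!*out)
            return -2;                  /* nfserr_jukebox */
        memcpy(*out, c->p, *len);
        c->p += (*len + 3) & ~3u;       /* XDR pads opaques to 4 octets */
        return 0;
    }
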
static __be32
@@ -1120,109 +1219,68 @@ nfsd4_decode_putpubfh(struct nfsd4_compoundargs *argp, void *p)
static __be32
nfsd4_decode_read(struct nfsd4_compoundargs *argp, struct nfsd4_read *read)
{
- DECODE_HEAD;
+ __be32 status;
- status = nfsd4_decode_stateid(argp, &read->rd_stateid);
+ status = nfsd4_decode_stateid4(argp, &read->rd_stateid);
if (status)
return status;
- READ_BUF(12);
- p = xdr_decode_hyper(p, &read->rd_offset);
- read->rd_length = be32_to_cpup(p++);
+ if (xdr_stream_decode_u64(argp->xdr, &read->rd_offset) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u32(argp->xdr, &read->rd_length) < 0)
+ return nfserr_bad_xdr;
- DECODE_TAIL;
+ return nfs_ok;
}
static __be32
nfsd4_decode_readdir(struct nfsd4_compoundargs *argp, struct nfsd4_readdir *readdir)
{
- DECODE_HEAD;
+ __be32 status;
- READ_BUF(24);
- p = xdr_decode_hyper(p, &readdir->rd_cookie);
- COPYMEM(readdir->rd_verf.data, sizeof(readdir->rd_verf.data));
- readdir->rd_dircount = be32_to_cpup(p++);
- readdir->rd_maxcount = be32_to_cpup(p++);
- if ((status = nfsd4_decode_bitmap(argp, readdir->rd_bmval)))
- goto out;
+ if (xdr_stream_decode_u64(argp->xdr, &readdir->rd_cookie) < 0)
+ return nfserr_bad_xdr;
+ status = nfsd4_decode_verifier4(argp, &readdir->rd_verf);
+ if (status)
+ return status;
+ if (xdr_stream_decode_u32(argp->xdr, &readdir->rd_dircount) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u32(argp->xdr, &readdir->rd_maxcount) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_uint32_array(argp->xdr, readdir->rd_bmval,
+ ARRAY_SIZE(readdir->rd_bmval)) < 0)
+ return nfserr_bad_xdr;
- DECODE_TAIL;
+ return nfs_ok;
}
static __be32
nfsd4_decode_remove(struct nfsd4_compoundargs *argp, struct nfsd4_remove *remove)
{
- DECODE_HEAD;
-
- READ_BUF(4);
- remove->rm_namelen = be32_to_cpup(p++);
- READ_BUF(remove->rm_namelen);
- SAVEMEM(remove->rm_name, remove->rm_namelen);
- if ((status = check_filename(remove->rm_name, remove->rm_namelen)))
- return status;
-
- DECODE_TAIL;
+ return nfsd4_decode_component4(argp, &remove->rm_name, &remove->rm_namelen);
}
static __be32
nfsd4_decode_rename(struct nfsd4_compoundargs *argp, struct nfsd4_rename *rename)
{
- DECODE_HEAD;
-
- READ_BUF(4);
- rename->rn_snamelen = be32_to_cpup(p++);
- READ_BUF(rename->rn_snamelen);
- SAVEMEM(rename->rn_sname, rename->rn_snamelen);
- READ_BUF(4);
- rename->rn_tnamelen = be32_to_cpup(p++);
- READ_BUF(rename->rn_tnamelen);
- SAVEMEM(rename->rn_tname, rename->rn_tnamelen);
- if ((status = check_filename(rename->rn_sname, rename->rn_snamelen)))
- return status;
- if ((status = check_filename(rename->rn_tname, rename->rn_tnamelen)))
- return status;
+ __be32 status;
- DECODE_TAIL;
+ status = nfsd4_decode_component4(argp, &rename->rn_sname, &rename->rn_snamelen);
+ if (status)
+ return status;
+ return nfsd4_decode_component4(argp, &rename->rn_tname, &rename->rn_tnamelen);
}
static __be32
nfsd4_decode_renew(struct nfsd4_compoundargs *argp, clientid_t *clientid)
{
- DECODE_HEAD;
-
- if (argp->minorversion >= 1)
- return nfserr_notsupp;
-
- READ_BUF(sizeof(clientid_t));
- COPYMEM(clientid, sizeof(clientid_t));
-
- DECODE_TAIL;
+ return nfsd4_decode_clientid4(argp, clientid);
}
static __be32
nfsd4_decode_secinfo(struct nfsd4_compoundargs *argp,
struct nfsd4_secinfo *secinfo)
{
- DECODE_HEAD;
-
- READ_BUF(4);
- secinfo->si_namelen = be32_to_cpup(p++);
- READ_BUF(secinfo->si_namelen);
- SAVEMEM(secinfo->si_name, secinfo->si_namelen);
- status = check_filename(secinfo->si_name, secinfo->si_namelen);
- if (status)
- return status;
- DECODE_TAIL;
-}
-
-static __be32
-nfsd4_decode_secinfo_no_name(struct nfsd4_compoundargs *argp,
- struct nfsd4_secinfo_no_name *sin)
-{
- DECODE_HEAD;
-
- READ_BUF(4);
- sin->sin_style = be32_to_cpup(p++);
- DECODE_TAIL;
+ return nfsd4_decode_component4(argp, &secinfo->si_name, &secinfo->si_namelen);
}
static __be32
@@ -1230,362 +1288,381 @@ nfsd4_decode_setattr(struct nfsd4_compoundargs *argp, struct nfsd4_setattr *seta
{
__be32 status;
- status = nfsd4_decode_stateid(argp, &setattr->sa_stateid);
+ status = nfsd4_decode_stateid4(argp, &setattr->sa_stateid);
if (status)
return status;
- return nfsd4_decode_fattr(argp, setattr->sa_bmval, &setattr->sa_iattr,
- &setattr->sa_acl, &setattr->sa_label, NULL);
+ return nfsd4_decode_fattr4(argp, setattr->sa_bmval,
+ ARRAY_SIZE(setattr->sa_bmval),
+ &setattr->sa_iattr, &setattr->sa_acl,
+ &setattr->sa_label, NULL);
}
static __be32
nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclientid *setclientid)
{
- DECODE_HEAD;
+ __be32 *p, status;
if (argp->minorversion >= 1)
return nfserr_notsupp;
- READ_BUF(NFS4_VERIFIER_SIZE);
- COPYMEM(setclientid->se_verf.data, NFS4_VERIFIER_SIZE);
-
+ status = nfsd4_decode_verifier4(argp, &setclientid->se_verf);
+ if (status)
+ return status;
status = nfsd4_decode_opaque(argp, &setclientid->se_name);
if (status)
+ return status;
+ if (xdr_stream_decode_u32(argp->xdr, &setclientid->se_callback_prog) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u32(argp->xdr, &setclientid->se_callback_netid_len) < 0)
+ return nfserr_bad_xdr;
+ p = xdr_inline_decode(argp->xdr, setclientid->se_callback_netid_len);
+ if (!p)
return nfserr_bad_xdr;
- READ_BUF(8);
- setclientid->se_callback_prog = be32_to_cpup(p++);
- setclientid->se_callback_netid_len = be32_to_cpup(p++);
- READ_BUF(setclientid->se_callback_netid_len);
- SAVEMEM(setclientid->se_callback_netid_val, setclientid->se_callback_netid_len);
- READ_BUF(4);
- setclientid->se_callback_addr_len = be32_to_cpup(p++);
+ setclientid->se_callback_netid_val = svcxdr_tmpalloc(argp,
+ setclientid->se_callback_netid_len);
+ if (!setclientid->se_callback_netid_val)
+ return nfserr_jukebox;
+ memcpy(setclientid->se_callback_netid_val, p,
+ setclientid->se_callback_netid_len);
- READ_BUF(setclientid->se_callback_addr_len);
- SAVEMEM(setclientid->se_callback_addr_val, setclientid->se_callback_addr_len);
- READ_BUF(4);
- setclientid->se_callback_ident = be32_to_cpup(p++);
+ if (xdr_stream_decode_u32(argp->xdr, &setclientid->se_callback_addr_len) < 0)
+ return nfserr_bad_xdr;
+ p = xdr_inline_decode(argp->xdr, setclientid->se_callback_addr_len);
+ if (!p)
+ return nfserr_bad_xdr;
+ setclientid->se_callback_addr_val = svcxdr_tmpalloc(argp,
+ setclientid->se_callback_addr_len);
+ if (!setclientid->se_callback_addr_val)
+ return nfserr_jukebox;
+ memcpy(setclientid->se_callback_addr_val, p,
+ setclientid->se_callback_addr_len);
+ if (xdr_stream_decode_u32(argp->xdr, &setclientid->se_callback_ident) < 0)
+ return nfserr_bad_xdr;
- DECODE_TAIL;
+ return nfs_ok;
}
static __be32
nfsd4_decode_setclientid_confirm(struct nfsd4_compoundargs *argp, struct nfsd4_setclientid_confirm *scd_c)
{
- DECODE_HEAD;
+ __be32 status;
if (argp->minorversion >= 1)
return nfserr_notsupp;
- READ_BUF(8 + NFS4_VERIFIER_SIZE);
- COPYMEM(&scd_c->sc_clientid, 8);
- COPYMEM(&scd_c->sc_confirm, NFS4_VERIFIER_SIZE);
-
- DECODE_TAIL;
+ status = nfsd4_decode_clientid4(argp, &scd_c->sc_clientid);
+ if (status)
+ return status;
+ return nfsd4_decode_verifier4(argp, &scd_c->sc_confirm);
}
/* Also used for NVERIFY */
static __be32
nfsd4_decode_verify(struct nfsd4_compoundargs *argp, struct nfsd4_verify *verify)
{
- DECODE_HEAD;
+ __be32 *p, status;
- if ((status = nfsd4_decode_bitmap(argp, verify->ve_bmval)))
- goto out;
+ status = nfsd4_decode_bitmap4(argp, verify->ve_bmval,
+ ARRAY_SIZE(verify->ve_bmval));
+ if (status)
+ return status;
/* For convenience's sake, we compare raw xdr'd attributes in
* nfsd4_proc_verify */
- READ_BUF(4);
- verify->ve_attrlen = be32_to_cpup(p++);
- READ_BUF(verify->ve_attrlen);
- SAVEMEM(verify->ve_attrval, verify->ve_attrlen);
+ if (xdr_stream_decode_u32(argp->xdr, &verify->ve_attrlen) < 0)
+ return nfserr_bad_xdr;
+ p = xdr_inline_decode(argp->xdr, verify->ve_attrlen);
+ if (!p)
+ return nfserr_bad_xdr;
+ verify->ve_attrval = svcxdr_tmpalloc(argp, verify->ve_attrlen);
+ if (!verify->ve_attrval)
+ return nfserr_jukebox;
+ memcpy(verify->ve_attrval, p, verify->ve_attrlen);
- DECODE_TAIL;
+ return nfs_ok;
}
static __be32
nfsd4_decode_write(struct nfsd4_compoundargs *argp, struct nfsd4_write *write)
{
- DECODE_HEAD;
+ __be32 status;
- status = nfsd4_decode_stateid(argp, &write->wr_stateid);
+ status = nfsd4_decode_stateid4(argp, &write->wr_stateid);
if (status)
return status;
- READ_BUF(16);
- p = xdr_decode_hyper(p, &write->wr_offset);
- write->wr_stable_how = be32_to_cpup(p++);
+ if (xdr_stream_decode_u64(argp->xdr, &write->wr_offset) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u32(argp->xdr, &write->wr_stable_how) < 0)
+ return nfserr_bad_xdr;
if (write->wr_stable_how > NFS_FILE_SYNC)
- goto xdr_error;
- write->wr_buflen = be32_to_cpup(p++);
-
- status = svcxdr_construct_vector(argp, &write->wr_head,
- &write->wr_pagelist, write->wr_buflen);
- if (status)
- return status;
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u32(argp->xdr, &write->wr_buflen) < 0)
+ return nfserr_bad_xdr;
+ if (!xdr_stream_subsegment(argp->xdr, &write->wr_payload, write->wr_buflen))
+ return nfserr_bad_xdr;
- DECODE_TAIL;
+ return nfs_ok;
}
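
The interesting change in WRITE is xdr_stream_subsegment(): instead of building a kvec plus page list, the decoder captures wr_buflen bytes of payload as an xdr_buf that aliases the receive buffer, so nothing is copied; a false return means the buffer was short, hence the nfserr_bad_xdr mapping. A toy flat-buffer version of the idea (the real xdr_buf can also span page arrays and a tail kvec):

    /* The view borrows a range of the receive buffer rather than
     * duplicating it. */
    struct byte_view {
        const unsigned char *base;
        size_t len;
    };

    static int cursor_subsegment(struct xdr_cursor *c, struct byte_view *v,
                                 size_t nbytes)
    {
        if ((size_t)(c->end - c->p) < nbytes)
            return 0;                       /* false: short buffer */
        v->base = c->p;                     /* alias, no memcpy */
        v->len = nbytes;
        c->p += (nbytes + 3) & ~(size_t)3;  /* data plus XDR pad */
        return 1;                           /* true */
    }
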
static __be32
nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_release_lockowner *rlockowner)
{
- DECODE_HEAD;
+ __be32 status;
if (argp->minorversion >= 1)
return nfserr_notsupp;
- READ_BUF(12);
- COPYMEM(&rlockowner->rl_clientid, sizeof(clientid_t));
- rlockowner->rl_owner.len = be32_to_cpup(p++);
- READ_BUF(rlockowner->rl_owner.len);
- READMEM(rlockowner->rl_owner.data, rlockowner->rl_owner.len);
+ status = nfsd4_decode_state_owner4(argp, &rlockowner->rl_clientid,
+ &rlockowner->rl_owner);
+ if (status)
+ return status;
if (argp->minorversion && !zero_clientid(&rlockowner->rl_clientid))
return nfserr_inval;
- DECODE_TAIL;
+
+ return nfs_ok;
+}
+
+static __be32 nfsd4_decode_backchannel_ctl(struct nfsd4_compoundargs *argp, struct nfsd4_backchannel_ctl *bc)
+{
+ if (xdr_stream_decode_u32(argp->xdr, &bc->bc_cb_program) < 0)
+ return nfserr_bad_xdr;
+ return nfsd4_decode_cb_sec(argp, &bc->bc_cb_sec);
+}
+
+static __be32 nfsd4_decode_bind_conn_to_session(struct nfsd4_compoundargs *argp, struct nfsd4_bind_conn_to_session *bcts)
+{
+ u32 use_conn_in_rdma_mode;
+ __be32 status;
+
+ status = nfsd4_decode_sessionid4(argp, &bcts->sessionid);
+ if (status)
+ return status;
+ if (xdr_stream_decode_u32(argp->xdr, &bcts->dir) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u32(argp->xdr, &use_conn_in_rdma_mode) < 0)
+ return nfserr_bad_xdr;
+
+ return nfs_ok;
}
static __be32
-nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp,
- struct nfsd4_exchange_id *exid)
+nfsd4_decode_state_protect_ops(struct nfsd4_compoundargs *argp,
+ struct nfsd4_exchange_id *exid)
{
- int dummy, tmp;
- DECODE_HEAD;
+ __be32 status;
- READ_BUF(NFS4_VERIFIER_SIZE);
- COPYMEM(exid->verifier.data, NFS4_VERIFIER_SIZE);
+ status = nfsd4_decode_bitmap4(argp, exid->spo_must_enforce,
+ ARRAY_SIZE(exid->spo_must_enforce));
+ if (status)
+ return nfserr_bad_xdr;
+ status = nfsd4_decode_bitmap4(argp, exid->spo_must_allow,
+ ARRAY_SIZE(exid->spo_must_allow));
+ if (status)
+ return nfserr_bad_xdr;
- status = nfsd4_decode_opaque(argp, &exid->clname);
+ return nfs_ok;
+}
+
+/*
+ * This implementation currently does not support SP4_SSV.
+ * This decoder simply skips over these arguments.
+ */
+static noinline __be32
+nfsd4_decode_ssv_sp_parms(struct nfsd4_compoundargs *argp,
+ struct nfsd4_exchange_id *exid)
+{
+ u32 count, window, num_gss_handles;
+ __be32 status;
+
+ /* ssp_ops */
+ status = nfsd4_decode_state_protect_ops(argp, exid);
if (status)
+ return status;
+
+ /* ssp_hash_algs<> */
+ if (xdr_stream_decode_u32(argp->xdr, &count) < 0)
+ return nfserr_bad_xdr;
+ while (count--) {
+ status = nfsd4_decode_ignored_string(argp, 0);
+ if (status)
+ return status;
+ }
+
+ /* ssp_encr_algs<> */
+ if (xdr_stream_decode_u32(argp->xdr, &count) < 0)
+ return nfserr_bad_xdr;
+ while (count--) {
+ status = nfsd4_decode_ignored_string(argp, 0);
+ if (status)
+ return status;
+ }
+
+ if (xdr_stream_decode_u32(argp->xdr, &window) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u32(argp->xdr, &num_gss_handles) < 0)
return nfserr_bad_xdr;
- READ_BUF(4);
- exid->flags = be32_to_cpup(p++);
+ return nfs_ok;
+}
- /* Ignore state_protect4_a */
- READ_BUF(4);
- exid->spa_how = be32_to_cpup(p++);
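
Skipping the SP4_SSV arguments still requires decoding them; otherwise the fields that follow (eia_client_impl_id) would be read from the wrong offset. A flat-buffer stand-in for the role nfsd4_decode_ignored_string() plays here (a sketch only, not the kernel helper):

    /* Parse and discard one XDR opaque/string; nothing is kept. */
    static int cursor_skip_opaque(struct xdr_cursor *c)
    {
        uint32_t len;

        if (cursor_decode_u32(c, &len) < 0)
            return -1;
        if ((size_t)(c->end - c->p) < len)
            return -1;
        c->p += (len + 3) & ~3u;        /* data plus pad, discarded */
        return 0;
    }
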
+static __be32
+nfsd4_decode_state_protect4_a(struct nfsd4_compoundargs *argp,
+ struct nfsd4_exchange_id *exid)
+{
+ __be32 status;
+
+ if (xdr_stream_decode_u32(argp->xdr, &exid->spa_how) < 0)
+ return nfserr_bad_xdr;
switch (exid->spa_how) {
case SP4_NONE:
break;
case SP4_MACH_CRED:
- /* spo_must_enforce */
- status = nfsd4_decode_bitmap(argp,
- exid->spo_must_enforce);
+ status = nfsd4_decode_state_protect_ops(argp, exid);
if (status)
- goto out;
- /* spo_must_allow */
- status = nfsd4_decode_bitmap(argp, exid->spo_must_allow);
- if (status)
- goto out;
+ return status;
break;
case SP4_SSV:
- /* ssp_ops */
- READ_BUF(4);
- dummy = be32_to_cpup(p++);
- READ_BUF(dummy * 4);
- p += dummy;
-
- READ_BUF(4);
- dummy = be32_to_cpup(p++);
- READ_BUF(dummy * 4);
- p += dummy;
-
- /* ssp_hash_algs<> */
- READ_BUF(4);
- tmp = be32_to_cpup(p++);
- while (tmp--) {
- READ_BUF(4);
- dummy = be32_to_cpup(p++);
- READ_BUF(dummy);
- p += XDR_QUADLEN(dummy);
- }
-
- /* ssp_encr_algs<> */
- READ_BUF(4);
- tmp = be32_to_cpup(p++);
- while (tmp--) {
- READ_BUF(4);
- dummy = be32_to_cpup(p++);
- READ_BUF(dummy);
- p += XDR_QUADLEN(dummy);
- }
-
- /* ignore ssp_window and ssp_num_gss_handles: */
- READ_BUF(8);
+ status = nfsd4_decode_ssv_sp_parms(argp, exid);
+ if (status)
+ return status;
break;
default:
- goto xdr_error;
+ return nfserr_bad_xdr;
}
- READ_BUF(4); /* nfs_impl_id4 array length */
- dummy = be32_to_cpup(p++);
+ return nfs_ok;
+}
- if (dummy > 1)
- goto xdr_error;
+static __be32
+nfsd4_decode_nfs_impl_id4(struct nfsd4_compoundargs *argp,
+ struct nfsd4_exchange_id *exid)
+{
+ __be32 status;
+ u32 count;
- if (dummy == 1) {
+ if (xdr_stream_decode_u32(argp->xdr, &count) < 0)
+ return nfserr_bad_xdr;
+ switch (count) {
+ case 0:
+ break;
+ case 1:
+ /* Note that RFC 8881 places no length limit on
+ * nii_domain, but this implementation permits no
+ * more than NFS4_OPAQUE_LIMIT bytes */
status = nfsd4_decode_opaque(argp, &exid->nii_domain);
if (status)
- goto xdr_error;
-
- /* nii_name */
+ return status;
+ /* Note that RFC 8881 places no length limit on
+ * nii_name, but this implementation permits no
+ * more than NFS4_OPAQUE_LIMIT bytes */
status = nfsd4_decode_opaque(argp, &exid->nii_name);
if (status)
- goto xdr_error;
-
- /* nii_date */
- status = nfsd4_decode_time(argp, &exid->nii_time);
+ return status;
+ status = nfsd4_decode_nfstime4(argp, &exid->nii_time);
if (status)
- goto xdr_error;
+ return status;
+ break;
+ default:
+ return nfserr_bad_xdr;
}
- DECODE_TAIL;
-}
-static __be32
-nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
- struct nfsd4_create_session *sess)
-{
- DECODE_HEAD;
-
- READ_BUF(16);
- COPYMEM(&sess->clientid, 8);
- sess->seqid = be32_to_cpup(p++);
- sess->flags = be32_to_cpup(p++);
-
- /* Fore channel attrs */
- READ_BUF(28);
- p++; /* headerpadsz is always 0 */
- sess->fore_channel.maxreq_sz = be32_to_cpup(p++);
- sess->fore_channel.maxresp_sz = be32_to_cpup(p++);
- sess->fore_channel.maxresp_cached = be32_to_cpup(p++);
- sess->fore_channel.maxops = be32_to_cpup(p++);
- sess->fore_channel.maxreqs = be32_to_cpup(p++);
- sess->fore_channel.nr_rdma_attrs = be32_to_cpup(p++);
- if (sess->fore_channel.nr_rdma_attrs == 1) {
- READ_BUF(4);
- sess->fore_channel.rdma_attrs = be32_to_cpup(p++);
- } else if (sess->fore_channel.nr_rdma_attrs > 1) {
- dprintk("Too many fore channel attr bitmaps!\n");
- goto xdr_error;
- }
-
- /* Back channel attrs */
- READ_BUF(28);
- p++; /* headerpadsz is always 0 */
- sess->back_channel.maxreq_sz = be32_to_cpup(p++);
- sess->back_channel.maxresp_sz = be32_to_cpup(p++);
- sess->back_channel.maxresp_cached = be32_to_cpup(p++);
- sess->back_channel.maxops = be32_to_cpup(p++);
- sess->back_channel.maxreqs = be32_to_cpup(p++);
- sess->back_channel.nr_rdma_attrs = be32_to_cpup(p++);
- if (sess->back_channel.nr_rdma_attrs == 1) {
- READ_BUF(4);
- sess->back_channel.rdma_attrs = be32_to_cpup(p++);
- } else if (sess->back_channel.nr_rdma_attrs > 1) {
- dprintk("Too many back channel attr bitmaps!\n");
- goto xdr_error;
- }
-
- READ_BUF(4);
- sess->callback_prog = be32_to_cpup(p++);
- nfsd4_decode_cb_sec(argp, &sess->cb_sec);
- DECODE_TAIL;
+ return nfs_ok;
}
static __be32
-nfsd4_decode_destroy_session(struct nfsd4_compoundargs *argp,
- struct nfsd4_destroy_session *destroy_session)
+nfsd4_decode_exchange_id(struct nfsd4_compoundargs *argp,
+ struct nfsd4_exchange_id *exid)
{
- DECODE_HEAD;
- READ_BUF(NFS4_MAX_SESSIONID_LEN);
- COPYMEM(destroy_session->sessionid.data, NFS4_MAX_SESSIONID_LEN);
+ __be32 status;
- DECODE_TAIL;
+ status = nfsd4_decode_verifier4(argp, &exid->verifier);
+ if (status)
+ return status;
+ status = nfsd4_decode_opaque(argp, &exid->clname);
+ if (status)
+ return status;
+ if (xdr_stream_decode_u32(argp->xdr, &exid->flags) < 0)
+ return nfserr_bad_xdr;
+ status = nfsd4_decode_state_protect4_a(argp, exid);
+ if (status)
+ return status;
+ return nfsd4_decode_nfs_impl_id4(argp, exid);
}
static __be32
-nfsd4_decode_free_stateid(struct nfsd4_compoundargs *argp,
- struct nfsd4_free_stateid *free_stateid)
+nfsd4_decode_channel_attrs4(struct nfsd4_compoundargs *argp,
+ struct nfsd4_channel_attrs *ca)
{
- DECODE_HEAD;
-
- READ_BUF(sizeof(stateid_t));
- free_stateid->fr_stateid.si_generation = be32_to_cpup(p++);
- COPYMEM(&free_stateid->fr_stateid.si_opaque, sizeof(stateid_opaque_t));
-
- DECODE_TAIL;
-}
+ __be32 *p;
-static __be32
-nfsd4_decode_sequence(struct nfsd4_compoundargs *argp,
- struct nfsd4_sequence *seq)
-{
- DECODE_HEAD;
+ p = xdr_inline_decode(argp->xdr, XDR_UNIT * 7);
+ if (!p)
+ return nfserr_bad_xdr;
- READ_BUF(NFS4_MAX_SESSIONID_LEN + 16);
- COPYMEM(seq->sessionid.data, NFS4_MAX_SESSIONID_LEN);
- seq->seqid = be32_to_cpup(p++);
- seq->slotid = be32_to_cpup(p++);
- seq->maxslots = be32_to_cpup(p++);
- seq->cachethis = be32_to_cpup(p++);
+ /* headerpadsz is ignored */
+ p++;
+ ca->maxreq_sz = be32_to_cpup(p++);
+ ca->maxresp_sz = be32_to_cpup(p++);
+ ca->maxresp_cached = be32_to_cpup(p++);
+ ca->maxops = be32_to_cpup(p++);
+ ca->maxreqs = be32_to_cpup(p++);
+ ca->nr_rdma_attrs = be32_to_cpup(p);
+ switch (ca->nr_rdma_attrs) {
+ case 0:
+ break;
+ case 1:
+ if (xdr_stream_decode_u32(argp->xdr, &ca->rdma_attrs) < 0)
+ return nfserr_bad_xdr;
+ break;
+ default:
+ return nfserr_bad_xdr;
+ }
- DECODE_TAIL;
+ return nfs_ok;
}
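
The fixed part of channel_attrs4 is seven XDR words, hence the single xdr_inline_decode(argp->xdr, XDR_UNIT * 7): 7 x 4 = 28 octets, exactly what the old READ_BUF(28) consumed:

    channel_attrs4, fixed part (7 XDR words = 28 octets):
        ca_headerpadsz              decoded, then ignored
        ca_maxrequestsize           -> maxreq_sz
        ca_maxresponsesize          -> maxresp_sz
        ca_maxresponsesize_cached   -> maxresp_cached
        ca_maxoperations            -> maxops
        ca_maxrequests              -> maxreqs
        ca_rdma_ird<>               array length; only 0 or 1 accepted,
                                    the single element decoded separately
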
static __be32
-nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, struct nfsd4_test_stateid *test_stateid)
+nfsd4_decode_create_session(struct nfsd4_compoundargs *argp,
+ struct nfsd4_create_session *sess)
{
- int i;
- __be32 *p, status;
- struct nfsd4_test_stateid_id *stateid;
-
- READ_BUF(4);
- test_stateid->ts_num_ids = ntohl(*p++);
-
- INIT_LIST_HEAD(&test_stateid->ts_stateid_list);
-
- for (i = 0; i < test_stateid->ts_num_ids; i++) {
- stateid = svcxdr_tmpalloc(argp, sizeof(*stateid));
- if (!stateid) {
- status = nfserrno(-ENOMEM);
- goto out;
- }
-
- INIT_LIST_HEAD(&stateid->ts_id_list);
- list_add_tail(&stateid->ts_id_list, &test_stateid->ts_stateid_list);
+ __be32 status;
- status = nfsd4_decode_stateid(argp, &stateid->ts_id_stateid);
- if (status)
- goto out;
- }
+ status = nfsd4_decode_clientid4(argp, &sess->clientid);
+ if (status)
+ return status;
+ if (xdr_stream_decode_u32(argp->xdr, &sess->seqid) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u32(argp->xdr, &sess->flags) < 0)
+ return nfserr_bad_xdr;
+ status = nfsd4_decode_channel_attrs4(argp, &sess->fore_channel);
+ if (status)
+ return status;
+ status = nfsd4_decode_channel_attrs4(argp, &sess->back_channel);
+ if (status)
+ return status;
+ if (xdr_stream_decode_u32(argp->xdr, &sess->callback_prog) < 0)
+ return nfserr_bad_xdr;
+ status = nfsd4_decode_cb_sec(argp, &sess->cb_sec);
+ if (status)
+ return status;
- status = 0;
-out:
- return status;
-xdr_error:
- dprintk("NFSD: xdr error (%s:%d)\n", __FILE__, __LINE__);
- status = nfserr_bad_xdr;
- goto out;
+ return nfs_ok;
}
-static __be32 nfsd4_decode_destroy_clientid(struct nfsd4_compoundargs *argp, struct nfsd4_destroy_clientid *dc)
+static __be32
+nfsd4_decode_destroy_session(struct nfsd4_compoundargs *argp,
+ struct nfsd4_destroy_session *destroy_session)
{
- DECODE_HEAD;
-
- READ_BUF(8);
- COPYMEM(&dc->clientid, 8);
-
- DECODE_TAIL;
+ return nfsd4_decode_sessionid4(argp, &destroy_session->sessionid);
}
-static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, struct nfsd4_reclaim_complete *rc)
+static __be32
+nfsd4_decode_free_stateid(struct nfsd4_compoundargs *argp,
+ struct nfsd4_free_stateid *free_stateid)
{
- DECODE_HEAD;
-
- READ_BUF(4);
- rc->rca_one_fs = be32_to_cpup(p++);
-
- DECODE_TAIL;
+ return nfsd4_decode_stateid4(argp, &free_stateid->fr_stateid);
}
#ifdef CONFIG_NFSD_PNFS
@@ -1593,244 +1670,264 @@ static __be32
nfsd4_decode_getdeviceinfo(struct nfsd4_compoundargs *argp,
struct nfsd4_getdeviceinfo *gdev)
{
- DECODE_HEAD;
- u32 num, i;
-
- READ_BUF(sizeof(struct nfsd4_deviceid) + 3 * 4);
- COPYMEM(&gdev->gd_devid, sizeof(struct nfsd4_deviceid));
- gdev->gd_layout_type = be32_to_cpup(p++);
- gdev->gd_maxcount = be32_to_cpup(p++);
- num = be32_to_cpup(p++);
- if (num) {
- if (num > 1000)
- goto xdr_error;
- READ_BUF(4 * num);
- gdev->gd_notify_types = be32_to_cpup(p++);
- for (i = 1; i < num; i++) {
- if (be32_to_cpup(p++)) {
- status = nfserr_inval;
- goto out;
- }
- }
- }
- DECODE_TAIL;
-}
-
-static __be32
-nfsd4_decode_layoutget(struct nfsd4_compoundargs *argp,
- struct nfsd4_layoutget *lgp)
-{
- DECODE_HEAD;
-
- READ_BUF(36);
- lgp->lg_signal = be32_to_cpup(p++);
- lgp->lg_layout_type = be32_to_cpup(p++);
- lgp->lg_seg.iomode = be32_to_cpup(p++);
- p = xdr_decode_hyper(p, &lgp->lg_seg.offset);
- p = xdr_decode_hyper(p, &lgp->lg_seg.length);
- p = xdr_decode_hyper(p, &lgp->lg_minlength);
+ __be32 status;
- status = nfsd4_decode_stateid(argp, &lgp->lg_sid);
+ status = nfsd4_decode_deviceid4(argp, &gdev->gd_devid);
if (status)
return status;
+ if (xdr_stream_decode_u32(argp->xdr, &gdev->gd_layout_type) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u32(argp->xdr, &gdev->gd_maxcount) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_uint32_array(argp->xdr,
+ &gdev->gd_notify_types, 1) < 0)
+ return nfserr_bad_xdr;
- READ_BUF(4);
- lgp->lg_maxcount = be32_to_cpup(p++);
-
- DECODE_TAIL;
+ return nfs_ok;
}
static __be32
nfsd4_decode_layoutcommit(struct nfsd4_compoundargs *argp,
- struct nfsd4_layoutcommit *lcp)
+ struct nfsd4_layoutcommit *lcp)
{
- DECODE_HEAD;
- u32 timechange;
-
- READ_BUF(20);
- p = xdr_decode_hyper(p, &lcp->lc_seg.offset);
- p = xdr_decode_hyper(p, &lcp->lc_seg.length);
- lcp->lc_reclaim = be32_to_cpup(p++);
+ __be32 *p, status;
- status = nfsd4_decode_stateid(argp, &lcp->lc_sid);
+ if (xdr_stream_decode_u64(argp->xdr, &lcp->lc_seg.offset) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u64(argp->xdr, &lcp->lc_seg.length) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_bool(argp->xdr, &lcp->lc_reclaim) < 0)
+ return nfserr_bad_xdr;
+ status = nfsd4_decode_stateid4(argp, &lcp->lc_sid);
if (status)
return status;
-
- READ_BUF(4);
- lcp->lc_newoffset = be32_to_cpup(p++);
+ if (xdr_stream_decode_u32(argp->xdr, &lcp->lc_newoffset) < 0)
+ return nfserr_bad_xdr;
if (lcp->lc_newoffset) {
- READ_BUF(8);
- p = xdr_decode_hyper(p, &lcp->lc_last_wr);
+ if (xdr_stream_decode_u64(argp->xdr, &lcp->lc_last_wr) < 0)
+ return nfserr_bad_xdr;
} else
lcp->lc_last_wr = 0;
- READ_BUF(4);
- timechange = be32_to_cpup(p++);
- if (timechange) {
- status = nfsd4_decode_time(argp, &lcp->lc_mtime);
+ p = xdr_inline_decode(argp->xdr, XDR_UNIT);
+ if (!p)
+ return nfserr_bad_xdr;
+ if (xdr_item_is_present(p)) {
+ status = nfsd4_decode_nfstime4(argp, &lcp->lc_mtime);
if (status)
return status;
} else {
lcp->lc_mtime.tv_nsec = UTIME_NOW;
}
- READ_BUF(8);
- lcp->lc_layout_type = be32_to_cpup(p++);
+ return nfsd4_decode_layoutupdate4(argp, lcp);
+}
- /*
- * Save the layout update in XDR format and let the layout driver deal
- * with it later.
- */
- lcp->lc_up_len = be32_to_cpup(p++);
- if (lcp->lc_up_len > 0) {
- READ_BUF(lcp->lc_up_len);
- READMEM(lcp->lc_up_layout, lcp->lc_up_len);
- }
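
The mtime in LAYOUTCOMMIT is an XDR optional, which is just a bool-discriminated union; xdr_item_is_present() only tests the discriminant word. The general pattern in the same toy cursor terms (cursor_decode_u64() is an assumed mirror of the u32 helper):

    /* Assumed mirror of the u32 helper for 8-octet hypers. */
    static int cursor_decode_u64(struct xdr_cursor *c, uint64_t *val);

    /* XDR optional: discriminant word, then the arm only if present. */
    static int cursor_decode_optional_u64(struct xdr_cursor *c,
                                          uint64_t *val, int *present)
    {
        uint32_t disc;

        if (cursor_decode_u32(c, &disc) < 0)
            return -1;
        *present = (disc != 0);
        if (!*present)
            return 0;           /* arm absent: nothing else on the wire */
        return cursor_decode_u64(c, val);
    }
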
+static __be32
+nfsd4_decode_layoutget(struct nfsd4_compoundargs *argp,
+ struct nfsd4_layoutget *lgp)
+{
+ __be32 status;
- DECODE_TAIL;
+ if (xdr_stream_decode_u32(argp->xdr, &lgp->lg_signal) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u32(argp->xdr, &lgp->lg_layout_type) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u32(argp->xdr, &lgp->lg_seg.iomode) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u64(argp->xdr, &lgp->lg_seg.offset) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u64(argp->xdr, &lgp->lg_seg.length) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u64(argp->xdr, &lgp->lg_minlength) < 0)
+ return nfserr_bad_xdr;
+ status = nfsd4_decode_stateid4(argp, &lgp->lg_sid);
+ if (status)
+ return status;
+ if (xdr_stream_decode_u32(argp->xdr, &lgp->lg_maxcount) < 0)
+ return nfserr_bad_xdr;
+
+ return nfs_ok;
}
static __be32
nfsd4_decode_layoutreturn(struct nfsd4_compoundargs *argp,
struct nfsd4_layoutreturn *lrp)
{
- DECODE_HEAD;
+ if (xdr_stream_decode_bool(argp->xdr, &lrp->lr_reclaim) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u32(argp->xdr, &lrp->lr_layout_type) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u32(argp->xdr, &lrp->lr_seg.iomode) < 0)
+ return nfserr_bad_xdr;
+ return nfsd4_decode_layoutreturn4(argp, lrp);
+}
+#endif /* CONFIG_NFSD_PNFS */
- READ_BUF(16);
- lrp->lr_reclaim = be32_to_cpup(p++);
- lrp->lr_layout_type = be32_to_cpup(p++);
- lrp->lr_seg.iomode = be32_to_cpup(p++);
- lrp->lr_return_type = be32_to_cpup(p++);
- if (lrp->lr_return_type == RETURN_FILE) {
- READ_BUF(16);
- p = xdr_decode_hyper(p, &lrp->lr_seg.offset);
- p = xdr_decode_hyper(p, &lrp->lr_seg.length);
+static __be32 nfsd4_decode_secinfo_no_name(struct nfsd4_compoundargs *argp,
+ struct nfsd4_secinfo_no_name *sin)
+{
+ if (xdr_stream_decode_u32(argp->xdr, &sin->sin_style) < 0)
+ return nfserr_bad_xdr;
+ return nfs_ok;
+}
- status = nfsd4_decode_stateid(argp, &lrp->lr_sid);
- if (status)
- return status;
+static __be32
+nfsd4_decode_sequence(struct nfsd4_compoundargs *argp,
+ struct nfsd4_sequence *seq)
+{
+ __be32 *p, status;
- READ_BUF(4);
- lrp->lrf_body_len = be32_to_cpup(p++);
- if (lrp->lrf_body_len > 0) {
- READ_BUF(lrp->lrf_body_len);
- READMEM(lrp->lrf_body, lrp->lrf_body_len);
- }
- } else {
- lrp->lr_seg.offset = 0;
- lrp->lr_seg.length = NFS4_MAX_UINT64;
- }
+ status = nfsd4_decode_sessionid4(argp, &seq->sessionid);
+ if (status)
+ return status;
+ p = xdr_inline_decode(argp->xdr, XDR_UNIT * 4);
+ if (!p)
+ return nfserr_bad_xdr;
+ seq->seqid = be32_to_cpup(p++);
+ seq->slotid = be32_to_cpup(p++);
+ seq->maxslots = be32_to_cpup(p++);
+ seq->cachethis = be32_to_cpup(p);
- DECODE_TAIL;
+ return nfs_ok;
}
-#endif /* CONFIG_NFSD_PNFS */
static __be32
-nfsd4_decode_fallocate(struct nfsd4_compoundargs *argp,
- struct nfsd4_fallocate *fallocate)
+nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, struct nfsd4_test_stateid *test_stateid)
{
- DECODE_HEAD;
+ struct nfsd4_test_stateid_id *stateid;
+ __be32 status;
+ u32 i;
- status = nfsd4_decode_stateid(argp, &fallocate->falloc_stateid);
- if (status)
- return status;
+ if (xdr_stream_decode_u32(argp->xdr, &test_stateid->ts_num_ids) < 0)
+ return nfserr_bad_xdr;
+
+ INIT_LIST_HEAD(&test_stateid->ts_stateid_list);
+ for (i = 0; i < test_stateid->ts_num_ids; i++) {
+ stateid = svcxdr_tmpalloc(argp, sizeof(*stateid));
+ if (!stateid)
+ return nfserrno(-ENOMEM); /* XXX: not jukebox? */
+ INIT_LIST_HEAD(&stateid->ts_id_list);
+ list_add_tail(&stateid->ts_id_list, &test_stateid->ts_stateid_list);
+ status = nfsd4_decode_stateid4(argp, &stateid->ts_id_stateid);
+ if (status)
+ return status;
+ }
+
+ return nfs_ok;
+}
- READ_BUF(16);
- p = xdr_decode_hyper(p, &fallocate->falloc_offset);
- xdr_decode_hyper(p, &fallocate->falloc_length);
+static __be32 nfsd4_decode_destroy_clientid(struct nfsd4_compoundargs *argp,
+ struct nfsd4_destroy_clientid *dc)
+{
+ return nfsd4_decode_clientid4(argp, &dc->clientid);
+}
- DECODE_TAIL;
+static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp,
+ struct nfsd4_reclaim_complete *rc)
+{
+ if (xdr_stream_decode_bool(argp->xdr, &rc->rca_one_fs) < 0)
+ return nfserr_bad_xdr;
+ return nfs_ok;
}
static __be32
-nfsd4_decode_clone(struct nfsd4_compoundargs *argp, struct nfsd4_clone *clone)
+nfsd4_decode_fallocate(struct nfsd4_compoundargs *argp,
+ struct nfsd4_fallocate *fallocate)
{
- DECODE_HEAD;
+ __be32 status;
- status = nfsd4_decode_stateid(argp, &clone->cl_src_stateid);
- if (status)
- return status;
- status = nfsd4_decode_stateid(argp, &clone->cl_dst_stateid);
+ status = nfsd4_decode_stateid4(argp, &fallocate->falloc_stateid);
if (status)
return status;
+ if (xdr_stream_decode_u64(argp->xdr, &fallocate->falloc_offset) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u64(argp->xdr, &fallocate->falloc_length) < 0)
+ return nfserr_bad_xdr;
- READ_BUF(8 + 8 + 8);
- p = xdr_decode_hyper(p, &clone->cl_src_pos);
- p = xdr_decode_hyper(p, &clone->cl_dst_pos);
- p = xdr_decode_hyper(p, &clone->cl_count);
- DECODE_TAIL;
+ return nfs_ok;
}
static __be32 nfsd4_decode_nl4_server(struct nfsd4_compoundargs *argp,
struct nl4_server *ns)
{
- DECODE_HEAD;
struct nfs42_netaddr *naddr;
+ __be32 *p;
- READ_BUF(4);
- ns->nl4_type = be32_to_cpup(p++);
+ if (xdr_stream_decode_u32(argp->xdr, &ns->nl4_type) < 0)
+ return nfserr_bad_xdr;
	/* currently supports only one inter-server source server */
switch (ns->nl4_type) {
case NL4_NETADDR:
naddr = &ns->u.nl4_addr;
- READ_BUF(4);
- naddr->netid_len = be32_to_cpup(p++);
+ if (xdr_stream_decode_u32(argp->xdr, &naddr->netid_len) < 0)
+ return nfserr_bad_xdr;
if (naddr->netid_len > RPCBIND_MAXNETIDLEN)
- goto xdr_error;
+ return nfserr_bad_xdr;
- READ_BUF(naddr->netid_len + 4); /* 4 for uaddr len */
- COPYMEM(naddr->netid, naddr->netid_len);
+ p = xdr_inline_decode(argp->xdr, naddr->netid_len);
+ if (!p)
+ return nfserr_bad_xdr;
+ memcpy(naddr->netid, p, naddr->netid_len);
- naddr->addr_len = be32_to_cpup(p++);
+ if (xdr_stream_decode_u32(argp->xdr, &naddr->addr_len) < 0)
+ return nfserr_bad_xdr;
if (naddr->addr_len > RPCBIND_MAXUADDRLEN)
- goto xdr_error;
+ return nfserr_bad_xdr;
- READ_BUF(naddr->addr_len);
- COPYMEM(naddr->addr, naddr->addr_len);
+ p = xdr_inline_decode(argp->xdr, naddr->addr_len);
+ if (!p)
+ return nfserr_bad_xdr;
+ memcpy(naddr->addr, p, naddr->addr_len);
break;
default:
- goto xdr_error;
+ return nfserr_bad_xdr;
}
- DECODE_TAIL;
+
+ return nfs_ok;
}
static __be32
nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy)
{
- DECODE_HEAD;
struct nl4_server *ns_dummy;
- int i, count;
+ u32 consecutive, i, count;
+ __be32 status;
- status = nfsd4_decode_stateid(argp, &copy->cp_src_stateid);
+ status = nfsd4_decode_stateid4(argp, &copy->cp_src_stateid);
if (status)
return status;
- status = nfsd4_decode_stateid(argp, &copy->cp_dst_stateid);
+ status = nfsd4_decode_stateid4(argp, &copy->cp_dst_stateid);
if (status)
return status;
+ if (xdr_stream_decode_u64(argp->xdr, &copy->cp_src_pos) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u64(argp->xdr, &copy->cp_dst_pos) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u64(argp->xdr, &copy->cp_count) < 0)
+ return nfserr_bad_xdr;
+ /* ca_consecutive: we always do consecutive copies */
+ if (xdr_stream_decode_u32(argp->xdr, &consecutive) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u32(argp->xdr, &copy->cp_synchronous) < 0)
+ return nfserr_bad_xdr;
- READ_BUF(8 + 8 + 8 + 4 + 4 + 4);
- p = xdr_decode_hyper(p, &copy->cp_src_pos);
- p = xdr_decode_hyper(p, &copy->cp_dst_pos);
- p = xdr_decode_hyper(p, &copy->cp_count);
- p++; /* ca_consecutive: we always do consecutive copies */
- copy->cp_synchronous = be32_to_cpup(p++);
-
- count = be32_to_cpup(p++);
-
+ if (xdr_stream_decode_u32(argp->xdr, &count) < 0)
+ return nfserr_bad_xdr;
copy->cp_intra = false;
if (count == 0) { /* intra-server copy */
copy->cp_intra = true;
- goto intra;
+ return nfs_ok;
}
- /* decode all the supplied server addresses but use first */
+ /* decode all the supplied server addresses but use only the first */
status = nfsd4_decode_nl4_server(argp, &copy->cp_src);
if (status)
return status;
ns_dummy = kmalloc(sizeof(struct nl4_server), GFP_KERNEL);
if (ns_dummy == NULL)
- return nfserrno(-ENOMEM);
+ return nfserrno(-ENOMEM); /* XXX: jukebox? */
for (i = 0; i < count - 1; i++) {
status = nfsd4_decode_nl4_server(argp, ns_dummy);
if (status) {
@@ -1839,44 +1936,64 @@ nfsd4_decode_copy(struct nfsd4_compoundargs *argp, struct nfsd4_copy *copy)
}
}
kfree(ns_dummy);
-intra:
- DECODE_TAIL;
+ return nfs_ok;
+}
+
+static __be32
+nfsd4_decode_copy_notify(struct nfsd4_compoundargs *argp,
+ struct nfsd4_copy_notify *cn)
+{
+ __be32 status;
+
+ status = nfsd4_decode_stateid4(argp, &cn->cpn_src_stateid);
+ if (status)
+ return status;
+ return nfsd4_decode_nl4_server(argp, &cn->cpn_dst);
}
static __be32
nfsd4_decode_offload_status(struct nfsd4_compoundargs *argp,
struct nfsd4_offload_status *os)
{
- return nfsd4_decode_stateid(argp, &os->stateid);
+ return nfsd4_decode_stateid4(argp, &os->stateid);
}
static __be32
-nfsd4_decode_copy_notify(struct nfsd4_compoundargs *argp,
- struct nfsd4_copy_notify *cn)
+nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek)
{
__be32 status;
- status = nfsd4_decode_stateid(argp, &cn->cpn_src_stateid);
+ status = nfsd4_decode_stateid4(argp, &seek->seek_stateid);
if (status)
return status;
- return nfsd4_decode_nl4_server(argp, &cn->cpn_dst);
+ if (xdr_stream_decode_u64(argp->xdr, &seek->seek_offset) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u32(argp->xdr, &seek->seek_whence) < 0)
+ return nfserr_bad_xdr;
+
+ return nfs_ok;
}
static __be32
-nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek)
+nfsd4_decode_clone(struct nfsd4_compoundargs *argp, struct nfsd4_clone *clone)
{
- DECODE_HEAD;
+ __be32 status;
- status = nfsd4_decode_stateid(argp, &seek->seek_stateid);
+ status = nfsd4_decode_stateid4(argp, &clone->cl_src_stateid);
if (status)
return status;
+ status = nfsd4_decode_stateid4(argp, &clone->cl_dst_stateid);
+ if (status)
+ return status;
+ if (xdr_stream_decode_u64(argp->xdr, &clone->cl_src_pos) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u64(argp->xdr, &clone->cl_dst_pos) < 0)
+ return nfserr_bad_xdr;
+ if (xdr_stream_decode_u64(argp->xdr, &clone->cl_count) < 0)
+ return nfserr_bad_xdr;
- READ_BUF(8 + 4);
- p = xdr_decode_hyper(p, &seek->seek_offset);
- seek->seek_whence = be32_to_cpup(p);
-
- DECODE_TAIL;
+ return nfs_ok;
}
/*
@@ -1889,13 +2006,14 @@ nfsd4_decode_seek(struct nfsd4_compoundargs *argp, struct nfsd4_seek *seek)
*/
/*
- * Decode data into buffer. Uses head and pages constructed by
- * svcxdr_construct_vector.
+ * Decode data into buffer.
*/
static __be32
-nfsd4_vbuf_from_vector(struct nfsd4_compoundargs *argp, struct kvec *head,
- struct page **pages, char **bufp, u32 buflen)
+nfsd4_vbuf_from_vector(struct nfsd4_compoundargs *argp, struct xdr_buf *xdr,
+ char **bufp, u32 buflen)
{
+ struct page **pages = xdr->pages;
+ struct kvec *head = xdr->head;
char *tmp, *dp;
u32 len;
@@ -1938,25 +2056,22 @@ nfsd4_vbuf_from_vector(struct nfsd4_compoundargs *argp, struct kvec *head,
static __be32
nfsd4_decode_xattr_name(struct nfsd4_compoundargs *argp, char **namep)
{
- DECODE_HEAD;
char *name, *sp, *dp;
u32 namelen, cnt;
+ __be32 *p;
- READ_BUF(4);
- namelen = be32_to_cpup(p++);
-
+ if (xdr_stream_decode_u32(argp->xdr, &namelen) < 0)
+ return nfserr_bad_xdr;
if (namelen > (XATTR_NAME_MAX - XATTR_USER_PREFIX_LEN))
return nfserr_nametoolong;
-
if (namelen == 0)
- goto xdr_error;
-
- READ_BUF(namelen);
-
+ return nfserr_bad_xdr;
+ p = xdr_inline_decode(argp->xdr, namelen);
+ if (!p)
+ return nfserr_bad_xdr;
name = svcxdr_tmpalloc(argp, namelen + XATTR_USER_PREFIX_LEN + 1);
if (!name)
return nfserr_jukebox;
-
memcpy(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN);
/*
@@ -1969,14 +2084,14 @@ nfsd4_decode_xattr_name(struct nfsd4_compoundargs *argp, char **namep)
while (cnt-- > 0) {
if (*sp == '\0')
- goto xdr_error;
+ return nfserr_bad_xdr;
*dp++ = *sp++;
}
*dp = '\0';
*namep = name;
- DECODE_TAIL;
+ return nfs_ok;
}
/*
@@ -2008,13 +2123,11 @@ static __be32
nfsd4_decode_setxattr(struct nfsd4_compoundargs *argp,
struct nfsd4_setxattr *setxattr)
{
- DECODE_HEAD;
u32 flags, maxcount, size;
- struct kvec head;
- struct page **pagelist;
+ __be32 status;
- READ_BUF(4);
- flags = be32_to_cpup(p++);
+ if (xdr_stream_decode_u32(argp->xdr, &flags) < 0)
+ return nfserr_bad_xdr;
if (flags > SETXATTR4_REPLACE)
return nfserr_inval;
@@ -2027,33 +2140,32 @@ nfsd4_decode_setxattr(struct nfsd4_compoundargs *argp,
maxcount = svc_max_payload(argp->rqstp);
maxcount = min_t(u32, XATTR_SIZE_MAX, maxcount);
- READ_BUF(4);
- size = be32_to_cpup(p++);
+ if (xdr_stream_decode_u32(argp->xdr, &size) < 0)
+ return nfserr_bad_xdr;
if (size > maxcount)
return nfserr_xattr2big;
setxattr->setxa_len = size;
if (size > 0) {
- status = svcxdr_construct_vector(argp, &head, &pagelist, size);
- if (status)
- return status;
+ struct xdr_buf payload;
- status = nfsd4_vbuf_from_vector(argp, &head, pagelist,
- &setxattr->setxa_buf, size);
+ if (!xdr_stream_subsegment(argp->xdr, &payload, size))
+ return nfserr_bad_xdr;
+ status = nfsd4_vbuf_from_vector(argp, &payload,
+ &setxattr->setxa_buf, size);
}
- DECODE_TAIL;
+ return nfs_ok;
}
static __be32
nfsd4_decode_listxattrs(struct nfsd4_compoundargs *argp,
struct nfsd4_listxattrs *listxattrs)
{
- DECODE_HEAD;
u32 maxcount;
- READ_BUF(12);
- p = xdr_decode_hyper(p, &listxattrs->lsxa_cookie);
+ if (xdr_stream_decode_u64(argp->xdr, &listxattrs->lsxa_cookie) < 0)
+ return nfserr_bad_xdr;
/*
* If the cookie is too large to have even one user.x attribute
@@ -2063,7 +2175,8 @@ nfsd4_decode_listxattrs(struct nfsd4_compoundargs *argp,
(XATTR_LIST_MAX / (XATTR_USER_PREFIX_LEN + 2)))
return nfserr_badcookie;
- maxcount = be32_to_cpup(p++);
+ if (xdr_stream_decode_u32(argp->xdr, &maxcount) < 0)
+ return nfserr_bad_xdr;
if (maxcount < 8)
/* Always need at least 2 words (length and one character) */
return nfserr_inval;
@@ -2071,7 +2184,7 @@ nfsd4_decode_listxattrs(struct nfsd4_compoundargs *argp,
maxcount = min(maxcount, svc_max_payload(argp->rqstp));
listxattrs->lsxa_maxcount = maxcount;
- DECODE_TAIL;
+ return nfs_ok;
}
static __be32
@@ -2198,43 +2311,54 @@ nfsd4_opnum_in_range(struct nfsd4_compoundargs *argp, struct nfsd4_op *op)
return true;
}
-static __be32
+static int
nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
{
- DECODE_HEAD;
struct nfsd4_op *op;
bool cachethis = false;
int auth_slack= argp->rqstp->rq_auth_slack;
int max_reply = auth_slack + 8; /* opcnt, status */
int readcount = 0;
int readbytes = 0;
+ __be32 *p;
int i;
- READ_BUF(4);
- argp->taglen = be32_to_cpup(p++);
- READ_BUF(argp->taglen);
- SAVEMEM(argp->tag, argp->taglen);
- READ_BUF(8);
- argp->minorversion = be32_to_cpup(p++);
- argp->opcnt = be32_to_cpup(p++);
- max_reply += 4 + (XDR_QUADLEN(argp->taglen) << 2);
-
- if (argp->taglen > NFSD4_MAX_TAGLEN)
- goto xdr_error;
+ if (xdr_stream_decode_u32(argp->xdr, &argp->taglen) < 0)
+ return 0;
+ max_reply += XDR_UNIT;
+ argp->tag = NULL;
+ if (unlikely(argp->taglen)) {
+ if (argp->taglen > NFSD4_MAX_TAGLEN)
+ return 0;
+ p = xdr_inline_decode(argp->xdr, argp->taglen);
+ if (!p)
+ return 0;
+ argp->tag = svcxdr_tmpalloc(argp, argp->taglen);
+ if (!argp->tag)
+ return 0;
+ memcpy(argp->tag, p, argp->taglen);
+ max_reply += xdr_align_size(argp->taglen);
+ }
+
+ if (xdr_stream_decode_u32(argp->xdr, &argp->minorversion) < 0)
+ return 0;
+ if (xdr_stream_decode_u32(argp->xdr, &argp->opcnt) < 0)
+ return 0;
+
/*
* NFS4ERR_RESOURCE is a more helpful error than GARBAGE_ARGS
* here, so we return success at the xdr level so that
* nfsd4_proc can handle this as an NFS-level error.
*/
if (argp->opcnt > NFSD_MAX_OPS_PER_COMPOUND)
- return 0;
+ return 1;
if (argp->opcnt > ARRAY_SIZE(argp->iops)) {
argp->ops = kzalloc(argp->opcnt * sizeof(*argp->ops), GFP_KERNEL);
if (!argp->ops) {
argp->ops = argp->iops;
dprintk("nfsd: couldn't allocate room for COMPOUND\n");
- goto xdr_error;
+ return 0;
}
}
@@ -2245,12 +2369,16 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
op = &argp->ops[i];
op->replay = NULL;
- READ_BUF(4);
- op->opnum = be32_to_cpup(p++);
-
- if (nfsd4_opnum_in_range(argp, op))
+ if (xdr_stream_decode_u32(argp->xdr, &op->opnum) < 0)
+ return 0;
+ if (nfsd4_opnum_in_range(argp, op)) {
op->status = nfsd4_dec_ops[op->opnum](argp, &op->u);
- else {
+ if (op->status != nfs_ok)
+ trace_nfsd_compound_decode_err(argp->rqstp,
+ argp->opcnt, i,
+ op->opnum,
+ op->status);
+ } else {
op->opnum = OP_ILLEGAL;
op->status = nfserr_op_illegal;
}
@@ -2289,7 +2417,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
if (readcount > 1 || max_reply > PAGE_SIZE - auth_slack)
clear_bit(RQ_SPLICE_OK, &argp->rqstp->rq_flags);
- DECODE_TAIL;
+ return 1;
}
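
Note the return-type change above: nfsd4_decode_compound() now returns int, where 1 tells the dispatcher to proceed (possibly with an NFS-level error already recorded in op->status) and 0 means the buffer was unparseable and deserves GARBAGE_ARGS. A simplified sketch of a caller separating the two layers (illustrative, not the kernel's nfsd_dispatch()):

#include <stdio.h>

enum { DECODE_GARBAGE = 0, DECODE_OK = 1 };

struct op { unsigned int opnum; unsigned int status; };

/* A decode result of 1 with op->status set models an NFS-level error
 * that still produces a normal NFS reply. */
static int dispatch(int decode_result, const struct op *op)
{
        if (decode_result == DECODE_GARBAGE) {
                puts("reply: RPC GARBAGE_ARGS");
                return 0;
        }
        if (op->status)
                printf("reply: NFS error %u for op %u\n", op->status, op->opnum);
        else
                printf("reply: op %u ok\n", op->opnum);
        return 1;
}

int main(void)
{
        struct op too_many = { 0, 10018 };      /* e.g. NFS4ERR_RESOURCE */
        struct op fine = { 3, 0 };

        dispatch(DECODE_GARBAGE, &fine);        /* short/corrupt buffer */
        dispatch(DECODE_OK, &too_many);         /* decodable, NFS-level error */
        dispatch(DECODE_OK, &fine);
        return 0;
}
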
static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode,
@@ -2298,12 +2426,8 @@ static __be32 *encode_change(__be32 *p, struct kstat *stat, struct inode *inode,
if (exp->ex_flags & NFSEXP_V4ROOT) {
*p++ = cpu_to_be32(convert_to_wallclock(exp->cd->flush_time));
*p++ = 0;
- } else if (IS_I_VERSION(inode)) {
+ } else
p = xdr_encode_hyper(p, nfsd4_change_attribute(stat, inode));
- } else {
- *p++ = cpu_to_be32(stat->ctime.tv_sec);
- *p++ = cpu_to_be32(stat->ctime.tv_nsec);
- }
return p;
}
@@ -2335,15 +2459,8 @@ static __be32 *encode_time_delta(__be32 *p, struct inode *inode)
static __be32 *encode_cinfo(__be32 *p, struct nfsd4_change_info *c)
{
*p++ = cpu_to_be32(c->atomic);
- if (c->change_supported) {
- p = xdr_encode_hyper(p, c->before_change);
- p = xdr_encode_hyper(p, c->after_change);
- } else {
- *p++ = cpu_to_be32(c->before_ctime_sec);
- *p++ = cpu_to_be32(c->before_ctime_nsec);
- *p++ = cpu_to_be32(c->after_ctime_sec);
- *p++ = cpu_to_be32(c->after_ctime_nsec);
- }
+ p = xdr_encode_hyper(p, c->before_change);
+ p = xdr_encode_hyper(p, c->after_change);
return p;
}
@@ -2558,7 +2675,7 @@ static u32 nfs4_file_type(umode_t mode)
case S_IFREG: return NF4REG;
case S_IFSOCK: return NF4SOCK;
default: return NF4BAD;
- };
+ }
}
static inline __be32
@@ -3194,16 +3311,6 @@ out_acl:
goto out;
}
- if (bmval2 & FATTR4_WORD2_CHANGE_ATTR_TYPE) {
- p = xdr_reserve_space(xdr, 4);
- if (!p)
- goto out_resource;
- if (IS_I_VERSION(d_inode(dentry)))
- *p++ = cpu_to_be32(NFS4_CHANGE_TYPE_IS_MONOTONIC_INCR);
- else
- *p++ = cpu_to_be32(NFS4_CHANGE_TYPE_IS_TIME_METADATA);
- }
-
#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) {
status = nfsd4_encode_security_label(xdr, rqstp, context,
@@ -3756,8 +3863,8 @@ static __be32 nfsd4_encode_splice_read(
{
struct xdr_stream *xdr = &resp->xdr;
struct xdr_buf *buf = xdr->buf;
+ int status, space_left;
u32 eof;
- int space_left;
__be32 nfserr;
__be32 *p = xdr->p - 2;
@@ -3768,14 +3875,13 @@ static __be32 nfsd4_encode_splice_read(
nfserr = nfsd_splice_read(read->rd_rqstp, read->rd_fhp,
file, read->rd_offset, &maxcount, &eof);
read->rd_length = maxcount;
- if (nfserr) {
- /*
- * nfsd_splice_actor may have already messed with the
- * page length; reset it so as not to confuse
- * xdr_truncate_encode:
- */
- buf->page_len = 0;
- return nfserr;
+ if (nfserr)
+ goto out_err;
+ status = svc_encode_result_payload(read->rd_rqstp,
+ buf->head[0].iov_len, maxcount);
+ if (status) {
+ nfserr = nfserrno(status);
+ goto out_err;
}
*(p++) = htonl(eof);
@@ -3806,6 +3912,15 @@ static __be32 nfsd4_encode_splice_read(
xdr->end = (__be32 *)((void *)xdr->end + space_left);
return 0;
+
+out_err:
+ /*
+ * nfsd_splice_actor may have already messed with the
+ * page length; reset it so as not to confuse
+ * xdr_truncate_encode in our caller.
+ */
+ buf->page_len = 0;
+ return nfserr;
}
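
Two changes meet in this hunk: errors now unwind through a single out_err label that resets page_len, and svc_encode_result_payload() registers the byte range of the READ data so a transport such as RPC/RDMA can place it directly. A toy model of recording that (offset, length) range in a reply (illustrative names, not the sunrpc API):

#include <stdio.h>

/* Toy reply: the transport remembers one (offset, length) range that
 * it may transfer by direct placement instead of inline. */
struct reply {
        unsigned int len;               /* bytes encoded so far */
        unsigned int payload_offset;
        unsigned int payload_length;
};

static int encode_result_payload(struct reply *r, unsigned int offset,
                                 unsigned int length)
{
        if (offset > r->len)            /* range must start inside the reply */
                return -1;
        r->payload_offset = offset;
        r->payload_length = length;
        return 0;
}

int main(void)
{
        struct reply r = { .len = 36 };         /* header already encoded */

        if (encode_result_payload(&r, r.len, 8192) == 0)
                printf("payload at %u, %u bytes\n",
                       r.payload_offset, r.payload_length);
        return 0;
}
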
static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
@@ -3829,7 +3944,7 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
read->rd_length = maxcount;
if (nfserr)
return nfserr;
- if (svc_encode_read_payload(resp->rqstp, starting_len + 8, maxcount))
+ if (svc_encode_result_payload(resp->rqstp, starting_len + 8, maxcount))
return nfserr_io;
xdr_truncate_encode(xdr, starting_len + 8 + xdr_align_size(maxcount));
@@ -3897,6 +4012,7 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd
int zero = 0;
struct xdr_stream *xdr = &resp->xdr;
int length_offset = xdr->buf->len;
+ int status;
__be32 *p;
p = xdr_reserve_space(xdr, 4);
@@ -3917,9 +4033,13 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd
(char *)p, &maxcount);
if (nfserr == nfserr_isdir)
nfserr = nfserr_inval;
- if (nfserr) {
- xdr_truncate_encode(xdr, length_offset);
- return nfserr;
+ if (nfserr)
+ goto out_err;
+ status = svc_encode_result_payload(readlink->rl_rqstp, length_offset,
+ maxcount);
+ if (status) {
+ nfserr = nfserrno(status);
+ goto out_err;
}
wire_count = htonl(maxcount);
@@ -3929,6 +4049,10 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd
write_bytes_to_xdr_buf(xdr->buf, length_offset + 4 + maxcount,
&zero, 4 - (maxcount&3));
return 0;
+
+out_err:
+ xdr_truncate_encode(xdr, length_offset);
+ return nfserr;
}
static __be32
@@ -4575,7 +4699,7 @@ nfsd4_encode_copy(struct nfsd4_compoundres *resp, __be32 nfserr,
__be32 *p;
nfserr = nfsd42_encode_write_res(resp, &copy->cp_res,
- copy->cp_synchronous);
+ !!copy->cp_synchronous);
if (nfserr)
return nfserr;
@@ -5182,10 +5306,12 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
if (op->status && opdesc &&
!(opdesc->op_flags & OP_NONTRIVIAL_ERROR_ENCODE))
goto status;
- BUG_ON(op->opnum < 0 || op->opnum >= ARRAY_SIZE(nfsd4_enc_ops) ||
+ BUG_ON(op->opnum >= ARRAY_SIZE(nfsd4_enc_ops) ||
!nfsd4_enc_ops[op->opnum]);
encoder = nfsd4_enc_ops[op->opnum];
op->status = encoder(resp, op->status, &op->u);
+ if (op->status)
+ trace_nfsd_compound_encode_err(rqstp, op->opnum, op->status);
if (opdesc && opdesc->op_release)
opdesc->op_release(&op->u);
xdr_commit_encode(xdr);
@@ -5254,12 +5380,6 @@ nfsd4_encode_replay(struct xdr_stream *xdr, struct nfsd4_op *op)
p = xdr_encode_opaque_fixed(p, rp->rp_buf, rp->rp_buflen);
}
-int
-nfs4svc_encode_voidres(struct svc_rqst *rqstp, __be32 *p)
-{
- return xdr_ressize_check(rqstp, p);
-}
-
void nfsd4_release_compoundargs(struct svc_rqst *rqstp)
{
struct nfsd4_compoundargs *args = rqstp->rq_argp;
@@ -5268,8 +5388,6 @@ void nfsd4_release_compoundargs(struct svc_rqst *rqstp)
kfree(args->ops);
args->ops = args->iops;
}
- kfree(args->tmpp);
- args->tmpp = NULL;
while (args->to_free) {
struct svcxdr_tmpbuf *tb = args->to_free;
args->to_free = tb->next;
@@ -5278,33 +5396,18 @@ void nfsd4_release_compoundargs(struct svc_rqst *rqstp)
}
int
-nfs4svc_decode_voidarg(struct svc_rqst *rqstp, __be32 *p)
-{
- return 1;
-}
-
-int
nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, __be32 *p)
{
struct nfsd4_compoundargs *args = rqstp->rq_argp;
- if (rqstp->rq_arg.head[0].iov_len % 4) {
- /* client is nuts */
- dprintk("%s: compound not properly padded! (peeraddr=%pISc xid=0x%x)",
- __func__, svc_addr(rqstp), be32_to_cpu(rqstp->rq_xid));
- return 0;
- }
- args->p = p;
- args->end = rqstp->rq_arg.head[0].iov_base + rqstp->rq_arg.head[0].iov_len;
- args->pagelist = rqstp->rq_arg.pages;
- args->pagelen = rqstp->rq_arg.page_len;
- args->tail = false;
- args->tmpp = NULL;
+ /* svcxdr_tmpalloc */
args->to_free = NULL;
+
+ args->xdr = &rqstp->rq_arg_stream;
args->ops = args->iops;
args->rqstp = rqstp;
- return !nfsd4_decode_compound(args);
+ return nfsd4_decode_compound(args);
}
int
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index cb742e17e04a..d63cf8196fed 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -74,6 +74,14 @@ extern unsigned long nfsd_drc_mem_used;
extern const struct seq_operations nfs_exports_op;
/*
+ * Common void argument and result helpers
+ */
+struct nfsd_voidargs { };
+struct nfsd_voidres { };
+int nfssvc_decode_voidarg(struct svc_rqst *rqstp, __be32 *p);
+int nfssvc_encode_voidres(struct svc_rqst *rqstp, __be32 *p);
+
+/*
* Function prototypes.
*/
int nfsd_svc(int nrservs, struct net *net, const struct cred *cred);
@@ -387,7 +395,6 @@ void nfsd_lockd_shutdown(void);
#define NFSD4_2_SUPPORTED_ATTRS_WORD2 \
(NFSD4_1_SUPPORTED_ATTRS_WORD2 | \
- FATTR4_WORD2_CHANGE_ATTR_TYPE | \
FATTR4_WORD2_MODE_UMASK | \
NFSD4_2_SECURITY_ATTRS | \
FATTR4_WORD2_XATTR_SUPPORT)
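
nfsd_voidargs and nfsd_voidres above are empty structs, so the void procedures advertise the smallest argument/result sizes the svc layer permits. Empty structs are a GNU C extension (sizeof evaluates to 0 under gcc; ISO C requires at least one member), which kernel code built with gcc or clang can rely on. A quick standalone check, as a sketch:

#include <stdio.h>

/* GNU C extension: an empty struct has size 0 (ISO C forbids this). */
struct voidargs { };
struct voidres { };

int main(void)
{
        printf("sizeof(struct voidargs) = %zu\n", sizeof(struct voidargs));
        printf("sizeof(struct voidres)  = %zu\n", sizeof(struct voidres));
        return 0;
}
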
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index c81dbbad8792..66f2ef67792a 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c
@@ -268,12 +268,20 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
if (fileid_type == FILEID_ROOT)
dentry = dget(exp->ex_path.dentry);
else {
- dentry = exportfs_decode_fh(exp->ex_path.mnt, fid,
- data_left, fileid_type,
- nfsd_acceptable, exp);
- if (IS_ERR_OR_NULL(dentry))
+ dentry = exportfs_decode_fh_raw(exp->ex_path.mnt, fid,
+ data_left, fileid_type,
+ nfsd_acceptable, exp);
+ if (IS_ERR_OR_NULL(dentry)) {
trace_nfsd_set_fh_dentry_badhandle(rqstp, fhp,
dentry ? PTR_ERR(dentry) : -ESTALE);
+ switch (PTR_ERR(dentry)) {
+ case -ENOMEM:
+ case -ETIMEDOUT:
+ break;
+ default:
+ dentry = ERR_PTR(-ESTALE);
+ }
+ }
}
if (dentry == NULL)
goto out;
@@ -291,6 +299,20 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp)
fhp->fh_dentry = dentry;
fhp->fh_export = exp;
+
+ switch (rqstp->rq_vers) {
+ case 4:
+ if (dentry->d_sb->s_export_op->flags & EXPORT_OP_NOATOMIC_ATTR)
+ fhp->fh_no_atomic_attr = true;
+ break;
+ case 3:
+ if (dentry->d_sb->s_export_op->flags & EXPORT_OP_NOWCC)
+ fhp->fh_no_wcc = true;
+ break;
+ case 2:
+ fhp->fh_no_wcc = true;
+ }
+
return 0;
out:
exp_put(exp);
@@ -559,6 +581,9 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry,
*/
set_version_and_fsid_type(fhp, exp, ref_fh);
+ /* If we have a ref_fh, then copy the fh_no_wcc setting from it. */
+ fhp->fh_no_wcc = ref_fh ? ref_fh->fh_no_wcc : false;
+
if (ref_fh == fhp)
fh_put(ref_fh);
@@ -662,6 +687,7 @@ fh_put(struct svc_fh *fhp)
exp_put(exp);
fhp->fh_export = NULL;
}
+ fhp->fh_no_wcc = false;
return;
}
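
The switch in the hunk above is the substance of the ESTALE fix: exportfs_decode_fh_raw() returns the filesystem's raw error, and NFSD preserves -ENOMEM and -ETIMEDOUT (transient, retryable) while folding everything else into -ESTALE as before. The same filtering policy as a standalone sketch (errno values from <errno.h>; the helper name is illustrative):

#include <errno.h>
#include <stdio.h>

/* Keep transient errors so the client retries instead of seeing ESTALE. */
static int filter_fh_error(int err)
{
        switch (err) {
        case -ENOMEM:
        case -ETIMEDOUT:
                return err;
        default:
                return -ESTALE;
        }
}

int main(void)
{
        printf("-ETIMEDOUT -> %d\n", filter_fh_error(-ETIMEDOUT));
        printf("-ENOENT    -> %d\n", filter_fh_error(-ENOENT));
        return 0;
}
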
diff --git a/fs/nfsd/nfsfh.h b/fs/nfsd/nfsfh.h
index 56cfbc361561..cb20c2cd3469 100644
--- a/fs/nfsd/nfsfh.h
+++ b/fs/nfsd/nfsfh.h
@@ -35,6 +35,12 @@ typedef struct svc_fh {
bool fh_locked; /* inode locked by us */
bool fh_want_write; /* remount protection taken */
+ bool fh_no_wcc; /* no wcc data needed */
+ bool fh_no_atomic_attr;
+ /*
+ * wcc data is not atomic with
+ * operation
+ */
int fh_flags; /* FH flags */
#ifdef CONFIG_NFSD_V3
bool fh_post_saved; /* post-op attrs saved */
@@ -54,7 +60,6 @@ typedef struct svc_fh {
struct kstat fh_post_attr; /* full attrs after operation */
u64 fh_post_change; /* nfsv4 change; see above */
#endif /* CONFIG_NFSD_V3 */
-
} svc_fh;
#define NFSD4_FH_FOREIGN (1<<0)
#define SET_FH_FLAG(c, f) ((c)->fh_flags |= (f))
@@ -259,13 +264,16 @@ fh_clear_wcc(struct svc_fh *fhp)
static inline u64 nfsd4_change_attribute(struct kstat *stat,
struct inode *inode)
{
- u64 chattr;
-
- chattr = stat->ctime.tv_sec;
- chattr <<= 30;
- chattr += stat->ctime.tv_nsec;
- chattr += inode_query_iversion(inode);
- return chattr;
+ if (IS_I_VERSION(inode)) {
+ u64 chattr;
+
+ chattr = stat->ctime.tv_sec;
+ chattr <<= 30;
+ chattr += stat->ctime.tv_nsec;
+ chattr += inode_query_iversion(inode);
+ return chattr;
+ } else
+ return time_to_chattr(&stat->ctime);
}
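
nfsd4_change_attribute() now covers both classes of filesystem itself: with I_VERSION, ctime is folded together with the inode's version counter; without it, time_to_chattr() fakes a change attribute by packing ctime into 64 bits (seconds shifted up 32, plus nanoseconds). Both encodings, modeled in standalone C (kernel types replaced with plain integers):

#include <stdint.h>
#include <stdio.h>

struct ts { int64_t sec; long nsec; };

/* I_VERSION case: ctime folded with the inode version counter. */
static uint64_t chattr_iversion(const struct ts *ctime, uint64_t version)
{
        uint64_t chattr = (uint64_t)ctime->sec;

        chattr <<= 30;
        chattr += ctime->nsec;
        chattr += version;
        return chattr;
}

/* Fallback: fake a change attribute from ctime alone (time_to_chattr). */
static uint64_t chattr_from_time(const struct ts *t)
{
        uint64_t chattr = (uint64_t)t->sec;

        chattr <<= 32;
        chattr += t->nsec;
        return chattr;
}

int main(void)
{
        struct ts now = { 1608000000, 123456789 };

        printf("iversion: %llu\n",
               (unsigned long long)chattr_iversion(&now, 7));
        printf("fallback: %llu\n",
               (unsigned long long)chattr_from_time(&now));
        return 0;
}
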
extern void fill_pre_wcc(struct svc_fh *fhp);
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index 0d71549f9d42..9473d048efec 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -609,7 +609,6 @@ nfsd_proc_statfs(struct svc_rqst *rqstp)
* NFSv2 Server procedures.
* Only the results of non-idempotent operations are cached.
*/
-struct nfsd_void { int dummy; };
#define ST 1 /* status */
#define FH 8 /* filehandle */
@@ -618,10 +617,10 @@ struct nfsd_void { int dummy; };
static const struct svc_procedure nfsd_procedures2[18] = {
[NFSPROC_NULL] = {
.pc_func = nfsd_proc_null,
- .pc_decode = nfssvc_decode_void,
- .pc_encode = nfssvc_encode_void,
- .pc_argsize = sizeof(struct nfsd_void),
- .pc_ressize = sizeof(struct nfsd_void),
+ .pc_decode = nfssvc_decode_voidarg,
+ .pc_encode = nfssvc_encode_voidres,
+ .pc_argsize = sizeof(struct nfsd_voidargs),
+ .pc_ressize = sizeof(struct nfsd_voidres),
.pc_cachetype = RC_NOCACHE,
.pc_xdrressize = 0,
},
@@ -647,10 +646,10 @@ static const struct svc_procedure nfsd_procedures2[18] = {
},
[NFSPROC_ROOT] = {
.pc_func = nfsd_proc_root,
- .pc_decode = nfssvc_decode_void,
- .pc_encode = nfssvc_encode_void,
- .pc_argsize = sizeof(struct nfsd_void),
- .pc_ressize = sizeof(struct nfsd_void),
+ .pc_decode = nfssvc_decode_voidarg,
+ .pc_encode = nfssvc_encode_voidres,
+ .pc_argsize = sizeof(struct nfsd_voidargs),
+ .pc_ressize = sizeof(struct nfsd_voidres),
.pc_cachetype = RC_NOCACHE,
.pc_xdrressize = 0,
},
@@ -685,10 +684,10 @@ static const struct svc_procedure nfsd_procedures2[18] = {
},
[NFSPROC_WRITECACHE] = {
.pc_func = nfsd_proc_writecache,
- .pc_decode = nfssvc_decode_void,
- .pc_encode = nfssvc_encode_void,
- .pc_argsize = sizeof(struct nfsd_void),
- .pc_ressize = sizeof(struct nfsd_void),
+ .pc_decode = nfssvc_decode_voidarg,
+ .pc_encode = nfssvc_encode_voidres,
+ .pc_argsize = sizeof(struct nfsd_voidargs),
+ .pc_ressize = sizeof(struct nfsd_voidres),
.pc_cachetype = RC_NOCACHE,
.pc_xdrressize = 0,
},
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 27b1ad136150..00384c332f9b 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -29,6 +29,8 @@
#include "netns.h"
#include "filecache.h"
+#include "trace.h"
+
#define NFSDDBG_FACILITY NFSDDBG_SVC
bool inter_copy_offload_enable;
@@ -527,8 +529,7 @@ static void nfsd_last_thread(struct svc_serv *serv, struct net *net)
return;
nfsd_shutdown_net(net);
- printk(KERN_WARNING "nfsd: last server has exited, flushing export "
- "cache\n");
+ pr_info("nfsd: last server has exited, flushing export cache\n");
nfsd_export_flush(net);
}
@@ -1009,17 +1010,16 @@ int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
struct kvec *resv = &rqstp->rq_res.head[0];
__be32 *p;
- dprintk("nfsd_dispatch: vers %d proc %d\n",
- rqstp->rq_vers, rqstp->rq_proc);
-
if (nfs_request_too_big(rqstp, proc))
- goto out_too_large;
+ goto out_decode_err;
/*
* Give the xdr decoder a chance to change this if it wants
* (necessary in the NFSv4.0 compound case)
*/
rqstp->rq_cachetype = proc->pc_cachetype;
+
+ svcxdr_init_decode(rqstp);
if (!proc->pc_decode(rqstp, argv->iov_base))
goto out_decode_err;
@@ -1050,29 +1050,51 @@ int nfsd_dispatch(struct svc_rqst *rqstp, __be32 *statp)
out_cached_reply:
return 1;
-out_too_large:
- dprintk("nfsd: NFSv%d argument too large\n", rqstp->rq_vers);
- *statp = rpc_garbage_args;
- return 1;
-
out_decode_err:
- dprintk("nfsd: failed to decode arguments!\n");
+ trace_nfsd_garbage_args_err(rqstp);
*statp = rpc_garbage_args;
return 1;
out_update_drop:
- dprintk("nfsd: Dropping request; may be revisited later\n");
nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
out_dropit:
return 0;
out_encode_err:
- dprintk("nfsd: failed to encode result!\n");
+ trace_nfsd_cant_encode_err(rqstp);
nfsd_cache_update(rqstp, RC_NOCACHE, NULL);
*statp = rpc_system_err;
return 1;
}
+/**
+ * nfssvc_decode_voidarg - Decode void arguments
+ * @rqstp: Server RPC transaction context
+ * @p: buffer containing arguments to decode
+ *
+ * Return values:
+ * %0: Arguments were not valid
+ * %1: Decoding was successful
+ */
+int nfssvc_decode_voidarg(struct svc_rqst *rqstp, __be32 *p)
+{
+ return 1;
+}
+
+/**
+ * nfssvc_encode_voidres - Encode void results
+ * @rqstp: Server RPC transaction context
+ * @p: buffer in which to encode results
+ *
+ * Return values:
+ * %0: Local error while encoding
+ * %1: Encoding was successful
+ */
+int nfssvc_encode_voidres(struct svc_rqst *rqstp, __be32 *p)
+{
+ return xdr_ressize_check(rqstp, p);
+}
+
int nfsd_pool_stats_open(struct inode *inode, struct file *file)
{
int ret;
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 8a288c8fcd57..7aa6e8aca2c1 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -192,11 +192,6 @@ __be32 *nfs2svc_encode_fattr(struct svc_rqst *rqstp, __be32 *p, struct svc_fh *f
/*
* XDR decode functions
*/
-int
-nfssvc_decode_void(struct svc_rqst *rqstp, __be32 *p)
-{
- return xdr_argsize_check(rqstp, p);
-}
int
nfssvc_decode_fhandle(struct svc_rqst *rqstp, __be32 *p)
@@ -423,11 +418,6 @@ nfssvc_decode_readdirargs(struct svc_rqst *rqstp, __be32 *p)
/*
* XDR encode functions
*/
-int
-nfssvc_encode_void(struct svc_rqst *rqstp, __be32 *p)
-{
- return xdr_ressize_check(rqstp, p);
-}
int
nfssvc_encode_stat(struct svc_rqst *rqstp, __be32 *p)
@@ -469,6 +459,7 @@ int
nfssvc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p)
{
struct nfsd_readlinkres *resp = rqstp->rq_resp;
+ struct kvec *head = rqstp->rq_res.head;
*p++ = resp->status;
if (resp->status != nfs_ok)
@@ -483,6 +474,8 @@ nfssvc_encode_readlinkres(struct svc_rqst *rqstp, __be32 *p)
*p = 0;
rqstp->rq_res.tail[0].iov_len = 4 - (resp->len&3);
}
+ if (svc_encode_result_payload(rqstp, head->iov_len, resp->len))
+ return 0;
return 1;
}
@@ -490,6 +483,7 @@ int
nfssvc_encode_readres(struct svc_rqst *rqstp, __be32 *p)
{
struct nfsd_readres *resp = rqstp->rq_resp;
+ struct kvec *head = rqstp->rq_res.head;
*p++ = resp->status;
if (resp->status != nfs_ok)
@@ -507,6 +501,8 @@ nfssvc_encode_readres(struct svc_rqst *rqstp, __be32 *p)
*p = 0;
rqstp->rq_res.tail[0].iov_len = 4 - (resp->count&3);
}
+ if (svc_encode_result_payload(rqstp, head->iov_len, resp->count))
+ return 0;
return 1;
}
diff --git a/fs/nfsd/trace.c b/fs/nfsd/trace.c
index 90967466a1e5..f008b95ceec2 100644
--- a/fs/nfsd/trace.c
+++ b/fs/nfsd/trace.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#define CREATE_TRACE_POINTS
#include "trace.h"
diff --git a/fs/nfsd/trace.h b/fs/nfsd/trace.h
index 99bf07800cd0..92a0973dd671 100644
--- a/fs/nfsd/trace.h
+++ b/fs/nfsd/trace.h
@@ -12,6 +12,100 @@
#include "export.h"
#include "nfsfh.h"
+#define NFSD_TRACE_PROC_ARG_FIELDS \
+ __field(unsigned int, netns_ino) \
+ __field(u32, xid) \
+ __array(unsigned char, server, sizeof(struct sockaddr_in6)) \
+ __array(unsigned char, client, sizeof(struct sockaddr_in6))
+
+#define NFSD_TRACE_PROC_ARG_ASSIGNMENTS \
+ do { \
+ __entry->netns_ino = SVC_NET(rqstp)->ns.inum; \
+ __entry->xid = be32_to_cpu(rqstp->rq_xid); \
+ memcpy(__entry->server, &rqstp->rq_xprt->xpt_local, \
+ rqstp->rq_xprt->xpt_locallen); \
+ memcpy(__entry->client, &rqstp->rq_xprt->xpt_remote, \
+ rqstp->rq_xprt->xpt_remotelen); \
+ } while (0);
+
+#define NFSD_TRACE_PROC_RES_FIELDS \
+ __field(unsigned int, netns_ino) \
+ __field(u32, xid) \
+ __field(unsigned long, status) \
+ __array(unsigned char, server, sizeof(struct sockaddr_in6)) \
+ __array(unsigned char, client, sizeof(struct sockaddr_in6))
+
+#define NFSD_TRACE_PROC_RES_ASSIGNMENTS(error) \
+ do { \
+ __entry->netns_ino = SVC_NET(rqstp)->ns.inum; \
+ __entry->xid = be32_to_cpu(rqstp->rq_xid); \
+ __entry->status = be32_to_cpu(error); \
+ memcpy(__entry->server, &rqstp->rq_xprt->xpt_local, \
+ rqstp->rq_xprt->xpt_locallen); \
+ memcpy(__entry->client, &rqstp->rq_xprt->xpt_remote, \
+ rqstp->rq_xprt->xpt_remotelen); \
+ } while (0);
+
+TRACE_EVENT(nfsd_garbage_args_err,
+ TP_PROTO(
+ const struct svc_rqst *rqstp
+ ),
+ TP_ARGS(rqstp),
+ TP_STRUCT__entry(
+ NFSD_TRACE_PROC_ARG_FIELDS
+
+ __field(u32, vers)
+ __field(u32, proc)
+ ),
+ TP_fast_assign(
+ NFSD_TRACE_PROC_ARG_ASSIGNMENTS
+
+ __entry->vers = rqstp->rq_vers;
+ __entry->proc = rqstp->rq_proc;
+ ),
+ TP_printk("xid=0x%08x vers=%u proc=%u",
+ __entry->xid, __entry->vers, __entry->proc
+ )
+);
+
+TRACE_EVENT(nfsd_cant_encode_err,
+ TP_PROTO(
+ const struct svc_rqst *rqstp
+ ),
+ TP_ARGS(rqstp),
+ TP_STRUCT__entry(
+ NFSD_TRACE_PROC_ARG_FIELDS
+
+ __field(u32, vers)
+ __field(u32, proc)
+ ),
+ TP_fast_assign(
+ NFSD_TRACE_PROC_ARG_ASSIGNMENTS
+
+ __entry->vers = rqstp->rq_vers;
+ __entry->proc = rqstp->rq_proc;
+ ),
+ TP_printk("xid=0x%08x vers=%u proc=%u",
+ __entry->xid, __entry->vers, __entry->proc
+ )
+);
+
+#define show_nfsd_may_flags(x) \
+ __print_flags(x, "|", \
+ { NFSD_MAY_EXEC, "EXEC" }, \
+ { NFSD_MAY_WRITE, "WRITE" }, \
+ { NFSD_MAY_READ, "READ" }, \
+ { NFSD_MAY_SATTR, "SATTR" }, \
+ { NFSD_MAY_TRUNC, "TRUNC" }, \
+ { NFSD_MAY_LOCK, "LOCK" }, \
+ { NFSD_MAY_OWNER_OVERRIDE, "OWNER_OVERRIDE" }, \
+ { NFSD_MAY_LOCAL_ACCESS, "LOCAL_ACCESS" }, \
+ { NFSD_MAY_BYPASS_GSS_ON_ROOT, "BYPASS_GSS_ON_ROOT" }, \
+ { NFSD_MAY_NOT_BREAK_LEASE, "NOT_BREAK_LEASE" }, \
+ { NFSD_MAY_BYPASS_GSS, "BYPASS_GSS" }, \
+ { NFSD_MAY_READ_IF_EXEC, "READ_IF_EXEC" }, \
+ { NFSD_MAY_64BIT_COOKIE, "64BIT_COOKIE" })
+
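
show_nfsd_may_flags covers every NFSD_MAY_* bit, replacing the abbreviated show_nf_may table removed further down. The __print_flags() helper renders a bitmask as a '|'-separated name list; a user-space equivalent of that rendering, with illustrative bit values rather than the kernel's NFSD_MAY_* numbers:

#include <stdio.h>

struct flag_name { unsigned long bit; const char *name; };

static void print_flags(unsigned long mask, const struct flag_name *tbl,
                        size_t n)
{
        const char *sep = "";
        size_t i;

        for (i = 0; i < n; i++) {
                if (mask & tbl[i].bit) {
                        printf("%s%s", sep, tbl[i].name);
                        sep = "|";
                }
        }
        putchar('\n');
}

int main(void)
{
        const struct flag_name tbl[] = {
                { 0x001, "EXEC" }, { 0x002, "WRITE" }, { 0x004, "READ" },
        };

        print_flags(0x006, tbl, sizeof(tbl) / sizeof(tbl[0]));  /* WRITE|READ */
        return 0;
}
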
TRACE_EVENT(nfsd_compound,
TP_PROTO(const struct svc_rqst *rqst,
u32 args_opcnt),
@@ -51,6 +145,56 @@ TRACE_EVENT(nfsd_compound_status,
__get_str(name), __entry->status)
)
+TRACE_EVENT(nfsd_compound_decode_err,
+ TP_PROTO(
+ const struct svc_rqst *rqstp,
+ u32 args_opcnt,
+ u32 resp_opcnt,
+ u32 opnum,
+ __be32 status
+ ),
+ TP_ARGS(rqstp, args_opcnt, resp_opcnt, opnum, status),
+ TP_STRUCT__entry(
+ NFSD_TRACE_PROC_RES_FIELDS
+
+ __field(u32, args_opcnt)
+ __field(u32, resp_opcnt)
+ __field(u32, opnum)
+ ),
+ TP_fast_assign(
+ NFSD_TRACE_PROC_RES_ASSIGNMENTS(status)
+
+ __entry->args_opcnt = args_opcnt;
+ __entry->resp_opcnt = resp_opcnt;
+ __entry->opnum = opnum;
+ ),
+ TP_printk("op=%u/%u opnum=%u status=%lu",
+ __entry->resp_opcnt, __entry->args_opcnt,
+ __entry->opnum, __entry->status)
+);
+
+TRACE_EVENT(nfsd_compound_encode_err,
+ TP_PROTO(
+ const struct svc_rqst *rqstp,
+ u32 opnum,
+ __be32 status
+ ),
+ TP_ARGS(rqstp, opnum, status),
+ TP_STRUCT__entry(
+ NFSD_TRACE_PROC_RES_FIELDS
+
+ __field(u32, opnum)
+ ),
+ TP_fast_assign(
+ NFSD_TRACE_PROC_RES_ASSIGNMENTS(status)
+
+ __entry->opnum = opnum;
+ ),
+ TP_printk("opnum=%u status=%lu",
+ __entry->opnum, __entry->status)
+);
+
DECLARE_EVENT_CLASS(nfsd_fh_err_class,
TP_PROTO(struct svc_rqst *rqstp,
struct svc_fh *fhp,
@@ -421,6 +565,9 @@ TRACE_EVENT(nfsd_clid_inuse_err,
__entry->cl_boot, __entry->cl_id)
)
+/*
+ * from fs/nfsd/filecache.h
+ */
TRACE_DEFINE_ENUM(NFSD_FILE_HASHED);
TRACE_DEFINE_ENUM(NFSD_FILE_PENDING);
TRACE_DEFINE_ENUM(NFSD_FILE_BREAK_READ);
@@ -435,13 +582,6 @@ TRACE_DEFINE_ENUM(NFSD_FILE_REFERENCED);
{ 1 << NFSD_FILE_BREAK_WRITE, "BREAK_WRITE" }, \
{ 1 << NFSD_FILE_REFERENCED, "REFERENCED"})
-/* FIXME: This should probably be fleshed out in the future. */
-#define show_nf_may(val) \
- __print_flags(val, "|", \
- { NFSD_MAY_READ, "READ" }, \
- { NFSD_MAY_WRITE, "WRITE" }, \
- { NFSD_MAY_NOT_BREAK_LEASE, "NOT_BREAK_LEASE" })
-
DECLARE_EVENT_CLASS(nfsd_file_class,
TP_PROTO(struct nfsd_file *nf),
TP_ARGS(nf),
@@ -461,12 +601,12 @@ DECLARE_EVENT_CLASS(nfsd_file_class,
__entry->nf_may = nf->nf_may;
__entry->nf_file = nf->nf_file;
),
- TP_printk("hash=0x%x inode=0x%p ref=%d flags=%s may=%s file=%p",
+ TP_printk("hash=0x%x inode=%p ref=%d flags=%s may=%s file=%p",
__entry->nf_hashval,
__entry->nf_inode,
__entry->nf_ref,
show_nf_flags(__entry->nf_flags),
- show_nf_may(__entry->nf_may),
+ show_nfsd_may_flags(__entry->nf_may),
__entry->nf_file)
)
@@ -492,10 +632,10 @@ TRACE_EVENT(nfsd_file_acquire,
__field(u32, xid)
__field(unsigned int, hash)
__field(void *, inode)
- __field(unsigned int, may_flags)
+ __field(unsigned long, may_flags)
__field(int, nf_ref)
__field(unsigned long, nf_flags)
- __field(unsigned char, nf_may)
+ __field(unsigned long, nf_may)
__field(struct file *, nf_file)
__field(u32, status)
),
@@ -512,12 +652,12 @@ TRACE_EVENT(nfsd_file_acquire,
__entry->status = be32_to_cpu(status);
),
- TP_printk("xid=0x%x hash=0x%x inode=0x%p may_flags=%s ref=%d nf_flags=%s nf_may=%s nf_file=0x%p status=%u",
+ TP_printk("xid=0x%x hash=0x%x inode=%p may_flags=%s ref=%d nf_flags=%s nf_may=%s nf_file=%p status=%u",
__entry->xid, __entry->hash, __entry->inode,
- show_nf_may(__entry->may_flags), __entry->nf_ref,
- show_nf_flags(__entry->nf_flags),
- show_nf_may(__entry->nf_may), __entry->nf_file,
- __entry->status)
+ show_nfsd_may_flags(__entry->may_flags),
+ __entry->nf_ref, show_nf_flags(__entry->nf_flags),
+ show_nfsd_may_flags(__entry->nf_may),
+ __entry->nf_file, __entry->status)
);
DECLARE_EVENT_CLASS(nfsd_file_search_class,
@@ -533,7 +673,7 @@ DECLARE_EVENT_CLASS(nfsd_file_search_class,
__entry->hash = hash;
__entry->found = found;
),
- TP_printk("hash=0x%x inode=0x%p found=%d", __entry->hash,
+ TP_printk("hash=0x%x inode=%p found=%d", __entry->hash,
__entry->inode, __entry->found)
);
@@ -561,7 +701,7 @@ TRACE_EVENT(nfsd_file_fsnotify_handle_event,
__entry->mode = inode->i_mode;
__entry->mask = mask;
),
- TP_printk("inode=0x%p nlink=%u mode=0%ho mask=0x%x", __entry->inode,
+ TP_printk("inode=%p nlink=%u mode=0%ho mask=0x%x", __entry->inode,
__entry->nlink, __entry->mode, __entry->mask)
);
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 1ecaceebee13..04937e51de56 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -978,18 +978,25 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
__be32 *verf)
{
struct file *file = nf->nf_file;
+ struct super_block *sb = file_inode(file)->i_sb;
struct svc_export *exp;
struct iov_iter iter;
__be32 nfserr;
int host_err;
int use_wgather;
loff_t pos = offset;
+ unsigned long exp_op_flags = 0;
unsigned int pflags = current->flags;
rwf_t flags = 0;
+ bool restore_flags = false;
trace_nfsd_write_opened(rqstp, fhp, offset, *cnt);
- if (test_bit(RQ_LOCAL, &rqstp->rq_flags))
+ if (sb->s_export_op)
+ exp_op_flags = sb->s_export_op->flags;
+
+ if (test_bit(RQ_LOCAL, &rqstp->rq_flags) &&
+ !(exp_op_flags & EXPORT_OP_REMOTE_FS)) {
/*
* We want throttling in balance_dirty_pages()
* and shrink_inactive_list() to only consider
@@ -998,6 +1005,8 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf,
* the client's dirty pages or its congested queue.
*/
current->flags |= PF_LOCAL_THROTTLE;
+ restore_flags = true;
+ }
exp = fhp->fh_export;
use_wgather = (rqstp->rq_vers == 2) && EX_WGATHER(exp);
@@ -1049,7 +1058,7 @@ out_nfserr:
trace_nfsd_write_err(rqstp, fhp, offset, host_err);
nfserr = nfserrno(host_err);
}
- if (test_bit(RQ_LOCAL, &rqstp->rq_flags))
+ if (restore_flags)
current_restore_flags(pflags, PF_LOCAL_THROTTLE);
return nfserr;
}
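
The write path now checks the export's EXPORT_OP_REMOTE_FS flag before setting PF_LOCAL_THROTTLE, and the new restore_flags bool guarantees the flag is cleared only when it was actually set. The save/test/restore shape, sketched with stand-in names and flag bits:

#include <stdio.h>

#define PF_THROTTLE   0x1   /* illustrative stand-in for PF_LOCAL_THROTTLE */
#define OP_REMOTE_FS  0x8   /* illustrative stand-in for EXPORT_OP_REMOTE_FS */

static unsigned int task_flags;

static void do_write(int local_transport, unsigned long export_flags)
{
        unsigned int saved = task_flags;
        int restore = 0;

        /* Only throttle locally when the backing fs is not remote. */
        if (local_transport && !(export_flags & OP_REMOTE_FS)) {
                task_flags |= PF_THROTTLE;
                restore = 1;
        }

        printf("writing with flags 0x%x\n", task_flags);

        /* Restore just the throttle bit from the saved copy. */
        if (restore)
                task_flags = (task_flags & ~PF_THROTTLE) | (saved & PF_THROTTLE);
}

int main(void)
{
        do_write(1, 0);             /* local fs: throttled */
        do_write(1, OP_REMOTE_FS);  /* re-exported NFS: not throttled */
        return 0;
}
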
@@ -1724,7 +1733,7 @@ nfsd_rename(struct svc_rqst *rqstp, struct svc_fh *ffhp, char *fname, int flen,
struct inode *fdir, *tdir;
__be32 err;
int host_err;
- bool has_cached = false;
+ bool close_cached = false;
err = fh_verify(rqstp, ffhp, S_IFDIR, NFSD_MAY_REMOVE);
if (err)
@@ -1783,8 +1792,9 @@ retry:
if (ffhp->fh_export->ex_path.dentry != tfhp->fh_export->ex_path.dentry)
goto out_dput_new;
- if (nfsd_has_cached_files(ndentry)) {
- has_cached = true;
+ if ((ndentry->d_sb->s_export_op->flags & EXPORT_OP_CLOSE_BEFORE_UNLINK) &&
+ nfsd_has_cached_files(ndentry)) {
+ close_cached = true;
goto out_dput_old;
} else {
host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL, 0);
@@ -1805,7 +1815,7 @@ retry:
* as that would do the wrong thing if the two directories
* were the same, so again we do it by hand.
*/
- if (!has_cached) {
+ if (!close_cached) {
fill_post_wcc(ffhp);
fill_post_wcc(tfhp);
}
@@ -1819,8 +1829,8 @@ retry:
* shouldn't be done with locks held however, so we delay it until this
* point and then reattempt the whole shebang.
*/
- if (has_cached) {
- has_cached = false;
+ if (close_cached) {
+ close_cached = false;
nfsd_close_cached_files(ndentry);
dput(ndentry);
goto retry;
@@ -1872,7 +1882,8 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
type = d_inode(rdentry)->i_mode & S_IFMT;
if (type != S_IFDIR) {
- nfsd_close_cached_files(rdentry);
+ if (rdentry->d_sb->s_export_op->flags & EXPORT_OP_CLOSE_BEFORE_UNLINK)
+ nfsd_close_cached_files(rdentry);
host_err = vfs_unlink(dirp, rdentry, NULL);
} else {
host_err = vfs_rmdir(dirp, rdentry);
diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h
index 0ff336b0b25f..ad77387734cc 100644
--- a/fs/nfsd/xdr.h
+++ b/fs/nfsd/xdr.h
@@ -144,7 +144,6 @@ union nfsd_xdrstore {
#define NFS2_SVC_XDRSIZE sizeof(union nfsd_xdrstore)
-int nfssvc_decode_void(struct svc_rqst *, __be32 *);
int nfssvc_decode_fhandle(struct svc_rqst *, __be32 *);
int nfssvc_decode_sattrargs(struct svc_rqst *, __be32 *);
int nfssvc_decode_diropargs(struct svc_rqst *, __be32 *);
@@ -156,7 +155,6 @@ int nfssvc_decode_readlinkargs(struct svc_rqst *, __be32 *);
int nfssvc_decode_linkargs(struct svc_rqst *, __be32 *);
int nfssvc_decode_symlinkargs(struct svc_rqst *, __be32 *);
int nfssvc_decode_readdirargs(struct svc_rqst *, __be32 *);
-int nfssvc_encode_void(struct svc_rqst *, __be32 *);
int nfssvc_encode_stat(struct svc_rqst *, __be32 *);
int nfssvc_encode_attrstat(struct svc_rqst *, __be32 *);
int nfssvc_encode_diropres(struct svc_rqst *, __be32 *);
diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h
index ae6fa6c9cb46..456fcd7a1038 100644
--- a/fs/nfsd/xdr3.h
+++ b/fs/nfsd/xdr3.h
@@ -273,7 +273,6 @@ union nfsd3_xdrstore {
#define NFS3_SVC_XDRSIZE sizeof(union nfsd3_xdrstore)
-int nfs3svc_decode_voidarg(struct svc_rqst *, __be32 *);
int nfs3svc_decode_fhandle(struct svc_rqst *, __be32 *);
int nfs3svc_decode_sattrargs(struct svc_rqst *, __be32 *);
int nfs3svc_decode_diropargs(struct svc_rqst *, __be32 *);
@@ -290,7 +289,6 @@ int nfs3svc_decode_symlinkargs(struct svc_rqst *, __be32 *);
int nfs3svc_decode_readdirargs(struct svc_rqst *, __be32 *);
int nfs3svc_decode_readdirplusargs(struct svc_rqst *, __be32 *);
int nfs3svc_decode_commitargs(struct svc_rqst *, __be32 *);
-int nfs3svc_encode_voidres(struct svc_rqst *, __be32 *);
int nfs3svc_encode_attrstat(struct svc_rqst *, __be32 *);
int nfs3svc_encode_wccstat(struct svc_rqst *, __be32 *);
int nfs3svc_encode_diropres(struct svc_rqst *, __be32 *);
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 679d40af1bbb..a60ff5ce1a37 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -76,12 +76,7 @@ static inline bool nfsd4_has_session(struct nfsd4_compound_state *cs)
struct nfsd4_change_info {
u32 atomic;
- bool change_supported;
- u32 before_ctime_sec;
- u32 before_ctime_nsec;
u64 before_change;
- u32 after_ctime_sec;
- u32 after_ctime_nsec;
u64 after_change;
};
@@ -252,7 +247,8 @@ struct nfsd4_listxattrs {
struct nfsd4_open {
u32 op_claim_type; /* request */
- struct xdr_netobj op_fname; /* request - everything but CLAIM_PREV */
+ u32 op_fnamelen;
+ char * op_fname; /* request - everything but CLAIM_PREV */
u32 op_delegate_type; /* request - CLAIM_PREV only */
stateid_t op_delegate_stateid; /* request - response */
u32 op_why_no_deleg; /* response - DELEG_NONE_EXT only */
@@ -385,13 +381,6 @@ struct nfsd4_setclientid_confirm {
nfs4_verifier sc_confirm;
};
-struct nfsd4_saved_compoundargs {
- __be32 *p;
- __be32 *end;
- int pagelen;
- struct page **pagelist;
-};
-
struct nfsd4_test_stateid_id {
__be32 ts_id_status;
stateid_t ts_id_stateid;
@@ -419,8 +408,7 @@ struct nfsd4_write {
u64 wr_offset; /* request */
u32 wr_stable_how; /* request */
u32 wr_buflen; /* request */
- struct kvec wr_head;
- struct page ** wr_pagelist; /* request */
+ struct xdr_buf wr_payload; /* request */
u32 wr_bytes_written; /* response */
u32 wr_how_written; /* response */
@@ -433,7 +421,7 @@ struct nfsd4_exchange_id {
u32 flags;
clientid_t clientid;
u32 seqid;
- int spa_how;
+ u32 spa_how;
u32 spo_must_enforce[3];
u32 spo_must_allow[3];
struct xdr_netobj nii_domain;
@@ -554,7 +542,7 @@ struct nfsd4_copy {
bool cp_intra;
/* both */
- bool cp_synchronous;
+ u32 cp_synchronous;
/* response */
struct nfsd42_write_res cp_res;
@@ -615,7 +603,7 @@ struct nfsd4_copy_notify {
};
struct nfsd4_op {
- int opnum;
+ u32 opnum;
const struct nfsd4_operation * opdesc;
__be32 status;
union nfsd4_op_u {
@@ -696,15 +684,8 @@ struct svcxdr_tmpbuf {
struct nfsd4_compoundargs {
/* scratch variables for XDR decode */
- __be32 * p;
- __be32 * end;
- struct page ** pagelist;
- int pagelen;
- bool tail;
- __be32 tmp[8];
- __be32 * tmpp;
+ struct xdr_stream *xdr;
struct svcxdr_tmpbuf *to_free;
-
struct svc_rqst *rqstp;
u32 taglen;
@@ -767,22 +748,14 @@ static inline void
set_change_info(struct nfsd4_change_info *cinfo, struct svc_fh *fhp)
{
BUG_ON(!fhp->fh_pre_saved);
- cinfo->atomic = (u32)fhp->fh_post_saved;
- cinfo->change_supported = IS_I_VERSION(d_inode(fhp->fh_dentry));
+ cinfo->atomic = (u32)(fhp->fh_post_saved && !fhp->fh_no_atomic_attr);
cinfo->before_change = fhp->fh_pre_change;
cinfo->after_change = fhp->fh_post_change;
- cinfo->before_ctime_sec = fhp->fh_pre_ctime.tv_sec;
- cinfo->before_ctime_nsec = fhp->fh_pre_ctime.tv_nsec;
- cinfo->after_ctime_sec = fhp->fh_post_attr.ctime.tv_sec;
- cinfo->after_ctime_nsec = fhp->fh_post_attr.ctime.tv_nsec;
-
}
bool nfsd4_mach_creds_match(struct nfs4_client *cl, struct svc_rqst *rqstp);
-int nfs4svc_decode_voidarg(struct svc_rqst *, __be32 *);
-int nfs4svc_encode_voidres(struct svc_rqst *, __be32 *);
int nfs4svc_decode_compoundargs(struct svc_rqst *, __be32 *);
int nfs4svc_encode_compoundres(struct svc_rqst *, __be32 *);
__be32 nfsd4_check_resp_size(struct nfsd4_compoundres *, u32);
diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
index 3ceb72b67a7a..9f4d4bcbf251 100644
--- a/include/linux/exportfs.h
+++ b/include/linux/exportfs.h
@@ -213,12 +213,25 @@ struct export_operations {
bool write, u32 *device_generation);
int (*commit_blocks)(struct inode *inode, struct iomap *iomaps,
int nr_iomaps, struct iattr *iattr);
+#define EXPORT_OP_NOWCC (0x1) /* don't collect v3 wcc data */
+#define EXPORT_OP_NOSUBTREECHK (0x2) /* no subtree checking */
+#define EXPORT_OP_CLOSE_BEFORE_UNLINK (0x4) /* close files before unlink */
+#define EXPORT_OP_REMOTE_FS (0x8) /* Filesystem is remote */
+#define EXPORT_OP_NOATOMIC_ATTR (0x10) /* Filesystem cannot supply
+ atomic attribute updates
+ */
+ unsigned long flags;
};
extern int exportfs_encode_inode_fh(struct inode *inode, struct fid *fid,
int *max_len, struct inode *parent);
extern int exportfs_encode_fh(struct dentry *dentry, struct fid *fid,
int *max_len, int connectable);
+extern struct dentry *exportfs_decode_fh_raw(struct vfsmount *mnt,
+ struct fid *fid, int fh_len,
+ int fileid_type,
+ int (*acceptable)(void *, struct dentry *),
+ void *context);
extern struct dentry *exportfs_decode_fh(struct vfsmount *mnt, struct fid *fid,
int fh_len, int fileid_type, int (*acceptable)(void *, struct dentry *),
void *context);
diff --git a/include/linux/iversion.h b/include/linux/iversion.h
index 2917ef990d43..3bfebde5a1a6 100644
--- a/include/linux/iversion.h
+++ b/include/linux/iversion.h
@@ -328,6 +328,19 @@ inode_query_iversion(struct inode *inode)
return cur >> I_VERSION_QUERIED_SHIFT;
}
+/*
+ * For filesystems without any sort of change attribute, the best we can
+ * do is fake one up from the ctime:
+ */
+static inline u64 time_to_chattr(struct timespec64 *t)
+{
+ u64 chattr = t->tv_sec;
+
+ chattr <<= 32;
+ chattr += t->tv_nsec;
+ return chattr;
+}
+
/**
* inode_eq_iversion_raw - check whether the raw i_version counter has changed
* @inode: inode to check
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 9dc7eeac924f..5b4c67c91f56 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -385,13 +385,6 @@ enum lock_type4 {
NFS4_WRITEW_LT = 4
};
-enum change_attr_type4 {
- NFS4_CHANGE_TYPE_IS_MONOTONIC_INCR = 0,
- NFS4_CHANGE_TYPE_IS_VERSION_COUNTER = 1,
- NFS4_CHANGE_TYPE_IS_VERSION_COUNTER_NOPNFS = 2,
- NFS4_CHANGE_TYPE_IS_TIME_METADATA = 3,
- NFS4_CHANGE_TYPE_IS_UNDEFINED = 4
-};
/* Mandatory Attributes */
#define FATTR4_WORD0_SUPPORTED_ATTRS (1UL << 0)
@@ -459,7 +452,6 @@ enum change_attr_type4 {
#define FATTR4_WORD2_LAYOUT_BLKSIZE (1UL << 1)
#define FATTR4_WORD2_MDSTHRESHOLD (1UL << 4)
#define FATTR4_WORD2_CLONE_BLKSIZE (1UL << 13)
-#define FATTR4_WORD2_CHANGE_ATTR_TYPE (1UL << 15)
#define FATTR4_WORD2_SECURITY_LABEL (1UL << 16)
#define FATTR4_WORD2_MODE_UMASK (1UL << 17)
#define FATTR4_WORD2_XATTR_SUPPORT (1UL << 18)
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 386628b36bc7..34c2a69820e9 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -247,6 +247,8 @@ struct svc_rqst {
size_t rq_xprt_hlen; /* xprt header len */
struct xdr_buf rq_arg;
+ struct xdr_stream rq_arg_stream;
+ struct page *rq_scratch_page;
struct xdr_buf rq_res;
struct page *rq_pages[RPCSVC_MAXPAGES + 1];
struct page * *rq_respages; /* points into rq_pages */
@@ -519,9 +521,9 @@ void svc_wake_up(struct svc_serv *);
void svc_reserve(struct svc_rqst *rqstp, int space);
struct svc_pool * svc_pool_for_cpu(struct svc_serv *serv, int cpu);
char * svc_print_addr(struct svc_rqst *, char *, size_t);
-int svc_encode_read_payload(struct svc_rqst *rqstp,
- unsigned int offset,
- unsigned int length);
+int svc_encode_result_payload(struct svc_rqst *rqstp,
+ unsigned int offset,
+ unsigned int length);
unsigned int svc_fill_write_vector(struct svc_rqst *rqstp,
struct page **pages,
struct kvec *first, size_t total);
@@ -557,4 +559,18 @@ static inline void svc_reserve_auth(struct svc_rqst *rqstp, int space)
svc_reserve(rqstp, space + rqstp->rq_auth_slack);
}
+/**
+ * svcxdr_init_decode - Prepare an xdr_stream for svc Call decoding
+ * @rqstp: controlling server RPC transaction context
+ *
+ */
+static inline void svcxdr_init_decode(struct svc_rqst *rqstp)
+{
+ struct xdr_stream *xdr = &rqstp->rq_arg_stream;
+ struct kvec *argv = rqstp->rq_arg.head;
+
+ xdr_init_decode(xdr, &rqstp->rq_arg, argv->iov_base, NULL);
+ xdr_set_scratch_page(xdr, rqstp->rq_scratch_page);
+}
+
#endif /* SUNRPC_SVC_H */
diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h
index 9dc3a3b88391..294b56e61522 100644
--- a/include/linux/sunrpc/svc_rdma.h
+++ b/include/linux/sunrpc/svc_rdma.h
@@ -47,6 +47,8 @@
#include <linux/sunrpc/svcsock.h>
#include <linux/sunrpc/rpc_rdma.h>
#include <linux/sunrpc/rpc_rdma_cid.h>
+#include <linux/sunrpc/svc_rdma_pcl.h>
+
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
@@ -142,10 +144,15 @@ struct svc_rdma_recv_ctxt {
unsigned int rc_page_count;
unsigned int rc_hdr_count;
u32 rc_inv_rkey;
- __be32 *rc_write_list;
- __be32 *rc_reply_chunk;
- unsigned int rc_read_payload_offset;
- unsigned int rc_read_payload_length;
+ __be32 rc_msgtype;
+
+ struct svc_rdma_pcl rc_call_pcl;
+
+ struct svc_rdma_pcl rc_read_pcl;
+ struct svc_rdma_chunk *rc_cur_result_payload;
+ struct svc_rdma_pcl rc_write_pcl;
+ struct svc_rdma_pcl rc_reply_pcl;
+
struct page *rc_pages[RPCSVC_MAXPAGES];
};
@@ -171,6 +178,8 @@ extern void svc_rdma_handle_bc_reply(struct svc_rqst *rqstp,
/* svc_rdma_recvfrom.c */
extern void svc_rdma_recv_ctxts_destroy(struct svcxprt_rdma *rdma);
extern bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma);
+extern struct svc_rdma_recv_ctxt *
+ svc_rdma_recv_ctxt_get(struct svcxprt_rdma *rdma);
extern void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma,
struct svc_rdma_recv_ctxt *ctxt);
extern void svc_rdma_flush_recv_queues(struct svcxprt_rdma *rdma);
@@ -179,16 +188,15 @@ extern int svc_rdma_recvfrom(struct svc_rqst *);
/* svc_rdma_rw.c */
extern void svc_rdma_destroy_rw_ctxts(struct svcxprt_rdma *rdma);
-extern int svc_rdma_recv_read_chunk(struct svcxprt_rdma *rdma,
- struct svc_rqst *rqstp,
- struct svc_rdma_recv_ctxt *head, __be32 *p);
extern int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma,
- __be32 *wr_ch, struct xdr_buf *xdr,
- unsigned int offset,
- unsigned long length);
+ const struct svc_rdma_chunk *chunk,
+ const struct xdr_buf *xdr);
extern int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma,
const struct svc_rdma_recv_ctxt *rctxt,
- struct xdr_buf *xdr);
+ const struct xdr_buf *xdr);
+extern int svc_rdma_process_read_list(struct svcxprt_rdma *rdma,
+ struct svc_rqst *rqstp,
+ struct svc_rdma_recv_ctxt *head);
/* svc_rdma_sendto.c */
extern void svc_rdma_send_ctxts_destroy(struct svcxprt_rdma *rdma);
@@ -201,14 +209,14 @@ extern int svc_rdma_send(struct svcxprt_rdma *rdma,
extern int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
struct svc_rdma_send_ctxt *sctxt,
const struct svc_rdma_recv_ctxt *rctxt,
- struct xdr_buf *xdr);
+ const struct xdr_buf *xdr);
extern void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma,
struct svc_rdma_send_ctxt *sctxt,
struct svc_rdma_recv_ctxt *rctxt,
int status);
extern int svc_rdma_sendto(struct svc_rqst *);
-extern int svc_rdma_read_payload(struct svc_rqst *rqstp, unsigned int offset,
- unsigned int length);
+extern int svc_rdma_result_payload(struct svc_rqst *rqstp, unsigned int offset,
+ unsigned int length);
/* svc_rdma_transport.c */
extern struct svc_xprt_class svc_rdma_class;
diff --git a/include/linux/sunrpc/svc_rdma_pcl.h b/include/linux/sunrpc/svc_rdma_pcl.h
new file mode 100644
index 000000000000..7516ad0fae80
--- /dev/null
+++ b/include/linux/sunrpc/svc_rdma_pcl.h
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2020, Oracle and/or its affiliates
+ */
+
+#ifndef SVC_RDMA_PCL_H
+#define SVC_RDMA_PCL_H
+
+#include <linux/list.h>
+
+struct svc_rdma_segment {
+ u32 rs_handle;
+ u32 rs_length;
+ u64 rs_offset;
+};
+
+struct svc_rdma_chunk {
+ struct list_head ch_list;
+
+ u32 ch_position;
+ u32 ch_length;
+ u32 ch_payload_length;
+
+ u32 ch_segcount;
+ struct svc_rdma_segment ch_segments[];
+};
+
+struct svc_rdma_pcl {
+ unsigned int cl_count;
+ struct list_head cl_chunks;
+};
+
+/**
+ * pcl_init - Initialize a parsed chunk list
+ * @pcl: parsed chunk list to initialize
+ *
+ */
+static inline void pcl_init(struct svc_rdma_pcl *pcl)
+{
+ INIT_LIST_HEAD(&pcl->cl_chunks);
+}
+
+/**
+ * pcl_is_empty - Return true if parsed chunk list is empty
+ * @pcl: parsed chunk list
+ *
+ */
+static inline bool pcl_is_empty(const struct svc_rdma_pcl *pcl)
+{
+ return list_empty(&pcl->cl_chunks);
+}
+
+/**
+ * pcl_first_chunk - Return first chunk in a parsed chunk list
+ * @pcl: parsed chunk list
+ *
+ * Returns the first chunk in the list, or NULL if the list is empty.
+ */
+static inline struct svc_rdma_chunk *
+pcl_first_chunk(const struct svc_rdma_pcl *pcl)
+{
+ if (pcl_is_empty(pcl))
+ return NULL;
+ return list_first_entry(&pcl->cl_chunks, struct svc_rdma_chunk,
+ ch_list);
+}
+
+/**
+ * pcl_next_chunk - Return next chunk in a parsed chunk list
+ * @pcl: a parsed chunk list
+ * @chunk: chunk in @pcl
+ *
+ * Returns the next chunk in the list, or NULL if @chunk is already last.
+ */
+static inline struct svc_rdma_chunk *
+pcl_next_chunk(const struct svc_rdma_pcl *pcl, struct svc_rdma_chunk *chunk)
+{
+ if (list_is_last(&chunk->ch_list, &pcl->cl_chunks))
+ return NULL;
+ return list_next_entry(chunk, ch_list);
+}
+
+/**
+ * pcl_for_each_chunk - Iterate over chunks in a parsed chunk list
+ * @pos: the loop cursor
+ * @pcl: a parsed chunk list
+ */
+#define pcl_for_each_chunk(pos, pcl) \
+ for (pos = list_first_entry(&(pcl)->cl_chunks, struct svc_rdma_chunk, ch_list); \
+ &pos->ch_list != &(pcl)->cl_chunks; \
+ pos = list_next_entry(pos, ch_list))
+
+/**
+ * pcl_for_each_segment - Iterate over segments in a parsed chunk
+ * @pos: the loop cursor
+ * @chunk: a parsed chunk
+ */
+#define pcl_for_each_segment(pos, chunk) \
+ for (pos = &(chunk)->ch_segments[0]; \
+ pos <= &(chunk)->ch_segments[(chunk)->ch_segcount - 1]; \
+ pos++)
+
+/**
+ * pcl_chunk_end_offset - Return offset of byte range following @chunk
+ * @chunk: chunk in a parsed chunk list
+ *
+ * Returns starting offset of the region just after @chunk
+ */
+static inline unsigned int
+pcl_chunk_end_offset(const struct svc_rdma_chunk *chunk)
+{
+ return xdr_align_size(chunk->ch_position + chunk->ch_payload_length);
+}
+
+struct svc_rdma_recv_ctxt;
+
+extern void pcl_free(struct svc_rdma_pcl *pcl);
+extern bool pcl_alloc_call(struct svc_rdma_recv_ctxt *rctxt, __be32 *p);
+extern bool pcl_alloc_read(struct svc_rdma_recv_ctxt *rctxt, __be32 *p);
+extern bool pcl_alloc_write(struct svc_rdma_recv_ctxt *rctxt,
+ struct svc_rdma_pcl *pcl, __be32 *p);
+extern int pcl_process_nonpayloads(const struct svc_rdma_pcl *pcl,
+ const struct xdr_buf *xdr,
+ int (*actor)(const struct xdr_buf *,
+ void *),
+ void *data);
+
+#endif /* SVC_RDMA_PCL_H */
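
The new parsed-chunk-list header gives svcrdma one in-memory representation for Read, Write, and Reply chunks. The pcl_chunk_end_offset() arithmetic — chunk position plus payload length, rounded up to the XDR quad boundary — is easy to verify in isolation (xdr_align_size() modeled as round-up-to-4):

#include <stdio.h>

/* XDR quantities are padded to 4-byte boundaries. */
static unsigned int xdr_align_size(unsigned int n)
{
        return (n + 3) & ~3u;
}

struct chunk { unsigned int position, payload_length; };

static unsigned int chunk_end_offset(const struct chunk *c)
{
        return xdr_align_size(c->position + c->payload_length);
}

int main(void)
{
        struct chunk c = { .position = 148, .payload_length = 5 };

        /* 148 + 5 = 153, padded up to 156 */
        printf("end offset = %u\n", chunk_end_offset(&c));
        return 0;
}
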
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index aca35ab5cff2..92455e0d5244 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -21,8 +21,8 @@ struct svc_xprt_ops {
int (*xpo_has_wspace)(struct svc_xprt *);
int (*xpo_recvfrom)(struct svc_rqst *);
int (*xpo_sendto)(struct svc_rqst *);
- int (*xpo_read_payload)(struct svc_rqst *, unsigned int,
- unsigned int);
+ int (*xpo_result_payload)(struct svc_rqst *, unsigned int,
+ unsigned int);
void (*xpo_release_rqst)(struct svc_rqst *);
void (*xpo_detach)(struct svc_xprt *);
void (*xpo_free)(struct svc_xprt *);
diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h
index 9548d075e06d..9b35ce50cf2b 100644
--- a/include/linux/sunrpc/xdr.h
+++ b/include/linux/sunrpc/xdr.h
@@ -183,7 +183,8 @@ xdr_adjust_iovec(struct kvec *iov, __be32 *p)
*/
extern void xdr_shift_buf(struct xdr_buf *, size_t);
extern void xdr_buf_from_iov(struct kvec *, struct xdr_buf *);
-extern int xdr_buf_subsegment(struct xdr_buf *, struct xdr_buf *, unsigned int, unsigned int);
+extern int xdr_buf_subsegment(const struct xdr_buf *buf, struct xdr_buf *subbuf,
+ unsigned int base, unsigned int len);
extern void xdr_buf_trim(struct xdr_buf *, unsigned int);
extern int read_bytes_from_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int);
extern int write_bytes_to_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int);
@@ -247,13 +248,57 @@ extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf,
__be32 *p, struct rpc_rqst *rqst);
extern void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf,
struct page **pages, unsigned int len);
-extern void xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen);
extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes);
extern unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len);
extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len);
extern int xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len, int (*actor)(struct scatterlist *, void *), void *data);
extern uint64_t xdr_align_data(struct xdr_stream *, uint64_t, uint32_t);
extern uint64_t xdr_expand_hole(struct xdr_stream *, uint64_t, uint64_t);
+extern bool xdr_stream_subsegment(struct xdr_stream *xdr, struct xdr_buf *subbuf,
+ unsigned int len);
+
+/**
+ * xdr_set_scratch_buffer - Attach a scratch buffer for decoding data.
+ * @xdr: pointer to xdr_stream struct
+ * @buf: pointer to an empty buffer
+ * @buflen: size of 'buf'
+ *
+ * The scratch buffer is used when decoding from an array of pages.
+ * If an xdr_inline_decode() call spans across page boundaries, then
+ * we copy the data into the scratch buffer in order to allow linear
+ * access.
+ */
+static inline void
+xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen)
+{
+ xdr->scratch.iov_base = buf;
+ xdr->scratch.iov_len = buflen;
+}
+
+/**
+ * xdr_set_scratch_page - Attach a scratch buffer for decoding data
+ * @xdr: pointer to xdr_stream struct
+ * @page: an anonymous page
+ *
+ * See xdr_set_scratch_buffer().
+ */
+static inline void
+xdr_set_scratch_page(struct xdr_stream *xdr, struct page *page)
+{
+ xdr_set_scratch_buffer(xdr, page_address(page), PAGE_SIZE);
+}
+
+/**
+ * xdr_reset_scratch_buffer - Clear scratch buffer information
+ * @xdr: pointer to xdr_stream struct
+ *
+ * See xdr_set_scratch_buffer().
+ */
+static inline void
+xdr_reset_scratch_buffer(struct xdr_stream *xdr)
+{
+ xdr_set_scratch_buffer(xdr, NULL, 0);
+}
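
The scratch buffer exists so xdr_inline_decode() can return a linear pointer even when an item straddles a page boundary: the pieces are copied into scratch first. The idea in miniature, with two discontiguous segments (a sketch, not the kernel implementation):

#include <stdio.h>
#include <string.h>

/* An item split across two buffers is copied into scratch so the
 * caller sees one linear region. */
static const char *inline_decode(const char *seg1, size_t len1,
                                 const char *seg2, size_t need,
                                 char *scratch)
{
        if (need <= len1)
                return seg1;            /* fast path: already linear */
        memcpy(scratch, seg1, len1);
        memcpy(scratch + len1, seg2, need - len1);
        return scratch;
}

int main(void)
{
        char scratch[16];
        const char *p = inline_decode("hell", 4, "o", 5, scratch);

        printf("%.5s\n", p);            /* "hello" */
        return 0;
}
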
/**
* xdr_stream_remaining - Return the number of bytes remaining in the stream
@@ -506,6 +551,27 @@ static inline bool xdr_item_is_present(const __be32 *p)
}
/**
+ * xdr_stream_decode_bool - Decode a boolean
+ * @xdr: pointer to xdr_stream
+ * @ptr: pointer to a u32 in which to store the result
+ *
+ * Return values:
+ * %0 on success
+ * %-EBADMSG on XDR buffer overflow
+ */
+static inline ssize_t
+xdr_stream_decode_bool(struct xdr_stream *xdr, __u32 *ptr)
+{
+ const size_t count = sizeof(*ptr);
+ __be32 *p = xdr_inline_decode(xdr, count);
+
+ if (unlikely(!p))
+ return -EBADMSG;
+ *ptr = (*p != xdr_zero);
+ return 0;
+}
+
+/**
* xdr_stream_decode_u32 - Decode a 32-bit integer
* @xdr: pointer to xdr_stream
* @ptr: location to store integer
@@ -527,6 +593,27 @@ xdr_stream_decode_u32(struct xdr_stream *xdr, __u32 *ptr)
}
/**
+ * xdr_stream_decode_u64 - Decode a 64-bit integer
+ * @xdr: pointer to xdr_stream
+ * @ptr: location to store 64-bit integer
+ *
+ * Return values:
+ * %0 on success
+ * %-EBADMSG on XDR buffer overflow
+ */
+static inline ssize_t
+xdr_stream_decode_u64(struct xdr_stream *xdr, __u64 *ptr)
+{
+ const size_t count = sizeof(*ptr);
+ __be32 *p = xdr_inline_decode(xdr, count);
+
+ if (unlikely(!p))
+ return -EBADMSG;
+ xdr_decode_hyper(p, ptr);
+ return 0;
+}
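
xdr_stream_decode_u64() follows the same shape as the u32 helper above it: reserve sizeof(__u64) bytes from the stream, then let xdr_decode_hyper() assemble two big-endian words. That assembly, modeled portably:

#include <stdint.h>
#include <stdio.h>

/* XDR "hyper": eight big-endian bytes -> host uint64_t. */
static uint64_t decode_hyper(const unsigned char *p)
{
        uint64_t hi = ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16) |
                      ((uint32_t)p[2] << 8)  |  (uint32_t)p[3];
        uint64_t lo = ((uint32_t)p[4] << 24) | ((uint32_t)p[5] << 16) |
                      ((uint32_t)p[6] << 8)  |  (uint32_t)p[7];

        return (hi << 32) | lo;
}

int main(void)
{
        unsigned char wire[8] = { 0, 0, 0, 1, 0, 0, 0, 2 };

        /* (1 << 32) + 2 */
        printf("%llu\n", (unsigned long long)decode_hyper(wire));
        return 0;
}
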
+
+/**
* xdr_stream_decode_opaque_fixed - Decode fixed length opaque xdr data
* @xdr: pointer to xdr_stream
* @ptr: location to store data
diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h
index bf1065772228..896aafc37b09 100644
--- a/include/trace/events/rpcrdma.h
+++ b/include/trace/events/rpcrdma.h
@@ -1410,101 +1410,112 @@ DEFINE_BADREQ_EVENT(drop);
DEFINE_BADREQ_EVENT(badproc);
DEFINE_BADREQ_EVENT(parse);
-DECLARE_EVENT_CLASS(svcrdma_segment_event,
+TRACE_EVENT(svcrdma_encode_wseg,
TP_PROTO(
+ const struct svc_rdma_send_ctxt *ctxt,
+ u32 segno,
u32 handle,
u32 length,
u64 offset
),
- TP_ARGS(handle, length, offset),
+ TP_ARGS(ctxt, segno, handle, length, offset),
TP_STRUCT__entry(
+ __field(u32, cq_id)
+ __field(int, completion_id)
+ __field(u32, segno)
__field(u32, handle)
__field(u32, length)
__field(u64, offset)
),
TP_fast_assign(
+ __entry->cq_id = ctxt->sc_cid.ci_queue_id;
+ __entry->completion_id = ctxt->sc_cid.ci_completion_id;
+ __entry->segno = segno;
__entry->handle = handle;
__entry->length = length;
__entry->offset = offset;
),
- TP_printk("%u@0x%016llx:0x%08x",
- __entry->length, (unsigned long long)__entry->offset,
- __entry->handle
+ TP_printk("cq_id=%u cid=%d segno=%u %u@0x%016llx:0x%08x",
+ __entry->cq_id, __entry->completion_id,
+ __entry->segno, __entry->length,
+ (unsigned long long)__entry->offset, __entry->handle
)
);
-#define DEFINE_SEGMENT_EVENT(name) \
- DEFINE_EVENT(svcrdma_segment_event, svcrdma_##name,\
- TP_PROTO( \
- u32 handle, \
- u32 length, \
- u64 offset \
- ), \
- TP_ARGS(handle, length, offset))
-
-DEFINE_SEGMENT_EVENT(decode_wseg);
-DEFINE_SEGMENT_EVENT(encode_rseg);
-DEFINE_SEGMENT_EVENT(send_rseg);
-DEFINE_SEGMENT_EVENT(encode_wseg);
-DEFINE_SEGMENT_EVENT(send_wseg);
-
-DECLARE_EVENT_CLASS(svcrdma_chunk_event,
+TRACE_EVENT(svcrdma_decode_rseg,
TP_PROTO(
- u32 length
+ const struct rpc_rdma_cid *cid,
+ const struct svc_rdma_chunk *chunk,
+ const struct svc_rdma_segment *segment
),
- TP_ARGS(length),
+ TP_ARGS(cid, chunk, segment),
TP_STRUCT__entry(
+ __field(u32, cq_id)
+ __field(int, completion_id)
+ __field(u32, segno)
+ __field(u32, position)
+ __field(u32, handle)
__field(u32, length)
+ __field(u64, offset)
),
TP_fast_assign(
- __entry->length = length;
+ __entry->cq_id = cid->ci_queue_id;
+ __entry->completion_id = cid->ci_completion_id;
+ __entry->segno = chunk->ch_segcount;
+ __entry->position = chunk->ch_position;
+ __entry->handle = segment->rs_handle;
+ __entry->length = segment->rs_length;
+ __entry->offset = segment->rs_offset;
),
- TP_printk("length=%u",
- __entry->length
+ TP_printk("cq_id=%u cid=%d segno=%u position=%u %u@0x%016llx:0x%08x",
+ __entry->cq_id, __entry->completion_id,
+ __entry->segno, __entry->position, __entry->length,
+ (unsigned long long)__entry->offset, __entry->handle
)
);
-#define DEFINE_CHUNK_EVENT(name) \
- DEFINE_EVENT(svcrdma_chunk_event, svcrdma_##name, \
- TP_PROTO( \
- u32 length \
- ), \
- TP_ARGS(length))
-
-DEFINE_CHUNK_EVENT(send_pzr);
-DEFINE_CHUNK_EVENT(encode_write_chunk);
-DEFINE_CHUNK_EVENT(send_write_chunk);
-DEFINE_CHUNK_EVENT(encode_read_chunk);
-DEFINE_CHUNK_EVENT(send_reply_chunk);
-
-TRACE_EVENT(svcrdma_send_read_chunk,
+TRACE_EVENT(svcrdma_decode_wseg,
TP_PROTO(
- u32 length,
- u32 position
+ const struct rpc_rdma_cid *cid,
+ const struct svc_rdma_chunk *chunk,
+ u32 segno
),
- TP_ARGS(length, position),
+ TP_ARGS(cid, chunk, segno),
TP_STRUCT__entry(
+ __field(u32, cq_id)
+ __field(int, completion_id)
+ __field(u32, segno)
+ __field(u32, handle)
__field(u32, length)
- __field(u32, position)
+ __field(u64, offset)
),
TP_fast_assign(
- __entry->length = length;
- __entry->position = position;
+ const struct svc_rdma_segment *segment =
+ &chunk->ch_segments[segno];
+
+ __entry->cq_id = cid->ci_queue_id;
+ __entry->completion_id = cid->ci_completion_id;
+ __entry->segno = segno;
+ __entry->handle = segment->rs_handle;
+ __entry->length = segment->rs_length;
+ __entry->offset = segment->rs_offset;
),
- TP_printk("length=%u position=%u",
- __entry->length, __entry->position
+ TP_printk("cq_id=%u cid=%d segno=%u %u@0x%016llx:0x%08x",
+ __entry->cq_id, __entry->completion_id,
+ __entry->segno, __entry->length,
+ (unsigned long long)__entry->offset, __entry->handle
)
);
@@ -1581,6 +1592,7 @@ DECLARE_EVENT_CLASS(svcrdma_dma_map_class,
TP_ARGS(rdma, dma_addr, length))
DEFINE_SVC_DMA_EVENT(dma_map_page);
+DEFINE_SVC_DMA_EVENT(dma_map_err);
DEFINE_SVC_DMA_EVENT(dma_unmap_page);
TRACE_EVENT(svcrdma_dma_map_rw_err,
@@ -1699,20 +1711,30 @@ TRACE_EVENT(svcrdma_small_wrch_err,
TRACE_EVENT(svcrdma_send_pullup,
TP_PROTO(
- unsigned int len
+ const struct svc_rdma_send_ctxt *ctxt,
+ unsigned int msglen
),
- TP_ARGS(len),
+ TP_ARGS(ctxt, msglen),
TP_STRUCT__entry(
- __field(unsigned int, len)
+ __field(u32, cq_id)
+ __field(int, completion_id)
+ __field(unsigned int, hdrlen)
+ __field(unsigned int, msglen)
),
TP_fast_assign(
- __entry->len = len;
+ __entry->cq_id = ctxt->sc_cid.ci_queue_id;
+ __entry->completion_id = ctxt->sc_cid.ci_completion_id;
+ __entry->hdrlen = ctxt->sc_hdrbuf.len,
+ __entry->msglen = msglen;
),
- TP_printk("len=%u", __entry->len)
+ TP_printk("cq_id=%u cid=%d hdr=%u msg=%u (total %u)",
+ __entry->cq_id, __entry->completion_id,
+ __entry->hdrlen, __entry->msglen,
+ __entry->hdrlen + __entry->msglen)
);
TRACE_EVENT(svcrdma_send_err,
@@ -1819,7 +1841,7 @@ TRACE_EVENT(svcrdma_rq_post_err,
)
);
-TRACE_EVENT(svcrdma_post_chunk,
+DECLARE_EVENT_CLASS(svcrdma_post_chunk_class,
TP_PROTO(
const struct rpc_rdma_cid *cid,
int sqecount
@@ -1845,6 +1867,19 @@ TRACE_EVENT(svcrdma_post_chunk,
)
);
+#define DEFINE_POST_CHUNK_EVENT(name) \
+ DEFINE_EVENT(svcrdma_post_chunk_class, \
+ svcrdma_post_##name##_chunk, \
+ TP_PROTO( \
+ const struct rpc_rdma_cid *cid, \
+ int sqecount \
+ ), \
+ TP_ARGS(cid, sqecount))
+
+DEFINE_POST_CHUNK_EVENT(read);
+DEFINE_POST_CHUNK_EVENT(write);
+DEFINE_POST_CHUNK_EVENT(reply);
+
DEFINE_COMPLETION_EVENT(svcrdma_wc_read);
DEFINE_COMPLETION_EVENT(svcrdma_wc_write);
diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h
index 2a03263b5f9d..58994e013022 100644
--- a/include/trace/events/sunrpc.h
+++ b/include/trace/events/sunrpc.h
@@ -1500,30 +1500,6 @@ SVC_RQST_FLAG_LIST
#define show_rqstp_flags(flags) \
__print_flags(flags, "|", SVC_RQST_FLAG_LIST)
-TRACE_EVENT(svc_recv,
- TP_PROTO(struct svc_rqst *rqst, int len),
-
- TP_ARGS(rqst, len),
-
- TP_STRUCT__entry(
- __field(u32, xid)
- __field(int, len)
- __field(unsigned long, flags)
- __string(addr, rqst->rq_xprt->xpt_remotebuf)
- ),
-
- TP_fast_assign(
- __entry->xid = be32_to_cpu(rqst->rq_xid);
- __entry->len = len;
- __entry->flags = rqst->rq_flags;
- __assign_str(addr, rqst->rq_xprt->xpt_remotebuf);
- ),
-
- TP_printk("addr=%s xid=0x%08x len=%d flags=%s",
- __get_str(addr), __entry->xid, __entry->len,
- show_rqstp_flags(__entry->flags))
-);
-
TRACE_DEFINE_ENUM(SVC_GARBAGE);
TRACE_DEFINE_ENUM(SVC_SYSERR);
TRACE_DEFINE_ENUM(SVC_VALID);
diff --git a/net/sunrpc/auth_gss/gss_rpc_upcall.c b/net/sunrpc/auth_gss/gss_rpc_upcall.c
index af9c7f43859c..d1c003a25b0f 100644
--- a/net/sunrpc/auth_gss/gss_rpc_upcall.c
+++ b/net/sunrpc/auth_gss/gss_rpc_upcall.c
@@ -200,7 +200,7 @@ static int gssp_call(struct net *net, struct rpc_message *msg)
static void gssp_free_receive_pages(struct gssx_arg_accept_sec_context *arg)
{
- int i;
+ unsigned int i;
for (i = 0; i < arg->npages && arg->pages[i]; i++)
__free_page(arg->pages[i]);
@@ -210,14 +210,19 @@ static void gssp_free_receive_pages(struct gssx_arg_accept_sec_context *arg)
static int gssp_alloc_receive_pages(struct gssx_arg_accept_sec_context *arg)
{
+ unsigned int i;
+
arg->npages = DIV_ROUND_UP(NGROUPS_MAX * 4, PAGE_SIZE);
arg->pages = kcalloc(arg->npages, sizeof(struct page *), GFP_KERNEL);
- /*
- * XXX: actual pages are allocated by xdr layer in
- * xdr_partial_copy_from_skb.
- */
if (!arg->pages)
return -ENOMEM;
+ for (i = 0; i < arg->npages; i++) {
+ arg->pages[i] = alloc_page(GFP_KERNEL);
+ if (!arg->pages[i]) {
+ gssp_free_receive_pages(arg);
+ return -ENOMEM;
+ }
+ }
return 0;
}
diff --git a/net/sunrpc/auth_gss/gss_rpc_xdr.c b/net/sunrpc/auth_gss/gss_rpc_xdr.c
index 2ff7b7083eba..d79f12c2550a 100644
--- a/net/sunrpc/auth_gss/gss_rpc_xdr.c
+++ b/net/sunrpc/auth_gss/gss_rpc_xdr.c
@@ -771,7 +771,6 @@ void gssx_enc_accept_sec_context(struct rpc_rqst *req,
xdr_inline_pages(&req->rq_rcv_buf,
PAGE_SIZE/2 /* pretty arbitrary */,
arg->pages, 0 /* page base */, arg->npages * PAGE_SIZE);
- req->rq_rcv_buf.flags |= XDRBUF_SPARSE_PAGES;
done:
if (err)
dprintk("RPC: gssx_enc_accept_sec_context: %d\n", err);
@@ -789,7 +788,7 @@ int gssx_dec_accept_sec_context(struct rpc_rqst *rqstp,
scratch = alloc_page(GFP_KERNEL);
if (!scratch)
return -ENOMEM;
- xdr_set_scratch_buffer(xdr, page_address(scratch), PAGE_SIZE);
+ xdr_set_scratch_page(xdr, scratch);
/* res->status */
err = gssx_dec_status(xdr, &res->status);
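xdr_set_scratch_page() is the new helper this hunk adopts in place of the open-coded scratch buffer attachment. A hedged sketch of the decode-side idiom it supports (demo_decode is hypothetical):

#include <linux/sunrpc/xdr.h>
#include <linux/gfp.h>

/* Sketch: attach a scratch page so xdr_inline_decode() can
 * linearize an object that straddles a page boundary.
 */
static int demo_decode(struct xdr_stream *xdr)
{
        struct page *scratch;
        __be32 *p;

        scratch = alloc_page(GFP_KERNEL);
        if (!scratch)
                return -ENOMEM;
        xdr_set_scratch_page(xdr, scratch);

        /* decodes that span pages are copied into the scratch page */
        p = xdr_inline_decode(xdr, 2 * sizeof(*p));

        __free_page(scratch);
        return p ? 0 : -EIO;
}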
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 20c93b68505e..1a2c1c44bb00 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -778,7 +778,6 @@ void cache_clean_deferred(void *owner)
*/
static DEFINE_SPINLOCK(queue_lock);
-static DEFINE_MUTEX(queue_io_mutex);
struct cache_queue {
struct list_head list;
@@ -906,44 +905,26 @@ static ssize_t cache_do_downcall(char *kaddr, const char __user *buf,
return ret;
}
-static ssize_t cache_slow_downcall(const char __user *buf,
- size_t count, struct cache_detail *cd)
-{
- static char write_buf[32768]; /* protected by queue_io_mutex */
- ssize_t ret = -EINVAL;
-
- if (count >= sizeof(write_buf))
- goto out;
- mutex_lock(&queue_io_mutex);
- ret = cache_do_downcall(write_buf, buf, count, cd);
- mutex_unlock(&queue_io_mutex);
-out:
- return ret;
-}
-
static ssize_t cache_downcall(struct address_space *mapping,
const char __user *buf,
size_t count, struct cache_detail *cd)
{
- struct page *page;
- char *kaddr;
+ char *write_buf;
ssize_t ret = -ENOMEM;
- if (count >= PAGE_SIZE)
- goto out_slow;
+ if (count >= 32768) { /* 32k is max userland buffer, let's check anyway */
+ ret = -EINVAL;
+ goto out;
+ }
- page = find_or_create_page(mapping, 0, GFP_KERNEL);
- if (!page)
- goto out_slow;
+ write_buf = kvmalloc(count + 1, GFP_KERNEL);
+ if (!write_buf)
+ goto out;
- kaddr = kmap(page);
- ret = cache_do_downcall(kaddr, buf, count, cd);
- kunmap(page);
- unlock_page(page);
- put_page(page);
+ ret = cache_do_downcall(write_buf, buf, count, cd);
+ kvfree(write_buf);
+out:
return ret;
-out_slow:
- return cache_slow_downcall(buf, count, cd);
}
static ssize_t cache_write(struct file *filp, const char __user *buf,
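The rewritten cache_downcall() above drops both the shared 32KB static buffer (with its global mutex) and the page-cache round-trip in favor of a per-call kvmalloc() sized to the request. Its copy-in pattern, reduced to essentials (demo_copy_downcall is hypothetical; the real code delegates the copy to cache_do_downcall()):

#include <linux/mm.h>
#include <linux/uaccess.h>

/* Sketch: bounded copy-in of a userspace downcall into a fresh
 * kernel buffer, NUL-terminated for line-oriented parsing.
 */
static ssize_t demo_copy_downcall(const char __user *buf, size_t count)
{
        char *kbuf;
        ssize_t ret;

        if (count >= 32768)     /* matches userland's maximum */
                return -EINVAL;

        kbuf = kvmalloc(count + 1, GFP_KERNEL);
        if (!kbuf)
                return -ENOMEM;

        if (copy_from_user(kbuf, buf, count)) {
                ret = -EFAULT;
                goto out;
        }
        kbuf[count] = '\0';

        ret = count;            /* a real caller parses kbuf here */
out:
        kvfree(kbuf);
        return ret;
}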
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index c211b607239e..4187745887f0 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -614,6 +614,10 @@ svc_rqst_alloc(struct svc_serv *serv, struct svc_pool *pool, int node)
rqstp->rq_server = serv;
rqstp->rq_pool = pool;
+ rqstp->rq_scratch_page = alloc_pages_node(node, GFP_KERNEL, 0);
+ if (!rqstp->rq_scratch_page)
+ goto out_enomem;
+
rqstp->rq_argp = kmalloc_node(serv->sv_xdrsize, GFP_KERNEL, node);
if (!rqstp->rq_argp)
goto out_enomem;
@@ -842,6 +846,7 @@ void
svc_rqst_free(struct svc_rqst *rqstp)
{
svc_release_buffer(rqstp);
+ put_page(rqstp->rq_scratch_page);
kfree(rqstp->rq_resp);
kfree(rqstp->rq_argp);
kfree(rqstp->rq_auth_data);
@@ -1622,7 +1627,7 @@ u32 svc_max_payload(const struct svc_rqst *rqstp)
EXPORT_SYMBOL_GPL(svc_max_payload);
/**
- * svc_encode_read_payload - mark a range of bytes as a READ payload
+ * svc_encode_result_payload - mark a range of bytes as a result payload
* @rqstp: svc_rqst to operate on
* @offset: payload's byte offset in rqstp->rq_res
* @length: size of payload, in bytes
@@ -1630,12 +1635,13 @@ EXPORT_SYMBOL_GPL(svc_max_payload);
* Returns zero on success, or a negative errno if a permanent
* error occurred.
*/
-int svc_encode_read_payload(struct svc_rqst *rqstp, unsigned int offset,
- unsigned int length)
+int svc_encode_result_payload(struct svc_rqst *rqstp, unsigned int offset,
+ unsigned int length)
{
- return rqstp->rq_xprt->xpt_ops->xpo_read_payload(rqstp, offset, length);
+ return rqstp->rq_xprt->xpt_ops->xpo_result_payload(rqstp, offset,
+ length);
}
-EXPORT_SYMBOL_GPL(svc_encode_read_payload);
+EXPORT_SYMBOL_GPL(svc_encode_result_payload);
/**
* svc_fill_write_vector - Construct data argument for VFS write call
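With the rename, an upper-layer XDR encoder marks any DDP-eligible result (not just READ data) so the transport may place it directly. A hedged sketch of the caller side (demo_encode_result and its offset arithmetic are illustrative, not taken from this series):

#include <linux/sunrpc/svc.h>
#include <linux/sunrpc/xdr.h>

/* Sketch: encode an opaque result, then mark the payload bytes so
 * an RDMA transport may convey them via RDMA Write. The offset is
 * relative to the start of rqstp->rq_res.
 */
static int demo_encode_result(struct svc_rqst *rqstp,
                              struct xdr_stream *xdr,
                              void *data, unsigned int length)
{
        unsigned int offset = xdr->buf->len;
        __be32 *p;

        p = xdr_reserve_space(xdr, sizeof(__be32) + xdr_align_size(length));
        if (!p)
                return -EMSGSIZE;
        xdr_encode_opaque(p, data, length);

        /* skip the length word that precedes the payload bytes */
        return svc_encode_result_payload(rqstp, offset + sizeof(__be32),
                                         length);
}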
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 43cf8dbde898..5fb9164aa690 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -813,8 +813,6 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
len = svc_deferred_recv(rqstp);
else
len = xprt->xpt_ops->xpo_recvfrom(rqstp);
- if (len > 0)
- trace_svc_xdr_recvfrom(rqstp, &rqstp->rq_arg);
rqstp->rq_stime = ktime_get();
rqstp->rq_reserved = serv->sv_max_mesg;
atomic_add(rqstp->rq_reserved, &xprt->xpt_reserved);
@@ -868,7 +866,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
if (serv->sv_stats)
serv->sv_stats->netcnt++;
- trace_svc_recv(rqstp, len);
+ trace_svc_xdr_recvfrom(rqstp, &rqstp->rq_arg);
return len;
out_release:
rqstp->rq_res.len = 0;
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index c2752e2b9ce3..b248f2349437 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -181,8 +181,8 @@ static void svc_set_cmsg_data(struct svc_rqst *rqstp, struct cmsghdr *cmh)
}
}
-static int svc_sock_read_payload(struct svc_rqst *rqstp, unsigned int offset,
- unsigned int length)
+static int svc_sock_result_payload(struct svc_rqst *rqstp, unsigned int offset,
+ unsigned int length)
{
return 0;
}
@@ -635,7 +635,7 @@ static const struct svc_xprt_ops svc_udp_ops = {
.xpo_create = svc_udp_create,
.xpo_recvfrom = svc_udp_recvfrom,
.xpo_sendto = svc_udp_sendto,
- .xpo_read_payload = svc_sock_read_payload,
+ .xpo_result_payload = svc_sock_result_payload,
.xpo_release_rqst = svc_udp_release_rqst,
.xpo_detach = svc_sock_detach,
.xpo_free = svc_sock_free,
@@ -1123,7 +1123,7 @@ static const struct svc_xprt_ops svc_tcp_ops = {
.xpo_create = svc_tcp_create,
.xpo_recvfrom = svc_tcp_recvfrom,
.xpo_sendto = svc_tcp_sendto,
- .xpo_read_payload = svc_sock_read_payload,
+ .xpo_result_payload = svc_sock_result_payload,
.xpo_release_rqst = svc_tcp_release_rqst,
.xpo_detach = svc_tcp_sock_detach,
.xpo_free = svc_sock_free,
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 71e03b930b70..757560a3b06b 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -669,7 +669,7 @@ void xdr_init_encode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p,
struct kvec *iov = buf->head;
int scratch_len = buf->buflen - buf->page_len - buf->tail[0].iov_len;
- xdr_set_scratch_buffer(xdr, NULL, 0);
+ xdr_reset_scratch_buffer(xdr);
BUG_ON(scratch_len < 0);
xdr->buf = buf;
xdr->iov = iov;
@@ -713,7 +713,7 @@ inline void xdr_commit_encode(struct xdr_stream *xdr)
page = page_address(*xdr->page_ptr);
memcpy(xdr->scratch.iov_base, page, shift);
memmove(page, page + shift, (void *)xdr->p - page);
- xdr->scratch.iov_len = 0;
+ xdr_reset_scratch_buffer(xdr);
}
EXPORT_SYMBOL_GPL(xdr_commit_encode);
@@ -743,8 +743,7 @@ static __be32 *xdr_get_next_encode_buffer(struct xdr_stream *xdr,
* the "scratch" iov to track any temporarily unused fragment of
* space at the end of the previous buffer:
*/
- xdr->scratch.iov_base = xdr->p;
- xdr->scratch.iov_len = frag1bytes;
+ xdr_set_scratch_buffer(xdr, xdr->p, frag1bytes);
p = page_address(*xdr->page_ptr);
/*
* Note this is where the next encode will start after we've
@@ -1052,8 +1051,7 @@ void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p,
struct rpc_rqst *rqst)
{
xdr->buf = buf;
- xdr->scratch.iov_base = NULL;
- xdr->scratch.iov_len = 0;
+ xdr_reset_scratch_buffer(xdr);
xdr->nwords = XDR_QUADLEN(buf->len);
if (buf->head[0].iov_len != 0)
xdr_set_iov(xdr, buf->head, buf->len);
@@ -1101,24 +1099,6 @@ static __be32 * __xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes)
return p;
}
-/**
- * xdr_set_scratch_buffer - Attach a scratch buffer for decoding data.
- * @xdr: pointer to xdr_stream struct
- * @buf: pointer to an empty buffer
- * @buflen: size of 'buf'
- *
- * The scratch buffer is used when decoding from an array of pages.
- * If an xdr_inline_decode() call spans across page boundaries, then
- * we copy the data into the scratch buffer in order to allow linear
- * access.
- */
-void xdr_set_scratch_buffer(struct xdr_stream *xdr, void *buf, size_t buflen)
-{
- xdr->scratch.iov_base = buf;
- xdr->scratch.iov_len = buflen;
-}
-EXPORT_SYMBOL_GPL(xdr_set_scratch_buffer);
-
static __be32 *xdr_copy_to_scratch(struct xdr_stream *xdr, size_t nbytes)
{
__be32 *p;
@@ -1379,9 +1359,8 @@ EXPORT_SYMBOL_GPL(xdr_buf_from_iov);
*
* Returns -1 if base or length are out of bounds.
*/
-int
-xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf,
- unsigned int base, unsigned int len)
+int xdr_buf_subsegment(const struct xdr_buf *buf, struct xdr_buf *subbuf,
+ unsigned int base, unsigned int len)
{
subbuf->buflen = subbuf->len = len;
if (base < buf->head[0].iov_len) {
@@ -1429,6 +1408,51 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf,
EXPORT_SYMBOL_GPL(xdr_buf_subsegment);
/**
+ * xdr_stream_subsegment - set @subbuf to a portion of @xdr
+ * @xdr: an xdr_stream set up for decoding
+ * @subbuf: the result buffer
+ * @nbytes: length of @xdr to extract, in bytes
+ *
+ * Sets up @subbuf to represent a portion of @xdr. The portion
+ * starts at the current offset in @xdr, and extends for a length
+ * of @nbytes. If this is successful, @xdr is advanced to the next
+ * position following that portion.
+ *
+ * Return values:
+ * %true: @subbuf has been initialized, and @xdr has been advanced.
+ * %false: a bounds error has occurred
+ */
+bool xdr_stream_subsegment(struct xdr_stream *xdr, struct xdr_buf *subbuf,
+ unsigned int nbytes)
+{
+ unsigned int remaining, offset, len;
+
+ if (xdr_buf_subsegment(xdr->buf, subbuf, xdr_stream_pos(xdr), nbytes))
+ return false;
+
+ if (subbuf->head[0].iov_len)
+ if (!__xdr_inline_decode(xdr, subbuf->head[0].iov_len))
+ return false;
+
+ remaining = subbuf->page_len;
+ offset = subbuf->page_base;
+ while (remaining) {
+ len = min_t(unsigned int, remaining, PAGE_SIZE) - offset;
+
+ if (xdr->p == xdr->end && !xdr_set_next_buffer(xdr))
+ return false;
+ if (!__xdr_inline_decode(xdr, len))
+ return false;
+
+ remaining -= len;
+ offset = 0;
+ }
+
+ return true;
+}
+EXPORT_SYMBOL_GPL(xdr_stream_subsegment);
+
+/**
* xdr_buf_trim - lop at most "len" bytes off the end of "buf"
* @buf: buf to be trimmed
* @len: number of bytes to reduce "buf" by
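A short, hedged sketch of consuming the new xdr_stream_subsegment() from a decoder, carving a counted opaque out of the stream (demo_decode_blob is hypothetical):

#include <linux/sunrpc/xdr.h>

/* Sketch: carve a length-prefixed blob out of an xdr_stream as
 * its own xdr_buf, advancing the stream past it.
 */
static int demo_decode_blob(struct xdr_stream *xdr, struct xdr_buf *blob)
{
        u32 len;

        if (xdr_stream_decode_u32(xdr, &len) < 0)
                return -EBADMSG;
        if (!xdr_stream_subsegment(xdr, blob, len))
                return -EBADMSG;
        return 0;
}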
diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile
index 8ed0377d7a18..55b21bae866d 100644
--- a/net/sunrpc/xprtrdma/Makefile
+++ b/net/sunrpc/xprtrdma/Makefile
@@ -4,5 +4,5 @@ obj-$(CONFIG_SUNRPC_XPRT_RDMA) += rpcrdma.o
rpcrdma-y := transport.o rpc_rdma.o verbs.o frwr_ops.o \
svc_rdma.o svc_rdma_backchannel.o svc_rdma_transport.o \
svc_rdma_sendto.o svc_rdma_recvfrom.o svc_rdma_rw.o \
- module.o
+ svc_rdma_pcl.o module.o
rpcrdma-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel.o
diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
index 5e7c4ba9e147..63f8be974df2 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c
@@ -74,11 +74,17 @@ out_unlock:
*/
static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
struct rpc_rqst *rqst,
- struct svc_rdma_send_ctxt *ctxt)
+ struct svc_rdma_send_ctxt *sctxt)
{
+ struct svc_rdma_recv_ctxt *rctxt;
int ret;
- ret = svc_rdma_map_reply_msg(rdma, ctxt, NULL, &rqst->rq_snd_buf);
+ rctxt = svc_rdma_recv_ctxt_get(rdma);
+ if (!rctxt)
+ return -EIO;
+
+ ret = svc_rdma_map_reply_msg(rdma, sctxt, rctxt, &rqst->rq_snd_buf);
+ svc_rdma_recv_ctxt_put(rdma, rctxt);
if (ret < 0)
return -EIO;
@@ -86,8 +92,8 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma,
* the rq_buffer before all retransmits are complete.
*/
get_page(virt_to_page(rqst->rq_buffer));
- ctxt->sc_send_wr.opcode = IB_WR_SEND;
- return svc_rdma_send(rdma, ctxt);
+ sctxt->sc_send_wr.opcode = IB_WR_SEND;
+ return svc_rdma_send(rdma, sctxt);
}
/* Server-side transport endpoint wants a whole page for its send
diff --git a/net/sunrpc/xprtrdma/svc_rdma_pcl.c b/net/sunrpc/xprtrdma/svc_rdma_pcl.c
new file mode 100644
index 000000000000..b63cfeaa2923
--- /dev/null
+++ b/net/sunrpc/xprtrdma/svc_rdma_pcl.c
@@ -0,0 +1,306 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2020 Oracle. All rights reserved.
+ */
+
+#include <linux/sunrpc/svc_rdma.h>
+#include <linux/sunrpc/rpc_rdma.h>
+
+#include "xprt_rdma.h"
+#include <trace/events/rpcrdma.h>
+
+/**
+ * pcl_free - Release all memory associated with a parsed chunk list
+ * @pcl: parsed chunk list
+ *
+ */
+void pcl_free(struct svc_rdma_pcl *pcl)
+{
+ while (!list_empty(&pcl->cl_chunks)) {
+ struct svc_rdma_chunk *chunk;
+
+ chunk = pcl_first_chunk(pcl);
+ list_del(&chunk->ch_list);
+ kfree(chunk);
+ }
+}
+
+static struct svc_rdma_chunk *pcl_alloc_chunk(u32 segcount, u32 position)
+{
+ struct svc_rdma_chunk *chunk;
+
+ chunk = kmalloc(struct_size(chunk, ch_segments, segcount), GFP_KERNEL);
+ if (!chunk)
+ return NULL;
+
+ chunk->ch_position = position;
+ chunk->ch_length = 0;
+ chunk->ch_payload_length = 0;
+ chunk->ch_segcount = 0;
+ return chunk;
+}
+
+static struct svc_rdma_chunk *
+pcl_lookup_position(struct svc_rdma_pcl *pcl, u32 position)
+{
+ struct svc_rdma_chunk *pos;
+
+ pcl_for_each_chunk(pos, pcl) {
+ if (pos->ch_position == position)
+ return pos;
+ }
+ return NULL;
+}
+
+static void pcl_insert_position(struct svc_rdma_pcl *pcl,
+ struct svc_rdma_chunk *chunk)
+{
+ struct svc_rdma_chunk *pos;
+
+ pcl_for_each_chunk(pos, pcl) {
+ if (pos->ch_position > chunk->ch_position)
+ break;
+ }
+ __list_add(&chunk->ch_list, pos->ch_list.prev, &pos->ch_list);
+ pcl->cl_count++;
+}
+
+static void pcl_set_read_segment(const struct svc_rdma_recv_ctxt *rctxt,
+ struct svc_rdma_chunk *chunk,
+ u32 handle, u32 length, u64 offset)
+{
+ struct svc_rdma_segment *segment;
+
+ segment = &chunk->ch_segments[chunk->ch_segcount];
+ segment->rs_handle = handle;
+ segment->rs_length = length;
+ segment->rs_offset = offset;
+
+ trace_svcrdma_decode_rseg(&rctxt->rc_cid, chunk, segment);
+
+ chunk->ch_length += length;
+ chunk->ch_segcount++;
+}
+
+/**
+ * pcl_alloc_call - Construct a parsed chunk list for the Call body
+ * @rctxt: Ingress receive context
+ * @p: Start of an un-decoded Read list
+ *
+ * Assumptions:
+ * - The incoming Read list has already been sanity checked.
+ * - cl_count is already set to the number of segments in
+ * the un-decoded list.
+ * - The list might not be in order by position.
+ *
+ * Return values:
+ * %true: Parsed chunk list was successfully constructed, and
+ * cl_count is updated to be the number of chunks (i.e.
+ * unique positions) in the Read list.
+ * %false: Memory allocation failed.
+ */
+bool pcl_alloc_call(struct svc_rdma_recv_ctxt *rctxt, __be32 *p)
+{
+ struct svc_rdma_pcl *pcl = &rctxt->rc_call_pcl;
+ unsigned int i, segcount = pcl->cl_count;
+
+ pcl->cl_count = 0;
+ for (i = 0; i < segcount; i++) {
+ struct svc_rdma_chunk *chunk;
+ u32 position, handle, length;
+ u64 offset;
+
+ p++; /* skip the list discriminator */
+ p = xdr_decode_read_segment(p, &position, &handle,
+ &length, &offset);
+ if (position != 0)
+ continue;
+
+ if (pcl_is_empty(pcl)) {
+ chunk = pcl_alloc_chunk(segcount, position);
+ if (!chunk)
+ return false;
+ pcl_insert_position(pcl, chunk);
+ } else {
+ chunk = list_first_entry(&pcl->cl_chunks,
+ struct svc_rdma_chunk,
+ ch_list);
+ }
+
+ pcl_set_read_segment(rctxt, chunk, handle, length, offset);
+ }
+
+ return true;
+}
+
+/**
+ * pcl_alloc_read - Construct a parsed chunk list for normal Read chunks
+ * @rctxt: Ingress receive context
+ * @p: Start of an un-decoded Read list
+ *
+ * Assumptions:
+ * - The incoming Read list has already been sanity checked.
+ * - cl_count is already set to the number of segments in
+ * the un-decoded list.
+ * - The list might not be in order by position.
+ *
+ * Return values:
+ * %true: Parsed chunk list was successfully constructed, and
+ * cl_count is updated to be the number of chunks (i.e.
+ * unique position values) in the Read list.
+ * %false: Memory allocation failed.
+ *
+ * TODO:
+ * - Check for chunk range overlaps
+ */
+bool pcl_alloc_read(struct svc_rdma_recv_ctxt *rctxt, __be32 *p)
+{
+ struct svc_rdma_pcl *pcl = &rctxt->rc_read_pcl;
+ unsigned int i, segcount = pcl->cl_count;
+
+ pcl->cl_count = 0;
+ for (i = 0; i < segcount; i++) {
+ struct svc_rdma_chunk *chunk;
+ u32 position, handle, length;
+ u64 offset;
+
+ p++; /* skip the list discriminator */
+ p = xdr_decode_read_segment(p, &position, &handle,
+ &length, &offset);
+ if (position == 0)
+ continue;
+
+ chunk = pcl_lookup_position(pcl, position);
+ if (!chunk) {
+ chunk = pcl_alloc_chunk(segcount, position);
+ if (!chunk)
+ return false;
+ pcl_insert_position(pcl, chunk);
+ }
+
+ pcl_set_read_segment(rctxt, chunk, handle, length, offset);
+ }
+
+ return true;
+}
+
+/**
+ * pcl_alloc_write - Construct a parsed chunk list from a Write list
+ * @rctxt: Ingress receive context
+ * @pcl: Parsed chunk list to populate
+ * @p: Start of an un-decoded Write list
+ *
+ * Assumptions:
+ * - The incoming Write list has already been sanity checked, and
+ * - cl_count is set to the number of chunks in the un-decoded list.
+ *
+ * Return values:
+ * %true: Parsed chunk list was successfully constructed.
+ * %false: Memory allocation failed.
+ */
+bool pcl_alloc_write(struct svc_rdma_recv_ctxt *rctxt,
+ struct svc_rdma_pcl *pcl, __be32 *p)
+{
+ struct svc_rdma_segment *segment;
+ struct svc_rdma_chunk *chunk;
+ unsigned int i, j;
+ u32 segcount;
+
+ for (i = 0; i < pcl->cl_count; i++) {
+ p++; /* skip the list discriminator */
+ segcount = be32_to_cpup(p++);
+
+ chunk = pcl_alloc_chunk(segcount, 0);
+ if (!chunk)
+ return false;
+ list_add_tail(&chunk->ch_list, &pcl->cl_chunks);
+
+ for (j = 0; j < segcount; j++) {
+ segment = &chunk->ch_segments[j];
+ p = xdr_decode_rdma_segment(p, &segment->rs_handle,
+ &segment->rs_length,
+ &segment->rs_offset);
+ trace_svcrdma_decode_wseg(&rctxt->rc_cid, chunk, j);
+
+ chunk->ch_length += segment->rs_length;
+ chunk->ch_segcount++;
+ }
+ }
+ return true;
+}
+
+static int pcl_process_region(const struct xdr_buf *xdr,
+ unsigned int offset, unsigned int length,
+ int (*actor)(const struct xdr_buf *, void *),
+ void *data)
+{
+ struct xdr_buf subbuf;
+
+ if (!length)
+ return 0;
+ if (xdr_buf_subsegment(xdr, &subbuf, offset, length))
+ return -EMSGSIZE;
+ return actor(&subbuf, data);
+}
+
+/**
+ * pcl_process_nonpayloads - Process non-payload regions inside @xdr
+ * @pcl: Chunk list to process
+ * @xdr: xdr_buf to process
+ * @actor: Function to invoke on each non-payload region
+ * @data: Arguments for @actor
+ *
+ * This mechanism must ignore not only result payloads that were already
+ * sent via RDMA Write, but also XDR padding for those payloads that
+ * the upper layer has added.
+ *
+ * Assumptions:
+ * The xdr->len and ch_position fields are aligned to 4-byte multiples.
+ *
+ * Returns:
+ * On success, zero,
+ * %-EMSGSIZE on XDR buffer overflow, or
+ * the return value of @actor.
+ */
+int pcl_process_nonpayloads(const struct svc_rdma_pcl *pcl,
+ const struct xdr_buf *xdr,
+ int (*actor)(const struct xdr_buf *, void *),
+ void *data)
+{
+ struct svc_rdma_chunk *chunk, *next;
+ unsigned int start;
+ int ret;
+
+ chunk = pcl_first_chunk(pcl);
+
+ /* No result payloads were generated */
+ if (!chunk || !chunk->ch_payload_length)
+ return actor(xdr, data);
+
+ /* Process the region before the first result payload */
+ ret = pcl_process_region(xdr, 0, chunk->ch_position, actor, data);
+ if (ret < 0)
+ return ret;
+
+ /* Process the regions between each middle result payload */
+ while ((next = pcl_next_chunk(pcl, chunk))) {
+ if (!next->ch_payload_length)
+ break;
+
+ start = pcl_chunk_end_offset(chunk);
+ ret = pcl_process_region(xdr, start, next->ch_position - start,
+ actor, data);
+ if (ret < 0)
+ return ret;
+
+ chunk = next;
+ }
+
+ /* Process the region after the last result payload */
+ start = pcl_chunk_end_offset(chunk);
+ ret = pcl_process_region(xdr, start, xdr->len - start, actor, data);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
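Taken together, svc_rdma_pcl.c gives the receive path a decoded, iterable view of each chunk list. A condensed sketch of the intended lifecycle, assuming cl_count has already been set by the counting pass in svc_rdma_recvfrom.c (demo_walk_write_list is illustrative only):

#include <linux/sunrpc/svc_rdma.h>
#include <linux/sunrpc/svc_rdma_pcl.h>

/* Sketch: decode a Write list into a pcl, walk it, release it.
 * Assumes @p points at the un-decoded Write list and that
 * @rctxt->rc_write_pcl.cl_count holds the chunk count.
 */
static bool demo_walk_write_list(struct svc_rdma_recv_ctxt *rctxt, __be32 *p)
{
        struct svc_rdma_segment *segment;
        struct svc_rdma_chunk *chunk;

        if (!pcl_alloc_write(rctxt, &rctxt->rc_write_pcl, p))
                return false;

        pcl_for_each_chunk(chunk, &rctxt->rc_write_pcl) {
                pcl_for_each_segment(segment, chunk) {
                        pr_debug("handle=0x%08x length=%u\n",
                                 segment->rs_handle, segment->rs_length);
                }
        }

        pcl_free(&rctxt->rc_write_pcl);
        return true;
}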
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index c6ea2903c21a..cbdb71247755 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -93,6 +93,7 @@
* (see rdma_read_complete() below).
*/
+#include <linux/slab.h>
#include <linux/spinlock.h>
#include <asm/unaligned.h>
#include <rdma/ib_verbs.h>
@@ -143,6 +144,10 @@ svc_rdma_recv_ctxt_alloc(struct svcxprt_rdma *rdma)
goto fail2;
svc_rdma_recv_cid_init(rdma, &ctxt->rc_cid);
+ pcl_init(&ctxt->rc_call_pcl);
+ pcl_init(&ctxt->rc_read_pcl);
+ pcl_init(&ctxt->rc_write_pcl);
+ pcl_init(&ctxt->rc_reply_pcl);
ctxt->rc_recv_wr.next = NULL;
ctxt->rc_recv_wr.wr_cqe = &ctxt->rc_cqe;
@@ -189,8 +194,13 @@ void svc_rdma_recv_ctxts_destroy(struct svcxprt_rdma *rdma)
}
}
-static struct svc_rdma_recv_ctxt *
-svc_rdma_recv_ctxt_get(struct svcxprt_rdma *rdma)
+/**
+ * svc_rdma_recv_ctxt_get - Allocate a recv_ctxt
+ * @rdma: controlling svcxprt_rdma
+ *
+ * Returns a recv_ctxt or (rarely) NULL if none are available.
+ */
+struct svc_rdma_recv_ctxt *svc_rdma_recv_ctxt_get(struct svcxprt_rdma *rdma)
{
struct svc_rdma_recv_ctxt *ctxt;
struct llist_node *node;
@@ -202,7 +212,6 @@ svc_rdma_recv_ctxt_get(struct svcxprt_rdma *rdma)
out:
ctxt->rc_page_count = 0;
- ctxt->rc_read_payload_length = 0;
return ctxt;
out_empty:
@@ -226,6 +235,11 @@ void svc_rdma_recv_ctxt_put(struct svcxprt_rdma *rdma,
for (i = 0; i < ctxt->rc_page_count; i++)
put_page(ctxt->rc_pages[i]);
+ pcl_free(&ctxt->rc_call_pcl);
+ pcl_free(&ctxt->rc_read_pcl);
+ pcl_free(&ctxt->rc_write_pcl);
+ pcl_free(&ctxt->rc_reply_pcl);
+
if (!ctxt->rc_temp)
llist_add(&ctxt->rc_node, &rdma->sc_recv_ctxts);
else
@@ -385,100 +399,123 @@ static void svc_rdma_build_arg_xdr(struct svc_rqst *rqstp,
arg->len = ctxt->rc_byte_len;
}
-/* This accommodates the largest possible Write chunk.
- */
-#define MAX_BYTES_WRITE_CHUNK ((u32)(RPCSVC_MAXPAGES << PAGE_SHIFT))
-
-/* This accommodates the largest possible Position-Zero
- * Read chunk or Reply chunk.
- */
-#define MAX_BYTES_SPECIAL_CHUNK ((u32)((RPCSVC_MAXPAGES + 2) << PAGE_SHIFT))
-
-/* Sanity check the Read list.
- *
- * Implementation limits:
- * - This implementation supports only one Read chunk.
+/**
+ * xdr_count_read_segments - Count number of Read segments in Read list
+ * @rctxt: Ingress receive context
+ * @p: Start of an un-decoded Read list
*
- * Sanity checks:
- * - Read list does not overflow Receive buffer.
- * - Segment size limited by largest NFS data payload.
+ * Before allocating anything, ensure the ingress Read list is safe
+ * to use.
*
- * The segment count is limited to how many segments can
- * fit in the transport header without overflowing the
- * buffer. That's about 40 Read segments for a 1KB inline
- * threshold.
+ * The segment count is limited to how many segments can fit in the
+ * transport header without overflowing the buffer. That's about 40
+ * Read segments for a 1KB inline threshold.
*
* Return values:
- * %true: Read list is valid. @rctxt's xdr_stream is updated
- * to point to the first byte past the Read list.
- * %false: Read list is corrupt. @rctxt's xdr_stream is left
- * in an unknown state.
+ * %true: Read list is valid. @rctxt's xdr_stream is updated to point
+ * to the first byte past the Read list. rc_read_pcl and
+ * rc_call_pcl cl_count fields are set to the number of
+ * Read segments in the list.
+ * %false: Read list is corrupt. @rctxt's xdr_stream is left in an
+ * unknown state.
*/
-static bool xdr_check_read_list(struct svc_rdma_recv_ctxt *rctxt)
+static bool xdr_count_read_segments(struct svc_rdma_recv_ctxt *rctxt, __be32 *p)
{
- u32 position, len;
- bool first;
- __be32 *p;
-
- p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p));
- if (!p)
- return false;
-
- len = 0;
- first = true;
+ rctxt->rc_call_pcl.cl_count = 0;
+ rctxt->rc_read_pcl.cl_count = 0;
while (xdr_item_is_present(p)) {
+ u32 position, handle, length;
+ u64 offset;
+
p = xdr_inline_decode(&rctxt->rc_stream,
rpcrdma_readseg_maxsz * sizeof(*p));
if (!p)
return false;
- if (first) {
- position = be32_to_cpup(p);
- first = false;
- } else if (be32_to_cpup(p) != position) {
- return false;
+ xdr_decode_read_segment(p, &position, &handle,
+ &length, &offset);
+ if (position) {
+ if (position & 3)
+ return false;
+ ++rctxt->rc_read_pcl.cl_count;
+ } else {
+ ++rctxt->rc_call_pcl.cl_count;
}
- p += 2;
- len += be32_to_cpup(p);
p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p));
if (!p)
return false;
}
- return len <= MAX_BYTES_SPECIAL_CHUNK;
+ return true;
}
-/* The segment count is limited to how many segments can
- * fit in the transport header without overflowing the
- * buffer. That's about 60 Write segments for a 1KB inline
- * threshold.
+/* Sanity check the Read list.
+ *
+ * Sanity checks:
+ * - Read list does not overflow Receive buffer.
+ * - Chunk size limited by largest NFS data payload.
+ *
+ * Return values:
+ * %true: Read list is valid. @rctxt's xdr_stream is updated
+ * to point to the first byte past the Read list.
+ * %false: Read list is corrupt. @rctxt's xdr_stream is left
+ * in an unknown state.
*/
-static bool xdr_check_write_chunk(struct svc_rdma_recv_ctxt *rctxt, u32 maxlen)
+static bool xdr_check_read_list(struct svc_rdma_recv_ctxt *rctxt)
{
- u32 i, segcount, total;
__be32 *p;
p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p));
if (!p)
return false;
- segcount = be32_to_cpup(p);
+ if (!xdr_count_read_segments(rctxt, p))
+ return false;
+ if (!pcl_alloc_call(rctxt, p))
+ return false;
+ return pcl_alloc_read(rctxt, p);
+}
- total = 0;
- for (i = 0; i < segcount; i++) {
- u32 handle, length;
- u64 offset;
+static bool xdr_check_write_chunk(struct svc_rdma_recv_ctxt *rctxt)
+{
+ u32 segcount;
+ __be32 *p;
- p = xdr_inline_decode(&rctxt->rc_stream,
- rpcrdma_segment_maxsz * sizeof(*p));
- if (!p)
- return false;
+ if (xdr_stream_decode_u32(&rctxt->rc_stream, &segcount))
+ return false;
- xdr_decode_rdma_segment(p, &handle, &length, &offset);
- trace_svcrdma_decode_wseg(handle, length, offset);
+ /* A bogus segcount causes this buffer overflow check to fail. */
+ p = xdr_inline_decode(&rctxt->rc_stream,
+ segcount * rpcrdma_segment_maxsz * sizeof(*p));
+ return p != NULL;
+}
- total += length;
+/**
+ * xdr_count_write_chunks - Count number of Write chunks in Write list
+ * @rctxt: Received header and decoding state
+ * @p: start of an un-decoded Write list
+ *
+ * Before allocating anything, ensure the ingress Write list is
+ * safe to use.
+ *
+ * Return values:
+ * %true: Write list is valid. @rctxt's xdr_stream is updated
+ * to point to the first byte past the Write list, and
+ * the number of Write chunks is in rc_write_pcl.cl_count.
+ * %false: Write list is corrupt. @rctxt's xdr_stream is left
+ * in an indeterminate state.
+ */
+static bool xdr_count_write_chunks(struct svc_rdma_recv_ctxt *rctxt, __be32 *p)
+{
+ rctxt->rc_write_pcl.cl_count = 0;
+ while (xdr_item_is_present(p)) {
+ if (!xdr_check_write_chunk(rctxt))
+ return false;
+ ++rctxt->rc_write_pcl.cl_count;
+ p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p));
+ if (!p)
+ return false;
}
- return total <= maxlen;
+ return true;
}
/* Sanity check the Write list.
@@ -498,24 +535,18 @@ static bool xdr_check_write_chunk(struct svc_rdma_recv_ctxt *rctxt, u32 maxlen)
*/
static bool xdr_check_write_list(struct svc_rdma_recv_ctxt *rctxt)
{
- u32 chcount = 0;
__be32 *p;
p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p));
if (!p)
return false;
- rctxt->rc_write_list = p;
- while (xdr_item_is_present(p)) {
- if (!xdr_check_write_chunk(rctxt, MAX_BYTES_WRITE_CHUNK))
- return false;
- ++chcount;
- p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p));
- if (!p)
- return false;
- }
- if (!chcount)
- rctxt->rc_write_list = NULL;
- return chcount < 2;
+ if (!xdr_count_write_chunks(rctxt, p))
+ return false;
+ if (!pcl_alloc_write(rctxt, &rctxt->rc_write_pcl, p))
+ return false;
+
+ rctxt->rc_cur_result_payload = pcl_first_chunk(&rctxt->rc_write_pcl);
+ return true;
}
/* Sanity check the Reply chunk.
@@ -537,13 +568,14 @@ static bool xdr_check_reply_chunk(struct svc_rdma_recv_ctxt *rctxt)
p = xdr_inline_decode(&rctxt->rc_stream, sizeof(*p));
if (!p)
return false;
- rctxt->rc_reply_chunk = NULL;
- if (xdr_item_is_present(p)) {
- if (!xdr_check_write_chunk(rctxt, MAX_BYTES_SPECIAL_CHUNK))
- return false;
- rctxt->rc_reply_chunk = p;
- }
- return true;
+
+ if (!xdr_item_is_present(p))
+ return true;
+ if (!xdr_check_write_chunk(rctxt))
+ return false;
+
+ rctxt->rc_reply_pcl.cl_count = 1;
+ return pcl_alloc_write(rctxt, &rctxt->rc_reply_pcl, p);
}
/* RPC-over-RDMA Version One private extension: Remote Invalidation.
@@ -552,60 +584,53 @@ static bool xdr_check_reply_chunk(struct svc_rdma_recv_ctxt *rctxt)
*
* If there is exactly one distinct R_key in the received transport
* header, set rc_inv_rkey to that R_key. Otherwise, set it to zero.
- *
- * Perform this operation while the received transport header is
- * still in the CPU cache.
*/
static void svc_rdma_get_inv_rkey(struct svcxprt_rdma *rdma,
struct svc_rdma_recv_ctxt *ctxt)
{
- __be32 inv_rkey, *p;
- u32 i, segcount;
+ struct svc_rdma_segment *segment;
+ struct svc_rdma_chunk *chunk;
+ u32 inv_rkey;
ctxt->rc_inv_rkey = 0;
if (!rdma->sc_snd_w_inv)
return;
- inv_rkey = xdr_zero;
- p = ctxt->rc_recv_buf;
- p += rpcrdma_fixed_maxsz;
-
- /* Read list */
- while (xdr_item_is_present(p++)) {
- p++; /* position */
- if (inv_rkey == xdr_zero)
- inv_rkey = *p;
- else if (inv_rkey != *p)
- return;
- p += 4;
+ inv_rkey = 0;
+ pcl_for_each_chunk(chunk, &ctxt->rc_call_pcl) {
+ pcl_for_each_segment(segment, chunk) {
+ if (inv_rkey == 0)
+ inv_rkey = segment->rs_handle;
+ else if (inv_rkey != segment->rs_handle)
+ return;
+ }
}
-
- /* Write list */
- while (xdr_item_is_present(p++)) {
- segcount = be32_to_cpup(p++);
- for (i = 0; i < segcount; i++) {
- if (inv_rkey == xdr_zero)
- inv_rkey = *p;
- else if (inv_rkey != *p)
+ pcl_for_each_chunk(chunk, &ctxt->rc_read_pcl) {
+ pcl_for_each_segment(segment, chunk) {
+ if (inv_rkey == 0)
+ inv_rkey = segment->rs_handle;
+ else if (inv_rkey != segment->rs_handle)
return;
- p += 4;
}
}
-
- /* Reply chunk */
- if (xdr_item_is_present(p++)) {
- segcount = be32_to_cpup(p++);
- for (i = 0; i < segcount; i++) {
- if (inv_rkey == xdr_zero)
- inv_rkey = *p;
- else if (inv_rkey != *p)
+ pcl_for_each_chunk(chunk, &ctxt->rc_write_pcl) {
+ pcl_for_each_segment(segment, chunk) {
+ if (inv_rkey == 0)
+ inv_rkey = segment->rs_handle;
+ else if (inv_rkey != segment->rs_handle)
return;
- p += 4;
}
}
-
- ctxt->rc_inv_rkey = be32_to_cpu(inv_rkey);
+ pcl_for_each_chunk(chunk, &ctxt->rc_reply_pcl) {
+ pcl_for_each_segment(segment, chunk) {
+ if (inv_rkey == 0)
+ inv_rkey = segment->rs_handle;
+ else if (inv_rkey != segment->rs_handle)
+ return;
+ }
+ }
+ ctxt->rc_inv_rkey = inv_rkey;
}
/**
@@ -641,7 +666,8 @@ static int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg,
if (*p != rpcrdma_version)
goto out_version;
p += 2;
- switch (*p) {
+ rctxt->rc_msgtype = *p;
+ switch (rctxt->rc_msgtype) {
case rdma_msg:
break;
case rdma_nomsg:
@@ -735,30 +761,28 @@ static void svc_rdma_send_error(struct svcxprt_rdma *rdma,
* the RPC/RDMA header small and fixed in size, so it is
* straightforward to check the RPC header's direction field.
*/
-static bool svc_rdma_is_backchannel_reply(struct svc_xprt *xprt,
- __be32 *rdma_resp)
+static bool svc_rdma_is_reverse_direction_reply(struct svc_xprt *xprt,
+ struct svc_rdma_recv_ctxt *rctxt)
{
- __be32 *p;
+ __be32 *p = rctxt->rc_recv_buf;
if (!xprt->xpt_bc_xprt)
return false;
- p = rdma_resp + 3;
- if (*p++ != rdma_msg)
+ if (rctxt->rc_msgtype != rdma_msg)
return false;
- if (*p++ != xdr_zero)
+ if (!pcl_is_empty(&rctxt->rc_call_pcl))
return false;
- if (*p++ != xdr_zero)
+ if (!pcl_is_empty(&rctxt->rc_read_pcl))
return false;
- if (*p++ != xdr_zero)
+ if (!pcl_is_empty(&rctxt->rc_write_pcl))
return false;
-
- /* XID sanity */
- if (*p++ != *rdma_resp)
+ if (!pcl_is_empty(&rctxt->rc_reply_pcl))
return false;
- /* call direction */
- if (*p == cpu_to_be32(RPC_CALL))
+
+ /* RPC call direction */
+ if (*(p + 8) == cpu_to_be32(RPC_CALL))
return false;
return true;
@@ -800,7 +824,6 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
struct svcxprt_rdma *rdma_xprt =
container_of(xprt, struct svcxprt_rdma, sc_xprt);
struct svc_rdma_recv_ctxt *ctxt;
- __be32 *p;
int ret;
rqstp->rq_xprt_ctxt = NULL;
@@ -833,7 +856,6 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
rqstp->rq_respages = rqstp->rq_pages;
rqstp->rq_next_page = rqstp->rq_respages;
- p = (__be32 *)rqstp->rq_arg.head[0].iov_base;
ret = svc_rdma_xdr_decode_req(&rqstp->rq_arg, ctxt);
if (ret < 0)
goto out_err;
@@ -841,14 +863,14 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
goto out_drop;
rqstp->rq_xprt_hlen = ret;
- if (svc_rdma_is_backchannel_reply(xprt, p))
+ if (svc_rdma_is_reverse_direction_reply(xprt, ctxt))
goto out_backchannel;
svc_rdma_get_inv_rkey(rdma_xprt, ctxt);
- p += rpcrdma_fixed_maxsz;
- if (*p != xdr_zero)
- goto out_readchunk;
+ if (!pcl_is_empty(&ctxt->rc_read_pcl) ||
+ !pcl_is_empty(&ctxt->rc_call_pcl))
+ goto out_readlist;
complete:
rqstp->rq_xprt_ctxt = ctxt;
@@ -856,10 +878,10 @@ complete:
svc_xprt_copy_addrs(rqstp, xprt);
return rqstp->rq_arg.len;
-out_readchunk:
- ret = svc_rdma_recv_read_chunk(rdma_xprt, rqstp, ctxt, p);
+out_readlist:
+ ret = svc_rdma_process_read_list(rdma_xprt, rqstp, ctxt);
if (ret < 0)
- goto out_postfail;
+ goto out_readfail;
return 0;
out_err:
@@ -867,7 +889,7 @@ out_err:
svc_rdma_recv_ctxt_put(rdma_xprt, ctxt);
return 0;
-out_postfail:
+out_readfail:
if (ret == -EINVAL)
svc_rdma_send_error(rdma_xprt, ctxt, ret);
svc_rdma_recv_ctxt_put(rdma_xprt, ctxt);
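Both counting passes above lean on the xdr_decode_read_segment() helper added alongside this series. A minimal sketch of walking an un-decoded Read list with it (demo_walk_read_list is hypothetical; real callers also bounds-check each decode against the stream):

#include <linux/sunrpc/rpc_rdma.h>

/* Sketch: each Read list entry is a one-word discriminator
 * followed by position, handle, length, and a 64-bit offset.
 */
static void demo_walk_read_list(__be32 *p, unsigned int nsegs)
{
        unsigned int i;

        for (i = 0; i < nsegs; i++) {
                u32 position, handle, length;
                u64 offset;

                p++;    /* skip the list discriminator */
                p = xdr_decode_read_segment(p, &position, &handle,
                                            &length, &offset);
                pr_debug("pos=%u handle=0x%08x len=%u off=%llu\n",
                         position, handle, length,
                         (unsigned long long)offset);
        }
}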
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index 80a0c0e87590..0b63e1321d74 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -190,14 +190,14 @@ static void svc_rdma_cc_release(struct svc_rdma_chunk_ctxt *cc,
* - Stores arguments for the SGL constructor functions
*/
struct svc_rdma_write_info {
+ const struct svc_rdma_chunk *wi_chunk;
+
/* write state of this chunk */
unsigned int wi_seg_off;
unsigned int wi_seg_no;
- unsigned int wi_nsegs;
- __be32 *wi_segs;
/* SGL constructor arguments */
- struct xdr_buf *wi_xdr;
+ const struct xdr_buf *wi_xdr;
unsigned char *wi_base;
unsigned int wi_next_off;
@@ -205,7 +205,8 @@ struct svc_rdma_write_info {
};
static struct svc_rdma_write_info *
-svc_rdma_write_info_alloc(struct svcxprt_rdma *rdma, __be32 *chunk)
+svc_rdma_write_info_alloc(struct svcxprt_rdma *rdma,
+ const struct svc_rdma_chunk *chunk)
{
struct svc_rdma_write_info *info;
@@ -213,10 +214,9 @@ svc_rdma_write_info_alloc(struct svcxprt_rdma *rdma, __be32 *chunk)
if (!info)
return info;
+ info->wi_chunk = chunk;
info->wi_seg_off = 0;
info->wi_seg_no = 0;
- info->wi_nsegs = be32_to_cpup(++chunk);
- info->wi_segs = ++chunk;
svc_rdma_cc_init(rdma, &info->wi_cc);
info->wi_cc.cc_cqe.done = svc_rdma_write_done;
return info;
@@ -258,11 +258,11 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
/* State for pulling a Read chunk.
*/
struct svc_rdma_read_info {
+ struct svc_rqst *ri_rqst;
struct svc_rdma_recv_ctxt *ri_readctxt;
- unsigned int ri_position;
unsigned int ri_pageno;
unsigned int ri_pageoff;
- unsigned int ri_chunklen;
+ unsigned int ri_totalbytes;
struct svc_rdma_chunk_ctxt ri_cc;
};
@@ -358,7 +358,6 @@ static int svc_rdma_post_chunk_ctxt(struct svc_rdma_chunk_ctxt *cc)
do {
if (atomic_sub_return(cc->cc_sqecount,
&rdma->sc_sq_avail) > 0) {
- trace_svcrdma_post_chunk(&cc->cc_cid, cc->cc_sqecount);
ret = ib_post_send(rdma->sc_qp, first_wr, &bad_wr);
if (ret)
break;
@@ -405,7 +404,7 @@ static void svc_rdma_pagelist_to_sg(struct svc_rdma_write_info *info,
struct svc_rdma_rw_ctxt *ctxt)
{
unsigned int sge_no, sge_bytes, page_off, page_no;
- struct xdr_buf *xdr = info->wi_xdr;
+ const struct xdr_buf *xdr = info->wi_xdr;
struct scatterlist *sg;
struct page **page;
@@ -443,40 +442,36 @@ svc_rdma_build_writes(struct svc_rdma_write_info *info,
{
struct svc_rdma_chunk_ctxt *cc = &info->wi_cc;
struct svcxprt_rdma *rdma = cc->cc_rdma;
+ const struct svc_rdma_segment *seg;
struct svc_rdma_rw_ctxt *ctxt;
- __be32 *seg;
int ret;
- seg = info->wi_segs + info->wi_seg_no * rpcrdma_segment_maxsz;
do {
unsigned int write_len;
- u32 handle, length;
u64 offset;
- if (info->wi_seg_no >= info->wi_nsegs)
+ seg = &info->wi_chunk->ch_segments[info->wi_seg_no];
+ if (info->wi_seg_no >= info->wi_chunk->ch_segcount)
goto out_overflow;
- xdr_decode_rdma_segment(seg, &handle, &length, &offset);
- offset += info->wi_seg_off;
-
- write_len = min(remaining, length - info->wi_seg_off);
+ write_len = min(remaining, seg->rs_length - info->wi_seg_off);
+ if (!write_len)
+ goto out_overflow;
ctxt = svc_rdma_get_rw_ctxt(rdma,
(write_len >> PAGE_SHIFT) + 2);
if (!ctxt)
return -ENOMEM;
constructor(info, write_len, ctxt);
- ret = svc_rdma_rw_ctx_init(rdma, ctxt, offset, handle,
+ offset = seg->rs_offset + info->wi_seg_off;
+ ret = svc_rdma_rw_ctx_init(rdma, ctxt, offset, seg->rs_handle,
DMA_TO_DEVICE);
if (ret < 0)
return -EIO;
- trace_svcrdma_send_wseg(handle, write_len, offset);
-
list_add(&ctxt->rw_list, &cc->cc_rwctxts);
cc->cc_sqecount += ret;
- if (write_len == length - info->wi_seg_off) {
- seg += 4;
+ if (write_len == seg->rs_length - info->wi_seg_off) {
info->wi_seg_no++;
info->wi_seg_off = 0;
} else {
@@ -489,31 +484,46 @@ svc_rdma_build_writes(struct svc_rdma_write_info *info,
out_overflow:
trace_svcrdma_small_wrch_err(rdma, remaining, info->wi_seg_no,
- info->wi_nsegs);
+ info->wi_chunk->ch_segcount);
return -E2BIG;
}
-/* Send one of an xdr_buf's kvecs by itself. To send a Reply
- * chunk, the whole RPC Reply is written back to the client.
- * This function writes either the head or tail of the xdr_buf
- * containing the Reply.
+/**
+ * svc_rdma_iov_write - Construct RDMA Writes from an iov
+ * @info: pointer to write arguments
+ * @iov: kvec to write
+ *
+ * Returns:
+ * On success, returns zero
+ * %-E2BIG if the client-provided Write chunk is too small
+ * %-ENOMEM if a resource has been exhausted
+ * %-EIO if an rdma-rw error occurred
*/
-static int svc_rdma_send_xdr_kvec(struct svc_rdma_write_info *info,
- struct kvec *vec)
+static int svc_rdma_iov_write(struct svc_rdma_write_info *info,
+ const struct kvec *iov)
{
- info->wi_base = vec->iov_base;
+ info->wi_base = iov->iov_base;
return svc_rdma_build_writes(info, svc_rdma_vec_to_sg,
- vec->iov_len);
+ iov->iov_len);
}
-/* Send an xdr_buf's page list by itself. A Write chunk is just
- * the page list. A Reply chunk is @xdr's head, page list, and
- * tail. This function is shared between the two types of chunk.
+/**
+ * svc_rdma_pages_write - Construct RDMA Writes from pages
+ * @info: pointer to write arguments
+ * @xdr: xdr_buf with pages to write
+ * @offset: offset into the content of @xdr
+ * @length: number of bytes to write
+ *
+ * Returns:
+ * On success, returns zero
+ * %-E2BIG if the client-provided Write chunk is too small
+ * %-ENOMEM if a resource has been exhausted
+ * %-EIO if an rdma-rw error occurred
*/
-static int svc_rdma_send_xdr_pagelist(struct svc_rdma_write_info *info,
- struct xdr_buf *xdr,
- unsigned int offset,
- unsigned long length)
+static int svc_rdma_pages_write(struct svc_rdma_write_info *info,
+ const struct xdr_buf *xdr,
+ unsigned int offset,
+ unsigned long length)
{
info->wi_xdr = xdr;
info->wi_next_off = offset - xdr->head[0].iov_len;
@@ -522,12 +532,48 @@ static int svc_rdma_send_xdr_pagelist(struct svc_rdma_write_info *info,
}
/**
+ * svc_rdma_xb_write - Construct RDMA Writes to write an xdr_buf
+ * @xdr: xdr_buf to write
+ * @data: pointer to write arguments
+ *
+ * Returns:
+ * On success, returns zero
+ * %-E2BIG if the client-provided Write chunk is too small
+ * %-ENOMEM if a resource has been exhausted
+ * %-EIO if an rdma-rw error occurred
+ */
+static int svc_rdma_xb_write(const struct xdr_buf *xdr, void *data)
+{
+ struct svc_rdma_write_info *info = data;
+ int ret;
+
+ if (xdr->head[0].iov_len) {
+ ret = svc_rdma_iov_write(info, &xdr->head[0]);
+ if (ret < 0)
+ return ret;
+ }
+
+ if (xdr->page_len) {
+ ret = svc_rdma_pages_write(info, xdr, xdr->head[0].iov_len,
+ xdr->page_len);
+ if (ret < 0)
+ return ret;
+ }
+
+ if (xdr->tail[0].iov_len) {
+ ret = svc_rdma_iov_write(info, &xdr->tail[0]);
+ if (ret < 0)
+ return ret;
+ }
+
+ return xdr->len;
+}
+
+/**
* svc_rdma_send_write_chunk - Write all segments in a Write chunk
* @rdma: controlling RDMA transport
- * @wr_ch: Write chunk provided by client
+ * @chunk: Write chunk provided by the client
* @xdr: xdr_buf containing the data payload
- * @offset: payload's byte offset in @xdr
- * @length: size of payload, in bytes
*
* Returns a non-negative number of bytes the chunk consumed, or
* %-E2BIG if the payload was larger than the Write chunk,
@@ -536,30 +582,28 @@ static int svc_rdma_send_xdr_pagelist(struct svc_rdma_write_info *info,
* %-ENOTCONN if posting failed (connection is lost),
* %-EIO if rdma_rw initialization failed (DMA mapping, etc).
*/
-int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, __be32 *wr_ch,
- struct xdr_buf *xdr,
- unsigned int offset, unsigned long length)
+int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma,
+ const struct svc_rdma_chunk *chunk,
+ const struct xdr_buf *xdr)
{
struct svc_rdma_write_info *info;
+ struct svc_rdma_chunk_ctxt *cc;
int ret;
- if (!length)
- return 0;
-
- info = svc_rdma_write_info_alloc(rdma, wr_ch);
+ info = svc_rdma_write_info_alloc(rdma, chunk);
if (!info)
return -ENOMEM;
+ cc = &info->wi_cc;
- ret = svc_rdma_send_xdr_pagelist(info, xdr, offset, length);
- if (ret < 0)
+ ret = svc_rdma_xb_write(xdr, info);
+ if (ret != xdr->len)
goto out_err;
- ret = svc_rdma_post_chunk_ctxt(&info->wi_cc);
+ trace_svcrdma_post_write_chunk(&cc->cc_cid, cc->cc_sqecount);
+ ret = svc_rdma_post_chunk_ctxt(cc);
if (ret < 0)
goto out_err;
-
- trace_svcrdma_send_write_chunk(xdr->page_len);
- return length;
+ return xdr->len;
out_err:
svc_rdma_write_info_free(info);
@@ -581,62 +625,62 @@ out_err:
*/
int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma,
const struct svc_rdma_recv_ctxt *rctxt,
- struct xdr_buf *xdr)
+ const struct xdr_buf *xdr)
{
struct svc_rdma_write_info *info;
- int consumed, ret;
+ struct svc_rdma_chunk_ctxt *cc;
+ struct svc_rdma_chunk *chunk;
+ int ret;
- info = svc_rdma_write_info_alloc(rdma, rctxt->rc_reply_chunk);
+ if (pcl_is_empty(&rctxt->rc_reply_pcl))
+ return 0;
+
+ chunk = pcl_first_chunk(&rctxt->rc_reply_pcl);
+ info = svc_rdma_write_info_alloc(rdma, chunk);
if (!info)
return -ENOMEM;
+ cc = &info->wi_cc;
- ret = svc_rdma_send_xdr_kvec(info, &xdr->head[0]);
+ ret = pcl_process_nonpayloads(&rctxt->rc_write_pcl, xdr,
+ svc_rdma_xb_write, info);
if (ret < 0)
goto out_err;
- consumed = xdr->head[0].iov_len;
-
- /* Send the page list in the Reply chunk only if the
- * client did not provide Write chunks.
- */
- if (!rctxt->rc_write_list && xdr->page_len) {
- ret = svc_rdma_send_xdr_pagelist(info, xdr,
- xdr->head[0].iov_len,
- xdr->page_len);
- if (ret < 0)
- goto out_err;
- consumed += xdr->page_len;
- }
-
- if (xdr->tail[0].iov_len) {
- ret = svc_rdma_send_xdr_kvec(info, &xdr->tail[0]);
- if (ret < 0)
- goto out_err;
- consumed += xdr->tail[0].iov_len;
- }
- ret = svc_rdma_post_chunk_ctxt(&info->wi_cc);
+ trace_svcrdma_post_reply_chunk(&cc->cc_cid, cc->cc_sqecount);
+ ret = svc_rdma_post_chunk_ctxt(cc);
if (ret < 0)
goto out_err;
- trace_svcrdma_send_reply_chunk(consumed);
- return consumed;
+ return xdr->len;
out_err:
svc_rdma_write_info_free(info);
return ret;
}
+/**
+ * svc_rdma_build_read_segment - Build RDMA Read WQEs to pull one RDMA segment
+ * @info: context for ongoing I/O
+ * @segment: co-ordinates of remote memory to be read
+ *
+ * Returns:
+ * %0: the Read WR chain was constructed successfully
+ * %-EINVAL: there were not enough rq_pages to finish
+ * %-ENOMEM: allocating local resources failed
+ * %-EIO: a DMA mapping error occurred
+ */
static int svc_rdma_build_read_segment(struct svc_rdma_read_info *info,
- struct svc_rqst *rqstp,
- u32 rkey, u32 len, u64 offset)
+ const struct svc_rdma_segment *segment)
{
struct svc_rdma_recv_ctxt *head = info->ri_readctxt;
struct svc_rdma_chunk_ctxt *cc = &info->ri_cc;
+ struct svc_rqst *rqstp = info->ri_rqst;
struct svc_rdma_rw_ctxt *ctxt;
- unsigned int sge_no, seg_len;
+ unsigned int sge_no, seg_len, len;
struct scatterlist *sg;
int ret;
+ len = segment->rs_length;
sge_no = PAGE_ALIGN(info->ri_pageoff + len) >> PAGE_SHIFT;
ctxt = svc_rdma_get_rw_ctxt(cc->cc_rdma, sge_no);
if (!ctxt)
@@ -670,8 +714,8 @@ static int svc_rdma_build_read_segment(struct svc_rdma_read_info *info,
goto out_overrun;
}
- ret = svc_rdma_rw_ctx_init(cc->cc_rdma, ctxt, offset, rkey,
- DMA_FROM_DEVICE);
+ ret = svc_rdma_rw_ctx_init(cc->cc_rdma, ctxt, segment->rs_offset,
+ segment->rs_handle, DMA_FROM_DEVICE);
if (ret < 0)
return -EIO;
@@ -684,54 +728,177 @@ out_overrun:
return -EINVAL;
}
-/* Walk the segments in the Read chunk starting at @p and construct
- * RDMA Read operations to pull the chunk to the server.
+/**
+ * svc_rdma_build_read_chunk - Build RDMA Read WQEs to pull one RDMA chunk
+ * @info: context for ongoing I/O
+ * @chunk: Read chunk to pull
+ *
+ * Return values:
+ * %0: the Read WR chain was constructed successfully
+ * %-EINVAL: there were not enough resources to finish
+ * %-ENOMEM: allocating local resources failed
+ * %-EIO: a DMA mapping error occurred
*/
-static int svc_rdma_build_read_chunk(struct svc_rqst *rqstp,
- struct svc_rdma_read_info *info,
- __be32 *p)
+static int svc_rdma_build_read_chunk(struct svc_rdma_read_info *info,
+ const struct svc_rdma_chunk *chunk)
{
+ const struct svc_rdma_segment *segment;
int ret;
ret = -EINVAL;
- info->ri_chunklen = 0;
- while (*p++ != xdr_zero && be32_to_cpup(p++) == info->ri_position) {
- u32 handle, length;
- u64 offset;
+ pcl_for_each_segment(segment, chunk) {
+ ret = svc_rdma_build_read_segment(info, segment);
+ if (ret < 0)
+ break;
+ info->ri_totalbytes += segment->rs_length;
+ }
+ return ret;
+}
+
+/**
+ * svc_rdma_copy_inline_range - Copy part of the inline content into pages
+ * @info: context for RDMA Reads
+ * @offset: offset into the Receive buffer of region to copy
+ * @remaining: length of region to copy
+ *
+ * Take a page at a time from rqstp->rq_pages and copy the inline
+ * content from the Receive buffer into that page. Update
+ * info->ri_pageno and info->ri_pageoff so that the next RDMA Read
+ * result will land contiguously with the copied content.
+ *
+ * Return values:
+ * %0: Inline content was successfully copied
+ * %-EINVAL: offset or length was incorrect
+ */
+static int svc_rdma_copy_inline_range(struct svc_rdma_read_info *info,
+ unsigned int offset,
+ unsigned int remaining)
+{
+ struct svc_rdma_recv_ctxt *head = info->ri_readctxt;
+ unsigned char *dst, *src = head->rc_recv_buf;
+ struct svc_rqst *rqstp = info->ri_rqst;
+ unsigned int page_no, numpages;
+
+ numpages = PAGE_ALIGN(info->ri_pageoff + remaining) >> PAGE_SHIFT;
+ for (page_no = 0; page_no < numpages; page_no++) {
+ unsigned int page_len;
+
+ page_len = min_t(unsigned int, remaining,
+ PAGE_SIZE - info->ri_pageoff);
+
+ head->rc_arg.pages[info->ri_pageno] =
+ rqstp->rq_pages[info->ri_pageno];
+ if (!info->ri_pageoff)
+ head->rc_page_count++;
+
+ dst = page_address(head->rc_arg.pages[info->ri_pageno]);
+ memcpy(dst + info->ri_pageoff, src + offset, page_len);
+
+ info->ri_totalbytes += page_len;
+ info->ri_pageoff += page_len;
+ if (info->ri_pageoff == PAGE_SIZE) {
+ info->ri_pageno++;
+ info->ri_pageoff = 0;
+ }
+ remaining -= page_len;
+ offset += page_len;
+ }
+
+ return 0;
+}
+
+/**
+ * svc_rdma_read_multiple_chunks - Construct RDMA Reads to pull data item Read chunks
+ * @info: context for RDMA Reads
+ *
+ * The chunk data lands in head->rc_arg as a series of contiguous pages,
+ * like an incoming TCP call.
+ *
+ * Return values:
+ * %0: RDMA Read WQEs were successfully built
+ * %-EINVAL: client provided too many chunks or segments,
+ * %-ENOMEM: rdma_rw context pool was exhausted,
+ * %-ENOTCONN: posting failed (connection is lost),
+ * %-EIO: rdma_rw initialization failed (DMA mapping, etc).
+ */
+static noinline int svc_rdma_read_multiple_chunks(struct svc_rdma_read_info *info)
+{
+ struct svc_rdma_recv_ctxt *head = info->ri_readctxt;
+ const struct svc_rdma_pcl *pcl = &head->rc_read_pcl;
+ struct svc_rdma_chunk *chunk, *next;
+ struct xdr_buf *buf = &head->rc_arg;
+ unsigned int start, length;
+ int ret;
- p = xdr_decode_rdma_segment(p, &handle, &length, &offset);
- ret = svc_rdma_build_read_segment(info, rqstp, handle, length,
- offset);
+ start = 0;
+ chunk = pcl_first_chunk(pcl);
+ length = chunk->ch_position;
+ ret = svc_rdma_copy_inline_range(info, start, length);
+ if (ret < 0)
+ return ret;
+
+ pcl_for_each_chunk(chunk, pcl) {
+ ret = svc_rdma_build_read_chunk(info, chunk);
if (ret < 0)
+ return ret;
+
+ next = pcl_next_chunk(pcl, chunk);
+ if (!next)
break;
- trace_svcrdma_send_rseg(handle, length, offset);
- info->ri_chunklen += length;
+ start += length;
+ length = next->ch_position - info->ri_totalbytes;
+ ret = svc_rdma_copy_inline_range(info, start, length);
+ if (ret < 0)
+ return ret;
}
- return ret;
+ start += length;
+ length = head->rc_byte_len - start;
+ ret = svc_rdma_copy_inline_range(info, start, length);
+ if (ret < 0)
+ return ret;
+
+ buf->len += info->ri_totalbytes;
+ buf->buflen += info->ri_totalbytes;
+
+ head->rc_hdr_count = 1;
+ buf->head[0].iov_base = page_address(head->rc_pages[0]);
+ buf->head[0].iov_len = min_t(size_t, PAGE_SIZE, info->ri_totalbytes);
+ buf->page_len = info->ri_totalbytes - buf->head[0].iov_len;
+ return 0;
}
-/* Construct RDMA Reads to pull over a normal Read chunk. The chunk
- * data lands in the page list of head->rc_arg.pages.
+/**
+ * svc_rdma_read_data_item - Construct RDMA Reads to pull data item Read chunks
+ * @info: context for RDMA Reads
+ *
+ * The chunk data lands in the page list of head->rc_arg.pages.
*
* Currently NFSD does not look at the head->rc_arg.tail[0] iovec.
* Therefore, XDR round-up of the Read chunk and trailing
* inline content must both be added at the end of the pagelist.
+ *
+ * Return values:
+ * %0: RDMA Read WQEs were successfully built
+ * %-EINVAL: client provided too many chunks or segments,
+ * %-ENOMEM: rdma_rw context pool was exhausted,
+ * %-ENOTCONN: posting failed (connection is lost),
+ * %-EIO: rdma_rw initialization failed (DMA mapping, etc).
*/
-static int svc_rdma_build_normal_read_chunk(struct svc_rqst *rqstp,
- struct svc_rdma_read_info *info,
- __be32 *p)
+static int svc_rdma_read_data_item(struct svc_rdma_read_info *info)
{
struct svc_rdma_recv_ctxt *head = info->ri_readctxt;
+ struct xdr_buf *buf = &head->rc_arg;
+ struct svc_rdma_chunk *chunk;
+ unsigned int length;
int ret;
- ret = svc_rdma_build_read_chunk(rqstp, info, p);
+ chunk = pcl_first_chunk(&head->rc_read_pcl);
+ ret = svc_rdma_build_read_chunk(info, chunk);
if (ret < 0)
goto out;
- trace_svcrdma_send_read_chunk(info->ri_chunklen, info->ri_position);
-
head->rc_hdr_count = 0;
/* Split the Receive buffer between the head and tail
@@ -739,11 +906,9 @@ static int svc_rdma_build_normal_read_chunk(struct svc_rqst *rqstp,
* chunk is not included in either the pagelist or in
* the tail.
*/
- head->rc_arg.tail[0].iov_base =
- head->rc_arg.head[0].iov_base + info->ri_position;
- head->rc_arg.tail[0].iov_len =
- head->rc_arg.head[0].iov_len - info->ri_position;
- head->rc_arg.head[0].iov_len = info->ri_position;
+ buf->tail[0].iov_base = buf->head[0].iov_base + chunk->ch_position;
+ buf->tail[0].iov_len = buf->head[0].iov_len - chunk->ch_position;
+ buf->head[0].iov_len = chunk->ch_position;
/* Read chunk may need XDR roundup (see RFC 8166, s. 3.4.5.2).
*
@@ -754,50 +919,149 @@ static int svc_rdma_build_normal_read_chunk(struct svc_rqst *rqstp,
* Currently these chunks always start at page offset 0,
* thus the rounded-up length never crosses a page boundary.
*/
- info->ri_chunklen = XDR_QUADLEN(info->ri_chunklen) << 2;
-
- head->rc_arg.page_len = info->ri_chunklen;
- head->rc_arg.len += info->ri_chunklen;
- head->rc_arg.buflen += info->ri_chunklen;
+ length = XDR_QUADLEN(info->ri_totalbytes) << 2;
+ buf->page_len = length;
+ buf->len += length;
+ buf->buflen += length;
out:
return ret;
}
-/* Construct RDMA Reads to pull over a Position Zero Read chunk.
- * The start of the data lands in the first page just after
- * the Transport header, and the rest lands in the page list of
+/**
+ * svc_rdma_read_chunk_range - Build RDMA Read WQEs for portion of a chunk
+ * @info: context for RDMA Reads
+ * @chunk: parsed Call chunk to pull
+ * @offset: offset of region to pull
+ * @length: length of region to pull
+ *
+ * Return values:
+ * %0: RDMA Read WQEs were successfully built
+ * %-EINVAL: there were not enough resources to finish
+ * %-ENOMEM: rdma_rw context pool was exhausted,
+ * %-ENOTCONN: posting failed (connection is lost),
+ * %-EIO: rdma_rw initialization failed (DMA mapping, etc).
+ */
+static int svc_rdma_read_chunk_range(struct svc_rdma_read_info *info,
+ const struct svc_rdma_chunk *chunk,
+ unsigned int offset, unsigned int length)
+{
+ const struct svc_rdma_segment *segment;
+ int ret;
+
+ ret = -EINVAL;
+ pcl_for_each_segment(segment, chunk) {
+ struct svc_rdma_segment dummy;
+
+ if (offset > segment->rs_length) {
+ offset -= segment->rs_length;
+ continue;
+ }
+
+ dummy.rs_handle = segment->rs_handle;
+ dummy.rs_length = min_t(u32, length, segment->rs_length) - offset;
+ dummy.rs_offset = segment->rs_offset + offset;
+
+ ret = svc_rdma_build_read_segment(info, &dummy);
+ if (ret < 0)
+ break;
+
+ info->ri_totalbytes += dummy.rs_length;
+ length -= dummy.rs_length;
+ offset = 0;
+ }
+ return ret;
+}
+
+/**
+ * svc_rdma_read_call_chunk - Build RDMA Read WQEs to pull a Long Message
+ * @info: context for RDMA Reads
+ *
+ * Return values:
+ * %0: RDMA Read WQEs were successfully built
+ * %-EINVAL: there were not enough resources to finish
+ * %-ENOMEM: rdma_rw context pool was exhausted,
+ * %-ENOTCONN: posting failed (connection is lost),
+ * %-EIO: rdma_rw initialization failed (DMA mapping, etc).
+ */
+static int svc_rdma_read_call_chunk(struct svc_rdma_read_info *info)
+{
+ struct svc_rdma_recv_ctxt *head = info->ri_readctxt;
+ const struct svc_rdma_chunk *call_chunk =
+ pcl_first_chunk(&head->rc_call_pcl);
+ const struct svc_rdma_pcl *pcl = &head->rc_read_pcl;
+ struct svc_rdma_chunk *chunk, *next;
+ unsigned int start, length;
+ int ret;
+
+ if (pcl_is_empty(pcl))
+ return svc_rdma_build_read_chunk(info, call_chunk);
+
+ start = 0;
+ chunk = pcl_first_chunk(pcl);
+ length = chunk->ch_position;
+ ret = svc_rdma_read_chunk_range(info, call_chunk, start, length);
+ if (ret < 0)
+ return ret;
+
+ pcl_for_each_chunk(chunk, pcl) {
+ ret = svc_rdma_build_read_chunk(info, chunk);
+ if (ret < 0)
+ return ret;
+
+ next = pcl_next_chunk(pcl, chunk);
+ if (!next)
+ break;
+
+ start += length;
+ length = next->ch_position - info->ri_totalbytes;
+ ret = svc_rdma_read_chunk_range(info, call_chunk,
+ start, length);
+ if (ret < 0)
+ return ret;
+ }
+
+ start += length;
+ length = call_chunk->ch_length - start;
+ return svc_rdma_read_chunk_range(info, call_chunk, start, length);
+}
+
+/**
+ * svc_rdma_read_special - Build RDMA Read WQEs to pull a Long Message
+ * @info: context for RDMA Reads
+ *
+ * The start of the data lands in the first page just after the
+ * Transport header, and the rest lands in the page list of
* head->rc_arg.pages.
*
* Assumptions:
- * - A PZRC has an XDR-aligned length (no implicit round-up).
- * - There can be no trailing inline content (IOW, we assume
- * a PZRC is never sent in an RDMA_MSG message, though it's
- * allowed by spec).
+ * - A PZRC is never sent in an RDMA_MSG message, though it's
+ * allowed by spec.
+ *
+ * Return values:
+ * %0: RDMA Read WQEs were successfully built
+ * %-EINVAL: client provided too many chunks or segments
+ * %-ENOMEM: rdma_rw context pool was exhausted
+ * %-ENOTCONN: posting failed (connection is lost)
+ * %-EIO: rdma_rw initialization failed (DMA mapping, etc.)
*/
-static int svc_rdma_build_pz_read_chunk(struct svc_rqst *rqstp,
- struct svc_rdma_read_info *info,
- __be32 *p)
+static noinline int svc_rdma_read_special(struct svc_rdma_read_info *info)
{
struct svc_rdma_recv_ctxt *head = info->ri_readctxt;
+ struct xdr_buf *buf = &head->rc_arg;
int ret;
- ret = svc_rdma_build_read_chunk(rqstp, info, p);
+ ret = svc_rdma_read_call_chunk(info);
if (ret < 0)
goto out;
- trace_svcrdma_send_pzr(info->ri_chunklen);
-
- head->rc_arg.len += info->ri_chunklen;
- head->rc_arg.buflen += info->ri_chunklen;
+ buf->len += info->ri_totalbytes;
+ buf->buflen += info->ri_totalbytes;
head->rc_hdr_count = 1;
- head->rc_arg.head[0].iov_base = page_address(head->rc_pages[0]);
- head->rc_arg.head[0].iov_len = min_t(size_t, PAGE_SIZE,
- info->ri_chunklen);
-
- head->rc_arg.page_len = info->ri_chunklen -
- head->rc_arg.head[0].iov_len;
+ buf->head[0].iov_base = page_address(head->rc_pages[0]);
+ buf->head[0].iov_len = min_t(size_t, PAGE_SIZE, info->ri_totalbytes);
+ buf->page_len = info->ri_totalbytes - buf->head[0].iov_len;
out:
return ret;
@@ -824,26 +1088,34 @@ static void svc_rdma_save_io_pages(struct svc_rqst *rqstp,
}
/**
- * svc_rdma_recv_read_chunk - Pull a Read chunk from the client
+ * svc_rdma_process_read_list - Pull list of Read chunks from the client
* @rdma: controlling RDMA transport
* @rqstp: set of pages to use as Read sink buffers
* @head: pages under I/O collect here
- * @p: pointer to start of Read chunk
*
- * Returns:
- * %0 if all needed RDMA Reads were posted successfully,
- * %-EINVAL if client provided too many segments,
- * %-ENOMEM if rdma_rw context pool was exhausted,
- * %-ENOTCONN if posting failed (connection is lost),
- * %-EIO if rdma_rw initialization failed (DMA mapping, etc).
+ * The RPC/RDMA protocol assumes that the upper layer's XDR decoders
+ * pull each Read chunk as they decode an incoming RPC message.
*
- * Assumptions:
- * - All Read segments in @p have the same Position value.
+ * On Linux, however, the server needs a fully-constructed RPC
+ * message in rqstp->rq_arg by the time ->xpo_recvfrom returns a
+ * positive value. So the Read list is safety-checked as soon as it
+ * is received, and then the whole Read list is pulled over here in
+ * one shot.
+ * The ingress RPC message is fully reconstructed once all associated
+ * RDMA Reads have completed.
+ *
+ * Return values:
+ * %1: all needed RDMA Reads were posted successfully,
+ * %-EINVAL: client provided too many chunks or segments
+ * %-ENOMEM: rdma_rw context pool was exhausted
+ * %-ENOTCONN: posting failed (connection is lost)
+ * %-EIO: rdma_rw initialization failed (DMA mapping, etc.)
*/
-int svc_rdma_recv_read_chunk(struct svcxprt_rdma *rdma, struct svc_rqst *rqstp,
- struct svc_rdma_recv_ctxt *head, __be32 *p)
+int svc_rdma_process_read_list(struct svcxprt_rdma *rdma,
+ struct svc_rqst *rqstp,
+ struct svc_rdma_recv_ctxt *head)
{
struct svc_rdma_read_info *info;
+ struct svc_rdma_chunk_ctxt *cc;
int ret;
/* The request (with page list) is constructed in
@@ -861,23 +1133,29 @@ int svc_rdma_recv_read_chunk(struct svcxprt_rdma *rdma, struct svc_rqst *rqstp,
info = svc_rdma_read_info_alloc(rdma);
if (!info)
return -ENOMEM;
+ cc = &info->ri_cc;
+ info->ri_rqst = rqstp;
info->ri_readctxt = head;
info->ri_pageno = 0;
info->ri_pageoff = 0;
-
- info->ri_position = be32_to_cpup(p + 1);
- if (info->ri_position)
- ret = svc_rdma_build_normal_read_chunk(rqstp, info, p);
- else
- ret = svc_rdma_build_pz_read_chunk(rqstp, info, p);
+ info->ri_totalbytes = 0;
+
+ if (pcl_is_empty(&head->rc_call_pcl)) {
+ if (head->rc_read_pcl.cl_count == 1)
+ ret = svc_rdma_read_data_item(info);
+ else
+ ret = svc_rdma_read_multiple_chunks(info);
+ } else {
+ ret = svc_rdma_read_special(info);
+ }
if (ret < 0)
goto out_err;
- ret = svc_rdma_post_chunk_ctxt(&info->ri_cc);
+ trace_svcrdma_post_read_chunk(&cc->cc_cid, cc->cc_sqecount);
+ ret = svc_rdma_post_chunk_ctxt(cc);
if (ret < 0)
goto out_err;
svc_rdma_save_io_pages(rqstp, 0, head->rc_page_count);
- return 0;
+ return 1;
out_err:
svc_rdma_read_info_free(info);
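The strategy choice in svc_rdma_process_read_list() reduces to a small
decision table: a non-empty call PCL means the whole RPC is a Long Message
(Special format); otherwise one or several Read chunks carry data items only.
A sketch assuming just the two facts the code tests:

#include <stdio.h>
#include <stdbool.h>

enum read_strategy { READ_DATA_ITEM, READ_MULTIPLE, READ_SPECIAL };

/* Mirror the dispatch: call chunk present wins; otherwise the number
 * of parsed Read chunks picks the simple or the multi-chunk path.
 */
static enum read_strategy choose(bool call_pcl_empty, int read_chunks)
{
	if (!call_pcl_empty)
		return READ_SPECIAL;
	return read_chunks == 1 ? READ_DATA_ITEM : READ_MULTIPLE;
}

int main(void)
{
	printf("%d\n", choose(true, 1));	/* 0: READ_DATA_ITEM */
	printf("%d\n", choose(true, 3));	/* 1: READ_MULTIPLE */
	printf("%d\n", choose(false, 0));	/* 2: READ_SPECIAL */
	return 0;
}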
diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
index c3d588b149aa..68af79d4f04f 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c
@@ -358,49 +358,42 @@ static ssize_t svc_rdma_encode_read_list(struct svc_rdma_send_ctxt *sctxt)
/**
* svc_rdma_encode_write_segment - Encode one Write segment
- * @src: matching Write chunk in the RPC Call header
* @sctxt: Send context for the RPC Reply
+ * @chunk: Write chunk to push
* @remaining: remaining bytes of the payload left in the Write chunk
+ * @segno: which segment in the chunk
*
* Return values:
* On success, returns length in bytes of the Reply XDR buffer
- * that was consumed by the Write segment
+ * that was consumed by the Write segment, and updates @remaining
* %-EMSGSIZE on XDR buffer overflow
*/
-static ssize_t svc_rdma_encode_write_segment(__be32 *src,
- struct svc_rdma_send_ctxt *sctxt,
- unsigned int *remaining)
+static ssize_t svc_rdma_encode_write_segment(struct svc_rdma_send_ctxt *sctxt,
+ const struct svc_rdma_chunk *chunk,
+ u32 *remaining, unsigned int segno)
{
+ const struct svc_rdma_segment *segment = &chunk->ch_segments[segno];
+ const size_t len = rpcrdma_segment_maxsz * sizeof(__be32);
+ u32 length;
__be32 *p;
- const size_t len = rpcrdma_segment_maxsz * sizeof(*p);
- u32 handle, length;
- u64 offset;
p = xdr_reserve_space(&sctxt->sc_stream, len);
if (!p)
return -EMSGSIZE;
- xdr_decode_rdma_segment(src, &handle, &length, &offset);
-
- if (*remaining < length) {
- /* segment only partly filled */
- length = *remaining;
- *remaining = 0;
- } else {
- /* entire segment was consumed */
- *remaining -= length;
- }
- xdr_encode_rdma_segment(p, handle, length, offset);
-
- trace_svcrdma_encode_wseg(handle, length, offset);
+ length = min_t(u32, *remaining, segment->rs_length);
+ *remaining -= length;
+ xdr_encode_rdma_segment(p, segment->rs_handle, length,
+ segment->rs_offset);
+ trace_svcrdma_encode_wseg(sctxt, segno, segment->rs_handle, length,
+ segment->rs_offset);
return len;
}
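Each Write segment goes on the wire as four big-endian XDR words: handle
(one word), length (one word), then a 64-bit offset (two words), per
RFC 8166. A self-contained userspace sketch of that packing, with htonl
standing in for the kernel's xdr_encode_rdma_segment helper; the
min()-and-subtract mirrors how @remaining is updated above:

#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <arpa/inet.h>

/* Encode one RDMA segment as four big-endian XDR words.
 * Returns the number of bytes consumed in the buffer.
 */
static size_t encode_segment(uint8_t *p, uint32_t handle, uint32_t length,
			     uint64_t offset)
{
	uint32_t words[4];

	words[0] = htonl(handle);
	words[1] = htonl(length);
	words[2] = htonl((uint32_t)(offset >> 32));
	words[3] = htonl((uint32_t)offset);
	memcpy(p, words, sizeof(words));
	return sizeof(words);	/* rpcrdma_segment_maxsz (4) * sizeof(__be32) */
}

int main(void)
{
	uint8_t buf[16];
	uint32_t remaining = 1000, seg_len = 4096;
	uint32_t sendlen = remaining < seg_len ? remaining : seg_len;

	remaining -= sendlen;	/* same min-and-subtract as above */
	printf("encoded %zu bytes, remaining %u\n",
	       encode_segment(buf, 42, sendlen, 0x100000), remaining);
	return 0;
}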
/**
* svc_rdma_encode_write_chunk - Encode one Write chunk
- * @src: matching Write chunk in the RPC Call header
* @sctxt: Send context for the RPC Reply
- * @remaining: size in bytes of the payload in the Write chunk
+ * @chunk: Write chunk to push
*
* Copy a Write chunk from the Call transport header to the
* Reply transport header. Update each segment's length field
@@ -411,33 +404,28 @@ static ssize_t svc_rdma_encode_write_segment(__be32 *src,
* that was consumed by the Write chunk
* %-EMSGSIZE on XDR buffer overflow
*/
-static ssize_t svc_rdma_encode_write_chunk(__be32 *src,
- struct svc_rdma_send_ctxt *sctxt,
- unsigned int remaining)
+static ssize_t svc_rdma_encode_write_chunk(struct svc_rdma_send_ctxt *sctxt,
+ const struct svc_rdma_chunk *chunk)
{
- unsigned int i, nsegs;
+ u32 remaining = chunk->ch_payload_length;
+ unsigned int segno;
ssize_t len, ret;
len = 0;
- trace_svcrdma_encode_write_chunk(remaining);
-
- src++;
ret = xdr_stream_encode_item_present(&sctxt->sc_stream);
if (ret < 0)
- return -EMSGSIZE;
+ return ret;
len += ret;
- nsegs = be32_to_cpup(src++);
- ret = xdr_stream_encode_u32(&sctxt->sc_stream, nsegs);
+ ret = xdr_stream_encode_u32(&sctxt->sc_stream, chunk->ch_segcount);
if (ret < 0)
- return -EMSGSIZE;
+ return ret;
len += ret;
- for (i = nsegs; i; i--) {
- ret = svc_rdma_encode_write_segment(src, sctxt, &remaining);
+ for (segno = 0; segno < chunk->ch_segcount; segno++) {
+ ret = svc_rdma_encode_write_segment(sctxt, chunk, &remaining, segno);
if (ret < 0)
- return -EMSGSIZE;
- src += rpcrdma_segment_maxsz;
+ return ret;
len += ret;
}
@@ -448,32 +436,25 @@ static ssize_t svc_rdma_encode_write_chunk(__be32 *src,
* svc_rdma_encode_write_list - Encode RPC Reply's Write chunk list
* @rctxt: Reply context with information about the RPC Call
* @sctxt: Send context for the RPC Reply
- * @length: size in bytes of the payload in the first Write chunk
- *
- * The client provides a Write chunk list in the Call message. Fill
- * in the segments in the first Write chunk in the Reply's transport
- * header with the number of bytes consumed in each segment.
- * Remaining chunks are returned unused.
- *
- * Assumptions:
- * - Client has provided only one Write chunk
*
* Return values:
* On success, returns length in bytes of the Reply XDR buffer
* that was consumed by the Reply's Write list
* %-EMSGSIZE on XDR buffer overflow
*/
-static ssize_t
-svc_rdma_encode_write_list(const struct svc_rdma_recv_ctxt *rctxt,
- struct svc_rdma_send_ctxt *sctxt,
- unsigned int length)
+static ssize_t svc_rdma_encode_write_list(struct svc_rdma_recv_ctxt *rctxt,
+ struct svc_rdma_send_ctxt *sctxt)
{
+ struct svc_rdma_chunk *chunk;
ssize_t len, ret;
- ret = svc_rdma_encode_write_chunk(rctxt->rc_write_list, sctxt, length);
- if (ret < 0)
- return ret;
- len = ret;
+ len = 0;
+ pcl_for_each_chunk(chunk, &rctxt->rc_write_pcl) {
+ ret = svc_rdma_encode_write_chunk(sctxt, chunk);
+ if (ret < 0)
+ return ret;
+ len += ret;
+ }
/* Terminate the Write list */
ret = xdr_stream_encode_item_absent(&sctxt->sc_stream);
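The Write list itself uses XDR optional-data framing: a one-word TRUE
discriminator precedes each chunk, and a final FALSE word terminates the
list, which is exactly what xdr_stream_encode_item_present() and
xdr_stream_encode_item_absent() emit above. A toy encoding, with one-word
stand-ins for whole chunks:

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

int main(void)
{
	uint32_t wire[8];
	uint32_t entries[] = { 0x1111, 0x2222 };	/* stand-ins for chunks */
	int n = 0;

	for (int i = 0; i < 2; i++) {
		wire[n++] = htonl(1);		/* item present */
		wire[n++] = htonl(entries[i]);	/* entry body */
	}
	wire[n++] = htonl(0);			/* item absent: end of list */

	printf("encoded %d XDR words\n", n);
	return 0;
}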
@@ -489,56 +470,174 @@ svc_rdma_encode_write_list(const struct svc_rdma_recv_ctxt *rctxt,
* @sctxt: Send context for the RPC Reply
* @length: size in bytes of the payload in the Reply chunk
*
- * Assumptions:
- * - Reply can always fit in the client-provided Reply chunk
- *
* Return values:
* On success, returns length in bytes of the Reply XDR buffer
* that was consumed by the Reply's Reply chunk
* %-EMSGSIZE on XDR buffer overflow
+ * %-E2BIG if the RPC message is larger than the Reply chunk
*/
static ssize_t
-svc_rdma_encode_reply_chunk(const struct svc_rdma_recv_ctxt *rctxt,
+svc_rdma_encode_reply_chunk(struct svc_rdma_recv_ctxt *rctxt,
struct svc_rdma_send_ctxt *sctxt,
unsigned int length)
{
- return svc_rdma_encode_write_chunk(rctxt->rc_reply_chunk, sctxt,
- length);
+ struct svc_rdma_chunk *chunk;
+
+ if (pcl_is_empty(&rctxt->rc_reply_pcl))
+ return xdr_stream_encode_item_absent(&sctxt->sc_stream);
+
+ chunk = pcl_first_chunk(&rctxt->rc_reply_pcl);
+ if (length > chunk->ch_length)
+ return -E2BIG;
+
+ chunk->ch_payload_length = length;
+ return svc_rdma_encode_write_chunk(sctxt, chunk);
}
-static int svc_rdma_dma_map_page(struct svcxprt_rdma *rdma,
- struct svc_rdma_send_ctxt *ctxt,
- struct page *page,
- unsigned long offset,
- unsigned int len)
+struct svc_rdma_map_data {
+ struct svcxprt_rdma *md_rdma;
+ struct svc_rdma_send_ctxt *md_ctxt;
+};
+
+/**
+ * svc_rdma_page_dma_map - DMA map one page
+ * @data: pointer to arguments
+ * @page: struct page to DMA map
+ * @offset: offset into the page
+ * @len: number of bytes to map
+ *
+ * Returns:
+ * %0 if DMA mapping was successful
+ * %-EIO if the page cannot be DMA mapped
+ */
+static int svc_rdma_page_dma_map(void *data, struct page *page,
+ unsigned long offset, unsigned int len)
{
+ struct svc_rdma_map_data *args = data;
+ struct svcxprt_rdma *rdma = args->md_rdma;
+ struct svc_rdma_send_ctxt *ctxt = args->md_ctxt;
struct ib_device *dev = rdma->sc_cm_id->device;
dma_addr_t dma_addr;
+ ++ctxt->sc_cur_sge_no;
+
dma_addr = ib_dma_map_page(dev, page, offset, len, DMA_TO_DEVICE);
- trace_svcrdma_dma_map_page(rdma, dma_addr, len);
if (ib_dma_mapping_error(dev, dma_addr))
goto out_maperr;
+ trace_svcrdma_dma_map_page(rdma, dma_addr, len);
ctxt->sc_sges[ctxt->sc_cur_sge_no].addr = dma_addr;
ctxt->sc_sges[ctxt->sc_cur_sge_no].length = len;
ctxt->sc_send_wr.num_sge++;
return 0;
out_maperr:
+ trace_svcrdma_dma_map_err(rdma, dma_addr, len);
return -EIO;
}
-/* ib_dma_map_page() is used here because svc_rdma_dma_unmap()
+/**
+ * svc_rdma_iov_dma_map - DMA map an iovec
+ * @data: pointer to arguments
+ * @iov: kvec to DMA map
+ *
+ * ib_dma_map_page() is used here because svc_rdma_dma_unmap()
* handles DMA-unmap and it uses ib_dma_unmap_page() exclusively.
+ *
+ * Returns:
+ * %0 if DMA mapping was successful
+ * %-EIO if the iovec cannot be DMA mapped
*/
-static int svc_rdma_dma_map_buf(struct svcxprt_rdma *rdma,
- struct svc_rdma_send_ctxt *ctxt,
- unsigned char *base,
- unsigned int len)
+static int svc_rdma_iov_dma_map(void *data, const struct kvec *iov)
{
- return svc_rdma_dma_map_page(rdma, ctxt, virt_to_page(base),
- offset_in_page(base), len);
+ if (!iov->iov_len)
+ return 0;
+ return svc_rdma_page_dma_map(data, virt_to_page(iov->iov_base),
+ offset_in_page(iov->iov_base),
+ iov->iov_len);
+}
+
+/**
+ * svc_rdma_xb_dma_map - DMA map all segments of an xdr_buf
+ * @xdr: xdr_buf containing portion of an RPC message to transmit
+ * @data: pointer to arguments
+ *
+ * Returns:
+ * %0 if DMA mapping was successful
+ * %-EIO if DMA mapping failed
+ *
+ * On failure, any DMA mappings that have already been done must be
+ * unmapped by the caller.
+ */
+static int svc_rdma_xb_dma_map(const struct xdr_buf *xdr, void *data)
+{
+ unsigned int len, remaining;
+ unsigned long pageoff;
+ struct page **ppages;
+ int ret;
+
+ ret = svc_rdma_iov_dma_map(data, &xdr->head[0]);
+ if (ret < 0)
+ return ret;
+
+ ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT);
+ pageoff = offset_in_page(xdr->page_base);
+ remaining = xdr->page_len;
+ while (remaining) {
+ len = min_t(u32, PAGE_SIZE - pageoff, remaining);
+
+ ret = svc_rdma_page_dma_map(data, *ppages++, pageoff, len);
+ if (ret < 0)
+ return ret;
+
+ remaining -= len;
+ pageoff = 0;
+ }
+
+ ret = svc_rdma_iov_dma_map(data, &xdr->tail[0]);
+ if (ret < 0)
+ return ret;
+
+ return xdr->len;
+}
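svc_rdma_xb_dma_map() visits the three parts of an xdr_buf (head kvec, page
array, tail kvec), mapping one SGE per contiguous region. A simplified
userspace model of that walk, invoking a callback per region; struct
xdr_buf_model and the offsets printed are illustrative only:

#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE 4096UL

struct kvec { void *iov_base; size_t iov_len; };
struct xdr_buf_model {
	struct kvec head, tail;
	unsigned long page_base;	/* offset into the first page */
	unsigned int page_len;
};

typedef int (*region_fn)(unsigned long off, unsigned int len, void *data);

/* Visit every contiguous region: head, then each partial page, then tail. */
static int for_each_region(const struct xdr_buf_model *x, region_fn fn, void *d)
{
	unsigned long pageoff = x->page_base % PAGE_SIZE;
	unsigned int remaining = x->page_len, page = 0;

	if (x->head.iov_len && fn(0, x->head.iov_len, d))
		return -1;
	while (remaining) {
		unsigned int len = PAGE_SIZE - pageoff < remaining ?
				   PAGE_SIZE - pageoff : remaining;

		if (fn(page++ * PAGE_SIZE + pageoff, len, d))
			return -1;
		remaining -= len;
		pageoff = 0;
	}
	if (x->tail.iov_len && fn(0, x->tail.iov_len, d))
		return -1;
	return 0;
}

static int show(unsigned long off, unsigned int len, void *d)
{
	(void)d;
	printf("region off=%lu len=%u\n", off, len);
	return 0;
}

int main(void)
{
	struct xdr_buf_model x = {
		.head = { NULL, 100 }, .tail = { NULL, 4 },
		.page_base = 1000, .page_len = 9000,
	};

	return for_each_region(&x, show, NULL);
}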
+
+struct svc_rdma_pullup_data {
+ u8 *pd_dest;
+ unsigned int pd_length;
+ unsigned int pd_num_sges;
+};
+
+/**
+ * svc_rdma_xb_count_sges - Count how many SGEs will be needed
+ * @xdr: xdr_buf containing portion of an RPC message to transmit
+ * @data: pointer to arguments
+ *
+ * Returns:
+ * Number of SGEs needed to Send the contents of @xdr inline
+ */
+static int svc_rdma_xb_count_sges(const struct xdr_buf *xdr,
+ void *data)
+{
+ struct svc_rdma_pullup_data *args = data;
+ unsigned int remaining;
+ unsigned long offset;
+
+ if (xdr->head[0].iov_len)
+ ++args->pd_num_sges;
+
+ offset = offset_in_page(xdr->page_base);
+ remaining = xdr->page_len;
+ while (remaining) {
+ ++args->pd_num_sges;
+ remaining -= min_t(u32, PAGE_SIZE - offset, remaining);
+ offset = 0;
+ }
+
+ if (xdr->tail[0].iov_len)
+ ++args->pd_num_sges;
+
+ args->pd_length += xdr->len;
+ return 0;
}
/**
@@ -549,48 +648,71 @@ static int svc_rdma_dma_map_buf(struct svcxprt_rdma *rdma,
* @xdr: xdr_buf containing RPC message to transmit
*
* Returns:
- * %true if pull-up must be used
- * %false otherwise
+ * %true if pull-up must be used
+ * %false otherwise
*/
-static bool svc_rdma_pull_up_needed(struct svcxprt_rdma *rdma,
- struct svc_rdma_send_ctxt *sctxt,
+static bool svc_rdma_pull_up_needed(const struct svcxprt_rdma *rdma,
+ const struct svc_rdma_send_ctxt *sctxt,
const struct svc_rdma_recv_ctxt *rctxt,
- struct xdr_buf *xdr)
+ const struct xdr_buf *xdr)
{
- int elements;
+ /* Resources needed for the transport header */
+ struct svc_rdma_pullup_data args = {
+ .pd_length = sctxt->sc_hdrbuf.len,
+ .pd_num_sges = 1,
+ };
+ int ret;
- /* For small messages, copying bytes is cheaper than DMA mapping.
- */
- if (sctxt->sc_hdrbuf.len + xdr->len < RPCRDMA_PULLUP_THRESH)
+ ret = pcl_process_nonpayloads(&rctxt->rc_write_pcl, xdr,
+ svc_rdma_xb_count_sges, &args);
+ if (ret < 0)
+ return false;
+
+ if (args.pd_length < RPCRDMA_PULLUP_THRESH)
return true;
+ return args.pd_num_sges >= rdma->sc_max_send_sges;
+}
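The pull-up decision above has two triggers: very small messages, where a
memcpy into the already-mapped header buffer is cheaper than per-element DMA
mapping, and messages that would need more scatter/gather entries than the
device supports in a single Send. A sketch of that predicate; the 2048-byte
threshold is an assumption here for illustration (the kernel derives
RPCRDMA_PULLUP_THRESH from its default inline size):

#include <stdio.h>
#include <stdbool.h>

#define PULLUP_THRESH 2048	/* assumed value for illustration */

/* Mirror the two pull-up triggers in svc_rdma_pull_up_needed(). */
static bool pull_up_needed(unsigned int total_len, unsigned int num_sges,
			   unsigned int max_send_sges)
{
	if (total_len < PULLUP_THRESH)
		return true;	/* copying beats mapping for small sends */
	return num_sges >= max_send_sges;	/* too many SGEs for one WR */
}

int main(void)
{
	printf("%d\n", pull_up_needed(512, 2, 17));	/* 1: small message */
	printf("%d\n", pull_up_needed(65536, 20, 17));	/* 1: too many SGEs */
	printf("%d\n", pull_up_needed(65536, 4, 17));	/* 0: map directly */
	return 0;
}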
- /* Check whether the xdr_buf has more elements than can
- * fit in a single RDMA Send.
- */
- /* xdr->head */
- elements = 1;
-
- /* xdr->pages */
- if (!rctxt || !rctxt->rc_write_list) {
- unsigned int remaining;
- unsigned long pageoff;
-
- pageoff = xdr->page_base & ~PAGE_MASK;
- remaining = xdr->page_len;
- while (remaining) {
- ++elements;
- remaining -= min_t(u32, PAGE_SIZE - pageoff,
- remaining);
- pageoff = 0;
- }
+/**
+ * svc_rdma_xb_linearize - Copy region of xdr_buf to flat buffer
+ * @xdr: xdr_buf containing portion of an RPC message to copy
+ * @data: pointer to arguments
+ *
+ * Returns:
+ * Always zero.
+ */
+static int svc_rdma_xb_linearize(const struct xdr_buf *xdr,
+ void *data)
+{
+ struct svc_rdma_pullup_data *args = data;
+ unsigned int len, remaining;
+ unsigned long pageoff;
+ struct page **ppages;
+
+ if (xdr->head[0].iov_len) {
+ memcpy(args->pd_dest, xdr->head[0].iov_base, xdr->head[0].iov_len);
+ args->pd_dest += xdr->head[0].iov_len;
}
- /* xdr->tail */
- if (xdr->tail[0].iov_len)
- ++elements;
+ ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT);
+ pageoff = offset_in_page(xdr->page_base);
+ remaining = xdr->page_len;
+ while (remaining) {
+ len = min_t(u32, PAGE_SIZE - pageoff, remaining);
+ memcpy(args->pd_dest, page_address(*ppages) + pageoff, len);
+ remaining -= len;
+ args->pd_dest += len;
+ pageoff = 0;
+ ppages++;
+ }
- /* assume 1 SGE is needed for the transport header */
- return elements >= rdma->sc_max_send_sges;
+ if (xdr->tail[0].iov_len) {
+ memcpy(args->pd_dest, xdr->tail[0].iov_base, xdr->tail[0].iov_len);
+ args->pd_dest += xdr->tail[0].iov_len;
+ }
+
+ args->pd_length += xdr->len;
+ return 0;
}
/**
@@ -603,54 +725,30 @@ static bool svc_rdma_pull_up_needed(struct svcxprt_rdma *rdma,
* The device is not capable of sending the reply directly.
* Assemble the elements of @xdr into the transport header buffer.
*
- * Returns zero on success, or a negative errno on failure.
+ * Assumptions:
+ * pull_up_needed has determined that @xdr will fit in the buffer.
+ *
+ * Returns:
+ * %0 if pull-up was successful
+ * %-EMSGSIZE if a buffer manipulation problem occurred
*/
-static int svc_rdma_pull_up_reply_msg(struct svcxprt_rdma *rdma,
+static int svc_rdma_pull_up_reply_msg(const struct svcxprt_rdma *rdma,
struct svc_rdma_send_ctxt *sctxt,
const struct svc_rdma_recv_ctxt *rctxt,
const struct xdr_buf *xdr)
{
- unsigned char *dst, *tailbase;
- unsigned int taillen;
-
- dst = sctxt->sc_xprt_buf + sctxt->sc_hdrbuf.len;
- memcpy(dst, xdr->head[0].iov_base, xdr->head[0].iov_len);
- dst += xdr->head[0].iov_len;
-
- tailbase = xdr->tail[0].iov_base;
- taillen = xdr->tail[0].iov_len;
- if (rctxt && rctxt->rc_write_list) {
- u32 xdrpad;
-
- xdrpad = xdr_pad_size(xdr->page_len);
- if (taillen && xdrpad) {
- tailbase += xdrpad;
- taillen -= xdrpad;
- }
- } else {
- unsigned int len, remaining;
- unsigned long pageoff;
- struct page **ppages;
-
- ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT);
- pageoff = xdr->page_base & ~PAGE_MASK;
- remaining = xdr->page_len;
- while (remaining) {
- len = min_t(u32, PAGE_SIZE - pageoff, remaining);
-
- memcpy(dst, page_address(*ppages) + pageoff, len);
- remaining -= len;
- dst += len;
- pageoff = 0;
- ppages++;
- }
- }
+ struct svc_rdma_pullup_data args = {
+ .pd_dest = sctxt->sc_xprt_buf + sctxt->sc_hdrbuf.len,
+ };
+ int ret;
- if (taillen)
- memcpy(dst, tailbase, taillen);
+ ret = pcl_process_nonpayloads(&rctxt->rc_write_pcl, xdr,
+ svc_rdma_xb_linearize, &args);
+ if (ret < 0)
+ return ret;
- sctxt->sc_sges[0].length += xdr->len;
- trace_svcrdma_send_pullup(sctxt->sc_sges[0].length);
+ sctxt->sc_sges[0].length = sctxt->sc_hdrbuf.len + args.pd_length;
+ trace_svcrdma_send_pullup(sctxt, args.pd_length);
return 0;
}
@@ -660,22 +758,22 @@ static int svc_rdma_pull_up_reply_msg(struct svcxprt_rdma *rdma,
* @rctxt: Write and Reply chunks provided by client
* @xdr: prepared xdr_buf containing RPC message
*
- * Load the xdr_buf into the ctxt's sge array, and DMA map each
- * element as it is added. The Send WR's num_sge field is set.
+ * Returns:
+ * %0 if DMA mapping was successful
+ * %-EMSGSIZE if a buffer manipulation problem occurred
+ * %-EIO if DMA mapping failed
*
- * Returns zero on success, or a negative errno on failure.
+ * The Send WR's num_sge field is set in all cases.
*/
int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
struct svc_rdma_send_ctxt *sctxt,
const struct svc_rdma_recv_ctxt *rctxt,
- struct xdr_buf *xdr)
+ const struct xdr_buf *xdr)
{
- unsigned int len, remaining;
- unsigned long page_off;
- struct page **ppages;
- unsigned char *base;
- u32 xdr_pad;
- int ret;
+ struct svc_rdma_map_data args = {
+ .md_rdma = rdma,
+ .md_ctxt = sctxt,
+ };
/* Set up the (persistently-mapped) transport header SGE. */
sctxt->sc_send_wr.num_sge = 1;
@@ -684,7 +782,7 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
/* If there is a Reply chunk, nothing follows the transport
* header, and we're done here.
*/
- if (rctxt && rctxt->rc_reply_chunk)
+ if (!pcl_is_empty(&rctxt->rc_reply_pcl))
return 0;
/* For pull-up, svc_rdma_send() will sync the transport header.
@@ -693,58 +791,8 @@ int svc_rdma_map_reply_msg(struct svcxprt_rdma *rdma,
if (svc_rdma_pull_up_needed(rdma, sctxt, rctxt, xdr))
return svc_rdma_pull_up_reply_msg(rdma, sctxt, rctxt, xdr);
- ++sctxt->sc_cur_sge_no;
- ret = svc_rdma_dma_map_buf(rdma, sctxt,
- xdr->head[0].iov_base,
- xdr->head[0].iov_len);
- if (ret < 0)
- return ret;
-
- /* If a Write chunk is present, the xdr_buf's page list
- * is not included inline. However the Upper Layer may
- * have added XDR padding in the tail buffer, and that
- * should not be included inline.
- */
- if (rctxt && rctxt->rc_write_list) {
- base = xdr->tail[0].iov_base;
- len = xdr->tail[0].iov_len;
- xdr_pad = xdr_pad_size(xdr->page_len);
-
- if (len && xdr_pad) {
- base += xdr_pad;
- len -= xdr_pad;
- }
-
- goto tail;
- }
-
- ppages = xdr->pages + (xdr->page_base >> PAGE_SHIFT);
- page_off = xdr->page_base & ~PAGE_MASK;
- remaining = xdr->page_len;
- while (remaining) {
- len = min_t(u32, PAGE_SIZE - page_off, remaining);
-
- ++sctxt->sc_cur_sge_no;
- ret = svc_rdma_dma_map_page(rdma, sctxt, *ppages++,
- page_off, len);
- if (ret < 0)
- return ret;
-
- remaining -= len;
- page_off = 0;
- }
-
- base = xdr->tail[0].iov_base;
- len = xdr->tail[0].iov_len;
-tail:
- if (len) {
- ++sctxt->sc_cur_sge_no;
- ret = svc_rdma_dma_map_buf(rdma, sctxt, base, len);
- if (ret < 0)
- return ret;
- }
-
- return 0;
+ return pcl_process_nonpayloads(&rctxt->rc_write_pcl, xdr,
+ svc_rdma_xb_dma_map, &args);
}
/* The svc_rqst and all resources it owns are released as soon as
@@ -894,9 +942,6 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
container_of(xprt, struct svcxprt_rdma, sc_xprt);
struct svc_rdma_recv_ctxt *rctxt = rqstp->rq_xprt_ctxt;
__be32 *rdma_argp = rctxt->rc_recv_buf;
- __be32 *wr_lst = rctxt->rc_write_list;
- __be32 *rp_ch = rctxt->rc_reply_chunk;
- struct xdr_buf *xdr = &rqstp->rq_res;
struct svc_rdma_send_ctxt *sctxt;
__be32 *p;
int ret;
@@ -914,45 +959,22 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
rpcrdma_fixed_maxsz * sizeof(*p));
if (!p)
goto err0;
+
+ ret = svc_rdma_send_reply_chunk(rdma, rctxt, &rqstp->rq_res);
+ if (ret < 0)
+ goto err2;
+
*p++ = *rdma_argp;
*p++ = *(rdma_argp + 1);
*p++ = rdma->sc_fc_credits;
- *p = rp_ch ? rdma_nomsg : rdma_msg;
+ *p = pcl_is_empty(&rctxt->rc_reply_pcl) ? rdma_msg : rdma_nomsg;
if (svc_rdma_encode_read_list(sctxt) < 0)
goto err0;
- if (wr_lst) {
- /* XXX: Presume the client sent only one Write chunk */
- unsigned long offset;
- unsigned int length;
-
- if (rctxt->rc_read_payload_length) {
- offset = rctxt->rc_read_payload_offset;
- length = rctxt->rc_read_payload_length;
- } else {
- offset = xdr->head[0].iov_len;
- length = xdr->page_len;
- }
- ret = svc_rdma_send_write_chunk(rdma, wr_lst, xdr, offset,
- length);
- if (ret < 0)
- goto err2;
- if (svc_rdma_encode_write_list(rctxt, sctxt, length) < 0)
- goto err0;
- } else {
- if (xdr_stream_encode_item_absent(&sctxt->sc_stream) < 0)
- goto err0;
- }
- if (rp_ch) {
- ret = svc_rdma_send_reply_chunk(rdma, rctxt, &rqstp->rq_res);
- if (ret < 0)
- goto err2;
- if (svc_rdma_encode_reply_chunk(rctxt, sctxt, ret) < 0)
- goto err0;
- } else {
- if (xdr_stream_encode_item_absent(&sctxt->sc_stream) < 0)
- goto err0;
- }
+ if (svc_rdma_encode_write_list(rctxt, sctxt) < 0)
+ goto err0;
+ if (svc_rdma_encode_reply_chunk(rctxt, sctxt, ret) < 0)
+ goto err0;
ret = svc_rdma_send_reply_msg(rdma, sctxt, rctxt, rqstp);
if (ret < 0)
@@ -979,28 +1001,46 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
}
/**
- * svc_rdma_read_payload - special processing for a READ payload
+ * svc_rdma_result_payload - special processing for a result payload
* @rqstp: svc_rqst to operate on
* @offset: payload's byte offset in @xdr
* @length: size of payload, in bytes
*
- * Returns zero on success.
- *
- * For the moment, just record the xdr_buf location of the READ
- * payload. svc_rdma_sendto will use that location later when
- * we actually send the payload.
+ * Return values:
+ * %0 if successful or nothing needed to be done
+ * %-EMSGSIZE on XDR buffer overflow
+ * %-E2BIG if the payload was larger than the Write chunk
+ * %-EINVAL if client provided too many segments
+ * %-ENOMEM if rdma_rw context pool was exhausted
+ * %-ENOTCONN if posting failed (connection is lost)
+ * %-EIO if rdma_rw initialization failed (DMA mapping, etc.)
*/
-int svc_rdma_read_payload(struct svc_rqst *rqstp, unsigned int offset,
- unsigned int length)
+int svc_rdma_result_payload(struct svc_rqst *rqstp, unsigned int offset,
+ unsigned int length)
{
struct svc_rdma_recv_ctxt *rctxt = rqstp->rq_xprt_ctxt;
+ struct svc_rdma_chunk *chunk;
+ struct svcxprt_rdma *rdma;
+ struct xdr_buf subbuf;
+ int ret;
- /* XXX: Just one READ payload slot for now, since our
- * transport implementation currently supports only one
- * Write chunk.
- */
- rctxt->rc_read_payload_offset = offset;
- rctxt->rc_read_payload_length = length;
+ chunk = rctxt->rc_cur_result_payload;
+ if (!length || !chunk)
+ return 0;
+ rctxt->rc_cur_result_payload =
+ pcl_next_chunk(&rctxt->rc_write_pcl, chunk);
+ if (length > chunk->ch_length)
+ return -E2BIG;
+ chunk->ch_position = offset;
+ chunk->ch_payload_length = length;
+
+ if (xdr_buf_subsegment(&rqstp->rq_res, &subbuf, offset, length))
+ return -EMSGSIZE;
+
+ rdma = container_of(rqstp->rq_xprt, struct svcxprt_rdma, sc_xprt);
+ ret = svc_rdma_send_write_chunk(rdma, chunk, &subbuf);
+ if (ret < 0)
+ return ret;
return 0;
}
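Result payloads are matched, in Call order, against the Write chunks the
client provided: rc_cur_result_payload is a cursor that advances one chunk
per marked payload. A minimal model of that bookkeeping; the struct names
and the -1 return value are stand-ins (the kernel returns -E2BIG when a
payload overruns its chunk):

#include <stdio.h>
#include <stdint.h>

struct chunk { uint32_t ch_length; uint32_t position, payload_length; };
struct cursor { struct chunk *chunks; int nchunks, next; };

/* Record one result payload against the next Write chunk in order. */
static int result_payload(struct cursor *cur, uint32_t offset, uint32_t length)
{
	struct chunk *c;

	if (!length || cur->next >= cur->nchunks)
		return 0;	/* nothing to do, or no chunk left */
	c = &cur->chunks[cur->next++];
	if (length > c->ch_length)
		return -1;	/* payload larger than the chunk */
	c->position = offset;
	c->payload_length = length;
	return 0;
}

int main(void)
{
	struct chunk chunks[2] = { { 8192 }, { 8192 } };
	struct cursor cur = { chunks, 2, 0 };

	printf("%d\n", result_payload(&cur, 100, 4096));	/* 0 */
	printf("%d\n", result_payload(&cur, 4196, 100000));	/* -1 */
	return 0;
}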
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index fb044792b571..afba4e9d5425 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -80,7 +80,7 @@ static const struct svc_xprt_ops svc_rdma_ops = {
.xpo_create = svc_rdma_create,
.xpo_recvfrom = svc_rdma_recvfrom,
.xpo_sendto = svc_rdma_sendto,
- .xpo_read_payload = svc_rdma_read_payload,
+ .xpo_result_payload = svc_rdma_result_payload,
.xpo_release_rqst = svc_rdma_release_rqst,
.xpo_detach = svc_rdma_detach,
.xpo_free = svc_rdma_free,