summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2021-01-11 11:35:46 -0800
committer Linus Torvalds <torvalds@linux-foundation.org> 2021-01-11 11:35:46 -0800
commit c912fd05fab97934e4cf579654d0dc4835b4758c (patch)
tree 2347f7c161893f46c78f2bbe4f7f9d7458b5c2f3
parent f1ee3e150bd9da2dd60a210926c86cffd4a336ea (diff)
parent 7b723008f9c95624c848fad661c01b06e47b20da (diff)
download linux-c912fd05fab97934e4cf579654d0dc4835b4758c.tar.bz2
Merge tag 'nfsd-5.11-1' of git://git.linux-nfs.org/projects/cel/cel-2.6
Pull nfsd fixes from Chuck Lever:

 - Fix major TCP performance regression
 - Get NFSv4.2 READ_PLUS regression tests to pass
 - Improve NFSv4 COMPOUND memory allocation
 - Fix sparse warning

* tag 'nfsd-5.11-1' of git://git.linux-nfs.org/projects/cel/cel-2.6:
  NFSD: Restore NFSv4 decoding's SAVEMEM functionality
  SUNRPC: Handle TCP socket sends with kernel_sendpage() again
  NFSD: Fix sparse warning in nfssvc.c
  nfsd: Don't set eof on a truncated READ_PLUS
  nfsd: Fixes for nfsd4_encode_read_plus_data()
-rw-r--r-- fs/nfsd/nfs4proc.c 5
-rw-r--r-- fs/nfsd/nfs4xdr.c 56
-rw-r--r-- fs/nfsd/nfssvc.c 6
-rw-r--r-- fs/nfsd/xdr4.h 1
-rw-r--r-- net/sunrpc/svcsock.c 86
5 files changed, 126 insertions, 28 deletions
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index 4727b7f03c5b..8d6d2678abad 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -50,6 +50,11 @@
#include "pnfs.h"
#include "trace.h"
+static bool inter_copy_offload_enable;
+module_param(inter_copy_offload_enable, bool, 0644);
+MODULE_PARM_DESC(inter_copy_offload_enable,
+ "Enable inter server to server copy offload. Default: false");
+
#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
#include <linux/security.h>
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 45ee6b12ce5b..eaaa1605b5b5 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -147,6 +147,25 @@ svcxdr_dupstr(struct nfsd4_compoundargs *argp, void *buf, u32 len)
return p;
}
+static void *
+svcxdr_savemem(struct nfsd4_compoundargs *argp, __be32 *p, u32 len)
+{
+ __be32 *tmp;
+
+ /*
+ * The location of the decoded data item is stable,
+ * so @p is OK to use. This is the common case.
+ */
+ if (p != argp->xdr->scratch.iov_base)
+ return p;
+
+ tmp = svcxdr_tmpalloc(argp, len);
+ if (!tmp)
+ return NULL;
+ memcpy(tmp, p, len);
+ return tmp;
+}
+
/*
* NFSv4 basic data type decoders
*/
@@ -183,11 +202,10 @@ nfsd4_decode_opaque(struct nfsd4_compoundargs *argp, struct xdr_netobj *o)
p = xdr_inline_decode(argp->xdr, len);
if (!p)
return nfserr_bad_xdr;
- o->data = svcxdr_tmpalloc(argp, len);
+ o->data = svcxdr_savemem(argp, p, len);
if (!o->data)
return nfserr_jukebox;
o->len = len;
- memcpy(o->data, p, len);
return nfs_ok;
}
@@ -205,10 +223,9 @@ nfsd4_decode_component4(struct nfsd4_compoundargs *argp, char **namp, u32 *lenp)
status = check_filename((char *)p, *lenp);
if (status)
return status;
- *namp = svcxdr_tmpalloc(argp, *lenp);
+ *namp = svcxdr_savemem(argp, p, *lenp);
if (!*namp)
return nfserr_jukebox;
- memcpy(*namp, p, *lenp);
return nfs_ok;
}
@@ -1200,10 +1217,9 @@ nfsd4_decode_putfh(struct nfsd4_compoundargs *argp, struct nfsd4_putfh *putfh)
p = xdr_inline_decode(argp->xdr, putfh->pf_fhlen);
if (!p)
return nfserr_bad_xdr;
- putfh->pf_fhval = svcxdr_tmpalloc(argp, putfh->pf_fhlen);
+ putfh->pf_fhval = svcxdr_savemem(argp, p, putfh->pf_fhlen);
if (!putfh->pf_fhval)
return nfserr_jukebox;
- memcpy(putfh->pf_fhval, p, putfh->pf_fhlen);
return nfs_ok;
}
@@ -1318,24 +1334,20 @@ nfsd4_decode_setclientid(struct nfsd4_compoundargs *argp, struct nfsd4_setclient
p = xdr_inline_decode(argp->xdr, setclientid->se_callback_netid_len);
if (!p)
return nfserr_bad_xdr;
- setclientid->se_callback_netid_val = svcxdr_tmpalloc(argp,
+ setclientid->se_callback_netid_val = svcxdr_savemem(argp, p,
setclientid->se_callback_netid_len);
if (!setclientid->se_callback_netid_val)
return nfserr_jukebox;
- memcpy(setclientid->se_callback_netid_val, p,
- setclientid->se_callback_netid_len);
if (xdr_stream_decode_u32(argp->xdr, &setclientid->se_callback_addr_len) < 0)
return nfserr_bad_xdr;
p = xdr_inline_decode(argp->xdr, setclientid->se_callback_addr_len);
if (!p)
return nfserr_bad_xdr;
- setclientid->se_callback_addr_val = svcxdr_tmpalloc(argp,
+ setclientid->se_callback_addr_val = svcxdr_savemem(argp, p,
setclientid->se_callback_addr_len);
if (!setclientid->se_callback_addr_val)
return nfserr_jukebox;
- memcpy(setclientid->se_callback_addr_val, p,
- setclientid->se_callback_addr_len);
if (xdr_stream_decode_u32(argp->xdr, &setclientid->se_callback_ident) < 0)
return nfserr_bad_xdr;
@@ -1375,10 +1387,9 @@ nfsd4_decode_verify(struct nfsd4_compoundargs *argp, struct nfsd4_verify *verify
p = xdr_inline_decode(argp->xdr, verify->ve_attrlen);
if (!p)
return nfserr_bad_xdr;
- verify->ve_attrval = svcxdr_tmpalloc(argp, verify->ve_attrlen);
+ verify->ve_attrval = svcxdr_savemem(argp, p, verify->ve_attrlen);
if (!verify->ve_attrval)
return nfserr_jukebox;
- memcpy(verify->ve_attrval, p, verify->ve_attrlen);
return nfs_ok;
}
@@ -2333,10 +2344,9 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
p = xdr_inline_decode(argp->xdr, argp->taglen);
if (!p)
return 0;
- argp->tag = svcxdr_tmpalloc(argp, argp->taglen);
+ argp->tag = svcxdr_savemem(argp, p, argp->taglen);
if (!argp->tag)
return 0;
- memcpy(argp->tag, p, argp->taglen);
max_reply += xdr_align_size(argp->taglen);
}
@@ -4756,6 +4766,7 @@ nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp,
resp->rqstp->rq_vec, read->rd_vlen, maxcount, eof);
if (nfserr)
return nfserr;
+ xdr_truncate_encode(xdr, starting_len + 16 + xdr_align_size(*maxcount));
tmp = htonl(NFS4_CONTENT_DATA);
write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4);
@@ -4763,6 +4774,10 @@ nfsd4_encode_read_plus_data(struct nfsd4_compoundres *resp,
write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp64, 8);
tmp = htonl(*maxcount);
write_bytes_to_xdr_buf(xdr->buf, starting_len + 12, &tmp, 4);
+
+ tmp = xdr_zero;
+ write_bytes_to_xdr_buf(xdr->buf, starting_len + 16 + *maxcount, &tmp,
+ xdr_pad_size(*maxcount));
return nfs_ok;
}
@@ -4855,14 +4870,15 @@ out:
if (nfserr && segments == 0)
xdr_truncate_encode(xdr, starting_len);
else {
- tmp = htonl(eof);
- write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4);
- tmp = htonl(segments);
- write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4);
if (nfserr) {
xdr_truncate_encode(xdr, last_segment);
nfserr = nfs_ok;
+ eof = 0;
}
+ tmp = htonl(eof);
+ write_bytes_to_xdr_buf(xdr->buf, starting_len, &tmp, 4);
+ tmp = htonl(segments);
+ write_bytes_to_xdr_buf(xdr->buf, starting_len + 4, &tmp, 4);
}
return nfserr;
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 00384c332f9b..f9c9f4c63cc7 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -33,12 +33,6 @@
#define NFSDDBG_FACILITY NFSDDBG_SVC
-bool inter_copy_offload_enable;
-EXPORT_SYMBOL_GPL(inter_copy_offload_enable);
-module_param(inter_copy_offload_enable, bool, 0644);
-MODULE_PARM_DESC(inter_copy_offload_enable,
- "Enable inter server to server copy offload. Default: false");
-
extern struct svc_program nfsd_program;
static int nfsd(void *vrqstp);
#if defined(CONFIG_NFSD_V2_ACL) || defined(CONFIG_NFSD_V3_ACL)
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index a60ff5ce1a37..c300885ae75d 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -568,7 +568,6 @@ struct nfsd4_copy {
struct nfs_fh c_fh;
nfs4_stateid stateid;
};
-extern bool inter_copy_offload_enable;
struct nfsd4_seek {
/* request */
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index b248f2349437..c9766d07eb81 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1062,6 +1062,90 @@ err_noclose:
return 0; /* record not complete */
}
+static int svc_tcp_send_kvec(struct socket *sock, const struct kvec *vec,
+ int flags)
+{
+ return kernel_sendpage(sock, virt_to_page(vec->iov_base),
+ offset_in_page(vec->iov_base),
+ vec->iov_len, flags);
+}
+
+/*
+ * kernel_sendpage() is used exclusively to reduce the number of
+ * copy operations in this path. Therefore the caller must ensure
+ * that the pages backing @xdr are unchanging.
+ *
+ * In addition, the logic assumes that .bv_len is never larger
+ * than PAGE_SIZE.
+ */
+static int svc_tcp_sendmsg(struct socket *sock, struct msghdr *msg,
+ struct xdr_buf *xdr, rpc_fraghdr marker,
+ unsigned int *sentp)
+{
+ const struct kvec *head = xdr->head;
+ const struct kvec *tail = xdr->tail;
+ struct kvec rm = {
+ .iov_base = &marker,
+ .iov_len = sizeof(marker),
+ };
+ int flags, ret;
+
+ *sentp = 0;
+ xdr_alloc_bvec(xdr, GFP_KERNEL);
+
+ msg->msg_flags = MSG_MORE;
+ ret = kernel_sendmsg(sock, msg, &rm, 1, rm.iov_len);
+ if (ret < 0)
+ return ret;
+ *sentp += ret;
+ if (ret != rm.iov_len)
+ return -EAGAIN;
+
+ flags = head->iov_len < xdr->len ? MSG_MORE | MSG_SENDPAGE_NOTLAST : 0;
+ ret = svc_tcp_send_kvec(sock, head, flags);
+ if (ret < 0)
+ return ret;
+ *sentp += ret;
+ if (ret != head->iov_len)
+ goto out;
+
+ if (xdr->page_len) {
+ unsigned int offset, len, remaining;
+ struct bio_vec *bvec;
+
+ bvec = xdr->bvec;
+ offset = xdr->page_base;
+ remaining = xdr->page_len;
+ flags = MSG_MORE | MSG_SENDPAGE_NOTLAST;
+ while (remaining > 0) {
+ if (remaining <= PAGE_SIZE && tail->iov_len == 0)
+ flags = 0;
+ len = min(remaining, bvec->bv_len);
+ ret = kernel_sendpage(sock, bvec->bv_page,
+ bvec->bv_offset + offset,
+ len, flags);
+ if (ret < 0)
+ return ret;
+ *sentp += ret;
+ if (ret != len)
+ goto out;
+ remaining -= len;
+ offset = 0;
+ bvec++;
+ }
+ }
+
+ if (tail->iov_len) {
+ ret = svc_tcp_send_kvec(sock, tail, 0);
+ if (ret < 0)
+ return ret;
+ *sentp += ret;
+ }
+
+out:
+ return 0;
+}
+
/**
* svc_tcp_sendto - Send out a reply on a TCP socket
* @rqstp: completed svc_rqst
@@ -1089,7 +1173,7 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp)
mutex_lock(&xprt->xpt_mutex);
if (svc_xprt_is_dead(xprt))
goto out_notconn;
- err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, marker, &sent);
+ err = svc_tcp_sendmsg(svsk->sk_sock, &msg, xdr, marker, &sent);
xdr_free_bvec(xdr);
trace_svcsock_tcp_send(xprt, err < 0 ? err : sent);
if (err < 0 || sent != (xdr->len + sizeof(marker)))