summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- drivers/infiniband/core/rw.c 24
-rw-r--r-- fs/lockd/svc.c 2
-rw-r--r-- fs/nfs/callback.c 10
-rw-r--r-- fs/nfsd/nfs4proc.c 110
-rw-r--r-- fs/nfsd/nfs4xdr.c 346
-rw-r--r-- fs/nfsd/nfssvc.c 2
-rw-r--r-- fs/nfsd/xdr4.h 50
-rw-r--r-- include/linux/sunrpc/svc.h 6
-rw-r--r-- include/linux/sunrpc/svc_xprt.h 4
-rw-r--r-- include/rdma/rw.h 2
-rw-r--r-- net/sunrpc/svc.c 6
-rw-r--r-- net/sunrpc/svcsock.c 6
-rw-r--r-- net/sunrpc/xprtrdma/svc_rdma_rw.c 116
-rw-r--r-- net/sunrpc/xprtrdma/svc_rdma_transport.c 40
14 files changed, 323 insertions, 401 deletions
diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c
index dbfd854c32c9..6ca607e8e293 100644
--- a/drivers/infiniband/core/rw.c
+++ b/drivers/infiniband/core/rw.c
@@ -643,6 +643,30 @@ void rdma_rw_ctx_destroy_signature(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
}
EXPORT_SYMBOL(rdma_rw_ctx_destroy_signature);
+/**
+ * rdma_rw_mr_factor - return number of MRs required for a payload
+ * @device: device handling the connection
+ * @port_num: port num to which the connection is bound
+ * @maxpages: maximum payload pages per rdma_rw_ctx
+ *
+ * Returns the number of MRs the device requires to move @maxpages
+ * pages. The returned value is used during transport creation to
+ * compute max_rdma_ctxts and the size of the transport's Send and
+ * Send Completion Queues.
+ */
+unsigned int rdma_rw_mr_factor(struct ib_device *device, u8 port_num,
+ unsigned int maxpages)
+{
+ unsigned int mr_pages;
+
+ if (rdma_rw_can_use_mr(device, port_num))
+ mr_pages = rdma_rw_fr_page_list_len(device);
+ else
+ mr_pages = device->attrs.max_sge_rd;
+ return DIV_ROUND_UP(maxpages, mr_pages); /* round up: a partially filled MR still counts as one */
+}
+EXPORT_SYMBOL(rdma_rw_mr_factor);
+
void rdma_rw_init_qp(struct ib_device *dev, struct ib_qp_init_attr *attr)
{
u32 factor;
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 726b6cecf430..b995bdc13976 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -396,7 +396,7 @@ out_rqst:
return error;
}
-static struct svc_serv_ops lockd_sv_ops = {
+static const struct svc_serv_ops lockd_sv_ops = {
.svo_shutdown = svc_rpcb_cleanup,
.svo_enqueue_xprt = svc_xprt_do_enqueue,
};
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 34323877ec13..2cddf7f437e6 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -226,26 +226,26 @@ err_bind:
return ret;
}
-static struct svc_serv_ops nfs40_cb_sv_ops = {
+static const struct svc_serv_ops nfs40_cb_sv_ops = {
.svo_function = nfs4_callback_svc,
.svo_enqueue_xprt = svc_xprt_do_enqueue,
.svo_setup = svc_set_num_threads_sync,
.svo_module = THIS_MODULE,
};
#if defined(CONFIG_NFS_V4_1)
-static struct svc_serv_ops nfs41_cb_sv_ops = {
+static const struct svc_serv_ops nfs41_cb_sv_ops = {
.svo_function = nfs41_callback_svc,
.svo_enqueue_xprt = svc_xprt_do_enqueue,
.svo_setup = svc_set_num_threads_sync,
.svo_module = THIS_MODULE,
};
-static struct svc_serv_ops *nfs4_cb_sv_ops[] = {
+static const struct svc_serv_ops *nfs4_cb_sv_ops[] = {
[0] = &nfs40_cb_sv_ops,
[1] = &nfs41_cb_sv_ops,
};
#else
-static struct svc_serv_ops *nfs4_cb_sv_ops[] = {
+static const struct svc_serv_ops *nfs4_cb_sv_ops[] = {
[0] = &nfs40_cb_sv_ops,
[1] = NULL,
};
@@ -254,8 +254,8 @@ static struct svc_serv_ops *nfs4_cb_sv_ops[] = {
static struct svc_serv *nfs_callback_create_svc(int minorversion)
{
struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion];
+ const struct svc_serv_ops *sv_ops;
struct svc_serv *serv;
- struct svc_serv_ops *sv_ops;
/*
* Check whether we're already up and running.
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index d27e75ad25e3..3c69db7d4905 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -784,6 +784,14 @@ out:
return status;
}
+
+static void
+nfsd4_read_release(union nfsd4_op_u *u)
+{
+ if (u->read.rd_filp)
+ fput(u->read.rd_filp);
+}
+
static __be32
nfsd4_readdir(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
union nfsd4_op_u *u)
@@ -912,6 +920,13 @@ nfsd4_secinfo_no_name(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstat
return nfs_ok;
}
+static void
+nfsd4_secinfo_release(union nfsd4_op_u *u)
+{
+ if (u->secinfo.si_exp)
+ exp_put(u->secinfo.si_exp);
+}
+
static __be32
nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
union nfsd4_op_u *u)
@@ -1335,6 +1350,12 @@ out:
return nfserr;
}
+static void
+nfsd4_getdeviceinfo_release(union nfsd4_op_u *u)
+{
+ kfree(u->getdeviceinfo.gd_device);
+}
+
static __be32
nfsd4_layoutget(struct svc_rqst *rqstp,
struct nfsd4_compound_state *cstate, union nfsd4_op_u *u)
@@ -1415,6 +1436,12 @@ out:
return nfserr;
}
+static void
+nfsd4_layoutget_release(union nfsd4_op_u *u)
+{
+ kfree(u->layoutget.lg_content);
+}
+
static __be32
nfsd4_layoutcommit(struct svc_rqst *rqstp,
struct nfsd4_compound_state *cstate, union nfsd4_op_u *u)
@@ -1541,49 +1568,6 @@ static inline void nfsd4_increment_op_stats(u32 opnum)
nfsdstats.nfs4_opcount[opnum]++;
}
-enum nfsd4_op_flags {
- ALLOWED_WITHOUT_FH = 1 << 0, /* No current filehandle required */
- ALLOWED_ON_ABSENT_FS = 1 << 1, /* ops processed on absent fs */
- ALLOWED_AS_FIRST_OP = 1 << 2, /* ops reqired first in compound */
- /* For rfc 5661 section 2.6.3.1.1: */
- OP_HANDLES_WRONGSEC = 1 << 3,
- OP_IS_PUTFH_LIKE = 1 << 4,
- /*
- * These are the ops whose result size we estimate before
- * encoding, to avoid performing an op then not being able to
- * respond or cache a response. This includes writes and setattrs
- * as well as the operations usually called "nonidempotent":
- */
- OP_MODIFIES_SOMETHING = 1 << 5,
- /*
- * Cache compounds containing these ops in the xid-based drc:
- * We use the DRC for compounds containing non-idempotent
- * operations, *except* those that are 4.1-specific (since
- * sessions provide their own EOS), and except for stateful
- * operations other than setclientid and setclientid_confirm
- * (since sequence numbers provide EOS for open, lock, etc in
- * the v4.0 case).
- */
- OP_CACHEME = 1 << 6,
- /*
- * These are ops which clear current state id.
- */
- OP_CLEAR_STATEID = 1 << 7,
-};
-
-struct nfsd4_operation {
- __be32 (*op_func)(struct svc_rqst *, struct nfsd4_compound_state *,
- union nfsd4_op_u *);
- u32 op_flags;
- char *op_name;
- /* Try to get response size before operation */
- u32 (*op_rsize_bop)(struct svc_rqst *, struct nfsd4_op *);
- void (*op_get_currentstateid)(struct nfsd4_compound_state *,
- union nfsd4_op_u *);
- void (*op_set_currentstateid)(struct nfsd4_compound_state *,
- union nfsd4_op_u *);
-};
-
static const struct nfsd4_operation nfsd4_ops[];
static const char *nfsd4_op_name(unsigned opnum);
@@ -1621,7 +1605,7 @@ static __be32 nfs41_check_op_ordering(struct nfsd4_compoundargs *args)
return nfs_ok;
}
-static inline const struct nfsd4_operation *OPDESC(struct nfsd4_op *op)
+const struct nfsd4_operation *OPDESC(struct nfsd4_op *op)
{
return &nfsd4_ops[op->opnum];
}
@@ -1694,7 +1678,6 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
struct nfsd4_compoundargs *args = rqstp->rq_argp;
struct nfsd4_compoundres *resp = rqstp->rq_resp;
struct nfsd4_op *op;
- const struct nfsd4_operation *opdesc;
struct nfsd4_compound_state *cstate = &resp->cstate;
struct svc_fh *current_fh = &cstate->current_fh;
struct svc_fh *save_fh = &cstate->save_fh;
@@ -1747,15 +1730,13 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
goto encode_op;
}
- opdesc = OPDESC(op);
-
if (!current_fh->fh_dentry) {
- if (!(opdesc->op_flags & ALLOWED_WITHOUT_FH)) {
+ if (!(op->opdesc->op_flags & ALLOWED_WITHOUT_FH)) {
op->status = nfserr_nofilehandle;
goto encode_op;
}
} else if (current_fh->fh_export->ex_fslocs.migrated &&
- !(opdesc->op_flags & ALLOWED_ON_ABSENT_FS)) {
+ !(op->opdesc->op_flags & ALLOWED_ON_ABSENT_FS)) {
op->status = nfserr_moved;
goto encode_op;
}
@@ -1763,12 +1744,12 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
fh_clear_wcc(current_fh);
/* If op is non-idempotent */
- if (opdesc->op_flags & OP_MODIFIES_SOMETHING) {
+ if (op->opdesc->op_flags & OP_MODIFIES_SOMETHING) {
/*
* Don't execute this op if we couldn't encode a
* succesful reply:
*/
- u32 plen = opdesc->op_rsize_bop(rqstp, op);
+ u32 plen = op->opdesc->op_rsize_bop(rqstp, op);
/*
* Plus if there's another operation, make sure
* we'll have space to at least encode an error:
@@ -1781,9 +1762,9 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
if (op->status)
goto encode_op;
- if (opdesc->op_get_currentstateid)
- opdesc->op_get_currentstateid(cstate, &op->u);
- op->status = opdesc->op_func(rqstp, cstate, &op->u);
+ if (op->opdesc->op_get_currentstateid)
+ op->opdesc->op_get_currentstateid(cstate, &op->u);
+ op->status = op->opdesc->op_func(rqstp, cstate, &op->u);
/* Only from SEQUENCE */
if (cstate->status == nfserr_replay_cache) {
@@ -1792,10 +1773,10 @@ nfsd4_proc_compound(struct svc_rqst *rqstp)
goto out;
}
if (!op->status) {
- if (opdesc->op_set_currentstateid)
- opdesc->op_set_currentstateid(cstate, &op->u);
+ if (op->opdesc->op_set_currentstateid)
+ op->opdesc->op_set_currentstateid(cstate, &op->u);
- if (opdesc->op_flags & OP_CLEAR_STATEID)
+ if (op->opdesc->op_flags & OP_CLEAR_STATEID)
clear_current_stateid(cstate);
if (need_wrongsec_check(rqstp))
@@ -2160,13 +2141,15 @@ static const struct nfsd4_operation nfsd4_ops[] = {
},
[OP_LOCK] = {
.op_func = nfsd4_lock,
- .op_flags = OP_MODIFIES_SOMETHING,
+ .op_flags = OP_MODIFIES_SOMETHING |
+ OP_NONTRIVIAL_ERROR_ENCODE,
.op_name = "OP_LOCK",
.op_rsize_bop = nfsd4_lock_rsize,
.op_set_currentstateid = nfsd4_set_lockstateid,
},
[OP_LOCKT] = {
.op_func = nfsd4_lockt,
+ .op_flags = OP_NONTRIVIAL_ERROR_ENCODE,
.op_name = "OP_LOCKT",
.op_rsize_bop = nfsd4_lock_rsize,
},
@@ -2238,6 +2221,7 @@ static const struct nfsd4_operation nfsd4_ops[] = {
},
[OP_READ] = {
.op_func = nfsd4_read,
+ .op_release = nfsd4_read_release,
.op_name = "OP_READ",
.op_rsize_bop = nfsd4_read_rsize,
.op_get_currentstateid = nfsd4_get_readstateid,
@@ -2287,6 +2271,7 @@ static const struct nfsd4_operation nfsd4_ops[] = {
},
[OP_SECINFO] = {
.op_func = nfsd4_secinfo,
+ .op_release = nfsd4_secinfo_release,
.op_flags = OP_HANDLES_WRONGSEC,
.op_name = "OP_SECINFO",
.op_rsize_bop = nfsd4_secinfo_rsize,
@@ -2294,14 +2279,16 @@ static const struct nfsd4_operation nfsd4_ops[] = {
[OP_SETATTR] = {
.op_func = nfsd4_setattr,
.op_name = "OP_SETATTR",
- .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME,
+ .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME
+ | OP_NONTRIVIAL_ERROR_ENCODE,
.op_rsize_bop = nfsd4_setattr_rsize,
.op_get_currentstateid = nfsd4_get_setattrstateid,
},
[OP_SETCLIENTID] = {
.op_func = nfsd4_setclientid,
.op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS
- | OP_MODIFIES_SOMETHING | OP_CACHEME,
+ | OP_MODIFIES_SOMETHING | OP_CACHEME
+ | OP_NONTRIVIAL_ERROR_ENCODE,
.op_name = "OP_SETCLIENTID",
.op_rsize_bop = nfsd4_setclientid_rsize,
},
@@ -2388,6 +2375,7 @@ static const struct nfsd4_operation nfsd4_ops[] = {
},
[OP_SECINFO_NO_NAME] = {
.op_func = nfsd4_secinfo_no_name,
+ .op_release = nfsd4_secinfo_release,
.op_flags = OP_HANDLES_WRONGSEC,
.op_name = "OP_SECINFO_NO_NAME",
.op_rsize_bop = nfsd4_secinfo_rsize,
@@ -2408,12 +2396,14 @@ static const struct nfsd4_operation nfsd4_ops[] = {
#ifdef CONFIG_NFSD_PNFS
[OP_GETDEVICEINFO] = {
.op_func = nfsd4_getdeviceinfo,
+ .op_release = nfsd4_getdeviceinfo_release,
.op_flags = ALLOWED_WITHOUT_FH,
.op_name = "OP_GETDEVICEINFO",
.op_rsize_bop = nfsd4_getdeviceinfo_rsize,
},
[OP_LAYOUTGET] = {
.op_func = nfsd4_layoutget,
+ .op_release = nfsd4_layoutget_release,
.op_flags = OP_MODIFIES_SOMETHING,
.op_name = "OP_LAYOUTGET",
.op_rsize_bop = nfsd4_layoutget_rsize,
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 5f940d2a136b..2c61c6b8ae09 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -159,6 +159,25 @@ static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes)
*/
unsigned int avail = (char *)argp->end - (char *)argp->p;
__be32 *p;
+
+ if (argp->pagelen == 0) {
+ struct kvec *vec = &argp->rqstp->rq_arg.tail[0];
+
+ if (!argp->tail) {
+ argp->tail = true;
+ avail = vec->iov_len;
+ argp->p = vec->iov_base;
+ argp->end = vec->iov_base + avail;
+ }
+
+ if (avail < nbytes)
+ return NULL;
+
+ p = argp->p;
+ argp->p += XDR_QUADLEN(nbytes);
+ return p;
+ }
+
if (avail + argp->pagelen < nbytes)
return NULL;
if (avail + PAGE_SIZE < nbytes) /* need more than a page !! */
@@ -1778,7 +1797,7 @@ nfsd4_decode_notsupp(struct nfsd4_compoundargs *argp, void *p)
typedef __be32(*nfsd4_dec)(struct nfsd4_compoundargs *argp, void *);
-static nfsd4_dec nfsd4_dec_ops[] = {
+static const nfsd4_dec nfsd4_dec_ops[] = {
[OP_ACCESS] = (nfsd4_dec)nfsd4_decode_access,
[OP_CLOSE] = (nfsd4_dec)nfsd4_decode_close,
[OP_COMMIT] = (nfsd4_dec)nfsd4_decode_commit,
@@ -1927,6 +1946,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
op->opnum = OP_ILLEGAL;
op->status = nfserr_op_illegal;
}
+ op->opdesc = OPDESC(op);
/*
* We'll try to cache the result in the DRC if any one
* op in the compound wants to be cached:
@@ -3102,14 +3122,12 @@ nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
struct xdr_stream *xdr = &resp->xdr;
__be32 *p;
- if (!nfserr) {
- p = xdr_reserve_space(xdr, 8);
- if (!p)
- return nfserr_resource;
- *p++ = cpu_to_be32(access->ac_supported);
- *p++ = cpu_to_be32(access->ac_resp_access);
- }
- return nfserr;
+ p = xdr_reserve_space(xdr, 8);
+ if (!p)
+ return nfserr_resource;
+ *p++ = cpu_to_be32(access->ac_supported);
+ *p++ = cpu_to_be32(access->ac_resp_access);
+ return 0;
}
static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_bind_conn_to_session *bcts)
@@ -3117,17 +3135,15 @@ static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp,
struct xdr_stream *xdr = &resp->xdr;
__be32 *p;
- if (!nfserr) {
- p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN + 8);
- if (!p)
- return nfserr_resource;
- p = xdr_encode_opaque_fixed(p, bcts->sessionid.data,
- NFS4_MAX_SESSIONID_LEN);
- *p++ = cpu_to_be32(bcts->dir);
- /* Upshifting from TCP to RDMA is not supported */
- *p++ = cpu_to_be32(0);
- }
- return nfserr;
+ p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN + 8);
+ if (!p)
+ return nfserr_resource;
+ p = xdr_encode_opaque_fixed(p, bcts->sessionid.data,
+ NFS4_MAX_SESSIONID_LEN);
+ *p++ = cpu_to_be32(bcts->dir);
+ /* Upshifting from TCP to RDMA is not supported */
+ *p++ = cpu_to_be32(0);
+ return 0;
}
static __be32
@@ -3135,10 +3151,7 @@ nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_c
{
struct xdr_stream *xdr = &resp->xdr;
- if (!nfserr)
- nfserr = nfsd4_encode_stateid(xdr, &close->cl_stateid);
-
- return nfserr;
+ return nfsd4_encode_stateid(xdr, &close->cl_stateid);
}
@@ -3148,14 +3161,12 @@ nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
struct xdr_stream *xdr = &resp->xdr;
__be32 *p;
- if (!nfserr) {
- p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE);
- if (!p)
- return nfserr_resource;
- p = xdr_encode_opaque_fixed(p, commit->co_verf.data,
+ p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE);
+ if (!p)
+ return nfserr_resource;
+ p = xdr_encode_opaque_fixed(p, commit->co_verf.data,
NFS4_VERIFIER_SIZE);
- }
- return nfserr;
+ return 0;
}
static __be32
@@ -3164,15 +3175,13 @@ nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
struct xdr_stream *xdr = &resp->xdr;
__be32 *p;
- if (!nfserr) {
- p = xdr_reserve_space(xdr, 20);
- if (!p)
- return nfserr_resource;
- encode_cinfo(p, &create->cr_cinfo);
- nfserr = nfsd4_encode_bitmap(xdr, create->cr_bmval[0],
- create->cr_bmval[1], create->cr_bmval[2]);
- }
- return nfserr;
+ p = xdr_reserve_space(xdr, 20);
+ if (!p)
+ return nfserr_resource;
+ encode_cinfo(p, &create->cr_cinfo);
+ nfserr = nfsd4_encode_bitmap(xdr, create->cr_bmval[0],
+ create->cr_bmval[1], create->cr_bmval[2]);
+ return nfserr; /* propagate a bitmap-encode failure instead of masking it with 0 */
}
static __be32
@@ -3181,13 +3190,8 @@ nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
struct svc_fh *fhp = getattr->ga_fhp;
struct xdr_stream *xdr = &resp->xdr;
- if (nfserr)
- return nfserr;
-
- nfserr = nfsd4_encode_fattr(xdr, fhp, fhp->fh_export, fhp->fh_dentry,
- getattr->ga_bmval,
- resp->rqstp, 0);
- return nfserr;
+ return nfsd4_encode_fattr(xdr, fhp, fhp->fh_export, fhp->fh_dentry,
+ getattr->ga_bmval, resp->rqstp, 0);
}
static __be32
@@ -3198,14 +3202,12 @@ nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh
unsigned int len;
__be32 *p;
- if (!nfserr) {
- len = fhp->fh_handle.fh_size;
- p = xdr_reserve_space(xdr, len + 4);
- if (!p)
- return nfserr_resource;
- p = xdr_encode_opaque(p, &fhp->fh_handle.fh_base, len);
- }
- return nfserr;
+ len = fhp->fh_handle.fh_size;
+ p = xdr_reserve_space(xdr, len + 4);
+ if (!p)
+ return nfserr_resource;
+ p = xdr_encode_opaque(p, &fhp->fh_handle.fh_base, len);
+ return 0;
}
/*
@@ -3275,10 +3277,7 @@ nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_l
{
struct xdr_stream *xdr = &resp->xdr;
- if (!nfserr)
- nfserr = nfsd4_encode_stateid(xdr, &locku->lu_stateid);
-
- return nfserr;
+ return nfsd4_encode_stateid(xdr, &locku->lu_stateid);
}
@@ -3288,13 +3287,11 @@ nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_li
struct xdr_stream *xdr = &resp->xdr;
__be32 *p;
- if (!nfserr) {
- p = xdr_reserve_space(xdr, 20);
- if (!p)
- return nfserr_resource;
- p = encode_cinfo(p, &link->li_cinfo);
- }
- return nfserr;
+ p = xdr_reserve_space(xdr, 20);
+ if (!p)
+ return nfserr_resource;
+ p = encode_cinfo(p, &link->li_cinfo);
+ return 0;
}
@@ -3304,12 +3301,9 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op
struct xdr_stream *xdr = &resp->xdr;
__be32 *p;
- if (nfserr)
- goto out;
-
nfserr = nfsd4_encode_stateid(xdr, &open->op_stateid);
if (nfserr)
- goto out;
+ return nfserr;
p = xdr_reserve_space(xdr, 24);
if (!p)
return nfserr_resource;
@@ -3319,7 +3313,7 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op
nfserr = nfsd4_encode_bitmap(xdr, open->op_bmval[0], open->op_bmval[1],
open->op_bmval[2]);
if (nfserr)
- goto out;
+ return nfserr;
p = xdr_reserve_space(xdr, 4);
if (!p)
@@ -3392,8 +3386,7 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op
BUG();
}
/* XXX save filehandle here */
-out:
- return nfserr;
+ return 0;
}
static __be32
@@ -3401,10 +3394,7 @@ nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct
{
struct xdr_stream *xdr = &resp->xdr;
- if (!nfserr)
- nfserr = nfsd4_encode_stateid(xdr, &oc->oc_resp_stateid);
-
- return nfserr;
+ return nfsd4_encode_stateid(xdr, &oc->oc_resp_stateid);
}
static __be32
@@ -3412,10 +3402,7 @@ nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struc
{
struct xdr_stream *xdr = &resp->xdr;
- if (!nfserr)
- nfserr = nfsd4_encode_stateid(xdr, &od->od_stateid);
-
- return nfserr;
+ return nfsd4_encode_stateid(xdr, &od->od_stateid);
}
static __be32 nfsd4_encode_splice_read(
@@ -3552,20 +3539,15 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
struct raparms *ra = NULL;
__be32 *p;
- if (nfserr)
- goto out;
-
p = xdr_reserve_space(xdr, 8); /* eof flag and byte count */
if (!p) {
WARN_ON_ONCE(test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags));
- nfserr = nfserr_resource;
- goto out;
+ return nfserr_resource;
}
if (resp->xdr.buf->page_len &&
test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)) {
WARN_ON_ONCE(1);
- nfserr = nfserr_resource;
- goto out;
+ return nfserr_resource;
}
xdr_commit_encode(xdr);
@@ -3589,9 +3571,6 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
if (nfserr)
xdr_truncate_encode(xdr, starting_len);
-out:
- if (file)
- fput(file);
return nfserr;
}
@@ -3605,9 +3584,6 @@ nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd
int length_offset = xdr->buf->len;
__be32 *p;
- if (nfserr)
- return nfserr;
-
p = xdr_reserve_space(xdr, 4);
if (!p)
return nfserr_resource;
@@ -3651,9 +3627,6 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4
int starting_len = xdr->buf->len;
__be32 *p;
- if (nfserr)
- return nfserr;
-
p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE);
if (!p)
return nfserr_resource;
@@ -3739,13 +3712,11 @@ nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
struct xdr_stream *xdr = &resp->xdr;
__be32 *p;
- if (!nfserr) {
- p = xdr_reserve_space(xdr, 20);
- if (!p)
- return nfserr_resource;
- p = encode_cinfo(p, &remove->rm_cinfo);
- }
- return nfserr;
+ p = xdr_reserve_space(xdr, 20);
+ if (!p)
+ return nfserr_resource;
+ p = encode_cinfo(p, &remove->rm_cinfo);
+ return 0;
}
static __be32
@@ -3754,19 +3725,16 @@ nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_
struct xdr_stream *xdr = &resp->xdr;
__be32 *p;
- if (!nfserr) {
- p = xdr_reserve_space(xdr, 40);
- if (!p)
- return nfserr_resource;
- p = encode_cinfo(p, &rename->rn_sinfo);
- p = encode_cinfo(p, &rename->rn_tinfo);
- }
- return nfserr;
+ p = xdr_reserve_space(xdr, 40);
+ if (!p)
+ return nfserr_resource;
+ p = encode_cinfo(p, &rename->rn_sinfo);
+ p = encode_cinfo(p, &rename->rn_tinfo);
+ return 0;
}
static __be32
-nfsd4_do_encode_secinfo(struct xdr_stream *xdr,
- __be32 nfserr, struct svc_export *exp)
+nfsd4_do_encode_secinfo(struct xdr_stream *xdr, struct svc_export *exp)
{
u32 i, nflavs, supported;
struct exp_flavor_info *flavs;
@@ -3774,9 +3742,6 @@ nfsd4_do_encode_secinfo(struct xdr_stream *xdr,
__be32 *p, *flavorsp;
static bool report = true;
- if (nfserr)
- goto out;
- nfserr = nfserr_resource;
if (exp->ex_nflavors) {
flavs = exp->ex_flavors;
nflavs = exp->ex_nflavors;
@@ -3800,7 +3765,7 @@ nfsd4_do_encode_secinfo(struct xdr_stream *xdr,
supported = 0;
p = xdr_reserve_space(xdr, 4);
if (!p)
- goto out;
+ return nfserr_resource;
flavorsp = p++; /* to be backfilled later */
for (i = 0; i < nflavs; i++) {
@@ -3812,7 +3777,7 @@ nfsd4_do_encode_secinfo(struct xdr_stream *xdr,
p = xdr_reserve_space(xdr, 4 + 4 +
XDR_LEN(info.oid.len) + 4 + 4);
if (!p)
- goto out;
+ return nfserr_resource;
*p++ = cpu_to_be32(RPC_AUTH_GSS);
p = xdr_encode_opaque(p, info.oid.data, info.oid.len);
*p++ = cpu_to_be32(info.qop);
@@ -3821,7 +3786,7 @@ nfsd4_do_encode_secinfo(struct xdr_stream *xdr,
supported++;
p = xdr_reserve_space(xdr, 4);
if (!p)
- goto out;
+ return nfserr_resource;
*p++ = cpu_to_be32(pf);
} else {
if (report)
@@ -3833,11 +3798,7 @@ nfsd4_do_encode_secinfo(struct xdr_stream *xdr,
if (nflavs != supported)
report = false;
*flavorsp = htonl(supported);
- nfserr = 0;
-out:
- if (exp)
- exp_put(exp);
- return nfserr;
+ return 0;
}
static __be32
@@ -3846,7 +3807,7 @@ nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
{
struct xdr_stream *xdr = &resp->xdr;
- return nfsd4_do_encode_secinfo(xdr, nfserr, secinfo->si_exp);
+ return nfsd4_do_encode_secinfo(xdr, secinfo->si_exp);
}
static __be32
@@ -3855,7 +3816,7 @@ nfsd4_encode_secinfo_no_name(struct nfsd4_compoundres *resp, __be32 nfserr,
{
struct xdr_stream *xdr = &resp->xdr;
- return nfsd4_do_encode_secinfo(xdr, nfserr, secinfo->sin_exp);
+ return nfsd4_do_encode_secinfo(xdr, secinfo->sin_exp);
}
/*
@@ -3916,16 +3877,14 @@ nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_w
struct xdr_stream *xdr = &resp->xdr;
__be32 *p;
- if (!nfserr) {
- p = xdr_reserve_space(xdr, 16);
- if (!p)
- return nfserr_resource;
- *p++ = cpu_to_be32(write->wr_bytes_written);
- *p++ = cpu_to_be32(write->wr_how_written);
- p = xdr_encode_opaque_fixed(p, write->wr_verifier.data,
- NFS4_VERIFIER_SIZE);
- }
- return nfserr;
+ p = xdr_reserve_space(xdr, 16);
+ if (!p)
+ return nfserr_resource;
+ *p++ = cpu_to_be32(write->wr_bytes_written);
+ *p++ = cpu_to_be32(write->wr_how_written);
+ p = xdr_encode_opaque_fixed(p, write->wr_verifier.data,
+ NFS4_VERIFIER_SIZE);
+ return 0;
}
static __be32
@@ -3938,12 +3897,8 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr,
char *server_scope;
int major_id_sz;
int server_scope_sz;
- int status = 0;
uint64_t minor_id = 0;
- if (nfserr)
- return nfserr;
-
major_id = utsname()->nodename;
major_id_sz = strlen(major_id);
server_scope = utsname()->nodename;
@@ -3968,19 +3923,19 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr,
break;
case SP4_MACH_CRED:
/* spo_must_enforce bitmap: */
- status = nfsd4_encode_bitmap(xdr,
+ nfserr = nfsd4_encode_bitmap(xdr,
exid->spo_must_enforce[0],
exid->spo_must_enforce[1],
exid->spo_must_enforce[2]);
- if (status)
- goto out;
+ if (nfserr)
+ return nfserr;
/* spo_must_allow bitmap: */
- status = nfsd4_encode_bitmap(xdr,
+ nfserr = nfsd4_encode_bitmap(xdr,
exid->spo_must_allow[0],
exid->spo_must_allow[1],
exid->spo_must_allow[2]);
- if (status)
- goto out;
+ if (nfserr)
+ return nfserr;
break;
default:
WARN_ON_ONCE(1);
@@ -4007,8 +3962,6 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr,
/* Implementation id */
*p++ = cpu_to_be32(0); /* zero length nfs_impl_id4 array */
return 0;
-out:
- return status;
}
static __be32
@@ -4018,9 +3971,6 @@ nfsd4_encode_create_session(struct nfsd4_compoundres *resp, __be32 nfserr,
struct xdr_stream *xdr = &resp->xdr;
__be32 *p;
- if (nfserr)
- return nfserr;
-
p = xdr_reserve_space(xdr, 24);
if (!p)
return nfserr_resource;
@@ -4074,9 +4024,6 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr,
struct xdr_stream *xdr = &resp->xdr;
__be32 *p;
- if (nfserr)
- return nfserr;
-
p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN + 20);
if (!p)
return nfserr_resource;
@@ -4101,9 +4048,6 @@ nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr,
struct nfsd4_test_stateid_id *stateid, *next;
__be32 *p;
- if (nfserr)
- return nfserr;
-
p = xdr_reserve_space(xdr, 4 + (4 * test_stateid->ts_num_ids));
if (!p)
return nfserr_resource;
@@ -4113,7 +4057,7 @@ nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr,
*p++ = stateid->ts_id_status;
}
- return nfserr;
+ return 0;
}
#ifdef CONFIG_NFSD_PNFS
@@ -4126,14 +4070,9 @@ nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
u32 starting_len = xdr->buf->len, needed_len;
__be32 *p;
- dprintk("%s: err %d\n", __func__, be32_to_cpu(nfserr));
- if (nfserr)
- goto out;
-
- nfserr = nfserr_resource;
p = xdr_reserve_space(xdr, 4);
if (!p)
- goto out;
+ return nfserr_resource;
*p++ = cpu_to_be32(gdev->gd_layout_type);
@@ -4149,42 +4088,33 @@ nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
*/
if (xdr->buf->len + 4 > gdev->gd_maxcount)
goto toosmall;
- goto out;
+ return nfserr;
}
}
- nfserr = nfserr_resource;
if (gdev->gd_notify_types) {
p = xdr_reserve_space(xdr, 4 + 4);
if (!p)
- goto out;
+ return nfserr_resource;
*p++ = cpu_to_be32(1); /* bitmap length */
*p++ = cpu_to_be32(gdev->gd_notify_types);
} else {
p = xdr_reserve_space(xdr, 4);
if (!p)
- goto out;
+ return nfserr_resource;
*p++ = 0;
}
- nfserr = 0;
-out:
- kfree(gdev->gd_device);
- dprintk("%s: done: %d\n", __func__, be32_to_cpu(nfserr));
- return nfserr;
-
+ return 0;
toosmall:
dprintk("%s: maxcount too small\n", __func__);
needed_len = xdr->buf->len + 4 /* notifications */;
xdr_truncate_encode(xdr, starting_len);
p = xdr_reserve_space(xdr, 4);
- if (!p) {
- nfserr = nfserr_resource;
- } else {
- *p++ = cpu_to_be32(needed_len);
- nfserr = nfserr_toosmall;
- }
- goto out;
+ if (!p)
+ return nfserr_resource;
+ *p++ = cpu_to_be32(needed_len);
+ return nfserr_toosmall;
}
static __be32
@@ -4195,14 +4125,9 @@ nfsd4_encode_layoutget(struct nfsd4_compoundres *resp, __be32 nfserr,
const struct nfsd4_layout_ops *ops;
__be32 *p;
- dprintk("%s: err %d\n", __func__, nfserr);
- if (nfserr)
- goto out;
-
- nfserr = nfserr_resource;
p = xdr_reserve_space(xdr, 36 + sizeof(stateid_opaque_t));
if (!p)
- goto out;
+ return nfserr_resource;
*p++ = cpu_to_be32(1); /* we always set return-on-close */
*p++ = cpu_to_be32(lgp->lg_sid.si_generation);
@@ -4216,10 +4141,7 @@ nfsd4_encode_layoutget(struct nfsd4_compoundres *resp, __be32 nfserr,
*p++ = cpu_to_be32(lgp->lg_layout_type);
ops = nfsd4_layout_ops[lgp->lg_layout_type];
- nfserr = ops->encode_layoutget(xdr, lgp);
-out:
- kfree(lgp->lg_content);
- return nfserr;
+ return ops->encode_layoutget(xdr, lgp);
}
static __be32
@@ -4229,9 +4151,6 @@ nfsd4_encode_layoutcommit(struct nfsd4_compoundres *resp, __be32 nfserr,
struct xdr_stream *xdr = &resp->xdr;
__be32 *p;
- if (nfserr)
- return nfserr;
-
p = xdr_reserve_space(xdr, 4);
if (!p)
return nfserr_resource;
@@ -4243,7 +4162,7 @@ nfsd4_encode_layoutcommit(struct nfsd4_compoundres *resp, __be32 nfserr,
p = xdr_encode_hyper(p, lcp->lc_newsize);
}
- return nfs_ok;
+ return 0;
}
static __be32
@@ -4253,16 +4172,13 @@ nfsd4_encode_layoutreturn(struct nfsd4_compoundres *resp, __be32 nfserr,
struct xdr_stream *xdr = &resp->xdr;
__be32 *p;
- if (nfserr)
- return nfserr;
-
p = xdr_reserve_space(xdr, 4);
if (!p)
return nfserr_resource;
*p++ = cpu_to_be32(lrp->lrs_present);
if (lrp->lrs_present)
return nfsd4_encode_stateid(xdr, &lrp->lr_sid);
- return nfs_ok;
+ return 0;
}
#endif /* CONFIG_NFSD_PNFS */
@@ -4289,16 +4205,14 @@ nfsd4_encode_copy(struct nfsd4_compoundres *resp, __be32 nfserr,
{
__be32 *p;
- if (!nfserr) {
- nfserr = nfsd42_encode_write_res(resp, &copy->cp_res);
- if (nfserr)
- return nfserr;
+ nfserr = nfsd42_encode_write_res(resp, &copy->cp_res);
+ if (nfserr)
+ return nfserr;
- p = xdr_reserve_space(&resp->xdr, 4 + 4);
- *p++ = cpu_to_be32(copy->cp_consecutive);
- *p++ = cpu_to_be32(copy->cp_synchronous);
- }
- return nfserr;
+ p = xdr_reserve_space(&resp->xdr, 4 + 4);
+ *p++ = cpu_to_be32(copy->cp_consecutive);
+ *p++ = cpu_to_be32(copy->cp_synchronous);
+ return 0;
}
static __be32
@@ -4307,14 +4221,11 @@ nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr,
{
__be32 *p;
- if (nfserr)
- return nfserr;
-
p = xdr_reserve_space(&resp->xdr, 4 + 8);
*p++ = cpu_to_be32(seek->seek_eof);
p = xdr_encode_hyper(p, seek->seek_pos);
- return nfserr;
+ return 0;
}
static __be32
@@ -4330,7 +4241,7 @@ typedef __be32(* nfsd4_enc)(struct nfsd4_compoundres *, __be32, void *);
* since we don't need to filter out obsolete ops as this is
* done in the decoding phase.
*/
-static nfsd4_enc nfsd4_enc_ops[] = {
+static const nfsd4_enc nfsd4_enc_ops[] = {
[OP_ACCESS] = (nfsd4_enc)nfsd4_encode_access,
[OP_CLOSE] = (nfsd4_enc)nfsd4_encode_close,
[OP_COMMIT] = (nfsd4_enc)nfsd4_encode_commit,
@@ -4449,6 +4360,7 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
struct xdr_stream *xdr = &resp->xdr;
struct nfs4_stateowner *so = resp->cstate.replay_owner;
struct svc_rqst *rqstp = resp->rqstp;
+ const struct nfsd4_operation *opdesc = op->opdesc;
int post_err_offset;
nfsd4_enc encoder;
__be32 *p;
@@ -4463,10 +4375,15 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
if (op->opnum == OP_ILLEGAL)
goto status;
+ if (op->status && opdesc &&
+ !(opdesc->op_flags & OP_NONTRIVIAL_ERROR_ENCODE))
+ goto status;
BUG_ON(op->opnum < 0 || op->opnum >= ARRAY_SIZE(nfsd4_enc_ops) ||
!nfsd4_enc_ops[op->opnum]);
encoder = nfsd4_enc_ops[op->opnum];
op->status = encoder(resp, op->status, &op->u);
+ if (opdesc && opdesc->op_release)
+ opdesc->op_release(&op->u);
xdr_commit_encode(xdr);
/* nfsd4_check_resp_size guarantees enough room for error status */
@@ -4573,6 +4490,7 @@ nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, __be32 *p)
args->end = rqstp->rq_arg.head[0].iov_base + rqstp->rq_arg.head[0].iov_len;
args->pagelist = rqstp->rq_arg.pages;
args->pagelen = rqstp->rq_arg.page_len;
+ args->tail = false;
args->tmpp = NULL;
args->to_free = NULL;
args->ops = args->iops;
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c
index 063ae7de2c12..7e3af3ef0917 100644
--- a/fs/nfsd/nfssvc.c
+++ b/fs/nfsd/nfssvc.c
@@ -475,7 +475,7 @@ static int nfsd_get_default_max_blksize(void)
return ret;
}
-static struct svc_serv_ops nfsd_thread_sv_ops = {
+static const struct svc_serv_ops nfsd_thread_sv_ops = {
.svo_shutdown = nfsd_last_thread,
.svo_function = nfsd,
.svo_enqueue_xprt = svc_xprt_do_enqueue,
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 72c6ad136107..1e4edbf70052 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -538,6 +538,7 @@ struct nfsd4_seek {
struct nfsd4_op {
int opnum;
+ const struct nfsd4_operation * opdesc;
__be32 status;
union nfsd4_op_u {
struct nfsd4_access access;
@@ -614,6 +615,7 @@ struct nfsd4_compoundargs {
__be32 * end;
struct page ** pagelist;
int pagelen;
+ bool tail;
__be32 tmp[8];
__be32 * tmpp;
struct svcxdr_tmpbuf *to_free;
@@ -661,6 +663,7 @@ static inline bool nfsd4_last_compound_op(struct svc_rqst *rqstp)
return argp->opcnt == resp->opcnt;
}
+const struct nfsd4_operation *OPDESC(struct nfsd4_op *op);
int nfsd4_max_reply(struct svc_rqst *rqstp, struct nfsd4_op *op);
void warn_on_nonidempotent_op(struct nfsd4_op *op);
@@ -748,6 +751,53 @@ extern __be32 nfsd4_free_stateid(struct svc_rqst *rqstp,
struct nfsd4_compound_state *, union nfsd4_op_u *);
extern void nfsd4_bump_seqid(struct nfsd4_compound_state *, __be32 nfserr);
+enum nfsd4_op_flags {
+ ALLOWED_WITHOUT_FH = 1 << 0, /* No current filehandle required */
+ ALLOWED_ON_ABSENT_FS = 1 << 1, /* ops processed on absent fs */
+ ALLOWED_AS_FIRST_OP = 1 << 2, /* ops required first in compound */
+ /* For rfc 5661 section 2.6.3.1.1: */
+ OP_HANDLES_WRONGSEC = 1 << 3,
+ OP_IS_PUTFH_LIKE = 1 << 4,
+ /*
+ * These are the ops whose result size we estimate before
+ * encoding, to avoid performing an op then not being able to
+ * respond or cache a response. This includes writes and setattrs
+ * as well as the operations usually called "nonidempotent":
+ */
+ OP_MODIFIES_SOMETHING = 1 << 5,
+ /*
+ * Cache compounds containing these ops in the xid-based drc:
+ * We use the DRC for compounds containing non-idempotent
+ * operations, *except* those that are 4.1-specific (since
+ * sessions provide their own EOS), and except for stateful
+ * operations other than setclientid and setclientid_confirm
+ * (since sequence numbers provide EOS for open, lock, etc in
+ * the v4.0 case).
+ */
+ OP_CACHEME = 1 << 6,
+ /*
+ * These are ops which clear current state id.
+ */
+ OP_CLEAR_STATEID = 1 << 7,
+ /* Most ops return only an error on failure; some may do more: */
+ OP_NONTRIVIAL_ERROR_ENCODE = 1 << 8,
+};
+
+struct nfsd4_operation {
+ __be32 (*op_func)(struct svc_rqst *, struct nfsd4_compound_state *,
+ union nfsd4_op_u *);
+ void (*op_release)(union nfsd4_op_u *);
+ u32 op_flags;
+ char *op_name;
+ /* Try to get response size before operation */
+ u32 (*op_rsize_bop)(struct svc_rqst *, struct nfsd4_op *);
+ void (*op_get_currentstateid)(struct nfsd4_compound_state *,
+ union nfsd4_op_u *);
+ void (*op_set_currentstateid)(struct nfsd4_compound_state *,
+ union nfsd4_op_u *);
+};
+
+
#endif
/*
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index a3f8af9bd543..38f561b2dda3 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -99,7 +99,7 @@ struct svc_serv {
unsigned int sv_nrpools; /* number of thread pools */
struct svc_pool * sv_pools; /* array of thread pools */
- struct svc_serv_ops *sv_ops; /* server operations */
+ const struct svc_serv_ops *sv_ops; /* server operations */
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
struct list_head sv_cb_list; /* queue for callback requests
* that arrive over the same
@@ -465,7 +465,7 @@ int svc_rpcb_setup(struct svc_serv *serv, struct net *net);
void svc_rpcb_cleanup(struct svc_serv *serv, struct net *net);
int svc_bind(struct svc_serv *serv, struct net *net);
struct svc_serv *svc_create(struct svc_program *, unsigned int,
- struct svc_serv_ops *);
+ const struct svc_serv_ops *);
struct svc_rqst *svc_rqst_alloc(struct svc_serv *serv,
struct svc_pool *pool, int node);
struct svc_rqst *svc_prepare_thread(struct svc_serv *serv,
@@ -475,7 +475,7 @@ void svc_exit_thread(struct svc_rqst *);
unsigned int svc_pool_map_get(void);
void svc_pool_map_put(void);
struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int,
- struct svc_serv_ops *);
+ const struct svc_serv_ops *);
int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int);
int svc_set_num_threads_sync(struct svc_serv *, struct svc_pool *, int);
int svc_pool_stats_open(struct svc_serv *serv, struct file *file);
diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h
index ddb7f94a9d06..6a2ad38f5458 100644
--- a/include/linux/sunrpc/svc_xprt.h
+++ b/include/linux/sunrpc/svc_xprt.h
@@ -31,7 +31,7 @@ struct svc_xprt_ops {
struct svc_xprt_class {
const char *xcl_name;
struct module *xcl_owner;
- struct svc_xprt_ops *xcl_ops;
+ const struct svc_xprt_ops *xcl_ops;
struct list_head xcl_list;
u32 xcl_max_payload;
int xcl_ident;
@@ -49,7 +49,7 @@ struct svc_xpt_user {
struct svc_xprt {
struct svc_xprt_class *xpt_class;
- struct svc_xprt_ops *xpt_ops;
+ const struct svc_xprt_ops *xpt_ops;
struct kref xpt_ref;
struct list_head xpt_list;
struct list_head xpt_ready;
diff --git a/include/rdma/rw.h b/include/rdma/rw.h
index 377d865e506d..a3cbbc7b6417 100644
--- a/include/rdma/rw.h
+++ b/include/rdma/rw.h
@@ -81,6 +81,8 @@ struct ib_send_wr *rdma_rw_ctx_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp,
int rdma_rw_ctx_post(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num,
struct ib_cqe *cqe, struct ib_send_wr *chain_wr);
+unsigned int rdma_rw_mr_factor(struct ib_device *device, u8 port_num,
+ unsigned int maxpages);
void rdma_rw_init_qp(struct ib_device *dev, struct ib_qp_init_attr *attr);
int rdma_rw_init_mrs(struct ib_qp *qp, struct ib_qp_init_attr *attr);
void rdma_rw_cleanup_mrs(struct ib_qp *qp);
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 85ce0db5b0a6..aa04666f929d 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -421,7 +421,7 @@ __svc_init_bc(struct svc_serv *serv)
*/
static struct svc_serv *
__svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
- struct svc_serv_ops *ops)
+ const struct svc_serv_ops *ops)
{
struct svc_serv *serv;
unsigned int vers;
@@ -486,7 +486,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
struct svc_serv *
svc_create(struct svc_program *prog, unsigned int bufsize,
- struct svc_serv_ops *ops)
+ const struct svc_serv_ops *ops)
{
return __svc_create(prog, bufsize, /*npools*/1, ops);
}
@@ -494,7 +494,7 @@ EXPORT_SYMBOL_GPL(svc_create);
struct svc_serv *
svc_create_pooled(struct svc_program *prog, unsigned int bufsize,
- struct svc_serv_ops *ops)
+ const struct svc_serv_ops *ops)
{
struct svc_serv *serv;
unsigned int npools = svc_pool_map_get();
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index e18500151236..399fab5d1936 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -693,7 +693,7 @@ static struct svc_xprt *svc_udp_create(struct svc_serv *serv,
return svc_create_socket(serv, IPPROTO_UDP, net, sa, salen, flags);
}
-static struct svc_xprt_ops svc_udp_ops = {
+static const struct svc_xprt_ops svc_udp_ops = {
.xpo_create = svc_udp_create,
.xpo_recvfrom = svc_udp_recvfrom,
.xpo_sendto = svc_udp_sendto,
@@ -1241,7 +1241,7 @@ static void svc_bc_tcp_sock_detach(struct svc_xprt *xprt)
{
}
-static struct svc_xprt_ops svc_tcp_bc_ops = {
+static const struct svc_xprt_ops svc_tcp_bc_ops = {
.xpo_create = svc_bc_tcp_create,
.xpo_detach = svc_bc_tcp_sock_detach,
.xpo_free = svc_bc_sock_free,
@@ -1275,7 +1275,7 @@ static void svc_cleanup_bc_xprt_sock(void)
}
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
-static struct svc_xprt_ops svc_tcp_ops = {
+static const struct svc_xprt_ops svc_tcp_ops = {
.xpo_create = svc_tcp_create,
.xpo_recvfrom = svc_tcp_recvfrom,
.xpo_sendto = svc_tcp_sendto,
diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c
index 933f79bed270..7dcda4597057 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_rw.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c
@@ -660,19 +660,21 @@ out_initerr:
return -EIO;
}
+/* Walk the segments in the Read chunk starting at @p and construct
+ * RDMA Read operations to pull the chunk to the server.
+ */
static int svc_rdma_build_read_chunk(struct svc_rqst *rqstp,
struct svc_rdma_read_info *info,
__be32 *p)
{
int ret;
+ ret = -EINVAL;
info->ri_chunklen = 0;
- while (*p++ != xdr_zero) {
+ while (*p++ != xdr_zero && be32_to_cpup(p++) == info->ri_position) {
u32 rs_handle, rs_length;
u64 rs_offset;
- if (be32_to_cpup(p++) != info->ri_position)
- break;
rs_handle = be32_to_cpup(p++);
rs_length = be32_to_cpup(p++);
p = xdr_decode_hyper(p, &rs_offset);
@@ -689,78 +691,6 @@ static int svc_rdma_build_read_chunk(struct svc_rqst *rqstp,
return ret;
}
-/* If there is inline content following the Read chunk, append it to
- * the page list immediately following the data payload. This has to
- * be done after the reader function has determined how many pages
- * were consumed for RDMA Read.
- *
- * On entry, ri_pageno and ri_pageoff point directly to the end of the
- * page list. On exit, both have been updated to the new "next byte".
- *
- * Assumptions:
- * - Inline content fits entirely in rq_pages[0]
- * - Trailing content is only a handful of bytes
- */
-static int svc_rdma_copy_tail(struct svc_rqst *rqstp,
- struct svc_rdma_read_info *info)
-{
- struct svc_rdma_op_ctxt *head = info->ri_readctxt;
- unsigned int tail_length, remaining;
- u8 *srcp, *destp;
-
- /* Assert that all inline content fits in page 0. This is an
- * implementation limit, not a protocol limit.
- */
- if (head->arg.head[0].iov_len > PAGE_SIZE) {
- pr_warn_once("svcrdma: too much trailing inline content\n");
- return -EINVAL;
- }
-
- srcp = head->arg.head[0].iov_base;
- srcp += info->ri_position;
- tail_length = head->arg.head[0].iov_len - info->ri_position;
- remaining = tail_length;
-
- /* If there is room on the last page in the page list, try to
- * fit the trailing content there.
- */
- if (info->ri_pageoff > 0) {
- unsigned int len;
-
- len = min_t(unsigned int, remaining,
- PAGE_SIZE - info->ri_pageoff);
- destp = page_address(rqstp->rq_pages[info->ri_pageno]);
- destp += info->ri_pageoff;
-
- memcpy(destp, srcp, len);
- srcp += len;
- destp += len;
- info->ri_pageoff += len;
- remaining -= len;
-
- if (info->ri_pageoff == PAGE_SIZE) {
- info->ri_pageno++;
- info->ri_pageoff = 0;
- }
- }
-
- /* Otherwise, a fresh page is needed. */
- if (remaining) {
- head->arg.pages[info->ri_pageno] =
- rqstp->rq_pages[info->ri_pageno];
- head->count++;
-
- destp = page_address(rqstp->rq_pages[info->ri_pageno]);
- memcpy(destp, srcp, remaining);
- info->ri_pageoff += remaining;
- }
-
- head->arg.page_len += tail_length;
- head->arg.len += tail_length;
- head->arg.buflen += tail_length;
- return 0;
-}
-
/* Construct RDMA Reads to pull over a normal Read chunk. The chunk
* data lands in the page list of head->arg.pages.
*
@@ -785,34 +715,28 @@ static int svc_rdma_build_normal_read_chunk(struct svc_rqst *rqstp,
if (ret < 0)
goto out;
- /* Read chunk may need XDR round-up (see RFC 5666, s. 3.7).
+ /* Split the Receive buffer between the head and tail
+ * buffers at Read chunk's position. XDR roundup of the
+ * chunk is not included in either the pagelist or in
+ * the tail.
*/
- if (info->ri_chunklen & 3) {
- u32 padlen = 4 - (info->ri_chunklen & 3);
-
- info->ri_chunklen += padlen;
+ head->arg.tail[0].iov_base =
+ head->arg.head[0].iov_base + info->ri_position;
+ head->arg.tail[0].iov_len =
+ head->arg.head[0].iov_len - info->ri_position;
+ head->arg.head[0].iov_len = info->ri_position;
- /* NB: data payload always starts on XDR alignment,
- * thus the pad can never contain a page boundary.
- */
- info->ri_pageoff += padlen;
- if (info->ri_pageoff == PAGE_SIZE) {
- info->ri_pageno++;
- info->ri_pageoff = 0;
- }
- }
+ /* Read chunk may need XDR roundup (see RFC 5666, s. 3.7).
+ *
+ * NFSv2/3 write decoders need the length of the tail to
+ * contain the size of the roundup padding.
+ */
+ head->arg.tail[0].iov_len += 4 - (info->ri_chunklen & 3);
head->arg.page_len = info->ri_chunklen;
head->arg.len += info->ri_chunklen;
head->arg.buflen += info->ri_chunklen;
- if (info->ri_position < head->arg.head[0].iov_len) {
- ret = svc_rdma_copy_tail(rqstp, info);
- if (ret < 0)
- goto out;
- }
- head->arg.head[0].iov_len = info->ri_position;
-
out:
return ret;
}
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index e660d4965b18..5caf8e722a11 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -51,6 +51,7 @@
#include <linux/workqueue.h>
#include <rdma/ib_verbs.h>
#include <rdma/rdma_cm.h>
+#include <rdma/rw.h>
#include <linux/sunrpc/svc_rdma.h>
#include <linux/export.h>
#include "xprt_rdma.h"
@@ -70,7 +71,7 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt);
static int svc_rdma_secure_port(struct svc_rqst *);
static void svc_rdma_kill_temp_xprt(struct svc_xprt *);
-static struct svc_xprt_ops svc_rdma_ops = {
+static const struct svc_xprt_ops svc_rdma_ops = {
.xpo_create = svc_rdma_create,
.xpo_recvfrom = svc_rdma_recvfrom,
.xpo_sendto = svc_rdma_sendto,
@@ -98,7 +99,7 @@ static struct svc_xprt *svc_rdma_bc_create(struct svc_serv *, struct net *,
static void svc_rdma_bc_detach(struct svc_xprt *);
static void svc_rdma_bc_free(struct svc_xprt *);
-static struct svc_xprt_ops svc_rdma_bc_ops = {
+static const struct svc_xprt_ops svc_rdma_bc_ops = {
.xpo_create = svc_rdma_bc_create,
.xpo_detach = svc_rdma_bc_detach,
.xpo_free = svc_rdma_bc_free,
@@ -167,8 +168,8 @@ static bool svc_rdma_prealloc_ctxts(struct svcxprt_rdma *xprt)
{
unsigned int i;
- /* Each RPC/RDMA credit can consume a number of send
- * and receive WQEs. One ctxt is allocated for each.
+ /* Each RPC/RDMA credit can consume one Receive and
+ * one Send WQE at the same time.
*/
i = xprt->sc_sq_depth + xprt->sc_rq_depth;
@@ -713,7 +714,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
struct ib_qp_init_attr qp_attr;
struct ib_device *dev;
struct sockaddr *sap;
- unsigned int i;
+ unsigned int i, ctxts;
int ret = 0;
listen_rdma = container_of(xprt, struct svcxprt_rdma, sc_xprt);
@@ -742,14 +743,26 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
newxprt->sc_max_sge = min((size_t)dev->attrs.max_sge,
(size_t)RPCSVC_MAXPAGES);
newxprt->sc_max_req_size = svcrdma_max_req_size;
- newxprt->sc_max_requests = min_t(u32, dev->attrs.max_qp_wr,
- svcrdma_max_requests);
- newxprt->sc_fc_credits = cpu_to_be32(newxprt->sc_max_requests);
- newxprt->sc_max_bc_requests = min_t(u32, dev->attrs.max_qp_wr,
- svcrdma_max_bc_requests);
+ newxprt->sc_max_requests = svcrdma_max_requests;
+ newxprt->sc_max_bc_requests = svcrdma_max_bc_requests;
newxprt->sc_rq_depth = newxprt->sc_max_requests +
newxprt->sc_max_bc_requests;
- newxprt->sc_sq_depth = newxprt->sc_rq_depth;
+ if (newxprt->sc_rq_depth > dev->attrs.max_qp_wr) {
+ pr_warn("svcrdma: reducing receive depth to %d\n",
+ dev->attrs.max_qp_wr);
+ newxprt->sc_rq_depth = dev->attrs.max_qp_wr;
+ newxprt->sc_max_requests = newxprt->sc_rq_depth - 2;
+ newxprt->sc_max_bc_requests = 2;
+ }
+ newxprt->sc_fc_credits = cpu_to_be32(newxprt->sc_max_requests);
+ ctxts = rdma_rw_mr_factor(dev, newxprt->sc_port_num, RPCSVC_MAXPAGES);
+ ctxts *= newxprt->sc_max_requests;
+ newxprt->sc_sq_depth = newxprt->sc_rq_depth + ctxts;
+ if (newxprt->sc_sq_depth > dev->attrs.max_qp_wr) {
+ pr_warn("svcrdma: reducing send depth to %d\n",
+ dev->attrs.max_qp_wr);
+ newxprt->sc_sq_depth = dev->attrs.max_qp_wr;
+ }
atomic_set(&newxprt->sc_sq_avail, newxprt->sc_sq_depth);
if (!svc_rdma_prealloc_ctxts(newxprt))
@@ -784,8 +797,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
qp_attr.event_handler = qp_event_handler;
qp_attr.qp_context = &newxprt->sc_xprt;
qp_attr.port_num = newxprt->sc_port_num;
- qp_attr.cap.max_rdma_ctxs = newxprt->sc_max_requests;
- qp_attr.cap.max_send_wr = newxprt->sc_sq_depth;
+ qp_attr.cap.max_rdma_ctxs = ctxts;
+ qp_attr.cap.max_send_wr = newxprt->sc_sq_depth - ctxts;
qp_attr.cap.max_recv_wr = newxprt->sc_rq_depth;
qp_attr.cap.max_send_sge = newxprt->sc_max_sge;
qp_attr.cap.max_recv_sge = newxprt->sc_max_sge;
@@ -853,6 +866,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt)
dprintk(" remote address : %pIS:%u\n", sap, rpc_get_port(sap));
dprintk(" max_sge : %d\n", newxprt->sc_max_sge);
dprintk(" sq_depth : %d\n", newxprt->sc_sq_depth);
+ dprintk(" rdma_rw_ctxs : %d\n", ctxts);
dprintk(" max_requests : %d\n", newxprt->sc_max_requests);
dprintk(" ord : %d\n", newxprt->sc_ord);