From 0020939f20ce2881e51488e254fbb7a9b6c515f6 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sat, 6 May 2017 10:38:39 -0400 Subject: nfsd4: move some nfsd4 op definitions to xdr4.h I want code in nfs4xdr.c to have access to this stuff. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 43 ------------------------------------------- fs/nfsd/xdr4.h | 44 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 43 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index d27e75ad25e3..9a627a6c52aa 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1541,49 +1541,6 @@ static inline void nfsd4_increment_op_stats(u32 opnum) nfsdstats.nfs4_opcount[opnum]++; } -enum nfsd4_op_flags { - ALLOWED_WITHOUT_FH = 1 << 0, /* No current filehandle required */ - ALLOWED_ON_ABSENT_FS = 1 << 1, /* ops processed on absent fs */ - ALLOWED_AS_FIRST_OP = 1 << 2, /* ops reqired first in compound */ - /* For rfc 5661 section 2.6.3.1.1: */ - OP_HANDLES_WRONGSEC = 1 << 3, - OP_IS_PUTFH_LIKE = 1 << 4, - /* - * These are the ops whose result size we estimate before - * encoding, to avoid performing an op then not being able to - * respond or cache a response. This includes writes and setattrs - * as well as the operations usually called "nonidempotent": - */ - OP_MODIFIES_SOMETHING = 1 << 5, - /* - * Cache compounds containing these ops in the xid-based drc: - * We use the DRC for compounds containing non-idempotent - * operations, *except* those that are 4.1-specific (since - * sessions provide their own EOS), and except for stateful - * operations other than setclientid and setclientid_confirm - * (since sequence numbers provide EOS for open, lock, etc in - * the v4.0 case). - */ - OP_CACHEME = 1 << 6, - /* - * These are ops which clear current state id. - */ - OP_CLEAR_STATEID = 1 << 7, -}; - -struct nfsd4_operation { - __be32 (*op_func)(struct svc_rqst *, struct nfsd4_compound_state *, - union nfsd4_op_u *); - u32 op_flags; - char *op_name; - /* Try to get response size before operation */ - u32 (*op_rsize_bop)(struct svc_rqst *, struct nfsd4_op *); - void (*op_get_currentstateid)(struct nfsd4_compound_state *, - union nfsd4_op_u *); - void (*op_set_currentstateid)(struct nfsd4_compound_state *, - union nfsd4_op_u *); -}; - static const struct nfsd4_operation nfsd4_ops[]; static const char *nfsd4_op_name(unsigned opnum); diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 72c6ad136107..fcdfe1c3ff4f 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -748,6 +748,50 @@ extern __be32 nfsd4_free_stateid(struct svc_rqst *rqstp, struct nfsd4_compound_state *, union nfsd4_op_u *); extern void nfsd4_bump_seqid(struct nfsd4_compound_state *, __be32 nfserr); +enum nfsd4_op_flags { + ALLOWED_WITHOUT_FH = 1 << 0, /* No current filehandle required */ + ALLOWED_ON_ABSENT_FS = 1 << 1, /* ops processed on absent fs */ + ALLOWED_AS_FIRST_OP = 1 << 2, /* ops reqired first in compound */ + /* For rfc 5661 section 2.6.3.1.1: */ + OP_HANDLES_WRONGSEC = 1 << 3, + OP_IS_PUTFH_LIKE = 1 << 4, + /* + * These are the ops whose result size we estimate before + * encoding, to avoid performing an op then not being able to + * respond or cache a response. 
This includes writes and setattrs + * as well as the operations usually called "nonidempotent": + */ + OP_MODIFIES_SOMETHING = 1 << 5, + /* + * Cache compounds containing these ops in the xid-based drc: + * We use the DRC for compounds containing non-idempotent + * operations, *except* those that are 4.1-specific (since + * sessions provide their own EOS), and except for stateful + * operations other than setclientid and setclientid_confirm + * (since sequence numbers provide EOS for open, lock, etc in + * the v4.0 case). + */ + OP_CACHEME = 1 << 6, + /* + * These are ops which clear current state id. + */ + OP_CLEAR_STATEID = 1 << 7, +}; + +struct nfsd4_operation { + __be32 (*op_func)(struct svc_rqst *, struct nfsd4_compound_state *, + union nfsd4_op_u *); + u32 op_flags; + char *op_name; + /* Try to get response size before operation */ + u32 (*op_rsize_bop)(struct svc_rqst *, struct nfsd4_op *); + void (*op_get_currentstateid)(struct nfsd4_compound_state *, + union nfsd4_op_u *); + void (*op_set_currentstateid)(struct nfsd4_compound_state *, + union nfsd4_op_u *); +}; + + #endif /* -- cgit v1.2.3 From f4f9ef4a1b0a1ca80b152e28e176d69515bdf7e8 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 6 Jul 2017 17:51:29 -0400 Subject: nfsd4: opdesc will be useful outside nfs4proc.c Trivial cleanup, no change in behavior. Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4proc.c | 25 +++++++++++-------------- fs/nfsd/nfs4xdr.c | 1 + fs/nfsd/xdr4.h | 2 ++ 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 9a627a6c52aa..01d7f2456f62 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -1578,7 +1578,7 @@ static __be32 nfs41_check_op_ordering(struct nfsd4_compoundargs *args) return nfs_ok; } -static inline const struct nfsd4_operation *OPDESC(struct nfsd4_op *op) +const struct nfsd4_operation *OPDESC(struct nfsd4_op *op) { return &nfsd4_ops[op->opnum]; } @@ -1651,7 +1651,6 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) struct nfsd4_compoundargs *args = rqstp->rq_argp; struct nfsd4_compoundres *resp = rqstp->rq_resp; struct nfsd4_op *op; - const struct nfsd4_operation *opdesc; struct nfsd4_compound_state *cstate = &resp->cstate; struct svc_fh *current_fh = &cstate->current_fh; struct svc_fh *save_fh = &cstate->save_fh; @@ -1704,15 +1703,13 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) goto encode_op; } - opdesc = OPDESC(op); - if (!current_fh->fh_dentry) { - if (!(opdesc->op_flags & ALLOWED_WITHOUT_FH)) { + if (!(op->opdesc->op_flags & ALLOWED_WITHOUT_FH)) { op->status = nfserr_nofilehandle; goto encode_op; } } else if (current_fh->fh_export->ex_fslocs.migrated && - !(opdesc->op_flags & ALLOWED_ON_ABSENT_FS)) { + !(op->opdesc->op_flags & ALLOWED_ON_ABSENT_FS)) { op->status = nfserr_moved; goto encode_op; } @@ -1720,12 +1717,12 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) fh_clear_wcc(current_fh); /* If op is non-idempotent */ - if (opdesc->op_flags & OP_MODIFIES_SOMETHING) { + if (op->opdesc->op_flags & OP_MODIFIES_SOMETHING) { /* * Don't execute this op if we couldn't encode a * succesful reply: */ - u32 plen = opdesc->op_rsize_bop(rqstp, op); + u32 plen = op->opdesc->op_rsize_bop(rqstp, op); /* * Plus if there's another operation, make sure * we'll have space to at least encode an error: @@ -1738,9 +1735,9 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) if (op->status) goto encode_op; - if (opdesc->op_get_currentstateid) - opdesc->op_get_currentstateid(cstate, &op->u); - op->status = opdesc->op_func(rqstp, cstate, &op->u); + if 
(op->opdesc->op_get_currentstateid) + op->opdesc->op_get_currentstateid(cstate, &op->u); + op->status = op->opdesc->op_func(rqstp, cstate, &op->u); /* Only from SEQUENCE */ if (cstate->status == nfserr_replay_cache) { @@ -1749,10 +1746,10 @@ nfsd4_proc_compound(struct svc_rqst *rqstp) goto out; } if (!op->status) { - if (opdesc->op_set_currentstateid) - opdesc->op_set_currentstateid(cstate, &op->u); + if (op->opdesc->op_set_currentstateid) + op->opdesc->op_set_currentstateid(cstate, &op->u); - if (opdesc->op_flags & OP_CLEAR_STATEID) + if (op->opdesc->op_flags & OP_CLEAR_STATEID) clear_current_stateid(cstate); if (need_wrongsec_check(rqstp)) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 20fbcab97753..40ed23fda814 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1929,6 +1929,7 @@ nfsd4_decode_compound(struct nfsd4_compoundargs *argp) op->opnum = OP_ILLEGAL; op->status = nfserr_op_illegal; } + op->opdesc = OPDESC(op); /* * We'll try to cache the result in the DRC if any one * op in the compound wants to be cached: diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index fcdfe1c3ff4f..90b928006bc7 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -538,6 +538,7 @@ struct nfsd4_seek { struct nfsd4_op { int opnum; + const struct nfsd4_operation * opdesc; __be32 status; union nfsd4_op_u { struct nfsd4_access access; @@ -661,6 +662,7 @@ static inline bool nfsd4_last_compound_op(struct svc_rqst *rqstp) return argp->opcnt == resp->opcnt; } +const struct nfsd4_operation *OPDESC(struct nfsd4_op *op); int nfsd4_max_reply(struct svc_rqst *rqstp, struct nfsd4_op *op); void warn_on_nonidempotent_op(struct nfsd4_op *op); -- cgit v1.2.3 From 34b1744c91ccd44811005822106945fa80ecbff2 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Fri, 5 May 2017 17:09:37 -0400 Subject: nfsd4: define ->op_release for compound ops Run a separate ->op_release function if necessary instead of depending on the xdr encoder to do this. Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4proc.c | 32 ++++++++++++++++++++++++++++++++ fs/nfsd/nfs4xdr.c | 9 +++------ fs/nfsd/xdr4.h | 1 + 3 files changed, 36 insertions(+), 6 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 01d7f2456f62..ac53ae2435c5 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -784,6 +784,14 @@ out: return status; } + +static void +nfsd4_read_release(union nfsd4_op_u *u) +{ + if (u->read.rd_filp) + fput(u->read.rd_filp); +} + static __be32 nfsd4_readdir(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) @@ -912,6 +920,13 @@ nfsd4_secinfo_no_name(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstat return nfs_ok; } +static void +nfsd4_secinfo_release(union nfsd4_op_u *u) +{ + if (u->secinfo.si_exp) + exp_put(u->secinfo.si_exp); +} + static __be32 nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) @@ -1335,6 +1350,12 @@ out: return nfserr; } +static void +nfsd4_getdeviceinfo_release(union nfsd4_op_u *u) +{ + kfree(u->getdeviceinfo.gd_device); +} + static __be32 nfsd4_layoutget(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) @@ -1415,6 +1436,12 @@ out: return nfserr; } +static void +nfsd4_layoutget_release(union nfsd4_op_u *u) +{ + kfree(u->layoutget.lg_content); +} + static __be32 nfsd4_layoutcommit(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, union nfsd4_op_u *u) @@ -2192,6 +2219,7 @@ static const struct nfsd4_operation nfsd4_ops[] = { }, [OP_READ] = { .op_func = nfsd4_read, + .op_release = nfsd4_read_release, .op_name = "OP_READ", .op_rsize_bop = nfsd4_read_rsize, .op_get_currentstateid = nfsd4_get_readstateid, @@ -2241,6 +2269,7 @@ static const struct nfsd4_operation nfsd4_ops[] = { }, [OP_SECINFO] = { .op_func = nfsd4_secinfo, + .op_release = nfsd4_secinfo_release, .op_flags = OP_HANDLES_WRONGSEC, .op_name = "OP_SECINFO", .op_rsize_bop = nfsd4_secinfo_rsize, @@ -2342,6 +2371,7 @@ static const struct nfsd4_operation nfsd4_ops[] = { }, [OP_SECINFO_NO_NAME] = { .op_func = nfsd4_secinfo_no_name, + .op_release = nfsd4_secinfo_release, .op_flags = OP_HANDLES_WRONGSEC, .op_name = "OP_SECINFO_NO_NAME", .op_rsize_bop = nfsd4_secinfo_rsize, @@ -2362,12 +2392,14 @@ static const struct nfsd4_operation nfsd4_ops[] = { #ifdef CONFIG_NFSD_PNFS [OP_GETDEVICEINFO] = { .op_func = nfsd4_getdeviceinfo, + .op_release = nfsd4_getdeviceinfo_release, .op_flags = ALLOWED_WITHOUT_FH, .op_name = "OP_GETDEVICEINFO", .op_rsize_bop = nfsd4_getdeviceinfo_rsize, }, [OP_LAYOUTGET] = { .op_func = nfsd4_layoutget, + .op_release = nfsd4_layoutget_release, .op_flags = OP_MODIFIES_SOMETHING, .op_name = "OP_LAYOUTGET", .op_rsize_bop = nfsd4_layoutget_rsize, diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 40ed23fda814..7d683e3aebf0 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3593,8 +3593,6 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, xdr_truncate_encode(xdr, starting_len); out: - if (file) - fput(file); return nfserr; } @@ -3838,8 +3836,6 @@ nfsd4_do_encode_secinfo(struct xdr_stream *xdr, *flavorsp = htonl(supported); nfserr = 0; out: - if (exp) - exp_put(exp); return nfserr; } @@ -4172,7 +4168,6 @@ nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr, nfserr = 0; out: - kfree(gdev->gd_device); dprintk("%s: done: %d\n", __func__, be32_to_cpu(nfserr)); return nfserr; @@ -4221,7 +4216,6 @@ nfsd4_encode_layoutget(struct nfsd4_compoundres *resp, __be32 nfserr, ops = 
nfsd4_layout_ops[lgp->lg_layout_type]; nfserr = ops->encode_layoutget(xdr, lgp); out: - kfree(lgp->lg_content); return nfserr; } @@ -4452,6 +4446,7 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) struct xdr_stream *xdr = &resp->xdr; struct nfs4_stateowner *so = resp->cstate.replay_owner; struct svc_rqst *rqstp = resp->rqstp; + const struct nfsd4_operation *opdesc = op->opdesc; int post_err_offset; nfsd4_enc encoder; __be32 *p; @@ -4470,6 +4465,8 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) !nfsd4_enc_ops[op->opnum]); encoder = nfsd4_enc_ops[op->opnum]; op->status = encoder(resp, op->status, &op->u); + if (opdesc && opdesc->op_release) + opdesc->op_release(&op->u); xdr_commit_encode(xdr); /* nfsd4_check_resp_size guarantees enough room for error status */ diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 90b928006bc7..1e6274e0e066 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -783,6 +783,7 @@ enum nfsd4_op_flags { struct nfsd4_operation { __be32 (*op_func)(struct svc_rqst *, struct nfsd4_compound_state *, union nfsd4_op_u *); + void (*op_release)(union nfsd4_op_u *); u32 op_flags; char *op_name; /* Try to get response size before operation */ -- cgit v1.2.3 From b7571e4cd39ae860379138b883bc5f57a8553184 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sat, 6 May 2017 10:49:21 -0400 Subject: nfsd4: skip encoder in trivial error cases Most encoders do nothing in the error case. But they can still screw things up in that case: most errors happen very early in rpc processing, possibly before argument fields are filled in and bounds-tested, so encoders that do anything other than immediately bail on error can easily crash in odd error cases. So just handle errors centrally most of the time to remove the chance of error. Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4proc.c | 10 +++++++--- fs/nfsd/nfs4xdr.c | 3 +++ fs/nfsd/xdr4.h | 2 ++ 3 files changed, 12 insertions(+), 3 deletions(-) diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index ac53ae2435c5..3c69db7d4905 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -2141,13 +2141,15 @@ static const struct nfsd4_operation nfsd4_ops[] = { }, [OP_LOCK] = { .op_func = nfsd4_lock, - .op_flags = OP_MODIFIES_SOMETHING, + .op_flags = OP_MODIFIES_SOMETHING | + OP_NONTRIVIAL_ERROR_ENCODE, .op_name = "OP_LOCK", .op_rsize_bop = nfsd4_lock_rsize, .op_set_currentstateid = nfsd4_set_lockstateid, }, [OP_LOCKT] = { .op_func = nfsd4_lockt, + .op_flags = OP_NONTRIVIAL_ERROR_ENCODE, .op_name = "OP_LOCKT", .op_rsize_bop = nfsd4_lock_rsize, }, @@ -2277,14 +2279,16 @@ static const struct nfsd4_operation nfsd4_ops[] = { [OP_SETATTR] = { .op_func = nfsd4_setattr, .op_name = "OP_SETATTR", - .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME, + .op_flags = OP_MODIFIES_SOMETHING | OP_CACHEME + | OP_NONTRIVIAL_ERROR_ENCODE, .op_rsize_bop = nfsd4_setattr_rsize, .op_get_currentstateid = nfsd4_get_setattrstateid, }, [OP_SETCLIENTID] = { .op_func = nfsd4_setclientid, .op_flags = ALLOWED_WITHOUT_FH | ALLOWED_ON_ABSENT_FS - | OP_MODIFIES_SOMETHING | OP_CACHEME, + | OP_MODIFIES_SOMETHING | OP_CACHEME + | OP_NONTRIVIAL_ERROR_ENCODE, .op_name = "OP_SETCLIENTID", .op_rsize_bop = nfsd4_setclientid_rsize, }, diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 7d683e3aebf0..08badcf64554 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -4461,6 +4461,9 @@ nfsd4_encode_operation(struct nfsd4_compoundres *resp, struct nfsd4_op *op) if (op->opnum == OP_ILLEGAL) goto status; + if (op->status && opdesc && + !(opdesc->op_flags & OP_NONTRIVIAL_ERROR_ENCODE)) + goto status; BUG_ON(op->opnum < 0 || op->opnum >= ARRAY_SIZE(nfsd4_enc_ops) || !nfsd4_enc_ops[op->opnum]); encoder = nfsd4_enc_ops[op->opnum]; diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index 1e6274e0e066..f8a0b6549a88 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -778,6 +778,8 @@ enum nfsd4_op_flags { * These are ops which clear current state id. */ OP_CLEAR_STATEID = 1 << 7, + /* Most ops return only an error on failure; some may do more: */ + OP_NONTRIVIAL_ERROR_ENCODE = 1 << 8, }; struct nfsd4_operation { -- cgit v1.2.3 From bac966d6065207b6900a4c2a3e6e5f78cfee7b4d Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Sat, 6 May 2017 11:12:04 -0400 Subject: nfsd4: individual encoders no longer see error cases With a few exceptions, most individual encoders don't handle error cases. Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4xdr.c | 309 ++++++++++++++++++------------------------------------ 1 file changed, 103 insertions(+), 206 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 08badcf64554..18b9c2fad29a 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3105,14 +3105,12 @@ nfsd4_encode_access(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ struct xdr_stream *xdr = &resp->xdr; __be32 *p; - if (!nfserr) { - p = xdr_reserve_space(xdr, 8); - if (!p) - return nfserr_resource; - *p++ = cpu_to_be32(access->ac_supported); - *p++ = cpu_to_be32(access->ac_resp_access); - } - return nfserr; + p = xdr_reserve_space(xdr, 8); + if (!p) + return nfserr_resource; + *p++ = cpu_to_be32(access->ac_supported); + *p++ = cpu_to_be32(access->ac_resp_access); + return 0; } static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_bind_conn_to_session *bcts) @@ -3120,17 +3118,15 @@ static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp, struct xdr_stream *xdr = &resp->xdr; __be32 *p; - if (!nfserr) { - p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN + 8); - if (!p) - return nfserr_resource; - p = xdr_encode_opaque_fixed(p, bcts->sessionid.data, - NFS4_MAX_SESSIONID_LEN); - *p++ = cpu_to_be32(bcts->dir); - /* Upshifting from TCP to RDMA is not supported */ - *p++ = cpu_to_be32(0); - } - return nfserr; + p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN + 8); + if (!p) + return nfserr_resource; + p = xdr_encode_opaque_fixed(p, bcts->sessionid.data, + NFS4_MAX_SESSIONID_LEN); + *p++ = cpu_to_be32(bcts->dir); + /* Upshifting from TCP to RDMA is not supported */ + *p++ = cpu_to_be32(0); + return 0; } static __be32 @@ -3138,10 +3134,7 @@ nfsd4_encode_close(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_c { struct xdr_stream *xdr = &resp->xdr; - if (!nfserr) - nfserr = nfsd4_encode_stateid(xdr, &close->cl_stateid); - - return nfserr; + return nfsd4_encode_stateid(xdr, &close->cl_stateid); } @@ -3151,14 +3144,12 @@ nfsd4_encode_commit(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ struct xdr_stream *xdr = &resp->xdr; __be32 *p; - if (!nfserr) { - p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE); - if (!p) - return nfserr_resource; - p = xdr_encode_opaque_fixed(p, commit->co_verf.data, + p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE); + if (!p) + return nfserr_resource; + p = xdr_encode_opaque_fixed(p, commit->co_verf.data, NFS4_VERIFIER_SIZE); - } - return nfserr; + return 0; } static __be32 @@ -3167,15 +3158,13 @@ nfsd4_encode_create(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ struct xdr_stream *xdr = &resp->xdr; __be32 *p; - if (!nfserr) { - p = xdr_reserve_space(xdr, 20); - if (!p) - return nfserr_resource; - encode_cinfo(p, &create->cr_cinfo); - nfserr = nfsd4_encode_bitmap(xdr, create->cr_bmval[0], - create->cr_bmval[1], create->cr_bmval[2]); - } - return nfserr; + p = xdr_reserve_space(xdr, 20); + if (!p) + return nfserr_resource; + encode_cinfo(p, &create->cr_cinfo); + nfserr = nfsd4_encode_bitmap(xdr, create->cr_bmval[0], + create->cr_bmval[1], create->cr_bmval[2]); + return 0; } static __be32 @@ -3184,13 +3173,8 @@ nfsd4_encode_getattr(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 struct svc_fh *fhp = getattr->ga_fhp; struct xdr_stream *xdr = &resp->xdr; - if (nfserr) - return nfserr; - - nfserr = nfsd4_encode_fattr(xdr, fhp, fhp->fh_export, fhp->fh_dentry, - getattr->ga_bmval, - resp->rqstp, 0); - return nfserr; + return 
nfsd4_encode_fattr(xdr, fhp, fhp->fh_export, fhp->fh_dentry, + getattr->ga_bmval, resp->rqstp, 0); } static __be32 @@ -3201,14 +3185,12 @@ nfsd4_encode_getfh(struct nfsd4_compoundres *resp, __be32 nfserr, struct svc_fh unsigned int len; __be32 *p; - if (!nfserr) { - len = fhp->fh_handle.fh_size; - p = xdr_reserve_space(xdr, len + 4); - if (!p) - return nfserr_resource; - p = xdr_encode_opaque(p, &fhp->fh_handle.fh_base, len); - } - return nfserr; + len = fhp->fh_handle.fh_size; + p = xdr_reserve_space(xdr, len + 4); + if (!p) + return nfserr_resource; + p = xdr_encode_opaque(p, &fhp->fh_handle.fh_base, len); + return 0; } /* @@ -3278,10 +3260,7 @@ nfsd4_encode_locku(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_l { struct xdr_stream *xdr = &resp->xdr; - if (!nfserr) - nfserr = nfsd4_encode_stateid(xdr, &locku->lu_stateid); - - return nfserr; + return nfsd4_encode_stateid(xdr, &locku->lu_stateid); } @@ -3291,13 +3270,11 @@ nfsd4_encode_link(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_li struct xdr_stream *xdr = &resp->xdr; __be32 *p; - if (!nfserr) { - p = xdr_reserve_space(xdr, 20); - if (!p) - return nfserr_resource; - p = encode_cinfo(p, &link->li_cinfo); - } - return nfserr; + p = xdr_reserve_space(xdr, 20); + if (!p) + return nfserr_resource; + p = encode_cinfo(p, &link->li_cinfo); + return 0; } @@ -3307,12 +3284,9 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op struct xdr_stream *xdr = &resp->xdr; __be32 *p; - if (nfserr) - goto out; - nfserr = nfsd4_encode_stateid(xdr, &open->op_stateid); if (nfserr) - goto out; + return nfserr; p = xdr_reserve_space(xdr, 24); if (!p) return nfserr_resource; @@ -3322,7 +3296,7 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op nfserr = nfsd4_encode_bitmap(xdr, open->op_bmval[0], open->op_bmval[1], open->op_bmval[2]); if (nfserr) - goto out; + return nfserr; p = xdr_reserve_space(xdr, 4); if (!p) @@ -3395,8 +3369,7 @@ nfsd4_encode_open(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_op BUG(); } /* XXX save filehandle here */ -out: - return nfserr; + return 0; } static __be32 @@ -3404,10 +3377,7 @@ nfsd4_encode_open_confirm(struct nfsd4_compoundres *resp, __be32 nfserr, struct { struct xdr_stream *xdr = &resp->xdr; - if (!nfserr) - nfserr = nfsd4_encode_stateid(xdr, &oc->oc_resp_stateid); - - return nfserr; + return nfsd4_encode_stateid(xdr, &oc->oc_resp_stateid); } static __be32 @@ -3415,10 +3385,7 @@ nfsd4_encode_open_downgrade(struct nfsd4_compoundres *resp, __be32 nfserr, struc { struct xdr_stream *xdr = &resp->xdr; - if (!nfserr) - nfserr = nfsd4_encode_stateid(xdr, &od->od_stateid); - - return nfserr; + return nfsd4_encode_stateid(xdr, &od->od_stateid); } static __be32 nfsd4_encode_splice_read( @@ -3555,20 +3522,15 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, struct raparms *ra = NULL; __be32 *p; - if (nfserr) - goto out; - p = xdr_reserve_space(xdr, 8); /* eof flag and byte count */ if (!p) { WARN_ON_ONCE(test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)); - nfserr = nfserr_resource; - goto out; + return nfserr_resource; } if (resp->xdr.buf->page_len && test_bit(RQ_SPLICE_OK, &resp->rqstp->rq_flags)) { WARN_ON_ONCE(1); - nfserr = nfserr_resource; - goto out; + return nfserr_resource; } xdr_commit_encode(xdr); @@ -3592,7 +3554,6 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr, if (nfserr) xdr_truncate_encode(xdr, starting_len); -out: return nfserr; } @@ -3606,9 +3567,6 @@ 
nfsd4_encode_readlink(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd int length_offset = xdr->buf->len; __be32 *p; - if (nfserr) - return nfserr; - p = xdr_reserve_space(xdr, 4); if (!p) return nfserr_resource; @@ -3652,9 +3610,6 @@ nfsd4_encode_readdir(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4 int starting_len = xdr->buf->len; __be32 *p; - if (nfserr) - return nfserr; - p = xdr_reserve_space(xdr, NFS4_VERIFIER_SIZE); if (!p) return nfserr_resource; @@ -3740,13 +3695,11 @@ nfsd4_encode_remove(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ struct xdr_stream *xdr = &resp->xdr; __be32 *p; - if (!nfserr) { - p = xdr_reserve_space(xdr, 20); - if (!p) - return nfserr_resource; - p = encode_cinfo(p, &remove->rm_cinfo); - } - return nfserr; + p = xdr_reserve_space(xdr, 20); + if (!p) + return nfserr_resource; + p = encode_cinfo(p, &remove->rm_cinfo); + return 0; } static __be32 @@ -3755,19 +3708,16 @@ nfsd4_encode_rename(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_ struct xdr_stream *xdr = &resp->xdr; __be32 *p; - if (!nfserr) { - p = xdr_reserve_space(xdr, 40); - if (!p) - return nfserr_resource; - p = encode_cinfo(p, &rename->rn_sinfo); - p = encode_cinfo(p, &rename->rn_tinfo); - } - return nfserr; + p = xdr_reserve_space(xdr, 40); + if (!p) + return nfserr_resource; + p = encode_cinfo(p, &rename->rn_sinfo); + p = encode_cinfo(p, &rename->rn_tinfo); + return 0; } static __be32 -nfsd4_do_encode_secinfo(struct xdr_stream *xdr, - __be32 nfserr, struct svc_export *exp) +nfsd4_do_encode_secinfo(struct xdr_stream *xdr, struct svc_export *exp) { u32 i, nflavs, supported; struct exp_flavor_info *flavs; @@ -3775,9 +3725,6 @@ nfsd4_do_encode_secinfo(struct xdr_stream *xdr, __be32 *p, *flavorsp; static bool report = true; - if (nfserr) - goto out; - nfserr = nfserr_resource; if (exp->ex_nflavors) { flavs = exp->ex_flavors; nflavs = exp->ex_nflavors; @@ -3801,7 +3748,7 @@ nfsd4_do_encode_secinfo(struct xdr_stream *xdr, supported = 0; p = xdr_reserve_space(xdr, 4); if (!p) - goto out; + return nfserr_resource; flavorsp = p++; /* to be backfilled later */ for (i = 0; i < nflavs; i++) { @@ -3813,7 +3760,7 @@ nfsd4_do_encode_secinfo(struct xdr_stream *xdr, p = xdr_reserve_space(xdr, 4 + 4 + XDR_LEN(info.oid.len) + 4 + 4); if (!p) - goto out; + return nfserr_resource; *p++ = cpu_to_be32(RPC_AUTH_GSS); p = xdr_encode_opaque(p, info.oid.data, info.oid.len); *p++ = cpu_to_be32(info.qop); @@ -3822,7 +3769,7 @@ nfsd4_do_encode_secinfo(struct xdr_stream *xdr, supported++; p = xdr_reserve_space(xdr, 4); if (!p) - goto out; + return nfserr_resource; *p++ = cpu_to_be32(pf); } else { if (report) @@ -3834,9 +3781,7 @@ nfsd4_do_encode_secinfo(struct xdr_stream *xdr, if (nflavs != supported) report = false; *flavorsp = htonl(supported); - nfserr = 0; -out: - return nfserr; + return 0; } static __be32 @@ -3845,7 +3790,7 @@ nfsd4_encode_secinfo(struct nfsd4_compoundres *resp, __be32 nfserr, { struct xdr_stream *xdr = &resp->xdr; - return nfsd4_do_encode_secinfo(xdr, nfserr, secinfo->si_exp); + return nfsd4_do_encode_secinfo(xdr, secinfo->si_exp); } static __be32 @@ -3854,7 +3799,7 @@ nfsd4_encode_secinfo_no_name(struct nfsd4_compoundres *resp, __be32 nfserr, { struct xdr_stream *xdr = &resp->xdr; - return nfsd4_do_encode_secinfo(xdr, nfserr, secinfo->sin_exp); + return nfsd4_do_encode_secinfo(xdr, secinfo->sin_exp); } /* @@ -3915,16 +3860,14 @@ nfsd4_encode_write(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_w struct xdr_stream *xdr = &resp->xdr; __be32 
*p; - if (!nfserr) { - p = xdr_reserve_space(xdr, 16); - if (!p) - return nfserr_resource; - *p++ = cpu_to_be32(write->wr_bytes_written); - *p++ = cpu_to_be32(write->wr_how_written); - p = xdr_encode_opaque_fixed(p, write->wr_verifier.data, - NFS4_VERIFIER_SIZE); - } - return nfserr; + p = xdr_reserve_space(xdr, 16); + if (!p) + return nfserr_resource; + *p++ = cpu_to_be32(write->wr_bytes_written); + *p++ = cpu_to_be32(write->wr_how_written); + p = xdr_encode_opaque_fixed(p, write->wr_verifier.data, + NFS4_VERIFIER_SIZE); + return 0; } static __be32 @@ -3937,12 +3880,8 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, char *server_scope; int major_id_sz; int server_scope_sz; - int status = 0; uint64_t minor_id = 0; - if (nfserr) - return nfserr; - major_id = utsname()->nodename; major_id_sz = strlen(major_id); server_scope = utsname()->nodename; @@ -3967,19 +3906,19 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, break; case SP4_MACH_CRED: /* spo_must_enforce bitmap: */ - status = nfsd4_encode_bitmap(xdr, + nfserr = nfsd4_encode_bitmap(xdr, exid->spo_must_enforce[0], exid->spo_must_enforce[1], exid->spo_must_enforce[2]); - if (status) - goto out; + if (nfserr) + return nfserr; /* spo_must_allow bitmap: */ - status = nfsd4_encode_bitmap(xdr, + nfserr = nfsd4_encode_bitmap(xdr, exid->spo_must_allow[0], exid->spo_must_allow[1], exid->spo_must_allow[2]); - if (status) - goto out; + if (nfserr) + return nfserr; break; default: WARN_ON_ONCE(1); @@ -4006,8 +3945,6 @@ nfsd4_encode_exchange_id(struct nfsd4_compoundres *resp, __be32 nfserr, /* Implementation id */ *p++ = cpu_to_be32(0); /* zero length nfs_impl_id4 array */ return 0; -out: - return status; } static __be32 @@ -4017,9 +3954,6 @@ nfsd4_encode_create_session(struct nfsd4_compoundres *resp, __be32 nfserr, struct xdr_stream *xdr = &resp->xdr; __be32 *p; - if (nfserr) - return nfserr; - p = xdr_reserve_space(xdr, 24); if (!p) return nfserr_resource; @@ -4073,9 +4007,6 @@ nfsd4_encode_sequence(struct nfsd4_compoundres *resp, __be32 nfserr, struct xdr_stream *xdr = &resp->xdr; __be32 *p; - if (nfserr) - return nfserr; - p = xdr_reserve_space(xdr, NFS4_MAX_SESSIONID_LEN + 20); if (!p) return nfserr_resource; @@ -4100,9 +4031,6 @@ nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr, struct nfsd4_test_stateid_id *stateid, *next; __be32 *p; - if (nfserr) - return nfserr; - p = xdr_reserve_space(xdr, 4 + (4 * test_stateid->ts_num_ids)); if (!p) return nfserr_resource; @@ -4112,7 +4040,7 @@ nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr, *p++ = stateid->ts_id_status; } - return nfserr; + return 0; } #ifdef CONFIG_NFSD_PNFS @@ -4125,14 +4053,9 @@ nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr, u32 starting_len = xdr->buf->len, needed_len; __be32 *p; - dprintk("%s: err %d\n", __func__, be32_to_cpu(nfserr)); - if (nfserr) - goto out; - - nfserr = nfserr_resource; p = xdr_reserve_space(xdr, 4); if (!p) - goto out; + return nfserr_resource; *p++ = cpu_to_be32(gdev->gd_layout_type); @@ -4148,41 +4071,33 @@ nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr, */ if (xdr->buf->len + 4 > gdev->gd_maxcount) goto toosmall; - goto out; + return nfserr; } } - nfserr = nfserr_resource; if (gdev->gd_notify_types) { p = xdr_reserve_space(xdr, 4 + 4); if (!p) - goto out; + return nfserr_resource; *p++ = cpu_to_be32(1); /* bitmap length */ *p++ = cpu_to_be32(gdev->gd_notify_types); } else { p = xdr_reserve_space(xdr, 4); if 
(!p) - goto out; + return nfserr_resource; *p++ = 0; } - nfserr = 0; -out: - dprintk("%s: done: %d\n", __func__, be32_to_cpu(nfserr)); - return nfserr; - + return 0; toosmall: dprintk("%s: maxcount too small\n", __func__); needed_len = xdr->buf->len + 4 /* notifications */; xdr_truncate_encode(xdr, starting_len); p = xdr_reserve_space(xdr, 4); - if (!p) { - nfserr = nfserr_resource; - } else { - *p++ = cpu_to_be32(needed_len); - nfserr = nfserr_toosmall; - } - goto out; + if (!p) + return nfserr_resource; + *p++ = cpu_to_be32(needed_len); + return nfserr_toosmall; } static __be32 @@ -4193,14 +4108,9 @@ nfsd4_encode_layoutget(struct nfsd4_compoundres *resp, __be32 nfserr, const struct nfsd4_layout_ops *ops; __be32 *p; - dprintk("%s: err %d\n", __func__, nfserr); - if (nfserr) - goto out; - - nfserr = nfserr_resource; p = xdr_reserve_space(xdr, 36 + sizeof(stateid_opaque_t)); if (!p) - goto out; + return nfserr_resource; *p++ = cpu_to_be32(1); /* we always set return-on-close */ *p++ = cpu_to_be32(lgp->lg_sid.si_generation); @@ -4214,9 +4124,7 @@ nfsd4_encode_layoutget(struct nfsd4_compoundres *resp, __be32 nfserr, *p++ = cpu_to_be32(lgp->lg_layout_type); ops = nfsd4_layout_ops[lgp->lg_layout_type]; - nfserr = ops->encode_layoutget(xdr, lgp); -out: - return nfserr; + return ops->encode_layoutget(xdr, lgp); } static __be32 @@ -4226,9 +4134,6 @@ nfsd4_encode_layoutcommit(struct nfsd4_compoundres *resp, __be32 nfserr, struct xdr_stream *xdr = &resp->xdr; __be32 *p; - if (nfserr) - return nfserr; - p = xdr_reserve_space(xdr, 4); if (!p) return nfserr_resource; @@ -4240,7 +4145,7 @@ nfsd4_encode_layoutcommit(struct nfsd4_compoundres *resp, __be32 nfserr, p = xdr_encode_hyper(p, lcp->lc_newsize); } - return nfs_ok; + return 0; } static __be32 @@ -4250,16 +4155,13 @@ nfsd4_encode_layoutreturn(struct nfsd4_compoundres *resp, __be32 nfserr, struct xdr_stream *xdr = &resp->xdr; __be32 *p; - if (nfserr) - return nfserr; - p = xdr_reserve_space(xdr, 4); if (!p) return nfserr_resource; *p++ = cpu_to_be32(lrp->lrs_present); if (lrp->lrs_present) return nfsd4_encode_stateid(xdr, &lrp->lr_sid); - return nfs_ok; + return 0; } #endif /* CONFIG_NFSD_PNFS */ @@ -4286,16 +4188,14 @@ nfsd4_encode_copy(struct nfsd4_compoundres *resp, __be32 nfserr, { __be32 *p; - if (!nfserr) { - nfserr = nfsd42_encode_write_res(resp, ©->cp_res); - if (nfserr) - return nfserr; + nfserr = nfsd42_encode_write_res(resp, ©->cp_res); + if (nfserr) + return nfserr; - p = xdr_reserve_space(&resp->xdr, 4 + 4); - *p++ = cpu_to_be32(copy->cp_consecutive); - *p++ = cpu_to_be32(copy->cp_synchronous); - } - return nfserr; + p = xdr_reserve_space(&resp->xdr, 4 + 4); + *p++ = cpu_to_be32(copy->cp_consecutive); + *p++ = cpu_to_be32(copy->cp_synchronous); + return 0; } static __be32 @@ -4304,14 +4204,11 @@ nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr, { __be32 *p; - if (nfserr) - return nfserr; - p = xdr_reserve_space(&resp->xdr, 4 + 8); *p++ = cpu_to_be32(seek->seek_eof); p = xdr_encode_hyper(p, seek->seek_pos); - return nfserr; + return 0; } static __be32 -- cgit v1.2.3 From 2412e927604e0af7e17ae4b688b85a1e87e378fe Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 1 Aug 2017 11:59:49 -0400 Subject: sunrpc: Const-ify instances of struct svc_xprt_ops Close an attack vector by moving the arrays of server-side transport methods to read-only memory. Signed-off-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- include/linux/sunrpc/svc_xprt.h | 4 ++-- net/sunrpc/svcsock.c | 6 +++--- net/sunrpc/xprtrdma/svc_rdma_transport.c | 4 ++-- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index ddb7f94a9d06..6a2ad38f5458 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -31,7 +31,7 @@ struct svc_xprt_ops { struct svc_xprt_class { const char *xcl_name; struct module *xcl_owner; - struct svc_xprt_ops *xcl_ops; + const struct svc_xprt_ops *xcl_ops; struct list_head xcl_list; u32 xcl_max_payload; int xcl_ident; @@ -49,7 +49,7 @@ struct svc_xpt_user { struct svc_xprt { struct svc_xprt_class *xpt_class; - struct svc_xprt_ops *xpt_ops; + const struct svc_xprt_ops *xpt_ops; struct kref xpt_ref; struct list_head xpt_list; struct list_head xpt_ready; diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 2b720fa35c4f..b434f524d677 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -687,7 +687,7 @@ static struct svc_xprt *svc_udp_create(struct svc_serv *serv, return svc_create_socket(serv, IPPROTO_UDP, net, sa, salen, flags); } -static struct svc_xprt_ops svc_udp_ops = { +static const struct svc_xprt_ops svc_udp_ops = { .xpo_create = svc_udp_create, .xpo_recvfrom = svc_udp_recvfrom, .xpo_sendto = svc_udp_sendto, @@ -1229,7 +1229,7 @@ static void svc_bc_tcp_sock_detach(struct svc_xprt *xprt) { } -static struct svc_xprt_ops svc_tcp_bc_ops = { +static const struct svc_xprt_ops svc_tcp_bc_ops = { .xpo_create = svc_bc_tcp_create, .xpo_detach = svc_bc_tcp_sock_detach, .xpo_free = svc_bc_sock_free, @@ -1263,7 +1263,7 @@ static void svc_cleanup_bc_xprt_sock(void) } #endif /* CONFIG_SUNRPC_BACKCHANNEL */ -static struct svc_xprt_ops svc_tcp_ops = { +static const struct svc_xprt_ops svc_tcp_ops = { .xpo_create = svc_tcp_create, .xpo_recvfrom = svc_tcp_recvfrom, .xpo_sendto = svc_tcp_sendto, diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index e660d4965b18..2aa8473dcc97 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -70,7 +70,7 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt); static int svc_rdma_secure_port(struct svc_rqst *); static void svc_rdma_kill_temp_xprt(struct svc_xprt *); -static struct svc_xprt_ops svc_rdma_ops = { +static const struct svc_xprt_ops svc_rdma_ops = { .xpo_create = svc_rdma_create, .xpo_recvfrom = svc_rdma_recvfrom, .xpo_sendto = svc_rdma_sendto, @@ -98,7 +98,7 @@ static struct svc_xprt *svc_rdma_bc_create(struct svc_serv *, struct net *, static void svc_rdma_bc_detach(struct svc_xprt *); static void svc_rdma_bc_free(struct svc_xprt *); -static struct svc_xprt_ops svc_rdma_bc_ops = { +static const struct svc_xprt_ops svc_rdma_bc_ops = { .xpo_create = svc_rdma_bc_create, .xpo_detach = svc_rdma_bc_detach, .xpo_free = svc_rdma_bc_free, -- cgit v1.2.3 From c1df609d9dcec5dcb89ee3091edc3671da1c9996 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 1 Aug 2017 11:59:58 -0400 Subject: nfsd: Const-ify NFSv4 encoding and decoding ops arrays Close an attack vector by moving the arrays of encoding and decoding methods to read-only memory. Signed-off-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfs4xdr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 18b9c2fad29a..0c3b935d100f 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1780,7 +1780,7 @@ nfsd4_decode_notsupp(struct nfsd4_compoundargs *argp, void *p) typedef __be32(*nfsd4_dec)(struct nfsd4_compoundargs *argp, void *); -static nfsd4_dec nfsd4_dec_ops[] = { +static const nfsd4_dec nfsd4_dec_ops[] = { [OP_ACCESS] = (nfsd4_dec)nfsd4_decode_access, [OP_CLOSE] = (nfsd4_dec)nfsd4_decode_close, [OP_COMMIT] = (nfsd4_dec)nfsd4_decode_commit, @@ -4224,7 +4224,7 @@ typedef __be32(* nfsd4_enc)(struct nfsd4_compoundres *, __be32, void *); * since we don't need to filter out obsolete ops as this is * done in the decoding phase. */ -static nfsd4_enc nfsd4_enc_ops[] = { +static const nfsd4_enc nfsd4_enc_ops[] = { [OP_ACCESS] = (nfsd4_enc)nfsd4_encode_access, [OP_CLOSE] = (nfsd4_enc)nfsd4_encode_close, [OP_COMMIT] = (nfsd4_enc)nfsd4_encode_commit, -- cgit v1.2.3 From afea5657c20b11ec9f895ac5cc33b560fb1e0276 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 1 Aug 2017 12:00:06 -0400 Subject: sunrpc: Const-ify struct sv_serv_ops Close an attack vector by moving the arrays of per-server methods to read-only memory. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- fs/lockd/svc.c | 2 +- fs/nfs/callback.c | 10 +++++----- fs/nfsd/nfssvc.c | 2 +- include/linux/sunrpc/svc.h | 6 +++--- net/sunrpc/svc.c | 6 +++--- 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 726b6cecf430..b995bdc13976 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -396,7 +396,7 @@ out_rqst: return error; } -static struct svc_serv_ops lockd_sv_ops = { +static const struct svc_serv_ops lockd_sv_ops = { .svo_shutdown = svc_rpcb_cleanup, .svo_enqueue_xprt = svc_xprt_do_enqueue, }; diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 34323877ec13..2cddf7f437e6 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -226,26 +226,26 @@ err_bind: return ret; } -static struct svc_serv_ops nfs40_cb_sv_ops = { +static const struct svc_serv_ops nfs40_cb_sv_ops = { .svo_function = nfs4_callback_svc, .svo_enqueue_xprt = svc_xprt_do_enqueue, .svo_setup = svc_set_num_threads_sync, .svo_module = THIS_MODULE, }; #if defined(CONFIG_NFS_V4_1) -static struct svc_serv_ops nfs41_cb_sv_ops = { +static const struct svc_serv_ops nfs41_cb_sv_ops = { .svo_function = nfs41_callback_svc, .svo_enqueue_xprt = svc_xprt_do_enqueue, .svo_setup = svc_set_num_threads_sync, .svo_module = THIS_MODULE, }; -static struct svc_serv_ops *nfs4_cb_sv_ops[] = { +static const struct svc_serv_ops *nfs4_cb_sv_ops[] = { [0] = &nfs40_cb_sv_ops, [1] = &nfs41_cb_sv_ops, }; #else -static struct svc_serv_ops *nfs4_cb_sv_ops[] = { +static const struct svc_serv_ops *nfs4_cb_sv_ops[] = { [0] = &nfs40_cb_sv_ops, [1] = NULL, }; @@ -254,8 +254,8 @@ static struct svc_serv_ops *nfs4_cb_sv_ops[] = { static struct svc_serv *nfs_callback_create_svc(int minorversion) { struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion]; + const struct svc_serv_ops *sv_ops; struct svc_serv *serv; - struct svc_serv_ops *sv_ops; /* * Check whether we're already up and running. 
diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index 063ae7de2c12..7e3af3ef0917 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -475,7 +475,7 @@ static int nfsd_get_default_max_blksize(void) return ret; } -static struct svc_serv_ops nfsd_thread_sv_ops = { +static const struct svc_serv_ops nfsd_thread_sv_ops = { .svo_shutdown = nfsd_last_thread, .svo_function = nfsd, .svo_enqueue_xprt = svc_xprt_do_enqueue, diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index a3f8af9bd543..38f561b2dda3 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -99,7 +99,7 @@ struct svc_serv { unsigned int sv_nrpools; /* number of thread pools */ struct svc_pool * sv_pools; /* array of thread pools */ - struct svc_serv_ops *sv_ops; /* server operations */ + const struct svc_serv_ops *sv_ops; /* server operations */ #if defined(CONFIG_SUNRPC_BACKCHANNEL) struct list_head sv_cb_list; /* queue for callback requests * that arrive over the same @@ -465,7 +465,7 @@ int svc_rpcb_setup(struct svc_serv *serv, struct net *net); void svc_rpcb_cleanup(struct svc_serv *serv, struct net *net); int svc_bind(struct svc_serv *serv, struct net *net); struct svc_serv *svc_create(struct svc_program *, unsigned int, - struct svc_serv_ops *); + const struct svc_serv_ops *); struct svc_rqst *svc_rqst_alloc(struct svc_serv *serv, struct svc_pool *pool, int node); struct svc_rqst *svc_prepare_thread(struct svc_serv *serv, @@ -475,7 +475,7 @@ void svc_exit_thread(struct svc_rqst *); unsigned int svc_pool_map_get(void); void svc_pool_map_put(void); struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, - struct svc_serv_ops *); + const struct svc_serv_ops *); int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); int svc_set_num_threads_sync(struct svc_serv *, struct svc_pool *, int); int svc_pool_stats_open(struct svc_serv *serv, struct file *file); diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 85ce0db5b0a6..aa04666f929d 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -421,7 +421,7 @@ __svc_init_bc(struct svc_serv *serv) */ static struct svc_serv * __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, - struct svc_serv_ops *ops) + const struct svc_serv_ops *ops) { struct svc_serv *serv; unsigned int vers; @@ -486,7 +486,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, struct svc_serv * svc_create(struct svc_program *prog, unsigned int bufsize, - struct svc_serv_ops *ops) + const struct svc_serv_ops *ops) { return __svc_create(prog, bufsize, /*npools*/1, ops); } @@ -494,7 +494,7 @@ EXPORT_SYMBOL_GPL(svc_create); struct svc_serv * svc_create_pooled(struct svc_program *prog, unsigned int bufsize, - struct svc_serv_ops *ops) + const struct svc_serv_ops *ops) { struct svc_serv *serv; unsigned int npools = svc_pool_map_get(); -- cgit v1.2.3 From 7075a867ce5748007b299bd28f12754cd51719d4 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 1 Aug 2017 12:00:14 -0400 Subject: svcrdma: Clean up svc_rdma_build_read_chunk() Dan Carpenter observed that the while() loop in svc_rdma_build_read_chunk() does not document the assumption that the loop interior is always executed at least once. Defensive: the function now returns -EINVAL if this assumption fails. Suggested-by: Dan Carpenter Signed-off-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- net/sunrpc/xprtrdma/svc_rdma_rw.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index 933f79bed270..1f34fae7aec0 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -660,19 +660,21 @@ out_initerr: return -EIO; } +/* Walk the segments in the Read chunk starting at @p and construct + * RDMA Read operations to pull the chunk to the server. + */ static int svc_rdma_build_read_chunk(struct svc_rqst *rqstp, struct svc_rdma_read_info *info, __be32 *p) { int ret; + ret = -EINVAL; info->ri_chunklen = 0; - while (*p++ != xdr_zero) { + while (*p++ != xdr_zero && be32_to_cpup(p++) == info->ri_position) { u32 rs_handle, rs_length; u64 rs_offset; - if (be32_to_cpup(p++) != info->ri_position) - break; rs_handle = be32_to_cpup(p++); rs_length = be32_to_cpup(p++); p = xdr_decode_hyper(p, &rs_offset); -- cgit v1.2.3 From eae03e2ac80a3476f0652cb0ee451d7b06d30564 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 18 Aug 2017 11:12:27 -0400 Subject: nfsd: Incoming xdr_bufs may have content in tail buffer Since the beginning, svcsock has built a received RPC Call message by populating the xdr_buf's head, then placing the remaining message bytes in the xdr_buf's page list. The xdr_buf's tail is never populated. This means that an NFSv4 COMPOUND containing an NFS WRITE operation plus trailing operations has a page list that contains the WRITE data payload followed by the trailing operations. NFSv4 XDR decoders will not look in the xdr_buf's tail, ever, because svcsock never put anything there. To support transports that can pass the write payload in the xdr_buf's pagelist and trailing content in the xdr_buf's tail, introduce logic in READ_BUF that switches to the xdr_buf's tail vec when the decoder runs out of content in rq_arg.pages. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- fs/nfsd/nfs4xdr.c | 20 ++++++++++++++++++++ fs/nfsd/xdr4.h | 1 + 2 files changed, 21 insertions(+) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 08691fe394b2..2c61c6b8ae09 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -159,6 +159,25 @@ static __be32 *read_buf(struct nfsd4_compoundargs *argp, u32 nbytes) */ unsigned int avail = (char *)argp->end - (char *)argp->p; __be32 *p; + + if (argp->pagelen == 0) { + struct kvec *vec = &argp->rqstp->rq_arg.tail[0]; + + if (!argp->tail) { + argp->tail = true; + avail = vec->iov_len; + argp->p = vec->iov_base; + argp->end = vec->iov_base + avail; + } + + if (avail < nbytes) + return NULL; + + p = argp->p; + argp->p += XDR_QUADLEN(nbytes); + return p; + } + if (avail + argp->pagelen < nbytes) return NULL; if (avail + PAGE_SIZE < nbytes) /* need more than a page !! 
*/ @@ -4471,6 +4490,7 @@ nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, __be32 *p) args->end = rqstp->rq_arg.head[0].iov_base + rqstp->rq_arg.head[0].iov_len; args->pagelist = rqstp->rq_arg.pages; args->pagelen = rqstp->rq_arg.page_len; + args->tail = false; args->tmpp = NULL; args->to_free = NULL; args->ops = args->iops; diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h index f8a0b6549a88..1e4edbf70052 100644 --- a/fs/nfsd/xdr4.h +++ b/fs/nfsd/xdr4.h @@ -615,6 +615,7 @@ struct nfsd4_compoundargs { __be32 * end; struct page ** pagelist; int pagelen; + bool tail; __be32 tmp[8]; __be32 * tmpp; struct svcxdr_tmpbuf *to_free; -- cgit v1.2.3 From 193bcb7b3719a315814dce40fc8dcd1af786ea64 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 18 Aug 2017 11:12:35 -0400 Subject: svcrdma: Populate tail iovec when receiving So that NFS WRITE payloads can eventually be placed directly into a file's page cache, enable the RPC-over-RDMA transport to present these payloads in the xdr_buf's page list, while placing trailing content (such as a GETATTR operation) in the xdr_buf's tail. After this change, the RPC-over-RDMA's "copy tail" hack, added by commit a97c331f9aa9 ("svcrdma: Handle additional inline content"), is no longer needed and can be removed. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- net/sunrpc/xprtrdma/svc_rdma_rw.c | 108 ++++++-------------------------------- 1 file changed, 15 insertions(+), 93 deletions(-) diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index 1f34fae7aec0..7dcda4597057 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -691,78 +691,6 @@ static int svc_rdma_build_read_chunk(struct svc_rqst *rqstp, return ret; } -/* If there is inline content following the Read chunk, append it to - * the page list immediately following the data payload. This has to - * be done after the reader function has determined how many pages - * were consumed for RDMA Read. - * - * On entry, ri_pageno and ri_pageoff point directly to the end of the - * page list. On exit, both have been updated to the new "next byte". - * - * Assumptions: - * - Inline content fits entirely in rq_pages[0] - * - Trailing content is only a handful of bytes - */ -static int svc_rdma_copy_tail(struct svc_rqst *rqstp, - struct svc_rdma_read_info *info) -{ - struct svc_rdma_op_ctxt *head = info->ri_readctxt; - unsigned int tail_length, remaining; - u8 *srcp, *destp; - - /* Assert that all inline content fits in page 0. This is an - * implementation limit, not a protocol limit. - */ - if (head->arg.head[0].iov_len > PAGE_SIZE) { - pr_warn_once("svcrdma: too much trailing inline content\n"); - return -EINVAL; - } - - srcp = head->arg.head[0].iov_base; - srcp += info->ri_position; - tail_length = head->arg.head[0].iov_len - info->ri_position; - remaining = tail_length; - - /* If there is room on the last page in the page list, try to - * fit the trailing content there. - */ - if (info->ri_pageoff > 0) { - unsigned int len; - - len = min_t(unsigned int, remaining, - PAGE_SIZE - info->ri_pageoff); - destp = page_address(rqstp->rq_pages[info->ri_pageno]); - destp += info->ri_pageoff; - - memcpy(destp, srcp, len); - srcp += len; - destp += len; - info->ri_pageoff += len; - remaining -= len; - - if (info->ri_pageoff == PAGE_SIZE) { - info->ri_pageno++; - info->ri_pageoff = 0; - } - } - - /* Otherwise, a fresh page is needed. 
*/ - if (remaining) { - head->arg.pages[info->ri_pageno] = - rqstp->rq_pages[info->ri_pageno]; - head->count++; - - destp = page_address(rqstp->rq_pages[info->ri_pageno]); - memcpy(destp, srcp, remaining); - info->ri_pageoff += remaining; - } - - head->arg.page_len += tail_length; - head->arg.len += tail_length; - head->arg.buflen += tail_length; - return 0; -} - /* Construct RDMA Reads to pull over a normal Read chunk. The chunk * data lands in the page list of head->arg.pages. * @@ -787,34 +715,28 @@ static int svc_rdma_build_normal_read_chunk(struct svc_rqst *rqstp, if (ret < 0) goto out; - /* Read chunk may need XDR round-up (see RFC 5666, s. 3.7). + /* Split the Receive buffer between the head and tail + * buffers at Read chunk's position. XDR roundup of the + * chunk is not included in either the pagelist or in + * the tail. */ - if (info->ri_chunklen & 3) { - u32 padlen = 4 - (info->ri_chunklen & 3); - - info->ri_chunklen += padlen; + head->arg.tail[0].iov_base = + head->arg.head[0].iov_base + info->ri_position; + head->arg.tail[0].iov_len = + head->arg.head[0].iov_len - info->ri_position; + head->arg.head[0].iov_len = info->ri_position; - /* NB: data payload always starts on XDR alignment, - * thus the pad can never contain a page boundary. - */ - info->ri_pageoff += padlen; - if (info->ri_pageoff == PAGE_SIZE) { - info->ri_pageno++; - info->ri_pageoff = 0; - } - } + /* Read chunk may need XDR roundup (see RFC 5666, s. 3.7). + * + * NFSv2/3 write decoders need the length of the tail to + * contain the size of the roundup padding. + */ + head->arg.tail[0].iov_len += 4 - (info->ri_chunklen & 3); head->arg.page_len = info->ri_chunklen; head->arg.len += info->ri_chunklen; head->arg.buflen += info->ri_chunklen; - if (info->ri_position < head->arg.head[0].iov_len) { - ret = svc_rdma_copy_tail(rqstp, info); - if (ret < 0) - goto out; - } - head->arg.head[0].iov_len = info->ri_position; - out: return ret; } -- cgit v1.2.3 From 5a25bfd28c321e522dbe8083ad6219fa2a820610 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 28 Aug 2017 15:06:06 -0400 Subject: svcrdma: Limit RQ depth Ensure that the chosen Receive Queue depth for a newly created transport does not overrun the QP WR limit of the underlying device. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- net/sunrpc/xprtrdma/svc_rdma_transport.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 2aa8473dcc97..cdb04f8a0c25 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -167,8 +167,8 @@ static bool svc_rdma_prealloc_ctxts(struct svcxprt_rdma *xprt) { unsigned int i; - /* Each RPC/RDMA credit can consume a number of send - * and receive WQEs. One ctxt is allocated for each. + /* Each RPC/RDMA credit can consume one Receive and + * one Send WQE at the same time. 
*/ i = xprt->sc_sq_depth + xprt->sc_rq_depth; @@ -742,13 +742,18 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) newxprt->sc_max_sge = min((size_t)dev->attrs.max_sge, (size_t)RPCSVC_MAXPAGES); newxprt->sc_max_req_size = svcrdma_max_req_size; - newxprt->sc_max_requests = min_t(u32, dev->attrs.max_qp_wr, - svcrdma_max_requests); - newxprt->sc_fc_credits = cpu_to_be32(newxprt->sc_max_requests); - newxprt->sc_max_bc_requests = min_t(u32, dev->attrs.max_qp_wr, - svcrdma_max_bc_requests); + newxprt->sc_max_requests = svcrdma_max_requests; + newxprt->sc_max_bc_requests = svcrdma_max_bc_requests; newxprt->sc_rq_depth = newxprt->sc_max_requests + newxprt->sc_max_bc_requests; + if (newxprt->sc_rq_depth > dev->attrs.max_qp_wr) { + pr_warn("svcrdma: reducing receive depth to %d\n", + dev->attrs.max_qp_wr); + newxprt->sc_rq_depth = dev->attrs.max_qp_wr; + newxprt->sc_max_requests = newxprt->sc_rq_depth - 2; + newxprt->sc_max_bc_requests = 2; + } + newxprt->sc_fc_credits = cpu_to_be32(newxprt->sc_max_requests); newxprt->sc_sq_depth = newxprt->sc_rq_depth; atomic_set(&newxprt->sc_sq_avail, newxprt->sc_sq_depth); -- cgit v1.2.3 From 0062818298662d0d05061949d12880146b5ebd65 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 28 Aug 2017 15:06:14 -0400 Subject: rdma core: Add rdma_rw_mr_payload() The amount of payload per MR depends on device capabilities and the memory registration mode in use. The new rdma_rw API hides both, making it difficult for ULPs to determine how large their transport send queues need to be. Expose the MR payload information via a new API. Signed-off-by: Chuck Lever Acked-by: Doug Ledford Signed-off-by: J. Bruce Fields --- drivers/infiniband/core/rw.c | 24 ++++++++++++++++++++++++ include/rdma/rw.h | 2 ++ 2 files changed, 26 insertions(+) diff --git a/drivers/infiniband/core/rw.c b/drivers/infiniband/core/rw.c index dbfd854c32c9..6ca607e8e293 100644 --- a/drivers/infiniband/core/rw.c +++ b/drivers/infiniband/core/rw.c @@ -643,6 +643,30 @@ void rdma_rw_ctx_destroy_signature(struct rdma_rw_ctx *ctx, struct ib_qp *qp, } EXPORT_SYMBOL(rdma_rw_ctx_destroy_signature); +/** + * rdma_rw_mr_factor - return number of MRs required for a payload + * @device: device handling the connection + * @port_num: port num to which the connection is bound + * @maxpages: maximum payload pages per rdma_rw_ctx + * + * Returns the number of MRs the device requires to move @maxpayload + * bytes. The returned value is used during transport creation to + * compute max_rdma_ctxts and the size of the transport's Send and + * Send Completion Queues. 
+ */ +unsigned int rdma_rw_mr_factor(struct ib_device *device, u8 port_num, + unsigned int maxpages) +{ + unsigned int mr_pages; + + if (rdma_rw_can_use_mr(device, port_num)) + mr_pages = rdma_rw_fr_page_list_len(device); + else + mr_pages = device->attrs.max_sge_rd; + return DIV_ROUND_UP(maxpages, mr_pages); +} +EXPORT_SYMBOL(rdma_rw_mr_factor); + void rdma_rw_init_qp(struct ib_device *dev, struct ib_qp_init_attr *attr) { u32 factor; diff --git a/include/rdma/rw.h b/include/rdma/rw.h index 377d865e506d..a3cbbc7b6417 100644 --- a/include/rdma/rw.h +++ b/include/rdma/rw.h @@ -81,6 +81,8 @@ struct ib_send_wr *rdma_rw_ctx_wrs(struct rdma_rw_ctx *ctx, struct ib_qp *qp, int rdma_rw_ctx_post(struct rdma_rw_ctx *ctx, struct ib_qp *qp, u8 port_num, struct ib_cqe *cqe, struct ib_send_wr *chain_wr); +unsigned int rdma_rw_mr_factor(struct ib_device *device, u8 port_num, + unsigned int maxpages); void rdma_rw_init_qp(struct ib_device *dev, struct ib_qp_init_attr *attr); int rdma_rw_init_mrs(struct ib_qp *qp, struct ib_qp_init_attr *attr); void rdma_rw_cleanup_mrs(struct ib_qp *qp); -- cgit v1.2.3 From 26fb2254dd33b02a522fac42745693f5969b7d4b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 28 Aug 2017 15:06:22 -0400 Subject: svcrdma: Estimate Send Queue depth properly The rdma_rw API adjusts max_send_wr upwards during the rdma_create_qp() call. If the ULP actually wants to take advantage of these extra resources, it must increase the size of its send completion queue (created before rdma_create_qp is called) and increase its send queue accounting limit. Use the new rdma_rw_mr_factor API to figure out the correct value to use for the Send Queue and Send Completion Queue depths. And, ensure that the chosen Send Queue depth for a newly created transport does not overrun the QP WR limit of the underlying device. Lastly, there's no longer a need to carry the Send Queue depth in struct svcxprt_rdma, since the value is used only in the svc_rdma_accept() path. Signed-off-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- net/sunrpc/xprtrdma/svc_rdma_transport.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index cdb04f8a0c25..5caf8e722a11 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -51,6 +51,7 @@ #include #include #include +#include #include #include #include "xprt_rdma.h" @@ -713,7 +714,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) struct ib_qp_init_attr qp_attr; struct ib_device *dev; struct sockaddr *sap; - unsigned int i; + unsigned int i, ctxts; int ret = 0; listen_rdma = container_of(xprt, struct svcxprt_rdma, sc_xprt); @@ -754,7 +755,14 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) newxprt->sc_max_bc_requests = 2; } newxprt->sc_fc_credits = cpu_to_be32(newxprt->sc_max_requests); - newxprt->sc_sq_depth = newxprt->sc_rq_depth; + ctxts = rdma_rw_mr_factor(dev, newxprt->sc_port_num, RPCSVC_MAXPAGES); + ctxts *= newxprt->sc_max_requests; + newxprt->sc_sq_depth = newxprt->sc_rq_depth + ctxts; + if (newxprt->sc_sq_depth > dev->attrs.max_qp_wr) { + pr_warn("svcrdma: reducing send depth to %d\n", + dev->attrs.max_qp_wr); + newxprt->sc_sq_depth = dev->attrs.max_qp_wr; + } atomic_set(&newxprt->sc_sq_avail, newxprt->sc_sq_depth); if (!svc_rdma_prealloc_ctxts(newxprt)) @@ -789,8 +797,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) qp_attr.event_handler = qp_event_handler; qp_attr.qp_context = &newxprt->sc_xprt; qp_attr.port_num = newxprt->sc_port_num; - qp_attr.cap.max_rdma_ctxs = newxprt->sc_max_requests; - qp_attr.cap.max_send_wr = newxprt->sc_sq_depth; + qp_attr.cap.max_rdma_ctxs = ctxts; + qp_attr.cap.max_send_wr = newxprt->sc_sq_depth - ctxts; qp_attr.cap.max_recv_wr = newxprt->sc_rq_depth; qp_attr.cap.max_send_sge = newxprt->sc_max_sge; qp_attr.cap.max_recv_sge = newxprt->sc_max_sge; @@ -858,6 +866,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) dprintk(" remote address : %pIS:%u\n", sap, rpc_get_port(sap)); dprintk(" max_sge : %d\n", newxprt->sc_max_sge); dprintk(" sq_depth : %d\n", newxprt->sc_sq_depth); + dprintk(" rdma_rw_ctxs : %d\n", ctxts); dprintk(" max_requests : %d\n", newxprt->sc_max_requests); dprintk(" ord : %d\n", newxprt->sc_ord); -- cgit v1.2.3
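
The recurring pattern across the nfsd4 patches above is a table of per-operation descriptors: the decoder caches a descriptor pointer in each op (op->opdesc), the compound dispatcher consults op_flags, the encoder is skipped for trivial error cases unless OP_NONTRIVIAL_ERROR_ENCODE is set, and a separate ->op_release hook frees per-op resources after encoding. What follows is a minimal, self-contained userspace sketch of that pattern for illustration only; every name, the single sample READ op, and the return conventions are simplified stand-ins, not the kernel's actual nfsd4_operation table, union nfsd4_op_u, or error codes.

/*
 * Minimal userspace sketch (illustration only, not the kernel code):
 * simplified stand-ins for struct nfsd4_operation / struct nfsd4_op.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

enum op_flags {
	OP_MODIFIES_SOMETHING      = 1 << 0,	/* reply size estimated up front */
	OP_NONTRIVIAL_ERROR_ENCODE = 1 << 1,	/* encoder runs even on error */
};

union op_u {					/* stands in for union nfsd4_op_u */
	struct { char *buf; } read;
};

struct operation {				/* stands in for struct nfsd4_operation */
	int  (*op_func)(union op_u *u);
	int  (*op_encode)(union op_u *u, int status);
	void (*op_release)(union op_u *u);
	unsigned int op_flags;
	const char *op_name;
};

struct op {					/* stands in for struct nfsd4_op */
	int opnum;
	int status;
	const struct operation *opdesc;		/* cached at decode time, as in the OPDESC patch */
	union op_u u;
};

static int read_func(union op_u *u)
{
	u->read.buf = malloc(6);		/* resource ->op_release must free */
	if (!u->read.buf)
		return -1;
	memcpy(u->read.buf, "hello", 6);
	return 0;
}

static int read_encode(union op_u *u, int status)
{
	printf("encoded READ result: %s\n", u->read.buf);
	return status;
}

static void read_release(union op_u *u)
{
	free(u->read.buf);			/* runs whether or not encoding happened */
}

static const struct operation ops[] = {
	[0] = { read_func, read_encode, read_release, 0, "OP_READ" },
};

static void encode_operation(struct op *op)
{
	const struct operation *opdesc = op->opdesc;

	/* Trivial error case: skip the encoder entirely. */
	if (op->status && !(opdesc->op_flags & OP_NONTRIVIAL_ERROR_ENCODE))
		goto release;
	op->status = opdesc->op_encode(&op->u, op->status);
release:
	if (opdesc->op_release)
		opdesc->op_release(&op->u);
}

int main(void)
{
	struct op op = { .opnum = 0, .opdesc = &ops[0] };

	/* A real dispatcher would also check OP_MODIFIES_SOMETHING here and
	 * pre-reserve reply space via ->op_rsize_bop before executing. */
	op.status = op.opdesc->op_func(&op.u);
	encode_operation(&op);
	return op.status ? 1 : 0;
}

Releasing resources in the dispatcher rather than in each encoder is what lets nfsd4_encode_operation() bail out immediately on error without leaking the file reference, export reference, or layout buffers that the earlier patches route through nfsd4_read_release(), nfsd4_secinfo_release(), nfsd4_getdeviceinfo_release(), and nfsd4_layoutget_release().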