From 31c9590ae468478fe47dc0f5f0d3562b2f69450e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 18 Apr 2020 21:06:23 -0400 Subject: SUNRPC: Add "@len" parameter to gss_unwrap() Refactor: This is a pre-requisite to fixing the client-side ralign computation in gss_unwrap_resp_priv(). The length value is passed in explicitly rather that as the value of buf->len. This will subsequently allow gss_unwrap_kerberos_v1() to compute a slack and align value, instead of computing it in gss_unwrap_resp_priv(). Fixes: 35e77d21baa0 ("SUNRPC: Add rpc_auth::au_ralign field") Signed-off-by: Chuck Lever --- net/sunrpc/auth_gss/auth_gss.c | 4 ++-- net/sunrpc/auth_gss/gss_krb5_crypto.c | 8 ++++---- net/sunrpc/auth_gss/gss_krb5_wrap.c | 26 +++++++++++++++----------- net/sunrpc/auth_gss/gss_mech_switch.c | 3 ++- net/sunrpc/auth_gss/svcauth_gss.c | 8 ++------ 5 files changed, 25 insertions(+), 24 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 25fbd8d9de74..7885f37e3688 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -2043,9 +2043,9 @@ gss_unwrap_resp_priv(struct rpc_task *task, struct rpc_cred *cred, offset = (u8 *)(p) - (u8 *)head->iov_base; if (offset + opaque_len > rcv_buf->len) goto unwrap_failed; - rcv_buf->len = offset + opaque_len; - maj_stat = gss_unwrap(ctx->gc_gss_ctx, offset, rcv_buf); + maj_stat = gss_unwrap(ctx->gc_gss_ctx, offset, + offset + opaque_len, rcv_buf); if (maj_stat == GSS_S_CONTEXT_EXPIRED) clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); if (maj_stat != GSS_S_COMPLETE) diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index 6f2d30d7b766..e7180da1fc6a 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -851,8 +851,8 @@ out_err: } u32 -gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf, - u32 *headskip, u32 *tailskip) +gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, u32 len, + struct xdr_buf *buf, u32 *headskip, u32 *tailskip) { struct xdr_buf subbuf; u32 ret = 0; @@ -881,7 +881,7 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf, /* create a segment skipping the header and leaving out the checksum */ xdr_buf_subsegment(buf, &subbuf, offset + GSS_KRB5_TOK_HDR_LEN, - (buf->len - offset - GSS_KRB5_TOK_HDR_LEN - + (len - offset - GSS_KRB5_TOK_HDR_LEN - kctx->gk5e->cksumlength)); nblocks = (subbuf.len + blocksize - 1) / blocksize; @@ -926,7 +926,7 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf, goto out_err; /* Get the packet's hmac value */ - ret = read_bytes_from_xdr_buf(buf, buf->len - kctx->gk5e->cksumlength, + ret = read_bytes_from_xdr_buf(buf, len - kctx->gk5e->cksumlength, pkt_hmac, kctx->gk5e->cksumlength); if (ret) goto out_err; diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index 6c1920eed771..c7589e35d5d9 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -261,7 +261,8 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset, } static u32 -gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf) +gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, int len, + struct xdr_buf *buf) { int signalg; int sealalg; @@ -284,7 +285,7 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf) ptr = (u8 *)buf->head[0].iov_base + offset; if (g_verify_token_header(&kctx->mech_used, &bodysize, &ptr, - buf->len - offset)) + len - offset)) return GSS_S_DEFECTIVE_TOKEN; if ((ptr[0] != ((KG_TOK_WRAP_MSG >> 8) & 0xff)) || @@ -324,6 +325,7 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf) (!kctx->initiate && direction != 0)) return GSS_S_BAD_SIG; + buf->len = len; if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC) { struct crypto_sync_skcipher *cipher; int err; @@ -376,7 +378,7 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf) data_len = (buf->head[0].iov_base + buf->head[0].iov_len) - data_start; memmove(orig_start, data_start, data_len); buf->head[0].iov_len -= (data_start - orig_start); - buf->len -= (data_start - orig_start); + buf->len = len - (data_start - orig_start); if (gss_krb5_remove_padding(buf, blocksize)) return GSS_S_DEFECTIVE_TOKEN; @@ -486,7 +488,8 @@ gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset, } static u32 -gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf) +gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, int len, + struct xdr_buf *buf) { time64_t now; u8 *ptr; @@ -532,7 +535,7 @@ gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf) if (rrc != 0) rotate_left(offset + 16, buf, rrc); - err = (*kctx->gk5e->decrypt_v2)(kctx, offset, buf, + err = (*kctx->gk5e->decrypt_v2)(kctx, offset, len, buf, &headskip, &tailskip); if (err) return GSS_S_FAILURE; @@ -542,7 +545,7 @@ gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf) * it against the original */ err = read_bytes_from_xdr_buf(buf, - buf->len - GSS_KRB5_TOK_HDR_LEN - tailskip, + len - GSS_KRB5_TOK_HDR_LEN - tailskip, decrypted_hdr, GSS_KRB5_TOK_HDR_LEN); if (err) { dprintk("%s: error %u getting decrypted_hdr\n", __func__, err); @@ -568,14 +571,14 @@ gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf) * Note that buf->head[0].iov_len may indicate the available * head buffer space rather than that actually occupied. */ - movelen = min_t(unsigned int, buf->head[0].iov_len, buf->len); + movelen = min_t(unsigned int, buf->head[0].iov_len, len); movelen -= offset + GSS_KRB5_TOK_HDR_LEN + headskip; if (offset + GSS_KRB5_TOK_HDR_LEN + headskip + movelen > buf->head[0].iov_len) return GSS_S_FAILURE; memmove(ptr, ptr + GSS_KRB5_TOK_HDR_LEN + headskip, movelen); buf->head[0].iov_len -= GSS_KRB5_TOK_HDR_LEN + headskip; - buf->len -= GSS_KRB5_TOK_HDR_LEN + headskip; + buf->len = len - GSS_KRB5_TOK_HDR_LEN + headskip; /* Trim off the trailing "extra count" and checksum blob */ buf->len -= ec + GSS_KRB5_TOK_HDR_LEN + tailskip; @@ -603,7 +606,8 @@ gss_wrap_kerberos(struct gss_ctx *gctx, int offset, } u32 -gss_unwrap_kerberos(struct gss_ctx *gctx, int offset, struct xdr_buf *buf) +gss_unwrap_kerberos(struct gss_ctx *gctx, int offset, + int len, struct xdr_buf *buf) { struct krb5_ctx *kctx = gctx->internal_ctx_id; @@ -613,9 +617,9 @@ gss_unwrap_kerberos(struct gss_ctx *gctx, int offset, struct xdr_buf *buf) case ENCTYPE_DES_CBC_RAW: case ENCTYPE_DES3_CBC_RAW: case ENCTYPE_ARCFOUR_HMAC: - return gss_unwrap_kerberos_v1(kctx, offset, buf); + return gss_unwrap_kerberos_v1(kctx, offset, len, buf); case ENCTYPE_AES128_CTS_HMAC_SHA1_96: case ENCTYPE_AES256_CTS_HMAC_SHA1_96: - return gss_unwrap_kerberos_v2(kctx, offset, buf); + return gss_unwrap_kerberos_v2(kctx, offset, len, buf); } } diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index db550bfc2642..69316ab1b9fa 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c @@ -411,10 +411,11 @@ gss_wrap(struct gss_ctx *ctx_id, u32 gss_unwrap(struct gss_ctx *ctx_id, int offset, + int len, struct xdr_buf *buf) { return ctx_id->mech_type->gm_ops - ->gss_unwrap(ctx_id, offset, buf); + ->gss_unwrap(ctx_id, offset, len, buf); } diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 54ae5be62f6a..d0a2f084e5a4 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -934,7 +934,7 @@ static int unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx) { u32 priv_len, maj_stat; - int pad, saved_len, remaining_len, offset; + int pad, remaining_len, offset; clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags); @@ -954,12 +954,8 @@ unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gs buf->len -= pad; fix_priv_head(buf, pad); - /* Maybe it would be better to give gss_unwrap a length parameter: */ - saved_len = buf->len; - buf->len = priv_len; - maj_stat = gss_unwrap(ctx, 0, buf); + maj_stat = gss_unwrap(ctx, 0, priv_len, buf); pad = priv_len - buf->len; - buf->len = saved_len; buf->len -= pad; /* The upper layers assume the buffer is aligned on 4-byte boundaries. * In the krb5p case, at least, the data ends up offset, so we need to -- cgit v1.2.3 From a7e429a6fa6d612d1dacde96c885dc1bb4a9f400 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 18 Apr 2020 14:38:19 -0400 Subject: SUNRPC: Fix GSS privacy computation of auth->au_ralign When the au_ralign field was added to gss_unwrap_resp_priv, the wrong calculation was used. Setting au_rslack == au_ralign is probably correct for kerberos_v1 privacy, but kerberos_v2 privacy adds additional GSS data after the clear text RPC message. au_ralign needs to be smaller than au_rslack in that fairly common case. When xdr_buf_trim() is restored to gss_unwrap_kerberos_v2(), it does exactly what I feared it would: it trims off part of the clear text RPC message. However, that's because rpc_prepare_reply_pages() does not set up the rq_rcv_buf's tail correctly because au_ralign is too large. Fixing the au_ralign computation also corrects the alignment of rq_rcv_buf->pages so that the client does not have to shift reply data payloads after they are received. Fixes: 35e77d21baa0 ("SUNRPC: Add rpc_auth::au_ralign field") Signed-off-by: Chuck Lever --- include/linux/sunrpc/gss_api.h | 1 + net/sunrpc/auth_gss/auth_gss.c | 8 +++----- net/sunrpc/auth_gss/gss_krb5_wrap.c | 19 +++++++++++++++---- 3 files changed, 19 insertions(+), 9 deletions(-) (limited to 'net/sunrpc') diff --git a/include/linux/sunrpc/gss_api.h b/include/linux/sunrpc/gss_api.h index e9a79518d652..bc07e51f20d1 100644 --- a/include/linux/sunrpc/gss_api.h +++ b/include/linux/sunrpc/gss_api.h @@ -21,6 +21,7 @@ struct gss_ctx { struct gss_api_mech *mech_type; void *internal_ctx_id; + unsigned int slack, align; }; #define GSS_C_NO_BUFFER ((struct xdr_netobj) 0) diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 7885f37e3688..ac5cac0dd24b 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -2032,7 +2032,6 @@ gss_unwrap_resp_priv(struct rpc_task *task, struct rpc_cred *cred, struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf; struct kvec *head = rqstp->rq_rcv_buf.head; struct rpc_auth *auth = cred->cr_auth; - unsigned int savedlen = rcv_buf->len; u32 offset, opaque_len, maj_stat; __be32 *p; @@ -2059,10 +2058,9 @@ gss_unwrap_resp_priv(struct rpc_task *task, struct rpc_cred *cred, */ xdr_init_decode(xdr, rcv_buf, p, rqstp); - auth->au_rslack = auth->au_verfsize + 2 + - XDR_QUADLEN(savedlen - rcv_buf->len); - auth->au_ralign = auth->au_verfsize + 2 + - XDR_QUADLEN(savedlen - rcv_buf->len); + auth->au_rslack = auth->au_verfsize + 2 + ctx->gc_gss_ctx->slack; + auth->au_ralign = auth->au_verfsize + 2 + ctx->gc_gss_ctx->align; + return 0; unwrap_failed: trace_rpcgss_unwrap_failed(task); diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index c7589e35d5d9..4905652e7567 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -262,7 +262,8 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset, static u32 gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, int len, - struct xdr_buf *buf) + struct xdr_buf *buf, unsigned int *slack, + unsigned int *align) { int signalg; int sealalg; @@ -280,6 +281,7 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, int len, u32 conflen = kctx->gk5e->conflen; int crypt_offset; u8 *cksumkey; + unsigned int saved_len = buf->len; dprintk("RPC: gss_unwrap_kerberos\n"); @@ -383,6 +385,10 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, int len, if (gss_krb5_remove_padding(buf, blocksize)) return GSS_S_DEFECTIVE_TOKEN; + /* slack must include room for krb5 padding */ + *slack = XDR_QUADLEN(saved_len - buf->len); + /* The GSS blob always precedes the RPC message payload */ + *align = *slack; return GSS_S_COMPLETE; } @@ -489,7 +495,8 @@ gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset, static u32 gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, int len, - struct xdr_buf *buf) + struct xdr_buf *buf, unsigned int *slack, + unsigned int *align) { time64_t now; u8 *ptr; @@ -583,6 +590,8 @@ gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, int len, /* Trim off the trailing "extra count" and checksum blob */ buf->len -= ec + GSS_KRB5_TOK_HDR_LEN + tailskip; + *align = XDR_QUADLEN(GSS_KRB5_TOK_HDR_LEN + headskip); + *slack = *align + XDR_QUADLEN(ec + GSS_KRB5_TOK_HDR_LEN + tailskip); return GSS_S_COMPLETE; } @@ -617,9 +626,11 @@ gss_unwrap_kerberos(struct gss_ctx *gctx, int offset, case ENCTYPE_DES_CBC_RAW: case ENCTYPE_DES3_CBC_RAW: case ENCTYPE_ARCFOUR_HMAC: - return gss_unwrap_kerberos_v1(kctx, offset, len, buf); + return gss_unwrap_kerberos_v1(kctx, offset, len, buf, + &gctx->slack, &gctx->align); case ENCTYPE_AES128_CTS_HMAC_SHA1_96: case ENCTYPE_AES256_CTS_HMAC_SHA1_96: - return gss_unwrap_kerberos_v2(kctx, offset, len, buf); + return gss_unwrap_kerberos_v2(kctx, offset, len, buf, + &gctx->slack, &gctx->align); } } -- cgit v1.2.3 From 0a8e7b7d08466b5fc52f8e96070acc116d82a8bb Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 15 Apr 2020 17:36:22 -0400 Subject: SUNRPC: Revert 241b1f419f0e ("SUNRPC: Remove xdr_buf_trim()") I've noticed that when krb5i or krb5p security is in use, retransmitted requests are missing the server's duplicate reply cache. The computed checksum on the retransmitted request does not match the cached checksum, resulting in the server performing the retransmitted request again instead of returning the cached reply. The assumptions made when removing xdr_buf_trim() were not correct. In the send paths, the upper layer has already set the segment lengths correctly, and shorting the buffer's content is simply a matter of reducing buf->len. xdr_buf_trim() is the right answer in the receive/unwrap path on both the client and the server. The buffer segment lengths have to be shortened one-by-one. On the server side in particular, head.iov_len needs to be updated correctly to enable nfsd_cache_csum() to work correctly. The simple buf->len computation doesn't do that, and that results in checksumming stale data in the buffer. The problem isn't noticed until there's significant instability of the RPC transport. At that point, the reliability of retransmit detection on the server becomes crucial. Fixes: 241b1f419f0e ("SUNRPC: Remove xdr_buf_trim()") Signed-off-by: Chuck Lever --- include/linux/sunrpc/xdr.h | 1 + net/sunrpc/auth_gss/gss_krb5_wrap.c | 7 +++---- net/sunrpc/auth_gss/svcauth_gss.c | 2 +- net/sunrpc/xdr.c | 41 +++++++++++++++++++++++++++++++++++++ 4 files changed, 46 insertions(+), 5 deletions(-) (limited to 'net/sunrpc') diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index 01bb41908c93..22c207b2425f 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -184,6 +184,7 @@ xdr_adjust_iovec(struct kvec *iov, __be32 *p) extern void xdr_shift_buf(struct xdr_buf *, size_t); extern void xdr_buf_from_iov(struct kvec *, struct xdr_buf *); extern int xdr_buf_subsegment(struct xdr_buf *, struct xdr_buf *, unsigned int, unsigned int); +extern void xdr_buf_trim(struct xdr_buf *, unsigned int); extern int read_bytes_from_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int); extern int write_bytes_to_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int); diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index 4905652e7567..cf0fd170ac18 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -580,15 +580,14 @@ gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, int len, */ movelen = min_t(unsigned int, buf->head[0].iov_len, len); movelen -= offset + GSS_KRB5_TOK_HDR_LEN + headskip; - if (offset + GSS_KRB5_TOK_HDR_LEN + headskip + movelen > - buf->head[0].iov_len) - return GSS_S_FAILURE; + BUG_ON(offset + GSS_KRB5_TOK_HDR_LEN + headskip + movelen > + buf->head[0].iov_len); memmove(ptr, ptr + GSS_KRB5_TOK_HDR_LEN + headskip, movelen); buf->head[0].iov_len -= GSS_KRB5_TOK_HDR_LEN + headskip; buf->len = len - GSS_KRB5_TOK_HDR_LEN + headskip; /* Trim off the trailing "extra count" and checksum blob */ - buf->len -= ec + GSS_KRB5_TOK_HDR_LEN + tailskip; + xdr_buf_trim(buf, ec + GSS_KRB5_TOK_HDR_LEN + tailskip); *align = XDR_QUADLEN(GSS_KRB5_TOK_HDR_LEN + headskip); *slack = *align + XDR_QUADLEN(ec + GSS_KRB5_TOK_HDR_LEN + tailskip); diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index d0a2f084e5a4..50d93c49ef1a 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -906,7 +906,7 @@ unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct g if (svc_getnl(&buf->head[0]) != seq) goto out; /* trim off the mic and padding at the end before returning */ - buf->len -= 4 + round_up_to_quad(mic.len); + xdr_buf_trim(buf, round_up_to_quad(mic.len) + 4); stat = 0; out: kfree(mic.data); diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 15b58c5144f9..6f7d82fb1eb0 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -1150,6 +1150,47 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf, } EXPORT_SYMBOL_GPL(xdr_buf_subsegment); +/** + * xdr_buf_trim - lop at most "len" bytes off the end of "buf" + * @buf: buf to be trimmed + * @len: number of bytes to reduce "buf" by + * + * Trim an xdr_buf by the given number of bytes by fixing up the lengths. Note + * that it's possible that we'll trim less than that amount if the xdr_buf is + * too small, or if (for instance) it's all in the head and the parser has + * already read too far into it. + */ +void xdr_buf_trim(struct xdr_buf *buf, unsigned int len) +{ + size_t cur; + unsigned int trim = len; + + if (buf->tail[0].iov_len) { + cur = min_t(size_t, buf->tail[0].iov_len, trim); + buf->tail[0].iov_len -= cur; + trim -= cur; + if (!trim) + goto fix_len; + } + + if (buf->page_len) { + cur = min_t(unsigned int, buf->page_len, trim); + buf->page_len -= cur; + trim -= cur; + if (!trim) + goto fix_len; + } + + if (buf->head[0].iov_len) { + cur = min_t(size_t, buf->head[0].iov_len, trim); + buf->head[0].iov_len -= cur; + trim -= cur; + } +fix_len: + buf->len -= (len - trim); +} +EXPORT_SYMBOL_GPL(xdr_buf_trim); + static void __read_bytes_from_xdr_buf(struct xdr_buf *subbuf, void *obj, unsigned int len) { unsigned int this_len; -- cgit v1.2.3 From 31e9a7f353526bbe53165a292f8ea9695ead3168 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Sat, 9 May 2020 07:25:11 +1000 Subject: SUNRPC: fix use-after-free in rpc_free_client_work() Parts of rpc_free_client() were recently moved to a separate rpc_free_clent_work(). This introduced a use-after-free as rpc_clnt_remove_pipedir() calls rpc_net_ns(), and that uses clnt->cl_xprt which has already been freed. So move the call to xprt_put() after the call to rpc_clnt_remove_pipedir(). Reported-by: syzbot+22b5ef302c7c40d94ea8@syzkaller.appspotmail.com Fixes: 7c4310ff5642 ("SUNRPC: defer slow parts of rpc_free_client() to a workqueue.") Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 8350d3a2e9a7..8d3972ea688b 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -890,6 +890,7 @@ static void rpc_free_client_work(struct work_struct *work) */ rpc_clnt_debugfs_unregister(clnt); rpc_clnt_remove_pipedir(clnt); + xprt_put(rcu_dereference_raw(clnt->cl_xprt)); kfree(clnt); rpciod_down(); @@ -907,7 +908,6 @@ rpc_free_client(struct rpc_clnt *clnt) rpc_unregister_client(clnt); rpc_free_iostats(clnt->cl_metrics); clnt->cl_metrics = NULL; - xprt_put(rcu_dereference_raw(clnt->cl_xprt)); xprt_iter_destroy(&clnt->cl_xpi); put_cred(clnt->cl_cred); rpc_free_clid(clnt); -- cgit v1.2.3 From ce99aa62e1eb793e259d023c7f6ccb7c4879917b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 9 May 2020 14:07:13 -0400 Subject: SUNRPC: Signalled ASYNC tasks need to exit Ensure that signalled ASYNC rpc_tasks exit immediately instead of spinning until a timeout (or forever). To avoid checking for the signal flag on every scheduler iteration, the check is instead introduced in the client's finite state machine. Signed-off-by: Chuck Lever Fixes: ae67bd3821bb ("SUNRPC: Fix up task signalling") Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'net/sunrpc') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 8d3972ea688b..c74bc402f8c7 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2433,6 +2433,11 @@ rpc_check_timeout(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; + if (RPC_SIGNALLED(task)) { + rpc_call_rpcerror(task, -ERESTARTSYS); + return; + } + if (xprt_adjust_timeout(task->tk_rqstp) == 0) return; -- cgit v1.2.3 From 933496e9cc02cda16ca8f0c38e56204376d6b0b2 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Wed, 13 May 2020 18:38:40 -0400 Subject: SUNRPC: 'Directory with parent 'rpc_clnt' already present!' Each rpc_client has a cl_clid which is allocated from a global ida, and a debugfs directory which is named after cl_clid. We're releasing the cl_clid before we free the debugfs directory named after it. As soon as the cl_clid is released, that value is available for another newly created client. That leaves a window where another client may attempt to create a new debugfs directory with the same name as the not-yet-deleted debugfs directory from the dying client. Symptoms are log messages like Directory 4 with parent 'rpc_clnt' already present! Fixes: 7c4310ff5642 "SUNRPC: defer slow parts of rpc_free_client() to a workqueue." Signed-off-by: J. Bruce Fields Signed-off-by: Trond Myklebust --- net/sunrpc/clnt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index c74bc402f8c7..61b21dafd7c0 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -889,6 +889,7 @@ static void rpc_free_client_work(struct work_struct *work) * here. */ rpc_clnt_debugfs_unregister(clnt); + rpc_free_clid(clnt); rpc_clnt_remove_pipedir(clnt); xprt_put(rcu_dereference_raw(clnt->cl_xprt)); @@ -910,7 +911,6 @@ rpc_free_client(struct rpc_clnt *clnt) clnt->cl_metrics = NULL; xprt_iter_destroy(&clnt->cl_xpi); put_cred(clnt->cl_cred); - rpc_free_clid(clnt); INIT_WORK(&clnt->cl_work, rpc_free_client_work); schedule_work(&clnt->cl_work); -- cgit v1.2.3 From ca4faf543a33373bed3650812d5f0cd0bd295b1a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 2 May 2020 10:37:44 -0400 Subject: SUNRPC: Move xpt_mutex into socket xpo_sendto methods It appears that the RPC/RDMA transport does not need serialization of calls to its xpo_sendto method. Move the mutex into the socket methods that still need that serialization. Tail latencies are unambiguously better with this patch applied. fio randrw 8KB 70/30 on NFSv3, smaller numbers are better: clat percentiles (usec): With xpt_mutex: r | 99.99th=[ 8848] w | 99.99th=[ 9634] Without xpt_mutex: r | 99.99th=[ 8586] w | 99.99th=[ 8979] Serializing the construction of RPC/RDMA transport headers is not really necessary at this point, because the Linux NFS server implementation never changes its credit grant on a connection. If that should change, then svc_rdma_sendto will need to serialize access to the transport's credit grant fields. Reported-by: kbuild test robot [ cel: fix uninitialized variable warning ] Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_xprt.h | 6 +++++ net/sunrpc/svc_xprt.c | 12 +++------- net/sunrpc/svcsock.c | 25 +++++++++++++++++++++ net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 35 ++++++++++++++---------------- net/sunrpc/xprtrdma/svc_rdma_sendto.c | 10 ++++----- net/sunrpc/xprtsock.c | 12 ++++++++-- 6 files changed, 64 insertions(+), 36 deletions(-) (limited to 'net/sunrpc') diff --git a/include/linux/sunrpc/svc_xprt.h b/include/linux/sunrpc/svc_xprt.h index 9e1e046de176..aca35ab5cff2 100644 --- a/include/linux/sunrpc/svc_xprt.h +++ b/include/linux/sunrpc/svc_xprt.h @@ -117,6 +117,12 @@ static inline int register_xpt_user(struct svc_xprt *xpt, struct svc_xpt_user *u return 0; } +static inline bool svc_xprt_is_dead(const struct svc_xprt *xprt) +{ + return (test_bit(XPT_DEAD, &xprt->xpt_flags) != 0) || + (test_bit(XPT_CLOSE, &xprt->xpt_flags) != 0); +} + int svc_reg_xprt_class(struct svc_xprt_class *); void svc_unreg_xprt_class(struct svc_xprt_class *); void svc_xprt_init(struct net *, struct svc_xprt_class *, struct svc_xprt *, diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 2284ff038dad..07cdbf7d5764 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -914,16 +914,10 @@ int svc_send(struct svc_rqst *rqstp) xb->page_len + xb->tail[0].iov_len; trace_svc_sendto(xb); - - /* Grab mutex to serialize outgoing data. */ - mutex_lock(&xprt->xpt_mutex); trace_svc_stats_latency(rqstp); - if (test_bit(XPT_DEAD, &xprt->xpt_flags) - || test_bit(XPT_CLOSE, &xprt->xpt_flags)) - len = -ENOTCONN; - else - len = xprt->xpt_ops->xpo_sendto(rqstp); - mutex_unlock(&xprt->xpt_mutex); + + len = xprt->xpt_ops->xpo_sendto(rqstp); + trace_svc_send(rqstp, len); svc_xprt_release(rqstp); diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 023514e392b3..3e7b6445e317 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -506,6 +506,9 @@ out_free: * svc_udp_sendto - Send out a reply on a UDP socket * @rqstp: completed svc_rqst * + * xpt_mutex ensures @rqstp's whole message is written to the socket + * without interruption. + * * Returns the number of bytes sent, or a negative errno. */ static int svc_udp_sendto(struct svc_rqst *rqstp) @@ -531,6 +534,11 @@ static int svc_udp_sendto(struct svc_rqst *rqstp) svc_set_cmsg_data(rqstp, cmh); + mutex_lock(&xprt->xpt_mutex); + + if (svc_xprt_is_dead(xprt)) + goto out_notconn; + err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent); xdr_free_bvec(xdr); if (err == -ECONNREFUSED) { @@ -538,9 +546,15 @@ static int svc_udp_sendto(struct svc_rqst *rqstp) err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent); xdr_free_bvec(xdr); } + + mutex_unlock(&xprt->xpt_mutex); if (err < 0) return err; return sent; + +out_notconn: + mutex_unlock(&xprt->xpt_mutex); + return -ENOTCONN; } static int svc_udp_has_wspace(struct svc_xprt *xprt) @@ -1063,6 +1077,9 @@ err_noclose: * svc_tcp_sendto - Send out a reply on a TCP socket * @rqstp: completed svc_rqst * + * xpt_mutex ensures @rqstp's whole message is written to the socket + * without interruption. + * * Returns the number of bytes sent, or a negative errno. */ static int svc_tcp_sendto(struct svc_rqst *rqstp) @@ -1080,12 +1097,19 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp) svc_release_skb(rqstp); + mutex_lock(&xprt->xpt_mutex); + if (svc_xprt_is_dead(xprt)) + goto out_notconn; err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, marker, &sent); xdr_free_bvec(xdr); if (err < 0 || sent != (xdr->len + sizeof(marker))) goto out_close; + mutex_unlock(&xprt->xpt_mutex); return sent; +out_notconn: + mutex_unlock(&xprt->xpt_mutex); + return -ENOTCONN; out_close: pr_notice("rpc-srv/tcp: %s: %s %d when sending %d bytes - shutting down socket\n", xprt->xpt_server->sv_name, @@ -1093,6 +1117,7 @@ out_close: (err < 0) ? err : sent, xdr->len); set_bit(XPT_CLOSE, &xprt->xpt_flags); svc_xprt_enqueue(xprt); + mutex_unlock(&xprt->xpt_mutex); return -EAGAIN; } diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index af7eb8d202ae..d9aab4504f2c 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -210,34 +210,31 @@ drop_connection: return -ENOTCONN; } -/* Send an RPC call on the passive end of a transport - * connection. +/** + * xprt_rdma_bc_send_request - Send a reverse-direction Call + * @rqst: rpc_rqst containing Call message to be sent + * + * Return values: + * %0 if the message was sent successfully + * %ENOTCONN if the message was not sent */ -static int -xprt_rdma_bc_send_request(struct rpc_rqst *rqst) +static int xprt_rdma_bc_send_request(struct rpc_rqst *rqst) { struct svc_xprt *sxprt = rqst->rq_xprt->bc_xprt; - struct svcxprt_rdma *rdma; + struct svcxprt_rdma *rdma = + container_of(sxprt, struct svcxprt_rdma, sc_xprt); int ret; dprintk("svcrdma: sending bc call with xid: %08x\n", be32_to_cpu(rqst->rq_xid)); - mutex_lock(&sxprt->xpt_mutex); - - ret = -ENOTCONN; - rdma = container_of(sxprt, struct svcxprt_rdma, sc_xprt); - if (!test_bit(XPT_DEAD, &sxprt->xpt_flags)) { - ret = rpcrdma_bc_send_request(rdma, rqst); - if (ret == -ENOTCONN) - svc_close_xprt(sxprt); - } + if (test_bit(XPT_DEAD, &sxprt->xpt_flags)) + return -ENOTCONN; - mutex_unlock(&sxprt->xpt_mutex); - - if (ret < 0) - return ret; - return 0; + ret = rpcrdma_bc_send_request(rdma, rqst); + if (ret == -ENOTCONN) + svc_close_xprt(sxprt); + return ret; } static void diff --git a/net/sunrpc/xprtrdma/svc_rdma_sendto.c b/net/sunrpc/xprtrdma/svc_rdma_sendto.c index b6c8643867f2..38e7c3c8c4a9 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_sendto.c +++ b/net/sunrpc/xprtrdma/svc_rdma_sendto.c @@ -868,12 +868,10 @@ int svc_rdma_sendto(struct svc_rqst *rqstp) __be32 *p; int ret; - /* Create the RDMA response header. xprt->xpt_mutex, - * acquired in svc_send(), serializes RPC replies. The - * code path below that inserts the credit grant value - * into each transport header runs only inside this - * critical section. - */ + ret = -ENOTCONN; + if (svc_xprt_is_dead(xprt)) + goto err0; + ret = -ENOMEM; sctxt = svc_rdma_send_ctxt_get(rdma); if (!sctxt) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 845d0be805ec..839c49330785 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2548,8 +2548,16 @@ static int bc_sendto(struct rpc_rqst *req) return sent; } -/* - * The send routine. Borrows from svc_send +/** + * bc_send_request - Send a backchannel Call on a TCP socket + * @req: rpc_rqst containing Call message to be sent + * + * xpt_mutex ensures @rqstp's whole message is written to the socket + * without interruption. + * + * Return values: + * %0 if the message was sent successfully + * %ENOTCONN if the message was not sent */ static int bc_send_request(struct rpc_rqst *req) { -- cgit v1.2.3 From 2abfbe7e3a725380f22f572c69da41515a0dd43b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 20 Mar 2020 14:02:49 -0400 Subject: svcrdma: Clean up the tracing for rw_ctx_init errors - De-duplicate code - Rename the tracepoint with "_err" to allow enabling via glob - Report the sg_cnt for the failing rw_ctx - Fix a dumb signage issue Signed-off-by: Chuck Lever --- include/trace/events/rpcrdma.h | 12 ++++++--- net/sunrpc/xprtrdma/svc_rdma_rw.c | 56 ++++++++++++++++++++++++--------------- 2 files changed, 43 insertions(+), 25 deletions(-) (limited to 'net/sunrpc') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 132c3c778a43..f231975064cb 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -1583,28 +1583,32 @@ DECLARE_EVENT_CLASS(svcrdma_dma_map_class, DEFINE_SVC_DMA_EVENT(dma_map_page); DEFINE_SVC_DMA_EVENT(dma_unmap_page); -TRACE_EVENT(svcrdma_dma_map_rwctx, +TRACE_EVENT(svcrdma_dma_map_rw_err, TP_PROTO( const struct svcxprt_rdma *rdma, + unsigned int nents, int status ), - TP_ARGS(rdma, status), + TP_ARGS(rdma, nents, status), TP_STRUCT__entry( __field(int, status) + __field(unsigned int, nents) __string(device, rdma->sc_cm_id->device->name) __string(addr, rdma->sc_xprt.xpt_remotebuf) ), TP_fast_assign( __entry->status = status; + __entry->nents = nents; __assign_str(device, rdma->sc_cm_id->device->name); __assign_str(addr, rdma->sc_xprt.xpt_remotebuf); ), - TP_printk("addr=%s device=%s status=%d", - __get_str(addr), __get_str(device), __entry->status + TP_printk("addr=%s device=%s nents=%u status=%d", + __get_str(addr), __get_str(device), __entry->nents, + __entry->status ) ); diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index 23c2d3ce0dc9..db70709e165a 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -39,7 +39,7 @@ static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc); struct svc_rdma_rw_ctxt { struct list_head rw_list; struct rdma_rw_ctx rw_ctx; - int rw_nents; + unsigned int rw_nents; struct sg_table rw_sg_table; struct scatterlist rw_first_sgl[]; }; @@ -107,6 +107,34 @@ void svc_rdma_destroy_rw_ctxts(struct svcxprt_rdma *rdma) } } +/** + * svc_rdma_rw_ctx_init - Prepare a R/W context for I/O + * @rdma: controlling transport instance + * @ctxt: R/W context to prepare + * @offset: RDMA offset + * @handle: RDMA tag/handle + * @direction: I/O direction + * + * Returns on success, the number of WQEs that will be needed + * on the workqueue, or a negative errno. + */ +static int svc_rdma_rw_ctx_init(struct svcxprt_rdma *rdma, + struct svc_rdma_rw_ctxt *ctxt, + u64 offset, u32 handle, + enum dma_data_direction direction) +{ + int ret; + + ret = rdma_rw_ctx_init(&ctxt->rw_ctx, rdma->sc_qp, rdma->sc_port_num, + ctxt->rw_sg_table.sgl, ctxt->rw_nents, + 0, offset, handle, direction); + if (unlikely(ret < 0)) { + svc_rdma_put_rw_ctxt(rdma, ctxt); + trace_svcrdma_dma_map_rw_err(rdma, ctxt->rw_nents, ret); + } + return ret; +} + /* A chunk context tracks all I/O for moving one Read or Write * chunk. This is a a set of rdma_rw's that handle data movement * for all segments of one chunk. @@ -431,12 +459,10 @@ svc_rdma_build_writes(struct svc_rdma_write_info *info, goto out_noctx; constructor(info, write_len, ctxt); - ret = rdma_rw_ctx_init(&ctxt->rw_ctx, rdma->sc_qp, - rdma->sc_port_num, ctxt->rw_sg_table.sgl, - ctxt->rw_nents, 0, seg_offset, - seg_handle, DMA_TO_DEVICE); + ret = svc_rdma_rw_ctx_init(rdma, ctxt, seg_offset, seg_handle, + DMA_TO_DEVICE); if (ret < 0) - goto out_initerr; + return -EIO; trace_svcrdma_send_wseg(seg_handle, write_len, seg_offset); @@ -462,11 +488,6 @@ out_overflow: out_noctx: dprintk("svcrdma: no R/W ctxs available\n"); return -ENOMEM; - -out_initerr: - svc_rdma_put_rw_ctxt(rdma, ctxt); - trace_svcrdma_dma_map_rwctx(rdma, ret); - return -EIO; } /* Send one of an xdr_buf's kvecs by itself. To send a Reply @@ -646,12 +667,10 @@ static int svc_rdma_build_read_segment(struct svc_rdma_read_info *info, goto out_overrun; } - ret = rdma_rw_ctx_init(&ctxt->rw_ctx, cc->cc_rdma->sc_qp, - cc->cc_rdma->sc_port_num, - ctxt->rw_sg_table.sgl, ctxt->rw_nents, - 0, offset, rkey, DMA_FROM_DEVICE); + ret = svc_rdma_rw_ctx_init(cc->cc_rdma, ctxt, offset, rkey, + DMA_FROM_DEVICE); if (ret < 0) - goto out_initerr; + return -EIO; list_add(&ctxt->rw_list, &cc->cc_rwctxts); cc->cc_sqecount += ret; @@ -664,11 +683,6 @@ out_noctx: out_overrun: dprintk("svcrdma: request overruns rq_pages\n"); return -EINVAL; - -out_initerr: - trace_svcrdma_dma_map_rwctx(cc->cc_rdma, ret); - svc_rdma_put_rw_ctxt(cc->cc_rdma, ctxt); - return -EIO; } /* Walk the segments in the Read chunk starting at @p and construct -- cgit v1.2.3 From f4e53e1ce3e56a799c47fef5f4f94cb815b52804 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 20 Mar 2020 14:23:13 -0400 Subject: svcrdma: Clean up handling of get_rw_ctx errors Clean up: Replace two dprintk call sites with a tracepoint. Signed-off-by: Chuck Lever --- include/trace/events/rpcrdma.h | 25 +++++++++++++++++++++++++ net/sunrpc/xprtrdma/svc_rdma_rw.c | 27 +++++++++++---------------- 2 files changed, 36 insertions(+), 16 deletions(-) (limited to 'net/sunrpc') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index f231975064cb..aca9d0f3d769 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -1612,6 +1612,31 @@ TRACE_EVENT(svcrdma_dma_map_rw_err, ) ); +TRACE_EVENT(svcrdma_no_rwctx_err, + TP_PROTO( + const struct svcxprt_rdma *rdma, + unsigned int num_sges + ), + + TP_ARGS(rdma, num_sges), + + TP_STRUCT__entry( + __field(unsigned int, num_sges) + __string(device, rdma->sc_cm_id->device->name) + __string(addr, rdma->sc_xprt.xpt_remotebuf) + ), + + TP_fast_assign( + __entry->num_sges = num_sges; + __assign_str(device, rdma->sc_cm_id->device->name); + __assign_str(addr, rdma->sc_xprt.xpt_remotebuf); + ), + + TP_printk("addr=%s device=%s num_sges=%d", + __get_str(addr), __get_str(device), __entry->num_sges + ) +); + TRACE_EVENT(svcrdma_send_pullup, TP_PROTO( unsigned int len diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index db70709e165a..c2d49f607cfe 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -67,19 +67,22 @@ svc_rdma_get_rw_ctxt(struct svcxprt_rdma *rdma, unsigned int sges) ctxt = kmalloc(struct_size(ctxt, rw_first_sgl, SG_CHUNK_SIZE), GFP_KERNEL); if (!ctxt) - goto out; + goto out_noctx; INIT_LIST_HEAD(&ctxt->rw_list); } ctxt->rw_sg_table.sgl = ctxt->rw_first_sgl; if (sg_alloc_table_chained(&ctxt->rw_sg_table, sges, ctxt->rw_sg_table.sgl, - SG_CHUNK_SIZE)) { - kfree(ctxt); - ctxt = NULL; - } -out: + SG_CHUNK_SIZE)) + goto out_free; return ctxt; + +out_free: + kfree(ctxt); +out_noctx: + trace_svcrdma_no_rwctx_err(rdma, sges); + return NULL; } static void svc_rdma_put_rw_ctxt(struct svcxprt_rdma *rdma, @@ -456,7 +459,7 @@ svc_rdma_build_writes(struct svc_rdma_write_info *info, ctxt = svc_rdma_get_rw_ctxt(rdma, (write_len >> PAGE_SHIFT) + 2); if (!ctxt) - goto out_noctx; + return -ENOMEM; constructor(info, write_len, ctxt); ret = svc_rdma_rw_ctx_init(rdma, ctxt, seg_offset, seg_handle, @@ -484,10 +487,6 @@ out_overflow: dprintk("svcrdma: inadequate space in Write chunk (%u)\n", info->wi_nsegs); return -E2BIG; - -out_noctx: - dprintk("svcrdma: no R/W ctxs available\n"); - return -ENOMEM; } /* Send one of an xdr_buf's kvecs by itself. To send a Reply @@ -637,7 +636,7 @@ static int svc_rdma_build_read_segment(struct svc_rdma_read_info *info, sge_no = PAGE_ALIGN(info->ri_pageoff + len) >> PAGE_SHIFT; ctxt = svc_rdma_get_rw_ctxt(cc->cc_rdma, sge_no); if (!ctxt) - goto out_noctx; + return -ENOMEM; ctxt->rw_nents = sge_no; sg = ctxt->rw_sg_table.sgl; @@ -676,10 +675,6 @@ static int svc_rdma_build_read_segment(struct svc_rdma_read_info *info, cc->cc_sqecount += ret; return 0; -out_noctx: - dprintk("svcrdma: no R/W ctxs available\n"); - return -ENOMEM; - out_overrun: dprintk("svcrdma: request overruns rq_pages\n"); return -EINVAL; -- cgit v1.2.3 From 9d20063892624a98bab093725fae6999cfcb328e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 20 Mar 2020 14:33:05 -0400 Subject: svcrdma: Trace page overruns when constructing RDMA Reads Clean up: Replace a dprintk call site with a tracepoint. Signed-off-by: Chuck Lever --- include/trace/events/rpcrdma.h | 28 ++++++++++++++++++++++++++++ net/sunrpc/xprtrdma/svc_rdma_rw.c | 2 +- 2 files changed, 29 insertions(+), 1 deletion(-) (limited to 'net/sunrpc') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index aca9d0f3d769..d6da6b8d521d 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -1637,6 +1637,34 @@ TRACE_EVENT(svcrdma_no_rwctx_err, ) ); +TRACE_EVENT(svcrdma_page_overrun_err, + TP_PROTO( + const struct svcxprt_rdma *rdma, + const struct svc_rqst *rqst, + unsigned int pageno + ), + + TP_ARGS(rdma, rqst, pageno), + + TP_STRUCT__entry( + __field(unsigned int, pageno) + __field(u32, xid) + __string(device, rdma->sc_cm_id->device->name) + __string(addr, rdma->sc_xprt.xpt_remotebuf) + ), + + TP_fast_assign( + __entry->pageno = pageno; + __entry->xid = __be32_to_cpu(rqst->rq_xid); + __assign_str(device, rdma->sc_cm_id->device->name); + __assign_str(addr, rdma->sc_xprt.xpt_remotebuf); + ), + + TP_printk("addr=%s device=%s xid=0x%08x pageno=%u", __get_str(addr), + __get_str(device), __entry->xid, __entry->pageno + ) +); + TRACE_EVENT(svcrdma_send_pullup, TP_PROTO( unsigned int len diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index c2d49f607cfe..17098a11d2ad 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -676,7 +676,7 @@ static int svc_rdma_build_read_segment(struct svc_rdma_read_info *info, return 0; out_overrun: - dprintk("svcrdma: request overruns rq_pages\n"); + trace_svcrdma_page_overrun_err(cc->cc_rdma, rqstp, info->ri_pageno); return -EINVAL; } -- cgit v1.2.3 From dbc17acd5d42be457c7311c141f993d9ba5be014 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 20 Mar 2020 14:36:44 -0400 Subject: svcrdma: trace undersized Write chunks Clean up: Replace a dprintk call site. This is the last remaining dprintk call site in svc_rdma_rw.c, so remove dprintk infrastructure as well. Signed-off-by: Chuck Lever --- include/trace/events/rpcrdma.h | 32 ++++++++++++++++++++++++++++++++ net/sunrpc/xprtrdma/svc_rdma_rw.c | 7 ++----- 2 files changed, 34 insertions(+), 5 deletions(-) (limited to 'net/sunrpc') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index d6da6b8d521d..c046b198072a 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -1665,6 +1665,38 @@ TRACE_EVENT(svcrdma_page_overrun_err, ) ); +TRACE_EVENT(svcrdma_small_wrch_err, + TP_PROTO( + const struct svcxprt_rdma *rdma, + unsigned int remaining, + unsigned int seg_no, + unsigned int num_segs + ), + + TP_ARGS(rdma, remaining, seg_no, num_segs), + + TP_STRUCT__entry( + __field(unsigned int, remaining) + __field(unsigned int, seg_no) + __field(unsigned int, num_segs) + __string(device, rdma->sc_cm_id->device->name) + __string(addr, rdma->sc_xprt.xpt_remotebuf) + ), + + TP_fast_assign( + __entry->remaining = remaining; + __entry->seg_no = seg_no; + __entry->num_segs = num_segs; + __assign_str(device, rdma->sc_cm_id->device->name); + __assign_str(addr, rdma->sc_xprt.xpt_remotebuf); + ), + + TP_printk("addr=%s device=%s remaining=%u seg_no=%u num_segs=%u", + __get_str(addr), __get_str(device), __entry->remaining, + __entry->seg_no, __entry->num_segs + ) +); + TRACE_EVENT(svcrdma_send_pullup, TP_PROTO( unsigned int len diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index 17098a11d2ad..5eb35309ecef 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -9,13 +9,10 @@ #include #include -#include #include "xprt_rdma.h" #include -#define RPCDBG_FACILITY RPCDBG_SVCXPRT - static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc); static void svc_rdma_wc_read_done(struct ib_cq *cq, struct ib_wc *wc); @@ -484,8 +481,8 @@ svc_rdma_build_writes(struct svc_rdma_write_info *info, return 0; out_overflow: - dprintk("svcrdma: inadequate space in Write chunk (%u)\n", - info->wi_nsegs); + trace_svcrdma_small_wrch_err(rdma, remaining, info->wi_seg_no, + info->wi_nsegs); return -E2BIG; } -- cgit v1.2.3 From ea740bd5f58e2912e74f401fd01a9d6aa985ca05 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 20 Mar 2020 17:32:41 -0400 Subject: svcrdma: Fix backchannel return code Way back when I was writing the RPC/RDMA server-side backchannel code, I misread the TCP backchannel reply handler logic. When svc_tcp_recvfrom() successfully receives a backchannel reply, it does not return -EAGAIN. It sets XPT_DATA and returns zero. Update svc_rdma_recvfrom() to return zero. Here, XPT_DATA doesn't need to be set again: it is set whenever a new message is received, behind a spin lock in a single threaded context. Also, if handling the cb reply is not successful, the message is simply dropped. There's no special message framing to deal with as there is in the TCP case. Now that the handle_bc_reply() return value is ignored, I've removed the dprintk call sites in the error exit of handle_bc_reply() in favor of trace points in other areas that already report the error cases. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc_rdma.h | 5 ++-- net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 38 ++++++++---------------------- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 11 ++++----- 3 files changed, 17 insertions(+), 37 deletions(-) (limited to 'net/sunrpc') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index cbcfbd0521e3..8518c3f37e56 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -160,9 +160,8 @@ struct svc_rdma_send_ctxt { }; /* svc_rdma_backchannel.c */ -extern int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, - __be32 *rdma_resp, - struct xdr_buf *rcvbuf); +extern void svc_rdma_handle_bc_reply(struct svc_rqst *rqstp, + struct svc_rdma_recv_ctxt *rctxt); /* svc_rdma_recvfrom.c */ extern void svc_rdma_recv_ctxts_destroy(struct svcxprt_rdma *rdma); diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index d9aab4504f2c..b174f3b109a5 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -15,26 +15,25 @@ #undef SVCRDMA_BACKCHANNEL_DEBUG /** - * svc_rdma_handle_bc_reply - Process incoming backchannel reply - * @xprt: controlling backchannel transport - * @rdma_resp: pointer to incoming transport header - * @rcvbuf: XDR buffer into which to decode the reply + * svc_rdma_handle_bc_reply - Process incoming backchannel Reply + * @rqstp: resources for handling the Reply + * @rctxt: Received message * - * Returns: - * %0 if @rcvbuf is filled in, xprt_complete_rqst called, - * %-EAGAIN if server should call ->recvfrom again. */ -int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, __be32 *rdma_resp, - struct xdr_buf *rcvbuf) +void svc_rdma_handle_bc_reply(struct svc_rqst *rqstp, + struct svc_rdma_recv_ctxt *rctxt) { + struct svc_xprt *sxprt = rqstp->rq_xprt; + struct rpc_xprt *xprt = sxprt->xpt_bc_xprt; struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + struct xdr_buf *rcvbuf = &rqstp->rq_arg; struct kvec *dst, *src = &rcvbuf->head[0]; + __be32 *rdma_resp = rctxt->rc_recv_buf; struct rpc_rqst *req; u32 credits; size_t len; __be32 xid; __be32 *p; - int ret; p = (__be32 *)src->iov_base; len = src->iov_len; @@ -49,14 +48,10 @@ int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, __be32 *rdma_resp, __func__, (int)len, p); #endif - ret = -EAGAIN; - if (src->iov_len < 24) - goto out_shortreply; - spin_lock(&xprt->queue_lock); req = xprt_lookup_rqst(xprt, xid); if (!req) - goto out_notfound; + goto out_unlock; dst = &req->rq_private_buf.head[0]; memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(struct xdr_buf)); @@ -77,25 +72,12 @@ int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, __be32 *rdma_resp, spin_unlock(&xprt->transport_lock); spin_lock(&xprt->queue_lock); - ret = 0; xprt_complete_rqst(req->rq_task, rcvbuf->len); xprt_unpin_rqst(req); rcvbuf->len = 0; out_unlock: spin_unlock(&xprt->queue_lock); -out: - return ret; - -out_shortreply: - dprintk("svcrdma: short bc reply: xprt=%p, len=%zu\n", - xprt, src->iov_len); - goto out; - -out_notfound: - dprintk("svcrdma: unrecognized bc reply: xprt=%p, xid=%08x\n", - xprt, be32_to_cpu(xid)); - goto out_unlock; } /* Send a backwards direction RPC call. diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index efa5fcb5793f..eee7c6478b30 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -878,12 +878,9 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp) goto out_drop; rqstp->rq_xprt_hlen = ret; - if (svc_rdma_is_backchannel_reply(xprt, p)) { - ret = svc_rdma_handle_bc_reply(xprt->xpt_bc_xprt, p, - &rqstp->rq_arg); - svc_rdma_recv_ctxt_put(rdma_xprt, ctxt); - return ret; - } + if (svc_rdma_is_backchannel_reply(xprt, p)) + goto out_backchannel; + svc_rdma_get_inv_rkey(rdma_xprt, ctxt); p += rpcrdma_fixed_maxsz; @@ -913,6 +910,8 @@ out_postfail: svc_rdma_recv_ctxt_put(rdma_xprt, ctxt); return ret; +out_backchannel: + svc_rdma_handle_bc_reply(rqstp, ctxt); out_drop: svc_rdma_recv_ctxt_put(rdma_xprt, ctxt); return 0; -- cgit v1.2.3 From f5046b8f43d180d6f4c9129a750b98f9a503de2d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 20 Mar 2020 15:11:56 -0400 Subject: svcrdma: Remove backchannel dprintk call sites Clean up. Signed-off-by: Chuck Lever --- net/sunrpc/xprtrdma/svc_rdma_backchannel.c | 48 ++++-------------------------- 1 file changed, 5 insertions(+), 43 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index b174f3b109a5..1ee73f7cf931 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -10,10 +10,6 @@ #include "xprt_rdma.h" #include -#define RPCDBG_FACILITY RPCDBG_SVCXPRT - -#undef SVCRDMA_BACKCHANNEL_DEBUG - /** * svc_rdma_handle_bc_reply - Process incoming backchannel Reply * @rqstp: resources for handling the Reply @@ -31,33 +27,17 @@ void svc_rdma_handle_bc_reply(struct svc_rqst *rqstp, __be32 *rdma_resp = rctxt->rc_recv_buf; struct rpc_rqst *req; u32 credits; - size_t len; - __be32 xid; - __be32 *p; - - p = (__be32 *)src->iov_base; - len = src->iov_len; - xid = *rdma_resp; - -#ifdef SVCRDMA_BACKCHANNEL_DEBUG - pr_info("%s: xid=%08x, length=%zu\n", - __func__, be32_to_cpu(xid), len); - pr_info("%s: RPC/RDMA: %*ph\n", - __func__, (int)RPCRDMA_HDRLEN_MIN, rdma_resp); - pr_info("%s: RPC: %*ph\n", - __func__, (int)len, p); -#endif spin_lock(&xprt->queue_lock); - req = xprt_lookup_rqst(xprt, xid); + req = xprt_lookup_rqst(xprt, *rdma_resp); if (!req) goto out_unlock; dst = &req->rq_private_buf.head[0]; memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(struct xdr_buf)); - if (dst->iov_len < len) + if (dst->iov_len < src->iov_len) goto out_unlock; - memcpy(dst->iov_base, p, len); + memcpy(dst->iov_base, src->iov_base, src->iov_len); xprt_pin_rqst(req); spin_unlock(&xprt->queue_lock); @@ -66,7 +46,6 @@ void svc_rdma_handle_bc_reply(struct svc_rqst *rqstp, credits = 1; /* don't deadlock */ else if (credits > r_xprt->rx_buf.rb_bc_max_requests) credits = r_xprt->rx_buf.rb_bc_max_requests; - spin_lock(&xprt->transport_lock); xprt->cwnd = credits << RPC_CWNDSHIFT; spin_unlock(&xprt->transport_lock); @@ -174,10 +153,6 @@ rpcrdma_bc_send_request(struct svcxprt_rdma *rdma, struct rpc_rqst *rqst) *p++ = xdr_zero; *p = xdr_zero; -#ifdef SVCRDMA_BACKCHANNEL_DEBUG - pr_info("%s: %*ph\n", __func__, 64, rqst->rq_buffer); -#endif - rqst->rq_xtime = ktime_get(); rc = svc_rdma_bc_sendto(rdma, rqst, ctxt); if (rc) @@ -188,7 +163,6 @@ put_ctxt: svc_rdma_send_ctxt_put(rdma, ctxt); drop_connection: - dprintk("svcrdma: failed to send bc call\n"); return -ENOTCONN; } @@ -207,9 +181,6 @@ static int xprt_rdma_bc_send_request(struct rpc_rqst *rqst) container_of(sxprt, struct svcxprt_rdma, sc_xprt); int ret; - dprintk("svcrdma: sending bc call with xid: %08x\n", - be32_to_cpu(rqst->rq_xid)); - if (test_bit(XPT_DEAD, &sxprt->xpt_flags)) return -ENOTCONN; @@ -222,8 +193,6 @@ static int xprt_rdma_bc_send_request(struct rpc_rqst *rqst) static void xprt_rdma_bc_close(struct rpc_xprt *xprt) { - dprintk("svcrdma: %s: xprt %p\n", __func__, xprt); - xprt_disconnect_done(xprt); xprt->cwnd = RPC_CWNDSHIFT; } @@ -231,8 +200,6 @@ xprt_rdma_bc_close(struct rpc_xprt *xprt) static void xprt_rdma_bc_put(struct rpc_xprt *xprt) { - dprintk("svcrdma: %s: xprt %p\n", __func__, xprt); - xprt_rdma_free_addresses(xprt); xprt_free(xprt); } @@ -267,19 +234,14 @@ xprt_setup_rdma_bc(struct xprt_create *args) struct rpc_xprt *xprt; struct rpcrdma_xprt *new_xprt; - if (args->addrlen > sizeof(xprt->addr)) { - dprintk("RPC: %s: address too large\n", __func__); + if (args->addrlen > sizeof(xprt->addr)) return ERR_PTR(-EBADF); - } xprt = xprt_alloc(args->net, sizeof(*new_xprt), RPCRDMA_MAX_BC_REQUESTS, RPCRDMA_MAX_BC_REQUESTS); - if (!xprt) { - dprintk("RPC: %s: couldn't allocate rpc_xprt\n", - __func__); + if (!xprt) return ERR_PTR(-ENOMEM); - } xprt->timeout = &xprt_rdma_bc_timeout; xprt_set_bound(xprt); -- cgit v1.2.3 From 27ce6294445aebe314ff3baefea7c720b50fcc8a Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 23 Mar 2020 16:33:16 -0400 Subject: svcrdma: Rename tracepoints that record header decoding errors Clean up: Use a consistent naming convention so that these trace points can be enabled quickly via a glob. Signed-off-by: Chuck Lever --- include/trace/events/rpcrdma.h | 5 +++-- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 10 +++++----- 2 files changed, 8 insertions(+), 7 deletions(-) (limited to 'net/sunrpc') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index c046b198072a..53b24c8c7860 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -1355,7 +1355,7 @@ TRACE_EVENT(svcrdma_decode_rqst, show_rpcrdma_proc(__entry->proc), __entry->hdrlen) ); -TRACE_EVENT(svcrdma_decode_short, +TRACE_EVENT(svcrdma_decode_short_err, TP_PROTO( unsigned int hdrlen ), @@ -1399,7 +1399,8 @@ DECLARE_EVENT_CLASS(svcrdma_badreq_event, ); #define DEFINE_BADREQ_EVENT(name) \ - DEFINE_EVENT(svcrdma_badreq_event, svcrdma_decode_##name,\ + DEFINE_EVENT(svcrdma_badreq_event, \ + svcrdma_decode_##name##_err, \ TP_PROTO( \ __be32 *p \ ), \ diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c index eee7c6478b30..e426fedb9524 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c +++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c @@ -665,23 +665,23 @@ static int svc_rdma_xdr_decode_req(struct xdr_buf *rq_arg, return hdr_len; out_short: - trace_svcrdma_decode_short(rq_arg->len); + trace_svcrdma_decode_short_err(rq_arg->len); return -EINVAL; out_version: - trace_svcrdma_decode_badvers(rdma_argp); + trace_svcrdma_decode_badvers_err(rdma_argp); return -EPROTONOSUPPORT; out_drop: - trace_svcrdma_decode_drop(rdma_argp); + trace_svcrdma_decode_drop_err(rdma_argp); return 0; out_proc: - trace_svcrdma_decode_badproc(rdma_argp); + trace_svcrdma_decode_badproc_err(rdma_argp); return -EINVAL; out_inval: - trace_svcrdma_decode_parse(rdma_argp); + trace_svcrdma_decode_parse_err(rdma_argp); return -EINVAL; } -- cgit v1.2.3 From decc13f7eb258b8c5c564ec1f11a24f14af275f8 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 2 May 2020 10:42:47 -0400 Subject: svcrdma: Displayed remote IP address should match stored address Clean up: After commit 1e091c3bbf51 ("svcrdma: Ignore source port when computing DRC hash"), the IP address stored in xpt_remote always has a port number of zero. Thus, there's no need to display the port number when displaying the IP address of a remote NFS/RDMA client. Signed-off-by: Chuck Lever --- net/sunrpc/xprtrdma/svc_rdma_transport.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index ea54785db4f8..0a1125277a48 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -211,7 +211,12 @@ static void handle_connect_req(struct rdma_cm_id *new_cma_id, newxprt->sc_ord = param->initiator_depth; sa = (struct sockaddr *)&newxprt->sc_cm_id->route.addr.dst_addr; - svc_xprt_set_remote(&newxprt->sc_xprt, sa, svc_addr_len(sa)); + newxprt->sc_xprt.xpt_remotelen = svc_addr_len(sa); + memcpy(&newxprt->sc_xprt.xpt_remote, sa, + newxprt->sc_xprt.xpt_remotelen); + snprintf(newxprt->sc_xprt.xpt_remotebuf, + sizeof(newxprt->sc_xprt.xpt_remotebuf) - 1, "%pISc", sa); + /* The remote port is arbitrary and not under the control of the * client ULP. Set it to a fixed value so that the DRC continues * to be effective after a reconnect. -- cgit v1.2.3 From e979a173a0b8dc1a41bc3194e34b92d79c049ad3 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Thu, 30 Apr 2020 15:25:01 -0400 Subject: svcrdma: Add tracepoints to report ->xpo_accept failures Failure to accept a connection is typically due to a problem specific to a transport type. Also, ->xpo_accept returns NULL on error rather than reporting a specific problem. So, add failure-specific tracepoints in svc_rdma_accept(). Signed-off-by: Chuck Lever --- include/trace/events/rpcrdma.h | 38 +++++++++++++++++++++++++++++++- net/sunrpc/xprtrdma/svc_rdma_transport.c | 27 +++++++++-------------- 2 files changed, 48 insertions(+), 17 deletions(-) (limited to 'net/sunrpc') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 53b24c8c7860..79ef2ab7743c 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -1309,9 +1309,45 @@ DECLARE_EVENT_CLASS(svcrdma_xprt_event, TP_ARGS(xprt)) DEFINE_XPRT_EVENT(accept); -DEFINE_XPRT_EVENT(fail); DEFINE_XPRT_EVENT(free); +DECLARE_EVENT_CLASS(svcrdma_accept_class, + TP_PROTO( + const struct svcxprt_rdma *rdma, + long status + ), + + TP_ARGS(rdma, status), + + TP_STRUCT__entry( + __field(long, status) + __string(addr, rdma->sc_xprt.xpt_remotebuf) + ), + + TP_fast_assign( + __entry->status = status; + __assign_str(addr, rdma->sc_xprt.xpt_remotebuf); + ), + + TP_printk("addr=%s status=%ld", + __get_str(addr), __entry->status + ) +); + +#define DEFINE_ACCEPT_EVENT(name) \ + DEFINE_EVENT(svcrdma_accept_class, svcrdma_##name##_err, \ + TP_PROTO( \ + const struct svcxprt_rdma *rdma, \ + long status \ + ), \ + TP_ARGS(rdma, status)) + +DEFINE_ACCEPT_EVENT(pd); +DEFINE_ACCEPT_EVENT(qp); +DEFINE_ACCEPT_EVENT(fabric); +DEFINE_ACCEPT_EVENT(initdepth); +DEFINE_ACCEPT_EVENT(accept); + TRACE_DEFINE_ENUM(RDMA_MSG); TRACE_DEFINE_ENUM(RDMA_NOMSG); TRACE_DEFINE_ENUM(RDMA_MSGP); diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index 0a1125277a48..f3b5ad2bec2f 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -410,9 +410,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) if (!newxprt) return NULL; - dprintk("svcrdma: newxprt from accept queue = %p, cm_id=%p\n", - newxprt, newxprt->sc_cm_id); - dev = newxprt->sc_cm_id->device; newxprt->sc_port_num = newxprt->sc_cm_id->port_num; @@ -448,21 +445,17 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) newxprt->sc_pd = ib_alloc_pd(dev, 0); if (IS_ERR(newxprt->sc_pd)) { - dprintk("svcrdma: error creating PD for connect request\n"); + trace_svcrdma_pd_err(newxprt, PTR_ERR(newxprt->sc_pd)); goto errout; } newxprt->sc_sq_cq = ib_alloc_cq_any(dev, newxprt, newxprt->sc_sq_depth, IB_POLL_WORKQUEUE); - if (IS_ERR(newxprt->sc_sq_cq)) { - dprintk("svcrdma: error creating SQ CQ for connect request\n"); + if (IS_ERR(newxprt->sc_sq_cq)) goto errout; - } newxprt->sc_rq_cq = ib_alloc_cq_any(dev, newxprt, rq_depth, IB_POLL_WORKQUEUE); - if (IS_ERR(newxprt->sc_rq_cq)) { - dprintk("svcrdma: error creating RQ CQ for connect request\n"); + if (IS_ERR(newxprt->sc_rq_cq)) goto errout; - } memset(&qp_attr, 0, sizeof qp_attr); qp_attr.event_handler = qp_event_handler; @@ -486,7 +479,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) ret = rdma_create_qp(newxprt->sc_cm_id, newxprt->sc_pd, &qp_attr); if (ret) { - dprintk("svcrdma: failed to create QP, ret=%d\n", ret); + trace_svcrdma_qp_err(newxprt, ret); goto errout; } newxprt->sc_qp = newxprt->sc_cm_id->qp; @@ -494,8 +487,10 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) if (!(dev->attrs.device_cap_flags & IB_DEVICE_MEM_MGT_EXTENSIONS)) newxprt->sc_snd_w_inv = false; if (!rdma_protocol_iwarp(dev, newxprt->sc_port_num) && - !rdma_ib_or_roce(dev, newxprt->sc_port_num)) + !rdma_ib_or_roce(dev, newxprt->sc_port_num)) { + trace_svcrdma_fabric_err(newxprt, -EINVAL); goto errout; + } if (!svc_rdma_post_recvs(newxprt)) goto errout; @@ -517,15 +512,17 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) conn_param.initiator_depth = min_t(int, newxprt->sc_ord, dev->attrs.max_qp_init_rd_atom); if (!conn_param.initiator_depth) { - dprintk("svcrdma: invalid ORD setting\n"); ret = -EINVAL; + trace_svcrdma_initdepth_err(newxprt, ret); goto errout; } conn_param.private_data = &pmsg; conn_param.private_data_len = sizeof(pmsg); ret = rdma_accept(newxprt->sc_cm_id, &conn_param); - if (ret) + if (ret) { + trace_svcrdma_accept_err(newxprt, ret); goto errout; + } #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) dprintk("svcrdma: new connection %p accepted:\n", newxprt); @@ -544,8 +541,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) return &newxprt->sc_xprt; errout: - dprintk("svcrdma: failure accepting new connection rc=%d.\n", ret); - trace_svcrdma_xprt_fail(&newxprt->sc_xprt); /* Take a reference in case the DTO handler runs */ svc_xprt_get(&newxprt->sc_xprt); if (newxprt->sc_qp && !IS_ERR(newxprt->sc_qp)) -- cgit v1.2.3 From 4b8f380e46e4d3f8e9f6f9545fe2b78b872b6070 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 28 Apr 2020 17:13:32 -0400 Subject: SUNRPC: Tracepoint to record errors in svc_xpo_create() Capture transport creation failures. Signed-off-by: Chuck Lever --- include/trace/events/sunrpc.h | 29 +++++++++++++++++++++++++++++ net/sunrpc/svc_xprt.c | 7 ++++++- 2 files changed, 35 insertions(+), 1 deletion(-) (limited to 'net/sunrpc') diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 53f2461cf552..f3296ed2b753 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1167,6 +1167,35 @@ DEFINE_EVENT(svc_rqst_status, svc_send, { (1UL << XPT_KILL_TEMP), "XPT_KILL_TEMP"}, \ { (1UL << XPT_CONG_CTRL), "XPT_CONG_CTRL"}) +TRACE_EVENT(svc_xprt_create_err, + TP_PROTO( + const char *program, + const char *protocol, + struct sockaddr *sap, + const struct svc_xprt *xprt + ), + + TP_ARGS(program, protocol, sap, xprt), + + TP_STRUCT__entry( + __field(long, error) + __string(program, program) + __string(protocol, protocol) + __array(unsigned char, addr, sizeof(struct sockaddr_in6)) + ), + + TP_fast_assign( + __entry->error = PTR_ERR(xprt); + __assign_str(program, program); + __assign_str(protocol, protocol); + memcpy(__entry->addr, sap, sizeof(__entry->addr)); + ), + + TP_printk("addr=%pISpc program=%s protocol=%s error=%ld", + __entry->addr, __get_str(program), __get_str(protocol), + __entry->error) +); + TRACE_EVENT(svc_xprt_do_enqueue, TP_PROTO(struct svc_xprt *xprt, struct svc_rqst *rqst), diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 07cdbf7d5764..f89e04210a48 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -206,6 +206,7 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl, .sin6_port = htons(port), }; #endif + struct svc_xprt *xprt; struct sockaddr *sap; size_t len; @@ -224,7 +225,11 @@ static struct svc_xprt *__svc_xpo_create(struct svc_xprt_class *xcl, return ERR_PTR(-EAFNOSUPPORT); } - return xcl->xcl_ops->xpo_create(serv, net, sap, len, flags); + xprt = xcl->xcl_ops->xpo_create(serv, net, sap, len, flags); + if (IS_ERR(xprt)) + trace_svc_xprt_create_err(serv->sv_program->pg_name, + xcl->xcl_name, sap, xprt); + return xprt; } /* -- cgit v1.2.3 From 11bbb0f76e995cb617f582e7a4ec6cb8f6daf910 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 17 Mar 2020 17:41:43 -0400 Subject: SUNRPC: Trace a few more generic svc_xprt events In lieu of dprintks or tracepoints in each individual transport implementation, introduce tracepoints in the generic part of the RPC layer. These typically fire for connection lifetime events, so shouldn't contribute a lot of noise. Signed-off-by: Chuck Lever --- include/trace/events/rpcrdma.h | 32 -------------------------- include/trace/events/sunrpc.h | 39 +++++++++++++++++++++++++++++--- net/sunrpc/svc_xprt.c | 22 ++++-------------- net/sunrpc/svcsock.c | 12 ---------- net/sunrpc/xprtrdma/svc_rdma_transport.c | 21 ++++------------- 5 files changed, 45 insertions(+), 81 deletions(-) (limited to 'net/sunrpc') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 79ef2ab7743c..bdcde7d33f14 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -1279,38 +1279,6 @@ TRACE_EVENT(xprtrdma_leaked_rep, ** Server-side RPC/RDMA events **/ -DECLARE_EVENT_CLASS(svcrdma_xprt_event, - TP_PROTO( - const struct svc_xprt *xprt - ), - - TP_ARGS(xprt), - - TP_STRUCT__entry( - __field(const void *, xprt) - __string(addr, xprt->xpt_remotebuf) - ), - - TP_fast_assign( - __entry->xprt = xprt; - __assign_str(addr, xprt->xpt_remotebuf); - ), - - TP_printk("xprt=%p addr=%s", - __entry->xprt, __get_str(addr) - ) -); - -#define DEFINE_XPRT_EVENT(name) \ - DEFINE_EVENT(svcrdma_xprt_event, svcrdma_xprt_##name, \ - TP_PROTO( \ - const struct svc_xprt *xprt \ - ), \ - TP_ARGS(xprt)) - -DEFINE_XPRT_EVENT(accept); -DEFINE_XPRT_EVENT(free); - DECLARE_EVENT_CLASS(svcrdma_accept_class, TP_PROTO( const struct svcxprt_rdma *rdma, diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index f3296ed2b753..d40ec8f5c220 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1236,9 +1236,42 @@ DECLARE_EVENT_CLASS(svc_xprt_event, show_svc_xprt_flags(__entry->flags)) ); -DEFINE_EVENT(svc_xprt_event, svc_xprt_no_write_space, - TP_PROTO(struct svc_xprt *xprt), - TP_ARGS(xprt)); +#define DEFINE_SVC_XPRT_EVENT(name) \ + DEFINE_EVENT(svc_xprt_event, svc_xprt_##name, \ + TP_PROTO( \ + struct svc_xprt *xprt \ + ), \ + TP_ARGS(xprt)) + +DEFINE_SVC_XPRT_EVENT(no_write_space); +DEFINE_SVC_XPRT_EVENT(close); +DEFINE_SVC_XPRT_EVENT(detach); +DEFINE_SVC_XPRT_EVENT(free); + +TRACE_EVENT(svc_xprt_accept, + TP_PROTO( + const struct svc_xprt *xprt, + const char *service + ), + + TP_ARGS(xprt, service), + + TP_STRUCT__entry( + __string(addr, xprt->xpt_remotebuf) + __string(protocol, xprt->xpt_class->xcl_name) + __string(service, service) + ), + + TP_fast_assign( + __assign_str(addr, xprt->xpt_remotebuf); + __assign_str(protocol, xprt->xpt_class->xcl_name) + __assign_str(service, service); + ), + + TP_printk("addr=%s protocol=%s service=%s", + __get_str(addr), __get_str(protocol), __get_str(service) + ) +); TRACE_EVENT(svc_xprt_dequeue, TP_PROTO(struct svc_rqst *rqst), diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index f89e04210a48..0a546ef02dde 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -153,6 +153,7 @@ static void svc_xprt_free(struct kref *kref) xprt_put(xprt->xpt_bc_xprt); if (xprt->xpt_bc_xps) xprt_switch_put(xprt->xpt_bc_xps); + trace_svc_xprt_free(xprt); xprt->xpt_ops->xpo_free(xprt); module_put(owner); } @@ -309,15 +310,11 @@ int svc_create_xprt(struct svc_serv *serv, const char *xprt_name, { int err; - dprintk("svc: creating transport %s[%d]\n", xprt_name, port); err = _svc_create_xprt(serv, xprt_name, net, family, port, flags, cred); if (err == -EPROTONOSUPPORT) { request_module("svc%s", xprt_name); err = _svc_create_xprt(serv, xprt_name, net, family, port, flags, cred); } - if (err < 0) - dprintk("svc: transport %s not found, err %d\n", - xprt_name, -err); return err; } EXPORT_SYMBOL_GPL(svc_create_xprt); @@ -785,7 +782,6 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt) int len = 0; if (test_bit(XPT_CLOSE, &xprt->xpt_flags)) { - dprintk("svc_recv: found XPT_CLOSE\n"); if (test_and_clear_bit(XPT_KILL_TEMP, &xprt->xpt_flags)) xprt->xpt_ops->xpo_kill_temp_xprt(xprt); svc_delete_xprt(xprt); @@ -804,6 +800,7 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt) if (newxpt) { newxpt->xpt_cred = get_cred(xprt->xpt_cred); svc_add_new_temp_xprt(serv, newxpt); + trace_svc_xprt_accept(newxpt, serv->sv_name); } else module_put(xprt->xpt_class->xcl_owner); } else if (svc_xprt_reserve_slot(rqstp, xprt)) { @@ -840,14 +837,6 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) struct svc_serv *serv = rqstp->rq_server; int len, err; - dprintk("svc: server %p waiting for data (to = %ld)\n", - rqstp, timeout); - - if (rqstp->rq_xprt) - printk(KERN_ERR - "svc_recv: service %p, transport not NULL!\n", - rqstp); - err = svc_alloc_arg(rqstp); if (err) goto out; @@ -895,7 +884,6 @@ EXPORT_SYMBOL_GPL(svc_recv); void svc_drop(struct svc_rqst *rqstp) { trace_svc_drop(rqstp); - dprintk("svc: xprt %p dropped request\n", rqstp->rq_xprt); svc_xprt_release(rqstp); } EXPORT_SYMBOL_GPL(svc_drop); @@ -1030,11 +1018,10 @@ static void svc_delete_xprt(struct svc_xprt *xprt) struct svc_serv *serv = xprt->xpt_server; struct svc_deferred_req *dr; - /* Only do this once */ if (test_and_set_bit(XPT_DEAD, &xprt->xpt_flags)) - BUG(); + return; - dprintk("svc: svc_delete_xprt(%p)\n", xprt); + trace_svc_xprt_detach(xprt); xprt->xpt_ops->xpo_detach(xprt); if (xprt->xpt_bc_xprt) xprt->xpt_bc_xprt->ops->close(xprt->xpt_bc_xprt); @@ -1055,6 +1042,7 @@ static void svc_delete_xprt(struct svc_xprt *xprt) void svc_close_xprt(struct svc_xprt *xprt) { + trace_svc_xprt_close(xprt); set_bit(XPT_CLOSE, &xprt->xpt_flags); if (test_and_set_bit(XPT_BUSY, &xprt->xpt_flags)) /* someone else will have to effect the close */ diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 3e7b6445e317..cf4bd198c19d 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -727,7 +727,6 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt) int err, slen; RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); - dprintk("svc: tcp_accept %p sock %p\n", svsk, sock); if (!sock) return NULL; @@ -1363,11 +1362,6 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv, int newlen; int family; int val; - RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); - - dprintk("svc: svc_create_socket(%s, %d, %s)\n", - serv->sv_program->pg_name, protocol, - __svc_print_addr(sin, buf, sizeof(buf))); if (protocol != IPPROTO_UDP && protocol != IPPROTO_TCP) { printk(KERN_WARNING "svc: only UDP and TCP " @@ -1427,7 +1421,6 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv, svc_xprt_set_local(&svsk->sk_xprt, newsin, newlen); return (struct svc_xprt *)svsk; bummer: - dprintk("svc: svc_create_socket error = %d\n", -error); sock_release(sock); return ERR_PTR(error); } @@ -1441,8 +1434,6 @@ static void svc_sock_detach(struct svc_xprt *xprt) struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); struct sock *sk = svsk->sk_sk; - dprintk("svc: svc_sock_detach(%p)\n", svsk); - /* put back the old socket callbacks */ lock_sock(sk); sk->sk_state_change = svsk->sk_ostate; @@ -1459,8 +1450,6 @@ static void svc_tcp_sock_detach(struct svc_xprt *xprt) { struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); - dprintk("svc: svc_tcp_sock_detach(%p)\n", svsk); - svc_sock_detach(xprt); if (!test_bit(XPT_LISTENER, &xprt->xpt_flags)) { @@ -1475,7 +1464,6 @@ static void svc_tcp_sock_detach(struct svc_xprt *xprt) static void svc_sock_free(struct svc_xprt *xprt) { struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt); - dprintk("svc: svc_sock_free(%p)\n", svsk); if (svsk->sk_sock->file) sockfd_put(svsk->sk_sock); diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index f3b5ad2bec2f..d38be57b00ed 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -314,11 +314,8 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, struct svcxprt_rdma *cma_xprt; int ret; - dprintk("svcrdma: Creating RDMA listener\n"); - if ((sa->sa_family != AF_INET) && (sa->sa_family != AF_INET6)) { - dprintk("svcrdma: Address family %d is not supported.\n", sa->sa_family); + if (sa->sa_family != AF_INET && sa->sa_family != AF_INET6) return ERR_PTR(-EAFNOSUPPORT); - } cma_xprt = svc_rdma_create_xprt(serv, net); if (!cma_xprt) return ERR_PTR(-ENOMEM); @@ -329,7 +326,6 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(listen_id)) { ret = PTR_ERR(listen_id); - dprintk("svcrdma: rdma_create_id failed = %d\n", ret); goto err0; } @@ -338,23 +334,17 @@ static struct svc_xprt *svc_rdma_create(struct svc_serv *serv, */ #if IS_ENABLED(CONFIG_IPV6) ret = rdma_set_afonly(listen_id, 1); - if (ret) { - dprintk("svcrdma: rdma_set_afonly failed = %d\n", ret); + if (ret) goto err1; - } #endif ret = rdma_bind_addr(listen_id, sa); - if (ret) { - dprintk("svcrdma: rdma_bind_addr failed = %d\n", ret); + if (ret) goto err1; - } cma_xprt->sc_cm_id = listen_id; ret = rdma_listen(listen_id, RPCRDMA_LISTEN_BACKLOG); - if (ret) { - dprintk("svcrdma: rdma_listen failed = %d\n", ret); + if (ret) goto err1; - } /* * We need to use the address from the cm_id in case the @@ -537,7 +527,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) dprintk(" ord : %d\n", conn_param.initiator_depth); #endif - trace_svcrdma_xprt_accept(&newxprt->sc_xprt); return &newxprt->sc_xprt; errout: @@ -578,8 +567,6 @@ static void __svc_rdma_free(struct work_struct *work) container_of(work, struct svcxprt_rdma, sc_work); struct svc_xprt *xprt = &rdma->sc_xprt; - trace_svcrdma_xprt_free(xprt); - if (rdma->sc_qp && !IS_ERR(rdma->sc_qp)) ib_drain_qp(rdma->sc_qp); -- cgit v1.2.3 From d998882b4b1b08e72f4414d8eb6647af72bffe2f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 1 May 2020 10:40:47 -0400 Subject: SUNRPC: Remove "#include " Clean up: Commit 850cbaddb52d ("udp: use it's own memory accounting schema") removed the last skb-related tracepoint from svcsock.c, so it is no longer necessary to include trace/events/skb.h. Signed-off-by: Chuck Lever --- net/sunrpc/svcsock.c | 1 - 1 file changed, 1 deletion(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index cf4bd198c19d..1c4d0aacc531 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -45,7 +45,6 @@ #include #include #include -#include #include #include -- cgit v1.2.3 From 998024dee197944a7018a0bdc85b83b569ddec22 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 17 Mar 2020 15:06:31 -0400 Subject: SUNRPC: Add more svcsock tracepoints In addition to tracing recently-updated socket sendto events, this commit adds a trace event class that can be used for additional svcsock-related tracepoints in subsequent patches. Signed-off-by: Chuck Lever --- include/trace/events/sunrpc.h | 97 +++++++++++++++++++++++++++++++++++++++++++ net/sunrpc/svcsock.c | 30 +++++-------- 2 files changed, 107 insertions(+), 20 deletions(-) (limited to 'net/sunrpc') diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index d40ec8f5c220..bf086640b14a 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -14,6 +14,39 @@ #include #include +TRACE_DEFINE_ENUM(SOCK_STREAM); +TRACE_DEFINE_ENUM(SOCK_DGRAM); +TRACE_DEFINE_ENUM(SOCK_RAW); +TRACE_DEFINE_ENUM(SOCK_RDM); +TRACE_DEFINE_ENUM(SOCK_SEQPACKET); +TRACE_DEFINE_ENUM(SOCK_DCCP); +TRACE_DEFINE_ENUM(SOCK_PACKET); + +#define show_socket_type(type) \ + __print_symbolic(type, \ + { SOCK_STREAM, "STREAM" }, \ + { SOCK_DGRAM, "DGRAM" }, \ + { SOCK_RAW, "RAW" }, \ + { SOCK_RDM, "RDM" }, \ + { SOCK_SEQPACKET, "SEQPACKET" }, \ + { SOCK_DCCP, "DCCP" }, \ + { SOCK_PACKET, "PACKET" }) + +/* This list is known to be incomplete, add new enums as needed. */ +TRACE_DEFINE_ENUM(AF_UNSPEC); +TRACE_DEFINE_ENUM(AF_UNIX); +TRACE_DEFINE_ENUM(AF_LOCAL); +TRACE_DEFINE_ENUM(AF_INET); +TRACE_DEFINE_ENUM(AF_INET6); + +#define rpc_show_address_family(family) \ + __print_symbolic(family, \ + { AF_UNSPEC, "AF_UNSPEC" }, \ + { AF_UNIX, "AF_UNIX" }, \ + { AF_LOCAL, "AF_LOCAL" }, \ + { AF_INET, "AF_INET" }, \ + { AF_INET6, "AF_INET6" }) + DECLARE_EVENT_CLASS(xdr_buf_class, TP_PROTO( const struct xdr_buf *xdr @@ -1384,6 +1417,70 @@ DECLARE_EVENT_CLASS(svc_deferred_event, DEFINE_SVC_DEFERRED_EVENT(drop); DEFINE_SVC_DEFERRED_EVENT(revisit); +TRACE_EVENT(svcsock_new_socket, + TP_PROTO( + const struct socket *socket + ), + + TP_ARGS(socket), + + TP_STRUCT__entry( + __field(unsigned long, type) + __field(unsigned long, family) + __field(bool, listener) + ), + + TP_fast_assign( + __entry->type = socket->type; + __entry->family = socket->sk->sk_family; + __entry->listener = (socket->sk->sk_state == TCP_LISTEN); + ), + + TP_printk("type=%s family=%s%s", + show_socket_type(__entry->type), + rpc_show_address_family(__entry->family), + __entry->listener ? " (listener)" : "" + ) +); + +DECLARE_EVENT_CLASS(svcsock_class, + TP_PROTO( + const struct svc_xprt *xprt, + ssize_t result + ), + + TP_ARGS(xprt, result), + + TP_STRUCT__entry( + __field(ssize_t, result) + __field(unsigned long, flags) + __string(addr, xprt->xpt_remotebuf) + ), + + TP_fast_assign( + __entry->result = result; + __entry->flags = xprt->xpt_flags; + __assign_str(addr, xprt->xpt_remotebuf); + ), + + TP_printk("addr=%s result=%zd flags=%s", __get_str(addr), + __entry->result, show_svc_xprt_flags(__entry->flags) + ) +); + +#define DEFINE_SVCSOCK_EVENT(name) \ + DEFINE_EVENT(svcsock_class, svcsock_##name, \ + TP_PROTO( \ + const struct svc_xprt *xprt, \ + ssize_t result \ + ), \ + TP_ARGS(xprt, result)) + +DEFINE_SVCSOCK_EVENT(udp_send); +DEFINE_SVCSOCK_EVENT(tcp_send); +DEFINE_SVCSOCK_EVENT(data_ready); +DEFINE_SVCSOCK_EVENT(write_space); + DECLARE_EVENT_CLASS(cache_event, TP_PROTO( const struct cache_detail *cd, diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 1c4d0aacc531..758b835ad4ce 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -54,6 +54,8 @@ #include #include +#include + #include "socklib.h" #include "sunrpc.h" @@ -281,13 +283,10 @@ static void svc_data_ready(struct sock *sk) struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; if (svsk) { - dprintk("svc: socket %p(inet %p), busy=%d\n", - svsk, sk, - test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags)); - /* Refer to svc_setup_socket() for details. */ rmb(); svsk->sk_odata(sk); + trace_svcsock_data_ready(&svsk->sk_xprt, 0); if (!test_and_set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags)) svc_xprt_enqueue(&svsk->sk_xprt); } @@ -301,11 +300,9 @@ static void svc_write_space(struct sock *sk) struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data); if (svsk) { - dprintk("svc: socket %p(inet %p), write_space busy=%d\n", - svsk, sk, test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags)); - /* Refer to svc_setup_socket() for details. */ rmb(); + trace_svcsock_write_space(&svsk->sk_xprt, 0); svsk->sk_owspace(sk); svc_xprt_enqueue(&svsk->sk_xprt); } @@ -545,6 +542,7 @@ static int svc_udp_sendto(struct svc_rqst *rqstp) err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, 0, &sent); xdr_free_bvec(xdr); } + trace_svcsock_udp_send(xprt, err); mutex_unlock(&xprt->xpt_mutex); if (err < 0) @@ -616,7 +614,7 @@ static struct svc_xprt_class svc_udp_class = { static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv) { - int err, level, optname, one = 1; + int level, optname, one = 1; svc_xprt_init(sock_net(svsk->sk_sock->sk), &svc_udp_class, &svsk->sk_xprt, serv); @@ -647,9 +645,8 @@ static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv) default: BUG(); } - err = kernel_setsockopt(svsk->sk_sock, level, optname, - (char *)&one, sizeof(one)); - dprintk("svc: kernel_setsockopt returned %d\n", err); + kernel_setsockopt(svsk->sk_sock, level, optname, (char *)&one, + sizeof(one)); } /* @@ -1100,6 +1097,7 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp) goto out_notconn; err = xprt_sock_sendmsg(svsk->sk_sock, &msg, xdr, 0, marker, &sent); xdr_free_bvec(xdr); + trace_svcsock_tcp_send(xprt, err < 0 ? err : sent); if (err < 0 || sent != (xdr->len + sizeof(marker))) goto out_close; mutex_unlock(&xprt->xpt_mutex); @@ -1170,13 +1168,11 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) set_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags); set_bit(XPT_CONG_CTRL, &svsk->sk_xprt.xpt_flags); if (sk->sk_state == TCP_LISTEN) { - dprintk("setting up TCP socket for listening\n"); strcpy(svsk->sk_xprt.xpt_remotebuf, "listener"); set_bit(XPT_LISTENER, &svsk->sk_xprt.xpt_flags); sk->sk_data_ready = svc_tcp_listen_data_ready; set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags); } else { - dprintk("setting up TCP socket for reading\n"); sk->sk_state_change = svc_tcp_state_change; sk->sk_data_ready = svc_data_ready; sk->sk_write_space = svc_write_space; @@ -1226,7 +1222,6 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, int pmap_register = !(flags & SVC_SOCK_ANONYMOUS); int err = 0; - dprintk("svc: svc_setup_socket %p\n", sock); svsk = kzalloc(sizeof(*svsk), GFP_KERNEL); if (!svsk) return ERR_PTR(-ENOMEM); @@ -1263,12 +1258,7 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, else svc_tcp_init(svsk, serv); - dprintk("svc: svc_setup_socket created %p (inet %p), " - "listen %d close %d\n", - svsk, svsk->sk_sk, - test_bit(XPT_LISTENER, &svsk->sk_xprt.xpt_flags), - test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags)); - + trace_svcsock_new_socket(sock); return svsk; } -- cgit v1.2.3 From a0469f46faab786e8ec9f8c8526a185357b38772 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 18 Mar 2020 11:20:50 -0400 Subject: SUNRPC: Replace dprintk call sites in TCP state change callouts Report TCP socket state changes and accept failures via tracepoints, replacing dprintk() call sites. No tracepoint is added in svc_tcp_listen_data_ready. There's no information available there that isn't also reported by the svcsock_new_socket and the accept failure tracepoints. Signed-off-by: Chuck Lever --- include/trace/events/sunrpc.h | 67 +++++++++++++++++++++++++++++++++++++++++++ net/sunrpc/svcsock.c | 35 ++++------------------ 2 files changed, 73 insertions(+), 29 deletions(-) (limited to 'net/sunrpc') diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index bf086640b14a..ed8c991d4f04 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1481,6 +1481,73 @@ DEFINE_SVCSOCK_EVENT(tcp_send); DEFINE_SVCSOCK_EVENT(data_ready); DEFINE_SVCSOCK_EVENT(write_space); +TRACE_EVENT(svcsock_tcp_state, + TP_PROTO( + const struct svc_xprt *xprt, + const struct socket *socket + ), + + TP_ARGS(xprt, socket), + + TP_STRUCT__entry( + __field(unsigned long, socket_state) + __field(unsigned long, sock_state) + __field(unsigned long, flags) + __string(addr, xprt->xpt_remotebuf) + ), + + TP_fast_assign( + __entry->socket_state = socket->state; + __entry->sock_state = socket->sk->sk_state; + __entry->flags = xprt->xpt_flags; + __assign_str(addr, xprt->xpt_remotebuf); + ), + + TP_printk("addr=%s state=%s sk_state=%s flags=%s", __get_str(addr), + rpc_show_socket_state(__entry->socket_state), + rpc_show_sock_state(__entry->sock_state), + show_svc_xprt_flags(__entry->flags) + ) +); + +DECLARE_EVENT_CLASS(svcsock_accept_class, + TP_PROTO( + const struct svc_xprt *xprt, + const char *service, + long status + ), + + TP_ARGS(xprt, service, status), + + TP_STRUCT__entry( + __field(long, status) + __string(service, service) + __array(unsigned char, addr, sizeof(struct sockaddr_in6)) + ), + + TP_fast_assign( + __entry->status = status; + __assign_str(service, service); + memcpy(__entry->addr, &xprt->xpt_local, sizeof(__entry->addr)); + ), + + TP_printk("listener=%pISpc service=%s status=%ld", + __entry->addr, __get_str(service), __entry->status + ) +); + +#define DEFINE_ACCEPT_EVENT(name) \ + DEFINE_EVENT(svcsock_accept_class, svcsock_##name##_err, \ + TP_PROTO( \ + const struct svc_xprt *xprt, \ + const char *service, \ + long status \ + ), \ + TP_ARGS(xprt, service, status)) + +DEFINE_ACCEPT_EVENT(accept); +DEFINE_ACCEPT_EVENT(getpeername); + DECLARE_EVENT_CLASS(cache_event, TP_PROTO( const struct cache_detail *cd, diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 758b835ad4ce..4ac1180c6306 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -657,9 +657,6 @@ static void svc_tcp_listen_data_ready(struct sock *sk) { struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; - dprintk("svc: socket %p TCP (listen) state change %d\n", - sk, sk->sk_state); - if (svsk) { /* Refer to svc_setup_socket() for details. */ rmb(); @@ -680,8 +677,7 @@ static void svc_tcp_listen_data_ready(struct sock *sk) if (svsk) { set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags); svc_xprt_enqueue(&svsk->sk_xprt); - } else - printk("svc: socket %p: no user data\n", sk); + } } } @@ -692,15 +688,11 @@ static void svc_tcp_state_change(struct sock *sk) { struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data; - dprintk("svc: socket %p TCP (connected) state change %d (svsk %p)\n", - sk, sk->sk_state, sk->sk_user_data); - - if (!svsk) - printk("svc: socket %p: no user data\n", sk); - else { + if (svsk) { /* Refer to svc_setup_socket() for details. */ rmb(); svsk->sk_ostate(sk); + trace_svcsock_tcp_state(&svsk->sk_xprt, svsk->sk_sock); if (sk->sk_state != TCP_ESTABLISHED) { set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); svc_xprt_enqueue(&svsk->sk_xprt); @@ -721,7 +713,6 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt) struct socket *newsock; struct svc_sock *newsvsk; int err, slen; - RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]); if (!sock) return NULL; @@ -735,30 +726,18 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt) else if (err != -EAGAIN) net_warn_ratelimited("%s: accept failed (err %d)!\n", serv->sv_name, -err); + trace_svcsock_accept_err(xprt, serv->sv_name, err); return NULL; } set_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags); err = kernel_getpeername(newsock, sin); if (err < 0) { - net_warn_ratelimited("%s: peername failed (err %d)!\n", - serv->sv_name, -err); + trace_svcsock_getpeername_err(xprt, serv->sv_name, err); goto failed; /* aborted connection or whatever */ } slen = err; - /* Ideally, we would want to reject connections from unauthorized - * hosts here, but when we get encryption, the IP of the host won't - * tell us anything. For now just warn about unpriv connections. - */ - if (!svc_port_is_privileged(sin)) { - dprintk("%s: connect from unprivileged port: %s\n", - serv->sv_name, - __svc_print_addr(sin, buf, sizeof(buf))); - } - dprintk("%s: connect from %s\n", serv->sv_name, - __svc_print_addr(sin, buf, sizeof(buf))); - /* Reset the inherited callbacks before calling svc_setup_socket */ newsock->sk->sk_state_change = svsk->sk_ostate; newsock->sk->sk_data_ready = svsk->sk_odata; @@ -776,10 +755,8 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt) svc_xprt_set_remote(&newsvsk->sk_xprt, sin, slen); err = kernel_getsockname(newsock, sin); slen = err; - if (unlikely(err < 0)) { - dprintk("svc_tcp_accept: kernel_getsockname error %d\n", -err); + if (unlikely(err < 0)) slen = offsetof(struct sockaddr, sa_data); - } svc_xprt_set_local(&newsvsk->sk_xprt, sin, slen); if (sock_is_loopback(newsock->sk)) -- cgit v1.2.3 From b4af59328c25bdd585bc1da14f25d8ba4ebc616c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 2 May 2020 11:59:37 -0400 Subject: SUNRPC: Trace server-side rpcbind registration events Signed-off-by: Chuck Lever --- include/trace/events/sunrpc.h | 80 +++++++++++++++++++++++++++++++++++++++++++ net/sunrpc/svc.c | 19 +++------- 2 files changed, 84 insertions(+), 15 deletions(-) (limited to 'net/sunrpc') diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index ed8c991d4f04..6d85bbb7b8b1 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1581,6 +1581,86 @@ DEFINE_CACHE_EVENT(cache_entry_update); DEFINE_CACHE_EVENT(cache_entry_make_negative); DEFINE_CACHE_EVENT(cache_entry_no_listener); +DECLARE_EVENT_CLASS(register_class, + TP_PROTO( + const char *program, + const u32 version, + const int family, + const unsigned short protocol, + const unsigned short port, + int error + ), + + TP_ARGS(program, version, family, protocol, port, error), + + TP_STRUCT__entry( + __field(u32, version) + __field(unsigned long, family) + __field(unsigned short, protocol) + __field(unsigned short, port) + __field(int, error) + __string(program, program) + ), + + TP_fast_assign( + __entry->version = version; + __entry->family = family; + __entry->protocol = protocol; + __entry->port = port; + __entry->error = error; + __assign_str(program, program); + ), + + TP_printk("program=%sv%u proto=%s port=%u family=%s error=%d", + __get_str(program), __entry->version, + __entry->protocol == IPPROTO_UDP ? "udp" : "tcp", + __entry->port, rpc_show_address_family(__entry->family), + __entry->error + ) +); + +#define DEFINE_REGISTER_EVENT(name) \ + DEFINE_EVENT(register_class, svc_##name, \ + TP_PROTO( \ + const char *program, \ + const u32 version, \ + const int family, \ + const unsigned short protocol, \ + const unsigned short port, \ + int error \ + ), \ + TP_ARGS(program, version, family, protocol, \ + port, error)) + +DEFINE_REGISTER_EVENT(register); +DEFINE_REGISTER_EVENT(noregister); + +TRACE_EVENT(svc_unregister, + TP_PROTO( + const char *program, + const u32 version, + int error + ), + + TP_ARGS(program, version, error), + + TP_STRUCT__entry( + __field(u32, version) + __field(int, error) + __string(program, program) + ), + + TP_fast_assign( + __entry->version = version; + __entry->error = error; + __assign_str(program, program); + ), + + TP_printk("program=%sv%u error=%d", + __get_str(program), __entry->version, __entry->error + ) +); + #endif /* _TRACE_SUNRPC_H */ #include diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 9ed3126600ce..3e74d61ca7da 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -991,6 +991,7 @@ static int __svc_register(struct net *net, const char *progname, #endif } + trace_svc_register(progname, version, protocol, port, family, error); return error; } @@ -1000,11 +1001,6 @@ int svc_rpcbind_set_version(struct net *net, unsigned short proto, unsigned short port) { - dprintk("svc: svc_register(%sv%d, %s, %u, %u)\n", - progp->pg_name, version, - proto == IPPROTO_UDP? "udp" : "tcp", - port, family); - return __svc_register(net, progp->pg_name, progp->pg_prog, version, family, proto, port); @@ -1024,11 +1020,8 @@ int svc_generic_rpcbind_set(struct net *net, return 0; if (vers->vs_hidden) { - dprintk("svc: svc_register(%sv%d, %s, %u, %u)" - " (but not telling portmap)\n", - progp->pg_name, version, - proto == IPPROTO_UDP? "udp" : "tcp", - port, family); + trace_svc_noregister(progp->pg_name, version, proto, + port, family, 0); return 0; } @@ -1106,8 +1099,7 @@ static void __svc_unregister(struct net *net, const u32 program, const u32 versi if (error == -EPROTONOSUPPORT) error = rpcb_register(net, program, version, 0, 0); - dprintk("svc: %s(%sv%u), error %d\n", - __func__, progname, version, error); + trace_svc_unregister(progname, version, error); } /* @@ -1132,9 +1124,6 @@ static void svc_unregister(const struct svc_serv *serv, struct net *net) continue; if (progp->pg_vers[i]->vs_hidden) continue; - - dprintk("svc: attempting to unregister %sv%u\n", - progp->pg_name, i); __svc_unregister(net, progp->pg_prog, i, progp->pg_name); } } -- cgit v1.2.3 From 02648908d19a99532b0839959e38ae53d95d2798 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Tue, 17 Mar 2020 14:12:15 -0400 Subject: SUNRPC: Rename svc_sock::sk_reclen Clean up. I find the name of the svc_sock::sk_reclen field confusing, so I've changed it to better reflect its function. This field is not read directly to get the record length. Rather, it is a buffer containing a record marker that needs to be decoded. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svcsock.h | 6 +++--- net/sunrpc/svcsock.c | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'net/sunrpc') diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index 771baadaee9d..b7ac7fe68306 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h @@ -28,7 +28,7 @@ struct svc_sock { /* private TCP part */ /* On-the-wire fragment header: */ - __be32 sk_reclen; + __be32 sk_marker; /* As we receive a record, this includes the length received so * far (including the fragment header): */ u32 sk_tcplen; @@ -41,12 +41,12 @@ struct svc_sock { static inline u32 svc_sock_reclen(struct svc_sock *svsk) { - return ntohl(svsk->sk_reclen) & RPC_FRAGMENT_SIZE_MASK; + return be32_to_cpu(svsk->sk_marker) & RPC_FRAGMENT_SIZE_MASK; } static inline u32 svc_sock_final_rec(struct svc_sock *svsk) { - return ntohl(svsk->sk_reclen) & RPC_LAST_STREAM_FRAGMENT; + return be32_to_cpu(svsk->sk_marker) & RPC_LAST_STREAM_FRAGMENT; } /* diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 4ac1180c6306..d63b21f3f207 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -841,7 +841,7 @@ static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) struct kvec iov; want = sizeof(rpc_fraghdr) - svsk->sk_tcplen; - iov.iov_base = ((char *) &svsk->sk_reclen) + svsk->sk_tcplen; + iov.iov_base = ((char *)&svsk->sk_marker) + svsk->sk_tcplen; iov.iov_len = want; len = svc_recvfrom(rqstp, &iov, 1, want, 0); if (len < 0) @@ -938,7 +938,7 @@ static void svc_tcp_fragment_received(struct svc_sock *svsk) svc_sock_final_rec(svsk) ? "final" : "nonfinal", svc_sock_reclen(svsk)); svsk->sk_tcplen = 0; - svsk->sk_reclen = 0; + svsk->sk_marker = xdr_zero; } /* @@ -1154,7 +1154,7 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv) sk->sk_data_ready = svc_data_ready; sk->sk_write_space = svc_write_space; - svsk->sk_reclen = 0; + svsk->sk_marker = xdr_zero; svsk->sk_tcplen = 0; svsk->sk_datalen = 0; memset(&svsk->sk_pages[0], 0, sizeof(svsk->sk_pages)); -- cgit v1.2.3 From a5cda73e49aaaac58b25750f4b591c0dc9726a44 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 16 Mar 2020 14:53:04 -0400 Subject: SUNRPC: Restructure svc_tcp_recv_record() Refactor: svc_recvfrom() is going to be converted to read into bio_vecs in a moment. Unhook the only other caller, svc_tcp_recv_record(), which just wants to read the 4-byte stream record marker into a kvec. While we're in the area, streamline this helper by straight-lining the hot path, replace dprintk call sites with tracepoints, and reduce the number of atomic bit operations in this path. Signed-off-by: Chuck Lever --- include/trace/events/sunrpc.h | 24 ++++++++++++++++++++ net/sunrpc/svcsock.c | 51 +++++++++++++++++++++---------------------- 2 files changed, 49 insertions(+), 26 deletions(-) (limited to 'net/sunrpc') diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 6d85bbb7b8b1..ec4ae34a1f84 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1443,6 +1443,30 @@ TRACE_EVENT(svcsock_new_socket, ) ); +TRACE_EVENT(svcsock_marker, + TP_PROTO( + const struct svc_xprt *xprt, + __be32 marker + ), + + TP_ARGS(xprt, marker), + + TP_STRUCT__entry( + __field(unsigned int, length) + __field(bool, last) + __string(addr, xprt->xpt_remotebuf) + ), + + TP_fast_assign( + __entry->length = be32_to_cpu(marker) & RPC_FRAGMENT_SIZE_MASK; + __entry->last = be32_to_cpu(marker) & RPC_LAST_STREAM_FRAGMENT; + __assign_str(addr, xprt->xpt_remotebuf); + ), + + TP_printk("addr=%s length=%u%s", __get_str(addr), + __entry->length, __entry->last ? " (last)" : "") +); + DECLARE_EVENT_CLASS(svcsock_class, TP_PROTO( const struct svc_xprt *xprt, diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index d63b21f3f207..9c1eb13aa9b8 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -828,47 +828,45 @@ out: } /* - * Receive fragment record header. - * If we haven't gotten the record length yet, get the next four bytes. + * Receive fragment record header into sk_marker. */ -static int svc_tcp_recv_record(struct svc_sock *svsk, struct svc_rqst *rqstp) +static ssize_t svc_tcp_read_marker(struct svc_sock *svsk, + struct svc_rqst *rqstp) { - struct svc_serv *serv = svsk->sk_xprt.xpt_server; - unsigned int want; - int len; + ssize_t want, len; + /* If we haven't gotten the record length yet, + * get the next four bytes. + */ if (svsk->sk_tcplen < sizeof(rpc_fraghdr)) { + struct msghdr msg = { NULL }; struct kvec iov; want = sizeof(rpc_fraghdr) - svsk->sk_tcplen; iov.iov_base = ((char *)&svsk->sk_marker) + svsk->sk_tcplen; iov.iov_len = want; - len = svc_recvfrom(rqstp, &iov, 1, want, 0); + iov_iter_kvec(&msg.msg_iter, READ, &iov, 1, want); + len = sock_recvmsg(svsk->sk_sock, &msg, MSG_DONTWAIT); if (len < 0) - goto error; + return len; svsk->sk_tcplen += len; - if (len < want) { - dprintk("svc: short recvfrom while reading record " - "length (%d of %d)\n", len, want); - return -EAGAIN; + /* call again to read the remaining bytes */ + goto err_short; } - - dprintk("svc: TCP record, %d bytes\n", svc_sock_reclen(svsk)); + trace_svcsock_marker(&svsk->sk_xprt, svsk->sk_marker); if (svc_sock_reclen(svsk) + svsk->sk_datalen > - serv->sv_max_mesg) { - net_notice_ratelimited("RPC: fragment too large: %d\n", - svc_sock_reclen(svsk)); - goto err_delete; - } + svsk->sk_xprt.xpt_server->sv_max_mesg) + goto err_too_large; } - return svc_sock_reclen(svsk); -error: - dprintk("RPC: TCP recv_record got %d\n", len); - return len; -err_delete: + +err_too_large: + net_notice_ratelimited("svc: %s %s RPC fragment too large: %d\n", + __func__, svsk->sk_xprt.xpt_server->sv_name, + svc_sock_reclen(svsk)); set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); +err_short: return -EAGAIN; } @@ -961,12 +959,13 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) test_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags), test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags)); - len = svc_tcp_recv_record(svsk, rqstp); + clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); + len = svc_tcp_read_marker(svsk, rqstp); if (len < 0) goto error; base = svc_tcp_restore_pages(svsk, rqstp); - want = svc_sock_reclen(svsk) - (svsk->sk_tcplen - sizeof(rpc_fraghdr)); + want = len - (svsk->sk_tcplen - sizeof(rpc_fraghdr)); vec = rqstp->rq_vec; -- cgit v1.2.3 From 7dae1dd726aac7871d9cc56ed9d13fa09c0212f9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 20 May 2020 13:41:02 -0400 Subject: SUNRPC: Replace dprintk() call sites in TCP receive path Signed-off-by: Chuck Lever --- include/trace/events/sunrpc.h | 1 + net/sunrpc/svcsock.c | 16 ++-------------- 2 files changed, 3 insertions(+), 14 deletions(-) (limited to 'net/sunrpc') diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index ec4ae34a1f84..bfea554bd91f 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1502,6 +1502,7 @@ DECLARE_EVENT_CLASS(svcsock_class, DEFINE_SVCSOCK_EVENT(udp_send); DEFINE_SVCSOCK_EVENT(tcp_send); +DEFINE_SVCSOCK_EVENT(tcp_recv); DEFINE_SVCSOCK_EVENT(data_ready); DEFINE_SVCSOCK_EVENT(write_space); diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 9c1eb13aa9b8..8cf06b676831 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -119,9 +119,8 @@ static void svc_release_skb(struct svc_rqst *rqstp) if (skb) { struct svc_sock *svsk = container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); - rqstp->rq_xprt_ctxt = NULL; - dprintk("svc: service %p, releasing skb %p\n", rqstp, skb); + rqstp->rq_xprt_ctxt = NULL; skb_free_datagram_locked(svsk->sk_sk, skb); } } @@ -132,8 +131,6 @@ static void svc_release_udp_skb(struct svc_rqst *rqstp) if (skb) { rqstp->rq_xprt_ctxt = NULL; - - dprintk("svc: service %p, releasing skb %p\n", rqstp, skb); consume_skb(skb); } } @@ -245,8 +242,6 @@ static ssize_t svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, if (len == buflen) set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); - dprintk("svc: socket %p recvfrom(%p, %zu) = %zd\n", - svsk, iov[0].iov_base, iov[0].iov_len, len); return len; } @@ -932,9 +927,6 @@ static int copy_pages_to_kvecs(struct kvec *vec, struct page **pages, int len) static void svc_tcp_fragment_received(struct svc_sock *svsk) { /* If we have more data, signal svc_xprt_enqueue() to try again */ - dprintk("svc: TCP %s record (%d bytes)\n", - svc_sock_final_rec(svsk) ? "final" : "nonfinal", - svc_sock_reclen(svsk)); svsk->sk_tcplen = 0; svsk->sk_marker = xdr_zero; } @@ -954,11 +946,6 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) __be32 calldir; int pnum; - dprintk("svc: tcp_recv %p data %d conn %d close %d\n", - svsk, test_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags), - test_bit(XPT_CONN, &svsk->sk_xprt.xpt_flags), - test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags)); - clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); len = svc_tcp_read_marker(svsk, rqstp); if (len < 0) @@ -977,6 +964,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) /* Now receive data */ len = svc_recvfrom(rqstp, vec, pnum, base + want, base); if (len >= 0) { + trace_svcsock_tcp_recv(&svsk->sk_xprt, len); svsk->sk_tcplen += len; svsk->sk_datalen += len; } -- cgit v1.2.3 From 6be8c5949149ff45c86dd9e49dfab920078bfcd5 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 20 May 2020 12:29:13 -0400 Subject: SUNRPC: Refactor recvfrom path dealing with incomplete TCP receives Clean up: move exception processing out of the main path. Signed-off-by: Chuck Lever --- include/trace/events/sunrpc.h | 31 +++++++++++++++++++++++++++++++ net/sunrpc/svcsock.c | 39 +++++++++++++++++++-------------------- 2 files changed, 50 insertions(+), 20 deletions(-) (limited to 'net/sunrpc') diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index bfea554bd91f..81659876b4af 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1503,9 +1503,40 @@ DECLARE_EVENT_CLASS(svcsock_class, DEFINE_SVCSOCK_EVENT(udp_send); DEFINE_SVCSOCK_EVENT(tcp_send); DEFINE_SVCSOCK_EVENT(tcp_recv); +DEFINE_SVCSOCK_EVENT(tcp_recv_eagain); +DEFINE_SVCSOCK_EVENT(tcp_recv_err); DEFINE_SVCSOCK_EVENT(data_ready); DEFINE_SVCSOCK_EVENT(write_space); +TRACE_EVENT(svcsock_tcp_recv_short, + TP_PROTO( + const struct svc_xprt *xprt, + u32 expected, + u32 received + ), + + TP_ARGS(xprt, expected, received), + + TP_STRUCT__entry( + __field(u32, expected) + __field(u32, received) + __field(unsigned long, flags) + __string(addr, xprt->xpt_remotebuf) + ), + + TP_fast_assign( + __entry->expected = expected; + __entry->received = received; + __entry->flags = xprt->xpt_flags; + __assign_str(addr, xprt->xpt_remotebuf); + ), + + TP_printk("addr=%s flags=%s expected=%u received=%u", + __get_str(addr), show_svc_xprt_flags(__entry->flags), + __entry->expected, __entry->received + ) +); + TRACE_EVENT(svcsock_tcp_state, TP_PROTO( const struct svc_xprt *xprt, diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 8cf06b676831..087e21b0f1bb 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -968,23 +968,10 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) svsk->sk_tcplen += len; svsk->sk_datalen += len; } - if (len != want || !svc_sock_final_rec(svsk)) { - svc_tcp_save_pages(svsk, rqstp); - if (len < 0 && len != -EAGAIN) - goto err_delete; - if (len == want) - svc_tcp_fragment_received(svsk); - else - dprintk("svc: incomplete TCP record (%d of %d)\n", - (int)(svsk->sk_tcplen - sizeof(rpc_fraghdr)), - svc_sock_reclen(svsk)); - goto err_noclose; - } - - if (svsk->sk_datalen < 8) { - svsk->sk_datalen = 0; - goto err_delete; /* client is nuts. */ - } + if (len != want || !svc_sock_final_rec(svsk)) + goto err_incomplete; + if (svsk->sk_datalen < 8) + goto err_nuts; rqstp->rq_arg.len = svsk->sk_datalen; rqstp->rq_arg.page_base = 0; @@ -1019,14 +1006,26 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) return rqstp->rq_arg.len; +err_incomplete: + svc_tcp_save_pages(svsk, rqstp); + if (len < 0 && len != -EAGAIN) + goto err_delete; + if (len == want) + svc_tcp_fragment_received(svsk); + else + trace_svcsock_tcp_recv_short(&svsk->sk_xprt, + svc_sock_reclen(svsk), + svsk->sk_tcplen - sizeof(rpc_fraghdr)); + goto err_noclose; error: if (len != -EAGAIN) goto err_delete; - dprintk("RPC: TCP recvfrom got EAGAIN\n"); + trace_svcsock_tcp_recv_eagain(&svsk->sk_xprt, 0); return 0; +err_nuts: + svsk->sk_datalen = 0; err_delete: - printk(KERN_NOTICE "%s: recvfrom returned errno %d\n", - svsk->sk_xprt.xpt_server->sv_name, -len); + trace_svcsock_tcp_recv_err(&svsk->sk_xprt, len); set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); err_noclose: return 0; /* record not complete */ -- cgit v1.2.3 From cca557a5a60faaf307bbb76035dd90ec47cf0e0c Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 20 May 2020 12:53:05 -0400 Subject: SUNRPC: Clean up svc_release_skb() functions Rename these functions using the convention used for other xpo method entry points. Signed-off-by: Chuck Lever --- net/sunrpc/svcsock.c | 23 +++++++++++++++-------- 1 file changed, 15 insertions(+), 8 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 087e21b0f1bb..5b2981a0711c 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -109,10 +109,12 @@ static void svc_reclassify_socket(struct socket *sock) } #endif -/* - * Release an skbuff after use +/** + * svc_tcp_release_rqst - Release transport-related resources + * @rqstp: request structure with resources to be released + * */ -static void svc_release_skb(struct svc_rqst *rqstp) +static void svc_tcp_release_rqst(struct svc_rqst *rqstp) { struct sk_buff *skb = rqstp->rq_xprt_ctxt; @@ -125,7 +127,12 @@ static void svc_release_skb(struct svc_rqst *rqstp) } } -static void svc_release_udp_skb(struct svc_rqst *rqstp) +/** + * svc_udp_release_rqst - Release transport-related resources + * @rqstp: request structure with resources to be released + * + */ +static void svc_udp_release_rqst(struct svc_rqst *rqstp) { struct sk_buff *skb = rqstp->rq_xprt_ctxt; @@ -521,7 +528,7 @@ static int svc_udp_sendto(struct svc_rqst *rqstp) unsigned int uninitialized_var(sent); int err; - svc_release_udp_skb(rqstp); + svc_udp_release_rqst(rqstp); svc_set_cmsg_data(rqstp, cmh); @@ -590,7 +597,7 @@ static const struct svc_xprt_ops svc_udp_ops = { .xpo_recvfrom = svc_udp_recvfrom, .xpo_sendto = svc_udp_sendto, .xpo_read_payload = svc_sock_read_payload, - .xpo_release_rqst = svc_release_udp_skb, + .xpo_release_rqst = svc_udp_release_rqst, .xpo_detach = svc_sock_detach, .xpo_free = svc_sock_free, .xpo_has_wspace = svc_udp_has_wspace, @@ -1053,7 +1060,7 @@ static int svc_tcp_sendto(struct svc_rqst *rqstp) unsigned int uninitialized_var(sent); int err; - svc_release_skb(rqstp); + svc_tcp_release_rqst(rqstp); mutex_lock(&xprt->xpt_mutex); if (svc_xprt_is_dead(xprt)) @@ -1093,7 +1100,7 @@ static const struct svc_xprt_ops svc_tcp_ops = { .xpo_recvfrom = svc_tcp_recvfrom, .xpo_sendto = svc_tcp_sendto, .xpo_read_payload = svc_sock_read_payload, - .xpo_release_rqst = svc_release_skb, + .xpo_release_rqst = svc_tcp_release_rqst, .xpo_detach = svc_tcp_sock_detach, .xpo_free = svc_sock_free, .xpo_has_wspace = svc_tcp_has_wspace, -- cgit v1.2.3 From ca07eda33e01eafa7a26ec06974f7eacee6a89c8 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 20 May 2020 17:30:12 -0400 Subject: SUNRPC: Refactor svc_recvfrom() This function is not currently "generic" so remove the documenting comment and rename it appropriately. Its internals are converted to use bio_vecs for reading from the transport socket. In existing typical sunrpc uses of bio_vecs, the bio_vec array is allocated dynamically. Here, instead, an array of bio_vecs is added to svc_rqst. The lifetime of this array can be greater than one call to xpo_recvfrom(): - Multiple calls to xpo_recvfrom() might be needed to read an RPC message completely. - At some later point, rq_arg.bvecs will point to this array and it will carry the received message into svc_process(). I also expect that a future optimization will remove either the rq_vec or rq_pages array in favor of rq_bvec, thus conserving the size of struct svc_rqst. Signed-off-by: Chuck Lever --- include/linux/sunrpc/svc.h | 1 + net/sunrpc/svcsock.c | 109 ++++++++++++++++++++++++++++----------------- 2 files changed, 69 insertions(+), 41 deletions(-) (limited to 'net/sunrpc') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index fd390894a584..05da19a0516d 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -254,6 +254,7 @@ struct svc_rqst { struct page * *rq_page_end; /* one past the last page */ struct kvec rq_vec[RPCSVC_MAXPAGES]; /* generally useful.. */ + struct bio_vec rq_bvec[RPCSVC_MAXPAGES]; __be32 rq_xid; /* transmission id */ u32 rq_prog; /* program number */ diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 5b2981a0711c..3e25511b800e 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -223,26 +223,62 @@ static int svc_one_sock_name(struct svc_sock *svsk, char *buf, int remaining) return len; } +#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE +static void svc_flush_bvec(const struct bio_vec *bvec, size_t size, size_t seek) +{ + struct bvec_iter bi = { + .bi_size = size, + }; + struct bio_vec bv; + + bvec_iter_advance(bvec, &bi, seek & PAGE_MASK); + for_each_bvec(bv, bvec, bi, bi) + flush_dcache_page(bv.bv_page); +} +#else +static inline void svc_flush_bvec(const struct bio_vec *bvec, size_t size, + size_t seek) +{ +} +#endif + /* - * Generic recvfrom routine. + * Read from @rqstp's transport socket. The incoming message fills whole + * pages in @rqstp's rq_pages array until the last page of the message + * has been received into a partial page. */ -static ssize_t svc_recvfrom(struct svc_rqst *rqstp, struct kvec *iov, - unsigned int nr, size_t buflen, unsigned int base) +static ssize_t svc_tcp_read_msg(struct svc_rqst *rqstp, size_t buflen, + size_t seek) { struct svc_sock *svsk = container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); + struct bio_vec *bvec = rqstp->rq_bvec; struct msghdr msg = { NULL }; + unsigned int i; ssize_t len; + size_t t; rqstp->rq_xprt_hlen = 0; clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); - iov_iter_kvec(&msg.msg_iter, READ, iov, nr, buflen); - if (base != 0) { - iov_iter_advance(&msg.msg_iter, base); - buflen -= base; + + for (i = 0, t = 0; t < buflen; i++, t += PAGE_SIZE) { + bvec[i].bv_page = rqstp->rq_pages[i]; + bvec[i].bv_len = PAGE_SIZE; + bvec[i].bv_offset = 0; + } + rqstp->rq_respages = &rqstp->rq_pages[i]; + rqstp->rq_next_page = rqstp->rq_respages + 1; + + iov_iter_bvec(&msg.msg_iter, READ, bvec, i, buflen); + if (seek) { + iov_iter_advance(&msg.msg_iter, seek); + buflen -= seek; } len = sock_recvmsg(svsk->sk_sock, &msg, MSG_DONTWAIT); + if (len > 0) + svc_flush_bvec(bvec, len, seek); + /* If we read a full record, then assume there may be more * data to read (stream based sockets only!) */ @@ -775,13 +811,14 @@ failed: return NULL; } -static unsigned int svc_tcp_restore_pages(struct svc_sock *svsk, struct svc_rqst *rqstp) +static size_t svc_tcp_restore_pages(struct svc_sock *svsk, + struct svc_rqst *rqstp) { - unsigned int i, len, npages; + size_t len = svsk->sk_datalen; + unsigned int i, npages; - if (svsk->sk_datalen == 0) + if (!len) return 0; - len = svsk->sk_datalen; npages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT; for (i = 0; i < npages; i++) { if (rqstp->rq_pages[i] != NULL) @@ -917,20 +954,6 @@ unlock_eagain: return -EAGAIN; } -static int copy_pages_to_kvecs(struct kvec *vec, struct page **pages, int len) -{ - int i = 0; - int t = 0; - - while (t < len) { - vec[i].iov_base = page_address(pages[i]); - vec[i].iov_len = PAGE_SIZE; - i++; - t += PAGE_SIZE; - } - return i; -} - static void svc_tcp_fragment_received(struct svc_sock *svsk) { /* If we have more data, signal svc_xprt_enqueue() to try again */ @@ -938,20 +961,33 @@ static void svc_tcp_fragment_received(struct svc_sock *svsk) svsk->sk_marker = xdr_zero; } -/* - * Receive data from a TCP socket. +/** + * svc_tcp_recvfrom - Receive data from a TCP socket + * @rqstp: request structure into which to receive an RPC Call + * + * Called in a loop when XPT_DATA has been set. + * + * Read the 4-byte stream record marker, then use the record length + * in that marker to set up exactly the resources needed to receive + * the next RPC message into @rqstp. + * + * Returns: + * On success, the number of bytes in a received RPC Call, or + * %0 if a complete RPC Call message was not ready to return + * + * The zero return case handles partial receives and callback Replies. + * The state of a partial receive is preserved in the svc_sock for + * the next call to svc_tcp_recvfrom. */ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) { struct svc_sock *svsk = container_of(rqstp->rq_xprt, struct svc_sock, sk_xprt); struct svc_serv *serv = svsk->sk_xprt.xpt_server; - int len; - struct kvec *vec; - unsigned int want, base; + size_t want, base; + ssize_t len; __be32 *p; __be32 calldir; - int pnum; clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); len = svc_tcp_read_marker(svsk, rqstp); @@ -960,16 +996,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp) base = svc_tcp_restore_pages(svsk, rqstp); want = len - (svsk->sk_tcplen - sizeof(rpc_fraghdr)); - - vec = rqstp->rq_vec; - - pnum = copy_pages_to_kvecs(&vec[0], &rqstp->rq_pages[0], base + want); - - rqstp->rq_respages = &rqstp->rq_pages[pnum]; - rqstp->rq_next_page = rqstp->rq_respages + 1; - - /* Now receive data */ - len = svc_recvfrom(rqstp, vec, pnum, base + want, base); + len = svc_tcp_read_msg(rqstp, base + want, base); if (len >= 0) { trace_svcsock_tcp_recv(&svsk->sk_xprt, len); svsk->sk_tcplen += len; -- cgit v1.2.3 From fff1ebb269b6c18b21bc0ddab7dd8b0c5e68e0a1 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 20 May 2020 17:30:24 -0400 Subject: SUNRPC: Restructure svc_udp_recvfrom() Clean up. At this point, we are not ready yet to support bio_vecs in the UDP transport implementation. However, we can clean up svc_udp_recvfrom() to match the tracing and straight-lining recently changes made in svc_tcp_recvfrom(). Signed-off-by: Chuck Lever --- include/trace/events/sunrpc.h | 2 ++ net/sunrpc/svcsock.c | 60 +++++++++++++++++++++++++------------------ 2 files changed, 37 insertions(+), 25 deletions(-) (limited to 'net/sunrpc') diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 81659876b4af..08c7d618ceb4 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1501,6 +1501,8 @@ DECLARE_EVENT_CLASS(svcsock_class, TP_ARGS(xprt, result)) DEFINE_SVCSOCK_EVENT(udp_send); +DEFINE_SVCSOCK_EVENT(udp_recv); +DEFINE_SVCSOCK_EVENT(udp_recv_err); DEFINE_SVCSOCK_EVENT(tcp_send); DEFINE_SVCSOCK_EVENT(tcp_recv); DEFINE_SVCSOCK_EVENT(tcp_recv_eagain); diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 3e25511b800e..97d2b6f8c791 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -425,8 +425,15 @@ static int svc_udp_get_dest_address(struct svc_rqst *rqstp, return 0; } -/* - * Receive a datagram from a UDP socket. +/** + * svc_udp_recvfrom - Receive a datagram from a UDP socket. + * @rqstp: request structure into which to receive an RPC Call + * + * Called in a loop when XPT_DATA has been set. + * + * Returns: + * On success, the number of bytes in a received RPC Call, or + * %0 if a complete RPC Call message was not ready to return */ static int svc_udp_recvfrom(struct svc_rqst *rqstp) { @@ -460,20 +467,14 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) svc_sock_setbufsize(svsk, serv->sv_nrthreads + 3); clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); - skb = NULL; err = kernel_recvmsg(svsk->sk_sock, &msg, NULL, 0, 0, MSG_PEEK | MSG_DONTWAIT); - if (err >= 0) - skb = skb_recv_udp(svsk->sk_sk, 0, 1, &err); - - if (skb == NULL) { - if (err != -EAGAIN) { - /* possibly an icmp error */ - dprintk("svc: recvfrom returned error %d\n", -err); - set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); - } - return 0; - } + if (err < 0) + goto out_recv_err; + skb = skb_recv_udp(svsk->sk_sk, 0, 1, &err); + if (!skb) + goto out_recv_err; + len = svc_addr_len(svc_addr(rqstp)); rqstp->rq_addrlen = len; if (skb->tstamp == 0) { @@ -484,26 +485,21 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) sock_write_timestamp(svsk->sk_sk, skb->tstamp); set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); /* there may be more data... */ - len = skb->len; + len = skb->len; rqstp->rq_arg.len = len; + trace_svcsock_udp_recv(&svsk->sk_xprt, len); rqstp->rq_prot = IPPROTO_UDP; - if (!svc_udp_get_dest_address(rqstp, cmh)) { - net_warn_ratelimited("svc: received unknown control message %d/%d; dropping RPC reply datagram\n", - cmh->cmsg_level, cmh->cmsg_type); - goto out_free; - } + if (!svc_udp_get_dest_address(rqstp, cmh)) + goto out_cmsg_err; rqstp->rq_daddrlen = svc_addr_len(svc_daddr(rqstp)); if (skb_is_nonlinear(skb)) { /* we have to copy */ local_bh_disable(); - if (csum_partial_copy_to_xdr(&rqstp->rq_arg, skb)) { - local_bh_enable(); - /* checksum error */ - goto out_free; - } + if (csum_partial_copy_to_xdr(&rqstp->rq_arg, skb)) + goto out_bh_enable; local_bh_enable(); consume_skb(skb); } else { @@ -531,6 +527,20 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp) serv->sv_stats->netudpcnt++; return len; + +out_recv_err: + if (err != -EAGAIN) { + /* possibly an icmp error */ + set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags); + } + trace_svcsock_udp_recv_err(&svsk->sk_xprt, err); + return 0; +out_cmsg_err: + net_warn_ratelimited("svc: received unknown control message %d/%d; dropping RPC reply datagram\n", + cmh->cmsg_level, cmh->cmsg_type); + goto out_free; +out_bh_enable: + local_bh_enable(); out_free: kfree_skb(skb); return 0; -- cgit v1.2.3 From 8954c5c212d321404a7aaf5b48f04a49655c928d Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 15 Apr 2020 09:05:26 -0400 Subject: SUNRPC: Clean up request deferral tracepoints - Rename these so they are easy to enable and search for as a set - Move the tracepoints to get a more accurate sense of control flow - Tracepoints should not fire on xprt shutdown - Display memory address in case data structure had been corrupted - Abandon dprintk in these paths I haven't ever gotten one of these tracepoints to trigger. I wonder if we should simply remove them. Signed-off-by: Chuck Lever --- include/trace/events/sunrpc.h | 11 ++++++++--- net/sunrpc/svc_xprt.c | 12 ++++++------ 2 files changed, 14 insertions(+), 9 deletions(-) (limited to 'net/sunrpc') diff --git a/include/trace/events/sunrpc.h b/include/trace/events/sunrpc.h index 2a7f6f83341f..852413cbb7d9 100644 --- a/include/trace/events/sunrpc.h +++ b/include/trace/events/sunrpc.h @@ -1406,27 +1406,32 @@ DECLARE_EVENT_CLASS(svc_deferred_event, TP_ARGS(dr), TP_STRUCT__entry( + __field(const void *, dr) __field(u32, xid) __string(addr, dr->xprt->xpt_remotebuf) ), TP_fast_assign( + __entry->dr = dr; __entry->xid = be32_to_cpu(*(__be32 *)(dr->args + (dr->xprt_hlen>>2))); __assign_str(addr, dr->xprt->xpt_remotebuf); ), - TP_printk("addr=%s xid=0x%08x", __get_str(addr), __entry->xid) + TP_printk("addr=%s dr=%p xid=0x%08x", __get_str(addr), __entry->dr, + __entry->xid) ); + #define DEFINE_SVC_DEFERRED_EVENT(name) \ - DEFINE_EVENT(svc_deferred_event, svc_##name##_deferred, \ + DEFINE_EVENT(svc_deferred_event, svc_defer_##name, \ TP_PROTO( \ const struct svc_deferred_req *dr \ ), \ TP_ARGS(dr)) DEFINE_SVC_DEFERRED_EVENT(drop); -DEFINE_SVC_DEFERRED_EVENT(revisit); +DEFINE_SVC_DEFERRED_EVENT(queue); +DEFINE_SVC_DEFERRED_EVENT(recv); TRACE_EVENT(svcsock_new_socket, TP_PROTO( diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 0a546ef02dde..c1ff8cdb5b2b 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -1145,16 +1145,15 @@ static void svc_revisit(struct cache_deferred_req *dreq, int too_many) set_bit(XPT_DEFERRED, &xprt->xpt_flags); if (too_many || test_bit(XPT_DEAD, &xprt->xpt_flags)) { spin_unlock(&xprt->xpt_lock); - dprintk("revisit canceled\n"); + trace_svc_defer_drop(dr); svc_xprt_put(xprt); - trace_svc_drop_deferred(dr); kfree(dr); return; } - dprintk("revisit queued\n"); dr->xprt = NULL; list_add(&dr->handle.recent, &xprt->xpt_deferred); spin_unlock(&xprt->xpt_lock); + trace_svc_defer_queue(dr); svc_xprt_enqueue(xprt); svc_xprt_put(xprt); } @@ -1200,22 +1199,24 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req) memcpy(dr->args, rqstp->rq_arg.head[0].iov_base - skip, dr->argslen << 2); } + trace_svc_defer(rqstp); svc_xprt_get(rqstp->rq_xprt); dr->xprt = rqstp->rq_xprt; set_bit(RQ_DROPME, &rqstp->rq_flags); dr->handle.revisit = svc_revisit; - trace_svc_defer(rqstp); return &dr->handle; } /* * recv data from a deferred request into an active one */ -static int svc_deferred_recv(struct svc_rqst *rqstp) +static noinline int svc_deferred_recv(struct svc_rqst *rqstp) { struct svc_deferred_req *dr = rqstp->rq_deferred; + trace_svc_defer_recv(dr); + /* setup iov_base past transport header */ rqstp->rq_arg.head[0].iov_base = dr->args + (dr->xprt_hlen>>2); /* The iov_len does not include the transport header bytes */ @@ -1246,7 +1247,6 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt) struct svc_deferred_req, handle.recent); list_del_init(&dr->handle.recent); - trace_svc_revisit_deferred(dr); } else clear_bit(XPT_DEFERRED, &xprt->xpt_flags); spin_unlock(&xprt->xpt_lock); -- cgit v1.2.3