summaryrefslogtreecommitdiffstats
path: root/net/sunrpc/xprtrdma/rpc_rdma.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-01-30 19:03:48 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2018-01-30 19:03:48 -0800
commitefd52b5d363e3e3b6224ad39949219c0df117c91 (patch)
tree2d885d2f431a324af58d8f267755240bff3e32da /net/sunrpc/xprtrdma/rpc_rdma.c
parent1ed2d76e0213751c82e3a242b61b0883daf330df (diff)
parente231c6879cfd44e4fffd384bb6dd7d313249a523 (diff)
downloadlinux-efd52b5d363e3e3b6224ad39949219c0df117c91.tar.bz2
Merge tag 'nfs-for-4.16-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
Pull NFS client updates from Trond Myklebust: "Highlights include: Stable bugfixes: - Fix breakages in the nfsstat utility due to the inclusion of the NFSv4 LOOKUPP operation - Fix a NULL pointer dereference in nfs_idmap_prepare_pipe_upcall() due to nfs_idmap_legacy_upcall() being called without an 'aux' parameter - Fix a refcount leak in the standard O_DIRECT error path - Fix a refcount leak in the pNFS O_DIRECT fallback to MDS path - Fix CPU latency issues with nfs_commit_release_pages() - Fix the LAYOUTUNAVAILABLE error case in the file layout type - NFS: Fix a race between mmap() and O_DIRECT Features: - Support the statx() mask and query flags to enable optimisations when the user is requesting only attributes that are already up to date in the inode cache, or is specifying the AT_STATX_DONT_SYNC flag - Add a module alias for the SCSI pNFS layout type Bugfixes: - Automounting when resolving a NFSv4 referral should preserve the RDMA transport protocol settings - Various other RDMA bugfixes from Chuck - pNFS block layout fixes - Always set NFS_LOCK_LOST when a lock is lost" * tag 'nfs-for-4.16-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (69 commits) NFS: Fix a race between mmap() and O_DIRECT NFS: Remove a redundant call to unmap_mapping_range() pnfs/blocklayout: Ensure disk address in block device map pnfs/blocklayout: pnfs_block_dev_map uses bytes, not sectors lockd: Fix server refcounting SUNRPC: Fix null rpc_clnt dereference in rpc_task_queued tracepoint SUNRPC: Micro-optimize __rpc_execute SUNRPC: task_run_action should display tk_callback sunrpc: Format RPC events consistently for display SUNRPC: Trace xprt_timer events xprtrdma: Correct some documenting comments xprtrdma: Fix "bytes registered" accounting xprtrdma: Instrument allocation/release of rpcrdma_req/rep objects xprtrdma: Add trace points to instrument QP and CQ access upcalls xprtrdma: Add trace points in the client-side backchannel code paths xprtrdma: Add trace points for connect events xprtrdma: Add trace points to instrument MR allocation and recovery xprtrdma: Add trace points to instrument memory invalidation xprtrdma: Add trace points in reply decoder path xprtrdma: Add trace points to instrument memory registration ..
Diffstat (limited to 'net/sunrpc/xprtrdma/rpc_rdma.c')
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c162
1 files changed, 59 insertions, 103 deletions
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index a3f2ab283aeb..162e5dd82466 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -292,15 +292,15 @@ encode_item_not_present(struct xdr_stream *xdr)
}
static void
-xdr_encode_rdma_segment(__be32 *iptr, struct rpcrdma_mw *mw)
+xdr_encode_rdma_segment(__be32 *iptr, struct rpcrdma_mr *mr)
{
- *iptr++ = cpu_to_be32(mw->mw_handle);
- *iptr++ = cpu_to_be32(mw->mw_length);
- xdr_encode_hyper(iptr, mw->mw_offset);
+ *iptr++ = cpu_to_be32(mr->mr_handle);
+ *iptr++ = cpu_to_be32(mr->mr_length);
+ xdr_encode_hyper(iptr, mr->mr_offset);
}
static int
-encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw)
+encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr)
{
__be32 *p;
@@ -308,12 +308,12 @@ encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw)
if (unlikely(!p))
return -EMSGSIZE;
- xdr_encode_rdma_segment(p, mw);
+ xdr_encode_rdma_segment(p, mr);
return 0;
}
static int
-encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw,
+encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mr *mr,
u32 position)
{
__be32 *p;
@@ -324,7 +324,7 @@ encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw,
*p++ = xdr_one; /* Item present */
*p++ = cpu_to_be32(position);
- xdr_encode_rdma_segment(p, mw);
+ xdr_encode_rdma_segment(p, mr);
return 0;
}
@@ -348,7 +348,7 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
{
struct xdr_stream *xdr = &req->rl_stream;
struct rpcrdma_mr_seg *seg;
- struct rpcrdma_mw *mw;
+ struct rpcrdma_mr *mr;
unsigned int pos;
int nsegs;
@@ -363,21 +363,17 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
do {
seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
- false, &mw);
+ false, &mr);
if (IS_ERR(seg))
return PTR_ERR(seg);
- rpcrdma_push_mw(mw, &req->rl_registered);
+ rpcrdma_mr_push(mr, &req->rl_registered);
- if (encode_read_segment(xdr, mw, pos) < 0)
+ if (encode_read_segment(xdr, mr, pos) < 0)
return -EMSGSIZE;
- dprintk("RPC: %5u %s: pos %u %u@0x%016llx:0x%08x (%s)\n",
- rqst->rq_task->tk_pid, __func__, pos,
- mw->mw_length, (unsigned long long)mw->mw_offset,
- mw->mw_handle, mw->mw_nents < nsegs ? "more" : "last");
-
+ trace_xprtrdma_read_chunk(rqst->rq_task, pos, mr, nsegs);
r_xprt->rx_stats.read_chunk_count++;
- nsegs -= mw->mw_nents;
+ nsegs -= mr->mr_nents;
} while (nsegs);
return 0;
@@ -404,7 +400,7 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
{
struct xdr_stream *xdr = &req->rl_stream;
struct rpcrdma_mr_seg *seg;
- struct rpcrdma_mw *mw;
+ struct rpcrdma_mr *mr;
int nsegs, nchunks;
__be32 *segcount;
@@ -425,23 +421,19 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
nchunks = 0;
do {
seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
- true, &mw);
+ true, &mr);
if (IS_ERR(seg))
return PTR_ERR(seg);
- rpcrdma_push_mw(mw, &req->rl_registered);
+ rpcrdma_mr_push(mr, &req->rl_registered);
- if (encode_rdma_segment(xdr, mw) < 0)
+ if (encode_rdma_segment(xdr, mr) < 0)
return -EMSGSIZE;
- dprintk("RPC: %5u %s: %u@0x016%llx:0x%08x (%s)\n",
- rqst->rq_task->tk_pid, __func__,
- mw->mw_length, (unsigned long long)mw->mw_offset,
- mw->mw_handle, mw->mw_nents < nsegs ? "more" : "last");
-
+ trace_xprtrdma_write_chunk(rqst->rq_task, mr, nsegs);
r_xprt->rx_stats.write_chunk_count++;
- r_xprt->rx_stats.total_rdma_request += seg->mr_len;
+ r_xprt->rx_stats.total_rdma_request += mr->mr_length;
nchunks++;
- nsegs -= mw->mw_nents;
+ nsegs -= mr->mr_nents;
} while (nsegs);
/* Update count of segments in this Write chunk */
@@ -468,7 +460,7 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
{
struct xdr_stream *xdr = &req->rl_stream;
struct rpcrdma_mr_seg *seg;
- struct rpcrdma_mw *mw;
+ struct rpcrdma_mr *mr;
int nsegs, nchunks;
__be32 *segcount;
@@ -487,23 +479,19 @@ rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req,
nchunks = 0;
do {
seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs,
- true, &mw);
+ true, &mr);
if (IS_ERR(seg))
return PTR_ERR(seg);
- rpcrdma_push_mw(mw, &req->rl_registered);
+ rpcrdma_mr_push(mr, &req->rl_registered);
- if (encode_rdma_segment(xdr, mw) < 0)
+ if (encode_rdma_segment(xdr, mr) < 0)
return -EMSGSIZE;
- dprintk("RPC: %5u %s: %u@0x%016llx:0x%08x (%s)\n",
- rqst->rq_task->tk_pid, __func__,
- mw->mw_length, (unsigned long long)mw->mw_offset,
- mw->mw_handle, mw->mw_nents < nsegs ? "more" : "last");
-
+ trace_xprtrdma_reply_chunk(rqst->rq_task, mr, nsegs);
r_xprt->rx_stats.reply_chunk_count++;
- r_xprt->rx_stats.total_rdma_request += seg->mr_len;
+ r_xprt->rx_stats.total_rdma_request += mr->mr_length;
nchunks++;
- nsegs -= mw->mw_nents;
+ nsegs -= mr->mr_nents;
} while (nsegs);
/* Update count of segments in the Reply chunk */
@@ -524,9 +512,6 @@ rpcrdma_unmap_sendctx(struct rpcrdma_sendctx *sc)
struct ib_sge *sge;
unsigned int count;
- dprintk("RPC: %s: unmapping %u sges for sc=%p\n",
- __func__, sc->sc_unmap_count, sc);
-
/* The first two SGEs contain the transport header and
* the inline buffer. These are always left mapped so
* they can be cheaply re-used.
@@ -754,11 +739,6 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
__be32 *p;
int ret;
-#if defined(CONFIG_SUNRPC_BACKCHANNEL)
- if (test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state))
- return rpcrdma_bc_marshal_reply(rqst);
-#endif
-
rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0);
xdr_init_encode(xdr, &req->rl_hdrbuf,
req->rl_rdmabuf->rg_base);
@@ -821,6 +801,17 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
rtype = rpcrdma_areadch;
}
+ /* If this is a retransmit, discard previously registered
+ * chunks. Very likely the connection has been replaced,
+ * so these registrations are invalid and unusable.
+ */
+ while (unlikely(!list_empty(&req->rl_registered))) {
+ struct rpcrdma_mr *mr;
+
+ mr = rpcrdma_mr_pop(&req->rl_registered);
+ rpcrdma_mr_defer_recovery(mr);
+ }
+
/* This implementation supports the following combinations
* of chunk lists in one RPC-over-RDMA Call message:
*
@@ -868,10 +859,7 @@ rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst)
if (ret)
goto out_err;
- dprintk("RPC: %5u %s: %s/%s: hdrlen %u rpclen\n",
- rqst->rq_task->tk_pid, __func__,
- transfertypes[rtype], transfertypes[wtype],
- xdr_stream_pos(xdr));
+ trace_xprtrdma_marshal(rqst, xdr_stream_pos(xdr), rtype, wtype);
ret = rpcrdma_prepare_send_sges(r_xprt, req, xdr_stream_pos(xdr),
&rqst->rq_snd_buf, rtype);
@@ -926,8 +914,7 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
curlen = rqst->rq_rcv_buf.head[0].iov_len;
if (curlen > copy_len)
curlen = copy_len;
- dprintk("RPC: %s: srcp 0x%p len %d hdrlen %d\n",
- __func__, srcp, copy_len, curlen);
+ trace_xprtrdma_fixup(rqst, copy_len, curlen);
srcp += curlen;
copy_len -= curlen;
@@ -947,9 +934,8 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
if (curlen > pagelist_len)
curlen = pagelist_len;
- dprintk("RPC: %s: page %d"
- " srcp 0x%p len %d curlen %d\n",
- __func__, i, srcp, copy_len, curlen);
+ trace_xprtrdma_fixup_pg(rqst, i, srcp,
+ copy_len, curlen);
destp = kmap_atomic(ppages[i]);
memcpy(destp + page_base, srcp, curlen);
flush_dcache_page(ppages[i]);
@@ -984,24 +970,6 @@ rpcrdma_inline_fixup(struct rpc_rqst *rqst, char *srcp, int copy_len, int pad)
return fixup_copy_count;
}
-/* Caller must guarantee @rep remains stable during this call.
- */
-static void
-rpcrdma_mark_remote_invalidation(struct list_head *mws,
- struct rpcrdma_rep *rep)
-{
- struct rpcrdma_mw *mw;
-
- if (!(rep->rr_wc_flags & IB_WC_WITH_INVALIDATE))
- return;
-
- list_for_each_entry(mw, mws, mw_list)
- if (mw->mw_handle == rep->rr_inv_rkey) {
- mw->mw_flags = RPCRDMA_MW_F_RI;
- break; /* only one invalidated MR per RPC */
- }
-}
-
/* By convention, backchannel calls arrive via rdma_msg type
* messages, and never populate the chunk lists. This makes
* the RPC/RDMA header small and fixed in size, so it is
@@ -1058,26 +1026,19 @@ out_short:
static int decode_rdma_segment(struct xdr_stream *xdr, u32 *length)
{
+ u32 handle;
+ u64 offset;
__be32 *p;
p = xdr_inline_decode(xdr, 4 * sizeof(*p));
if (unlikely(!p))
return -EIO;
- ifdebug(FACILITY) {
- u64 offset;
- u32 handle;
-
- handle = be32_to_cpup(p++);
- *length = be32_to_cpup(p++);
- xdr_decode_hyper(p, &offset);
- dprintk("RPC: %s: segment %u@0x%016llx:0x%08x\n",
- __func__, *length, (unsigned long long)offset,
- handle);
- } else {
- *length = be32_to_cpup(p + 1);
- }
+ handle = be32_to_cpup(p++);
+ *length = be32_to_cpup(p++);
+ xdr_decode_hyper(p, &offset);
+ trace_xprtrdma_decode_seg(handle, *length, offset);
return 0;
}
@@ -1098,8 +1059,6 @@ static int decode_write_chunk(struct xdr_stream *xdr, u32 *length)
*length += seglength;
}
- dprintk("RPC: %s: segcount=%u, %u bytes\n",
- __func__, be32_to_cpup(p), *length);
return 0;
}
@@ -1296,8 +1255,7 @@ out:
* being marshaled.
*/
out_badheader:
- dprintk("RPC: %5u %s: invalid rpcrdma reply (type %u)\n",
- rqst->rq_task->tk_pid, __func__, be32_to_cpu(rep->rr_proc));
+ trace_xprtrdma_reply_hdr(rep);
r_xprt->rx_stats.bad_reply_count++;
status = -EIO;
goto out;
@@ -1339,9 +1297,12 @@ void rpcrdma_deferred_completion(struct work_struct *work)
struct rpcrdma_rep *rep =
container_of(work, struct rpcrdma_rep, rr_work);
struct rpcrdma_req *req = rpcr_to_rdmar(rep->rr_rqst);
+ struct rpcrdma_xprt *r_xprt = rep->rr_rxprt;
- rpcrdma_mark_remote_invalidation(&req->rl_registered, rep);
- rpcrdma_release_rqst(rep->rr_rxprt, req);
+ trace_xprtrdma_defer_cmp(rep);
+ if (rep->rr_wc_flags & IB_WC_WITH_INVALIDATE)
+ r_xprt->rx_ia.ri_ops->ro_reminv(rep, &req->rl_registered);
+ rpcrdma_release_rqst(r_xprt, req);
rpcrdma_complete_rqst(rep);
}
@@ -1360,8 +1321,6 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
u32 credits;
__be32 *p;
- dprintk("RPC: %s: incoming rep %p\n", __func__, rep);
-
if (rep->rr_hdrbuf.head[0].iov_len == 0)
goto out_badstatus;
@@ -1405,8 +1364,7 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep)
rep->rr_rqst = rqst;
clear_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags);
- dprintk("RPC: %s: reply %p completes request %p (xid 0x%08x)\n",
- __func__, rep, req, be32_to_cpu(rep->rr_xid));
+ trace_xprtrdma_reply(rqst->rq_task, rep, req, credits);
queue_work_on(req->rl_cpu, rpcrdma_receive_wq, &rep->rr_work);
return;
@@ -1420,8 +1378,7 @@ out_badstatus:
return;
out_badversion:
- dprintk("RPC: %s: invalid version %d\n",
- __func__, be32_to_cpu(rep->rr_vers));
+ trace_xprtrdma_reply_vers(rep);
goto repost;
/* The RPC transaction has already been terminated, or the header
@@ -1429,12 +1386,11 @@ out_badversion:
*/
out_norqst:
spin_unlock(&xprt->recv_lock);
- dprintk("RPC: %s: no match for incoming xid 0x%08x\n",
- __func__, be32_to_cpu(rep->rr_xid));
+ trace_xprtrdma_reply_rqst(rep);
goto repost;
out_shortreply:
- dprintk("RPC: %s: short/invalid reply\n", __func__);
+ trace_xprtrdma_reply_short(rep);
/* If no pending RPC transaction was matched, post a replacement
* receive buffer before returning.