From 62a89501a3bde310ba339cc90bb05879528d84a0 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Wed, 12 Feb 2020 11:12:35 -0500 Subject: xprtrdma: Enhance MR-related trace points Two changes: - Show the number of SG entries that were mapped. This helps debug DMA-related problems. - Record the MR's resource ID instead of its memory address. This groups each MR with its associated rdma-tool output, and reduces needless exposure of memory addresses. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/frwr_ops.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 125297c9aa3e..0dc799553a08 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -419,7 +419,7 @@ void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs) list_for_each_entry(mr, mrs, mr_list) if (mr->mr_handle == rep->rr_inv_rkey) { list_del_init(&mr->mr_list); - trace_xprtrdma_mr_remoteinv(mr); + trace_xprtrdma_mr_reminv(mr); rpcrdma_mr_put(mr); break; /* only one invalidated MR per RPC */ } -- cgit v1.2.3 From 85cd8e2b78eea7374927750ffec60bf047f8f90b Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 21 Feb 2020 17:00:12 -0500 Subject: xprtrdma: Invoke rpcrdma_ep_create() in the connect worker Refactor rpcrdma_ep_create(), rpcrdma_ep_disconnect(), and rpcrdma_ep_destroy(). rpcrdma_ep_create will be invoked at connect time instead of at transport set-up time. It will be responsible for allocating per-connection resources. In this patch it allocates the CQs and creates a QP. More to come. rpcrdma_ep_destroy() is the inverse functionality that is invoked at disconnect time. It will be responsible for releasing the CQs and QP. These changes should be safe to do because both connect and disconnect are guaranteed to be serialized by the transport send lock. 
This takes us another step closer to resolving the address and route only at connect time so that connection failover to another device will work correctly. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/transport.c | 10 +-- net/sunrpc/xprtrdma/verbs.c | 147 +++++++++++++++------------------------- net/sunrpc/xprtrdma/xprt_rdma.h | 2 - 3 files changed, 56 insertions(+), 103 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 3cfeba68ee9a..d915524a8e68 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -284,7 +284,7 @@ xprt_rdma_destroy(struct rpc_xprt *xprt) cancel_delayed_work_sync(&r_xprt->rx_connect_worker); - rpcrdma_ep_destroy(r_xprt); + rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia); rpcrdma_buffer_destroy(&r_xprt->rx_buf); rpcrdma_ia_close(&r_xprt->rx_ia); @@ -351,13 +351,9 @@ xprt_setup_rdma(struct xprt_create *args) if (rc) goto out1; - rc = rpcrdma_ep_create(new_xprt); - if (rc) - goto out2; - rc = rpcrdma_buffer_create(new_xprt); if (rc) - goto out3; + goto out2; if (!try_module_get(THIS_MODULE)) goto out4; @@ -375,8 +371,6 @@ xprt_setup_rdma(struct xprt_create *args) out4: rpcrdma_buffer_destroy(&new_xprt->rx_buf); rc = -ENODEV; -out3: - rpcrdma_ep_destroy(new_xprt); out2: rpcrdma_ia_close(&new_xprt->rx_ia); out1: diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 353f61ac8d51..042e6cc4f767 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -84,6 +84,7 @@ static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep); static void rpcrdma_reps_unmap(struct rpcrdma_xprt *r_xprt); static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt); static void rpcrdma_mrs_destroy(struct rpcrdma_xprt *r_xprt); +static void rpcrdma_ep_destroy(struct rpcrdma_xprt *r_xprt); static struct rpcrdma_regbuf * rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction, 
gfp_t flags); @@ -391,32 +392,17 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia) { struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt, rx_ia); - struct rpcrdma_ep *ep = &r_xprt->rx_ep; - /* This is similar to rpcrdma_ep_destroy, but: - * - Don't cancel the connect worker. - * - Don't call rpcrdma_ep_disconnect, which waits - * for another conn upcall, which will deadlock. - * - rdma_disconnect is unneeded, the underlying - * connection is already gone. - */ - if (ia->ri_id->qp) { + if (ia->ri_id->qp) rpcrdma_xprt_drain(r_xprt); - rdma_destroy_qp(ia->ri_id); - ia->ri_id->qp = NULL; - } - ib_free_cq(ep->rep_attr.recv_cq); - ep->rep_attr.recv_cq = NULL; - ib_free_cq(ep->rep_attr.send_cq); - ep->rep_attr.send_cq = NULL; - /* The ULP is responsible for ensuring all DMA - * mappings and MRs are gone. - */ rpcrdma_reps_unmap(r_xprt); rpcrdma_reqs_reset(r_xprt); rpcrdma_mrs_destroy(r_xprt); rpcrdma_sendctxs_destroy(r_xprt); + + rpcrdma_ep_destroy(r_xprt); + ib_dealloc_pd(ia->ri_pd); ia->ri_pd = NULL; @@ -434,11 +420,8 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia) void rpcrdma_ia_close(struct rpcrdma_ia *ia) { - if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) { - if (ia->ri_id->qp) - rdma_destroy_qp(ia->ri_id); + if (ia->ri_id && !IS_ERR(ia->ri_id)) rdma_destroy_id(ia->ri_id); - } ia->ri_id = NULL; /* If the pd is still busy, xprtrdma missed freeing a resource */ @@ -447,25 +430,19 @@ rpcrdma_ia_close(struct rpcrdma_ia *ia) ia->ri_pd = NULL; } -/** - * rpcrdma_ep_create - Create unconnected endpoint - * @r_xprt: transport to instantiate - * - * Returns zero on success, or a negative errno. 
- */ -int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) +static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt, + struct rdma_cm_id *id) { struct rpcrdma_ep *ep = &r_xprt->rx_ep; struct rpcrdma_ia *ia = &r_xprt->rx_ia; struct rpcrdma_connect_private *pmsg = &ep->rep_cm_private; - struct ib_cq *sendcq, *recvcq; int rc; ep->rep_max_requests = r_xprt->rx_xprt.max_reqs; ep->rep_inline_send = xprt_rdma_max_inline_write; ep->rep_inline_recv = xprt_rdma_max_inline_read; - rc = frwr_query_device(r_xprt, ia->ri_id->device); + rc = frwr_query_device(r_xprt, id->device); if (rc) return rc; r_xprt->rx_buf.rb_max_requests = cpu_to_be32(ep->rep_max_requests); @@ -491,25 +468,22 @@ int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) init_waitqueue_head(&ep->rep_connect_wait); ep->rep_receive_count = 0; - sendcq = ib_alloc_cq_any(ia->ri_id->device, r_xprt, - ep->rep_attr.cap.max_send_wr + 1, - IB_POLL_WORKQUEUE); - if (IS_ERR(sendcq)) { - rc = PTR_ERR(sendcq); - goto out1; + ep->rep_attr.send_cq = ib_alloc_cq_any(id->device, r_xprt, + ep->rep_attr.cap.max_send_wr, + IB_POLL_WORKQUEUE); + if (IS_ERR(ep->rep_attr.send_cq)) { + rc = PTR_ERR(ep->rep_attr.send_cq); + goto out_destroy; } - recvcq = ib_alloc_cq_any(ia->ri_id->device, NULL, - ep->rep_attr.cap.max_recv_wr + 1, - IB_POLL_WORKQUEUE); - if (IS_ERR(recvcq)) { - rc = PTR_ERR(recvcq); - goto out2; + ep->rep_attr.recv_cq = ib_alloc_cq_any(id->device, NULL, + ep->rep_attr.cap.max_recv_wr, + IB_POLL_WORKQUEUE); + if (IS_ERR(ep->rep_attr.recv_cq)) { + rc = PTR_ERR(ep->rep_attr.recv_cq); + goto out_destroy; } - ep->rep_attr.send_cq = sendcq; - ep->rep_attr.recv_cq = recvcq; - /* Initialize cma parameters */ memset(&ep->rep_remote_cma, 0, sizeof(ep->rep_remote_cma)); @@ -525,7 +499,7 @@ int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) /* Client offers RDMA Read but does not initiate */ ep->rep_remote_cma.initiator_depth = 0; ep->rep_remote_cma.responder_resources = - min_t(int, U8_MAX, ia->ri_id->device->attrs.max_qp_rd_atom); + 
min_t(int, U8_MAX, id->device->attrs.max_qp_rd_atom); /* Limit transport retries so client can detect server * GID changes quickly. RPC layer handles re-establishing @@ -540,45 +514,41 @@ int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) ep->rep_remote_cma.flow_control = 0; ep->rep_remote_cma.rnr_retry_count = 0; + rc = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr); + if (rc) + goto out_destroy; return 0; -out2: - ib_free_cq(sendcq); -out1: +out_destroy: + rpcrdma_ep_destroy(r_xprt); return rc; } -/** - * rpcrdma_ep_destroy - Disconnect and destroy endpoint. - * @r_xprt: transport instance to shut down - * - */ -void rpcrdma_ep_destroy(struct rpcrdma_xprt *r_xprt) +static void rpcrdma_ep_destroy(struct rpcrdma_xprt *r_xprt) { struct rpcrdma_ep *ep = &r_xprt->rx_ep; struct rpcrdma_ia *ia = &r_xprt->rx_ia; if (ia->ri_id && ia->ri_id->qp) { - rpcrdma_ep_disconnect(ep, ia); rdma_destroy_qp(ia->ri_id); ia->ri_id->qp = NULL; } if (ep->rep_attr.recv_cq) ib_free_cq(ep->rep_attr.recv_cq); + ep->rep_attr.recv_cq = NULL; if (ep->rep_attr.send_cq) ib_free_cq(ep->rep_attr.send_cq); + ep->rep_attr.send_cq = NULL; } /* Re-establish a connection after a device removal event. * Unlike a normal reconnection, a fresh PD and a new set * of MRs and buffers is needed. 
*/ -static int rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt, - struct ib_qp_init_attr *qp_init_attr) +static int rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt) { struct rpcrdma_ia *ia = &r_xprt->rx_ia; - struct rpcrdma_ep *ep = &r_xprt->rx_ep; int rc, err; trace_xprtrdma_reinsert(r_xprt); @@ -587,39 +557,24 @@ static int rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt, if (rpcrdma_ia_open(r_xprt)) goto out1; - rc = -ENOMEM; - err = rpcrdma_ep_create(r_xprt); - if (err) { - pr_err("rpcrdma: rpcrdma_ep_create returned %d\n", err); - goto out2; - } - memcpy(qp_init_attr, &ep->rep_attr, sizeof(*qp_init_attr)); - rc = -ENETUNREACH; - err = rdma_create_qp(ia->ri_id, ia->ri_pd, qp_init_attr); - if (err) { - pr_err("rpcrdma: rdma_create_qp returned %d\n", err); - goto out3; - } + err = rpcrdma_ep_create(r_xprt, ia->ri_id); + if (err) + goto out2; return 0; -out3: - rpcrdma_ep_destroy(r_xprt); out2: rpcrdma_ia_close(ia); out1: return rc; } -static int rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt, - struct ib_qp_init_attr *qp_init_attr) +static int rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt) { struct rpcrdma_ia *ia = &r_xprt->rx_ia; struct rdma_cm_id *id, *old; int err, rc; - rpcrdma_ep_disconnect(&r_xprt->rx_ep, ia); - rc = -EHOSTUNREACH; id = rpcrdma_create_id(r_xprt, ia); if (IS_ERR(id)) @@ -640,15 +595,14 @@ static int rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt, goto out_destroy; } - err = rdma_create_qp(id, ia->ri_pd, qp_init_attr); + err = rpcrdma_ep_create(r_xprt, id); if (err) goto out_destroy; - /* Atomically replace the transport's ID and QP. */ + /* Atomically replace the transport's ID. 
*/ rc = 0; old = ia->ri_id; ia->ri_id = id; - rdma_destroy_qp(old); out_destroy: rdma_destroy_id(old); @@ -665,26 +619,25 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt, rx_ia); struct rpc_xprt *xprt = &r_xprt->rx_xprt; - struct ib_qp_init_attr qp_init_attr; int rc; retry: - memcpy(&qp_init_attr, &ep->rep_attr, sizeof(qp_init_attr)); switch (ep->rep_connected) { case 0: - rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &qp_init_attr); - if (rc) { - rc = -ENETUNREACH; + rc = -ENETUNREACH; + if (rpcrdma_ep_create(r_xprt, ia->ri_id)) goto out_noupdate; - } break; case -ENODEV: - rc = rpcrdma_ep_recreate_xprt(r_xprt, &qp_init_attr); + rc = rpcrdma_ep_recreate_xprt(r_xprt); if (rc) goto out_noupdate; break; + case 1: + rpcrdma_ep_disconnect(ep, ia); + /* fall through */ default: - rc = rpcrdma_ep_reconnect(r_xprt, &qp_init_attr); + rc = rpcrdma_ep_reconnect(r_xprt); if (rc) goto out; } @@ -742,10 +695,14 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) { struct rpcrdma_xprt *r_xprt = container_of(ep, struct rpcrdma_xprt, rx_ep); + struct rdma_cm_id *id = ia->ri_id; int rc; + if (!id) + goto out; + /* returns without wait if ID is not connected */ - rc = rdma_disconnect(ia->ri_id); + rc = rdma_disconnect(id); if (!rc) wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 1); @@ -753,10 +710,14 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) ep->rep_connected = rc; trace_xprtrdma_disconnect(r_xprt, rc); - rpcrdma_xprt_drain(r_xprt); + if (id->qp) + rpcrdma_xprt_drain(r_xprt); +out: rpcrdma_reqs_reset(r_xprt); rpcrdma_mrs_destroy(r_xprt); rpcrdma_sendctxs_destroy(r_xprt); + + rpcrdma_ep_destroy(r_xprt); } /* Fixed-size circular FIFO queue. 
This implementation is wait-free and diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 37d5080c250b..9a536319557e 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -464,8 +464,6 @@ void rpcrdma_ia_close(struct rpcrdma_ia *); /* * Endpoint calls - xprtrdma/verbs.c */ -int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt); -void rpcrdma_ep_destroy(struct rpcrdma_xprt *r_xprt); int rpcrdma_ep_connect(struct rpcrdma_ep *, struct rpcrdma_ia *); void rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *); -- cgit v1.2.3 From 253a51622fb03425b611e709e34f1ea70949a61f Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 21 Feb 2020 17:00:17 -0500 Subject: xprtrdma: Refactor frwr_init_mr() Clean up: prepare for combining the rpcrdma_ia and rpcrdma_ep structures. Take the opportunity to rename the function to be consistent with the "subsystem _ object _ verb" naming scheme. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/frwr_ops.c | 10 ++++++---- net/sunrpc/xprtrdma/verbs.c | 4 +--- net/sunrpc/xprtrdma/xprt_rdma.h | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 0dc799553a08..2c96b624a77d 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -52,7 +52,7 @@ /** * frwr_release_mr - Destroy one MR - * @mr: MR allocated by frwr_init_mr + * @mr: MR allocated by frwr_mr_init * */ void frwr_release_mr(struct rpcrdma_mr *mr) @@ -106,15 +106,16 @@ void frwr_reset(struct rpcrdma_req *req) } /** - * frwr_init_mr - Initialize one MR - * @ia: interface adapter + * frwr_mr_init - Initialize one MR + * @r_xprt: controlling transport instance * @mr: generic MR to prepare for FRWR * * Returns zero if successful. Otherwise a negative errno * is returned. 
*/ -int frwr_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr) +int frwr_mr_init(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr) { + struct rpcrdma_ia *ia = &r_xprt->rx_ia; unsigned int depth = ia->ri_max_frwr_depth; struct scatterlist *sg; struct ib_mr *frmr; @@ -128,6 +129,7 @@ int frwr_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr) if (!sg) goto out_list_err; + mr->mr_xprt = r_xprt; mr->frwr.fr_mr = frmr; mr->mr_dir = DMA_NONE; INIT_LIST_HEAD(&mr->mr_list); diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 042e6cc4f767..02ce3d548825 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -896,14 +896,12 @@ rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt) if (!mr) break; - rc = frwr_init_mr(ia, mr); + rc = frwr_mr_init(r_xprt, mr); if (rc) { kfree(mr); break; } - mr->mr_xprt = r_xprt; - spin_lock(&buf->rb_lock); rpcrdma_mr_push(mr, &buf->rb_mrs); list_add(&mr->mr_all, &buf->rb_all_mrs); diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 9a536319557e..9e3e9a82cb9f 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -536,7 +536,7 @@ rpcrdma_data_dir(bool writing) void frwr_reset(struct rpcrdma_req *req); int frwr_query_device(struct rpcrdma_xprt *r_xprt, const struct ib_device *device); -int frwr_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr); +int frwr_mr_init(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr); void frwr_release_mr(struct rpcrdma_mr *mr); struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, -- cgit v1.2.3 From 97d0de8812a10a66510ff95f8fe6e8d3053fd2ca Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 21 Feb 2020 17:00:23 -0500 Subject: xprtrdma: Clean up the post_send path Clean up: Simplify the synopses of functions in the post_send path by combining the struct rpcrdma_ia and struct rpcrdma_ep arguments. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/backchannel.c | 2 +- net/sunrpc/xprtrdma/frwr_ops.c | 14 +++++++++----- net/sunrpc/xprtrdma/transport.c | 2 +- net/sunrpc/xprtrdma/verbs.c | 13 +++++-------- net/sunrpc/xprtrdma/xprt_rdma.h | 5 ++--- 5 files changed, 18 insertions(+), 18 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c index 1a0ae0c61353..4b43910a6ed2 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c @@ -115,7 +115,7 @@ int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst) if (rc < 0) goto failed_marshal; - if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) + if (rpcrdma_post_sends(r_xprt, req)) goto drop_connection; return 0; diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 2c96b624a77d..a1b5c8024cca 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -374,18 +374,22 @@ static void frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc) } /** - * frwr_send - post Send WR containing the RPC Call message - * @ia: interface adapter - * @req: Prepared RPC Call + * frwr_send - post Send WRs containing the RPC Call message + * @r_xprt: controlling transport instance + * @req: prepared RPC Call * * For FRWR, chain any FastReg WRs to the Send WR. Only a * single ib_post_send call is needed to register memory * and then post the Send WR. * - * Returns the result of ib_post_send. + * Returns the return code from ib_post_send. + * + * Caller must hold the transport send lock to ensure that the + * pointers to the transport's rdma_cm_id and QP are stable. 
*/ -int frwr_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req) +int frwr_send(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) { + struct rpcrdma_ia *ia = &r_xprt->rx_ia; struct ib_send_wr *post_wr; struct rpcrdma_mr *mr; diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index d915524a8e68..8934c24a5701 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -688,7 +688,7 @@ xprt_rdma_send_request(struct rpc_rqst *rqst) goto drop_connection; rqst->rq_xtime = ktime_get(); - if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) + if (rpcrdma_post_sends(r_xprt, req)) goto drop_connection; rqst->rq_xmit_bytes_sent += rqst->rq_snd_buf.len; diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 02ce3d548825..8fd6682d2646 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1461,20 +1461,17 @@ static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb) } /** - * rpcrdma_ep_post - Post WRs to a transport's Send Queue - * @ia: transport's device information - * @ep: transport's RDMA endpoint information + * rpcrdma_post_sends - Post WRs to a transport's Send Queue + * @r_xprt: controlling transport instance * @req: rpcrdma_req containing the Send WR to post * * Returns 0 if the post was successful, otherwise -ENOTCONN * is returned. 
*/ -int -rpcrdma_ep_post(struct rpcrdma_ia *ia, - struct rpcrdma_ep *ep, - struct rpcrdma_req *req) +int rpcrdma_post_sends(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) { struct ib_send_wr *send_wr = &req->rl_wr; + struct rpcrdma_ep *ep = &r_xprt->rx_ep; int rc; if (!ep->rep_send_count || kref_read(&req->rl_kref) > 1) { @@ -1485,7 +1482,7 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia, --ep->rep_send_count; } - rc = frwr_send(ia, req); + rc = frwr_send(r_xprt, req); trace_xprtrdma_post_send(req, rc); if (rc) return -ENOTCONN; diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 9e3e9a82cb9f..82ec4c25432f 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -467,8 +467,7 @@ void rpcrdma_ia_close(struct rpcrdma_ia *); int rpcrdma_ep_connect(struct rpcrdma_ep *, struct rpcrdma_ia *); void rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *); -int rpcrdma_ep_post(struct rpcrdma_ia *, struct rpcrdma_ep *, - struct rpcrdma_req *); +int rpcrdma_post_sends(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req); void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp); /* @@ -542,7 +541,7 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, int nsegs, bool writing, __be32 xid, struct rpcrdma_mr *mr); -int frwr_send(struct rpcrdma_ia *ia, struct rpcrdma_req *req); +int frwr_send(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req); void frwr_reminv(struct rpcrdma_rep *rep, struct list_head *mrs); void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req); void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req); -- cgit v1.2.3 From 9144a803df6ca4185238ca343dbb65d8137c036e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 21 Feb 2020 17:00:28 -0500 Subject: xprtrdma: Refactor rpcrdma_ep_connect() and rpcrdma_ep_disconnect() Clean up: Simplify the synopses of functions in the connect and disconnect paths in 
preparation for combining the rpcrdma_ia and rpcrdma_ep structures. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/transport.c | 6 +++--- net/sunrpc/xprtrdma/verbs.c | 30 +++++++++++++++--------------- net/sunrpc/xprtrdma/xprt_rdma.h | 4 ++-- 3 files changed, 20 insertions(+), 20 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 8934c24a5701..6349e6c98b57 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -240,7 +240,7 @@ xprt_rdma_connect_worker(struct work_struct *work) struct rpc_xprt *xprt = &r_xprt->rx_xprt; int rc; - rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia); + rc = rpcrdma_xprt_connect(r_xprt); xprt_clear_connecting(xprt); if (r_xprt->rx_ep.rep_connected > 0) { xprt->stat.connect_count++; @@ -284,7 +284,7 @@ xprt_rdma_destroy(struct rpc_xprt *xprt) cancel_delayed_work_sync(&r_xprt->rx_connect_worker); - rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia); + rpcrdma_xprt_disconnect(r_xprt); rpcrdma_buffer_destroy(&r_xprt->rx_buf); rpcrdma_ia_close(&r_xprt->rx_ia); @@ -409,7 +409,7 @@ void xprt_rdma_close(struct rpc_xprt *xprt) if (ep->rep_connected == -ENODEV) return; - rpcrdma_ep_disconnect(ep, ia); + rpcrdma_xprt_disconnect(r_xprt); out: xprt->reestablish_timeout = 0; diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 8fd6682d2646..f361213a8157 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -610,15 +610,17 @@ out: return rc; } -/* - * Connect unconnected endpoint. +/** + * rpcrdma_xprt_connect - Connect an unconnected transport + * @r_xprt: controlling transport instance + * + * Returns 0 on success or a negative errno. 
*/ -int -rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) +int rpcrdma_xprt_connect(struct rpcrdma_xprt *r_xprt) { - struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt, - rx_ia); struct rpc_xprt *xprt = &r_xprt->rx_xprt; + struct rpcrdma_ep *ep = &r_xprt->rx_ep; + struct rpcrdma_ia *ia = &r_xprt->rx_ia; int rc; retry: @@ -634,7 +636,7 @@ retry: goto out_noupdate; break; case 1: - rpcrdma_ep_disconnect(ep, ia); + rpcrdma_xprt_disconnect(r_xprt); /* fall through */ default: rc = rpcrdma_ep_reconnect(r_xprt); @@ -668,7 +670,7 @@ retry: rc = rpcrdma_reqs_setup(r_xprt); if (rc) { - rpcrdma_ep_disconnect(ep, ia); + rpcrdma_xprt_disconnect(r_xprt); goto out; } rpcrdma_mrs_create(r_xprt); @@ -683,18 +685,16 @@ out_noupdate: } /** - * rpcrdma_ep_disconnect - Disconnect underlying transport - * @ep: endpoint to disconnect - * @ia: associated interface adapter + * rpcrdma_xprt_disconnect - Disconnect underlying transport + * @r_xprt: controlling transport instance * * Caller serializes. Either the transport send lock is held, * or we're being called to destroy the transport. 
*/ -void -rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) +void rpcrdma_xprt_disconnect(struct rpcrdma_xprt *r_xprt) { - struct rpcrdma_xprt *r_xprt = container_of(ep, struct rpcrdma_xprt, - rx_ep); + struct rpcrdma_ep *ep = &r_xprt->rx_ep; + struct rpcrdma_ia *ia = &r_xprt->rx_ia; struct rdma_cm_id *id = ia->ri_id; int rc; diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 82ec4c25432f..9ead06b1d8a4 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -464,8 +464,8 @@ void rpcrdma_ia_close(struct rpcrdma_ia *); /* * Endpoint calls - xprtrdma/verbs.c */ -int rpcrdma_ep_connect(struct rpcrdma_ep *, struct rpcrdma_ia *); -void rpcrdma_ep_disconnect(struct rpcrdma_ep *, struct rpcrdma_ia *); +int rpcrdma_xprt_connect(struct rpcrdma_xprt *r_xprt); +void rpcrdma_xprt_disconnect(struct rpcrdma_xprt *r_xprt); int rpcrdma_post_sends(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req); void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp); -- cgit v1.2.3 From 9ba373ee24ea07b60b8c5041cafe88d5796bacef Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 21 Feb 2020 17:00:33 -0500 Subject: xprtrdma: Allocate Protection Domain in rpcrdma_ep_create() Make a Protection Domain (PD) a per-connection resource rather than a per-transport resource. In other words, when the connection terminates, the PD is destroyed. Thus there is one less HW resource that remains allocated to a transport after a connection is closed. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/verbs.c | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index f361213a8157..36fe7baea014 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -363,14 +363,6 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt) rc = PTR_ERR(ia->ri_id); goto out_err; } - - ia->ri_pd = ib_alloc_pd(ia->ri_id->device, 0); - if (IS_ERR(ia->ri_pd)) { - rc = PTR_ERR(ia->ri_pd); - pr_err("rpcrdma: ib_alloc_pd() returned %d\n", rc); - goto out_err; - } - return 0; out_err: @@ -403,9 +395,6 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia) rpcrdma_ep_destroy(r_xprt); - ib_dealloc_pd(ia->ri_pd); - ia->ri_pd = NULL; - /* Allow waiters to continue */ complete(&ia->ri_remove_done); @@ -423,11 +412,6 @@ rpcrdma_ia_close(struct rpcrdma_ia *ia) if (ia->ri_id && !IS_ERR(ia->ri_id)) rdma_destroy_id(ia->ri_id); ia->ri_id = NULL; - - /* If the pd is still busy, xprtrdma missed freeing a resource */ - if (ia->ri_pd && !IS_ERR(ia->ri_pd)) - ib_dealloc_pd(ia->ri_pd); - ia->ri_pd = NULL; } static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt, @@ -514,6 +498,12 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt, ep->rep_remote_cma.flow_control = 0; ep->rep_remote_cma.rnr_retry_count = 0; + ia->ri_pd = ib_alloc_pd(id->device, 0); + if (IS_ERR(ia->ri_pd)) { + rc = PTR_ERR(ia->ri_pd); + goto out_destroy; + } + rc = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr); if (rc) goto out_destroy; @@ -540,6 +530,10 @@ static void rpcrdma_ep_destroy(struct rpcrdma_xprt *r_xprt) if (ep->rep_attr.send_cq) ib_free_cq(ep->rep_attr.send_cq); ep->rep_attr.send_cq = NULL; + + if (ia->ri_pd) + ib_dealloc_pd(ia->ri_pd); + ia->ri_pd = NULL; } /* Re-establish a connection after a device removal event. 
-- cgit v1.2.3 From 81fe0c57f4e136375f3bcda74af413f82a34a1bb Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 21 Feb 2020 17:00:38 -0500 Subject: xprtrdma: Invoke rpcrdma_ia_open in the connect worker Move rdma_cm_id creation into rpcrdma_ep_create() so that it is now responsible for allocating all per-connection hardware resources. With this clean-up, all three arms of the switch statement in rpcrdma_ep_connect are exactly the same now, thus the switch can be removed. Because device removal behaves a little differently than disconnection, there is a little more work to be done before rpcrdma_ep_destroy() can release the connection's rdma_cm_id. So it is not quite symmetrical with rpcrdma_ep_create() yet. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 1 - net/sunrpc/xprtrdma/transport.c | 7 -- net/sunrpc/xprtrdma/verbs.c | 153 ++++++---------------------------------- net/sunrpc/xprtrdma/xprt_rdma.h | 2 - 4 files changed, 20 insertions(+), 143 deletions(-) (limited to 'net/sunrpc') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 87f4461ab108..ba37c47b51e8 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -415,7 +415,6 @@ DEFINE_CONN_EVENT(disconnect); DEFINE_RXPRT_EVENT(xprtrdma_create); DEFINE_RXPRT_EVENT(xprtrdma_op_destroy); DEFINE_RXPRT_EVENT(xprtrdma_remove); -DEFINE_RXPRT_EVENT(xprtrdma_reinsert); DEFINE_RXPRT_EVENT(xprtrdma_op_inject_dsc); DEFINE_RXPRT_EVENT(xprtrdma_op_close); DEFINE_RXPRT_EVENT(xprtrdma_op_setport); diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 6349e6c98b57..745dfd149637 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -286,7 +286,6 @@ xprt_rdma_destroy(struct rpc_xprt *xprt) rpcrdma_xprt_disconnect(r_xprt); rpcrdma_buffer_destroy(&r_xprt->rx_buf); - rpcrdma_ia_close(&r_xprt->rx_ia); xprt_rdma_free_addresses(xprt); xprt_free(xprt); @@ -347,10 
+346,6 @@ xprt_setup_rdma(struct xprt_create *args) xprt_rdma_format_addresses(xprt, sap); new_xprt = rpcx_to_rdmax(xprt); - rc = rpcrdma_ia_open(new_xprt); - if (rc) - goto out1; - rc = rpcrdma_buffer_create(new_xprt); if (rc) goto out2; @@ -372,8 +367,6 @@ out4: rpcrdma_buffer_destroy(&new_xprt->rx_buf); rc = -ENODEV; out2: - rpcrdma_ia_close(&new_xprt->rx_ia); -out1: trace_xprtrdma_op_destroy(new_xprt); xprt_rdma_free_addresses(xprt); xprt_free(xprt); diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 36fe7baea014..3df20f355579 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -345,31 +345,6 @@ out: * Exported functions. */ -/** - * rpcrdma_ia_open - Open and initialize an Interface Adapter. - * @xprt: transport with IA to (re)initialize - * - * Returns 0 on success, negative errno if an appropriate - * Interface Adapter could not be found and opened. - */ -int -rpcrdma_ia_open(struct rpcrdma_xprt *xprt) -{ - struct rpcrdma_ia *ia = &xprt->rx_ia; - int rc; - - ia->ri_id = rpcrdma_create_id(xprt, ia); - if (IS_ERR(ia->ri_id)) { - rc = PTR_ERR(ia->ri_id); - goto out_err; - } - return 0; - -out_err: - rpcrdma_ia_close(ia); - return rc; -} - /** * rpcrdma_ia_remove - Handle device driver unload * @ia: interface adapter being removed @@ -401,34 +376,26 @@ rpcrdma_ia_remove(struct rpcrdma_ia *ia) trace_xprtrdma_remove(r_xprt); } -/** - * rpcrdma_ia_close - Clean up/close an IA. 
- * @ia: interface adapter to close - * - */ -void -rpcrdma_ia_close(struct rpcrdma_ia *ia) -{ - if (ia->ri_id && !IS_ERR(ia->ri_id)) - rdma_destroy_id(ia->ri_id); - ia->ri_id = NULL; -} - -static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt, - struct rdma_cm_id *id) +static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) { struct rpcrdma_ep *ep = &r_xprt->rx_ep; struct rpcrdma_ia *ia = &r_xprt->rx_ia; struct rpcrdma_connect_private *pmsg = &ep->rep_cm_private; + struct rdma_cm_id *id; int rc; + id = rpcrdma_create_id(r_xprt, ia); + if (IS_ERR(id)) + return PTR_ERR(id); + ep->rep_max_requests = r_xprt->rx_xprt.max_reqs; ep->rep_inline_send = xprt_rdma_max_inline_write; ep->rep_inline_recv = xprt_rdma_max_inline_read; rc = frwr_query_device(r_xprt, id->device); if (rc) - return rc; + goto out_destroy; + r_xprt->rx_buf.rb_max_requests = cpu_to_be32(ep->rep_max_requests); ep->rep_attr.event_handler = rpcrdma_qp_event_handler; @@ -507,10 +474,12 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt, rc = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr); if (rc) goto out_destroy; + ia->ri_id = id; return 0; out_destroy: rpcrdma_ep_destroy(r_xprt); + rdma_destroy_id(id); return rc; } @@ -536,79 +505,8 @@ static void rpcrdma_ep_destroy(struct rpcrdma_xprt *r_xprt) ia->ri_pd = NULL; } -/* Re-establish a connection after a device removal event. - * Unlike a normal reconnection, a fresh PD and a new set - * of MRs and buffers is needed. 
- */ -static int rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt) -{ - struct rpcrdma_ia *ia = &r_xprt->rx_ia; - int rc, err; - - trace_xprtrdma_reinsert(r_xprt); - - rc = -EHOSTUNREACH; - if (rpcrdma_ia_open(r_xprt)) - goto out1; - - rc = -ENETUNREACH; - err = rpcrdma_ep_create(r_xprt, ia->ri_id); - if (err) - goto out2; - return 0; - -out2: - rpcrdma_ia_close(ia); -out1: - return rc; -} - -static int rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt) -{ - struct rpcrdma_ia *ia = &r_xprt->rx_ia; - struct rdma_cm_id *id, *old; - int err, rc; - - rc = -EHOSTUNREACH; - id = rpcrdma_create_id(r_xprt, ia); - if (IS_ERR(id)) - goto out; - - /* As long as the new ID points to the same device as the - * old ID, we can reuse the transport's existing PD and all - * previously allocated MRs. Also, the same device means - * the transport's previous DMA mappings are still valid. - * - * This is a sanity check only. There should be no way these - * point to two different devices here. - */ - old = id; - rc = -ENETUNREACH; - if (ia->ri_id->device != id->device) { - pr_err("rpcrdma: can't reconnect on different device!\n"); - goto out_destroy; - } - - err = rpcrdma_ep_create(r_xprt, id); - if (err) - goto out_destroy; - - /* Atomically replace the transport's ID. */ - rc = 0; - old = ia->ri_id; - ia->ri_id = id; - -out_destroy: - rdma_destroy_id(old); -out: - return rc; -} - -/** - * rpcrdma_xprt_connect - Connect an unconnected transport - * @r_xprt: controlling transport instance - * - * Returns 0 on success or a negative errno. +/* + * Connect unconnected endpoint. 
*/ int rpcrdma_xprt_connect(struct rpcrdma_xprt *r_xprt) { @@ -618,25 +516,10 @@ int rpcrdma_xprt_connect(struct rpcrdma_xprt *r_xprt) int rc; retry: - switch (ep->rep_connected) { - case 0: - rc = -ENETUNREACH; - if (rpcrdma_ep_create(r_xprt, ia->ri_id)) - goto out_noupdate; - break; - case -ENODEV: - rc = rpcrdma_ep_recreate_xprt(r_xprt); - if (rc) - goto out_noupdate; - break; - case 1: - rpcrdma_xprt_disconnect(r_xprt); - /* fall through */ - default: - rc = rpcrdma_ep_reconnect(r_xprt); - if (rc) - goto out; - } + rpcrdma_xprt_disconnect(r_xprt); + rc = rpcrdma_ep_create(r_xprt); + if (rc) + goto out_noupdate; ep->rep_connected = 0; xprt_clear_connected(xprt); @@ -712,6 +595,10 @@ out: rpcrdma_sendctxs_destroy(r_xprt); rpcrdma_ep_destroy(r_xprt); + + if (ia->ri_id) + rdma_destroy_id(ia->ri_id); + ia->ri_id = NULL; } /* Fixed-size circular FIFO queue. This implementation is wait-free and diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 9ead06b1d8a4..8be1b70b71a2 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -457,9 +457,7 @@ extern unsigned int xprt_rdma_memreg_strategy; /* * Interface Adapter calls - xprtrdma/verbs.c */ -int rpcrdma_ia_open(struct rpcrdma_xprt *xprt); void rpcrdma_ia_remove(struct rpcrdma_ia *ia); -void rpcrdma_ia_close(struct rpcrdma_ia *); /* * Endpoint calls - xprtrdma/verbs.c -- cgit v1.2.3 From 897b7be9bca0caa27cdf7520bdc7689abe989a53 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 21 Feb 2020 17:00:44 -0500 Subject: xprtrdma: Remove rpcrdma_ia::ri_flags Clean up: The upper layer serializes calls to xprt_rdma_close, so there is no need for an atomic bit operation, saving 8 bytes in rpcrdma_ia. This enables merging rpcrdma_ia_remove directly into the disconnect logic. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/transport.c | 15 ----------- net/sunrpc/xprtrdma/verbs.c | 55 ++++++++++------------------------------- net/sunrpc/xprtrdma/xprt_rdma.h | 10 -------- 3 files changed, 13 insertions(+), 67 deletions(-) (limited to 'net/sunrpc') diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 745dfd149637..d7b7dab0aeb6 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -385,26 +385,11 @@ out2: void xprt_rdma_close(struct rpc_xprt *xprt) { struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); - struct rpcrdma_ep *ep = &r_xprt->rx_ep; - struct rpcrdma_ia *ia = &r_xprt->rx_ia; - - might_sleep(); trace_xprtrdma_op_close(r_xprt); - /* Prevent marshaling and sending of new requests */ - xprt_clear_connected(xprt); - - if (test_and_clear_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags)) { - rpcrdma_ia_remove(ia); - goto out; - } - - if (ep->rep_connected == -ENODEV) - return; rpcrdma_xprt_disconnect(r_xprt); -out: xprt->reestablish_timeout = 0; ++xprt->connect_cookie; xprt_disconnect_done(xprt); diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 3df20f355579..a7f46bbbf017 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -250,12 +250,11 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt)); #endif init_completion(&ia->ri_remove_done); - set_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags); ep->rep_connected = -ENODEV; xprt_force_disconnect(xprt); wait_for_completion(&ia->ri_remove_done); + trace_xprtrdma_remove(r_xprt); - ia->ri_id = NULL; /* Return 1 to ensure the core destroys the id. */ return 1; case RDMA_CM_EVENT_ESTABLISHED: @@ -345,37 +344,6 @@ out: * Exported functions. 
*/ -/** - * rpcrdma_ia_remove - Handle device driver unload - * @ia: interface adapter being removed - * - * Divest transport H/W resources associated with this adapter, - * but allow it to be restored later. - * - * Caller must hold the transport send lock. - */ -void -rpcrdma_ia_remove(struct rpcrdma_ia *ia) -{ - struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt, - rx_ia); - - if (ia->ri_id->qp) - rpcrdma_xprt_drain(r_xprt); - - rpcrdma_reps_unmap(r_xprt); - rpcrdma_reqs_reset(r_xprt); - rpcrdma_mrs_destroy(r_xprt); - rpcrdma_sendctxs_destroy(r_xprt); - - rpcrdma_ep_destroy(r_xprt); - - /* Allow waiters to continue */ - complete(&ia->ri_remove_done); - - trace_xprtrdma_remove(r_xprt); -} - static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) { struct rpcrdma_ep *ep = &r_xprt->rx_ep; @@ -573,12 +541,13 @@ void rpcrdma_xprt_disconnect(struct rpcrdma_xprt *r_xprt) struct rpcrdma_ep *ep = &r_xprt->rx_ep; struct rpcrdma_ia *ia = &r_xprt->rx_ia; struct rdma_cm_id *id = ia->ri_id; - int rc; + int rc, status = ep->rep_connected; + + might_sleep(); if (!id) - goto out; + return; - /* returns without wait if ID is not connected */ rc = rdma_disconnect(id); if (!rc) wait_event_interruptible(ep->rep_connect_wait, @@ -589,15 +558,17 @@ void rpcrdma_xprt_disconnect(struct rpcrdma_xprt *r_xprt) if (id->qp) rpcrdma_xprt_drain(r_xprt); -out: + rpcrdma_reps_unmap(r_xprt); rpcrdma_reqs_reset(r_xprt); rpcrdma_mrs_destroy(r_xprt); rpcrdma_sendctxs_destroy(r_xprt); rpcrdma_ep_destroy(r_xprt); - if (ia->ri_id) - rdma_destroy_id(ia->ri_id); + if (status == -ENODEV) + complete(&ia->ri_remove_done); + else + rdma_destroy_id(id); ia->ri_id = NULL; } @@ -815,10 +786,10 @@ void rpcrdma_mrs_refresh(struct rpcrdma_xprt *r_xprt) struct rpcrdma_buffer *buf = &r_xprt->rx_buf; struct rpcrdma_ep *ep = &r_xprt->rx_ep; - /* If there is no underlying device, it's no use to - * wake the refresh worker. 
+ /* If there is no underlying connection, it's no use + * to wake the refresh worker. */ - if (ep->rep_connected != -ENODEV) { + if (ep->rep_connected == 1) { /* The work is scheduled on a WQ_MEM_RECLAIM * workqueue in order to prevent MR allocation * from recursing into NFS during direct reclaim. diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 8be1b70b71a2..d2a0f125f7a8 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -75,15 +75,10 @@ struct rpcrdma_ia { unsigned int ri_max_frwr_depth; bool ri_implicit_roundup; enum ib_mr_type ri_mrtype; - unsigned long ri_flags; struct completion ri_done; struct completion ri_remove_done; }; -enum { - RPCRDMA_IAF_REMOVING = 0, -}; - /* * RDMA Endpoint -- one per transport instance */ @@ -454,11 +449,6 @@ extern int xprt_rdma_pad_optimize; */ extern unsigned int xprt_rdma_memreg_strategy; -/* - * Interface Adapter calls - xprtrdma/verbs.c - */ -void rpcrdma_ia_remove(struct rpcrdma_ia *ia); - /* * Endpoint calls - xprtrdma/verbs.c */ -- cgit v1.2.3 From d6ccebf956338ea015d7d54c4a4c9c17605707cb Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 21 Feb 2020 17:00:49 -0500 Subject: xprtrdma: Disconnect on flushed completion Completion errors after a disconnect often occur much sooner than a CM_DISCONNECT event. Use this to try to detect connection loss more quickly. Note that other kernel ULPs do take care to disconnect explicitly when a WR is flushed. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 3 ++- net/sunrpc/xprtrdma/frwr_ops.c | 24 ++++++++++++++++-------- net/sunrpc/xprtrdma/verbs.c | 37 ++++++++++++++++++++++++++++--------- net/sunrpc/xprtrdma/xprt_rdma.h | 1 + 4 files changed, 47 insertions(+), 18 deletions(-) (limited to 'net/sunrpc') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index ba37c47b51e8..cfbe28ad2614 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -109,7 +109,7 @@ DECLARE_EVENT_CLASS(xprtrdma_connect_class, __assign_str(port, rpcrdma_portstr(r_xprt)); ), - TP_printk("peer=[%s]:%s r_xprt=%p: rc=%d connect status=%d", + TP_printk("peer=[%s]:%s r_xprt=%p: rc=%d connection status=%d", __get_str(addr), __get_str(port), __entry->r_xprt, __entry->rc, __entry->connect_status ) @@ -411,6 +411,7 @@ TRACE_EVENT(xprtrdma_inline_thresh, DEFINE_CONN_EVENT(connect); DEFINE_CONN_EVENT(disconnect); +DEFINE_CONN_EVENT(flush_dct); DEFINE_RXPRT_EVENT(xprtrdma_create); DEFINE_RXPRT_EVENT(xprtrdma_op_destroy); diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index a1b5c8024cca..b482fac7be89 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -358,8 +358,8 @@ out_mapmr_err: /** * frwr_wc_fastreg - Invoked by RDMA provider for a flushed FastReg WC - * @cq: completion queue (ignored) - * @wc: completed WR + * @cq: completion queue + * @wc: WCE for a completed FastReg WR * */ static void frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc) @@ -371,6 +371,8 @@ static void frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc) /* WARNING: Only wr_cqe and status are reliable at this point */ trace_xprtrdma_wc_fastreg(wc, frwr); /* The MR will get recycled when the associated req is retransmitted */ + + rpcrdma_flush_disconnect(cq, wc); } /** @@ -441,8 +443,8 @@ static void __frwr_release_mr(struct ib_wc *wc, struct rpcrdma_mr *mr) /** * 
frwr_wc_localinv - Invoked by RDMA provider for a LOCAL_INV WC - * @cq: completion queue (ignored) - * @wc: completed WR + * @cq: completion queue + * @wc: WCE for a completed LocalInv WR * */ static void frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc) @@ -455,12 +457,14 @@ static void frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc) /* WARNING: Only wr_cqe and status are reliable at this point */ trace_xprtrdma_wc_li(wc, frwr); __frwr_release_mr(wc, mr); + + rpcrdma_flush_disconnect(cq, wc); } /** * frwr_wc_localinv_wake - Invoked by RDMA provider for a LOCAL_INV WC - * @cq: completion queue (ignored) - * @wc: completed WR + * @cq: completion queue + * @wc: WCE for a completed LocalInv WR * * Awaken anyone waiting for an MR to finish being fenced. */ @@ -475,6 +479,8 @@ static void frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc) trace_xprtrdma_wc_li_wake(wc, frwr); __frwr_release_mr(wc, mr); complete(&frwr->fr_linv_done); + + rpcrdma_flush_disconnect(cq, wc); } /** @@ -562,8 +568,8 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) /** * frwr_wc_localinv_done - Invoked by RDMA provider for a signaled LOCAL_INV WC - * @cq: completion queue (ignored) - * @wc: completed WR + * @cq: completion queue + * @wc: WCE for a completed LocalInv WR * */ static void frwr_wc_localinv_done(struct ib_cq *cq, struct ib_wc *wc) @@ -581,6 +587,8 @@ static void frwr_wc_localinv_done(struct ib_cq *cq, struct ib_wc *wc) /* Ensure @rep is generated before __frwr_release_mr */ smp_rmb(); rpcrdma_complete_rqst(rep); + + rpcrdma_flush_disconnect(cq, wc); } /** diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index a7f46bbbf017..dfe680e3234a 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -128,14 +128,32 @@ rpcrdma_qp_event_handler(struct ib_event *event, void *context) trace_xprtrdma_qp_event(r_xprt, event); } +/** + * rpcrdma_flush_disconnect - Disconnect on flushed completion + * @cq: 
completion queue + * @wc: work completion entry + * + * Must be called in process context. + */ +void rpcrdma_flush_disconnect(struct ib_cq *cq, struct ib_wc *wc) +{ + struct rpcrdma_xprt *r_xprt = cq->cq_context; + struct rpc_xprt *xprt = &r_xprt->rx_xprt; + + if (wc->status != IB_WC_SUCCESS && r_xprt->rx_ep.rep_connected == 1) { + r_xprt->rx_ep.rep_connected = -ECONNABORTED; + trace_xprtrdma_flush_dct(r_xprt, wc->status); + xprt_force_disconnect(xprt); + } +} + /** * rpcrdma_wc_send - Invoked by RDMA provider for each polled Send WC * @cq: completion queue - * @wc: completed WR + * @wc: WCE for a completed Send WR * */ -static void -rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc) +static void rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc) { struct ib_cqe *cqe = wc->wr_cqe; struct rpcrdma_sendctx *sc = @@ -144,21 +162,21 @@ rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc) /* WARNING: Only wr_cqe and status are reliable at this point */ trace_xprtrdma_wc_send(sc, wc); rpcrdma_sendctx_put_locked((struct rpcrdma_xprt *)cq->cq_context, sc); + rpcrdma_flush_disconnect(cq, wc); } /** * rpcrdma_wc_receive - Invoked by RDMA provider for each polled Receive WC - * @cq: completion queue (ignored) - * @wc: completed WR + * @cq: completion queue + * @wc: WCE for a completed Receive WR * */ -static void -rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) +static void rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) { struct ib_cqe *cqe = wc->wr_cqe; struct rpcrdma_rep *rep = container_of(cqe, struct rpcrdma_rep, rr_cqe); - struct rpcrdma_xprt *r_xprt = rep->rr_rxprt; + struct rpcrdma_xprt *r_xprt = cq->cq_context; /* WARNING: Only wr_cqe and status are reliable at this point */ trace_xprtrdma_wc_receive(wc); @@ -179,6 +197,7 @@ rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) return; out_flushed: + rpcrdma_flush_disconnect(cq, wc); rpcrdma_rep_destroy(rep); } @@ -395,7 +414,7 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) goto 
out_destroy; } - ep->rep_attr.recv_cq = ib_alloc_cq_any(id->device, NULL, + ep->rep_attr.recv_cq = ib_alloc_cq_any(id->device, r_xprt, ep->rep_attr.cap.max_recv_wr, IB_POLL_WORKQUEUE); if (IS_ERR(ep->rep_attr.recv_cq)) { diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index d2a0f125f7a8..8a3ac9d7ee81 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -452,6 +452,7 @@ extern unsigned int xprt_rdma_memreg_strategy; /* * Endpoint calls - xprtrdma/verbs.c */ +void rpcrdma_flush_disconnect(struct ib_cq *cq, struct ib_wc *wc); int rpcrdma_xprt_connect(struct rpcrdma_xprt *r_xprt); void rpcrdma_xprt_disconnect(struct rpcrdma_xprt *r_xprt); -- cgit v1.2.3 From 93aa8e0a9de80e1df2be17158a3469285e572b39 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 21 Feb 2020 17:00:54 -0500 Subject: xprtrdma: Merge struct rpcrdma_ia into struct rpcrdma_ep I eventually want to allocate rpcrdma_ep separately from struct rpcrdma_xprt so that on occasion there can be more than one ep per xprt. The new struct rpcrdma_ep will contain all the fields currently in rpcrdma_ia and in rpcrdma_ep. This is all the device and CM settings for the connection, in addition to per-connection settings negotiated with the remote. Take this opportunity to rename the existing ep fields from rep_* to re_* to disambiguate these from struct rpcrdma_rep. 
Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 12 +- net/sunrpc/xprtrdma/backchannel.c | 4 +- net/sunrpc/xprtrdma/frwr_ops.c | 108 +++++++-------- net/sunrpc/xprtrdma/rpc_rdma.c | 31 ++--- net/sunrpc/xprtrdma/transport.c | 9 +- net/sunrpc/xprtrdma/verbs.c | 283 +++++++++++++++++++------------------- net/sunrpc/xprtrdma/xprt_rdma.h | 60 ++++---- 7 files changed, 246 insertions(+), 261 deletions(-) (limited to 'net/sunrpc') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index cfbe28ad2614..843269f0e291 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -104,7 +104,7 @@ DECLARE_EVENT_CLASS(xprtrdma_connect_class, TP_fast_assign( __entry->r_xprt = r_xprt; __entry->rc = rc; - __entry->connect_status = r_xprt->rx_ep.rep_connected; + __entry->connect_status = r_xprt->rx_ep.re_connect_status; __assign_str(addr, rpcrdma_addrstr(r_xprt)); __assign_str(port, rpcrdma_portstr(r_xprt)); ), @@ -394,10 +394,10 @@ TRACE_EVENT(xprtrdma_inline_thresh, const struct rpcrdma_ep *ep = &r_xprt->rx_ep; __entry->r_xprt = r_xprt; - __entry->inline_send = ep->rep_inline_send; - __entry->inline_recv = ep->rep_inline_recv; - __entry->max_send = ep->rep_max_inline_send; - __entry->max_recv = ep->rep_max_inline_recv; + __entry->inline_send = ep->re_inline_send; + __entry->inline_recv = ep->re_inline_recv; + __entry->max_send = ep->re_max_inline_send; + __entry->max_recv = ep->re_max_inline_recv; __assign_str(addr, rpcrdma_addrstr(r_xprt)); __assign_str(port, rpcrdma_portstr(r_xprt)); ), @@ -803,7 +803,7 @@ TRACE_EVENT(xprtrdma_post_recvs, __entry->r_xprt = r_xprt; __entry->count = count; __entry->status = status; - __entry->posted = r_xprt->rx_ep.rep_receive_count; + __entry->posted = r_xprt->rx_ep.re_receive_count; __assign_str(addr, rpcrdma_addrstr(r_xprt)); __assign_str(port, rpcrdma_portstr(r_xprt)); ), diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c 
index 4b43910a6ed2..4b20102cf060 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c @@ -47,7 +47,7 @@ size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *xprt) struct rpcrdma_ep *ep = &r_xprt->rx_ep; size_t maxmsg; - maxmsg = min_t(unsigned int, ep->rep_inline_send, ep->rep_inline_recv); + maxmsg = min_t(unsigned int, ep->re_inline_send, ep->re_inline_recv); maxmsg = min_t(unsigned int, maxmsg, PAGE_SIZE); return maxmsg - RPCRDMA_HDRLEN_MIN; } @@ -190,7 +190,7 @@ create_req: if (xprt->bc_alloc_count >= RPCRDMA_BACKWARD_WRS) return NULL; - size = min_t(size_t, r_xprt->rx_ep.rep_inline_recv, PAGE_SIZE); + size = min_t(size_t, r_xprt->rx_ep.re_inline_recv, PAGE_SIZE); req = rpcrdma_req_create(r_xprt, size, GFP_KERNEL); if (!req) return NULL; diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index b482fac7be89..19bf422f010b 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -74,7 +74,7 @@ static void frwr_mr_recycle(struct rpcrdma_mr *mr) if (mr->mr_dir != DMA_NONE) { trace_xprtrdma_mr_unmap(mr); - ib_dma_unmap_sg(r_xprt->rx_ia.ri_id->device, + ib_dma_unmap_sg(r_xprt->rx_ep.re_id->device, mr->mr_sg, mr->mr_nents, mr->mr_dir); mr->mr_dir = DMA_NONE; } @@ -115,13 +115,13 @@ void frwr_reset(struct rpcrdma_req *req) */ int frwr_mr_init(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr) { - struct rpcrdma_ia *ia = &r_xprt->rx_ia; - unsigned int depth = ia->ri_max_frwr_depth; + struct rpcrdma_ep *ep = &r_xprt->rx_ep; + unsigned int depth = ep->re_max_fr_depth; struct scatterlist *sg; struct ib_mr *frmr; int rc; - frmr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth); + frmr = ib_alloc_mr(ep->re_pd, ep->re_mrtype, depth); if (IS_ERR(frmr)) goto out_mr_err; @@ -151,29 +151,24 @@ out_list_err: /** * frwr_query_device - Prepare a transport for use with FRWR - * @r_xprt: controlling transport instance + * @ep: endpoint to fill in * @device: RDMA device to query * * On success, sets: - * 
ep->rep_attr - * ep->rep_max_requests - * ia->ri_max_rdma_segs - * - * And these FRWR-related fields: - * ia->ri_max_frwr_depth - * ia->ri_mrtype + * ep->re_attr + * ep->re_max_requests + * ep->re_max_rdma_segs + * ep->re_max_fr_depth + * ep->re_mrtype * * Return values: * On success, returns zero. * %-EINVAL - the device does not support FRWR memory registration * %-ENOMEM - the device is not sufficiently capable for NFS/RDMA */ -int frwr_query_device(struct rpcrdma_xprt *r_xprt, - const struct ib_device *device) +int frwr_query_device(struct rpcrdma_ep *ep, const struct ib_device *device) { const struct ib_device_attr *attrs = &device->attrs; - struct rpcrdma_ia *ia = &r_xprt->rx_ia; - struct rpcrdma_ep *ep = &r_xprt->rx_ep; int max_qp_wr, depth, delta; unsigned int max_sge; @@ -190,23 +185,23 @@ int frwr_query_device(struct rpcrdma_xprt *r_xprt, pr_err("rpcrdma: HCA provides only %u send SGEs\n", max_sge); return -ENOMEM; } - ep->rep_attr.cap.max_send_sge = max_sge; - ep->rep_attr.cap.max_recv_sge = 1; + ep->re_attr.cap.max_send_sge = max_sge; + ep->re_attr.cap.max_recv_sge = 1; - ia->ri_mrtype = IB_MR_TYPE_MEM_REG; + ep->re_mrtype = IB_MR_TYPE_MEM_REG; if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG) - ia->ri_mrtype = IB_MR_TYPE_SG_GAPS; + ep->re_mrtype = IB_MR_TYPE_SG_GAPS; /* Quirk: Some devices advertise a large max_fast_reg_page_list_len * capability, but perform optimally when the MRs are not larger * than a page. */ if (attrs->max_sge_rd > RPCRDMA_MAX_HDR_SEGS) - ia->ri_max_frwr_depth = attrs->max_sge_rd; + ep->re_max_fr_depth = attrs->max_sge_rd; else - ia->ri_max_frwr_depth = attrs->max_fast_reg_page_list_len; - if (ia->ri_max_frwr_depth > RPCRDMA_MAX_DATA_SEGS) - ia->ri_max_frwr_depth = RPCRDMA_MAX_DATA_SEGS; + ep->re_max_fr_depth = attrs->max_fast_reg_page_list_len; + if (ep->re_max_fr_depth > RPCRDMA_MAX_DATA_SEGS) + ep->re_max_fr_depth = RPCRDMA_MAX_DATA_SEGS; /* Add room for frwr register and invalidate WRs. * 1. 
FRWR reg WR for head @@ -222,11 +217,11 @@ int frwr_query_device(struct rpcrdma_xprt *r_xprt, /* Calculate N if the device max FRWR depth is smaller than * RPCRDMA_MAX_DATA_SEGS. */ - if (ia->ri_max_frwr_depth < RPCRDMA_MAX_DATA_SEGS) { - delta = RPCRDMA_MAX_DATA_SEGS - ia->ri_max_frwr_depth; + if (ep->re_max_fr_depth < RPCRDMA_MAX_DATA_SEGS) { + delta = RPCRDMA_MAX_DATA_SEGS - ep->re_max_fr_depth; do { depth += 2; /* FRWR reg + invalidate */ - delta -= ia->ri_max_frwr_depth; + delta -= ep->re_max_fr_depth; } while (delta > 0); } @@ -235,34 +230,34 @@ int frwr_query_device(struct rpcrdma_xprt *r_xprt, max_qp_wr -= 1; if (max_qp_wr < RPCRDMA_MIN_SLOT_TABLE) return -ENOMEM; - if (ep->rep_max_requests > max_qp_wr) - ep->rep_max_requests = max_qp_wr; - ep->rep_attr.cap.max_send_wr = ep->rep_max_requests * depth; - if (ep->rep_attr.cap.max_send_wr > max_qp_wr) { - ep->rep_max_requests = max_qp_wr / depth; - if (!ep->rep_max_requests) + if (ep->re_max_requests > max_qp_wr) + ep->re_max_requests = max_qp_wr; + ep->re_attr.cap.max_send_wr = ep->re_max_requests * depth; + if (ep->re_attr.cap.max_send_wr > max_qp_wr) { + ep->re_max_requests = max_qp_wr / depth; + if (!ep->re_max_requests) return -ENOMEM; - ep->rep_attr.cap.max_send_wr = ep->rep_max_requests * depth; + ep->re_attr.cap.max_send_wr = ep->re_max_requests * depth; } - ep->rep_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS; - ep->rep_attr.cap.max_send_wr += 1; /* for ib_drain_sq */ - ep->rep_attr.cap.max_recv_wr = ep->rep_max_requests; - ep->rep_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS; - ep->rep_attr.cap.max_recv_wr += 1; /* for ib_drain_rq */ - - ia->ri_max_rdma_segs = - DIV_ROUND_UP(RPCRDMA_MAX_DATA_SEGS, ia->ri_max_frwr_depth); + ep->re_attr.cap.max_send_wr += RPCRDMA_BACKWARD_WRS; + ep->re_attr.cap.max_send_wr += 1; /* for ib_drain_sq */ + ep->re_attr.cap.max_recv_wr = ep->re_max_requests; + ep->re_attr.cap.max_recv_wr += RPCRDMA_BACKWARD_WRS; + ep->re_attr.cap.max_recv_wr += 1; /* for ib_drain_rq */ + + 
ep->re_max_rdma_segs = + DIV_ROUND_UP(RPCRDMA_MAX_DATA_SEGS, ep->re_max_fr_depth); /* Reply chunks require segments for head and tail buffers */ - ia->ri_max_rdma_segs += 2; - if (ia->ri_max_rdma_segs > RPCRDMA_MAX_HDR_SEGS) - ia->ri_max_rdma_segs = RPCRDMA_MAX_HDR_SEGS; + ep->re_max_rdma_segs += 2; + if (ep->re_max_rdma_segs > RPCRDMA_MAX_HDR_SEGS) + ep->re_max_rdma_segs = RPCRDMA_MAX_HDR_SEGS; /* Ensure the underlying device is capable of conveying the * largest r/wsize NFS will ask for. This guarantees that * failing over from one RDMA device to another will not * break NFS I/O. */ - if ((ia->ri_max_rdma_segs * ia->ri_max_frwr_depth) < RPCRDMA_MAX_SEGS) + if ((ep->re_max_rdma_segs * ep->re_max_fr_depth) < RPCRDMA_MAX_SEGS) return -ENOMEM; return 0; @@ -288,14 +283,14 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt, int nsegs, bool writing, __be32 xid, struct rpcrdma_mr *mr) { - struct rpcrdma_ia *ia = &r_xprt->rx_ia; + struct rpcrdma_ep *ep = &r_xprt->rx_ep; struct ib_reg_wr *reg_wr; int i, n, dma_nents; struct ib_mr *ibmr; u8 key; - if (nsegs > ia->ri_max_frwr_depth) - nsegs = ia->ri_max_frwr_depth; + if (nsegs > ep->re_max_fr_depth) + nsegs = ep->re_max_fr_depth; for (i = 0; i < nsegs;) { if (seg->mr_page) sg_set_page(&mr->mr_sg[i], @@ -308,7 +303,7 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt, ++seg; ++i; - if (ia->ri_mrtype == IB_MR_TYPE_SG_GAPS) + if (ep->re_mrtype == IB_MR_TYPE_SG_GAPS) continue; if ((i < nsegs && offset_in_page(seg->mr_offset)) || offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len)) @@ -317,7 +312,7 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt, mr->mr_dir = rpcrdma_data_dir(writing); mr->mr_nents = i; - dma_nents = ib_dma_map_sg(ia->ri_id->device, mr->mr_sg, mr->mr_nents, + dma_nents = ib_dma_map_sg(ep->re_id->device, mr->mr_sg, mr->mr_nents, mr->mr_dir); if (!dma_nents) goto out_dmamap_err; @@ -391,7 +386,6 @@ static void frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc) */ int 
frwr_send(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) { - struct rpcrdma_ia *ia = &r_xprt->rx_ia; struct ib_send_wr *post_wr; struct rpcrdma_mr *mr; @@ -411,7 +405,7 @@ int frwr_send(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) post_wr = &frwr->fr_regwr.wr; } - return ib_post_send(ia->ri_id->qp, post_wr, NULL); + return ib_post_send(r_xprt->rx_ep.re_id->qp, post_wr, NULL); } /** @@ -538,10 +532,10 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) /* Transport disconnect drains the receive CQ before it * replaces the QP. The RPC reply handler won't call us - * unless ri_id->qp is a valid pointer. + * unless re_id->qp is a valid pointer. */ bad_wr = NULL; - rc = ib_post_send(r_xprt->rx_ia.ri_id->qp, first, &bad_wr); + rc = ib_post_send(r_xprt->rx_ep.re_id->qp, first, &bad_wr); /* The final LOCAL_INV WR in the chain is supposed to * do the wake. If it was never posted, the wake will @@ -643,10 +637,10 @@ void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) /* Transport disconnect drains the receive CQ before it * replaces the QP. The RPC reply handler won't call us - * unless ri_id->qp is a valid pointer. + * unless re_id->qp is a valid pointer. */ bad_wr = NULL; - rc = ib_post_send(r_xprt->rx_ia.ri_id->qp, first, &bad_wr); + rc = ib_post_send(r_xprt->rx_ep.re_id->qp, first, &bad_wr); if (!rc) return; diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index 28020ec104d4..ad7e6b0187bd 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -103,21 +103,20 @@ static unsigned int rpcrdma_max_reply_header_size(unsigned int maxsegs) /** * rpcrdma_set_max_header_sizes - Initialize inline payload sizes - * @r_xprt: transport instance to initialize + * @ep: endpoint to initialize * * The max_inline fields contain the maximum size of an RPC message * so the marshaling code doesn't have to repeat this calculation * for every RPC. 
*/ -void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *r_xprt) +void rpcrdma_set_max_header_sizes(struct rpcrdma_ep *ep) { - unsigned int maxsegs = r_xprt->rx_ia.ri_max_rdma_segs; - struct rpcrdma_ep *ep = &r_xprt->rx_ep; + unsigned int maxsegs = ep->re_max_rdma_segs; - ep->rep_max_inline_send = - ep->rep_inline_send - rpcrdma_max_call_header_size(maxsegs); - ep->rep_max_inline_recv = - ep->rep_inline_recv - rpcrdma_max_reply_header_size(maxsegs); + ep->re_max_inline_send = + ep->re_inline_send - rpcrdma_max_call_header_size(maxsegs); + ep->re_max_inline_recv = + ep->re_inline_recv - rpcrdma_max_reply_header_size(maxsegs); } /* The client can send a request inline as long as the RPCRDMA header @@ -134,7 +133,7 @@ static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdr = &rqst->rq_snd_buf; unsigned int count, remaining, offset; - if (xdr->len > r_xprt->rx_ep.rep_max_inline_send) + if (xdr->len > r_xprt->rx_ep.re_max_inline_send) return false; if (xdr->page_len) { @@ -145,7 +144,7 @@ static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt, remaining -= min_t(unsigned int, PAGE_SIZE - offset, remaining); offset = 0; - if (++count > r_xprt->rx_ep.rep_attr.cap.max_send_sge) + if (++count > r_xprt->rx_ep.re_attr.cap.max_send_sge) return false; } } @@ -162,7 +161,7 @@ static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt, static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst) { - return rqst->rq_rcv_buf.buflen <= r_xprt->rx_ep.rep_max_inline_recv; + return rqst->rq_rcv_buf.buflen <= r_xprt->rx_ep.re_max_inline_recv; } /* The client is required to provide a Reply chunk if the maximum @@ -176,7 +175,7 @@ rpcrdma_nonpayload_inline(const struct rpcrdma_xprt *r_xprt, const struct xdr_buf *buf = &rqst->rq_rcv_buf; return (buf->head[0].iov_len + buf->tail[0].iov_len) < - r_xprt->rx_ep.rep_max_inline_recv; + r_xprt->rx_ep.re_max_inline_recv; } /* Split @vec on page boundaries into SGEs. 
FMR registers pages, not @@ -255,7 +254,7 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf, /* When encoding a Read chunk, the tail iovec contains an * XDR pad and may be omitted. */ - if (type == rpcrdma_readch && r_xprt->rx_ia.ri_implicit_roundup) + if (type == rpcrdma_readch && r_xprt->rx_ep.re_implicit_roundup) goto out; /* When encoding a Write chunk, some servers need to see an @@ -263,7 +262,7 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf, * layer provides space in the tail iovec that may be used * for this purpose. */ - if (type == rpcrdma_writech && r_xprt->rx_ia.ri_implicit_roundup) + if (type == rpcrdma_writech && r_xprt->rx_ep.re_implicit_roundup) goto out; if (xdrbuf->tail[0].iov_len) @@ -1476,8 +1475,8 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep) if (credits == 0) credits = 1; /* don't deadlock */ - else if (credits > r_xprt->rx_ep.rep_max_requests) - credits = r_xprt->rx_ep.rep_max_requests; + else if (credits > r_xprt->rx_ep.re_max_requests) + credits = r_xprt->rx_ep.re_max_requests; if (buf->rb_credits != credits) rpcrdma_update_cwnd(r_xprt, credits); rpcrdma_post_recvs(r_xprt, false); diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index d7b7dab0aeb6..4352fd6e9817 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -238,11 +238,12 @@ xprt_rdma_connect_worker(struct work_struct *work) struct rpcrdma_xprt *r_xprt = container_of(work, struct rpcrdma_xprt, rx_connect_worker.work); struct rpc_xprt *xprt = &r_xprt->rx_xprt; + struct rpcrdma_ep *ep = &r_xprt->rx_ep; int rc; rc = rpcrdma_xprt_connect(r_xprt); xprt_clear_connecting(xprt); - if (r_xprt->rx_ep.rep_connected > 0) { + if (ep->re_connect_status > 0) { xprt->stat.connect_count++; xprt->stat.connect_time += (long)jiffies - xprt->stat.connect_start; @@ -265,7 +266,7 @@ xprt_rdma_inject_disconnect(struct rpc_xprt *xprt) struct rpcrdma_xprt *r_xprt = 
rpcx_to_rdmax(xprt); trace_xprtrdma_op_inject_dsc(r_xprt); - rdma_disconnect(r_xprt->rx_ia.ri_id); + rdma_disconnect(r_xprt->rx_ep.re_id); } /** @@ -355,6 +356,7 @@ xprt_setup_rdma(struct xprt_create *args) INIT_DELAYED_WORK(&new_xprt->rx_connect_worker, xprt_rdma_connect_worker); + xprt->max_payload = RPCRDMA_MAX_DATA_SEGS << PAGE_SHIFT; dprintk("RPC: %s: %s:%s\n", __func__, @@ -489,10 +491,11 @@ static void xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task) { struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + struct rpcrdma_ep *ep = &r_xprt->rx_ep; unsigned long delay; delay = 0; - if (r_xprt->rx_ep.rep_connected != 0) { + if (ep->re_connect_status != 0) { delay = xprt_reconnect_delay(xprt); xprt_reconnect_backoff(xprt, RPCRDMA_INIT_REEST_TO); } diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index dfe680e3234a..10826982ddf8 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -97,17 +97,17 @@ static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb); */ static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt) { - struct rpcrdma_ia *ia = &r_xprt->rx_ia; + struct rdma_cm_id *id = r_xprt->rx_ep.re_id; /* Flush Receives, then wait for deferred Reply work * to complete. */ - ib_drain_rq(ia->ri_id->qp); + ib_drain_rq(id->qp); /* Deferred Reply processing might have scheduled * local invalidations. 
*/ - ib_drain_sq(ia->ri_id->qp); + ib_drain_sq(id->qp); } /** @@ -140,8 +140,9 @@ void rpcrdma_flush_disconnect(struct ib_cq *cq, struct ib_wc *wc) struct rpcrdma_xprt *r_xprt = cq->cq_context; struct rpc_xprt *xprt = &r_xprt->rx_xprt; - if (wc->status != IB_WC_SUCCESS && r_xprt->rx_ep.rep_connected == 1) { - r_xprt->rx_ep.rep_connected = -ECONNABORTED; + if (wc->status != IB_WC_SUCCESS && + r_xprt->rx_ep.re_connect_status == 1) { + r_xprt->rx_ep.re_connect_status = -ECONNABORTED; trace_xprtrdma_flush_dct(r_xprt, wc->status); xprt_force_disconnect(xprt); } @@ -180,7 +181,7 @@ static void rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) /* WARNING: Only wr_cqe and status are reliable at this point */ trace_xprtrdma_wc_receive(wc); - --r_xprt->rx_ep.rep_receive_count; + --r_xprt->rx_ep.re_receive_count; if (wc->status != IB_WC_SUCCESS) goto out_flushed; @@ -209,24 +210,24 @@ static void rpcrdma_update_cm_private(struct rpcrdma_xprt *r_xprt, unsigned int rsize, wsize; /* Default settings for RPC-over-RDMA Version One */ - r_xprt->rx_ia.ri_implicit_roundup = xprt_rdma_pad_optimize; + ep->re_implicit_roundup = xprt_rdma_pad_optimize; rsize = RPCRDMA_V1_DEF_INLINE_SIZE; wsize = RPCRDMA_V1_DEF_INLINE_SIZE; if (pmsg && pmsg->cp_magic == rpcrdma_cmp_magic && pmsg->cp_version == RPCRDMA_CMP_VERSION) { - r_xprt->rx_ia.ri_implicit_roundup = true; + ep->re_implicit_roundup = true; rsize = rpcrdma_decode_buffer_size(pmsg->cp_send_size); wsize = rpcrdma_decode_buffer_size(pmsg->cp_recv_size); } - if (rsize < ep->rep_inline_recv) - ep->rep_inline_recv = rsize; - if (wsize < ep->rep_inline_send) - ep->rep_inline_send = wsize; + if (rsize < ep->re_inline_recv) + ep->re_inline_recv = rsize; + if (wsize < ep->re_inline_send) + ep->re_inline_send = wsize; - rpcrdma_set_max_header_sizes(r_xprt); + rpcrdma_set_max_header_sizes(ep); } /** @@ -241,7 +242,6 @@ static int rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) { struct rpcrdma_xprt *r_xprt = 
id->context; - struct rpcrdma_ia *ia = &r_xprt->rx_ia; struct rpcrdma_ep *ep = &r_xprt->rx_ep; struct rpc_xprt *xprt = &r_xprt->rx_xprt; @@ -251,57 +251,57 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) switch (event->event) { case RDMA_CM_EVENT_ADDR_RESOLVED: case RDMA_CM_EVENT_ROUTE_RESOLVED: - ia->ri_async_rc = 0; - complete(&ia->ri_done); + ep->re_async_rc = 0; + complete(&ep->re_done); return 0; case RDMA_CM_EVENT_ADDR_ERROR: - ia->ri_async_rc = -EPROTO; - complete(&ia->ri_done); + ep->re_async_rc = -EPROTO; + complete(&ep->re_done); return 0; case RDMA_CM_EVENT_ROUTE_ERROR: - ia->ri_async_rc = -ENETUNREACH; - complete(&ia->ri_done); + ep->re_async_rc = -ENETUNREACH; + complete(&ep->re_done); return 0; case RDMA_CM_EVENT_DEVICE_REMOVAL: #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) pr_info("rpcrdma: removing device %s for %s:%s\n", - ia->ri_id->device->name, + ep->re_id->device->name, rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt)); #endif - init_completion(&ia->ri_remove_done); - ep->rep_connected = -ENODEV; + init_completion(&ep->re_remove_done); + ep->re_connect_status = -ENODEV; xprt_force_disconnect(xprt); - wait_for_completion(&ia->ri_remove_done); + wait_for_completion(&ep->re_remove_done); trace_xprtrdma_remove(r_xprt); /* Return 1 to ensure the core destroys the id. 
*/ return 1; case RDMA_CM_EVENT_ESTABLISHED: ++xprt->connect_cookie; - ep->rep_connected = 1; + ep->re_connect_status = 1; rpcrdma_update_cm_private(r_xprt, &event->param.conn); trace_xprtrdma_inline_thresh(r_xprt); - wake_up_all(&ep->rep_connect_wait); + wake_up_all(&ep->re_connect_wait); break; case RDMA_CM_EVENT_CONNECT_ERROR: - ep->rep_connected = -ENOTCONN; + ep->re_connect_status = -ENOTCONN; goto disconnected; case RDMA_CM_EVENT_UNREACHABLE: - ep->rep_connected = -ENETUNREACH; + ep->re_connect_status = -ENETUNREACH; goto disconnected; case RDMA_CM_EVENT_REJECTED: dprintk("rpcrdma: connection to %s:%s rejected: %s\n", rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt), rdma_reject_msg(id, event->status)); - ep->rep_connected = -ECONNREFUSED; + ep->re_connect_status = -ECONNREFUSED; if (event->status == IB_CM_REJ_STALE_CONN) - ep->rep_connected = -EAGAIN; + ep->re_connect_status = -EAGAIN; goto disconnected; case RDMA_CM_EVENT_DISCONNECTED: - ep->rep_connected = -ECONNABORTED; + ep->re_connect_status = -ECONNABORTED; disconnected: xprt_force_disconnect(xprt); - wake_up_all(&ep->rep_connect_wait); + wake_up_all(&ep->re_connect_wait); break; default: break; @@ -309,46 +309,46 @@ disconnected: dprintk("RPC: %s: %s:%s on %s/frwr: %s\n", __func__, rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt), - ia->ri_id->device->name, rdma_event_msg(event->event)); + ep->re_id->device->name, rdma_event_msg(event->event)); return 0; } -static struct rdma_cm_id * -rpcrdma_create_id(struct rpcrdma_xprt *xprt, struct rpcrdma_ia *ia) +static struct rdma_cm_id *rpcrdma_create_id(struct rpcrdma_xprt *r_xprt, + struct rpcrdma_ep *ep) { unsigned long wtimeout = msecs_to_jiffies(RDMA_RESOLVE_TIMEOUT) + 1; + struct rpc_xprt *xprt = &r_xprt->rx_xprt; struct rdma_cm_id *id; int rc; - init_completion(&ia->ri_done); + init_completion(&ep->re_done); - id = rdma_create_id(xprt->rx_xprt.xprt_net, rpcrdma_cm_event_handler, - xprt, RDMA_PS_TCP, IB_QPT_RC); + id = rdma_create_id(xprt->xprt_net, 
rpcrdma_cm_event_handler, r_xprt, + RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(id)) return id; - ia->ri_async_rc = -ETIMEDOUT; - rc = rdma_resolve_addr(id, NULL, - (struct sockaddr *)&xprt->rx_xprt.addr, + ep->re_async_rc = -ETIMEDOUT; + rc = rdma_resolve_addr(id, NULL, (struct sockaddr *)&xprt->addr, RDMA_RESOLVE_TIMEOUT); if (rc) goto out; - rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout); + rc = wait_for_completion_interruptible_timeout(&ep->re_done, wtimeout); if (rc < 0) goto out; - rc = ia->ri_async_rc; + rc = ep->re_async_rc; if (rc) goto out; - ia->ri_async_rc = -ETIMEDOUT; + ep->re_async_rc = -ETIMEDOUT; rc = rdma_resolve_route(id, RDMA_RESOLVE_TIMEOUT); if (rc) goto out; - rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout); + rc = wait_for_completion_interruptible_timeout(&ep->re_done, wtimeout); if (rc < 0) goto out; - rc = ia->ri_async_rc; + rc = ep->re_async_rc; if (rc) goto out; @@ -366,102 +366,101 @@ out: static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) { struct rpcrdma_ep *ep = &r_xprt->rx_ep; - struct rpcrdma_ia *ia = &r_xprt->rx_ia; - struct rpcrdma_connect_private *pmsg = &ep->rep_cm_private; + struct rpcrdma_connect_private *pmsg = &ep->re_cm_private; struct rdma_cm_id *id; int rc; - id = rpcrdma_create_id(r_xprt, ia); + id = rpcrdma_create_id(r_xprt, ep); if (IS_ERR(id)) return PTR_ERR(id); - ep->rep_max_requests = r_xprt->rx_xprt.max_reqs; - ep->rep_inline_send = xprt_rdma_max_inline_write; - ep->rep_inline_recv = xprt_rdma_max_inline_read; - - rc = frwr_query_device(r_xprt, id->device); + ep->re_max_requests = r_xprt->rx_xprt.max_reqs; + ep->re_inline_send = xprt_rdma_max_inline_write; + ep->re_inline_recv = xprt_rdma_max_inline_read; + rc = frwr_query_device(ep, id->device); if (rc) goto out_destroy; - r_xprt->rx_buf.rb_max_requests = cpu_to_be32(ep->rep_max_requests); + r_xprt->rx_buf.rb_max_requests = cpu_to_be32(ep->re_max_requests); - ep->rep_attr.event_handler = rpcrdma_qp_event_handler; - 
ep->rep_attr.qp_context = ep; - ep->rep_attr.srq = NULL; - ep->rep_attr.cap.max_inline_data = 0; - ep->rep_attr.sq_sig_type = IB_SIGNAL_REQ_WR; - ep->rep_attr.qp_type = IB_QPT_RC; - ep->rep_attr.port_num = ~0; + ep->re_attr.event_handler = rpcrdma_qp_event_handler; + ep->re_attr.qp_context = ep; + ep->re_attr.srq = NULL; + ep->re_attr.cap.max_inline_data = 0; + ep->re_attr.sq_sig_type = IB_SIGNAL_REQ_WR; + ep->re_attr.qp_type = IB_QPT_RC; + ep->re_attr.port_num = ~0; dprintk("RPC: %s: requested max: dtos: send %d recv %d; " "iovs: send %d recv %d\n", __func__, - ep->rep_attr.cap.max_send_wr, - ep->rep_attr.cap.max_recv_wr, - ep->rep_attr.cap.max_send_sge, - ep->rep_attr.cap.max_recv_sge); - - ep->rep_send_batch = ep->rep_max_requests >> 3; - ep->rep_send_count = ep->rep_send_batch; - init_waitqueue_head(&ep->rep_connect_wait); - ep->rep_receive_count = 0; - - ep->rep_attr.send_cq = ib_alloc_cq_any(id->device, r_xprt, - ep->rep_attr.cap.max_send_wr, - IB_POLL_WORKQUEUE); - if (IS_ERR(ep->rep_attr.send_cq)) { - rc = PTR_ERR(ep->rep_attr.send_cq); + ep->re_attr.cap.max_send_wr, + ep->re_attr.cap.max_recv_wr, + ep->re_attr.cap.max_send_sge, + ep->re_attr.cap.max_recv_sge); + + ep->re_send_batch = ep->re_max_requests >> 3; + ep->re_send_count = ep->re_send_batch; + init_waitqueue_head(&ep->re_connect_wait); + + ep->re_attr.send_cq = ib_alloc_cq_any(id->device, r_xprt, + ep->re_attr.cap.max_send_wr, + IB_POLL_WORKQUEUE); + if (IS_ERR(ep->re_attr.send_cq)) { + rc = PTR_ERR(ep->re_attr.send_cq); goto out_destroy; } - ep->rep_attr.recv_cq = ib_alloc_cq_any(id->device, r_xprt, - ep->rep_attr.cap.max_recv_wr, - IB_POLL_WORKQUEUE); - if (IS_ERR(ep->rep_attr.recv_cq)) { - rc = PTR_ERR(ep->rep_attr.recv_cq); + ep->re_attr.recv_cq = ib_alloc_cq_any(id->device, r_xprt, + ep->re_attr.cap.max_recv_wr, + IB_POLL_WORKQUEUE); + if (IS_ERR(ep->re_attr.recv_cq)) { + rc = PTR_ERR(ep->re_attr.recv_cq); goto out_destroy; } + ep->re_receive_count = 0; /* Initialize cma parameters */ - 
memset(&ep->rep_remote_cma, 0, sizeof(ep->rep_remote_cma)); + memset(&ep->re_remote_cma, 0, sizeof(ep->re_remote_cma)); /* Prepare RDMA-CM private message */ pmsg->cp_magic = rpcrdma_cmp_magic; pmsg->cp_version = RPCRDMA_CMP_VERSION; pmsg->cp_flags |= RPCRDMA_CMP_F_SND_W_INV_OK; - pmsg->cp_send_size = rpcrdma_encode_buffer_size(ep->rep_inline_send); - pmsg->cp_recv_size = rpcrdma_encode_buffer_size(ep->rep_inline_recv); - ep->rep_remote_cma.private_data = pmsg; - ep->rep_remote_cma.private_data_len = sizeof(*pmsg); + pmsg->cp_send_size = rpcrdma_encode_buffer_size(ep->re_inline_send); + pmsg->cp_recv_size = rpcrdma_encode_buffer_size(ep->re_inline_recv); + ep->re_remote_cma.private_data = pmsg; + ep->re_remote_cma.private_data_len = sizeof(*pmsg); /* Client offers RDMA Read but does not initiate */ - ep->rep_remote_cma.initiator_depth = 0; - ep->rep_remote_cma.responder_resources = + ep->re_remote_cma.initiator_depth = 0; + ep->re_remote_cma.responder_resources = min_t(int, U8_MAX, id->device->attrs.max_qp_rd_atom); /* Limit transport retries so client can detect server * GID changes quickly. RPC layer handles re-establishing * transport connection and retransmission. */ - ep->rep_remote_cma.retry_count = 6; + ep->re_remote_cma.retry_count = 6; /* RPC-over-RDMA handles its own flow control. In addition, * make all RNR NAKs visible so we know that RPC-over-RDMA * flow control is working correctly (no NAKs should be seen). 
*/ - ep->rep_remote_cma.flow_control = 0; - ep->rep_remote_cma.rnr_retry_count = 0; + ep->re_remote_cma.flow_control = 0; + ep->re_remote_cma.rnr_retry_count = 0; - ia->ri_pd = ib_alloc_pd(id->device, 0); - if (IS_ERR(ia->ri_pd)) { - rc = PTR_ERR(ia->ri_pd); + ep->re_pd = ib_alloc_pd(id->device, 0); + if (IS_ERR(ep->re_pd)) { + rc = PTR_ERR(ep->re_pd); goto out_destroy; } - rc = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr); + rc = rdma_create_qp(id, ep->re_pd, &ep->re_attr); if (rc) goto out_destroy; - ia->ri_id = id; + + ep->re_id = id; return 0; out_destroy: @@ -473,23 +472,22 @@ out_destroy: static void rpcrdma_ep_destroy(struct rpcrdma_xprt *r_xprt) { struct rpcrdma_ep *ep = &r_xprt->rx_ep; - struct rpcrdma_ia *ia = &r_xprt->rx_ia; - if (ia->ri_id && ia->ri_id->qp) { - rdma_destroy_qp(ia->ri_id); - ia->ri_id->qp = NULL; + if (ep->re_id && ep->re_id->qp) { + rdma_destroy_qp(ep->re_id); + ep->re_id->qp = NULL; } - if (ep->rep_attr.recv_cq) - ib_free_cq(ep->rep_attr.recv_cq); - ep->rep_attr.recv_cq = NULL; - if (ep->rep_attr.send_cq) - ib_free_cq(ep->rep_attr.send_cq); - ep->rep_attr.send_cq = NULL; + if (ep->re_attr.recv_cq) + ib_free_cq(ep->re_attr.recv_cq); + ep->re_attr.recv_cq = NULL; + if (ep->re_attr.send_cq) + ib_free_cq(ep->re_attr.send_cq); + ep->re_attr.send_cq = NULL; - if (ia->ri_pd) - ib_dealloc_pd(ia->ri_pd); - ia->ri_pd = NULL; + if (ep->re_pd) + ib_dealloc_pd(ep->re_pd); + ep->re_pd = NULL; } /* @@ -499,7 +497,6 @@ int rpcrdma_xprt_connect(struct rpcrdma_xprt *r_xprt) { struct rpc_xprt *xprt = &r_xprt->rx_xprt; struct rpcrdma_ep *ep = &r_xprt->rx_ep; - struct rpcrdma_ia *ia = &r_xprt->rx_ia; int rc; retry: @@ -508,7 +505,7 @@ retry: if (rc) goto out_noupdate; - ep->rep_connected = 0; + ep->re_connect_status = 0; xprt_clear_connected(xprt); rpcrdma_reset_cwnd(r_xprt); @@ -518,17 +515,18 @@ retry: if (rc) goto out; - rc = rdma_connect(ia->ri_id, &ep->rep_remote_cma); + rc = rdma_connect(ep->re_id, &ep->re_remote_cma); if (rc) goto out; if 
(xprt->reestablish_timeout < RPCRDMA_INIT_REEST_TO) xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO; - wait_event_interruptible(ep->rep_connect_wait, ep->rep_connected != 0); - if (ep->rep_connected <= 0) { - if (ep->rep_connected == -EAGAIN) + wait_event_interruptible(ep->re_connect_wait, + ep->re_connect_status != 0); + if (ep->re_connect_status <= 0) { + if (ep->re_connect_status == -EAGAIN) goto retry; - rc = ep->rep_connected; + rc = ep->re_connect_status; goto out; } @@ -541,7 +539,7 @@ retry: out: if (rc) - ep->rep_connected = rc; + ep->re_connect_status = rc; out_noupdate: trace_xprtrdma_connect(r_xprt, rc); @@ -558,9 +556,8 @@ out_noupdate: void rpcrdma_xprt_disconnect(struct rpcrdma_xprt *r_xprt) { struct rpcrdma_ep *ep = &r_xprt->rx_ep; - struct rpcrdma_ia *ia = &r_xprt->rx_ia; - struct rdma_cm_id *id = ia->ri_id; - int rc, status = ep->rep_connected; + struct rdma_cm_id *id = ep->re_id; + int rc, status = ep->re_connect_status; might_sleep(); @@ -569,10 +566,10 @@ void rpcrdma_xprt_disconnect(struct rpcrdma_xprt *r_xprt) rc = rdma_disconnect(id); if (!rc) - wait_event_interruptible(ep->rep_connect_wait, - ep->rep_connected != 1); + wait_event_interruptible(ep->re_connect_wait, + ep->re_connect_status != 1); else - ep->rep_connected = rc; + ep->re_connect_status = rc; trace_xprtrdma_disconnect(r_xprt, rc); if (id->qp) @@ -585,10 +582,10 @@ void rpcrdma_xprt_disconnect(struct rpcrdma_xprt *r_xprt) rpcrdma_ep_destroy(r_xprt); if (status == -ENODEV) - complete(&ia->ri_remove_done); + complete(&ep->re_remove_done); else rdma_destroy_id(id); - ia->ri_id = NULL; + ep->re_id = NULL; } /* Fixed-size circular FIFO queue. 
This implementation is wait-free and @@ -625,7 +622,7 @@ static struct rpcrdma_sendctx *rpcrdma_sendctx_create(struct rpcrdma_ep *ep) { struct rpcrdma_sendctx *sc; - sc = kzalloc(struct_size(sc, sc_sges, ep->rep_attr.cap.max_send_sge), + sc = kzalloc(struct_size(sc, sc_sges, ep->re_attr.cap.max_send_sge), GFP_KERNEL); if (!sc) return NULL; @@ -645,7 +642,7 @@ static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt) * the ->send_request call to fail temporarily before too many * Sends are posted. */ - i = r_xprt->rx_ep.rep_max_requests + RPCRDMA_MAX_BC_REQUESTS; + i = r_xprt->rx_ep.re_max_requests + RPCRDMA_MAX_BC_REQUESTS; buf->rb_sc_ctxs = kcalloc(i, sizeof(sc), GFP_KERNEL); if (!buf->rb_sc_ctxs) return -ENOMEM; @@ -756,10 +753,10 @@ static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt) { struct rpcrdma_buffer *buf = &r_xprt->rx_buf; - struct rpcrdma_ia *ia = &r_xprt->rx_ia; + struct rpcrdma_ep *ep = &r_xprt->rx_ep; unsigned int count; - for (count = 0; count < ia->ri_max_rdma_segs; count++) { + for (count = 0; count < ep->re_max_rdma_segs; count++) { struct rpcrdma_mr *mr; int rc; @@ -808,7 +805,7 @@ void rpcrdma_mrs_refresh(struct rpcrdma_xprt *r_xprt) /* If there is no underlying connection, it's no use * to wake the refresh worker. */ - if (ep->rep_connected == 1) { + if (ep->re_connect_status == 1) { /* The work is scheduled on a WQ_MEM_RECLAIM * workqueue in order to prevent MR allocation * from recursing into NFS during direct reclaim. 
@@ -872,7 +869,7 @@ int rpcrdma_req_setup(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) /* Compute maximum header buffer size in bytes */ maxhdrsize = rpcrdma_fixed_maxsz + 3 + - r_xprt->rx_ia.ri_max_rdma_segs * rpcrdma_readchunk_maxsz; + r_xprt->rx_ep.re_max_rdma_segs * rpcrdma_readchunk_maxsz; maxhdrsize *= sizeof(__be32); rb = rpcrdma_regbuf_alloc(__roundup_pow_of_two(maxhdrsize), DMA_TO_DEVICE, GFP_KERNEL); @@ -950,7 +947,7 @@ struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt, if (rep == NULL) goto out; - rep->rr_rdmabuf = rpcrdma_regbuf_alloc(r_xprt->rx_ep.rep_inline_recv, + rep->rr_rdmabuf = rpcrdma_regbuf_alloc(r_xprt->rx_ep.re_inline_recv, DMA_FROM_DEVICE, GFP_KERNEL); if (!rep->rr_rdmabuf) goto out_free; @@ -1175,7 +1172,7 @@ void rpcrdma_mr_put(struct rpcrdma_mr *mr) if (mr->mr_dir != DMA_NONE) { trace_xprtrdma_mr_unmap(mr); - ib_dma_unmap_sg(r_xprt->rx_ia.ri_id->device, + ib_dma_unmap_sg(r_xprt->rx_ep.re_id->device, mr->mr_sg, mr->mr_nents, mr->mr_dir); mr->mr_dir = DMA_NONE; } @@ -1293,7 +1290,7 @@ bool rpcrdma_regbuf_realloc(struct rpcrdma_regbuf *rb, size_t size, gfp_t flags) bool __rpcrdma_regbuf_dma_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_regbuf *rb) { - struct ib_device *device = r_xprt->rx_ia.ri_id->device; + struct ib_device *device = r_xprt->rx_ep.re_id->device; if (rb->rg_direction == DMA_NONE) return false; @@ -1306,7 +1303,7 @@ bool __rpcrdma_regbuf_dma_map(struct rpcrdma_xprt *r_xprt, } rb->rg_device = device; - rb->rg_iov.lkey = r_xprt->rx_ia.ri_pd->local_dma_lkey; + rb->rg_iov.lkey = r_xprt->rx_ep.re_pd->local_dma_lkey; return true; } @@ -1345,12 +1342,12 @@ int rpcrdma_post_sends(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) struct rpcrdma_ep *ep = &r_xprt->rx_ep; int rc; - if (!ep->rep_send_count || kref_read(&req->rl_kref) > 1) { + if (!ep->re_send_count || kref_read(&req->rl_kref) > 1) { send_wr->send_flags |= IB_SEND_SIGNALED; - ep->rep_send_count = ep->rep_send_batch; + ep->re_send_count = 
ep->re_send_batch; } else { send_wr->send_flags &= ~IB_SEND_SIGNALED; - --ep->rep_send_count; + --ep->re_send_count; } rc = frwr_send(r_xprt, req); @@ -1378,9 +1375,9 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp) count = 0; needed = buf->rb_credits + (buf->rb_bc_srv_max_requests << 1); - if (likely(ep->rep_receive_count > needed)) + if (likely(ep->re_receive_count > needed)) goto out; - needed -= ep->rep_receive_count; + needed -= ep->re_receive_count; if (!temp) needed += RPCRDMA_MAX_RECV_BATCH; @@ -1406,7 +1403,7 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp) if (!wr) goto out; - rc = ib_post_recv(r_xprt->rx_ia.ri_id->qp, wr, + rc = ib_post_recv(r_xprt->rx_ep.re_id->qp, wr, (const struct ib_recv_wr **)&bad_wr); out: trace_xprtrdma_post_recvs(r_xprt, count, rc); @@ -1420,6 +1417,6 @@ out: --count; } } - ep->rep_receive_count += count; + ep->re_receive_count += count; return; } diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 8a3ac9d7ee81..f3c0b826c9ed 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -65,38 +65,32 @@ #define RPCRDMA_IDLE_DISC_TO (5U * 60 * HZ) /* - * Interface Adapter -- one per transport instance + * RDMA Endpoint -- connection endpoint details */ -struct rpcrdma_ia { - struct rdma_cm_id *ri_id; - struct ib_pd *ri_pd; - int ri_async_rc; - unsigned int ri_max_rdma_segs; - unsigned int ri_max_frwr_depth; - bool ri_implicit_roundup; - enum ib_mr_type ri_mrtype; - struct completion ri_done; - struct completion ri_remove_done; -}; - -/* - * RDMA Endpoint -- one per transport instance - */ - struct rpcrdma_ep { - unsigned int rep_send_count; - unsigned int rep_send_batch; - unsigned int rep_max_inline_send; - unsigned int rep_max_inline_recv; - int rep_connected; - struct ib_qp_init_attr rep_attr; - wait_queue_head_t rep_connect_wait; - struct rpcrdma_connect_private rep_cm_private; - struct rdma_conn_param rep_remote_cma; - unsigned int 
rep_max_requests; /* depends on device */ - unsigned int rep_inline_send; /* negotiated */ - unsigned int rep_inline_recv; /* negotiated */ - int rep_receive_count; + struct rdma_cm_id *re_id; + struct ib_pd *re_pd; + unsigned int re_max_rdma_segs; + unsigned int re_max_fr_depth; + bool re_implicit_roundup; + enum ib_mr_type re_mrtype; + struct completion re_done; + struct completion re_remove_done; + unsigned int re_send_count; + unsigned int re_send_batch; + unsigned int re_max_inline_send; + unsigned int re_max_inline_recv; + int re_async_rc; + int re_connect_status; + struct ib_qp_init_attr re_attr; + wait_queue_head_t re_connect_wait; + struct rpcrdma_connect_private + re_cm_private; + struct rdma_conn_param re_remote_cma; + int re_receive_count; + unsigned int re_max_requests; /* depends on device */ + unsigned int re_inline_send; /* negotiated */ + unsigned int re_inline_recv; /* negotiated */ }; /* Pre-allocate extra Work Requests for handling backward receives @@ -417,7 +411,6 @@ struct rpcrdma_stats { */ struct rpcrdma_xprt { struct rpc_xprt rx_xprt; - struct rpcrdma_ia rx_ia; struct rpcrdma_ep rx_ep; struct rpcrdma_buffer rx_buf; struct delayed_work rx_connect_worker; @@ -522,8 +515,7 @@ rpcrdma_data_dir(bool writing) /* Memory registration calls xprtrdma/frwr_ops.c */ void frwr_reset(struct rpcrdma_req *req); -int frwr_query_device(struct rpcrdma_xprt *r_xprt, - const struct ib_device *device); +int frwr_query_device(struct rpcrdma_ep *ep, const struct ib_device *device); int frwr_mr_init(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr); void frwr_release_mr(struct rpcrdma_mr *mr); struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt, @@ -555,7 +547,7 @@ int rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt, enum rpcrdma_chunktype rtype); void rpcrdma_sendctx_unmap(struct rpcrdma_sendctx *sc); int rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst); -void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *); +void 
rpcrdma_set_max_header_sizes(struct rpcrdma_ep *ep); void rpcrdma_reset_cwnd(struct rpcrdma_xprt *r_xprt); void rpcrdma_complete_rqst(struct rpcrdma_rep *rep); void rpcrdma_reply_handler(struct rpcrdma_rep *rep); -- cgit v1.2.3 From 745b734c9bb80559b8ca64ae688901afefc1c3fd Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 21 Feb 2020 17:01:00 -0500 Subject: xprtrdma: Extract sockaddr from struct rdma_cm_id rpcrdma_cm_event_handler() is always passed an @id pointer that is valid. However, in a subsequent patch, we won't be able to extract an r_xprt in every case. So instead of using the r_xprt's presentation address strings, extract them from struct rdma_cm_id. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 78 ++++++++++++++++++++++++++++-------------- net/sunrpc/xprtrdma/verbs.c | 35 ++++++++----------- 2 files changed, 67 insertions(+), 46 deletions(-) (limited to 'net/sunrpc') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 843269f0e291..295f75b9b796 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -375,47 +375,74 @@ TRACE_EVENT(xprtrdma_cm_event, TRACE_EVENT(xprtrdma_inline_thresh, TP_PROTO( - const struct rpcrdma_xprt *r_xprt + const struct rpcrdma_ep *ep ), - TP_ARGS(r_xprt), + TP_ARGS(ep), TP_STRUCT__entry( - __field(const void *, r_xprt) __field(unsigned int, inline_send) __field(unsigned int, inline_recv) __field(unsigned int, max_send) __field(unsigned int, max_recv) - __string(addr, rpcrdma_addrstr(r_xprt)) - __string(port, rpcrdma_portstr(r_xprt)) + __array(unsigned char, srcaddr, sizeof(struct sockaddr_in6)) + __array(unsigned char, dstaddr, sizeof(struct sockaddr_in6)) ), TP_fast_assign( - const struct rpcrdma_ep *ep = &r_xprt->rx_ep; + const struct rdma_cm_id *id = ep->re_id; - __entry->r_xprt = r_xprt; __entry->inline_send = ep->re_inline_send; __entry->inline_recv = ep->re_inline_recv; __entry->max_send = ep->re_max_inline_send; 
__entry->max_recv = ep->re_max_inline_recv; - __assign_str(addr, rpcrdma_addrstr(r_xprt)); - __assign_str(port, rpcrdma_portstr(r_xprt)); + memcpy(__entry->srcaddr, &id->route.addr.src_addr, + sizeof(struct sockaddr_in6)); + memcpy(__entry->dstaddr, &id->route.addr.dst_addr, + sizeof(struct sockaddr_in6)); ), - TP_printk("peer=[%s]:%s r_xprt=%p neg send/recv=%u/%u, calc send/recv=%u/%u", - __get_str(addr), __get_str(port), __entry->r_xprt, + TP_printk("%pISpc -> %pISpc neg send/recv=%u/%u, calc send/recv=%u/%u", + __entry->srcaddr, __entry->dstaddr, __entry->inline_send, __entry->inline_recv, __entry->max_send, __entry->max_recv ) ); +TRACE_EVENT(xprtrdma_remove, + TP_PROTO( + const struct rpcrdma_ep *ep + ), + + TP_ARGS(ep), + + TP_STRUCT__entry( + __array(unsigned char, srcaddr, sizeof(struct sockaddr_in6)) + __array(unsigned char, dstaddr, sizeof(struct sockaddr_in6)) + __string(name, ep->re_id->device->name) + ), + + TP_fast_assign( + const struct rdma_cm_id *id = ep->re_id; + + memcpy(__entry->srcaddr, &id->route.addr.src_addr, + sizeof(struct sockaddr_in6)); + memcpy(__entry->dstaddr, &id->route.addr.dst_addr, + sizeof(struct sockaddr_in6)); + __assign_str(name, id->device->name); + ), + + TP_printk("%pISpc -> %pISpc device=%s", + __entry->srcaddr, __entry->dstaddr, __get_str(name) + ) +); + DEFINE_CONN_EVENT(connect); DEFINE_CONN_EVENT(disconnect); DEFINE_CONN_EVENT(flush_dct); DEFINE_RXPRT_EVENT(xprtrdma_create); DEFINE_RXPRT_EVENT(xprtrdma_op_destroy); -DEFINE_RXPRT_EVENT(xprtrdma_remove); DEFINE_RXPRT_EVENT(xprtrdma_op_inject_dsc); DEFINE_RXPRT_EVENT(xprtrdma_op_close); DEFINE_RXPRT_EVENT(xprtrdma_op_setport); @@ -482,32 +509,33 @@ TRACE_EVENT(xprtrdma_op_set_cto, TRACE_EVENT(xprtrdma_qp_event, TP_PROTO( - const struct rpcrdma_xprt *r_xprt, + const struct rpcrdma_ep *ep, const struct ib_event *event ), - TP_ARGS(r_xprt, event), + TP_ARGS(ep, event), TP_STRUCT__entry( - __field(const void *, r_xprt) - __field(unsigned int, event) + __field(unsigned long, 
event) __string(name, event->device->name) - __string(addr, rpcrdma_addrstr(r_xprt)) - __string(port, rpcrdma_portstr(r_xprt)) + __array(unsigned char, srcaddr, sizeof(struct sockaddr_in6)) + __array(unsigned char, dstaddr, sizeof(struct sockaddr_in6)) ), TP_fast_assign( - __entry->r_xprt = r_xprt; + const struct rdma_cm_id *id = ep->re_id; + __entry->event = event->event; __assign_str(name, event->device->name); - __assign_str(addr, rpcrdma_addrstr(r_xprt)); - __assign_str(port, rpcrdma_portstr(r_xprt)); + memcpy(__entry->srcaddr, &id->route.addr.src_addr, + sizeof(struct sockaddr_in6)); + memcpy(__entry->dstaddr, &id->route.addr.dst_addr, + sizeof(struct sockaddr_in6)); ), - TP_printk("peer=[%s]:%s r_xprt=%p: dev %s: %s (%u)", - __get_str(addr), __get_str(port), __entry->r_xprt, - __get_str(name), rdma_show_ib_event(__entry->event), - __entry->event + TP_printk("%pISpc -> %pISpc device=%s %s (%lu)", + __entry->srcaddr, __entry->dstaddr, __get_str(name), + rdma_show_ib_event(__entry->event), __entry->event ) ); diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 10826982ddf8..37d07072bdbf 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -116,16 +116,14 @@ static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt) * @context: ep that owns QP where event occurred * * Called from the RDMA provider (device driver) possibly in an interrupt - * context. + * context. The QP is always destroyed before the ID, so the ID will be + * reliably available when this handler is invoked. 
*/ -static void -rpcrdma_qp_event_handler(struct ib_event *event, void *context) +static void rpcrdma_qp_event_handler(struct ib_event *event, void *context) { struct rpcrdma_ep *ep = context; - struct rpcrdma_xprt *r_xprt = container_of(ep, struct rpcrdma_xprt, - rx_ep); - trace_xprtrdma_qp_event(r_xprt, event); + trace_xprtrdma_qp_event(ep, event); } /** @@ -202,11 +200,10 @@ out_flushed: rpcrdma_rep_destroy(rep); } -static void rpcrdma_update_cm_private(struct rpcrdma_xprt *r_xprt, +static void rpcrdma_update_cm_private(struct rpcrdma_ep *ep, struct rdma_conn_param *param) { const struct rpcrdma_connect_private *pmsg = param->private_data; - struct rpcrdma_ep *ep = &r_xprt->rx_ep; unsigned int rsize, wsize; /* Default settings for RPC-over-RDMA Version One */ @@ -241,6 +238,7 @@ static void rpcrdma_update_cm_private(struct rpcrdma_xprt *r_xprt, static int rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) { + struct sockaddr *sap = (struct sockaddr *)&id->route.addr.dst_addr; struct rpcrdma_xprt *r_xprt = id->context; struct rpcrdma_ep *ep = &r_xprt->rx_ep; struct rpc_xprt *xprt = &r_xprt->rx_xprt; @@ -263,24 +261,21 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) complete(&ep->re_done); return 0; case RDMA_CM_EVENT_DEVICE_REMOVAL: -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) - pr_info("rpcrdma: removing device %s for %s:%s\n", - ep->re_id->device->name, - rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt)); -#endif + pr_info("rpcrdma: removing device %s for %pISpc\n", + ep->re_id->device->name, sap); init_completion(&ep->re_remove_done); ep->re_connect_status = -ENODEV; xprt_force_disconnect(xprt); wait_for_completion(&ep->re_remove_done); - trace_xprtrdma_remove(r_xprt); + trace_xprtrdma_remove(ep); /* Return 1 to ensure the core destroys the id. 
*/ return 1; case RDMA_CM_EVENT_ESTABLISHED: ++xprt->connect_cookie; ep->re_connect_status = 1; - rpcrdma_update_cm_private(r_xprt, &event->param.conn); - trace_xprtrdma_inline_thresh(r_xprt); + rpcrdma_update_cm_private(ep, &event->param.conn); + trace_xprtrdma_inline_thresh(ep); wake_up_all(&ep->re_connect_wait); break; case RDMA_CM_EVENT_CONNECT_ERROR: @@ -290,9 +285,8 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) ep->re_connect_status = -ENETUNREACH; goto disconnected; case RDMA_CM_EVENT_REJECTED: - dprintk("rpcrdma: connection to %s:%s rejected: %s\n", - rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt), - rdma_reject_msg(id, event->status)); + dprintk("rpcrdma: connection to %pISpc rejected: %s\n", + sap, rdma_reject_msg(id, event->status)); ep->re_connect_status = -ECONNREFUSED; if (event->status == IB_CM_REJ_STALE_CONN) ep->re_connect_status = -EAGAIN; @@ -307,8 +301,7 @@ disconnected: break; } - dprintk("RPC: %s: %s:%s on %s/frwr: %s\n", __func__, - rpcrdma_addrstr(r_xprt), rpcrdma_portstr(r_xprt), + dprintk("RPC: %s: %pISpc on %s/frwr: %s\n", __func__, sap, ep->re_id->device->name, rdma_event_msg(event->event)); return 0; } -- cgit v1.2.3 From e28ce90083f032ca0e8ea03478f5b6a38f5930f7 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Fri, 21 Feb 2020 17:01:05 -0500 Subject: xprtrdma: kmalloc rpcrdma_ep separate from rpcrdma_xprt Change the rpcrdma_xprt_disconnect() function so that it no longer waits for the DISCONNECTED event. This prevents blocking if the remote is unresponsive. In rpcrdma_xprt_disconnect(), the transport's rpcrdma_ep is detached. Upon return from rpcrdma_xprt_disconnect(), the transport (r_xprt) is ready immediately for a new connection. The RDMA_CM_EVENT_DEVICE_REMOVAL and RDMA_CM_EVENT_DISCONNECTED events are now handled almost identically. However, because the lifetimes of rpcrdma_xprt structures and rpcrdma_ep structures are now independent, creating an rpcrdma_ep needs to take a module ref count. 
The ep now owns most of the hardware resources for a transport. Also, a kref is needed to ensure that rpcrdma_ep sticks around long enough for the cm_event_handler to finish. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/trace/events/rpcrdma.h | 63 +------------ net/sunrpc/xprtrdma/backchannel.c | 4 +- net/sunrpc/xprtrdma/frwr_ops.c | 12 +-- net/sunrpc/xprtrdma/rpc_rdma.c | 17 ++-- net/sunrpc/xprtrdma/transport.c | 37 ++++---- net/sunrpc/xprtrdma/verbs.c | 194 ++++++++++++++++++++------------------ net/sunrpc/xprtrdma/xprt_rdma.h | 7 +- 7 files changed, 143 insertions(+), 191 deletions(-) (limited to 'net/sunrpc') diff --git a/include/trace/events/rpcrdma.h b/include/trace/events/rpcrdma.h index 295f75b9b796..81b87428f166 100644 --- a/include/trace/events/rpcrdma.h +++ b/include/trace/events/rpcrdma.h @@ -104,7 +104,7 @@ DECLARE_EVENT_CLASS(xprtrdma_connect_class, TP_fast_assign( __entry->r_xprt = r_xprt; __entry->rc = rc; - __entry->connect_status = r_xprt->rx_ep.re_connect_status; + __entry->connect_status = r_xprt->rx_ep->re_connect_status; __assign_str(addr, rpcrdma_addrstr(r_xprt)); __assign_str(port, rpcrdma_portstr(r_xprt)); ), @@ -342,37 +342,6 @@ DECLARE_EVENT_CLASS(xprtrdma_cb_event, ** Connection events **/ -TRACE_EVENT(xprtrdma_cm_event, - TP_PROTO( - const struct rpcrdma_xprt *r_xprt, - struct rdma_cm_event *event - ), - - TP_ARGS(r_xprt, event), - - TP_STRUCT__entry( - __field(const void *, r_xprt) - __field(unsigned int, event) - __field(int, status) - __string(addr, rpcrdma_addrstr(r_xprt)) - __string(port, rpcrdma_portstr(r_xprt)) - ), - - TP_fast_assign( - __entry->r_xprt = r_xprt; - __entry->event = event->event; - __entry->status = event->status; - __assign_str(addr, rpcrdma_addrstr(r_xprt)); - __assign_str(port, rpcrdma_portstr(r_xprt)); - ), - - TP_printk("peer=[%s]:%s r_xprt=%p: %s (%u/%d)", - __get_str(addr), __get_str(port), - __entry->r_xprt, rdma_show_cm_event(__entry->event), - __entry->event, __entry->status - ) 
-); - TRACE_EVENT(xprtrdma_inline_thresh, TP_PROTO( const struct rpcrdma_ep *ep @@ -409,34 +378,6 @@ TRACE_EVENT(xprtrdma_inline_thresh, ) ); -TRACE_EVENT(xprtrdma_remove, - TP_PROTO( - const struct rpcrdma_ep *ep - ), - - TP_ARGS(ep), - - TP_STRUCT__entry( - __array(unsigned char, srcaddr, sizeof(struct sockaddr_in6)) - __array(unsigned char, dstaddr, sizeof(struct sockaddr_in6)) - __string(name, ep->re_id->device->name) - ), - - TP_fast_assign( - const struct rdma_cm_id *id = ep->re_id; - - memcpy(__entry->srcaddr, &id->route.addr.src_addr, - sizeof(struct sockaddr_in6)); - memcpy(__entry->dstaddr, &id->route.addr.dst_addr, - sizeof(struct sockaddr_in6)); - __assign_str(name, id->device->name); - ), - - TP_printk("%pISpc -> %pISpc device=%s", - __entry->srcaddr, __entry->dstaddr, __get_str(name) - ) -); - DEFINE_CONN_EVENT(connect); DEFINE_CONN_EVENT(disconnect); DEFINE_CONN_EVENT(flush_dct); @@ -831,7 +772,7 @@ TRACE_EVENT(xprtrdma_post_recvs, __entry->r_xprt = r_xprt; __entry->count = count; __entry->status = status; - __entry->posted = r_xprt->rx_ep.re_receive_count; + __entry->posted = r_xprt->rx_ep->re_receive_count; __assign_str(addr, rpcrdma_addrstr(r_xprt)); __assign_str(port, rpcrdma_portstr(r_xprt)); ), diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c index 4b20102cf060..c92c1aac270a 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c @@ -44,7 +44,7 @@ int xprt_rdma_bc_setup(struct rpc_xprt *xprt, unsigned int reqs) size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *xprt) { struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); - struct rpcrdma_ep *ep = &r_xprt->rx_ep; + struct rpcrdma_ep *ep = r_xprt->rx_ep; size_t maxmsg; maxmsg = min_t(unsigned int, ep->re_inline_send, ep->re_inline_recv); @@ -190,7 +190,7 @@ create_req: if (xprt->bc_alloc_count >= RPCRDMA_BACKWARD_WRS) return NULL; - size = min_t(size_t, r_xprt->rx_ep.re_inline_recv, PAGE_SIZE); + size = min_t(size_t, 
r_xprt->rx_ep->re_inline_recv, PAGE_SIZE); req = rpcrdma_req_create(r_xprt, size, GFP_KERNEL); if (!req) return NULL; diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 19bf422f010b..ef997880e17a 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -74,7 +74,7 @@ static void frwr_mr_recycle(struct rpcrdma_mr *mr) if (mr->mr_dir != DMA_NONE) { trace_xprtrdma_mr_unmap(mr); - ib_dma_unmap_sg(r_xprt->rx_ep.re_id->device, + ib_dma_unmap_sg(r_xprt->rx_ep->re_id->device, mr->mr_sg, mr->mr_nents, mr->mr_dir); mr->mr_dir = DMA_NONE; } @@ -115,7 +115,7 @@ void frwr_reset(struct rpcrdma_req *req) */ int frwr_mr_init(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr *mr) { - struct rpcrdma_ep *ep = &r_xprt->rx_ep; + struct rpcrdma_ep *ep = r_xprt->rx_ep; unsigned int depth = ep->re_max_fr_depth; struct scatterlist *sg; struct ib_mr *frmr; @@ -283,7 +283,7 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt, int nsegs, bool writing, __be32 xid, struct rpcrdma_mr *mr) { - struct rpcrdma_ep *ep = &r_xprt->rx_ep; + struct rpcrdma_ep *ep = r_xprt->rx_ep; struct ib_reg_wr *reg_wr; int i, n, dma_nents; struct ib_mr *ibmr; @@ -405,7 +405,7 @@ int frwr_send(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) post_wr = &frwr->fr_regwr.wr; } - return ib_post_send(r_xprt->rx_ep.re_id->qp, post_wr, NULL); + return ib_post_send(r_xprt->rx_ep->re_id->qp, post_wr, NULL); } /** @@ -535,7 +535,7 @@ void frwr_unmap_sync(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) * unless re_id->qp is a valid pointer. */ bad_wr = NULL; - rc = ib_post_send(r_xprt->rx_ep.re_id->qp, first, &bad_wr); + rc = ib_post_send(r_xprt->rx_ep->re_id->qp, first, &bad_wr); /* The final LOCAL_INV WR in the chain is supposed to * do the wake. If it was never posted, the wake will @@ -640,7 +640,7 @@ void frwr_unmap_async(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) * unless re_id->qp is a valid pointer. 
*/ bad_wr = NULL; - rc = ib_post_send(r_xprt->rx_ep.re_id->qp, first, &bad_wr); + rc = ib_post_send(r_xprt->rx_ep->re_id->qp, first, &bad_wr); if (!rc) return; diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index ad7e6b0187bd..d1af48e0139c 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -131,9 +131,10 @@ static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst) { struct xdr_buf *xdr = &rqst->rq_snd_buf; + struct rpcrdma_ep *ep = r_xprt->rx_ep; unsigned int count, remaining, offset; - if (xdr->len > r_xprt->rx_ep.re_max_inline_send) + if (xdr->len > ep->re_max_inline_send) return false; if (xdr->page_len) { @@ -144,7 +145,7 @@ static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt, remaining -= min_t(unsigned int, PAGE_SIZE - offset, remaining); offset = 0; - if (++count > r_xprt->rx_ep.re_attr.cap.max_send_sge) + if (++count > ep->re_attr.cap.max_send_sge) return false; } } @@ -161,7 +162,7 @@ static bool rpcrdma_args_inline(struct rpcrdma_xprt *r_xprt, static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst) { - return rqst->rq_rcv_buf.buflen <= r_xprt->rx_ep.re_max_inline_recv; + return rqst->rq_rcv_buf.buflen <= r_xprt->rx_ep->re_max_inline_recv; } /* The client is required to provide a Reply chunk if the maximum @@ -175,7 +176,7 @@ rpcrdma_nonpayload_inline(const struct rpcrdma_xprt *r_xprt, const struct xdr_buf *buf = &rqst->rq_rcv_buf; return (buf->head[0].iov_len + buf->tail[0].iov_len) < - r_xprt->rx_ep.re_max_inline_recv; + r_xprt->rx_ep->re_max_inline_recv; } /* Split @vec on page boundaries into SGEs. FMR registers pages, not @@ -254,7 +255,7 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf, /* When encoding a Read chunk, the tail iovec contains an * XDR pad and may be omitted. 
*/ - if (type == rpcrdma_readch && r_xprt->rx_ep.re_implicit_roundup) + if (type == rpcrdma_readch && r_xprt->rx_ep->re_implicit_roundup) goto out; /* When encoding a Write chunk, some servers need to see an @@ -262,7 +263,7 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf, * layer provides space in the tail iovec that may be used * for this purpose. */ - if (type == rpcrdma_writech && r_xprt->rx_ep.re_implicit_roundup) + if (type == rpcrdma_writech && r_xprt->rx_ep->re_implicit_roundup) goto out; if (xdrbuf->tail[0].iov_len) @@ -1475,8 +1476,8 @@ void rpcrdma_reply_handler(struct rpcrdma_rep *rep) if (credits == 0) credits = 1; /* don't deadlock */ - else if (credits > r_xprt->rx_ep.re_max_requests) - credits = r_xprt->rx_ep.re_max_requests; + else if (credits > r_xprt->rx_ep->re_max_requests) + credits = r_xprt->rx_ep->re_max_requests; if (buf->rb_credits != credits) rpcrdma_update_cwnd(r_xprt, credits); rpcrdma_post_recvs(r_xprt, false); diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 4352fd6e9817..659da37020a4 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -238,12 +238,12 @@ xprt_rdma_connect_worker(struct work_struct *work) struct rpcrdma_xprt *r_xprt = container_of(work, struct rpcrdma_xprt, rx_connect_worker.work); struct rpc_xprt *xprt = &r_xprt->rx_xprt; - struct rpcrdma_ep *ep = &r_xprt->rx_ep; int rc; rc = rpcrdma_xprt_connect(r_xprt); xprt_clear_connecting(xprt); - if (ep->re_connect_status > 0) { + if (r_xprt->rx_ep && r_xprt->rx_ep->re_connect_status > 0) { + xprt->connect_cookie++; xprt->stat.connect_count++; xprt->stat.connect_time += (long)jiffies - xprt->stat.connect_start; @@ -266,7 +266,7 @@ xprt_rdma_inject_disconnect(struct rpc_xprt *xprt) struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); trace_xprtrdma_op_inject_dsc(r_xprt); - rdma_disconnect(r_xprt->rx_ep.re_id); + rdma_disconnect(r_xprt->rx_ep->re_id); } /** @@ -316,10 +316,15 @@ 
xprt_setup_rdma(struct xprt_create *args) if (args->addrlen > sizeof(xprt->addr)) return ERR_PTR(-EBADF); + if (!try_module_get(THIS_MODULE)) + return ERR_PTR(-EIO); + xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt), 0, xprt_rdma_slot_table_entries); - if (!xprt) + if (!xprt) { + module_put(THIS_MODULE); return ERR_PTR(-ENOMEM); + } xprt->timeout = &xprt_rdma_default_timeout; xprt->connect_timeout = xprt->timeout->to_initval; @@ -348,11 +353,12 @@ xprt_setup_rdma(struct xprt_create *args) new_xprt = rpcx_to_rdmax(xprt); rc = rpcrdma_buffer_create(new_xprt); - if (rc) - goto out2; - - if (!try_module_get(THIS_MODULE)) - goto out4; + if (rc) { + xprt_rdma_free_addresses(xprt); + xprt_free(xprt); + module_put(THIS_MODULE); + return ERR_PTR(rc); + } INIT_DELAYED_WORK(&new_xprt->rx_connect_worker, xprt_rdma_connect_worker); @@ -364,15 +370,6 @@ xprt_setup_rdma(struct xprt_create *args) xprt->address_strings[RPC_DISPLAY_PORT]); trace_xprtrdma_create(new_xprt); return xprt; - -out4: - rpcrdma_buffer_destroy(&new_xprt->rx_buf); - rc = -ENODEV; -out2: - trace_xprtrdma_op_destroy(new_xprt); - xprt_rdma_free_addresses(xprt); - xprt_free(xprt); - return ERR_PTR(rc); } /** @@ -491,11 +488,11 @@ static void xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task) { struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); - struct rpcrdma_ep *ep = &r_xprt->rx_ep; + struct rpcrdma_ep *ep = r_xprt->rx_ep; unsigned long delay; delay = 0; - if (ep->re_connect_status != 0) { + if (ep && ep->re_connect_status != 0) { delay = xprt_reconnect_delay(xprt); xprt_reconnect_backoff(xprt, RPCRDMA_INIT_REEST_TO); } diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index 37d07072bdbf..cdd84c09df10 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -84,7 +84,7 @@ static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep); static void rpcrdma_reps_unmap(struct rpcrdma_xprt *r_xprt); static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt); 
static void rpcrdma_mrs_destroy(struct rpcrdma_xprt *r_xprt); -static void rpcrdma_ep_destroy(struct rpcrdma_xprt *r_xprt); +static int rpcrdma_ep_destroy(struct rpcrdma_ep *ep); static struct rpcrdma_regbuf * rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction, gfp_t flags); @@ -97,7 +97,7 @@ static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb); */ static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt) { - struct rdma_cm_id *id = r_xprt->rx_ep.re_id; + struct rdma_cm_id *id = r_xprt->rx_ep->re_id; /* Flush Receives, then wait for deferred Reply work * to complete. @@ -139,8 +139,8 @@ void rpcrdma_flush_disconnect(struct ib_cq *cq, struct ib_wc *wc) struct rpc_xprt *xprt = &r_xprt->rx_xprt; if (wc->status != IB_WC_SUCCESS && - r_xprt->rx_ep.re_connect_status == 1) { - r_xprt->rx_ep.re_connect_status = -ECONNABORTED; + r_xprt->rx_ep->re_connect_status == 1) { + r_xprt->rx_ep->re_connect_status = -ECONNABORTED; trace_xprtrdma_flush_dct(r_xprt, wc->status); xprt_force_disconnect(xprt); } @@ -179,7 +179,7 @@ static void rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) /* WARNING: Only wr_cqe and status are reliable at this point */ trace_xprtrdma_wc_receive(wc); - --r_xprt->rx_ep.re_receive_count; + --r_xprt->rx_ep->re_receive_count; if (wc->status != IB_WC_SUCCESS) goto out_flushed; @@ -239,13 +239,11 @@ static int rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) { struct sockaddr *sap = (struct sockaddr *)&id->route.addr.dst_addr; - struct rpcrdma_xprt *r_xprt = id->context; - struct rpcrdma_ep *ep = &r_xprt->rx_ep; - struct rpc_xprt *xprt = &r_xprt->rx_xprt; + struct rpcrdma_ep *ep = id->context; + struct rpc_xprt *xprt = ep->re_xprt; might_sleep(); - trace_xprtrdma_cm_event(r_xprt, event); switch (event->event) { case RDMA_CM_EVENT_ADDR_RESOLVED: case RDMA_CM_EVENT_ROUTE_RESOLVED: @@ -263,16 +261,13 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) case 
RDMA_CM_EVENT_DEVICE_REMOVAL: pr_info("rpcrdma: removing device %s for %pISpc\n", ep->re_id->device->name, sap); - init_completion(&ep->re_remove_done); + /* fall through */ + case RDMA_CM_EVENT_ADDR_CHANGE: ep->re_connect_status = -ENODEV; xprt_force_disconnect(xprt); - wait_for_completion(&ep->re_remove_done); - trace_xprtrdma_remove(ep); - - /* Return 1 to ensure the core destroys the id. */ - return 1; + goto disconnected; case RDMA_CM_EVENT_ESTABLISHED: - ++xprt->connect_cookie; + kref_get(&ep->re_kref); ep->re_connect_status = 1; rpcrdma_update_cm_private(ep, &event->param.conn); trace_xprtrdma_inline_thresh(ep); @@ -294,9 +289,7 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) case RDMA_CM_EVENT_DISCONNECTED: ep->re_connect_status = -ECONNABORTED; disconnected: - xprt_force_disconnect(xprt); - wake_up_all(&ep->re_connect_wait); - break; + return rpcrdma_ep_destroy(ep); default: break; } @@ -316,7 +309,7 @@ static struct rdma_cm_id *rpcrdma_create_id(struct rpcrdma_xprt *r_xprt, init_completion(&ep->re_done); - id = rdma_create_id(xprt->xprt_net, rpcrdma_cm_event_handler, r_xprt, + id = rdma_create_id(xprt->xprt_net, rpcrdma_cm_event_handler, ep, RDMA_PS_TCP, IB_QPT_RC); if (IS_ERR(id)) return id; @@ -352,25 +345,66 @@ out: return ERR_PTR(rc); } -/* - * Exported functions. +static void rpcrdma_ep_put(struct kref *kref) +{ + struct rpcrdma_ep *ep = container_of(kref, struct rpcrdma_ep, re_kref); + + if (ep->re_id->qp) { + rdma_destroy_qp(ep->re_id); + ep->re_id->qp = NULL; + } + + if (ep->re_attr.recv_cq) + ib_free_cq(ep->re_attr.recv_cq); + ep->re_attr.recv_cq = NULL; + if (ep->re_attr.send_cq) + ib_free_cq(ep->re_attr.send_cq); + ep->re_attr.send_cq = NULL; + + if (ep->re_pd) + ib_dealloc_pd(ep->re_pd); + ep->re_pd = NULL; + + kfree(ep); + module_put(THIS_MODULE); +} + +/* Returns: + * %0 if @ep still has a positive kref count, or + * %1 if @ep was destroyed successfully. 
*/ +static int rpcrdma_ep_destroy(struct rpcrdma_ep *ep) +{ + return kref_put(&ep->re_kref, rpcrdma_ep_put); +} static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) { - struct rpcrdma_ep *ep = &r_xprt->rx_ep; - struct rpcrdma_connect_private *pmsg = &ep->re_cm_private; + struct rpcrdma_connect_private *pmsg; + struct ib_device *device; struct rdma_cm_id *id; + struct rpcrdma_ep *ep; int rc; + ep = kzalloc(sizeof(*ep), GFP_NOFS); + if (!ep) + return -EAGAIN; + ep->re_xprt = &r_xprt->rx_xprt; + kref_init(&ep->re_kref); + id = rpcrdma_create_id(r_xprt, ep); - if (IS_ERR(id)) - return PTR_ERR(id); + if (IS_ERR(id)) { + rc = PTR_ERR(id); + goto out_free; + } + __module_get(THIS_MODULE); + device = id->device; + ep->re_id = id; ep->re_max_requests = r_xprt->rx_xprt.max_reqs; ep->re_inline_send = xprt_rdma_max_inline_write; ep->re_inline_recv = xprt_rdma_max_inline_read; - rc = frwr_query_device(ep, id->device); + rc = frwr_query_device(ep, device); if (rc) goto out_destroy; @@ -396,7 +430,7 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) ep->re_send_count = ep->re_send_batch; init_waitqueue_head(&ep->re_connect_wait); - ep->re_attr.send_cq = ib_alloc_cq_any(id->device, r_xprt, + ep->re_attr.send_cq = ib_alloc_cq_any(device, r_xprt, ep->re_attr.cap.max_send_wr, IB_POLL_WORKQUEUE); if (IS_ERR(ep->re_attr.send_cq)) { @@ -404,7 +438,7 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) goto out_destroy; } - ep->re_attr.recv_cq = ib_alloc_cq_any(id->device, r_xprt, + ep->re_attr.recv_cq = ib_alloc_cq_any(device, r_xprt, ep->re_attr.cap.max_recv_wr, IB_POLL_WORKQUEUE); if (IS_ERR(ep->re_attr.recv_cq)) { @@ -417,6 +451,7 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) memset(&ep->re_remote_cma, 0, sizeof(ep->re_remote_cma)); /* Prepare RDMA-CM private message */ + pmsg = &ep->re_cm_private; pmsg->cp_magic = rpcrdma_cmp_magic; pmsg->cp_version = RPCRDMA_CMP_VERSION; pmsg->cp_flags |= RPCRDMA_CMP_F_SND_W_INV_OK; @@ -428,7 +463,7 @@ static 
int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) /* Client offers RDMA Read but does not initiate */ ep->re_remote_cma.initiator_depth = 0; ep->re_remote_cma.responder_resources = - min_t(int, U8_MAX, id->device->attrs.max_qp_rd_atom); + min_t(int, U8_MAX, device->attrs.max_qp_rd_atom); /* Limit transport retries so client can detect server * GID changes quickly. RPC layer handles re-establishing @@ -443,7 +478,7 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) ep->re_remote_cma.flow_control = 0; ep->re_remote_cma.rnr_retry_count = 0; - ep->re_pd = ib_alloc_pd(id->device, 0); + ep->re_pd = ib_alloc_pd(device, 0); if (IS_ERR(ep->re_pd)) { rc = PTR_ERR(ep->re_pd); goto out_destroy; @@ -453,50 +488,36 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt) if (rc) goto out_destroy; - ep->re_id = id; + r_xprt->rx_ep = ep; return 0; out_destroy: - rpcrdma_ep_destroy(r_xprt); + rpcrdma_ep_destroy(ep); rdma_destroy_id(id); +out_free: + kfree(ep); + r_xprt->rx_ep = NULL; return rc; } -static void rpcrdma_ep_destroy(struct rpcrdma_xprt *r_xprt) -{ - struct rpcrdma_ep *ep = &r_xprt->rx_ep; - - if (ep->re_id && ep->re_id->qp) { - rdma_destroy_qp(ep->re_id); - ep->re_id->qp = NULL; - } - - if (ep->re_attr.recv_cq) - ib_free_cq(ep->re_attr.recv_cq); - ep->re_attr.recv_cq = NULL; - if (ep->re_attr.send_cq) - ib_free_cq(ep->re_attr.send_cq); - ep->re_attr.send_cq = NULL; - - if (ep->re_pd) - ib_dealloc_pd(ep->re_pd); - ep->re_pd = NULL; -} - -/* - * Connect unconnected endpoint. +/** + * rpcrdma_xprt_connect - Connect an unconnected transport + * @r_xprt: controlling transport instance + * + * Returns 0 on success or a negative errno. 
*/ int rpcrdma_xprt_connect(struct rpcrdma_xprt *r_xprt) { struct rpc_xprt *xprt = &r_xprt->rx_xprt; - struct rpcrdma_ep *ep = &r_xprt->rx_ep; + struct rpcrdma_ep *ep; int rc; retry: rpcrdma_xprt_disconnect(r_xprt); rc = rpcrdma_ep_create(r_xprt); if (rc) - goto out_noupdate; + return rc; + ep = r_xprt->rx_ep; ep->re_connect_status = 0; xprt_clear_connected(xprt); @@ -533,8 +554,6 @@ retry: out: if (rc) ep->re_connect_status = rc; - -out_noupdate: trace_xprtrdma_connect(r_xprt, rc); return rc; } @@ -545,40 +564,33 @@ out_noupdate: * * Caller serializes. Either the transport send lock is held, * or we're being called to destroy the transport. + * + * On return, @r_xprt is completely divested of all hardware + * resources and prepared for the next ->connect operation. */ void rpcrdma_xprt_disconnect(struct rpcrdma_xprt *r_xprt) { - struct rpcrdma_ep *ep = &r_xprt->rx_ep; - struct rdma_cm_id *id = ep->re_id; - int rc, status = ep->re_connect_status; - - might_sleep(); + struct rpcrdma_ep *ep = r_xprt->rx_ep; + struct rdma_cm_id *id; + int rc; - if (!id) + if (!ep) return; + id = ep->re_id; rc = rdma_disconnect(id); - if (!rc) - wait_event_interruptible(ep->re_connect_wait, - ep->re_connect_status != 1); - else - ep->re_connect_status = rc; trace_xprtrdma_disconnect(r_xprt, rc); - if (id->qp) - rpcrdma_xprt_drain(r_xprt); + rpcrdma_xprt_drain(r_xprt); rpcrdma_reps_unmap(r_xprt); rpcrdma_reqs_reset(r_xprt); rpcrdma_mrs_destroy(r_xprt); rpcrdma_sendctxs_destroy(r_xprt); - rpcrdma_ep_destroy(r_xprt); - - if (status == -ENODEV) - complete(&ep->re_remove_done); - else + if (rpcrdma_ep_destroy(ep)) rdma_destroy_id(id); - ep->re_id = NULL; + + r_xprt->rx_ep = NULL; } /* Fixed-size circular FIFO queue. This implementation is wait-free and @@ -635,14 +647,14 @@ static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt) * the ->send_request call to fail temporarily before too many * Sends are posted. 
*/ - i = r_xprt->rx_ep.re_max_requests + RPCRDMA_MAX_BC_REQUESTS; + i = r_xprt->rx_ep->re_max_requests + RPCRDMA_MAX_BC_REQUESTS; buf->rb_sc_ctxs = kcalloc(i, sizeof(sc), GFP_KERNEL); if (!buf->rb_sc_ctxs) return -ENOMEM; buf->rb_sc_last = i - 1; for (i = 0; i <= buf->rb_sc_last; i++) { - sc = rpcrdma_sendctx_create(&r_xprt->rx_ep); + sc = rpcrdma_sendctx_create(r_xprt->rx_ep); if (!sc) return -ENOMEM; @@ -746,7 +758,7 @@ static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt) { struct rpcrdma_buffer *buf = &r_xprt->rx_buf; - struct rpcrdma_ep *ep = &r_xprt->rx_ep; + struct rpcrdma_ep *ep = r_xprt->rx_ep; unsigned int count; for (count = 0; count < ep->re_max_rdma_segs; count++) { @@ -793,7 +805,7 @@ rpcrdma_mr_refresh_worker(struct work_struct *work) void rpcrdma_mrs_refresh(struct rpcrdma_xprt *r_xprt) { struct rpcrdma_buffer *buf = &r_xprt->rx_buf; - struct rpcrdma_ep *ep = &r_xprt->rx_ep; + struct rpcrdma_ep *ep = r_xprt->rx_ep; /* If there is no underlying connection, it's no use * to wake the refresh worker. 
@@ -862,7 +874,7 @@ int rpcrdma_req_setup(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) /* Compute maximum header buffer size in bytes */ maxhdrsize = rpcrdma_fixed_maxsz + 3 + - r_xprt->rx_ep.re_max_rdma_segs * rpcrdma_readchunk_maxsz; + r_xprt->rx_ep->re_max_rdma_segs * rpcrdma_readchunk_maxsz; maxhdrsize *= sizeof(__be32); rb = rpcrdma_regbuf_alloc(__roundup_pow_of_two(maxhdrsize), DMA_TO_DEVICE, GFP_KERNEL); @@ -940,7 +952,7 @@ struct rpcrdma_rep *rpcrdma_rep_create(struct rpcrdma_xprt *r_xprt, if (rep == NULL) goto out; - rep->rr_rdmabuf = rpcrdma_regbuf_alloc(r_xprt->rx_ep.re_inline_recv, + rep->rr_rdmabuf = rpcrdma_regbuf_alloc(r_xprt->rx_ep->re_inline_recv, DMA_FROM_DEVICE, GFP_KERNEL); if (!rep->rr_rdmabuf) goto out_free; @@ -1165,7 +1177,7 @@ void rpcrdma_mr_put(struct rpcrdma_mr *mr) if (mr->mr_dir != DMA_NONE) { trace_xprtrdma_mr_unmap(mr); - ib_dma_unmap_sg(r_xprt->rx_ep.re_id->device, + ib_dma_unmap_sg(r_xprt->rx_ep->re_id->device, mr->mr_sg, mr->mr_nents, mr->mr_dir); mr->mr_dir = DMA_NONE; } @@ -1283,7 +1295,7 @@ bool rpcrdma_regbuf_realloc(struct rpcrdma_regbuf *rb, size_t size, gfp_t flags) bool __rpcrdma_regbuf_dma_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_regbuf *rb) { - struct ib_device *device = r_xprt->rx_ep.re_id->device; + struct ib_device *device = r_xprt->rx_ep->re_id->device; if (rb->rg_direction == DMA_NONE) return false; @@ -1296,7 +1308,7 @@ bool __rpcrdma_regbuf_dma_map(struct rpcrdma_xprt *r_xprt, } rb->rg_device = device; - rb->rg_iov.lkey = r_xprt->rx_ep.re_pd->local_dma_lkey; + rb->rg_iov.lkey = r_xprt->rx_ep->re_pd->local_dma_lkey; return true; } @@ -1332,7 +1344,7 @@ static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb) int rpcrdma_post_sends(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) { struct ib_send_wr *send_wr = &req->rl_wr; - struct rpcrdma_ep *ep = &r_xprt->rx_ep; + struct rpcrdma_ep *ep = r_xprt->rx_ep; int rc; if (!ep->re_send_count || kref_read(&req->rl_kref) > 1) { @@ -1359,7 +1371,7 @@ 
int rpcrdma_post_sends(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp) { struct rpcrdma_buffer *buf = &r_xprt->rx_buf; - struct rpcrdma_ep *ep = &r_xprt->rx_ep; + struct rpcrdma_ep *ep = r_xprt->rx_ep; struct ib_recv_wr *wr, *bad_wr; struct rpcrdma_rep *rep; int needed, count, rc; @@ -1396,7 +1408,7 @@ void rpcrdma_post_recvs(struct rpcrdma_xprt *r_xprt, bool temp) if (!wr) goto out; - rc = ib_post_recv(r_xprt->rx_ep.re_id->qp, wr, + rc = ib_post_recv(ep->re_id->qp, wr, (const struct ib_recv_wr **)&bad_wr); out: trace_xprtrdma_post_recvs(r_xprt, count, rc); diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index f3c0b826c9ed..0a16fdb09b2c 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -68,6 +68,7 @@ * RDMA Endpoint -- connection endpoint details */ struct rpcrdma_ep { + struct kref re_kref; struct rdma_cm_id *re_id; struct ib_pd *re_pd; unsigned int re_max_rdma_segs; @@ -75,7 +76,6 @@ struct rpcrdma_ep { bool re_implicit_roundup; enum ib_mr_type re_mrtype; struct completion re_done; - struct completion re_remove_done; unsigned int re_send_count; unsigned int re_send_batch; unsigned int re_max_inline_send; @@ -83,7 +83,8 @@ struct rpcrdma_ep { int re_async_rc; int re_connect_status; struct ib_qp_init_attr re_attr; - wait_queue_head_t re_connect_wait; + wait_queue_head_t re_connect_wait; + struct rpc_xprt *re_xprt; struct rpcrdma_connect_private re_cm_private; struct rdma_conn_param re_remote_cma; @@ -411,7 +412,7 @@ struct rpcrdma_stats { */ struct rpcrdma_xprt { struct rpc_xprt rx_xprt; - struct rpcrdma_ep rx_ep; + struct rpcrdma_ep *rx_ep; struct rpcrdma_buffer rx_buf; struct delayed_work rx_connect_worker; struct rpc_timeout rx_timeout; -- cgit v1.2.3