path: root/net/sunrpc/xprtrdma/frwr_ops.c
author    Linus Torvalds <torvalds@linux-foundation.org>  2018-01-30 19:03:48 -0800
committer Linus Torvalds <torvalds@linux-foundation.org>  2018-01-30 19:03:48 -0800
commit    efd52b5d363e3e3b6224ad39949219c0df117c91 (patch)
tree      2d885d2f431a324af58d8f267755240bff3e32da  /net/sunrpc/xprtrdma/frwr_ops.c
parent    1ed2d76e0213751c82e3a242b61b0883daf330df (diff)
parent    e231c6879cfd44e4fffd384bb6dd7d313249a523 (diff)
Merge tag 'nfs-for-4.16-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
Pull NFS client updates from Trond Myklebust:
 "Highlights include:

  Stable bugfixes:

   - Fix breakages in the nfsstat utility due to the inclusion of the
     NFSv4 LOOKUPP operation

   - Fix a NULL pointer dereference in nfs_idmap_prepare_pipe_upcall()
     due to nfs_idmap_legacy_upcall() being called without an 'aux'
     parameter

   - Fix a refcount leak in the standard O_DIRECT error path

   - Fix a refcount leak in the pNFS O_DIRECT fallback to MDS path

   - Fix CPU latency issues with nfs_commit_release_pages()

   - Fix the LAYOUTUNAVAILABLE error case in the file layout type

   - NFS: Fix a race between mmap() and O_DIRECT

  Features:

   - Support the statx() mask and query flags to enable optimisations
     when the user is requesting only attributes that are already up to
     date in the inode cache, or is specifying the AT_STATX_DONT_SYNC
     flag

   - Add a module alias for the SCSI pNFS layout type

  Bugfixes:

   - Automounting when resolving a NFSv4 referral should preserve the
     RDMA transport protocol settings

   - Various other RDMA bugfixes from Chuck

   - pNFS block layout fixes

   - Always set NFS_LOCK_LOST when a lock is lost"

* tag 'nfs-for-4.16-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (69 commits)
  NFS: Fix a race between mmap() and O_DIRECT
  NFS: Remove a redundant call to unmap_mapping_range()
  pnfs/blocklayout: Ensure disk address in block device map
  pnfs/blocklayout: pnfs_block_dev_map uses bytes, not sectors
  lockd: Fix server refcounting
  SUNRPC: Fix null rpc_clnt dereference in rpc_task_queued tracepoint
  SUNRPC: Micro-optimize __rpc_execute
  SUNRPC: task_run_action should display tk_callback
  sunrpc: Format RPC events consistently for display
  SUNRPC: Trace xprt_timer events
  xprtrdma: Correct some documenting comments
  xprtrdma: Fix "bytes registered" accounting
  xprtrdma: Instrument allocation/release of rpcrdma_req/rep objects
  xprtrdma: Add trace points to instrument QP and CQ access upcalls
  xprtrdma: Add trace points in the client-side backchannel code paths
  xprtrdma: Add trace points for connect events
  xprtrdma: Add trace points to instrument MR allocation and recovery
  xprtrdma: Add trace points to instrument memory invalidation
  xprtrdma: Add trace points in reply decoder path
  xprtrdma: Add trace points to instrument memory registration
  ...
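The statx() item above can be exercised from user space. The following is a
minimal sketch, not part of this pull: it assumes glibc 2.28+ for the statx()
wrapper, and "/mnt/nfs/file" is a placeholder path. It requests only the
modification time and allows the NFS client to answer from cached attributes.

#define _GNU_SOURCE
#include <fcntl.h>
#include <sys/stat.h>
#include <stdio.h>

int main(void)
{
	struct statx stx;

	/* Request only mtime; AT_STATX_DONT_SYNC tells the NFS client it
	 * may answer from cached inode attributes rather than issuing a
	 * GETATTR to the server. "/mnt/nfs/file" is a placeholder path.
	 */
	if (statx(AT_FDCWD, "/mnt/nfs/file", AT_STATX_DONT_SYNC,
		  STATX_MTIME, &stx) != 0) {
		perror("statx");
		return 1;
	}
	printf("mtime: %lld\n", (long long)stx.stx_mtime.tv_sec);
	return 0;
}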
Diffstat (limited to 'net/sunrpc/xprtrdma/frwr_ops.c')
-rw-r--r--  net/sunrpc/xprtrdma/frwr_ops.c | 329
1 file changed, 169 insertions(+), 160 deletions(-)
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index 773e66e10a15..90f688f19783 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -1,11 +1,11 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * Copyright (c) 2015 Oracle. All rights reserved.
+ * Copyright (c) 2015, 2017 Oracle. All rights reserved.
* Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved.
*/
/* Lightweight memory registration using Fast Registration Work
- * Requests (FRWR). Also referred to sometimes as FRMR mode.
+ * Requests (FRWR).
*
* FRWR features ordered asynchronous registration and deregistration
* of arbitrarily sized memory regions. This is the fastest and safest
@@ -15,9 +15,9 @@
/* Normal operation
*
* A Memory Region is prepared for RDMA READ or WRITE using a FAST_REG
- * Work Request (frmr_op_map). When the RDMA operation is finished, this
+ * Work Request (frwr_op_map). When the RDMA operation is finished, this
* Memory Region is invalidated using a LOCAL_INV Work Request
- * (frmr_op_unmap).
+ * (frwr_op_unmap_sync).
*
* Typically these Work Requests are not signaled, and neither are RDMA
* SEND Work Requests (with the exception of signaling occasionally to
@@ -26,7 +26,7 @@
*
* As an optimization, frwr_op_unmap marks MRs INVALID before the
* LOCAL_INV WR is posted. If posting succeeds, the MR is placed on
- * rb_mws immediately so that no work (like managing a linked list
+ * rb_mrs immediately so that no work (like managing a linked list
* under a spinlock) is needed in the completion upcall.
*
* But this means that frwr_op_map() can occasionally encounter an MR
@@ -60,7 +60,7 @@
* When frwr_op_map encounters FLUSHED and VALID MRs, they are recovered
* with ib_dereg_mr and then are re-initialized. Because MR recovery
* allocates fresh resources, it is deferred to a workqueue, and the
- * recovered MRs are placed back on the rb_mws list when recovery is
+ * recovered MRs are placed back on the rb_mrs list when recovery is
* complete. frwr_op_map allocates another MR for the current RPC while
* the broken MR is reset.
*
@@ -96,26 +96,26 @@ out_not_supported:
}
static int
-frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r)
+frwr_op_init_mr(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
{
- unsigned int depth = ia->ri_max_frmr_depth;
- struct rpcrdma_frmr *f = &r->frmr;
+ unsigned int depth = ia->ri_max_frwr_depth;
+ struct rpcrdma_frwr *frwr = &mr->frwr;
int rc;
- f->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth);
- if (IS_ERR(f->fr_mr))
+ frwr->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype, depth);
+ if (IS_ERR(frwr->fr_mr))
goto out_mr_err;
- r->mw_sg = kcalloc(depth, sizeof(*r->mw_sg), GFP_KERNEL);
- if (!r->mw_sg)
+ mr->mr_sg = kcalloc(depth, sizeof(*mr->mr_sg), GFP_KERNEL);
+ if (!mr->mr_sg)
goto out_list_err;
- sg_init_table(r->mw_sg, depth);
- init_completion(&f->fr_linv_done);
+ sg_init_table(mr->mr_sg, depth);
+ init_completion(&frwr->fr_linv_done);
return 0;
out_mr_err:
- rc = PTR_ERR(f->fr_mr);
+ rc = PTR_ERR(frwr->fr_mr);
dprintk("RPC: %s: ib_alloc_mr status %i\n",
__func__, rc);
return rc;
@@ -124,83 +124,85 @@ out_list_err:
rc = -ENOMEM;
dprintk("RPC: %s: sg allocation failure\n",
__func__);
- ib_dereg_mr(f->fr_mr);
+ ib_dereg_mr(frwr->fr_mr);
return rc;
}
static void
-frwr_op_release_mr(struct rpcrdma_mw *r)
+frwr_op_release_mr(struct rpcrdma_mr *mr)
{
int rc;
- /* Ensure MW is not on any rl_registered list */
- if (!list_empty(&r->mw_list))
- list_del(&r->mw_list);
+ /* Ensure MR is not on any rl_registered list */
+ if (!list_empty(&mr->mr_list))
+ list_del(&mr->mr_list);
- rc = ib_dereg_mr(r->frmr.fr_mr);
+ rc = ib_dereg_mr(mr->frwr.fr_mr);
if (rc)
pr_err("rpcrdma: final ib_dereg_mr for %p returned %i\n",
- r, rc);
- kfree(r->mw_sg);
- kfree(r);
+ mr, rc);
+ kfree(mr->mr_sg);
+ kfree(mr);
}
static int
-__frwr_reset_mr(struct rpcrdma_ia *ia, struct rpcrdma_mw *r)
+__frwr_mr_reset(struct rpcrdma_ia *ia, struct rpcrdma_mr *mr)
{
- struct rpcrdma_frmr *f = &r->frmr;
+ struct rpcrdma_frwr *frwr = &mr->frwr;
int rc;
- rc = ib_dereg_mr(f->fr_mr);
+ rc = ib_dereg_mr(frwr->fr_mr);
if (rc) {
pr_warn("rpcrdma: ib_dereg_mr status %d, frwr %p orphaned\n",
- rc, r);
+ rc, mr);
return rc;
}
- f->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype,
- ia->ri_max_frmr_depth);
- if (IS_ERR(f->fr_mr)) {
+ frwr->fr_mr = ib_alloc_mr(ia->ri_pd, ia->ri_mrtype,
+ ia->ri_max_frwr_depth);
+ if (IS_ERR(frwr->fr_mr)) {
pr_warn("rpcrdma: ib_alloc_mr status %ld, frwr %p orphaned\n",
- PTR_ERR(f->fr_mr), r);
- return PTR_ERR(f->fr_mr);
+ PTR_ERR(frwr->fr_mr), mr);
+ return PTR_ERR(frwr->fr_mr);
}
- dprintk("RPC: %s: recovered FRMR %p\n", __func__, f);
- f->fr_state = FRMR_IS_INVALID;
+ dprintk("RPC: %s: recovered FRWR %p\n", __func__, frwr);
+ frwr->fr_state = FRWR_IS_INVALID;
return 0;
}
-/* Reset of a single FRMR. Generate a fresh rkey by replacing the MR.
+/* Reset of a single FRWR. Generate a fresh rkey by replacing the MR.
*/
static void
-frwr_op_recover_mr(struct rpcrdma_mw *mw)
+frwr_op_recover_mr(struct rpcrdma_mr *mr)
{
- enum rpcrdma_frmr_state state = mw->frmr.fr_state;
- struct rpcrdma_xprt *r_xprt = mw->mw_xprt;
+ enum rpcrdma_frwr_state state = mr->frwr.fr_state;
+ struct rpcrdma_xprt *r_xprt = mr->mr_xprt;
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
int rc;
- rc = __frwr_reset_mr(ia, mw);
- if (state != FRMR_FLUSHED_LI)
+ rc = __frwr_mr_reset(ia, mr);
+ if (state != FRWR_FLUSHED_LI) {
+ trace_xprtrdma_dma_unmap(mr);
ib_dma_unmap_sg(ia->ri_device,
- mw->mw_sg, mw->mw_nents, mw->mw_dir);
+ mr->mr_sg, mr->mr_nents, mr->mr_dir);
+ }
if (rc)
goto out_release;
- rpcrdma_put_mw(r_xprt, mw);
+ rpcrdma_mr_put(mr);
r_xprt->rx_stats.mrs_recovered++;
return;
out_release:
- pr_err("rpcrdma: FRMR reset failed %d, %p release\n", rc, mw);
+ pr_err("rpcrdma: FRWR reset failed %d, %p release\n", rc, mr);
r_xprt->rx_stats.mrs_orphaned++;
- spin_lock(&r_xprt->rx_buf.rb_mwlock);
- list_del(&mw->mw_all);
- spin_unlock(&r_xprt->rx_buf.rb_mwlock);
+ spin_lock(&r_xprt->rx_buf.rb_mrlock);
+ list_del(&mr->mr_all);
+ spin_unlock(&r_xprt->rx_buf.rb_mrlock);
- frwr_op_release_mr(mw);
+ frwr_op_release_mr(mr);
}
static int
@@ -214,31 +216,31 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
if (attrs->device_cap_flags & IB_DEVICE_SG_GAPS_REG)
ia->ri_mrtype = IB_MR_TYPE_SG_GAPS;
- ia->ri_max_frmr_depth =
+ ia->ri_max_frwr_depth =
min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
attrs->max_fast_reg_page_list_len);
dprintk("RPC: %s: device's max FR page list len = %u\n",
- __func__, ia->ri_max_frmr_depth);
-
- /* Add room for frmr register and invalidate WRs.
- * 1. FRMR reg WR for head
- * 2. FRMR invalidate WR for head
- * 3. N FRMR reg WRs for pagelist
- * 4. N FRMR invalidate WRs for pagelist
- * 5. FRMR reg WR for tail
- * 6. FRMR invalidate WR for tail
+ __func__, ia->ri_max_frwr_depth);
+
+ /* Add room for frwr register and invalidate WRs.
+ * 1. FRWR reg WR for head
+ * 2. FRWR invalidate WR for head
+ * 3. N FRWR reg WRs for pagelist
+ * 4. N FRWR invalidate WRs for pagelist
+ * 5. FRWR reg WR for tail
+ * 6. FRWR invalidate WR for tail
* 7. The RDMA_SEND WR
*/
depth = 7;
- /* Calculate N if the device max FRMR depth is smaller than
+ /* Calculate N if the device max FRWR depth is smaller than
* RPCRDMA_MAX_DATA_SEGS.
*/
- if (ia->ri_max_frmr_depth < RPCRDMA_MAX_DATA_SEGS) {
- delta = RPCRDMA_MAX_DATA_SEGS - ia->ri_max_frmr_depth;
+ if (ia->ri_max_frwr_depth < RPCRDMA_MAX_DATA_SEGS) {
+ delta = RPCRDMA_MAX_DATA_SEGS - ia->ri_max_frwr_depth;
do {
- depth += 2; /* FRMR reg + invalidate */
- delta -= ia->ri_max_frmr_depth;
+ depth += 2; /* FRWR reg + invalidate */
+ delta -= ia->ri_max_frwr_depth;
} while (delta > 0);
}
@@ -252,7 +254,7 @@ frwr_op_open(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep,
}
ia->ri_max_segs = max_t(unsigned int, 1, RPCRDMA_MAX_DATA_SEGS /
- ia->ri_max_frmr_depth);
+ ia->ri_max_frwr_depth);
return 0;
}
@@ -265,7 +267,7 @@ frwr_op_maxpages(struct rpcrdma_xprt *r_xprt)
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
return min_t(unsigned int, RPCRDMA_MAX_DATA_SEGS,
- RPCRDMA_MAX_HDR_SEGS * ia->ri_max_frmr_depth);
+ RPCRDMA_MAX_HDR_SEGS * ia->ri_max_frwr_depth);
}
static void
@@ -286,16 +288,16 @@ __frwr_sendcompletion_flush(struct ib_wc *wc, const char *wr)
static void
frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
{
- struct rpcrdma_frmr *frmr;
- struct ib_cqe *cqe;
+ struct ib_cqe *cqe = wc->wr_cqe;
+ struct rpcrdma_frwr *frwr =
+ container_of(cqe, struct rpcrdma_frwr, fr_cqe);
/* WARNING: Only wr_cqe and status are reliable at this point */
if (wc->status != IB_WC_SUCCESS) {
- cqe = wc->wr_cqe;
- frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
- frmr->fr_state = FRMR_FLUSHED_FR;
+ frwr->fr_state = FRWR_FLUSHED_FR;
__frwr_sendcompletion_flush(wc, "fastreg");
}
+ trace_xprtrdma_wc_fastreg(wc, frwr);
}
/**
@@ -307,16 +309,16 @@ frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
static void
frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
{
- struct rpcrdma_frmr *frmr;
- struct ib_cqe *cqe;
+ struct ib_cqe *cqe = wc->wr_cqe;
+ struct rpcrdma_frwr *frwr = container_of(cqe, struct rpcrdma_frwr,
+ fr_cqe);
/* WARNING: Only wr_cqe and status are reliable at this point */
if (wc->status != IB_WC_SUCCESS) {
- cqe = wc->wr_cqe;
- frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
- frmr->fr_state = FRMR_FLUSHED_LI;
+ frwr->fr_state = FRWR_FLUSHED_LI;
__frwr_sendcompletion_flush(wc, "localinv");
}
+ trace_xprtrdma_wc_li(wc, frwr);
}
/**
@@ -329,17 +331,17 @@ frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
static void
frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
{
- struct rpcrdma_frmr *frmr;
- struct ib_cqe *cqe;
+ struct ib_cqe *cqe = wc->wr_cqe;
+ struct rpcrdma_frwr *frwr = container_of(cqe, struct rpcrdma_frwr,
+ fr_cqe);
/* WARNING: Only wr_cqe and status are reliable at this point */
- cqe = wc->wr_cqe;
- frmr = container_of(cqe, struct rpcrdma_frmr, fr_cqe);
if (wc->status != IB_WC_SUCCESS) {
- frmr->fr_state = FRMR_FLUSHED_LI;
+ frwr->fr_state = FRWR_FLUSHED_LI;
__frwr_sendcompletion_flush(wc, "localinv");
}
- complete(&frmr->fr_linv_done);
+ complete(&frwr->fr_linv_done);
+ trace_xprtrdma_wc_li_wake(wc, frwr);
}
/* Post a REG_MR Work Request to register a memory region
@@ -347,41 +349,39 @@ frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
*/
static struct rpcrdma_mr_seg *
frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
- int nsegs, bool writing, struct rpcrdma_mw **out)
+ int nsegs, bool writing, struct rpcrdma_mr **out)
{
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
bool holes_ok = ia->ri_mrtype == IB_MR_TYPE_SG_GAPS;
- struct rpcrdma_mw *mw;
- struct rpcrdma_frmr *frmr;
- struct ib_mr *mr;
+ struct rpcrdma_frwr *frwr;
+ struct rpcrdma_mr *mr;
+ struct ib_mr *ibmr;
struct ib_reg_wr *reg_wr;
struct ib_send_wr *bad_wr;
int rc, i, n;
u8 key;
- mw = NULL;
+ mr = NULL;
do {
- if (mw)
- rpcrdma_defer_mr_recovery(mw);
- mw = rpcrdma_get_mw(r_xprt);
- if (!mw)
+ if (mr)
+ rpcrdma_mr_defer_recovery(mr);
+ mr = rpcrdma_mr_get(r_xprt);
+ if (!mr)
return ERR_PTR(-ENOBUFS);
- } while (mw->frmr.fr_state != FRMR_IS_INVALID);
- frmr = &mw->frmr;
- frmr->fr_state = FRMR_IS_VALID;
- mr = frmr->fr_mr;
- reg_wr = &frmr->fr_regwr;
-
- if (nsegs > ia->ri_max_frmr_depth)
- nsegs = ia->ri_max_frmr_depth;
+ } while (mr->frwr.fr_state != FRWR_IS_INVALID);
+ frwr = &mr->frwr;
+ frwr->fr_state = FRWR_IS_VALID;
+
+ if (nsegs > ia->ri_max_frwr_depth)
+ nsegs = ia->ri_max_frwr_depth;
for (i = 0; i < nsegs;) {
if (seg->mr_page)
- sg_set_page(&mw->mw_sg[i],
+ sg_set_page(&mr->mr_sg[i],
seg->mr_page,
seg->mr_len,
offset_in_page(seg->mr_offset));
else
- sg_set_buf(&mw->mw_sg[i], seg->mr_offset,
+ sg_set_buf(&mr->mr_sg[i], seg->mr_offset,
seg->mr_len);
++seg;
@@ -392,30 +392,29 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
break;
}
- mw->mw_dir = rpcrdma_data_dir(writing);
+ mr->mr_dir = rpcrdma_data_dir(writing);
- mw->mw_nents = ib_dma_map_sg(ia->ri_device, mw->mw_sg, i, mw->mw_dir);
- if (!mw->mw_nents)
+ mr->mr_nents = ib_dma_map_sg(ia->ri_device, mr->mr_sg, i, mr->mr_dir);
+ if (!mr->mr_nents)
goto out_dmamap_err;
- n = ib_map_mr_sg(mr, mw->mw_sg, mw->mw_nents, NULL, PAGE_SIZE);
- if (unlikely(n != mw->mw_nents))
+ ibmr = frwr->fr_mr;
+ n = ib_map_mr_sg(ibmr, mr->mr_sg, mr->mr_nents, NULL, PAGE_SIZE);
+ if (unlikely(n != mr->mr_nents))
goto out_mapmr_err;
- dprintk("RPC: %s: Using frmr %p to map %u segments (%llu bytes)\n",
- __func__, frmr, mw->mw_nents, mr->length);
-
- key = (u8)(mr->rkey & 0x000000FF);
- ib_update_fast_reg_key(mr, ++key);
+ key = (u8)(ibmr->rkey & 0x000000FF);
+ ib_update_fast_reg_key(ibmr, ++key);
+ reg_wr = &frwr->fr_regwr;
reg_wr->wr.next = NULL;
reg_wr->wr.opcode = IB_WR_REG_MR;
- frmr->fr_cqe.done = frwr_wc_fastreg;
- reg_wr->wr.wr_cqe = &frmr->fr_cqe;
+ frwr->fr_cqe.done = frwr_wc_fastreg;
+ reg_wr->wr.wr_cqe = &frwr->fr_cqe;
reg_wr->wr.num_sge = 0;
reg_wr->wr.send_flags = 0;
- reg_wr->mr = mr;
- reg_wr->key = mr->rkey;
+ reg_wr->mr = ibmr;
+ reg_wr->key = ibmr->rkey;
reg_wr->access = writing ?
IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE :
IB_ACCESS_REMOTE_READ;
@@ -424,47 +423,64 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg,
if (rc)
goto out_senderr;
- mw->mw_handle = mr->rkey;
- mw->mw_length = mr->length;
- mw->mw_offset = mr->iova;
+ mr->mr_handle = ibmr->rkey;
+ mr->mr_length = ibmr->length;
+ mr->mr_offset = ibmr->iova;
- *out = mw;
+ *out = mr;
return seg;
out_dmamap_err:
pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n",
- mw->mw_sg, i);
- frmr->fr_state = FRMR_IS_INVALID;
- rpcrdma_put_mw(r_xprt, mw);
+ mr->mr_sg, i);
+ frwr->fr_state = FRWR_IS_INVALID;
+ rpcrdma_mr_put(mr);
return ERR_PTR(-EIO);
out_mapmr_err:
pr_err("rpcrdma: failed to map mr %p (%d/%d)\n",
- frmr->fr_mr, n, mw->mw_nents);
- rpcrdma_defer_mr_recovery(mw);
+ frwr->fr_mr, n, mr->mr_nents);
+ rpcrdma_mr_defer_recovery(mr);
return ERR_PTR(-EIO);
out_senderr:
- pr_err("rpcrdma: FRMR registration ib_post_send returned %i\n", rc);
- rpcrdma_defer_mr_recovery(mw);
+ pr_err("rpcrdma: FRWR registration ib_post_send returned %i\n", rc);
+ rpcrdma_mr_defer_recovery(mr);
return ERR_PTR(-ENOTCONN);
}
+/* Handle a remotely invalidated mr on the @mrs list
+ */
+static void
+frwr_op_reminv(struct rpcrdma_rep *rep, struct list_head *mrs)
+{
+ struct rpcrdma_mr *mr;
+
+ list_for_each_entry(mr, mrs, mr_list)
+ if (mr->mr_handle == rep->rr_inv_rkey) {
+ list_del(&mr->mr_list);
+ trace_xprtrdma_remoteinv(mr);
+ mr->frwr.fr_state = FRWR_IS_INVALID;
+ rpcrdma_mr_unmap_and_put(mr);
+ break; /* only one invalidated MR per RPC */
+ }
+}
+
/* Invalidate all memory regions that were registered for "req".
*
* Sleeps until it is safe for the host CPU to access the
* previously mapped memory regions.
*
- * Caller ensures that @mws is not empty before the call. This
+ * Caller ensures that @mrs is not empty before the call. This
* function empties the list.
*/
static void
-frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
+frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mrs)
{
struct ib_send_wr *first, **prev, *last, *bad_wr;
struct rpcrdma_ia *ia = &r_xprt->rx_ia;
- struct rpcrdma_frmr *f;
- struct rpcrdma_mw *mw;
+ struct rpcrdma_frwr *frwr;
+ struct rpcrdma_mr *mr;
int count, rc;
/* ORDER: Invalidate all of the MRs first
@@ -472,31 +488,27 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
* Chain the LOCAL_INV Work Requests and post them with
* a single ib_post_send() call.
*/
- f = NULL;
+ frwr = NULL;
count = 0;
prev = &first;
- list_for_each_entry(mw, mws, mw_list) {
- mw->frmr.fr_state = FRMR_IS_INVALID;
+ list_for_each_entry(mr, mrs, mr_list) {
+ mr->frwr.fr_state = FRWR_IS_INVALID;
- if (mw->mw_flags & RPCRDMA_MW_F_RI)
- continue;
+ frwr = &mr->frwr;
+ trace_xprtrdma_localinv(mr);
- f = &mw->frmr;
- dprintk("RPC: %s: invalidating frmr %p\n",
- __func__, f);
-
- f->fr_cqe.done = frwr_wc_localinv;
- last = &f->fr_invwr;
+ frwr->fr_cqe.done = frwr_wc_localinv;
+ last = &frwr->fr_invwr;
memset(last, 0, sizeof(*last));
- last->wr_cqe = &f->fr_cqe;
+ last->wr_cqe = &frwr->fr_cqe;
last->opcode = IB_WR_LOCAL_INV;
- last->ex.invalidate_rkey = mw->mw_handle;
+ last->ex.invalidate_rkey = mr->mr_handle;
count++;
*prev = last;
prev = &last->next;
}
- if (!f)
+ if (!frwr)
goto unmap;
/* Strong send queue ordering guarantees that when the
@@ -504,8 +516,8 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
* are complete.
*/
last->send_flags = IB_SEND_SIGNALED;
- f->fr_cqe.done = frwr_wc_localinv_wake;
- reinit_completion(&f->fr_linv_done);
+ frwr->fr_cqe.done = frwr_wc_localinv_wake;
+ reinit_completion(&frwr->fr_linv_done);
/* Transport disconnect drains the receive CQ before it
* replaces the QP. The RPC reply handler won't call us
@@ -515,36 +527,32 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws)
bad_wr = NULL;
rc = ib_post_send(ia->ri_id->qp, first, &bad_wr);
if (bad_wr != first)
- wait_for_completion(&f->fr_linv_done);
+ wait_for_completion(&frwr->fr_linv_done);
if (rc)
goto reset_mrs;
/* ORDER: Now DMA unmap all of the MRs, and return
- * them to the free MW list.
+ * them to the free MR list.
*/
unmap:
- while (!list_empty(mws)) {
- mw = rpcrdma_pop_mw(mws);
- dprintk("RPC: %s: DMA unmapping frmr %p\n",
- __func__, &mw->frmr);
- ib_dma_unmap_sg(ia->ri_device,
- mw->mw_sg, mw->mw_nents, mw->mw_dir);
- rpcrdma_put_mw(r_xprt, mw);
+ while (!list_empty(mrs)) {
+ mr = rpcrdma_mr_pop(mrs);
+ rpcrdma_mr_unmap_and_put(mr);
}
return;
reset_mrs:
- pr_err("rpcrdma: FRMR invalidate ib_post_send returned %i\n", rc);
+ pr_err("rpcrdma: FRWR invalidate ib_post_send returned %i\n", rc);
/* Find and reset the MRs in the LOCAL_INV WRs that did not
* get posted.
*/
while (bad_wr) {
- f = container_of(bad_wr, struct rpcrdma_frmr,
- fr_invwr);
- mw = container_of(f, struct rpcrdma_mw, frmr);
+ frwr = container_of(bad_wr, struct rpcrdma_frwr,
+ fr_invwr);
+ mr = container_of(frwr, struct rpcrdma_mr, frwr);
- __frwr_reset_mr(ia, mw);
+ __frwr_mr_reset(ia, mr);
bad_wr = bad_wr->next;
}
@@ -553,6 +561,7 @@ reset_mrs:
const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = {
.ro_map = frwr_op_map,
+ .ro_reminv = frwr_op_reminv,
.ro_unmap_sync = frwr_op_unmap_sync,
.ro_recover_mr = frwr_op_recover_mr,
.ro_open = frwr_op_open,
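
For orientation, the registration path described by this file's header comment
(frwr_op_map posting a FAST_REG/REG_MR Work Request, with a later LOCAL_INV to
invalidate) condenses to roughly the sketch below. This is illustrative only
and not part of the patch: frwr_register_sketch() is a hypothetical helper, the
FRWR state machine and completion handling (fr_cqe) are omitted, and the
ib_post_send() signature shown is the pre-4.19 (non-const) one used here.

/* Illustrative sketch only -- not part of this commit. Assumes the
 * caller already DMA-mapped @sg and owns @pd and a connected @qp.
 * Completion handling (wr_cqe) and error recovery are omitted; see
 * frwr_op_map() above for the real thing.
 */
#include <rdma/ib_verbs.h>

static int frwr_register_sketch(struct ib_pd *pd, struct ib_qp *qp,
				struct scatterlist *sg, int nents,
				u32 *out_rkey)
{
	struct ib_send_wr *bad_wr;
	struct ib_reg_wr reg_wr;
	struct ib_mr *mr;
	int n;

	/* One MR sized for the scatterlist */
	mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, nents);
	if (IS_ERR(mr))
		return PTR_ERR(mr);

	/* Load the MR's page list from the DMA-mapped scatterlist */
	n = ib_map_mr_sg(mr, sg, nents, NULL, PAGE_SIZE);
	if (n != nents)
		goto out_dereg;

	/* Bump the key portion of the rkey so stale handles cannot match */
	ib_update_fast_reg_key(mr, (u8)(mr->rkey & 0xFF) + 1);

	/* Post an (unsignaled) REG_MR Work Request to register the region */
	memset(&reg_wr, 0, sizeof(reg_wr));
	reg_wr.wr.opcode = IB_WR_REG_MR;
	reg_wr.mr = mr;
	reg_wr.key = mr->rkey;
	reg_wr.access = IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE;
	if (ib_post_send(qp, &reg_wr.wr, &bad_wr))
		goto out_dereg;

	*out_rkey = mr->rkey;	/* handle advertised to the peer */
	return 0;

out_dereg:
	ib_dereg_mr(mr);
	return -EIO;
}

Teardown is the mirror image: a LOCAL_INV Work Request carrying the same rkey
(as built in frwr_op_unmap_sync() above), followed by ib_dma_unmap_sg() and,
when the MR is finally released, ib_dereg_mr().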