summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-06-27 09:35:47 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-06-27 09:35:47 -0700
commit4e99b32169e84b4ece5a1d74eb0b7e4ef07866b3 (patch)
tree2875a11d46aa77d02926720024d889f6c8fc4f53
parentab0f2473d374f0dc4e3cc3f386abfafd8cf08ed2 (diff)
parent89a3c9f5b9f0bcaa9aea3e8b2a616fcaea9aad78 (diff)
downloadlinux-4e99b32169e84b4ece5a1d74eb0b7e4ef07866b3.tar.bz2
Merge tag 'nfs-for-5.8-2' of git://git.linux-nfs.org/projects/anna/linux-nfs
Pull NFS client bugfixes from Anna Schumaker: "Stable Fixes: - xprtrdma: Fix handling of RDMA_ERROR replies - sunrpc: Fix rollback in rpc_gssd_dummy_populate() - pNFS/flexfiles: Fix list corruption if the mirror count changes - NFSv4: Fix CLOSE not waiting for direct IO completion - SUNRPC: Properly set the @subbuf parameter of xdr_buf_subsegment() Other Fixes: - xprtrdma: Fix a use-after-free with r_xprt->rx_ep - Fix other xprtrdma races during disconnect - NFS: Fix memory leak of export_path" * tag 'nfs-for-5.8-2' of git://git.linux-nfs.org/projects/anna/linux-nfs: SUNRPC: Properly set the @subbuf parameter of xdr_buf_subsegment() NFSv4 fix CLOSE not waiting for direct IO compeletion pNFS/flexfiles: Fix list corruption if the mirror count changes nfs: Fix memory leak of export_path sunrpc: fixed rollback in rpc_gssd_dummy_populate() xprtrdma: Fix handling of RDMA_ERROR replies xprtrdma: Clean up disconnect xprtrdma: Clean up synopsis of rpcrdma_flush_disconnect() xprtrdma: Use re_connect_status safely in rpcrdma_xprt_connect() xprtrdma: Prevent dereferencing r_xprt->rx_ep after it is freed
-rw-r--r--fs/nfs/direct.c13
-rw-r--r--fs/nfs/file.c1
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.c11
-rw-r--r--fs/nfs/nfs4namespace.c1
-rw-r--r--net/sunrpc/rpc_pipe.c1
-rw-r--r--net/sunrpc/xdr.c4
-rw-r--r--net/sunrpc/xprtrdma/frwr_ops.c8
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c9
-rw-r--r--net/sunrpc/xprtrdma/transport.c2
-rw-r--r--net/sunrpc/xprtrdma/verbs.c71
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h3
11 files changed, 75 insertions, 49 deletions
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 1b79dd5cf661..3d113cf8908a 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -267,8 +267,6 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq)
{
struct inode *inode = dreq->inode;
- inode_dio_end(inode);
-
if (dreq->iocb) {
long res = (long) dreq->error;
if (dreq->count != 0) {
@@ -280,7 +278,10 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq)
complete(&dreq->completion);
+ igrab(inode);
nfs_direct_req_release(dreq);
+ inode_dio_end(inode);
+ iput(inode);
}
static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
@@ -410,8 +411,10 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
* generic layer handle the completion.
*/
if (requested_bytes == 0) {
- inode_dio_end(inode);
+ igrab(inode);
nfs_direct_req_release(dreq);
+ inode_dio_end(inode);
+ iput(inode);
return result < 0 ? result : -EIO;
}
@@ -864,8 +867,10 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
* generic layer handle the completion.
*/
if (requested_bytes == 0) {
- inode_dio_end(inode);
+ igrab(inode);
nfs_direct_req_release(dreq);
+ inode_dio_end(inode);
+ iput(inode);
return result < 0 ? result : -EIO;
}
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index f96367a2463e..ccd6c1637b27 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -83,6 +83,7 @@ nfs_file_release(struct inode *inode, struct file *filp)
dprintk("NFS: release(%pD2)\n", filp);
nfs_inc_stats(inode, NFSIOS_VFSRELEASE);
+ inode_dio_wait(inode);
nfs_file_clear_open_context(filp);
return 0;
}
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 7d399f72ebbb..de03e440b7ee 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -907,9 +907,8 @@ retry:
goto out_mds;
/* Use a direct mapping of ds_idx to pgio mirror_idx */
- if (WARN_ON_ONCE(pgio->pg_mirror_count !=
- FF_LAYOUT_MIRROR_COUNT(pgio->pg_lseg)))
- goto out_mds;
+ if (pgio->pg_mirror_count != FF_LAYOUT_MIRROR_COUNT(pgio->pg_lseg))
+ goto out_eagain;
for (i = 0; i < pgio->pg_mirror_count; i++) {
mirror = FF_LAYOUT_COMP(pgio->pg_lseg, i);
@@ -931,7 +930,10 @@ retry:
(NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR))
pgio->pg_maxretrans = io_maxretrans;
return;
-
+out_eagain:
+ pnfs_generic_pg_cleanup(pgio);
+ pgio->pg_error = -EAGAIN;
+ return;
out_mds:
trace_pnfs_mds_fallback_pg_init_write(pgio->pg_inode,
0, NFS4_MAX_UINT64, IOMODE_RW,
@@ -941,6 +943,7 @@ out_mds:
pgio->pg_lseg = NULL;
pgio->pg_maxretrans = 0;
nfs_pageio_reset_write_mds(pgio);
+ pgio->pg_error = -EAGAIN;
}
static unsigned int
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index a3ab6e219061..873342308dc0 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -308,6 +308,7 @@ static int try_location(struct fs_context *fc,
if (IS_ERR(export_path))
return PTR_ERR(export_path);
+ kfree(ctx->nfs_server.export_path);
ctx->nfs_server.export_path = export_path;
source = kmalloc(len + 1 + ctx->nfs_server.export_path_len + 1,
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 39e14d5edaf1..e9d0953522f0 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -1317,6 +1317,7 @@ rpc_gssd_dummy_populate(struct dentry *root, struct rpc_pipe *pipe_data)
q.len = strlen(gssd_dummy_clnt_dir[0].name);
clnt_dentry = d_hash_and_lookup(gssd_dentry, &q);
if (!clnt_dentry) {
+ __rpc_depopulate(gssd_dentry, gssd_dummy_clnt_dir, 0, 1);
pipe_dentry = ERR_PTR(-ENOENT);
goto out;
}
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 6f7d82fb1eb0..be11d672b5b9 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -1118,6 +1118,7 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf,
base = 0;
} else {
base -= buf->head[0].iov_len;
+ subbuf->head[0].iov_base = buf->head[0].iov_base;
subbuf->head[0].iov_len = 0;
}
@@ -1130,6 +1131,8 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf,
base = 0;
} else {
base -= buf->page_len;
+ subbuf->pages = buf->pages;
+ subbuf->page_base = 0;
subbuf->page_len = 0;
}
@@ -1141,6 +1144,7 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf,
base = 0;
} else {
base -= buf->tail[0].iov_len;
+ subbuf->tail[0].iov_base = buf->tail[0].iov_base;
subbuf->tail[0].iov_len = 0;
}
diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c
index ef997880e17a..b647562a26dd 100644
--- a/net/sunrpc/xprtrdma/frwr_ops.c
+++ b/net/sunrpc/xprtrdma/frwr_ops.c
@@ -367,7 +367,7 @@ static void frwr_wc_fastreg(struct ib_cq *cq, struct ib_wc *wc)
trace_xprtrdma_wc_fastreg(wc, frwr);
/* The MR will get recycled when the associated req is retransmitted */
- rpcrdma_flush_disconnect(cq, wc);
+ rpcrdma_flush_disconnect(cq->cq_context, wc);
}
/**
@@ -452,7 +452,7 @@ static void frwr_wc_localinv(struct ib_cq *cq, struct ib_wc *wc)
trace_xprtrdma_wc_li(wc, frwr);
__frwr_release_mr(wc, mr);
- rpcrdma_flush_disconnect(cq, wc);
+ rpcrdma_flush_disconnect(cq->cq_context, wc);
}
/**
@@ -474,7 +474,7 @@ static void frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc)
__frwr_release_mr(wc, mr);
complete(&frwr->fr_linv_done);
- rpcrdma_flush_disconnect(cq, wc);
+ rpcrdma_flush_disconnect(cq->cq_context, wc);
}
/**
@@ -582,7 +582,7 @@ static void frwr_wc_localinv_done(struct ib_cq *cq, struct ib_wc *wc)
smp_rmb();
rpcrdma_complete_rqst(rep);
- rpcrdma_flush_disconnect(cq, wc);
+ rpcrdma_flush_disconnect(cq->cq_context, wc);
}
/**
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index 2081c8fbfa48..935bbef2f7be 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -1349,8 +1349,7 @@ rpcrdma_decode_error(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep,
be32_to_cpup(p), be32_to_cpu(rep->rr_xid));
}
- r_xprt->rx_stats.bad_reply_count++;
- return -EREMOTEIO;
+ return -EIO;
}
/* Perform XID lookup, reconstruction of the RPC reply, and
@@ -1387,13 +1386,11 @@ out:
spin_unlock(&xprt->queue_lock);
return;
-/* If the incoming reply terminated a pending RPC, the next
- * RPC call will post a replacement receive buffer as it is
- * being marshaled.
- */
out_badheader:
trace_xprtrdma_reply_hdr(rep);
r_xprt->rx_stats.bad_reply_count++;
+ rqst->rq_task->tk_status = status;
+ status = 0;
goto out;
}
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 0c4af7f5e241..14165b673b20 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -242,7 +242,7 @@ xprt_rdma_connect_worker(struct work_struct *work)
rc = rpcrdma_xprt_connect(r_xprt);
xprt_clear_connecting(xprt);
- if (r_xprt->rx_ep && r_xprt->rx_ep->re_connect_status > 0) {
+ if (!rc) {
xprt->connect_cookie++;
xprt->stat.connect_count++;
xprt->stat.connect_time += (long)jiffies -
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 2ae348377806..2198c8ec8dff 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -84,7 +84,8 @@ static void rpcrdma_rep_destroy(struct rpcrdma_rep *rep);
static void rpcrdma_reps_unmap(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_mrs_create(struct rpcrdma_xprt *r_xprt);
static void rpcrdma_mrs_destroy(struct rpcrdma_xprt *r_xprt);
-static int rpcrdma_ep_destroy(struct rpcrdma_ep *ep);
+static void rpcrdma_ep_get(struct rpcrdma_ep *ep);
+static int rpcrdma_ep_put(struct rpcrdma_ep *ep);
static struct rpcrdma_regbuf *
rpcrdma_regbuf_alloc(size_t size, enum dma_data_direction direction,
gfp_t flags);
@@ -97,7 +98,8 @@ static void rpcrdma_regbuf_free(struct rpcrdma_regbuf *rb);
*/
static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt)
{
- struct rdma_cm_id *id = r_xprt->rx_ep->re_id;
+ struct rpcrdma_ep *ep = r_xprt->rx_ep;
+ struct rdma_cm_id *id = ep->re_id;
/* Flush Receives, then wait for deferred Reply work
* to complete.
@@ -108,6 +110,8 @@ static void rpcrdma_xprt_drain(struct rpcrdma_xprt *r_xprt)
* local invalidations.
*/
ib_drain_sq(id->qp);
+
+ rpcrdma_ep_put(ep);
}
/**
@@ -126,23 +130,27 @@ static void rpcrdma_qp_event_handler(struct ib_event *event, void *context)
trace_xprtrdma_qp_event(ep, event);
}
+/* Ensure xprt_force_disconnect() is invoked exactly once when a
+ * connection is closed or lost. (The important thing is it needs
+ * to be invoked "at least" once).
+ */
+static void rpcrdma_force_disconnect(struct rpcrdma_ep *ep)
+{
+ if (atomic_add_unless(&ep->re_force_disconnect, 1, 1))
+ xprt_force_disconnect(ep->re_xprt);
+}
+
/**
* rpcrdma_flush_disconnect - Disconnect on flushed completion
- * @cq: completion queue
+ * @r_xprt: transport to disconnect
* @wc: work completion entry
*
* Must be called in process context.
*/
-void rpcrdma_flush_disconnect(struct ib_cq *cq, struct ib_wc *wc)
+void rpcrdma_flush_disconnect(struct rpcrdma_xprt *r_xprt, struct ib_wc *wc)
{
- struct rpcrdma_xprt *r_xprt = cq->cq_context;
- struct rpc_xprt *xprt = &r_xprt->rx_xprt;
-
- if (wc->status != IB_WC_SUCCESS &&
- r_xprt->rx_ep->re_connect_status == 1) {
- r_xprt->rx_ep->re_connect_status = -ECONNABORTED;
- xprt_force_disconnect(xprt);
- }
+ if (wc->status != IB_WC_SUCCESS)
+ rpcrdma_force_disconnect(r_xprt->rx_ep);
}
/**
@@ -156,11 +164,12 @@ static void rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
struct ib_cqe *cqe = wc->wr_cqe;
struct rpcrdma_sendctx *sc =
container_of(cqe, struct rpcrdma_sendctx, sc_cqe);
+ struct rpcrdma_xprt *r_xprt = cq->cq_context;
/* WARNING: Only wr_cqe and status are reliable at this point */
trace_xprtrdma_wc_send(sc, wc);
- rpcrdma_sendctx_put_locked((struct rpcrdma_xprt *)cq->cq_context, sc);
- rpcrdma_flush_disconnect(cq, wc);
+ rpcrdma_sendctx_put_locked(r_xprt, sc);
+ rpcrdma_flush_disconnect(r_xprt, wc);
}
/**
@@ -195,7 +204,7 @@ static void rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
return;
out_flushed:
- rpcrdma_flush_disconnect(cq, wc);
+ rpcrdma_flush_disconnect(r_xprt, wc);
rpcrdma_rep_destroy(rep);
}
@@ -239,7 +248,6 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
{
struct sockaddr *sap = (struct sockaddr *)&id->route.addr.dst_addr;
struct rpcrdma_ep *ep = id->context;
- struct rpc_xprt *xprt = ep->re_xprt;
might_sleep();
@@ -263,10 +271,9 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
/* fall through */
case RDMA_CM_EVENT_ADDR_CHANGE:
ep->re_connect_status = -ENODEV;
- xprt_force_disconnect(xprt);
goto disconnected;
case RDMA_CM_EVENT_ESTABLISHED:
- kref_get(&ep->re_kref);
+ rpcrdma_ep_get(ep);
ep->re_connect_status = 1;
rpcrdma_update_cm_private(ep, &event->param.conn);
trace_xprtrdma_inline_thresh(ep);
@@ -288,8 +295,8 @@ rpcrdma_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event)
case RDMA_CM_EVENT_DISCONNECTED:
ep->re_connect_status = -ECONNABORTED;
disconnected:
- xprt_force_disconnect(xprt);
- return rpcrdma_ep_destroy(ep);
+ rpcrdma_force_disconnect(ep);
+ return rpcrdma_ep_put(ep);
default:
break;
}
@@ -345,7 +352,7 @@ out:
return ERR_PTR(rc);
}
-static void rpcrdma_ep_put(struct kref *kref)
+static void rpcrdma_ep_destroy(struct kref *kref)
{
struct rpcrdma_ep *ep = container_of(kref, struct rpcrdma_ep, re_kref);
@@ -369,13 +376,18 @@ static void rpcrdma_ep_put(struct kref *kref)
module_put(THIS_MODULE);
}
+static noinline void rpcrdma_ep_get(struct rpcrdma_ep *ep)
+{
+ kref_get(&ep->re_kref);
+}
+
/* Returns:
* %0 if @ep still has a positive kref count, or
* %1 if @ep was destroyed successfully.
*/
-static int rpcrdma_ep_destroy(struct rpcrdma_ep *ep)
+static noinline int rpcrdma_ep_put(struct rpcrdma_ep *ep)
{
- return kref_put(&ep->re_kref, rpcrdma_ep_put);
+ return kref_put(&ep->re_kref, rpcrdma_ep_destroy);
}
static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt)
@@ -492,7 +504,7 @@ static int rpcrdma_ep_create(struct rpcrdma_xprt *r_xprt)
return 0;
out_destroy:
- rpcrdma_ep_destroy(ep);
+ rpcrdma_ep_put(ep);
rdma_destroy_id(id);
out_free:
kfree(ep);
@@ -519,10 +531,13 @@ retry:
return rc;
ep = r_xprt->rx_ep;
- ep->re_connect_status = 0;
xprt_clear_connected(xprt);
-
rpcrdma_reset_cwnd(r_xprt);
+
+ /* Bump the ep's reference count while there are
+ * outstanding Receives.
+ */
+ rpcrdma_ep_get(ep);
rpcrdma_post_recvs(r_xprt, true);
rc = rpcrdma_sendctxs_create(r_xprt);
@@ -552,8 +567,6 @@ retry:
rpcrdma_mrs_create(r_xprt);
out:
- if (rc)
- ep->re_connect_status = rc;
trace_xprtrdma_connect(r_xprt, rc);
return rc;
}
@@ -587,7 +600,7 @@ void rpcrdma_xprt_disconnect(struct rpcrdma_xprt *r_xprt)
rpcrdma_mrs_destroy(r_xprt);
rpcrdma_sendctxs_destroy(r_xprt);
- if (rpcrdma_ep_destroy(ep))
+ if (rpcrdma_ep_put(ep))
rdma_destroy_id(id);
r_xprt->rx_ep = NULL;
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 0a16fdb09b2c..43974ef39a50 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -82,6 +82,7 @@ struct rpcrdma_ep {
unsigned int re_max_inline_recv;
int re_async_rc;
int re_connect_status;
+ atomic_t re_force_disconnect;
struct ib_qp_init_attr re_attr;
wait_queue_head_t re_connect_wait;
struct rpc_xprt *re_xprt;
@@ -446,7 +447,7 @@ extern unsigned int xprt_rdma_memreg_strategy;
/*
* Endpoint calls - xprtrdma/verbs.c
*/
-void rpcrdma_flush_disconnect(struct ib_cq *cq, struct ib_wc *wc);
+void rpcrdma_flush_disconnect(struct rpcrdma_xprt *r_xprt, struct ib_wc *wc);
int rpcrdma_xprt_connect(struct rpcrdma_xprt *r_xprt);
void rpcrdma_xprt_disconnect(struct rpcrdma_xprt *r_xprt);