From 9300fdba250cbb71574eb3c6af2540219d926717 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 6 May 2015 09:32:03 +0100 Subject: SUNRPC: drop stale doc comments in xprtsock.c Several functions have outdated arguments listed in the doc comments. Drop documentation for arguments that no longer exist. Signed-off-by: Stefan Hajnoczi Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'net/sunrpc/xprtsock.c') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 66891e32c5e3..b142feef0cf4 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -901,7 +901,6 @@ static int xs_local_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb) /** * xs_local_data_ready - "data ready" callback for AF_LOCAL sockets * @sk: socket with data to read - * @len: how much data to read * * Currently this assumes we can read the whole reply in a single gulp. */ @@ -965,7 +964,6 @@ static void xs_local_data_ready(struct sock *sk) /** * xs_udp_data_ready - "data ready" callback for UDP sockets * @sk: socket with data to read - * @len: how much data to read * */ static void xs_udp_data_ready(struct sock *sk) @@ -1389,7 +1387,6 @@ static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, uns /** * xs_tcp_data_ready - "data ready" callback for TCP sockets * @sk: socket with data to read - * @bytes: how much data to read * */ static void xs_tcp_data_ready(struct sock *sk) @@ -1886,9 +1883,7 @@ static int xs_local_finish_connecting(struct rpc_xprt *xprt, /** * xs_local_setup_socket - create AF_LOCAL socket, connect to a local endpoint - * @xprt: RPC transport to connect * @transport: socket transport to connect - * @create_sock: function to create a socket of the correct type */ static int xs_local_setup_socket(struct sock_xprt *transport) { @@ -2125,9 +2120,6 @@ out: /** * xs_tcp_setup_socket - create a TCP socket and connect to a remote endpoint - * @xprt: RPC transport to connect - * @transport: socket transport to connect - * @create_sock: function to create a socket of the correct type * * Invoked by a work queue tasklet. */ -- cgit v1.2.3 From 3c87ef6efb40f0e339d705c194b2224f854ec38e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 3 Jun 2015 16:14:25 -0400 Subject: sunrpc: keep a count of swapfiles associated with the rpc_clnt Jerome reported seeing a warning pop when working with a swapfile on NFS. The nfs_swap_activate can end up calling sk_set_memalloc while holding the rcu_read_lock and that function can sleep. To fix that, we need to take a reference to the xprt while holding the rcu_read_lock, set the socket up for swapping and then drop that reference. But, xprt_put is not exported and having NFS deal with the underlying xprt is a bit of layering violation anyway. Fix this by adding a set of activate/deactivate functions that take a rpc_clnt pointer instead of an rpc_xprt, and have nfs_swap_activate and nfs_swap_deactivate call those. Also, add a per-rpc_clnt atomic counter to keep track of the number of active swapfiles associated with it. When the counter does a 0->1 transition, we enable swapping on the xprt, when we do a 1->0 transition we disable swapping on it. This also allows us to be a bit more selective with the RPC_TASK_SWAPPER flag. If non-swapper and swapper clnts are sharing a xprt, then we only need to flag the tasks from the swapper clnt with that flag. Acked-by: Mel Gorman Reported-by: Jerome Marchand Signed-off-by: Jeff Layton Reviewed-by: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/file.c | 15 ++-------- include/linux/sunrpc/clnt.h | 1 + include/linux/sunrpc/sched.h | 16 +++++++++++ net/sunrpc/clnt.c | 67 ++++++++++++++++++++++++++++++++++++++------ net/sunrpc/xprtsock.c | 3 +- 5 files changed, 78 insertions(+), 24 deletions(-) (limited to 'net/sunrpc/xprtsock.c') diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 8b8d83a526ce..cc4fa1ed61fc 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -555,31 +555,22 @@ static int nfs_launder_page(struct page *page) return nfs_wb_page(inode, page); } -#ifdef CONFIG_NFS_SWAP static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file, sector_t *span) { - int ret; struct rpc_clnt *clnt = NFS_CLIENT(file->f_mapping->host); *span = sis->pages; - rcu_read_lock(); - ret = xs_swapper(rcu_dereference(clnt->cl_xprt), 1); - rcu_read_unlock(); - - return ret; + return rpc_clnt_swap_activate(clnt); } static void nfs_swap_deactivate(struct file *file) { struct rpc_clnt *clnt = NFS_CLIENT(file->f_mapping->host); - rcu_read_lock(); - xs_swapper(rcu_dereference(clnt->cl_xprt), 0); - rcu_read_unlock(); + rpc_clnt_swap_deactivate(clnt); } -#endif const struct address_space_operations nfs_file_aops = { .readpage = nfs_readpage, @@ -596,10 +587,8 @@ const struct address_space_operations nfs_file_aops = { .launder_page = nfs_launder_page, .is_dirty_writeback = nfs_check_dirty_writeback, .error_remove_page = generic_error_remove_page, -#ifdef CONFIG_NFS_SWAP .swap_activate = nfs_swap_activate, .swap_deactivate = nfs_swap_deactivate, -#endif }; /* diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 598ba80ec30c..131032f15cc1 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -56,6 +56,7 @@ struct rpc_clnt { struct rpc_rtt * cl_rtt; /* RTO estimator data */ const struct rpc_timeout *cl_timeout; /* Timeout strategy */ + atomic_t cl_swapper; /* swapfile count */ int cl_nodelen; /* nodename length */ char cl_nodename[UNX_MAXNODENAME+1]; struct rpc_pipe_dir_head cl_pipedir_objects; diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h index 2aa68976a482..d703f0ef37d8 100644 --- a/include/linux/sunrpc/sched.h +++ b/include/linux/sunrpc/sched.h @@ -268,4 +268,20 @@ static inline void rpc_assign_waitqueue_name(struct rpc_wait_queue *q, } #endif +#if IS_ENABLED(CONFIG_SUNRPC_SWAP) +int rpc_clnt_swap_activate(struct rpc_clnt *clnt); +void rpc_clnt_swap_deactivate(struct rpc_clnt *clnt); +#else +static inline int +rpc_clnt_swap_activate(struct rpc_clnt *clnt) +{ + return -EINVAL; +} + +static inline void +rpc_clnt_swap_deactivate(struct rpc_clnt *clnt) +{ +} +#endif /* CONFIG_SUNRPC_SWAP */ + #endif /* _LINUX_SUNRPC_SCHED_H_ */ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index f6717170480e..0ba65156a62b 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -891,15 +891,8 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt) task->tk_flags |= RPC_TASK_SOFT; if (clnt->cl_noretranstimeo) task->tk_flags |= RPC_TASK_NO_RETRANS_TIMEOUT; - if (sk_memalloc_socks()) { - struct rpc_xprt *xprt; - - rcu_read_lock(); - xprt = rcu_dereference(clnt->cl_xprt); - if (xprt->swapper) - task->tk_flags |= RPC_TASK_SWAPPER; - rcu_read_unlock(); - } + if (atomic_read(&clnt->cl_swapper)) + task->tk_flags |= RPC_TASK_SWAPPER; /* Add to the client's list of all tasks */ spin_lock(&clnt->cl_lock); list_add_tail(&task->tk_task, &clnt->cl_tasks); @@ -2479,3 +2472,59 @@ void rpc_show_tasks(struct net *net) spin_unlock(&sn->rpc_client_lock); } #endif + +#if IS_ENABLED(CONFIG_SUNRPC_SWAP) +int +rpc_clnt_swap_activate(struct rpc_clnt *clnt) +{ + int ret = 0; + struct rpc_xprt *xprt; + + if (atomic_inc_return(&clnt->cl_swapper) == 1) { +retry: + rcu_read_lock(); + xprt = xprt_get(rcu_dereference(clnt->cl_xprt)); + rcu_read_unlock(); + if (!xprt) { + /* + * If we didn't get a reference, then we likely are + * racing with a migration event. Wait for a grace + * period and try again. + */ + synchronize_rcu(); + goto retry; + } + + ret = xs_swapper(xprt, 1); + xprt_put(xprt); + } + return ret; +} +EXPORT_SYMBOL_GPL(rpc_clnt_swap_activate); + +void +rpc_clnt_swap_deactivate(struct rpc_clnt *clnt) +{ + struct rpc_xprt *xprt; + + if (atomic_dec_if_positive(&clnt->cl_swapper) == 0) { +retry: + rcu_read_lock(); + xprt = xprt_get(rcu_dereference(clnt->cl_xprt)); + rcu_read_unlock(); + if (!xprt) { + /* + * If we didn't get a reference, then we likely are + * racing with a migration event. Wait for a grace + * period and try again. + */ + synchronize_rcu(); + goto retry; + } + + xs_swapper(xprt, 0); + xprt_put(xprt); + } +} +EXPORT_SYMBOL_GPL(rpc_clnt_swap_deactivate); +#endif /* CONFIG_SUNRPC_SWAP */ diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index b142feef0cf4..f344c0f8974c 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1955,7 +1955,7 @@ static void xs_local_connect(struct rpc_xprt *xprt, struct rpc_task *task) msleep_interruptible(15000); } -#ifdef CONFIG_SUNRPC_SWAP +#if IS_ENABLED(CONFIG_SUNRPC_SWAP) static void xs_set_memalloc(struct rpc_xprt *xprt) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, @@ -1987,7 +1987,6 @@ int xs_swapper(struct rpc_xprt *xprt, int enable) return err; } -EXPORT_SYMBOL_GPL(xs_swapper); #else static void xs_set_memalloc(struct rpc_xprt *xprt) { -- cgit v1.2.3 From 8e2281330f9930bccf77cf04027ec60b6cc0fb34 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 3 Jun 2015 16:14:26 -0400 Subject: sunrpc: make xprt->swapper an atomic_t Split xs_swapper into enable/disable functions and eliminate the "enable" flag. Currently, it's racy if you have multiple swapon/swapoff operations running in parallel over the same xprt. Also fix it so that we only set it to a memalloc socket on a 0->1 transition and only clear it on a 1->0 transition. Cc: Mel Gorman Signed-off-by: Jeff Layton Reviewed-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 5 +++-- net/sunrpc/clnt.c | 4 ++-- net/sunrpc/xprtsock.c | 38 +++++++++++++++++++++++++------------- 3 files changed, 30 insertions(+), 17 deletions(-) (limited to 'net/sunrpc/xprtsock.c') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index bc9c39d6d30d..9a3308703c15 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -180,7 +180,7 @@ struct rpc_xprt { atomic_t num_reqs; /* total slots */ unsigned long state; /* transport state */ unsigned char resvport : 1; /* use a reserved port */ - unsigned int swapper; /* we're swapping over this + atomic_t swapper; /* we're swapping over this transport */ unsigned int bind_index; /* bind function index */ @@ -346,7 +346,8 @@ void xprt_release_rqst_cong(struct rpc_task *task); void xprt_disconnect_done(struct rpc_xprt *xprt); void xprt_force_disconnect(struct rpc_xprt *xprt); void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie); -int xs_swapper(struct rpc_xprt *xprt, int enable); +int xs_swapper_enable(struct rpc_xprt *xprt); +void xs_swapper_disable(struct rpc_xprt *xprt); bool xprt_lock_connect(struct rpc_xprt *, struct rpc_task *, void *); void xprt_unlock_connect(struct rpc_xprt *, void *); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 0ba65156a62b..801044aeeedd 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2495,7 +2495,7 @@ retry: goto retry; } - ret = xs_swapper(xprt, 1); + ret = xs_swapper_enable(xprt); xprt_put(xprt); } return ret; @@ -2522,7 +2522,7 @@ retry: goto retry; } - xs_swapper(xprt, 0); + xs_swapper_disable(xprt); xprt_put(xprt); } } diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index f344c0f8974c..6538e6fbb7ba 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1961,31 +1961,43 @@ static void xs_set_memalloc(struct rpc_xprt *xprt) struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); - if (xprt->swapper) + if (atomic_read(&xprt->swapper)) sk_set_memalloc(transport->inet); } /** - * xs_swapper - Tag this transport as being used for swap. + * xs_swapper_enable - Tag this transport as being used for swap. * @xprt: transport to tag - * @enable: enable/disable * + * Take a reference to this transport on behalf of the rpc_clnt, and + * optionally mark it for swapping if it wasn't already. */ -int xs_swapper(struct rpc_xprt *xprt, int enable) +int +xs_swapper_enable(struct rpc_xprt *xprt) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); - int err = 0; - if (enable) { - xprt->swapper++; - xs_set_memalloc(xprt); - } else if (xprt->swapper) { - xprt->swapper--; - sk_clear_memalloc(transport->inet); - } + if (atomic_inc_return(&xprt->swapper) == 1) + sk_set_memalloc(transport->inet); + return 0; +} - return err; +/** + * xs_swapper_disable - Untag this transport as being used for swap. + * @xprt: transport to tag + * + * Drop a "swapper" reference to this xprt on behalf of the rpc_clnt. If the + * swapper refcount goes to 0, untag the socket as a memalloc socket. + */ +void +xs_swapper_disable(struct rpc_xprt *xprt) +{ + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, + xprt); + + if (atomic_dec_and_test(&xprt->swapper)) + sk_clear_memalloc(transport->inet); } #else static void xs_set_memalloc(struct rpc_xprt *xprt) -- cgit v1.2.3 From 264d1df3b34804a7d440d77771020f616a573528 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 3 Jun 2015 16:14:27 -0400 Subject: sunrpc: if we're closing down a socket, clear memalloc on it first We currently increment the memalloc_socks counter if we have a xprt that is associated with a swapfile. That socket can be replaced however during a reconnect event, and the memalloc_socks counter is never decremented if that occurs. When tearing down a xprt socket, check to see if the xprt is set up for swapping and sk_clear_memalloc before releasing the socket if so. Acked-by: Mel Gorman Signed-off-by: Jeff Layton Reviewed-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net/sunrpc/xprtsock.c') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 6538e6fbb7ba..31b856a2935c 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -827,6 +827,9 @@ static void xs_reset_transport(struct sock_xprt *transport) if (sk == NULL) return; + if (atomic_read(&transport->xprt.swapper)) + sk_clear_memalloc(sk); + write_lock_bh(&sk->sk_callback_lock); transport->inet = NULL; transport->sock = NULL; -- cgit v1.2.3 From d6e971d8ecafee18ff9cc1ac646eb0817ab8b143 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 3 Jun 2015 16:14:28 -0400 Subject: sunrpc: lock xprt before trying to set memalloc on the sockets It's possible that we could race with a call to xs_reset_transport, in which case the xprt->inet pointer could be zeroed out while we're accessing it. Lock the xprt before we try to set memalloc on it. Cc: Mel Gorman Signed-off-by: Jeff Layton Reviewed-by: Chuck Lever Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 35 +++++++++++++++++++++++++++-------- 1 file changed, 27 insertions(+), 8 deletions(-) (limited to 'net/sunrpc/xprtsock.c') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 31b856a2935c..e0efd8514886 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1959,11 +1959,22 @@ static void xs_local_connect(struct rpc_xprt *xprt, struct rpc_task *task) } #if IS_ENABLED(CONFIG_SUNRPC_SWAP) +/* + * Note that this should be called with XPRT_LOCKED held (or when we otherwise + * know that we have exclusive access to the socket), to guard against + * races with xs_reset_transport. + */ static void xs_set_memalloc(struct rpc_xprt *xprt) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + /* + * If there's no sock, then we have nothing to set. The + * reconnecting process will get it for us. + */ + if (!transport->inet) + return; if (atomic_read(&xprt->swapper)) sk_set_memalloc(transport->inet); } @@ -1978,11 +1989,15 @@ static void xs_set_memalloc(struct rpc_xprt *xprt) int xs_swapper_enable(struct rpc_xprt *xprt) { - struct sock_xprt *transport = container_of(xprt, struct sock_xprt, - xprt); + struct sock_xprt *xs = container_of(xprt, struct sock_xprt, xprt); - if (atomic_inc_return(&xprt->swapper) == 1) - sk_set_memalloc(transport->inet); + if (atomic_inc_return(&xprt->swapper) != 1) + return 0; + if (wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_KILLABLE)) + return -ERESTARTSYS; + if (xs->inet) + sk_set_memalloc(xs->inet); + xprt_release_xprt(xprt, NULL); return 0; } @@ -1996,11 +2011,15 @@ xs_swapper_enable(struct rpc_xprt *xprt) void xs_swapper_disable(struct rpc_xprt *xprt) { - struct sock_xprt *transport = container_of(xprt, struct sock_xprt, - xprt); + struct sock_xprt *xs = container_of(xprt, struct sock_xprt, xprt); - if (atomic_dec_and_test(&xprt->swapper)) - sk_clear_memalloc(transport->inet); + if (!atomic_dec_and_test(&xprt->swapper)) + return; + if (wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_KILLABLE)) + return; + if (xs->inet) + sk_clear_memalloc(xs->inet); + xprt_release_xprt(xprt, NULL); } #else static void xs_set_memalloc(struct rpc_xprt *xprt) -- cgit v1.2.3 From d67fa4d85a2143b46052b2e9ccc6749a4c97b2de Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 3 Jun 2015 16:14:29 -0400 Subject: sunrpc: turn swapper_enable/disable functions into rpc_xprt_ops RDMA xprts don't have a sock_xprt, but an rdma_xprt, so the xs_swapper_enable/disable functions will likely oops when fed an RDMA xprt. Turn these functions into rpc_xprt_ops so that that doesn't occur. For now the RDMA versions are no-ops that just return -EINVAL on an attempt to swapon. Cc: Chuck Lever Signed-off-by: Jeff Layton Reviewed-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 16 ++++++++++++++-- net/sunrpc/clnt.c | 4 ++-- net/sunrpc/xprtrdma/transport.c | 15 ++++++++++++++- net/sunrpc/xprtsock.c | 31 +++++++++++++++++++++++++------ 4 files changed, 55 insertions(+), 11 deletions(-) (limited to 'net/sunrpc/xprtsock.c') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 9a3308703c15..b6096592b1d4 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -133,6 +133,8 @@ struct rpc_xprt_ops { void (*close)(struct rpc_xprt *xprt); void (*destroy)(struct rpc_xprt *xprt); void (*print_stats)(struct rpc_xprt *xprt, struct seq_file *seq); + int (*enable_swap)(struct rpc_xprt *xprt); + void (*disable_swap)(struct rpc_xprt *xprt); }; /* @@ -328,6 +330,18 @@ static inline __be32 *xprt_skip_transport_header(struct rpc_xprt *xprt, __be32 * return p + xprt->tsh_size; } +static inline int +xprt_enable_swap(struct rpc_xprt *xprt) +{ + return xprt->ops->enable_swap(xprt); +} + +static inline void +xprt_disable_swap(struct rpc_xprt *xprt) +{ + xprt->ops->disable_swap(xprt); +} + /* * Transport switch helper functions */ @@ -346,8 +360,6 @@ void xprt_release_rqst_cong(struct rpc_task *task); void xprt_disconnect_done(struct rpc_xprt *xprt); void xprt_force_disconnect(struct rpc_xprt *xprt); void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie); -int xs_swapper_enable(struct rpc_xprt *xprt); -void xs_swapper_disable(struct rpc_xprt *xprt); bool xprt_lock_connect(struct rpc_xprt *, struct rpc_task *, void *); void xprt_unlock_connect(struct rpc_xprt *, void *); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 801044aeeedd..576d6ae39f25 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2495,7 +2495,7 @@ retry: goto retry; } - ret = xs_swapper_enable(xprt); + ret = xprt_enable_swap(xprt); xprt_put(xprt); } return ret; @@ -2522,7 +2522,7 @@ retry: goto retry; } - xs_swapper_disable(xprt); + xprt_disable_swap(xprt); xprt_put(xprt); } } diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index 54f23b1be986..ebf6fe759f0e 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -682,6 +682,17 @@ static void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) r_xprt->rx_stats.bad_reply_count); } +static int +xprt_rdma_enable_swap(struct rpc_xprt *xprt) +{ + return -EINVAL; +} + +static void +xprt_rdma_disable_swap(struct rpc_xprt *xprt) +{ +} + /* * Plumbing for rpc transport switch and kernel module */ @@ -700,7 +711,9 @@ static struct rpc_xprt_ops xprt_rdma_procs = { .send_request = xprt_rdma_send_request, .close = xprt_rdma_close, .destroy = xprt_rdma_destroy, - .print_stats = xprt_rdma_print_stats + .print_stats = xprt_rdma_print_stats, + .enable_swap = xprt_rdma_enable_swap, + .disable_swap = xprt_rdma_disable_swap, }; static struct xprt_class xprt_rdma = { diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index e0efd8514886..bf20726d4ab5 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1980,14 +1980,14 @@ static void xs_set_memalloc(struct rpc_xprt *xprt) } /** - * xs_swapper_enable - Tag this transport as being used for swap. + * xs_enable_swap - Tag this transport as being used for swap. * @xprt: transport to tag * * Take a reference to this transport on behalf of the rpc_clnt, and * optionally mark it for swapping if it wasn't already. */ -int -xs_swapper_enable(struct rpc_xprt *xprt) +static int +xs_enable_swap(struct rpc_xprt *xprt) { struct sock_xprt *xs = container_of(xprt, struct sock_xprt, xprt); @@ -2002,14 +2002,14 @@ xs_swapper_enable(struct rpc_xprt *xprt) } /** - * xs_swapper_disable - Untag this transport as being used for swap. + * xs_disable_swap - Untag this transport as being used for swap. * @xprt: transport to tag * * Drop a "swapper" reference to this xprt on behalf of the rpc_clnt. If the * swapper refcount goes to 0, untag the socket as a memalloc socket. */ -void -xs_swapper_disable(struct rpc_xprt *xprt) +static void +xs_disable_swap(struct rpc_xprt *xprt) { struct sock_xprt *xs = container_of(xprt, struct sock_xprt, xprt); @@ -2025,6 +2025,17 @@ xs_swapper_disable(struct rpc_xprt *xprt) static void xs_set_memalloc(struct rpc_xprt *xprt) { } + +static int +xs_enable_swap(struct rpc_xprt *xprt) +{ + return -EINVAL; +} + +static void +xs_disable_swap(struct rpc_xprt *xprt) +{ +} #endif static void xs_udp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) @@ -2488,6 +2499,8 @@ static struct rpc_xprt_ops xs_local_ops = { .close = xs_close, .destroy = xs_destroy, .print_stats = xs_local_print_stats, + .enable_swap = xs_enable_swap, + .disable_swap = xs_disable_swap, }; static struct rpc_xprt_ops xs_udp_ops = { @@ -2507,6 +2520,8 @@ static struct rpc_xprt_ops xs_udp_ops = { .close = xs_close, .destroy = xs_destroy, .print_stats = xs_udp_print_stats, + .enable_swap = xs_enable_swap, + .disable_swap = xs_disable_swap, }; static struct rpc_xprt_ops xs_tcp_ops = { @@ -2523,6 +2538,8 @@ static struct rpc_xprt_ops xs_tcp_ops = { .close = xs_tcp_shutdown, .destroy = xs_destroy, .print_stats = xs_tcp_print_stats, + .enable_swap = xs_enable_swap, + .disable_swap = xs_disable_swap, }; /* @@ -2540,6 +2557,8 @@ static struct rpc_xprt_ops bc_tcp_ops = { .close = bc_close, .destroy = bc_destroy, .print_stats = xs_tcp_print_stats, + .enable_swap = xs_enable_swap, + .disable_swap = xs_disable_swap, }; static int xs_init_anyaddr(const int family, struct sockaddr *sap) -- cgit v1.2.3 From 4a06825839889cc1756d0dd8a52d6b1071ee0263 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 11 May 2015 14:02:25 -0400 Subject: SUNRPC: Transport fault injection It has been exceptionally useful to exercise the logic that handles local immediate errors and RDMA connection loss. To enable developers to test this regularly and repeatably, add logic to simulate connection loss every so often. Fault injection is disabled by default. It is enabled with $ sudo echo xxx > /sys/kernel/debug/sunrpc/inject_fault/disconnect where "xxx" is a large positive number of transport method calls before a disconnect. A value of several thousand is usually a good number that allows reasonable forward progress while still causing a lot of connection drops. These hooks are disabled when SUNRPC_DEBUG is turned off. Signed-off-by: Chuck Lever Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprt.h | 19 ++++++++++ net/sunrpc/clnt.c | 1 + net/sunrpc/debugfs.c | 77 +++++++++++++++++++++++++++++++++++++++++ net/sunrpc/xprt.c | 2 ++ net/sunrpc/xprtrdma/transport.c | 11 ++++++ net/sunrpc/xprtsock.c | 10 ++++++ 6 files changed, 120 insertions(+) (limited to 'net/sunrpc/xprtsock.c') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index b6096592b1d4..0fb9acbb4780 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -135,6 +135,7 @@ struct rpc_xprt_ops { void (*print_stats)(struct rpc_xprt *xprt, struct seq_file *seq); int (*enable_swap)(struct rpc_xprt *xprt); void (*disable_swap)(struct rpc_xprt *xprt); + void (*inject_disconnect)(struct rpc_xprt *xprt); }; /* @@ -244,6 +245,7 @@ struct rpc_xprt { const char *address_strings[RPC_DISPLAY_MAX]; #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) struct dentry *debugfs; /* debugfs directory */ + atomic_t inject_disconnect; #endif }; @@ -445,6 +447,23 @@ static inline int xprt_test_and_set_binding(struct rpc_xprt *xprt) return test_and_set_bit(XPRT_BINDING, &xprt->state); } +#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) +extern unsigned int rpc_inject_disconnect; +static inline void xprt_inject_disconnect(struct rpc_xprt *xprt) +{ + if (!rpc_inject_disconnect) + return; + if (atomic_dec_return(&xprt->inject_disconnect)) + return; + atomic_set(&xprt->inject_disconnect, rpc_inject_disconnect); + xprt->ops->inject_disconnect(xprt); +} +#else +static inline void xprt_inject_disconnect(struct rpc_xprt *xprt) +{ +} +#endif + #endif /* __KERNEL__*/ #endif /* _LINUX_SUNRPC_XPRT_H */ diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 576d6ae39f25..f41ed882ed3c 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1605,6 +1605,7 @@ call_allocate(struct rpc_task *task) req->rq_callsize + req->rq_rcvsize); if (req->rq_buffer != NULL) return; + xprt_inject_disconnect(xprt); dprintk("RPC: %5u rpc_buffer allocation failed\n", task->tk_pid); diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c index 82962f7e6e88..7cc1b8a6ef6d 100644 --- a/net/sunrpc/debugfs.c +++ b/net/sunrpc/debugfs.c @@ -10,9 +10,12 @@ #include "netns.h" static struct dentry *topdir; +static struct dentry *rpc_fault_dir; static struct dentry *rpc_clnt_dir; static struct dentry *rpc_xprt_dir; +unsigned int rpc_inject_disconnect; + struct rpc_clnt_iter { struct rpc_clnt *clnt; loff_t pos; @@ -257,6 +260,8 @@ rpc_xprt_debugfs_register(struct rpc_xprt *xprt) debugfs_remove_recursive(xprt->debugfs); xprt->debugfs = NULL; } + + atomic_set(&xprt->inject_disconnect, rpc_inject_disconnect); } void @@ -266,11 +271,78 @@ rpc_xprt_debugfs_unregister(struct rpc_xprt *xprt) xprt->debugfs = NULL; } +static int +fault_open(struct inode *inode, struct file *filp) +{ + filp->private_data = kmalloc(128, GFP_KERNEL); + if (!filp->private_data) + return -ENOMEM; + return 0; +} + +static int +fault_release(struct inode *inode, struct file *filp) +{ + kfree(filp->private_data); + return 0; +} + +static ssize_t +fault_disconnect_read(struct file *filp, char __user *user_buf, + size_t len, loff_t *offset) +{ + char *buffer = (char *)filp->private_data; + size_t size; + + size = sprintf(buffer, "%u\n", rpc_inject_disconnect); + return simple_read_from_buffer(user_buf, len, offset, buffer, size); +} + +static ssize_t +fault_disconnect_write(struct file *filp, const char __user *user_buf, + size_t len, loff_t *offset) +{ + char buffer[16]; + + len = min(len, sizeof(buffer) - 1); + if (copy_from_user(buffer, user_buf, len)) + return -EFAULT; + buffer[len] = '\0'; + if (kstrtouint(buffer, 10, &rpc_inject_disconnect)) + return -EINVAL; + return len; +} + +static const struct file_operations fault_disconnect_fops = { + .owner = THIS_MODULE, + .open = fault_open, + .read = fault_disconnect_read, + .write = fault_disconnect_write, + .release = fault_release, +}; + +static struct dentry * +inject_fault_dir(struct dentry *topdir) +{ + struct dentry *faultdir; + + faultdir = debugfs_create_dir("inject_fault", topdir); + if (!faultdir) + return NULL; + + if (!debugfs_create_file("disconnect", S_IFREG | S_IRUSR, faultdir, + NULL, &fault_disconnect_fops)) + return NULL; + + return faultdir; +} + void __exit sunrpc_debugfs_exit(void) { debugfs_remove_recursive(topdir); topdir = NULL; + rpc_fault_dir = NULL; rpc_clnt_dir = NULL; rpc_xprt_dir = NULL; } @@ -282,6 +354,10 @@ sunrpc_debugfs_init(void) if (!topdir) return; + rpc_fault_dir = inject_fault_dir(topdir); + if (!rpc_fault_dir) + goto out_remove; + rpc_clnt_dir = debugfs_create_dir("rpc_clnt", topdir); if (!rpc_clnt_dir) goto out_remove; @@ -294,5 +370,6 @@ sunrpc_debugfs_init(void) out_remove: debugfs_remove_recursive(topdir); topdir = NULL; + rpc_fault_dir = NULL; rpc_clnt_dir = NULL; } diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 1d4fe24af06a..e1fb538e10e0 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -967,6 +967,7 @@ void xprt_transmit(struct rpc_task *task) task->tk_status = status; return; } + xprt_inject_disconnect(xprt); dprintk("RPC: %5u xmit complete\n", task->tk_pid); task->tk_flags |= RPC_TASK_SENT; @@ -1285,6 +1286,7 @@ void xprt_release(struct rpc_task *task) spin_unlock_bh(&xprt->transport_lock); if (req->rq_buffer) xprt->ops->buf_free(req->rq_buffer); + xprt_inject_disconnect(xprt); if (req->rq_cred != NULL) put_rpccred(req->rq_cred); task->tk_rqstp = NULL; diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index ebf6fe759f0e..b8aac23b1b0c 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -246,6 +246,16 @@ xprt_rdma_connect_worker(struct work_struct *work) xprt_clear_connecting(xprt); } +static void +xprt_rdma_inject_disconnect(struct rpc_xprt *xprt) +{ + struct rpcrdma_xprt *r_xprt = container_of(xprt, struct rpcrdma_xprt, + rx_xprt); + + pr_info("rpcrdma: injecting transport disconnect on xprt=%p\n", xprt); + rdma_disconnect(r_xprt->rx_ia.ri_id); +} + /* * xprt_rdma_destroy * @@ -714,6 +724,7 @@ static struct rpc_xprt_ops xprt_rdma_procs = { .print_stats = xprt_rdma_print_stats, .enable_swap = xprt_rdma_enable_swap, .disable_swap = xprt_rdma_disable_swap, + .inject_disconnect = xprt_rdma_inject_disconnect }; static struct xprt_class xprt_rdma = { diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index bf20726d4ab5..fda8ec8c74c0 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -866,6 +866,13 @@ static void xs_close(struct rpc_xprt *xprt) xprt_disconnect_done(xprt); } +static void xs_inject_disconnect(struct rpc_xprt *xprt) +{ + dprintk("RPC: injecting transport disconnect on xprt=%p\n", + xprt); + xprt_disconnect_done(xprt); +} + static void xs_xprt_free(struct rpc_xprt *xprt) { xs_free_peer_addresses(xprt); @@ -2522,6 +2529,7 @@ static struct rpc_xprt_ops xs_udp_ops = { .print_stats = xs_udp_print_stats, .enable_swap = xs_enable_swap, .disable_swap = xs_disable_swap, + .inject_disconnect = xs_inject_disconnect, }; static struct rpc_xprt_ops xs_tcp_ops = { @@ -2540,6 +2548,7 @@ static struct rpc_xprt_ops xs_tcp_ops = { .print_stats = xs_tcp_print_stats, .enable_swap = xs_enable_swap, .disable_swap = xs_disable_swap, + .inject_disconnect = xs_inject_disconnect, }; /* @@ -2559,6 +2568,7 @@ static struct rpc_xprt_ops bc_tcp_ops = { .print_stats = xs_tcp_print_stats, .enable_swap = xs_enable_swap, .disable_swap = xs_disable_swap, + .inject_disconnect = xs_inject_disconnect, }; static int xs_init_anyaddr(const int family, struct sockaddr *sap) -- cgit v1.2.3 From 4876cc779ff525b9c2376d8076edf47815e71f2c Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 19 Jun 2015 16:17:57 -0400 Subject: SUNRPC: Ensure we release the TCP socket once it has been closed This fixes a regression introduced by commit caf4ccd4e88cf2 ("SUNRPC: Make xs_tcp_close() do a socket shutdown rather than a sock_release"). Prior to that commit, the autoclose feature would ensure that an idle connection would result in the socket being both disconnected and released, whereas now only gets disconnected. While the current behaviour is harmless, it does leave the port bound until either RPC traffic resumes or the RPC client is shut down. Reported-by: Steven Rostedt Signed-off-by: Trond Myklebust --- net/sunrpc/xprt.c | 2 +- net/sunrpc/xprtsock.c | 40 ++++++++++++++++++++++------------------ 2 files changed, 23 insertions(+), 19 deletions(-) (limited to 'net/sunrpc/xprtsock.c') diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 3ca31f20b97c..ab5dd621ae0c 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -611,8 +611,8 @@ static void xprt_autoclose(struct work_struct *work) struct rpc_xprt *xprt = container_of(work, struct rpc_xprt, task_cleanup); - xprt->ops->close(xprt); clear_bit(XPRT_CLOSE_WAIT, &xprt->state); + xprt->ops->close(xprt); xprt_release_write(xprt, NULL); } diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index fda8ec8c74c0..ee0715dfc3c7 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -622,24 +622,6 @@ process_status: return status; } -/** - * xs_tcp_shutdown - gracefully shut down a TCP socket - * @xprt: transport - * - * Initiates a graceful shutdown of the TCP socket by calling the - * equivalent of shutdown(SHUT_RDWR); - */ -static void xs_tcp_shutdown(struct rpc_xprt *xprt) -{ - struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); - struct socket *sock = transport->sock; - - if (sock != NULL) { - kernel_sock_shutdown(sock, SHUT_RDWR); - trace_rpc_socket_shutdown(xprt, sock); - } -} - /** * xs_tcp_send_request - write an RPC request to a TCP socket * @task: address of RPC task that manages the state of an RPC request @@ -786,6 +768,7 @@ static void xs_sock_mark_closed(struct rpc_xprt *xprt) xs_sock_reset_connection_flags(xprt); /* Mark transport as closed and wake up all pending tasks */ xprt_disconnect_done(xprt); + xprt_force_disconnect(xprt); } /** @@ -2103,6 +2086,27 @@ out: xprt_wake_pending_tasks(xprt, status); } +/** + * xs_tcp_shutdown - gracefully shut down a TCP socket + * @xprt: transport + * + * Initiates a graceful shutdown of the TCP socket by calling the + * equivalent of shutdown(SHUT_RDWR); + */ +static void xs_tcp_shutdown(struct rpc_xprt *xprt) +{ + struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + struct socket *sock = transport->sock; + + if (sock == NULL) + return; + if (xprt_connected(xprt)) { + kernel_sock_shutdown(sock, SHUT_RDWR); + trace_rpc_socket_shutdown(xprt, sock); + } else + xs_reset_transport(transport); +} + static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); -- cgit v1.2.3 From 775f06ab49f5f9e2f6bca9292ef57efa868a0f67 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 20 Jun 2015 15:31:54 -0400 Subject: SUNRPC: Set the TCP user timeout option on client sockets Use the TCP_USER_TIMEOUT socket option to advertise to the server how long we will keep the connection open if there is unacknowledged data. See RFC5482. Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'net/sunrpc/xprtsock.c') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index ee0715dfc3c7..ee359fc7af16 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2117,6 +2117,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) unsigned int keepidle = xprt->timeout->to_initval / HZ; unsigned int keepcnt = xprt->timeout->to_retries + 1; unsigned int opt_on = 1; + unsigned int timeo; /* TCP Keepalive options */ kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE, @@ -2128,6 +2129,12 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT, (char *)&keepcnt, sizeof(keepcnt)); + /* TCP user timeout (see RFC5482) */ + timeo = jiffies_to_msecs(xprt->timeout->to_initval) * + (xprt->timeout->to_retries + 1); + kernel_setsockopt(sock, SOL_TCP, TCP_USER_TIMEOUT, + (char *)&timeo, sizeof(timeo)); + write_lock_bh(&sk->sk_callback_lock); xs_save_old_callbacks(transport, sk); -- cgit v1.2.3