summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2016-08-04 19:59:06 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2016-08-04 19:59:06 -0400
commita71e36045e1fd5813addad2fce878d96e2827d66 (patch)
treec71345261c6342b2d3ee2e5784148898ee59557d /net
parentd58b0d980f0b1c27204da0e05537b606da45a07f (diff)
parent2b11885921a48767b6ace27c481beba3b42371ce (diff)
downloadlinux-a71e36045e1fd5813addad2fce878d96e2827d66.tar.bz2
Merge tag 'nfsd-4.8' of git://linux-nfs.org/~bfields/linux
Pull nfsd updates from Bruce Fields: "Highlights: - Trond made a change to the server's tcp logic that allows a fast client to better take advantage of high bandwidth networks, but may increase the risk that a single client could starve other clients; a new sunrpc.svc_rpc_per_connection_limit parameter should help mitigate this in the (hopefully unlikely) event this becomes a problem in practice. - Tom Haynes added a minimal flex-layout pnfs server, which is of no use in production for now--don't build it unless you're doing client testing or further server development" * tag 'nfsd-4.8' of git://linux-nfs.org/~bfields/linux: (32 commits) nfsd: remove some dead code in nfsd_create_locked() nfsd: drop unnecessary MAY_EXEC check from create nfsd: clean up bad-type check in nfsd_create_locked nfsd: remove unnecessary positive-dentry check nfsd: reorganize nfsd_create nfsd: check d_can_lookup in fh_verify of directories nfsd: remove redundant zero-length check from create nfsd: Make creates return EEXIST instead of EACCES SUNRPC: Detect immediate closure of accepted sockets SUNRPC: accept() may return sockets that are still in SYN_RECV nfsd: allow nfsd to advertise multiple layout types nfsd: Close race between nfsd4_release_lockowner and nfsd4_lock nfsd/blocklayout: Make sure calculate signature/designator length aligned xfs: abstract block export operations from nfsd layouts SUNRPC: Remove unused callback xpo_adjust_wspace() SUNRPC: Change TCP socket space reservation SUNRPC: Add a server side per-connection limit SUNRPC: Micro optimisation for svc_data_ready SUNRPC: Call the default socket callbacks instead of open coding SUNRPC: lock the socket while detaching it ...
Diffstat (limited to 'net')
-rw-r--r--net/sunrpc/auth_gss/svcauth_gss.c5
-rw-r--r--net/sunrpc/cache.c2
-rw-r--r--net/sunrpc/svc_xprt.c51
-rw-r--r--net/sunrpc/svcsock.c150
4 files changed, 86 insertions, 122 deletions
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index e085f5ae1548..1d281816f2bf 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -1230,8 +1230,9 @@ static int svcauth_gss_proxy_init(struct svc_rqst *rqstp,
if (status)
goto out;
- dprintk("RPC: svcauth_gss: gss major status = %d\n",
- ud.major_status);
+ dprintk("RPC: svcauth_gss: gss major status = %d "
+ "minor status = %d\n",
+ ud.major_status, ud.minor_status);
switch (ud.major_status) {
case GSS_S_CONTINUE_NEEDED:
diff --git a/net/sunrpc/cache.c b/net/sunrpc/cache.c
index 553bf95f7003..4d8e11f94a35 100644
--- a/net/sunrpc/cache.c
+++ b/net/sunrpc/cache.c
@@ -362,7 +362,7 @@ void sunrpc_destroy_cache_detail(struct cache_detail *cd)
cache_purge(cd);
spin_lock(&cache_list_lock);
write_lock(&cd->hash_lock);
- if (cd->entries || atomic_read(&cd->inuse)) {
+ if (cd->entries) {
write_unlock(&cd->hash_lock);
spin_unlock(&cache_list_lock);
goto out;
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 4f01f63102ee..c3f652395a80 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -21,6 +21,10 @@
#define RPCDBG_FACILITY RPCDBG_SVCXPRT
+static unsigned int svc_rpc_per_connection_limit __read_mostly;
+module_param(svc_rpc_per_connection_limit, uint, 0644);
+
+
static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt);
static int svc_deferred_recv(struct svc_rqst *rqstp);
static struct cache_deferred_req *svc_defer(struct cache_req *req);
@@ -329,12 +333,45 @@ char *svc_print_addr(struct svc_rqst *rqstp, char *buf, size_t len)
}
EXPORT_SYMBOL_GPL(svc_print_addr);
+static bool svc_xprt_slots_in_range(struct svc_xprt *xprt)
+{
+ unsigned int limit = svc_rpc_per_connection_limit;
+ int nrqsts = atomic_read(&xprt->xpt_nr_rqsts);
+
+ return limit == 0 || (nrqsts >= 0 && nrqsts < limit);
+}
+
+static bool svc_xprt_reserve_slot(struct svc_rqst *rqstp, struct svc_xprt *xprt)
+{
+ if (!test_bit(RQ_DATA, &rqstp->rq_flags)) {
+ if (!svc_xprt_slots_in_range(xprt))
+ return false;
+ atomic_inc(&xprt->xpt_nr_rqsts);
+ set_bit(RQ_DATA, &rqstp->rq_flags);
+ }
+ return true;
+}
+
+static void svc_xprt_release_slot(struct svc_rqst *rqstp)
+{
+ struct svc_xprt *xprt = rqstp->rq_xprt;
+ if (test_and_clear_bit(RQ_DATA, &rqstp->rq_flags)) {
+ atomic_dec(&xprt->xpt_nr_rqsts);
+ svc_xprt_enqueue(xprt);
+ }
+}
+
static bool svc_xprt_has_something_to_do(struct svc_xprt *xprt)
{
if (xprt->xpt_flags & ((1<<XPT_CONN)|(1<<XPT_CLOSE)))
return true;
- if (xprt->xpt_flags & ((1<<XPT_DATA)|(1<<XPT_DEFERRED)))
- return xprt->xpt_ops->xpo_has_wspace(xprt);
+ if (xprt->xpt_flags & ((1<<XPT_DATA)|(1<<XPT_DEFERRED))) {
+ if (xprt->xpt_ops->xpo_has_wspace(xprt) &&
+ svc_xprt_slots_in_range(xprt))
+ return true;
+ trace_svc_xprt_no_write_space(xprt);
+ return false;
+ }
return false;
}
@@ -480,8 +517,6 @@ void svc_reserve(struct svc_rqst *rqstp, int space)
atomic_sub((rqstp->rq_reserved - space), &xprt->xpt_reserved);
rqstp->rq_reserved = space;
- if (xprt->xpt_ops->xpo_adjust_wspace)
- xprt->xpt_ops->xpo_adjust_wspace(xprt);
svc_xprt_enqueue(xprt);
}
}
@@ -512,8 +547,8 @@ static void svc_xprt_release(struct svc_rqst *rqstp)
rqstp->rq_res.head[0].iov_len = 0;
svc_reserve(rqstp, 0);
+ svc_xprt_release_slot(rqstp);
rqstp->rq_xprt = NULL;
-
svc_xprt_put(xprt);
}
@@ -781,7 +816,7 @@ static int svc_handle_xprt(struct svc_rqst *rqstp, struct svc_xprt *xprt)
svc_add_new_temp_xprt(serv, newxpt);
else
module_put(xprt->xpt_class->xcl_owner);
- } else {
+ } else if (svc_xprt_reserve_slot(rqstp, xprt)) {
/* XPT_DATA|XPT_DEFERRED case: */
dprintk("svc: server %p, pool %u, transport %p, inuse=%d\n",
rqstp, rqstp->rq_pool->sp_id, xprt,
@@ -871,6 +906,7 @@ EXPORT_SYMBOL_GPL(svc_recv);
*/
void svc_drop(struct svc_rqst *rqstp)
{
+ trace_svc_drop(rqstp);
dprintk("svc: xprt %p dropped request\n", rqstp->rq_xprt);
svc_xprt_release(rqstp);
}
@@ -1148,6 +1184,7 @@ static void svc_revisit(struct cache_deferred_req *dreq, int too_many)
spin_unlock(&xprt->xpt_lock);
dprintk("revisit canceled\n");
svc_xprt_put(xprt);
+ trace_svc_drop_deferred(dr);
kfree(dr);
return;
}
@@ -1205,6 +1242,7 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req)
set_bit(RQ_DROPME, &rqstp->rq_flags);
dr->handle.revisit = svc_revisit;
+ trace_svc_defer(rqstp);
return &dr->handle;
}
@@ -1245,6 +1283,7 @@ static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt)
struct svc_deferred_req,
handle.recent);
list_del_init(&dr->handle.recent);
+ trace_svc_revisit_deferred(dr);
} else
clear_bit(XPT_DEFERRED, &xprt->xpt_flags);
spin_unlock(&xprt->xpt_lock);
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index dadfec66dbd8..57625f64efd5 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -60,7 +60,6 @@
static struct svc_sock *svc_setup_socket(struct svc_serv *, struct socket *,
int flags);
-static void svc_udp_data_ready(struct sock *);
static int svc_udp_recvfrom(struct svc_rqst *);
static int svc_udp_sendto(struct svc_rqst *);
static void svc_sock_detach(struct svc_xprt *);
@@ -398,48 +397,21 @@ static int svc_sock_secure_port(struct svc_rqst *rqstp)
return svc_port_is_privileged(svc_addr(rqstp));
}
-static bool sunrpc_waitqueue_active(wait_queue_head_t *wq)
-{
- if (!wq)
- return false;
- /*
- * There should normally be a memory * barrier here--see
- * wq_has_sleeper().
- *
- * It appears that isn't currently necessary, though, basically
- * because callers all appear to have sufficient memory barriers
- * between the time the relevant change is made and the
- * time they call these callbacks.
- *
- * The nfsd code itself doesn't actually explicitly wait on
- * these waitqueues, but it may wait on them for example in
- * sendpage() or sendmsg() calls. (And those may be the only
- * places, since it it uses nonblocking reads.)
- *
- * Maybe we should add the memory barriers anyway, but these are
- * hot paths so we'd need to be convinced there's no sigificant
- * penalty.
- */
- return waitqueue_active(wq);
-}
-
/*
* INET callback when data has been received on the socket.
*/
-static void svc_udp_data_ready(struct sock *sk)
+static void svc_data_ready(struct sock *sk)
{
struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
- wait_queue_head_t *wq = sk_sleep(sk);
if (svsk) {
dprintk("svc: socket %p(inet %p), busy=%d\n",
svsk, sk,
test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags));
- set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
- svc_xprt_enqueue(&svsk->sk_xprt);
+ svsk->sk_odata(sk);
+ if (!test_and_set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags))
+ svc_xprt_enqueue(&svsk->sk_xprt);
}
- if (sunrpc_waitqueue_active(wq))
- wake_up_interruptible(wq);
}
/*
@@ -448,56 +420,22 @@ static void svc_udp_data_ready(struct sock *sk)
static void svc_write_space(struct sock *sk)
{
struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data);
- wait_queue_head_t *wq = sk_sleep(sk);
if (svsk) {
dprintk("svc: socket %p(inet %p), write_space busy=%d\n",
svsk, sk, test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags));
+ svsk->sk_owspace(sk);
svc_xprt_enqueue(&svsk->sk_xprt);
}
-
- if (sunrpc_waitqueue_active(wq)) {
- dprintk("RPC svc_write_space: someone sleeping on %p\n",
- svsk);
- wake_up_interruptible(wq);
- }
}
static int svc_tcp_has_wspace(struct svc_xprt *xprt)
{
- struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
- struct svc_serv *serv = svsk->sk_xprt.xpt_server;
- int required;
+ struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
if (test_bit(XPT_LISTENER, &xprt->xpt_flags))
return 1;
- required = atomic_read(&xprt->xpt_reserved) + serv->sv_max_mesg;
- if (sk_stream_wspace(svsk->sk_sk) >= required ||
- (sk_stream_min_wspace(svsk->sk_sk) == 0 &&
- atomic_read(&xprt->xpt_reserved) == 0))
- return 1;
- set_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
- return 0;
-}
-
-static void svc_tcp_write_space(struct sock *sk)
-{
- struct svc_sock *svsk = (struct svc_sock *)(sk->sk_user_data);
- struct socket *sock = sk->sk_socket;
-
- if (!sk_stream_is_writeable(sk) || !sock)
- return;
- if (!svsk || svc_tcp_has_wspace(&svsk->sk_xprt))
- clear_bit(SOCK_NOSPACE, &sock->flags);
- svc_write_space(sk);
-}
-
-static void svc_tcp_adjust_wspace(struct svc_xprt *xprt)
-{
- struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
-
- if (svc_tcp_has_wspace(xprt))
- clear_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
+ return !test_bit(SOCK_NOSPACE, &svsk->sk_sock->flags);
}
/*
@@ -746,7 +684,7 @@ static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)
svc_xprt_init(sock_net(svsk->sk_sock->sk), &svc_udp_class,
&svsk->sk_xprt, serv);
clear_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags);
- svsk->sk_sk->sk_data_ready = svc_udp_data_ready;
+ svsk->sk_sk->sk_data_ready = svc_data_ready;
svsk->sk_sk->sk_write_space = svc_write_space;
/* initialise setting must have enough space to
@@ -786,11 +724,12 @@ static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)
static void svc_tcp_listen_data_ready(struct sock *sk)
{
struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
- wait_queue_head_t *wq;
dprintk("svc: socket %p TCP (listen) state change %d\n",
sk, sk->sk_state);
+ if (svsk)
+ svsk->sk_odata(sk);
/*
* This callback may called twice when a new connection
* is established as a child socket inherits everything
@@ -808,10 +747,6 @@ static void svc_tcp_listen_data_ready(struct sock *sk)
} else
printk("svc: socket %p: no user data\n", sk);
}
-
- wq = sk_sleep(sk);
- if (sunrpc_waitqueue_active(wq))
- wake_up_interruptible_all(wq);
}
/*
@@ -820,7 +755,6 @@ static void svc_tcp_listen_data_ready(struct sock *sk)
static void svc_tcp_state_change(struct sock *sk)
{
struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
- wait_queue_head_t *wq = sk_sleep(sk);
dprintk("svc: socket %p TCP (connected) state change %d (svsk %p)\n",
sk, sk->sk_state, sk->sk_user_data);
@@ -828,26 +762,12 @@ static void svc_tcp_state_change(struct sock *sk)
if (!svsk)
printk("svc: socket %p: no user data\n", sk);
else {
- set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
- svc_xprt_enqueue(&svsk->sk_xprt);
- }
- if (sunrpc_waitqueue_active(wq))
- wake_up_interruptible_all(wq);
-}
-
-static void svc_tcp_data_ready(struct sock *sk)
-{
- struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
- wait_queue_head_t *wq = sk_sleep(sk);
-
- dprintk("svc: socket %p TCP data ready (svsk %p)\n",
- sk, sk->sk_user_data);
- if (svsk) {
- set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
- svc_xprt_enqueue(&svsk->sk_xprt);
+ svsk->sk_ostate(sk);
+ if (sk->sk_state != TCP_ESTABLISHED) {
+ set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
+ svc_xprt_enqueue(&svsk->sk_xprt);
+ }
}
- if (sunrpc_waitqueue_active(wq))
- wake_up_interruptible(wq);
}
/*
@@ -901,6 +821,11 @@ static struct svc_xprt *svc_tcp_accept(struct svc_xprt *xprt)
dprintk("%s: connect from %s\n", serv->sv_name,
__svc_print_addr(sin, buf, sizeof(buf)));
+ /* Reset the inherited callbacks before calling svc_setup_socket */
+ newsock->sk->sk_state_change = svsk->sk_ostate;
+ newsock->sk->sk_data_ready = svsk->sk_odata;
+ newsock->sk->sk_write_space = svsk->sk_owspace;
+
/* make sure that a write doesn't block forever when
* low on memory
*/
@@ -1317,7 +1242,6 @@ static struct svc_xprt_ops svc_tcp_ops = {
.xpo_has_wspace = svc_tcp_has_wspace,
.xpo_accept = svc_tcp_accept,
.xpo_secure_port = svc_sock_secure_port,
- .xpo_adjust_wspace = svc_tcp_adjust_wspace,
};
static struct svc_xprt_class svc_tcp_class = {
@@ -1357,8 +1281,8 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
} else {
dprintk("setting up TCP socket for reading\n");
sk->sk_state_change = svc_tcp_state_change;
- sk->sk_data_ready = svc_tcp_data_ready;
- sk->sk_write_space = svc_tcp_write_space;
+ sk->sk_data_ready = svc_data_ready;
+ sk->sk_write_space = svc_write_space;
svsk->sk_reclen = 0;
svsk->sk_tcplen = 0;
@@ -1368,8 +1292,13 @@ static void svc_tcp_init(struct svc_sock *svsk, struct svc_serv *serv)
tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF;
set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
- if (sk->sk_state != TCP_ESTABLISHED)
+ switch (sk->sk_state) {
+ case TCP_SYN_RECV:
+ case TCP_ESTABLISHED:
+ break;
+ default:
set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
+ }
}
}
@@ -1428,17 +1357,14 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
/* Initialize the socket */
if (sock->type == SOCK_DGRAM)
svc_udp_init(svsk, serv);
- else {
- /* initialise setting must have enough space to
- * receive and respond to one request.
- */
- svc_sock_setbufsize(svsk->sk_sock, 4 * serv->sv_max_mesg,
- 4 * serv->sv_max_mesg);
+ else
svc_tcp_init(svsk, serv);
- }
- dprintk("svc: svc_setup_socket created %p (inet %p)\n",
- svsk, svsk->sk_sk);
+ dprintk("svc: svc_setup_socket created %p (inet %p), "
+ "listen %d close %d\n",
+ svsk, svsk->sk_sk,
+ test_bit(XPT_LISTENER, &svsk->sk_xprt.xpt_flags),
+ test_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags));
return svsk;
}
@@ -1606,18 +1532,16 @@ static void svc_sock_detach(struct svc_xprt *xprt)
{
struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
struct sock *sk = svsk->sk_sk;
- wait_queue_head_t *wq;
dprintk("svc: svc_sock_detach(%p)\n", svsk);
/* put back the old socket callbacks */
+ lock_sock(sk);
sk->sk_state_change = svsk->sk_ostate;
sk->sk_data_ready = svsk->sk_odata;
sk->sk_write_space = svsk->sk_owspace;
-
- wq = sk_sleep(sk);
- if (sunrpc_waitqueue_active(wq))
- wake_up_interruptible(wq);
+ sk->sk_user_data = NULL;
+ release_sock(sk);
}
/*