diff options
author | Eric Dumazet <edumazet@google.com> | 2015-03-19 19:04:20 -0700 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2015-03-20 12:40:25 -0400 |
commit | fa76ce7328b289b6edd476e24eb52fd634261720 (patch) | |
tree | 2e4c116a4e299700c185d73018bbb3518e46e1bb /net/ipv6 | |
parent | 52452c542559ac980b48dbf22a30ee7fa0af507c (diff) | |
download | linux-fa76ce7328b289b6edd476e24eb52fd634261720.tar.bz2 |
inet: get rid of central tcp/dccp listener timer
One of the major issues for TCP is the SYNACK rtx handling,
done by inet_csk_reqsk_queue_prune(), fired by the keepalive
timer of a TCP_LISTEN socket.
This function runs for awfully long times, with the socket lock held,
meaning that other cpus needing this lock have to spin for hundreds of ms.
SYNACKs are sent in huge bursts, likely to cause severe drops anyway.
This model was OK 15 years ago when memory was very tight.
We now can afford to have a timer per request sock.
Timer invocations no longer need to lock the listener,
and can be run from all cpus in parallel.
With the following patch increasing somaxconn width to 32 bits,
I tested a listener with more than 4 million active request sockets,
and a steady SYNFLOOD of ~200,000 SYN per second.
Host was sending ~830,000 SYNACK per second.
This is ~100 times more than what we could achieve before this patch.
Later, we will get rid of the listener hash and use ehash instead.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv6')
-rw-r--r-- | net/ipv6/inet6_connection_sock.c | 19 | ||||
-rw-r--r-- | net/ipv6/syncookies.c | 1 | ||||
-rw-r--r-- | net/ipv6/tcp_ipv6.c | 12 |
3 files changed, 18 insertions, 14 deletions
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index b7acb9ebc4f5..2f3bbe569e8f 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -112,21 +112,20 @@ static u32 inet6_synq_hash(const struct in6_addr *raddr, const __be16 rport, return c & (synq_hsize - 1); } -struct request_sock *inet6_csk_search_req(const struct sock *sk, +struct request_sock *inet6_csk_search_req(struct sock *sk, const __be16 rport, const struct in6_addr *raddr, const struct in6_addr *laddr, const int iif) { - const struct inet_connection_sock *icsk = inet_csk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; struct request_sock *req; + u32 hash = inet6_synq_hash(raddr, rport, lopt->hash_rnd, + lopt->nr_table_entries); - for (req = lopt->syn_table[inet6_synq_hash(raddr, rport, - lopt->hash_rnd, - lopt->nr_table_entries)]; - req != NULL; - req = req->dl_next) { + write_lock(&icsk->icsk_accept_queue.syn_wait_lock); + for (req = lopt->syn_table[hash]; req != NULL; req = req->dl_next) { const struct inet_request_sock *ireq = inet_rsk(req); if (ireq->ir_rmt_port == rport && @@ -134,12 +133,14 @@ struct request_sock *inet6_csk_search_req(const struct sock *sk, ipv6_addr_equal(&ireq->ir_v6_rmt_addr, raddr) && ipv6_addr_equal(&ireq->ir_v6_loc_addr, laddr) && (!ireq->ir_iif || ireq->ir_iif == iif)) { + atomic_inc(&req->rsk_refcnt); WARN_ON(req->sk != NULL); - return req; + break; } } + write_unlock(&icsk->icsk_accept_queue.syn_wait_lock); - return NULL; + return req; } EXPORT_SYMBOL_GPL(inet6_csk_search_req); diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index da5823e5e5a7..2819137fc87d 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -222,7 +222,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ireq->ir_mark = inet_request_mark(sk, skb); - req->expires = 0UL; req->num_retrans = 0; ireq->snd_wscale = 
tcp_opt.snd_wscale; ireq->sack_ok = tcp_opt.sack_ok; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 146f123b52c9..6e3f90db038c 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -421,11 +421,13 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (seq != tcp_rsk(req)->snt_isn) { NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); + reqsk_put(req); goto out; } inet_csk_reqsk_queue_drop(sk, req); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); + reqsk_put(req); goto out; case TCP_SYN_SENT: @@ -988,9 +990,11 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb) req = inet6_csk_search_req(sk, th->source, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, tcp_v6_iif(skb)); - if (req) - return tcp_check_req(sk, skb, req, false); - + if (req) { + nsk = tcp_check_req(sk, skb, req, false); + reqsk_put(req); + return nsk; + } nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo, &ipv6_hdr(skb)->saddr, th->source, &ipv6_hdr(skb)->daddr, ntohs(th->dest), @@ -1670,7 +1674,7 @@ static void tcp_v6_destroy_sock(struct sock *sk) static void get_openreq6(struct seq_file *seq, struct request_sock *req, int i, kuid_t uid) { - int ttd = req->expires - jiffies; + long ttd = req->rsk_timer.expires - jiffies; const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr; const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr; |