-rw-r--r--   include/linux/tcp.h      |   5
-rw-r--r--   include/net/tcp.h        |  15
-rw-r--r--   net/ipv4/tcp_fastopen.c  | 219
-rw-r--r--   net/ipv4/tcp_ipv4.c      | 277
-rw-r--r--   net/ipv4/tcp_minisocks.c |  31
-rw-r--r--   net/ipv4/tcp_output.c    |  23
-rw-r--r--   net/ipv6/tcp_ipv6.c      |  62
7 files changed, 323 insertions(+), 309 deletions(-)
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 4e37c71ecd74..bc35e4709e8e 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -366,11 +366,6 @@ static inline bool tcp_passive_fastopen(const struct sock *sk)
 		tcp_sk(sk)->fastopen_rsk != NULL);
 }
 
-static inline bool fastopen_cookie_present(struct tcp_fastopen_cookie *foc)
-{
-	return foc->len != -1;
-}
-
 extern void tcp_sock_destruct(struct sock *sk);
 
 static inline int fastopen_init_queue(struct sock *sk, int backlog)
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 3c9418456640..f5d6ca4a9d28 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -220,8 +220,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
 #define	TFO_SERVER_ENABLE	2
 #define	TFO_CLIENT_NO_COOKIE	4	/* Data in SYN w/o cookie option */
 
-/* Process SYN data but skip cookie validation */
-#define	TFO_SERVER_COOKIE_NOT_CHKED	0x100
 /* Accept SYN data w/o any cookie option */
 #define	TFO_SERVER_COOKIE_NOT_REQD	0x200
 
@@ -230,10 +228,6 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
  */
 #define	TFO_SERVER_WO_SOCKOPT1	0x400
 #define	TFO_SERVER_WO_SOCKOPT2	0x800
-/* Always create TFO child sockets on a TFO listener even when
- * cookie/data not present. (For testing purpose!)
- */
-#define	TFO_SERVER_ALWAYS	0x1000
 
 extern struct inet_timewait_death_row tcp_death_row;
 
@@ -1120,6 +1114,9 @@ static inline void tcp_openreq_init(struct request_sock *req,
 	ireq->ir_num = ntohs(tcp_hdr(skb)->dest);
 }
 
+extern void tcp_openreq_init_rwin(struct request_sock *req,
+				  struct sock *sk, struct dst_entry *dst);
+
 void tcp_enter_memory_pressure(struct sock *sk);
 
 static inline int keepalive_intvl_when(const struct tcp_sock *tp)
@@ -1329,8 +1326,10 @@ void tcp_free_fastopen_req(struct tcp_sock *tp);
 
 extern struct tcp_fastopen_context __rcu *tcp_fastopen_ctx;
 int tcp_fastopen_reset_cipher(void *key, unsigned int len);
-void tcp_fastopen_cookie_gen(__be32 src, __be32 dst,
-			     struct tcp_fastopen_cookie *foc);
+bool tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
+		      struct request_sock *req,
+		      struct tcp_fastopen_cookie *foc,
+		      struct dst_entry *dst);
 void tcp_fastopen_init_key_once(bool publish);
 
 #define TCP_FASTOPEN_KEY_LENGTH 16
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index f195d9316e55..62e48cf84e60 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -72,25 +72,224 @@ error:		kfree(ctx);
 	return err;
 }
 
-/* Computes the fastopen cookie for the IP path.
- * The path is a 128 bits long (pad with zeros for IPv4).
- *
- * The caller must check foc->len to determine if a valid cookie
- * has been generated successfully.
-*/
-void tcp_fastopen_cookie_gen(__be32 src, __be32 dst,
-			     struct tcp_fastopen_cookie *foc)
+static bool __tcp_fastopen_cookie_gen(const void *path,
+				      struct tcp_fastopen_cookie *foc)
 {
-	__be32 path[4] = { src, dst, 0, 0 };
 	struct tcp_fastopen_context *ctx;
+	bool ok = false;
 
 	tcp_fastopen_init_key_once(true);
 
 	rcu_read_lock();
 	ctx = rcu_dereference(tcp_fastopen_ctx);
 	if (ctx) {
-		crypto_cipher_encrypt_one(ctx->tfm, foc->val, (__u8 *)path);
+		crypto_cipher_encrypt_one(ctx->tfm, foc->val, path);
 		foc->len = TCP_FASTOPEN_COOKIE_SIZE;
+		ok = true;
 	}
 	rcu_read_unlock();
+	return ok;
+}
+
+/* Generate the fastopen cookie by doing aes128 encryption on both
+ * the source and destination addresses. Pad 0s for IPv4 or IPv4-mapped-IPv6
+ * addresses. For the longer IPv6 addresses use CBC-MAC.
+ *
+ * XXX (TFO) - refactor when TCP_FASTOPEN_COOKIE_SIZE != AES_BLOCK_SIZE.
+ */
+static bool tcp_fastopen_cookie_gen(struct request_sock *req,
+				    struct sk_buff *syn,
+				    struct tcp_fastopen_cookie *foc)
+{
+	if (req->rsk_ops->family == AF_INET) {
+		const struct iphdr *iph = ip_hdr(syn);
+
+		__be32 path[4] = { iph->saddr, iph->daddr, 0, 0 };
+		return __tcp_fastopen_cookie_gen(path, foc);
+	}
+
+#if IS_ENABLED(CONFIG_IPV6)
+	if (req->rsk_ops->family == AF_INET6) {
+		const struct ipv6hdr *ip6h = ipv6_hdr(syn);
+		struct tcp_fastopen_cookie tmp;
+
+		if (__tcp_fastopen_cookie_gen(&ip6h->saddr, &tmp)) {
+			struct in6_addr *buf = (struct in6_addr *) tmp.val;
+			int i;
+
+			for (i = 0; i < 4; i++)
+				buf->s6_addr32[i] ^= ip6h->daddr.s6_addr32[i];
+			return __tcp_fastopen_cookie_gen(buf, foc);
+		}
+	}
+#endif
+	return false;
+}
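
For reference, the construction above is one AES-128 block over the zero-padded address pair for IPv4, and a two-block CBC-MAC (cookie = AES(K, AES(K, saddr) XOR daddr)) for IPv6. A user-space sketch, for illustration only: aes128_encrypt_block() is a hypothetical stand-in for the kernel's crypto_cipher_encrypt_one(), not a real library call.

#include <stdint.h>
#include <string.h>

#define AES_BLOCK_SIZE 16

/* Hypothetical helper: encrypt one 16-byte block with AES-128 under key. */
void aes128_encrypt_block(const uint8_t key[16],
			  const uint8_t in[AES_BLOCK_SIZE],
			  uint8_t out[AES_BLOCK_SIZE]);

/* IPv4: the 32-bit source/destination pair is zero-padded to one block. */
static void cookie_v4(const uint8_t key[16], uint32_t saddr, uint32_t daddr,
		      uint8_t cookie[AES_BLOCK_SIZE])
{
	uint8_t path[AES_BLOCK_SIZE] = { 0 };

	memcpy(path, &saddr, 4);
	memcpy(path + 4, &daddr, 4);
	aes128_encrypt_block(key, path, cookie);
}

/* IPv6: two 16-byte addresses, so chain two blocks CBC-MAC style:
 * cookie = AES(key, AES(key, saddr) ^ daddr)
 */
static void cookie_v6(const uint8_t key[16], const uint8_t saddr[16],
		      const uint8_t daddr[16], uint8_t cookie[AES_BLOCK_SIZE])
{
	uint8_t buf[AES_BLOCK_SIZE];
	int i;

	aes128_encrypt_block(key, saddr, buf);
	for (i = 0; i < AES_BLOCK_SIZE; i++)
		buf[i] ^= daddr[i];
	aes128_encrypt_block(key, buf, cookie);
}
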
+
+static bool tcp_fastopen_create_child(struct sock *sk,
+				      struct sk_buff *skb,
+				      struct dst_entry *dst,
+				      struct request_sock *req)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
+	struct sock *child;
+
+	req->num_retrans = 0;
+	req->num_timeout = 0;
+	req->sk = NULL;
+
+	child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
+	if (child == NULL)
+		return false;
+
+	spin_lock(&queue->fastopenq->lock);
+	queue->fastopenq->qlen++;
+	spin_unlock(&queue->fastopenq->lock);
+
+	/* Initialize the child socket. Have to fix some values to take
+	 * into account the child is a Fast Open socket and is created
+	 * only out of the bits carried in the SYN packet.
+	 */
+	tp = tcp_sk(child);
+
+	tp->fastopen_rsk = req;
+	/* Do a hold on the listener sk so that if the listener is being
+	 * closed, the child that has been accepted can live on and still
+	 * access listen_lock.
+	 */
+	sock_hold(sk);
+	tcp_rsk(req)->listener = sk;
+
+	/* RFC1323: The window in SYN & SYN/ACK segments is never
+	 * scaled. So correct it appropriately.
+	 */
+	tp->snd_wnd = ntohs(tcp_hdr(skb)->window);
+
+	/* Activate the retrans timer so that SYNACK can be retransmitted.
+	 * The request socket is not added to the SYN table of the parent
+	 * because it's been added to the accept queue directly.
+	 */
+	inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS,
+				  TCP_TIMEOUT_INIT, TCP_RTO_MAX);
+
+	/* Add the child socket directly into the accept queue */
+	inet_csk_reqsk_queue_add(sk, req, child);
+
+	/* Now finish processing the fastopen child socket. */
+	inet_csk(child)->icsk_af_ops->rebuild_header(child);
+	tcp_init_congestion_control(child);
+	tcp_mtup_init(child);
+	tcp_init_metrics(child);
+	tcp_init_buffer_space(child);
+
+	/* Queue the data carried in the SYN packet. We need to first
+	 * bump skb's refcnt because the caller will attempt to free it.
+	 *
+	 * XXX (TFO) - we honor a zero-payload TFO request for now,
+	 * (any reason not to?) but no need to queue the skb since
+	 * there is no data. How about SYN+FIN?
+	 */
+	if (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1) {
+		skb = skb_get(skb);
+		skb_dst_drop(skb);
+		__skb_pull(skb, tcp_hdr(skb)->doff * 4);
+		skb_set_owner_r(skb, child);
+		__skb_queue_tail(&child->sk_receive_queue, skb);
+		tp->syn_data_acked = 1;
+	}
+	tcp_rsk(req)->rcv_nxt = tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
+	sk->sk_data_ready(sk);
+	bh_unlock_sock(child);
+	sock_put(child);
+	WARN_ON(req->sk == NULL);
+	return true;
+}
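
Because the child created here goes straight onto the accept queue, a TFO server in user space is just a listener with the TCP_FASTOPEN socket option set; accept() then returns a connection whose SYN payload is already readable. A minimal sketch (error handling elided; assumes the server side is enabled via the net.ipv4.tcp_fastopen sysctl, i.e. the TFO_SERVER_ENABLE bit, and that the port/buffer sizes are placeholders):

#include <stdio.h>
#include <unistd.h>
#include <netinet/in.h>
#include <netinet/tcp.h>
#include <sys/socket.h>

#ifndef TCP_FASTOPEN
#define TCP_FASTOPEN 23		/* fallback for older libc headers */
#endif

int main(void)
{
	int lfd = socket(AF_INET, SOCK_STREAM, 0);
	struct sockaddr_in addr = {
		.sin_family = AF_INET,
		.sin_port = htons(8080),
		.sin_addr.s_addr = htonl(INADDR_ANY),
	};
	int qlen = 16;	/* becomes fastopenq->max_qlen on the listener */
	char buf[1024];

	bind(lfd, (struct sockaddr *)&addr, sizeof(addr));
	setsockopt(lfd, IPPROTO_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen));
	listen(lfd, 128);

	for (;;) {
		int cfd = accept(lfd, NULL, NULL);
		/* May return data that arrived in the SYN itself. */
		ssize_t n = read(cfd, buf, sizeof(buf));

		if (n > 0)
			write(cfd, buf, n);	/* echo it back */
		close(cfd);
	}
}

The qlen value passed to setsockopt() is exactly the max_qlen bound enforced by tcp_fastopen_queue_check() below.
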
+
+static bool tcp_fastopen_queue_check(struct sock *sk)
+{
+	struct fastopen_queue *fastopenq;
+
+	/* Make sure the listener has enabled fastopen, and we don't
+	 * exceed the max # of pending TFO requests allowed before trying
+	 * to validate the cookie, in order to avoid burning CPU cycles
+	 * unnecessarily.
+	 *
+	 * XXX (TFO) - The implication of checking the max_qlen before
+	 * processing a cookie request is that clients can't differentiate
+	 * between qlen overflow causing Fast Open to be disabled
+	 * temporarily vs a server not supporting Fast Open at all.
+	 */
+	fastopenq = inet_csk(sk)->icsk_accept_queue.fastopenq;
+	if (fastopenq == NULL || fastopenq->max_qlen == 0)
+		return false;
+
+	if (fastopenq->qlen >= fastopenq->max_qlen) {
+		struct request_sock *req1;
+
+		spin_lock(&fastopenq->lock);
+		req1 = fastopenq->rskq_rst_head;
+		if ((req1 == NULL) || time_after(req1->expires, jiffies)) {
+			spin_unlock(&fastopenq->lock);
+			NET_INC_STATS_BH(sock_net(sk),
+					 LINUX_MIB_TCPFASTOPENLISTENOVERFLOW);
+			return false;
+		}
+		fastopenq->rskq_rst_head = req1->dl_next;
+		fastopenq->qlen--;
+		spin_unlock(&fastopenq->lock);
+		reqsk_free(req1);
+	}
+	return true;
+}
+
+/* Returns true if we should perform Fast Open on the SYN. The cookie (foc)
+ * may be updated and returned to the client in the SYN-ACK later, e.g. for
+ * a Fast Open cookie request (foc->len == 0).
+ */
+bool tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
+		      struct request_sock *req,
+		      struct tcp_fastopen_cookie *foc,
+		      struct dst_entry *dst)
+{
+	struct tcp_fastopen_cookie valid_foc = { .len = -1 };
+	bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1;
+
+	if (!((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) &&
+	      (syn_data || foc->len >= 0) &&
+	      tcp_fastopen_queue_check(sk))) {
+		foc->len = -1;
+		return false;
+	}
+
+	if (syn_data && (sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD))
+		goto fastopen;
+
+	if (tcp_fastopen_cookie_gen(req, skb, &valid_foc) &&
+	    foc->len == TCP_FASTOPEN_COOKIE_SIZE &&
+	    foc->len == valid_foc.len &&
+	    !memcmp(foc->val, valid_foc.val, foc->len)) {
+		/* Cookie is valid. Create a (full) child socket to accept
+		 * the data in SYN before returning a SYN-ACK to ack the
+		 * data. If we fail to create the socket, fall back and
+		 * ack the ISN only but include the same cookie.
+		 *
+		 * Note: Data-less SYN with a valid cookie is allowed to
+		 * send data in SYN_RECV state.
+		 */
+fastopen:
+		if (tcp_fastopen_create_child(sk, skb, dst, req)) {
+			foc->len = -1;
+			NET_INC_STATS_BH(sock_net(sk),
+					 LINUX_MIB_TCPFASTOPENPASSIVE);
+			return true;
+		}
+	}
+
+	NET_INC_STATS_BH(sock_net(sk), foc->len ?
+			 LINUX_MIB_TCPFASTOPENPASSIVEFAIL :
+			 LINUX_MIB_TCPFASTOPENCOOKIEREQD);
+	*foc = valid_foc;
+	return false;
+}
+EXPORT_SYMBOL(tcp_try_fastopen);
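
For the client half of this exchange, sendto() with MSG_FASTOPEN (available since Linux 3.6) replaces connect()+send(): the first contact elicits the foc->len == 0 cookie-request path above, and once the kernel has cached a cookie the payload rides in the SYN and reaches tcp_try_fastopen() on the server. A sketch; the address and port are placeholders:

#include <stdio.h>
#include <unistd.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <sys/socket.h>

#ifndef MSG_FASTOPEN
#define MSG_FASTOPEN 0x20000000		/* fallback for older headers */
#endif

int main(void)
{
	int fd = socket(AF_INET, SOCK_STREAM, 0);
	struct sockaddr_in srv = {
		.sin_family = AF_INET,
		.sin_port = htons(8080),
	};
	const char msg[] = "hello in the SYN";

	inet_pton(AF_INET, "192.0.2.1", &srv.sin_addr);	/* example addr */

	/* Performs the connect; data may be carried in the SYN. */
	if (sendto(fd, msg, sizeof(msg) - 1, MSG_FASTOPEN,
		   (struct sockaddr *)&srv, sizeof(srv)) < 0)
		perror("sendto");

	close(fd);
	return 0;
}
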
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index ad166dcc278f..a2780e5334c9 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -336,8 +336,8 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 	const int code = icmp_hdr(icmp_skb)->code;
 	struct sock *sk;
 	struct sk_buff *skb;
-	struct request_sock *req;
-	__u32 seq;
+	struct request_sock *fastopen;
+	__u32 seq, snd_una;
 	__u32 remaining;
 	int err;
 	struct net *net = dev_net(icmp_skb->dev);
@@ -378,12 +378,12 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 	icsk = inet_csk(sk);
 	tp = tcp_sk(sk);
-	req = tp->fastopen_rsk;
 	seq = ntohl(th->seq);
+	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
+	fastopen = tp->fastopen_rsk;
+	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
 	if (sk->sk_state != TCP_LISTEN &&
-	    !between(seq, tp->snd_una, tp->snd_nxt) &&
-	    (req == NULL || seq != tcp_rsk(req)->snt_isn)) {
-		/* For a Fast Open socket, allow seq to be snt_isn. */
+	    !between(seq, snd_una, tp->snd_nxt)) {
 		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
 		goto out;
 	}
@@ -426,11 +426,9 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
 			break;
 		if (seq != tp->snd_una || !icsk->icsk_retransmits ||
-		    !icsk->icsk_backoff)
+		    !icsk->icsk_backoff || fastopen)
 			break;
 
-		/* XXX (TFO) - revisit the following logic for TFO */
-
 		if (sock_owned_by_user(sk))
 			break;
@@ -462,14 +460,6 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 		goto out;
 	}
 
-	/* XXX (TFO) - if it's a TFO socket and has been accepted, rather
-	 * than following the TCP_SYN_RECV case and closing the socket,
-	 * we ignore the ICMP error and keep trying like a fully established
-	 * socket. Is this the right thing to do?
-	 */
-	if (req && req->sk == NULL)
-		goto out;
-
 	switch (sk->sk_state) {
 		struct request_sock *req, **prev;
 	case TCP_LISTEN:
@@ -502,10 +492,13 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 		goto out;
 
 	case TCP_SYN_SENT:
-	case TCP_SYN_RECV:  /* Cannot happen.
-			       It can f.e. if SYNs crossed,
-			       or Fast Open.
-			     */
+	case TCP_SYN_RECV:
+		/* Only in fast or simultaneous open. If a fast open socket
+		 * is already accepted it is treated as a connected one below.
+		 */
+		if (fastopen && fastopen->sk == NULL)
+			break;
+
 		if (!sock_owned_by_user(sk)) {
 			sk->sk_err = err;
@@ -822,7 +815,8 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
  */
 static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
 			      struct request_sock *req,
-			      u16 queue_mapping)
+			      u16 queue_mapping,
+			      struct tcp_fastopen_cookie *foc)
 {
 	const struct inet_request_sock *ireq = inet_rsk(req);
 	struct flowi4 fl4;
@@ -833,7 +827,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
 	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
 		return -1;
 
-	skb = tcp_make_synack(sk, dst, req, NULL);
+	skb = tcp_make_synack(sk, dst, req, foc);
 
 	if (skb) {
 		__tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr);
@@ -852,7 +846,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst,
 
 static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req)
 {
-	int res = tcp_v4_send_synack(sk, NULL, req, 0);
+	int res = tcp_v4_send_synack(sk, NULL, req, 0, NULL);
 
 	if (!res) {
 		TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
@@ -1260,187 +1254,6 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv4_ops = {
 };
 #endif
 
-static bool tcp_fastopen_check(struct sock *sk, struct sk_buff *skb,
-			       struct request_sock *req,
-			       struct tcp_fastopen_cookie *foc,
-			       struct tcp_fastopen_cookie *valid_foc)
-{
-	bool skip_cookie = false;
-	struct fastopen_queue *fastopenq;
-
-	if (likely(!fastopen_cookie_present(foc))) {
-		/* See include/net/tcp.h for the meaning of these knobs */
-		if ((sysctl_tcp_fastopen & TFO_SERVER_ALWAYS) ||
-		    ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD) &&
-		     (TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1)))
-			skip_cookie = true; /* no cookie to validate */
-		else
-			return false;
-	}
-	fastopenq = inet_csk(sk)->icsk_accept_queue.fastopenq;
-	/* A FO option is present; bump the counter. */
-	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPFASTOPENPASSIVE);
-
-	/* Make sure the listener has enabled fastopen, and we don't
-	 * exceed the max # of pending TFO requests allowed before trying
-	 * to validating the cookie in order to avoid burning CPU cycles
-	 * unnecessarily.
-	 *
-	 * XXX (TFO) - The implication of checking the max_qlen before
-	 * processing a cookie request is that clients can't differentiate
-	 * between qlen overflow causing Fast Open to be disabled
-	 * temporarily vs a server not supporting Fast Open at all.
-	 */
-	if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) == 0 ||
-	    fastopenq == NULL || fastopenq->max_qlen == 0)
-		return false;
-
-	if (fastopenq->qlen >= fastopenq->max_qlen) {
-		struct request_sock *req1;
-		spin_lock(&fastopenq->lock);
-		req1 = fastopenq->rskq_rst_head;
-		if ((req1 == NULL) || time_after(req1->expires, jiffies)) {
-			spin_unlock(&fastopenq->lock);
-			NET_INC_STATS_BH(sock_net(sk),
-					 LINUX_MIB_TCPFASTOPENLISTENOVERFLOW);
-			/* Avoid bumping LINUX_MIB_TCPFASTOPENPASSIVEFAIL*/
-			foc->len = -1;
-			return false;
-		}
-		fastopenq->rskq_rst_head = req1->dl_next;
-		fastopenq->qlen--;
-		spin_unlock(&fastopenq->lock);
-		reqsk_free(req1);
-	}
-	if (skip_cookie) {
-		tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
-		return true;
-	}
-
-	if (foc->len == TCP_FASTOPEN_COOKIE_SIZE) {
-		if ((sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_CHKED) == 0) {
-			tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr,
-						ip_hdr(skb)->daddr, valid_foc);
-			if ((valid_foc->len != TCP_FASTOPEN_COOKIE_SIZE) ||
-			    memcmp(&foc->val[0], &valid_foc->val[0],
-				   TCP_FASTOPEN_COOKIE_SIZE) != 0)
-				return false;
-			valid_foc->len = -1;
-		}
-		/* Acknowledge the data received from the peer. */
-		tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
-		return true;
-	} else if (foc->len == 0) { /* Client requesting a cookie */
-		tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr,
-					ip_hdr(skb)->daddr, valid_foc);
-		NET_INC_STATS_BH(sock_net(sk),
-				 LINUX_MIB_TCPFASTOPENCOOKIEREQD);
-	} else {
-		/* Client sent a cookie with wrong size. Treat it
-		 * the same as invalid and return a valid one.
-		 */
-		tcp_fastopen_cookie_gen(ip_hdr(skb)->saddr,
-					ip_hdr(skb)->daddr, valid_foc);
-	}
-	return false;
-}
-
-static int tcp_v4_conn_req_fastopen(struct sock *sk,
-				    struct sk_buff *skb,
-				    struct sk_buff *skb_synack,
-				    struct request_sock *req)
-{
-	struct tcp_sock *tp = tcp_sk(sk);
-	struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue;
-	const struct inet_request_sock *ireq = inet_rsk(req);
-	struct sock *child;
-	int err;
-
-	req->num_retrans = 0;
-	req->num_timeout = 0;
-	req->sk = NULL;
-
-	child = inet_csk(sk)->icsk_af_ops->syn_recv_sock(sk, skb, req, NULL);
-	if (child == NULL) {
-		NET_INC_STATS_BH(sock_net(sk),
-				 LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
-		kfree_skb(skb_synack);
-		return -1;
-	}
-	err = ip_build_and_send_pkt(skb_synack, sk, ireq->ir_loc_addr,
-				    ireq->ir_rmt_addr, ireq->opt);
-	err = net_xmit_eval(err);
-	if (!err)
-		tcp_rsk(req)->snt_synack = tcp_time_stamp;
-	/* XXX (TFO) - is it ok to ignore error and continue? */
-
-	spin_lock(&queue->fastopenq->lock);
-	queue->fastopenq->qlen++;
-	spin_unlock(&queue->fastopenq->lock);
-
-	/* Initialize the child socket. Have to fix some values to take
-	 * into account the child is a Fast Open socket and is created
-	 * only out of the bits carried in the SYN packet.
-	 */
-	tp = tcp_sk(child);
-
-	tp->fastopen_rsk = req;
-	/* Do a hold on the listner sk so that if the listener is being
-	 * closed, the child that has been accepted can live on and still
-	 * access listen_lock.
-	 */
-	sock_hold(sk);
-	tcp_rsk(req)->listener = sk;
-
-	/* RFC1323: The window in SYN & SYN/ACK segments is never
-	 * scaled. So correct it appropriately.
-	 */
-	tp->snd_wnd = ntohs(tcp_hdr(skb)->window);
-
-	/* Activate the retrans timer so that SYNACK can be retransmitted.
-	 * The request socket is not added to the SYN table of the parent
-	 * because it's been added to the accept queue directly.
-	 */
-	inet_csk_reset_xmit_timer(child, ICSK_TIME_RETRANS,
-				  TCP_TIMEOUT_INIT, TCP_RTO_MAX);
-
-	/* Add the child socket directly into the accept queue */
-	inet_csk_reqsk_queue_add(sk, req, child);
-
-	/* Now finish processing the fastopen child socket. */
-	inet_csk(child)->icsk_af_ops->rebuild_header(child);
-	tcp_init_congestion_control(child);
-	tcp_mtup_init(child);
-	tcp_init_metrics(child);
-	tcp_init_buffer_space(child);
-
-	/* Queue the data carried in the SYN packet. We need to first
-	 * bump skb's refcnt because the caller will attempt to free it.
-	 *
-	 * XXX (TFO) - we honor a zero-payload TFO request for now.
-	 * (Any reason not to?)
-	 */
-	if (TCP_SKB_CB(skb)->end_seq == TCP_SKB_CB(skb)->seq + 1) {
-		/* Don't queue the skb if there is no payload in SYN.
-		 * XXX (TFO) - How about SYN+FIN?
-		 */
-		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
-	} else {
-		skb = skb_get(skb);
-		skb_dst_drop(skb);
-		__skb_pull(skb, tcp_hdr(skb)->doff * 4);
-		skb_set_owner_r(skb, child);
-		__skb_queue_tail(&child->sk_receive_queue, skb);
-		tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
-		tp->syn_data_acked = 1;
-	}
-	sk->sk_data_ready(sk);
-	bh_unlock_sock(child);
-	sock_put(child);
-	WARN_ON(req->sk == NULL);
-	return 0;
-}
-
 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 {
 	struct tcp_options_received tmp_opt;
@@ -1451,12 +1264,10 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 	__be32 saddr = ip_hdr(skb)->saddr;
 	__be32 daddr = ip_hdr(skb)->daddr;
 	__u32 isn = TCP_SKB_CB(skb)->when;
-	bool want_cookie = false;
+	bool want_cookie = false, fastopen;
 	struct flowi4 fl4;
 	struct tcp_fastopen_cookie foc = { .len = -1 };
-	struct tcp_fastopen_cookie valid_foc = { .len = -1 };
-	struct sk_buff *skb_synack;
-	int do_fastopen;
+	int err;
 
 	/* Never answer to SYNs send to broadcast or multicast */
 	if (skb_rtable(skb)->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST))
@@ -1555,52 +1366,24 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
 		isn = tcp_v4_init_sequence(skb);
 	}
-	tcp_rsk(req)->snt_isn = isn;
-
-	if (dst == NULL) {
-		dst = inet_csk_route_req(sk, &fl4, req);
-		if (dst == NULL)
-			goto drop_and_free;
-	}
-	do_fastopen = tcp_fastopen_check(sk, skb, req, &foc, &valid_foc);
-
-	/* We don't call tcp_v4_send_synack() directly because we need
-	 * to make sure a child socket can be created successfully before
-	 * sending back synack!
-	 *
-	 * XXX (TFO) - Ideally one would simply call tcp_v4_send_synack()
-	 * (or better yet, call tcp_send_synack() in the child context
-	 * directly, but will have to fix bunch of other code first)
-	 * after syn_recv_sock() except one will need to first fix the
-	 * latter to remove its dependency on the current implementation
-	 * of tcp_v4_send_synack()->tcp_select_initial_window().
-	 */
-	skb_synack = tcp_make_synack(sk, dst, req,
-				     fastopen_cookie_present(&valid_foc) ?
-				     &valid_foc : NULL);
-
-	if (skb_synack) {
-		__tcp_v4_send_check(skb_synack, ireq->ir_loc_addr, ireq->ir_rmt_addr);
-		skb_set_queue_mapping(skb_synack, skb_get_queue_mapping(skb));
-	} else
+	if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL)
 		goto drop_and_free;
 
-	if (likely(!do_fastopen)) {
-		int err;
-		err = ip_build_and_send_pkt(skb_synack, sk, ireq->ir_loc_addr,
-					    ireq->ir_rmt_addr, ireq->opt);
-		err = net_xmit_eval(err);
+	tcp_rsk(req)->snt_isn = isn;
+	tcp_rsk(req)->snt_synack = tcp_time_stamp;
+	tcp_openreq_init_rwin(req, sk, dst);
+	fastopen = !want_cookie &&
+		   tcp_try_fastopen(sk, skb, req, &foc, dst);
+	err = tcp_v4_send_synack(sk, dst, req,
+				 skb_get_queue_mapping(skb), &foc);
+	if (!fastopen) {
 		if (err || want_cookie)
 			goto drop_and_free;
 
 		tcp_rsk(req)->snt_synack = tcp_time_stamp;
 		tcp_rsk(req)->listener = NULL;
-		/* Add the request_sock to the SYN table */
 		inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
-		if (fastopen_cookie_present(&foc) && foc.len != 0)
-			NET_INC_STATS_BH(sock_net(sk),
-					 LINUX_MIB_TCPFASTOPENPASSIVEFAIL);
-	} else if (tcp_v4_conn_req_fastopen(sk, skb, skb_synack, req))
-		goto drop_and_free;
+	}
 
 	return 0;
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 05c1b155251d..e68e0d4af6c9 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -362,6 +362,37 @@ void tcp_twsk_destructor(struct sock *sk)
 }
 EXPORT_SYMBOL_GPL(tcp_twsk_destructor);
 
+void tcp_openreq_init_rwin(struct request_sock *req,
+			   struct sock *sk, struct dst_entry *dst)
+{
+	struct inet_request_sock *ireq = inet_rsk(req);
+	struct tcp_sock *tp = tcp_sk(sk);
+	__u8 rcv_wscale;
+	int mss = dst_metric_advmss(dst);
+
+	if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss)
+		mss = tp->rx_opt.user_mss;
+
+	/* Set this up on the first call only */
+	req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW);
+
+	/* limit the window selection if the user enforces a smaller rx buffer */
+	if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
+	    (req->window_clamp > tcp_full_space(sk) ||
+	     req->window_clamp == 0))
+		req->window_clamp = tcp_full_space(sk);
+
+	/* tcp_full_space because it is guaranteed to be the first packet */
+	tcp_select_initial_window(tcp_full_space(sk),
+		mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
+		&req->rcv_wnd,
+		&req->window_clamp,
+		ireq->wscale_ok,
+		&rcv_wscale,
+		dst_metric(dst, RTAX_INITRWND));
+	ireq->rcv_wscale = rcv_wscale;
+}
+EXPORT_SYMBOL(tcp_openreq_init_rwin);
+
 static inline void TCP_ECN_openreq_child(struct tcp_sock *tp,
 					 struct request_sock *req)
 {
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 694711a140d4..3d61c52bdf79 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -627,7 +627,7 @@ static unsigned int tcp_synack_options(struct sock *sk,
 		if (unlikely(!ireq->tstamp_ok))
 			remaining -= TCPOLEN_SACKPERM_ALIGNED;
 	}
-	if (foc != NULL) {
+	if (foc != NULL && foc->len >= 0) {
 		u32 need = TCPOLEN_EXP_FASTOPEN_BASE + foc->len;
 		need = (need + 3) & ~3U;  /* Align to 32 bits */
 		if (remaining >= need) {
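
The foc->len >= 0 test above is what lets the SYN-ACK carry either a validated cookie echo or a freshly generated one. On the wire this kernel still uses the experimental TCP option format: kind 254 (TCPOPT_EXP), a length byte, the 0xF989 Fast Open magic, then the cookie; TCPOLEN_EXP_FASTOPEN_BASE covers the 4 bytes of kind+len+magic. An illustrative user-space encoder, not kernel code; the buffer handling is simplified:

#include <stdint.h>
#include <stddef.h>
#include <string.h>

#define TCPOPT_EXP			254
#define TCPOPT_NOP			1
#define TCPOPT_FASTOPEN_MAGIC		0xF989
#define TCPOLEN_EXP_FASTOPEN_BASE	4

/* Write the option into buf (caller guarantees space); returns bytes
 * used, rounded up to a multiple of 4 with NOP padding as on the wire.
 */
static size_t write_tfo_option(uint8_t *buf, const uint8_t *cookie,
			       uint8_t cookie_len)
{
	size_t len = TCPOLEN_EXP_FASTOPEN_BASE + cookie_len;
	size_t need = (len + 3) & ~(size_t)3;	/* same alignment as above */

	buf[0] = TCPOPT_EXP;
	buf[1] = (uint8_t)len;
	buf[2] = TCPOPT_FASTOPEN_MAGIC >> 8;
	buf[3] = TCPOPT_FASTOPEN_MAGIC & 0xff;
	memcpy(buf + 4, cookie, cookie_len);
	memset(buf + len, TCPOPT_NOP, need - len);
	return need;
}

With the default 8-byte cookie the option is 12 bytes, already 32-bit aligned, so no padding is emitted.
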
@@ -2803,27 +2803,6 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst,
 	if (tp->rx_opt.user_mss && tp->rx_opt.user_mss < mss)
 		mss = tp->rx_opt.user_mss;
 
-	if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */
-		__u8 rcv_wscale;
-		/* Set this up on the first call only */
-		req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW);
-
-		/* limit the window selection if the user enforce a smaller rx buffer */
-		if (sk->sk_userlocks & SOCK_RCVBUF_LOCK &&
-		    (req->window_clamp > tcp_full_space(sk) ||
-		     req->window_clamp == 0))
-			req->window_clamp = tcp_full_space(sk);
-
-		/* tcp_full_space because it is guaranteed to be the first packet */
-		tcp_select_initial_window(tcp_full_space(sk),
-			mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
-			&req->rcv_wnd,
-			&req->window_clamp,
-			ireq->wscale_ok,
-			&rcv_wscale,
-			dst_metric(dst, RTAX_INITRWND));
-		ireq->rcv_wscale = rcv_wscale;
-	}
-
 	memset(&opts, 0, sizeof(opts));
 #ifdef CONFIG_SYN_COOKIES
 	if (unlikely(req->cookie_ts))
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 7fa67439f4d6..3a267bf14f2f 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -340,7 +340,8 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	struct sock *sk;
 	int err;
 	struct tcp_sock *tp;
-	__u32 seq;
+	struct request_sock *fastopen;
+	__u32 seq, snd_una;
 	struct net *net = dev_net(skb->dev);
 
 	sk = inet6_lookup(net, &tcp_hashinfo, &hdr->daddr,
@@ -371,8 +372,11 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	tp = tcp_sk(sk);
 	seq = ntohl(th->seq);
+	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
+	fastopen = tp->fastopen_rsk;
+	snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
 	if (sk->sk_state != TCP_LISTEN &&
-	    !between(seq, tp->snd_una, tp->snd_nxt)) {
+	    !between(seq, snd_una, tp->snd_nxt)) {
 		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
 		goto out;
 	}
@@ -436,8 +440,13 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		goto out;
 
 	case TCP_SYN_SENT:
-	case TCP_SYN_RECV:  /* Cannot happen.
-			       It can, it SYNs are crossed. --ANK */
+	case TCP_SYN_RECV:
+		/* Only in fast or simultaneous open. If a fast open socket
+		 * is already accepted it is treated as a connected one below.
+		 */
+		if (fastopen && fastopen->sk == NULL)
+			break;
+
 		if (!sock_owned_by_user(sk)) {
 			sk->sk_err = err;
 			sk->sk_error_report(sk);	/* Wake people up to see the error (see connect in sock.c) */
@@ -463,7 +472,8 @@ out:
 
 static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
 			      struct flowi6 *fl6, struct request_sock *req,
-			      u16 queue_mapping)
+			      u16 queue_mapping,
+			      struct tcp_fastopen_cookie *foc)
 {
 	struct inet_request_sock *ireq = inet_rsk(req);
 	struct ipv6_pinfo *np = inet6_sk(sk);
@@ -474,7 +484,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst,
 	if (!dst && (dst = inet6_csk_route_req(sk, fl6, req)) == NULL)
 		goto done;
 
-	skb = tcp_make_synack(sk, dst, req, NULL);
+	skb = tcp_make_synack(sk, dst, req, foc);
 
 	if (skb) {
 		__tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
@@ -498,7 +508,7 @@ static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req)
 	struct flowi6 fl6;
 	int res;
 
-	res = tcp_v6_send_synack(sk, NULL, &fl6, req, 0);
+	res = tcp_v6_send_synack(sk, NULL, &fl6, req, 0, NULL);
 	if (!res) {
 		TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
 		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNRETRANS);
@@ -917,7 +927,12 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
 static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
 				  struct request_sock *req)
 {
-	tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1,
+	/* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
+	 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
+	 */
+	tcp_v6_send_ack(skb, (sk->sk_state == TCP_LISTEN) ?
+			tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
+			tcp_rsk(req)->rcv_nxt,
 			req->rcv_wnd, tcp_time_stamp, req->ts_recent, sk->sk_bound_dev_if,
 			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr),
 			0, 0);
@@ -969,8 +984,10 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	struct tcp_sock *tp = tcp_sk(sk);
 	__u32 isn = TCP_SKB_CB(skb)->when;
 	struct dst_entry *dst = NULL;
+	struct tcp_fastopen_cookie foc = { .len = -1 };
+	bool want_cookie = false, fastopen;
 	struct flowi6 fl6;
-	bool want_cookie = false;
+	int err;
 
 	if (skb->protocol == htons(ETH_P_IP))
 		return tcp_v4_conn_request(sk, skb);
@@ -1001,7 +1018,7 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 	tcp_clear_options(&tmp_opt);
 	tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
 	tmp_opt.user_mss = tp->rx_opt.user_mss;
-	tcp_parse_options(skb, &tmp_opt, 0, NULL);
+	tcp_parse_options(skb, &tmp_opt, 0, want_cookie ? NULL : &foc);
 
 	if (want_cookie && !tmp_opt.saw_tstamp)
 		tcp_clear_options(&tmp_opt);
@@ -1074,19 +1091,27 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
 		isn = tcp_v6_init_sequence(skb);
 	}
 have_isn:
-	tcp_rsk(req)->snt_isn = isn;
 
 	if (security_inet_conn_request(sk, skb, req))
 		goto drop_and_release;
 
-	if (tcp_v6_send_synack(sk, dst, &fl6, req,
-			       skb_get_queue_mapping(skb)) ||
-	    want_cookie)
+	if (!dst && (dst = inet6_csk_route_req(sk, &fl6, req)) == NULL)
 		goto drop_and_free;
 
+	tcp_rsk(req)->snt_isn = isn;
 	tcp_rsk(req)->snt_synack = tcp_time_stamp;
-	tcp_rsk(req)->listener = NULL;
-	inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
+	tcp_openreq_init_rwin(req, sk, dst);
+	fastopen = !want_cookie &&
+		   tcp_try_fastopen(sk, skb, req, &foc, dst);
+	err = tcp_v6_send_synack(sk, dst, &fl6, req,
+				 skb_get_queue_mapping(skb), &foc);
+	if (!fastopen) {
+		if (err || want_cookie)
+			goto drop_and_free;
+
+		tcp_rsk(req)->listener = NULL;
+		inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
+	}
 	return 0;
 
 drop_and_release:
@@ -1760,6 +1785,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
 	const struct inet_sock *inet = inet_sk(sp);
 	const struct tcp_sock *tp = tcp_sk(sp);
 	const struct inet_connection_sock *icsk = inet_csk(sp);
+	struct fastopen_queue *fastopenq = icsk->icsk_accept_queue.fastopenq;
 
 	dest  = &sp->sk_v6_daddr;
 	src   = &sp->sk_v6_rcv_saddr;
@@ -1802,7 +1828,9 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
 		   jiffies_to_clock_t(icsk->icsk_ack.ato),
 		   (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
 		   tp->snd_cwnd,
-		   tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh
+		   sp->sk_state == TCP_LISTEN ?
+			(fastopenq ? fastopenq->max_qlen : 0) :
+			(tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
 		   );
 }
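
The LINUX_MIB_TCPFASTOPEN* counters bumped throughout this patch surface on the TcpExt: lines of /proc/net/netstat (TCPFastOpenPassive, TCPFastOpenPassiveFail, TCPFastOpenListenOverflow, TCPFastOpenCookieReqd, matching the kernel's snmp4_net_list names). A small reader for eyeballing them while testing; the exact field set is version-dependent:

#include <stdio.h>
#include <string.h>

int main(void)
{
	FILE *f = fopen("/proc/net/netstat", "r");
	char names[4096], vals[4096];

	if (!f)
		return 1;
	/* The file alternates a header line and a value line per group. */
	while (fgets(names, sizeof(names), f) &&
	       fgets(vals, sizeof(vals), f)) {
		char *np, *vp;
		char *n = strtok_r(names, " \n", &np);
		char *v = strtok_r(vals, " \n", &vp);

		while (n && v) {
			if (!strncmp(n, "TCPFastOpen", 11))
				printf("%s = %s\n", n, v);
			n = strtok_r(NULL, " \n", &np);
			v = strtok_r(NULL, " \n", &vp);
		}
	}
	fclose(f);
	return 0;
}
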