diff options
-rw-r--r-- | include/linux/tcp.h | 3 | ||||
-rw-r--r-- | include/net/inet_sock.h | 6 | ||||
-rw-r--r-- | include/net/tcp.h | 3 | ||||
-rw-r--r-- | include/uapi/linux/tcp.h | 1 | ||||
-rw-r--r-- | net/ipv4/af_inet.c | 31 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 35 | ||||
-rw-r--r-- | net/ipv4/tcp_fastopen.c | 54 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 7 | ||||
-rw-r--r-- | net/ipv4/tcp_output.c | 16 | ||||
-rw-r--r-- | net/ipv6/tcp_ipv6.c | 6 |
10 files changed, 136 insertions, 26 deletions
diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 5371b3d70cfe..f88f4649ba6f 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -222,7 +222,8 @@ struct tcp_sock { u32 chrono_stat[3]; /* Time in jiffies for chrono_stat stats */ u8 chrono_type:2, /* current chronograph type */ rate_app_limited:1, /* rate_{delivered,interval_us} limited? */ - unused:5; + fastopen_connect:1, /* FASTOPEN_CONNECT sockopt */ + unused:4; u8 nonagle : 4,/* Disable Nagle algorithm? */ thin_lto : 1,/* Use linear timeouts for thin streams */ unused1 : 1, diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index c9cff977a7fb..aa95053dfc78 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -206,7 +206,11 @@ struct inet_sock { transparent:1, mc_all:1, nodefrag:1; - __u8 bind_address_no_port:1; + __u8 bind_address_no_port:1, + defer_connect:1; /* Indicates that fastopen_connect is set + * and cookie exists so we defer connect + * until first data frame is written + */ __u8 rcv_tos; __u8 convert_csum; int uc_index; diff --git a/include/net/tcp.h b/include/net/tcp.h index c55d65f74f7f..6ec4ea652f3f 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1493,6 +1493,9 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, struct tcp_fastopen_cookie *foc, struct dst_entry *dst); void tcp_fastopen_init_key_once(bool publish); +bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss, + struct tcp_fastopen_cookie *cookie); +bool tcp_fastopen_defer_connect(struct sock *sk, int *err); #define TCP_FASTOPEN_KEY_LENGTH 16 /* Fastopen key context */ diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index c53de2691cec..6ff35eb48d10 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -116,6 +116,7 @@ enum { #define TCP_SAVE_SYN 27 /* Record SYN headers for new connections */ #define TCP_SAVED_SYN 28 /* Get SYN headers recorded for connection */ #define TCP_REPAIR_WINDOW 29 /* Get/set window parameters */ +#define TCP_FASTOPEN_CONNECT 30 /* Attempt FastOpen with connect */ struct tcp_repair_opt { __u32 opt_code; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 28fe8da4e1ac..92e7f3e957fa 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -576,13 +576,24 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, int err; long timeo; - if (addr_len < sizeof(uaddr->sa_family)) - return -EINVAL; + /* + * uaddr can be NULL and addr_len can be 0 if: + * sk is a TCP fastopen active socket and + * TCP_FASTOPEN_CONNECT sockopt is set and + * we already have a valid cookie for this socket. + * In this case, user can call write() after connect(). + * write() will invoke tcp_sendmsg_fastopen() which calls + * __inet_stream_connect(). + */ + if (uaddr) { + if (addr_len < sizeof(uaddr->sa_family)) + return -EINVAL; - if (uaddr->sa_family == AF_UNSPEC) { - err = sk->sk_prot->disconnect(sk, flags); - sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED; - goto out; + if (uaddr->sa_family == AF_UNSPEC) { + err = sk->sk_prot->disconnect(sk, flags); + sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED; + goto out; + } } switch (sock->state) { @@ -593,7 +604,10 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, err = -EISCONN; goto out; case SS_CONNECTING: - err = -EALREADY; + if (inet_sk(sk)->defer_connect) + err = -EINPROGRESS; + else + err = -EALREADY; /* Fall out of switch with err, set for this state */ break; case SS_UNCONNECTED: @@ -607,6 +621,9 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, sock->state = SS_CONNECTING; + if (!err && inet_sk(sk)->defer_connect) + goto out; + /* Just entered SS_CONNECTING state; the only * difference is that return value in non-blocking * case is EINPROGRESS, rather than EALREADY. diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index c43eb1a831d7..d9735b76d073 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -533,6 +533,12 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) if (tp->urg_data & TCP_URG_VALID) mask |= POLLPRI; + } else if (sk->sk_state == TCP_SYN_SENT && inet_sk(sk)->defer_connect) { + /* Active TCP fastopen socket with defer_connect + * Return POLLOUT so application can call write() + * in order for kernel to generate SYN+data + */ + mask |= POLLOUT | POLLWRNORM; } /* This barrier is coupled with smp_wmb() in tcp_reset() */ smp_rmb(); @@ -1071,6 +1077,7 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *copied, size_t size) { struct tcp_sock *tp = tcp_sk(sk); + struct inet_sock *inet = inet_sk(sk); int err, flags; if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE)) @@ -1085,9 +1092,19 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, tp->fastopen_req->data = msg; tp->fastopen_req->size = size; + if (inet->defer_connect) { + err = tcp_connect(sk); + /* Same failure procedure as in tcp_v4/6_connect */ + if (err) { + tcp_set_state(sk, TCP_CLOSE); + inet->inet_dport = 0; + sk->sk_route_caps = 0; + } + } flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0; err = __inet_stream_connect(sk->sk_socket, msg->msg_name, msg->msg_namelen, flags); + inet->defer_connect = 0; *copied = tp->fastopen_req->copied; tcp_free_fastopen_req(tp); return err; @@ -1107,7 +1124,7 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) lock_sock(sk); flags = msg->msg_flags; - if (flags & MSG_FASTOPEN) { + if (unlikely(flags & MSG_FASTOPEN || inet_sk(sk)->defer_connect)) { err = tcp_sendmsg_fastopen(sk, msg, &copied_syn, size); if (err == -EINPROGRESS && copied_syn > 0) goto out; @@ -2656,6 +2673,18 @@ static int do_tcp_setsockopt(struct sock *sk, int level, err = -EINVAL; } break; + case TCP_FASTOPEN_CONNECT: + if (val > 1 || val < 0) { + err = -EINVAL; + } else if (sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) { + if (sk->sk_state == TCP_CLOSE) + tp->fastopen_connect = val; + else + err = -EINVAL; + } else { + err = -EOPNOTSUPP; + } + break; case TCP_TIMESTAMP: if (!tp->repair) err = -EPERM; @@ -3016,6 +3045,10 @@ static int do_tcp_getsockopt(struct sock *sk, int level, val = icsk->icsk_accept_queue.fastopenq.max_qlen; break; + case TCP_FASTOPEN_CONNECT: + val = tp->fastopen_connect; + break; + case TCP_TIMESTAMP: val = tcp_time_stamp + tp->tsoffset; break; diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index f51919535ca7..9674bec4a0f8 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -325,3 +325,57 @@ fastopen: *foc = valid_foc; return NULL; } + +bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss, + struct tcp_fastopen_cookie *cookie) +{ + unsigned long last_syn_loss = 0; + int syn_loss = 0; + + tcp_fastopen_cache_get(sk, mss, cookie, &syn_loss, &last_syn_loss); + + /* Recurring FO SYN losses: no cookie or data in SYN */ + if (syn_loss > 1 && + time_before(jiffies, last_syn_loss + (60*HZ << syn_loss))) { + cookie->len = -1; + return false; + } + if (sysctl_tcp_fastopen & TFO_CLIENT_NO_COOKIE) { + cookie->len = -1; + return true; + } + return cookie->len > 0; +} + +/* This function checks if we want to defer sending SYN until the first + * write(). We defer under the following conditions: + * 1. fastopen_connect sockopt is set + * 2. we have a valid cookie + * Return value: return true if we want to defer until application writes data + * return false if we want to send out SYN immediately + */ +bool tcp_fastopen_defer_connect(struct sock *sk, int *err) +{ + struct tcp_fastopen_cookie cookie = { .len = 0 }; + struct tcp_sock *tp = tcp_sk(sk); + u16 mss; + + if (tp->fastopen_connect && !tp->fastopen_req) { + if (tcp_fastopen_cookie_check(sk, &mss, &cookie)) { + inet_sk(sk)->defer_connect = 1; + return true; + } + + /* Alloc fastopen_req in order for FO option to be included + * in SYN + */ + tp->fastopen_req = kzalloc(sizeof(*tp->fastopen_req), + sk->sk_allocation); + if (tp->fastopen_req) + tp->fastopen_req->cookie = cookie; + else + *err = -ENOBUFS; + } + return false; +} +EXPORT_SYMBOL(tcp_fastopen_defer_connect); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a90b4540c11e..8c9e9aa17d66 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -232,6 +232,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) /* OK, now commit destination to socket. */ sk->sk_gso_type = SKB_GSO_TCPV4; sk_setup_caps(sk, &rt->dst); + rt = NULL; if (!tp->write_seq && likely(!tp->repair)) tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr, @@ -242,9 +243,13 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) inet->inet_id = tp->write_seq ^ jiffies; + if (tcp_fastopen_defer_connect(sk, &err)) + return err; + if (err) + goto failure; + err = tcp_connect(sk); - rt = NULL; if (err) goto failure; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 9a1a1494b9dd..671c69535671 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3267,23 +3267,11 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) { struct tcp_sock *tp = tcp_sk(sk); struct tcp_fastopen_request *fo = tp->fastopen_req; - int syn_loss = 0, space, err = 0; - unsigned long last_syn_loss = 0; + int space, err = 0; struct sk_buff *syn_data; tp->rx_opt.mss_clamp = tp->advmss; /* If MSS is not cached */ - tcp_fastopen_cache_get(sk, &tp->rx_opt.mss_clamp, &fo->cookie, - &syn_loss, &last_syn_loss); - /* Recurring FO SYN losses: revert to regular handshake temporarily */ - if (syn_loss > 1 && - time_before(jiffies, last_syn_loss + (60*HZ << syn_loss))) { - fo->cookie.len = -1; - goto fallback; - } - - if (sysctl_tcp_fastopen & TFO_CLIENT_NO_COOKIE) - fo->cookie.len = -1; - else if (fo->cookie.len <= 0) + if (!tcp_fastopen_cookie_check(sk, &tp->rx_opt.mss_clamp, &fo->cookie)) goto fallback; /* MSS for SYN-data is based on cached MSS and bounded by PMTU and diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index f72100eedd5d..95c05e5293b1 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -287,6 +287,11 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, inet->inet_dport, &tp->tsoffset); + if (tcp_fastopen_defer_connect(sk, &err)) + return err; + if (err) + goto late_failure; + err = tcp_connect(sk); if (err) goto late_failure; @@ -295,7 +300,6 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, late_failure: tcp_set_state(sk, TCP_CLOSE); - __sk_dst_reset(sk); failure: inet->inet_dport = 0; sk->sk_route_caps = 0; |