summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/linux/tcp.h3
-rw-r--r--include/net/inet_sock.h6
-rw-r--r--include/net/tcp.h3
-rw-r--r--include/uapi/linux/tcp.h1
-rw-r--r--net/ipv4/af_inet.c31
-rw-r--r--net/ipv4/tcp.c35
-rw-r--r--net/ipv4/tcp_fastopen.c54
-rw-r--r--net/ipv4/tcp_ipv4.c7
-rw-r--r--net/ipv4/tcp_output.c16
-rw-r--r--net/ipv6/tcp_ipv6.c6
10 files changed, 136 insertions, 26 deletions
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 5371b3d70cfe..f88f4649ba6f 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -222,7 +222,8 @@ struct tcp_sock {
u32 chrono_stat[3]; /* Time in jiffies for chrono_stat stats */
u8 chrono_type:2, /* current chronograph type */
rate_app_limited:1, /* rate_{delivered,interval_us} limited? */
- unused:5;
+ fastopen_connect:1, /* FASTOPEN_CONNECT sockopt */
+ unused:4;
u8 nonagle : 4,/* Disable Nagle algorithm? */
thin_lto : 1,/* Use linear timeouts for thin streams */
unused1 : 1,
diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h
index c9cff977a7fb..aa95053dfc78 100644
--- a/include/net/inet_sock.h
+++ b/include/net/inet_sock.h
@@ -206,7 +206,11 @@ struct inet_sock {
transparent:1,
mc_all:1,
nodefrag:1;
- __u8 bind_address_no_port:1;
+ __u8 bind_address_no_port:1,
+ defer_connect:1; /* Indicates that fastopen_connect is set
+ * and cookie exists so we defer connect
+ * until first data frame is written
+ */
__u8 rcv_tos;
__u8 convert_csum;
int uc_index;
diff --git a/include/net/tcp.h b/include/net/tcp.h
index c55d65f74f7f..6ec4ea652f3f 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1493,6 +1493,9 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb,
struct tcp_fastopen_cookie *foc,
struct dst_entry *dst);
void tcp_fastopen_init_key_once(bool publish);
+bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
+ struct tcp_fastopen_cookie *cookie);
+bool tcp_fastopen_defer_connect(struct sock *sk, int *err);
#define TCP_FASTOPEN_KEY_LENGTH 16
/* Fastopen key context */
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index c53de2691cec..6ff35eb48d10 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -116,6 +116,7 @@ enum {
#define TCP_SAVE_SYN 27 /* Record SYN headers for new connections */
#define TCP_SAVED_SYN 28 /* Get SYN headers recorded for connection */
#define TCP_REPAIR_WINDOW 29 /* Get/set window parameters */
+#define TCP_FASTOPEN_CONNECT 30 /* Attempt FastOpen with connect */
struct tcp_repair_opt {
__u32 opt_code;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 28fe8da4e1ac..92e7f3e957fa 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -576,13 +576,24 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
int err;
long timeo;
- if (addr_len < sizeof(uaddr->sa_family))
- return -EINVAL;
+ /*
+ * uaddr can be NULL and addr_len can be 0 if:
+ * sk is a TCP fastopen active socket and
+ * TCP_FASTOPEN_CONNECT sockopt is set and
+ * we already have a valid cookie for this socket.
+ * In this case, user can call write() after connect().
+ * write() will invoke tcp_sendmsg_fastopen() which calls
+ * __inet_stream_connect().
+ */
+ if (uaddr) {
+ if (addr_len < sizeof(uaddr->sa_family))
+ return -EINVAL;
- if (uaddr->sa_family == AF_UNSPEC) {
- err = sk->sk_prot->disconnect(sk, flags);
- sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED;
- goto out;
+ if (uaddr->sa_family == AF_UNSPEC) {
+ err = sk->sk_prot->disconnect(sk, flags);
+ sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED;
+ goto out;
+ }
}
switch (sock->state) {
@@ -593,7 +604,10 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
err = -EISCONN;
goto out;
case SS_CONNECTING:
- err = -EALREADY;
+ if (inet_sk(sk)->defer_connect)
+ err = -EINPROGRESS;
+ else
+ err = -EALREADY;
/* Fall out of switch with err, set for this state */
break;
case SS_UNCONNECTED:
@@ -607,6 +621,9 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr,
sock->state = SS_CONNECTING;
+ if (!err && inet_sk(sk)->defer_connect)
+ goto out;
+
/* Just entered SS_CONNECTING state; the only
* difference is that return value in non-blocking
* case is EINPROGRESS, rather than EALREADY.
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index c43eb1a831d7..d9735b76d073 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -533,6 +533,12 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
if (tp->urg_data & TCP_URG_VALID)
mask |= POLLPRI;
+ } else if (sk->sk_state == TCP_SYN_SENT && inet_sk(sk)->defer_connect) {
+ /* Active TCP fastopen socket with defer_connect
+ * Return POLLOUT so application can call write()
+ * in order for kernel to generate SYN+data
+ */
+ mask |= POLLOUT | POLLWRNORM;
}
/* This barrier is coupled with smp_wmb() in tcp_reset() */
smp_rmb();
@@ -1071,6 +1077,7 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
int *copied, size_t size)
{
struct tcp_sock *tp = tcp_sk(sk);
+ struct inet_sock *inet = inet_sk(sk);
int err, flags;
if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE))
@@ -1085,9 +1092,19 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg,
tp->fastopen_req->data = msg;
tp->fastopen_req->size = size;
+ if (inet->defer_connect) {
+ err = tcp_connect(sk);
+ /* Same failure procedure as in tcp_v4/6_connect */
+ if (err) {
+ tcp_set_state(sk, TCP_CLOSE);
+ inet->inet_dport = 0;
+ sk->sk_route_caps = 0;
+ }
+ }
flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0;
err = __inet_stream_connect(sk->sk_socket, msg->msg_name,
msg->msg_namelen, flags);
+ inet->defer_connect = 0;
*copied = tp->fastopen_req->copied;
tcp_free_fastopen_req(tp);
return err;
@@ -1107,7 +1124,7 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size)
lock_sock(sk);
flags = msg->msg_flags;
- if (flags & MSG_FASTOPEN) {
+ if (unlikely(flags & MSG_FASTOPEN || inet_sk(sk)->defer_connect)) {
err = tcp_sendmsg_fastopen(sk, msg, &copied_syn, size);
if (err == -EINPROGRESS && copied_syn > 0)
goto out;
@@ -2656,6 +2673,18 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
err = -EINVAL;
}
break;
+ case TCP_FASTOPEN_CONNECT:
+ if (val > 1 || val < 0) {
+ err = -EINVAL;
+ } else if (sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) {
+ if (sk->sk_state == TCP_CLOSE)
+ tp->fastopen_connect = val;
+ else
+ err = -EINVAL;
+ } else {
+ err = -EOPNOTSUPP;
+ }
+ break;
case TCP_TIMESTAMP:
if (!tp->repair)
err = -EPERM;
@@ -3016,6 +3045,10 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
val = icsk->icsk_accept_queue.fastopenq.max_qlen;
break;
+ case TCP_FASTOPEN_CONNECT:
+ val = tp->fastopen_connect;
+ break;
+
case TCP_TIMESTAMP:
val = tcp_time_stamp + tp->tsoffset;
break;
diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c
index f51919535ca7..9674bec4a0f8 100644
--- a/net/ipv4/tcp_fastopen.c
+++ b/net/ipv4/tcp_fastopen.c
@@ -325,3 +325,57 @@ fastopen:
*foc = valid_foc;
return NULL;
}
+
+bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss,
+ struct tcp_fastopen_cookie *cookie)
+{
+ unsigned long last_syn_loss = 0;
+ int syn_loss = 0;
+
+ tcp_fastopen_cache_get(sk, mss, cookie, &syn_loss, &last_syn_loss);
+
+ /* Recurring FO SYN losses: no cookie or data in SYN */
+ if (syn_loss > 1 &&
+ time_before(jiffies, last_syn_loss + (60*HZ << syn_loss))) {
+ cookie->len = -1;
+ return false;
+ }
+ if (sysctl_tcp_fastopen & TFO_CLIENT_NO_COOKIE) {
+ cookie->len = -1;
+ return true;
+ }
+ return cookie->len > 0;
+}
+
+/* This function checks if we want to defer sending SYN until the first
+ * write(). We defer under the following conditions:
+ * 1. fastopen_connect sockopt is set
+ * 2. we have a valid cookie
+ * Return value: return true if we want to defer until application writes data
+ * return false if we want to send out SYN immediately
+ */
+bool tcp_fastopen_defer_connect(struct sock *sk, int *err)
+{
+ struct tcp_fastopen_cookie cookie = { .len = 0 };
+ struct tcp_sock *tp = tcp_sk(sk);
+ u16 mss;
+
+ if (tp->fastopen_connect && !tp->fastopen_req) {
+ if (tcp_fastopen_cookie_check(sk, &mss, &cookie)) {
+ inet_sk(sk)->defer_connect = 1;
+ return true;
+ }
+
+ /* Alloc fastopen_req in order for FO option to be included
+ * in SYN
+ */
+ tp->fastopen_req = kzalloc(sizeof(*tp->fastopen_req),
+ sk->sk_allocation);
+ if (tp->fastopen_req)
+ tp->fastopen_req->cookie = cookie;
+ else
+ *err = -ENOBUFS;
+ }
+ return false;
+}
+EXPORT_SYMBOL(tcp_fastopen_defer_connect);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a90b4540c11e..8c9e9aa17d66 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -232,6 +232,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
/* OK, now commit destination to socket. */
sk->sk_gso_type = SKB_GSO_TCPV4;
sk_setup_caps(sk, &rt->dst);
+ rt = NULL;
if (!tp->write_seq && likely(!tp->repair))
tp->write_seq = secure_tcp_sequence_number(inet->inet_saddr,
@@ -242,9 +243,13 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
inet->inet_id = tp->write_seq ^ jiffies;
+ if (tcp_fastopen_defer_connect(sk, &err))
+ return err;
+ if (err)
+ goto failure;
+
err = tcp_connect(sk);
- rt = NULL;
if (err)
goto failure;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 9a1a1494b9dd..671c69535671 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -3267,23 +3267,11 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn)
{
struct tcp_sock *tp = tcp_sk(sk);
struct tcp_fastopen_request *fo = tp->fastopen_req;
- int syn_loss = 0, space, err = 0;
- unsigned long last_syn_loss = 0;
+ int space, err = 0;
struct sk_buff *syn_data;
tp->rx_opt.mss_clamp = tp->advmss; /* If MSS is not cached */
- tcp_fastopen_cache_get(sk, &tp->rx_opt.mss_clamp, &fo->cookie,
- &syn_loss, &last_syn_loss);
- /* Recurring FO SYN losses: revert to regular handshake temporarily */
- if (syn_loss > 1 &&
- time_before(jiffies, last_syn_loss + (60*HZ << syn_loss))) {
- fo->cookie.len = -1;
- goto fallback;
- }
-
- if (sysctl_tcp_fastopen & TFO_CLIENT_NO_COOKIE)
- fo->cookie.len = -1;
- else if (fo->cookie.len <= 0)
+ if (!tcp_fastopen_cookie_check(sk, &tp->rx_opt.mss_clamp, &fo->cookie))
goto fallback;
/* MSS for SYN-data is based on cached MSS and bounded by PMTU and
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index f72100eedd5d..95c05e5293b1 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -287,6 +287,11 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
inet->inet_dport,
&tp->tsoffset);
+ if (tcp_fastopen_defer_connect(sk, &err))
+ return err;
+ if (err)
+ goto late_failure;
+
err = tcp_connect(sk);
if (err)
goto late_failure;
@@ -295,7 +300,6 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
late_failure:
tcp_set_state(sk, TCP_CLOSE);
- __sk_dst_reset(sk);
failure:
inet->inet_dport = 0;
sk->sk_route_caps = 0;