diff options
Diffstat (limited to 'net/ipv4/tcp.c')
-rw-r--r-- | net/ipv4/tcp.c | 87 |
1 files changed, 59 insertions, 28 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 0efb4c7f6704..da385ae997a3 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -406,7 +406,6 @@ void tcp_init_sock(struct sock *sk) tp->mss_cache = TCP_MSS_DEFAULT; tp->reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering; - tcp_enable_early_retrans(tp); tcp_assign_congestion_control(sk); tp->tsoffset = 0; @@ -421,15 +420,13 @@ void tcp_init_sock(struct sock *sk) sk->sk_sndbuf = sysctl_tcp_wmem[1]; sk->sk_rcvbuf = sysctl_tcp_rmem[1]; - local_bh_disable(); sk_sockets_allocated_inc(sk); - local_bh_enable(); } EXPORT_SYMBOL(tcp_init_sock); static void tcp_tx_timestamp(struct sock *sk, u16 tsflags, struct sk_buff *skb) { - if (tsflags) { + if (tsflags && skb) { struct skb_shared_info *shinfo = skb_shinfo(skb); struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); @@ -536,6 +533,12 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) if (tp->urg_data & TCP_URG_VALID) mask |= POLLPRI; + } else if (sk->sk_state == TCP_SYN_SENT && inet_sk(sk)->defer_connect) { + /* Active TCP fastopen socket with defer_connect + * Return POLLOUT so application can call write() + * in order for kernel to generate SYN+data + */ + mask |= POLLOUT | POLLWRNORM; } /* This barrier is coupled with smp_wmb() in tcp_reset() */ smp_rmb(); @@ -964,10 +967,8 @@ new_segment: copied += copy; offset += copy; size -= copy; - if (!size) { - tcp_tx_timestamp(sk, sk->sk_tsflags, skb); + if (!size) goto out; - } if (skb->len < size_goal || (flags & MSG_OOB)) continue; @@ -993,8 +994,11 @@ wait_for_memory: } out: - if (copied && !(flags & MSG_SENDPAGE_NOTLAST)) - tcp_push(sk, flags, mss_now, tp->nonagle, size_goal); + if (copied) { + tcp_tx_timestamp(sk, sk->sk_tsflags, tcp_write_queue_tail(sk)); + if (!(flags & MSG_SENDPAGE_NOTLAST)) + tcp_push(sk, flags, mss_now, tp->nonagle, size_goal); + } return copied; do_error: @@ -1079,6 +1083,7 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *copied, size_t size) { struct tcp_sock *tp = tcp_sk(sk); + struct inet_sock *inet = inet_sk(sk); int err, flags; if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE)) @@ -1093,9 +1098,19 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, tp->fastopen_req->data = msg; tp->fastopen_req->size = size; + if (inet->defer_connect) { + err = tcp_connect(sk); + /* Same failure procedure as in tcp_v4/6_connect */ + if (err) { + tcp_set_state(sk, TCP_CLOSE); + inet->inet_dport = 0; + sk->sk_route_caps = 0; + } + } flags = (msg->msg_flags & MSG_DONTWAIT) ? O_NONBLOCK : 0; err = __inet_stream_connect(sk->sk_socket, msg->msg_name, - msg->msg_namelen, flags); + msg->msg_namelen, flags, 1); + inet->defer_connect = 0; *copied = tp->fastopen_req->copied; tcp_free_fastopen_req(tp); return err; @@ -1115,7 +1130,7 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) lock_sock(sk); flags = msg->msg_flags; - if (flags & MSG_FASTOPEN) { + if (unlikely(flags & MSG_FASTOPEN || inet_sk(sk)->defer_connect)) { err = tcp_sendmsg_fastopen(sk, msg, &copied_syn, size); if (err == -EINPROGRESS && copied_syn > 0) goto out; @@ -1273,7 +1288,7 @@ new_segment: } else { skb_fill_page_desc(skb, i, pfrag->page, pfrag->offset, copy); - get_page(pfrag->page); + page_ref_inc(pfrag->page); } pfrag->offset += copy; } @@ -1287,7 +1302,6 @@ new_segment: copied += copy; if (!msg_data_left(msg)) { - tcp_tx_timestamp(sk, sockc.tsflags, skb); if (unlikely(flags & MSG_EOR)) TCP_SKB_CB(skb)->eor = 1; goto out; @@ -1318,8 +1332,10 @@ wait_for_memory: } out: - if (copied) + if (copied) { + tcp_tx_timestamp(sk, sockc.tsflags, tcp_write_queue_tail(sk)); tcp_push(sk, flags, mss_now, tp->nonagle, size_goal); + } out_nopush: release_sock(sk); return copied + copied_syn; @@ -2479,11 +2495,6 @@ static int do_tcp_setsockopt(struct sock *sk, int level, case TCP_THIN_DUPACK: if (val < 0 || val > 1) err = -EINVAL; - else { - tp->thin_dupack = val; - if (tp->thin_dupack) - tcp_disable_early_retrans(tp); - } break; case TCP_REPAIR: @@ -2668,6 +2679,18 @@ static int do_tcp_setsockopt(struct sock *sk, int level, err = -EINVAL; } break; + case TCP_FASTOPEN_CONNECT: + if (val > 1 || val < 0) { + err = -EINVAL; + } else if (sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) { + if (sk->sk_state == TCP_CLOSE) + tp->fastopen_connect = val; + else + err = -EINVAL; + } else { + err = -EOPNOTSUPP; + } + break; case TCP_TIMESTAMP: if (!tp->repair) err = -EPERM; @@ -2770,6 +2793,9 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_sacked = sk->sk_max_ack_backlog; return; } + + slow = lock_sock_fast(sk); + info->tcpi_ca_state = icsk->icsk_ca_state; info->tcpi_retransmits = icsk->icsk_retransmits; info->tcpi_probes = icsk->icsk_probes_out; @@ -2820,15 +2846,11 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_total_retrans = tp->total_retrans; - slow = lock_sock_fast(sk); - info->tcpi_bytes_acked = tp->bytes_acked; info->tcpi_bytes_received = tp->bytes_received; info->tcpi_notsent_bytes = max_t(int, 0, tp->write_seq - tp->snd_nxt); tcp_get_info_chrono_stats(tp, info); - unlock_sock_fast(sk, slow); - info->tcpi_segs_out = tp->segs_out; info->tcpi_segs_in = tp->segs_in; @@ -2844,6 +2866,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) do_div(rate64, intv); info->tcpi_delivery_rate = rate64; } + unlock_sock_fast(sk, slow); } EXPORT_SYMBOL_GPL(tcp_get_info); @@ -2853,7 +2876,7 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk) struct sk_buff *stats; struct tcp_info info; - stats = alloc_skb(3 * nla_total_size_64bit(sizeof(u64)), GFP_ATOMIC); + stats = alloc_skb(5 * nla_total_size_64bit(sizeof(u64)), GFP_ATOMIC); if (!stats) return NULL; @@ -2864,6 +2887,10 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk) info.tcpi_rwnd_limited, TCP_NLA_PAD); nla_put_u64_64bit(stats, TCP_NLA_SNDBUF_LIMITED, info.tcpi_sndbuf_limited, TCP_NLA_PAD); + nla_put_u64_64bit(stats, TCP_NLA_DATA_SEGS_OUT, + tp->data_segs_out, TCP_NLA_PAD); + nla_put_u64_64bit(stats, TCP_NLA_TOTAL_RETRANS, + tp->total_retrans, TCP_NLA_PAD); return stats; } @@ -2973,8 +3000,9 @@ static int do_tcp_getsockopt(struct sock *sk, int level, case TCP_THIN_LINEAR_TIMEOUTS: val = tp->thin_lto; break; + case TCP_THIN_DUPACK: - val = tp->thin_dupack; + val = 0; break; case TCP_REPAIR: @@ -3027,6 +3055,10 @@ static int do_tcp_getsockopt(struct sock *sk, int level, val = icsk->icsk_accept_queue.fastopenq.max_qlen; break; + case TCP_FASTOPEN_CONNECT: + val = tp->fastopen_connect; + break; + case TCP_TIMESTAMP: val = tcp_time_stamp + tp->tsoffset; break; @@ -3340,6 +3372,7 @@ void __init tcp_init(void) percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL); percpu_counter_init(&tcp_orphan_count, 0, GFP_KERNEL); + inet_hashinfo_init(&tcp_hashinfo); tcp_hashinfo.bind_bucket_cachep = kmem_cache_create("tcp_bind_bucket", sizeof(struct inet_bind_bucket), 0, @@ -3383,10 +3416,7 @@ void __init tcp_init(void) cnt = tcp_hashinfo.ehash_mask + 1; - - tcp_death_row.sysctl_max_tw_buckets = cnt / 2; sysctl_tcp_max_orphans = cnt / 2; - sysctl_max_syn_backlog = max(128, cnt / 256); tcp_init_mem(); /* Set per-socket limits to no more than 1/128 the pressure threshold */ @@ -3405,6 +3435,7 @@ void __init tcp_init(void) pr_info("Hash tables configured (established %u bind %u)\n", tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size); + tcp_v4_init(); tcp_metrics_init(); BUG_ON(tcp_register_congestion_control(&tcp_reno) != 0); tcp_tasklet_init(); |