summaryrefslogtreecommitdiffstats
path: root/net/ipv4/tcp.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2013-09-05 14:54:29 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2013-09-05 14:54:29 -0700
commitcc998ff8811530be521f6b316f37ab7676a07938 (patch)
treea054b3bf4b2ef406bf756a6cfc9be2f9115f17ae /net/ipv4/tcp.c
parent57d730924d5cc2c3e280af16a9306587c3a511db (diff)
parent0d40f75bdab241868c0eb6f97aef9f8b3a66f7b3 (diff)
downloadlinux-cc998ff8811530be521f6b316f37ab7676a07938.tar.bz2
Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next
Pull networking changes from David Miller: "Noteworthy changes this time around: 1) Multicast rejoin support for team driver, from Jiri Pirko. 2) Centralize and simplify TCP RTT measurement handling in order to reduce the impact of bad RTO seeding from SYN/ACKs. Also, when both timestamps and local RTT measurements are available prefer the later because there are broken middleware devices which scramble the timestamp. From Yuchung Cheng. 3) Add TCP_NOTSENT_LOWAT socket option to limit the amount of kernel memory consumed to queue up unsend user data. From Eric Dumazet. 4) Add a "physical port ID" abstraction for network devices, from Jiri Pirko. 5) Add a "suppress" operation to influence fib_rules lookups, from Stefan Tomanek. 6) Add a networking development FAQ, from Paul Gortmaker. 7) Extend the information provided by tcp_probe and add ipv6 support, from Daniel Borkmann. 8) Use RCU locking more extensively in openvswitch data paths, from Pravin B Shelar. 9) Add SCTP support to openvswitch, from Joe Stringer. 10) Add EF10 chip support to SFC driver, from Ben Hutchings. 11) Add new SYNPROXY netfilter target, from Patrick McHardy. 12) Compute a rate approximation for sending in TCP sockets, and use this to more intelligently coalesce TSO frames. Furthermore, add a new packet scheduler which takes advantage of this estimate when available. From Eric Dumazet. 13) Allow AF_PACKET fanouts with random selection, from Daniel Borkmann. 14) Add ipv6 support to vxlan driver, from Cong Wang" Resolved conflicts as per discussion. * git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next: (1218 commits) openvswitch: Fix alignment of struct sw_flow_key. netfilter: Fix build errors with xt_socket.c tcp: Add missing braces to do_tcp_setsockopt caif: Add missing braces to multiline if in cfctrl_linkup_request bnx2x: Add missing braces in bnx2x:bnx2x_link_initialize vxlan: Fix kernel panic on device delete. net: mvneta: implement ->ndo_do_ioctl() to support PHY ioctls net: mvneta: properly disable HW PHY polling and ensure adjust_link() works icplus: Use netif_running to determine device state ethernet/arc/arc_emac: Fix huge delays in large file copies tuntap: orphan frags before trying to set tx timestamp tuntap: purge socket error queue on detach qlcnic: use standard NAPI weights ipv6:introduce function to find route for redirect bnx2x: VF RSS support - VF side bnx2x: VF RSS support - PF side vxlan: Notify drivers for listening UDP port changes net: usbnet: update addr_assign_type if appropriate driver/net: enic: update enic maintainers and driver driver/net: enic: Exposing symbols for Cisco's low latency driver ...
Diffstat (limited to 'net/ipv4/tcp.c')
-rw-r--r--net/ipv4/tcp.c46
1 files changed, 34 insertions, 12 deletions
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index b2f6c74861af..6e5617b9f9db 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -283,6 +283,8 @@
int sysctl_tcp_fin_timeout __read_mostly = TCP_FIN_TIMEOUT;
+int sysctl_tcp_min_tso_segs __read_mostly = 2;
+
struct percpu_counter tcp_orphan_count;
EXPORT_SYMBOL_GPL(tcp_orphan_count);
@@ -410,10 +412,6 @@ void tcp_init_sock(struct sock *sk)
icsk->icsk_sync_mss = tcp_sync_mss;
- /* Presumed zeroed, in order of appearance:
- * cookie_in_always, cookie_out_never,
- * s_data_constant, s_data_in, s_data_out
- */
sk->sk_sndbuf = sysctl_tcp_wmem[1];
sk->sk_rcvbuf = sysctl_tcp_rmem[1];
@@ -499,7 +497,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
mask |= POLLIN | POLLRDNORM;
if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
- if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
+ if (sk_stream_is_writeable(sk)) {
mask |= POLLOUT | POLLWRNORM;
} else { /* send SIGIO later */
set_bit(SOCK_ASYNC_NOSPACE,
@@ -510,7 +508,7 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait)
* wspace test but before the flags are set,
* IO signal will be lost.
*/
- if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
+ if (sk_stream_is_writeable(sk))
mask |= POLLOUT | POLLWRNORM;
}
} else
@@ -789,12 +787,28 @@ static unsigned int tcp_xmit_size_goal(struct sock *sk, u32 mss_now,
xmit_size_goal = mss_now;
if (large_allowed && sk_can_gso(sk)) {
- xmit_size_goal = ((sk->sk_gso_max_size - 1) -
- inet_csk(sk)->icsk_af_ops->net_header_len -
- inet_csk(sk)->icsk_ext_hdr_len -
- tp->tcp_header_len);
+ u32 gso_size, hlen;
+
+ /* Maybe we should/could use sk->sk_prot->max_header here ? */
+ hlen = inet_csk(sk)->icsk_af_ops->net_header_len +
+ inet_csk(sk)->icsk_ext_hdr_len +
+ tp->tcp_header_len;
+
+ /* Goal is to send at least one packet per ms,
+ * not one big TSO packet every 100 ms.
+ * This preserves ACK clocking and is consistent
+ * with tcp_tso_should_defer() heuristic.
+ */
+ gso_size = sk->sk_pacing_rate / (2 * MSEC_PER_SEC);
+ gso_size = max_t(u32, gso_size,
+ sysctl_tcp_min_tso_segs * mss_now);
+
+ xmit_size_goal = min_t(u32, gso_size,
+ sk->sk_gso_max_size - 1 - hlen);
- /* TSQ : try to have two TSO segments in flight */
+ /* TSQ : try to have at least two segments in flight
+ * (one in NIC TX ring, another in Qdisc)
+ */
xmit_size_goal = min_t(u32, xmit_size_goal,
sysctl_tcp_limit_output_bytes >> 1);
@@ -2454,10 +2468,11 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
case TCP_THIN_DUPACK:
if (val < 0 || val > 1)
err = -EINVAL;
- else
+ else {
tp->thin_dupack = val;
if (tp->thin_dupack)
tcp_disable_early_retrans(tp);
+ }
break;
case TCP_REPAIR:
@@ -2638,6 +2653,10 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
else
tp->tsoffset = val - tcp_time_stamp;
break;
+ case TCP_NOTSENT_LOWAT:
+ tp->notsent_lowat = val;
+ sk->sk_write_space(sk);
+ break;
default:
err = -ENOPROTOOPT;
break;
@@ -2854,6 +2873,9 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
case TCP_TIMESTAMP:
val = tcp_time_stamp + tp->tsoffset;
break;
+ case TCP_NOTSENT_LOWAT:
+ val = tp->notsent_lowat;
+ break;
default:
return -ENOPROTOOPT;
}