diff options
author | Eric Dumazet <edumazet@google.com> | 2018-04-16 10:33:35 -0700 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2018-04-16 18:26:37 -0400 |
commit | d1361840f8c519eaee9a78ffe09e4f0a1b586846 (patch) | |
tree | 86a904ade99a93544e0817cda7dc842b12f9b833 /net/ipv4 | |
parent | 10b19aeac1700c3ba94fb50583a766d9cdaf1e9e (diff) | |
download | linux-d1361840f8c519eaee9a78ffe09e4f0a1b586846.tar.bz2 |
tcp: fix SO_RCVLOWAT and RCVBUF autotuning
Applications might use SO_RCVLOWAT on TCP socket hoping to receive
one [E]POLLIN event only when a given amount of bytes are ready in socket
receive queue.
Problem is that receive autotuning is not aware of this constraint,
meaning sk_rcvbuf might be too small to allow all bytes to be stored.
Add a new (struct proto_ops)->set_rcvlowat method so that a protocol
can override the default setsockopt(SO_RCVLOWAT) behavior.
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/af_inet.c | 1 | ||||
-rw-r--r-- | net/ipv4/tcp.c | 21 |
2 files changed, 22 insertions, 0 deletions
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index eaed0367e669..f5c562aaef35 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1006,6 +1006,7 @@ const struct proto_ops inet_stream_ops = { .compat_getsockopt = compat_sock_common_getsockopt, .compat_ioctl = inet_compat_ioctl, #endif + .set_rcvlowat = tcp_set_rcvlowat, }; EXPORT_SYMBOL(inet_stream_ops); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index bccc4c270087..0abd8d1d3d1d 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1701,6 +1701,27 @@ int tcp_peek_len(struct socket *sock) } EXPORT_SYMBOL(tcp_peek_len); +/* Make sure sk_rcvbuf is big enough to satisfy SO_RCVLOWAT hint */ +int tcp_set_rcvlowat(struct sock *sk, int val) +{ + sk->sk_rcvlowat = val ? : 1; + if (sk->sk_userlocks & SOCK_RCVBUF_LOCK) + return 0; + + /* val comes from user space and might be close to INT_MAX */ + val <<= 1; + if (val < 0) + val = INT_MAX; + + val = min(val, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]); + if (val > sk->sk_rcvbuf) { + sk->sk_rcvbuf = val; + tcp_sk(sk)->window_clamp = tcp_win_from_space(sk, val); + } + return 0; +} +EXPORT_SYMBOL(tcp_set_rcvlowat); + static void tcp_update_recv_tstamps(struct sk_buff *skb, struct scm_timestamping *tss) { |