summaryrefslogtreecommitdiffstats
path: root/net
diff options
context:
space:
mode:
authorFan Du <fan.du@intel.com>2015-02-10 09:53:16 +0800
committerDavid S. Miller <davem@davemloft.net>2015-02-09 18:45:00 -0800
commitb0f9ca53cbb103e9240a29a974e0b6085e58f9f7 (patch)
tree219e761b507aa959e6fd38c79adf3f9bc4be4b4d /net
parentf217d6ca4a8cde473358637aa29daaaa3d0b57a9 (diff)
downloadlinux-b0f9ca53cbb103e9240a29a974e0b6085e58f9f7.tar.bz2
ipv4: Namespecify TCP PMTU mechanism
Packetization Layer Path MTU Discovery works separately beside Path MTU Discovery at IP level, different net namespace has various requirements on which one to chose, e.g., a virutalized container instance would require TCP PMTU to probe an usable effective mtu for underlying tunnel, while the host would employ classical ICMP based PMTU to function. Hence making TCP PMTU mechanism per net namespace to decouple two functionality. Furthermore the probe base MSS should also be configured separately for each namespace. Signed-off-by: Fan Du <fan.du@intel.com> Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'net')
-rw-r--r--net/ipv4/sysctl_net_ipv4.c28
-rw-r--r--net/ipv4/tcp_ipv4.c1
-rw-r--r--net/ipv4/tcp_output.c8
-rw-r--r--net/ipv4/tcp_timer.c7
4 files changed, 23 insertions, 21 deletions
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 82601a68cf90..d151539da8e6 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -604,20 +604,6 @@ static struct ctl_table ipv4_table[] = {
.proc_handler = proc_tcp_congestion_control,
},
{
- .procname = "tcp_mtu_probing",
- .data = &sysctl_tcp_mtu_probing,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
- .procname = "tcp_base_mss",
- .data = &sysctl_tcp_base_mss,
- .maxlen = sizeof(int),
- .mode = 0644,
- .proc_handler = proc_dointvec,
- },
- {
.procname = "tcp_workaround_signed_windows",
.data = &sysctl_tcp_workaround_signed_windows,
.maxlen = sizeof(int),
@@ -883,6 +869,20 @@ static struct ctl_table ipv4_net_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec,
},
+ {
+ .procname = "tcp_mtu_probing",
+ .data = &init_net.ipv4.sysctl_tcp_mtu_probing,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
+ {
+ .procname = "tcp_base_mss",
+ .data = &init_net.ipv4.sysctl_tcp_base_mss,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
{ }
};
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 67bc95fb5d9e..5a2dfed4783b 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2459,6 +2459,7 @@ static int __net_init tcp_sk_init(struct net *net)
*per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk;
}
net->ipv4.sysctl_tcp_ecn = 2;
+ net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS;
return 0;
fail:
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 4fcc9a768849..a2a796c5536b 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -59,9 +59,6 @@ int sysctl_tcp_limit_output_bytes __read_mostly = 131072;
*/
int sysctl_tcp_tso_win_divisor __read_mostly = 3;
-int sysctl_tcp_mtu_probing __read_mostly = 0;
-int sysctl_tcp_base_mss __read_mostly = TCP_BASE_MSS;
-
/* By default, RFC2861 behavior. */
int sysctl_tcp_slow_start_after_idle __read_mostly = 1;
@@ -1350,11 +1347,12 @@ void tcp_mtup_init(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct inet_connection_sock *icsk = inet_csk(sk);
+ struct net *net = sock_net(sk);
- icsk->icsk_mtup.enabled = sysctl_tcp_mtu_probing > 1;
+ icsk->icsk_mtup.enabled = net->ipv4.sysctl_tcp_mtu_probing > 1;
icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + sizeof(struct tcphdr) +
icsk->icsk_af_ops->net_header_len;
- icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, sysctl_tcp_base_mss);
+ icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, net->ipv4.sysctl_tcp_base_mss);
icsk->icsk_mtup.probe_size = 0;
}
EXPORT_SYMBOL(tcp_mtup_init);
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index 1829c7fbc77e..0732b787904e 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -101,17 +101,20 @@ static int tcp_orphan_retries(struct sock *sk, int alive)
static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk)
{
+ struct net *net = sock_net(sk);
+
/* Black hole detection */
- if (sysctl_tcp_mtu_probing) {
+ if (net->ipv4.sysctl_tcp_mtu_probing) {
if (!icsk->icsk_mtup.enabled) {
icsk->icsk_mtup.enabled = 1;
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
} else {
+ struct net *net = sock_net(sk);
struct tcp_sock *tp = tcp_sk(sk);
int mss;
mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1;
- mss = min(sysctl_tcp_base_mss, mss);
+ mss = min(net->ipv4.sysctl_tcp_base_mss, mss);
mss = max(mss, 68 - tp->tcp_header_len);
icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss);
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);