From f9dd09c7f7199685601d75882447a6598be8a3e0 Mon Sep 17 00:00:00 2001
From: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Date: Fri, 6 Nov 2009 00:43:42 -0800
Subject: netfilter: nf_nat: fix NAT issue in 2.6.30.4+

Vitezslav Samel discovered that since 2.6.30.4+ active FTP can not work
over NAT. The "cause" of the problem was a fix of unacknowledged data
detection with NAT (commit a3a9f79e361e864f0e9d75ebe2a0cb43d17c4272).
However, actually, that fix uncovered a long standing bug in TCP conntrack:
when NAT was enabled, we simply updated the max of the right edge of
the segments we have seen (td_end), by the offset NAT produced with
changing IP/port in the data. However, we did not update the other parameter
(td_maxend) which is affected by the NAT offset. Thus that could drift
away from the correct value and thus resulted breaking active FTP.

The patch below fixes the issue by *not* updating the conntrack parameters
from NAT, but instead taking into account the NAT offsets in conntrack in a
consistent way. (Updating from NAT would be more harder and expensive because
it'd need to re-calculate parameters we already calculated in conntrack.)

Signed-off-by: Jozsef Kadlecsik <kadlec@blackhole.kfki.hu>
Signed-off-by: Patrick McHardy <kaber@trash.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/netfilter/nf_nat_core.c   |  3 +++
 net/ipv4/netfilter/nf_nat_helper.c | 34 +++++++++++++++++++++++-----------
 2 files changed, 26 insertions(+), 11 deletions(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c
index 68afc6ecd343..fe1a64479dd0 100644
--- a/net/ipv4/netfilter/nf_nat_core.c
+++ b/net/ipv4/netfilter/nf_nat_core.c
@@ -750,6 +750,8 @@ static int __init nf_nat_init(void)
 	BUG_ON(nfnetlink_parse_nat_setup_hook != NULL);
 	rcu_assign_pointer(nfnetlink_parse_nat_setup_hook,
 			   nfnetlink_parse_nat_setup);
+	BUG_ON(nf_ct_nat_offset != NULL);
+	rcu_assign_pointer(nf_ct_nat_offset, nf_nat_get_offset);
 	return 0;
 
  cleanup_extend:
@@ -764,6 +766,7 @@ static void __exit nf_nat_cleanup(void)
 	nf_ct_extend_unregister(&nat_extend);
 	rcu_assign_pointer(nf_nat_seq_adjust_hook, NULL);
 	rcu_assign_pointer(nfnetlink_parse_nat_setup_hook, NULL);
+	rcu_assign_pointer(nf_ct_nat_offset, NULL);
 	synchronize_net();
 }
 
diff --git a/net/ipv4/netfilter/nf_nat_helper.c b/net/ipv4/netfilter/nf_nat_helper.c
index 09172a65d9b6..f9520fa3aba9 100644
--- a/net/ipv4/netfilter/nf_nat_helper.c
+++ b/net/ipv4/netfilter/nf_nat_helper.c
@@ -73,6 +73,28 @@ adjust_tcp_sequence(u32 seq,
 	DUMP_OFFSET(this_way);
 }
 
+/* Get the offset value, for conntrack */
+s16 nf_nat_get_offset(const struct nf_conn *ct,
+		      enum ip_conntrack_dir dir,
+		      u32 seq)
+{
+	struct nf_conn_nat *nat = nfct_nat(ct);
+	struct nf_nat_seq *this_way;
+	s16 offset;
+
+	if (!nat)
+		return 0;
+
+	this_way = &nat->seq[dir];
+	spin_lock_bh(&nf_nat_seqofs_lock);
+	offset = after(seq, this_way->correction_pos)
+		 ? this_way->offset_after : this_way->offset_before;
+	spin_unlock_bh(&nf_nat_seqofs_lock);
+
+	return offset;
+}
+EXPORT_SYMBOL_GPL(nf_nat_get_offset);
+
 /* Frobs data inside this packet, which is linear. */
 static void mangle_contents(struct sk_buff *skb,
 			    unsigned int dataoff,
@@ -189,11 +211,6 @@ nf_nat_mangle_tcp_packet(struct sk_buff *skb,
 		adjust_tcp_sequence(ntohl(tcph->seq),
 				    (int)rep_len - (int)match_len,
 				    ct, ctinfo);
-		/* Tell TCP window tracking about seq change */
-		nf_conntrack_tcp_update(skb, ip_hdrlen(skb),
-					ct, CTINFO2DIR(ctinfo),
-					(int)rep_len - (int)match_len);
-
 		nf_conntrack_event_cache(IPCT_NATSEQADJ, ct);
 	}
 	return 1;
@@ -415,12 +432,7 @@ nf_nat_seq_adjust(struct sk_buff *skb,
 	tcph->seq = newseq;
 	tcph->ack_seq = newack;
 
-	if (!nf_nat_sack_adjust(skb, tcph, ct, ctinfo))
-		return 0;
-
-	nf_conntrack_tcp_update(skb, ip_hdrlen(skb), ct, dir, seqoff);
-
-	return 1;
+	return nf_nat_sack_adjust(skb, tcph, ct, ctinfo);
 }
 
 /* Setup NAT on this expected conntrack so it follows master. */
-- 
cgit v1.2.3


From 23ca0c989e46924393f1d54bec84801d035dd28e Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 6 Nov 2009 10:37:41 +0000
Subject: ipip: Fix handling of DF packets when pmtudisc is OFF

RFC 2003 requires the outer header to have DF set if DF is set
on the inner header, even when PMTU discovery is off for the
tunnel.  Our implementation does exactly that.

For this to work properly the IPIP gateway also needs to engate
in PMTU when the inner DF bit is set.  As otherwise the original
host would not be able to carry out its PMTU successfully since
part of the path is only visible to the gateway.

Unfortunately when the tunnel PMTU discovery setting is off, we
do not collect the necessary soft state, resulting in blackholes
when the original host tries to perform PMTU discovery.

This problem is not reproducible on the IPIP gateway itself as
the inner packet usually has skb->local_df set.  This is not
correctly cleared (an unrelated bug) when the packet passes
through the tunnel, which allows fragmentation to occur.  For
hosts behind the IPIP gateway it is readily visible with a simple
ping.

This patch fixes the problem by performing PMTU discovery for
all packets with the inner DF bit set, regardless of the PMTU
discovery setting on the tunnel itself.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ipip.c | 32 +++++++++++++++++---------------
 1 file changed, 17 insertions(+), 15 deletions(-)

(limited to 'net/ipv4')

diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 08ccd344de7a..ae40ed1ba560 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -438,25 +438,27 @@ static netdev_tx_t ipip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
 		goto tx_error;
 	}
 
-	if (tiph->frag_off)
+	df |= old_iph->frag_off & htons(IP_DF);
+
+	if (df) {
 		mtu = dst_mtu(&rt->u.dst) - sizeof(struct iphdr);
-	else
-		mtu = skb_dst(skb) ? dst_mtu(skb_dst(skb)) : dev->mtu;
 
-	if (mtu < 68) {
-		stats->collisions++;
-		ip_rt_put(rt);
-		goto tx_error;
-	}
-	if (skb_dst(skb))
-		skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
+		if (mtu < 68) {
+			stats->collisions++;
+			ip_rt_put(rt);
+			goto tx_error;
+		}
 
-	df |= (old_iph->frag_off&htons(IP_DF));
+		if (skb_dst(skb))
+			skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu);
 
-	if ((old_iph->frag_off&htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) {
-		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
-		ip_rt_put(rt);
-		goto tx_error;
+		if ((old_iph->frag_off & htons(IP_DF)) &&
+		    mtu < ntohs(old_iph->tot_len)) {
+			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+				  htonl(mtu));
+			ip_rt_put(rt);
+			goto tx_error;
+		}
 	}
 
 	if (tunnel->err_count > 0) {
-- 
cgit v1.2.3