From e87a468eb97da35d8dc00e8fa9828b4de4ab69d0 Mon Sep 17 00:00:00 2001
From: Vlad Yasevich <vyasevich@gmail.com>
Date: Thu, 14 May 2015 20:34:08 -0400
Subject: ipv6: Fix udp checksums with raw sockets

It was reported that trancerout6 would cause
a kernel to crash when trying to compute checksums
on raw UDP packets.  The cause was the check in
__ip6_append_data that would attempt to use
partial checksums on the packet.  However,
raw sockets do not initialize partial checksum
fields so partial checksums can't be used.

Solve this the same way IPv4 does it.  raw sockets
pass transhdrlen value of 0 to ip_append_data which
causes the checksum to be computed in software.  Use
the same check in ip6_append_data (check transhdrlen).

Reported-by: Wolfgang Walter <linux@stwm.de>
CC: Wolfgang Walter <linux@stwm.de>
CC: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Vladislav Yasevich <vyasevic@redhat.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_output.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index c21777565c58..bc09cb97b840 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -1300,8 +1300,10 @@ emsgsize:
 
 	/* If this is the first and only packet and device
 	 * supports checksum offloading, let's use it.
+	 * Use transhdrlen, same as IPv4, because partial
+	 * sums only work when transhdrlen is set.
 	 */
-	if (!skb && sk->sk_protocol == IPPROTO_UDP &&
+	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
 	    length + fragheaderlen < mtu &&
 	    rt->dst.dev->features & NETIF_F_V6_CSUM &&
 	    !exthdrlen)
-- 
cgit v1.2.3


From 21858cd02dabcf290564cbf4769b101eba54d7bb Mon Sep 17 00:00:00 2001
From: Florent Fourcot <florent.fourcot@enst-bretagne.fr>
Date: Sat, 16 May 2015 00:24:59 +0200
Subject: tcp/ipv6: fix flow label setting in TIME_WAIT state

commit 1d13a96c74fc ("ipv6: tcp: fix flowlabel value in ACK messages
send from TIME_WAIT") added the flow label in the last TCP packets.
Unfortunately, it was not casted properly.

This patch replace the buggy shift with be32_to_cpu/cpu_to_be32.

Fixes: 1d13a96c74fc ("ipv6: tcp: fix flowlabel value in ACK messages")
Reported-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Florent Fourcot <florent.fourcot@enst-bretagne.fr>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_minisocks.c | 2 +-
 net/ipv6/tcp_ipv6.c      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index e5d7649136fc..b5732a54f2ad 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -300,7 +300,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo)
 			tw->tw_v6_daddr = sk->sk_v6_daddr;
 			tw->tw_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
 			tw->tw_tclass = np->tclass;
-			tw->tw_flowlabel = np->flow_label >> 12;
+			tw->tw_flowlabel = be32_to_cpu(np->flow_label & IPV6_FLOWLABEL_MASK);
 			tw->tw_ipv6only = sk->sk_ipv6only;
 		}
 #endif
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index b6575d665568..3adffb300238 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -914,7 +914,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
 			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
 			tcp_time_stamp + tcptw->tw_ts_offset,
 			tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
-			tw->tw_tclass, (tw->tw_flowlabel << 12));
+			tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel));
 
 	inet_twsk_put(tw);
 }
-- 
cgit v1.2.3


From 33b4b015e1a1ca7a8fdce40af5e71642a8ea355c Mon Sep 17 00:00:00 2001
From: Henning Rogge <hrogge@gmail.com>
Date: Mon, 18 May 2015 21:08:49 +0200
Subject: net/ipv6/udp: Fix ipv6 multicast socket filter regression

Commit <5cf3d46192fc> ("udp: Simplify__udp*_lib_mcast_deliver")
simplified the filter for incoming IPv6 multicast but removed
the check of the local socket address and the UDP destination
address.

This patch restores the filter to prevent sockets bound to a IPv6
multicast IP to receive other UDP traffic link unicast.

Signed-off-by: Henning Rogge <hrogge@gmail.com>
Fixes: 5cf3d46192fc ("udp: Simplify__udp*_lib_mcast_deliver")
Cc: "David S. Miller" <davem@davemloft.net>
Acked-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/udp.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 3477c919fcc8..c2ec41617a35 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -731,7 +731,9 @@ static bool __udp_v6_is_mcast_sock(struct net *net, struct sock *sk,
 	    (inet->inet_dport && inet->inet_dport != rmt_port) ||
 	    (!ipv6_addr_any(&sk->sk_v6_daddr) &&
 		    !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr)) ||
-	    (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif))
+	    (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif) ||
+	    (!ipv6_addr_any(&sk->sk_v6_rcv_saddr) &&
+		    !ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr)))
 		return false;
 	if (!inet6_mc_check(sk, loc_addr, rmt_addr))
 		return false;
-- 
cgit v1.2.3


From 1086bbe97a074844188c6c988fa0b1a98c3ccbb9 Mon Sep 17 00:00:00 2001
From: Dave Jones <davej@codemonkey.org.uk>
Date: Tue, 19 May 2015 20:55:17 -0400
Subject: netfilter: ensure number of counters is >0 in do_replace()

After improving setsockopt() coverage in trinity, I started triggering
vmalloc failures pretty reliably from this code path:

warn_alloc_failed+0xe9/0x140
__vmalloc_node_range+0x1be/0x270
vzalloc+0x4b/0x50
__do_replace+0x52/0x260 [ip_tables]
do_ipt_set_ctl+0x15d/0x1d0 [ip_tables]
nf_setsockopt+0x65/0x90
ip_setsockopt+0x61/0xa0
raw_setsockopt+0x16/0x60
sock_common_setsockopt+0x14/0x20
SyS_setsockopt+0x71/0xd0

It turns out we don't validate that the num_counters field in the
struct we pass in from userspace is initialized.

The same problem also exists in ebtables, arptables, ipv6, and the
compat variants.

Signed-off-by: Dave Jones <davej@codemonkey.org.uk>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/bridge/netfilter/ebtables.c | 4 ++++
 net/ipv4/netfilter/arp_tables.c | 6 ++++++
 net/ipv4/netfilter/ip_tables.c  | 6 ++++++
 net/ipv6/netfilter/ip6_tables.c | 6 ++++++
 4 files changed, 22 insertions(+)

(limited to 'net/ipv6')

diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index 91180a7fc943..24c7c96bf5f8 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1117,6 +1117,8 @@ static int do_replace(struct net *net, const void __user *user,
 		return -ENOMEM;
 	if (tmp.num_counters >= INT_MAX / sizeof(struct ebt_counter))
 		return -ENOMEM;
+	if (tmp.num_counters == 0)
+		return -EINVAL;
 
 	tmp.name[sizeof(tmp.name) - 1] = 0;
 
@@ -2159,6 +2161,8 @@ static int compat_copy_ebt_replace_from_user(struct ebt_replace *repl,
 		return -ENOMEM;
 	if (tmp.num_counters >= INT_MAX / sizeof(struct ebt_counter))
 		return -ENOMEM;
+	if (tmp.num_counters == 0)
+		return -EINVAL;
 
 	memcpy(repl, &tmp, offsetof(struct ebt_replace, hook_entry));
 
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 13bfe84bf3ca..a61200754f4b 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -1075,6 +1075,9 @@ static int do_replace(struct net *net, const void __user *user,
 	/* overflow check */
 	if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
 		return -ENOMEM;
+	if (tmp.num_counters == 0)
+		return -EINVAL;
+
 	tmp.name[sizeof(tmp.name)-1] = 0;
 
 	newinfo = xt_alloc_table_info(tmp.size);
@@ -1499,6 +1502,9 @@ static int compat_do_replace(struct net *net, void __user *user,
 		return -ENOMEM;
 	if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
 		return -ENOMEM;
+	if (tmp.num_counters == 0)
+		return -EINVAL;
+
 	tmp.name[sizeof(tmp.name)-1] = 0;
 
 	newinfo = xt_alloc_table_info(tmp.size);
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index c69db7fa25ee..2d0e265fef6e 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1262,6 +1262,9 @@ do_replace(struct net *net, const void __user *user, unsigned int len)
 	/* overflow check */
 	if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
 		return -ENOMEM;
+	if (tmp.num_counters == 0)
+		return -EINVAL;
+
 	tmp.name[sizeof(tmp.name)-1] = 0;
 
 	newinfo = xt_alloc_table_info(tmp.size);
@@ -1809,6 +1812,9 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
 		return -ENOMEM;
 	if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
 		return -ENOMEM;
+	if (tmp.num_counters == 0)
+		return -EINVAL;
+
 	tmp.name[sizeof(tmp.name)-1] = 0;
 
 	newinfo = xt_alloc_table_info(tmp.size);
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 1a732a1d3c8e..62f5b0d0bc9b 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1275,6 +1275,9 @@ do_replace(struct net *net, const void __user *user, unsigned int len)
 	/* overflow check */
 	if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
 		return -ENOMEM;
+	if (tmp.num_counters == 0)
+		return -EINVAL;
+
 	tmp.name[sizeof(tmp.name)-1] = 0;
 
 	newinfo = xt_alloc_table_info(tmp.size);
@@ -1822,6 +1825,9 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
 		return -ENOMEM;
 	if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
 		return -ENOMEM;
+	if (tmp.num_counters == 0)
+		return -EINVAL;
+
 	tmp.name[sizeof(tmp.name)-1] = 0;
 
 	newinfo = xt_alloc_table_info(tmp.size);
-- 
cgit v1.2.3


From 35f1b4e96b9258a3668872b1139c51e5a23eb876 Mon Sep 17 00:00:00 2001
From: Michal Kubeček <mkubecek@suse.cz>
Date: Mon, 18 May 2015 20:53:55 +0200
Subject: ipv6: do not delete previously existing ECMP routes if add fails

If adding a nexthop of an IPv6 multipath route fails, comment in
ip6_route_multipath() says we are going to delete all nexthops already
added. However, current implementation deletes even the routes it
hasn't even tried to add yet. For example, running

  ip route add 1234:5678::/64 \
      nexthop via fe80::aa dev dummy1 \
      nexthop via fe80::bb dev dummy1 \
      nexthop via fe80::cc dev dummy1

twice results in removing all routes first command added.

Limit the second (delete) run to nexthops that succeeded in the first
(add) run.

Fixes: 51ebd3181572 ("ipv6: add support of equal cost multipath (ECMP)")
Signed-off-by: Michal Kubecek <mkubecek@suse.cz>
Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/route.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index d3588885f097..3821a3517478 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2504,9 +2504,9 @@ static int ip6_route_multipath(struct fib6_config *cfg, int add)
 	int attrlen;
 	int err = 0, last_err = 0;
 
+	remaining = cfg->fc_mp_len;
 beginning:
 	rtnh = (struct rtnexthop *)cfg->fc_mp;
-	remaining = cfg->fc_mp_len;
 
 	/* Parse a Multipath Entry */
 	while (rtnh_ok(rtnh, remaining)) {
@@ -2536,6 +2536,7 @@ beginning:
 				 * next hops that have been already added.
 				 */
 				add = 0;
+				remaining = cfg->fc_mp_len - remaining;
 				goto beginning;
 			}
 		}
-- 
cgit v1.2.3


From 27596472473a02cfef2908a6bcda7e55264ba6b7 Mon Sep 17 00:00:00 2001
From: Michal Kubeček <mkubecek@suse.cz>
Date: Mon, 18 May 2015 20:54:00 +0200
Subject: ipv6: fix ECMP route replacement

When replacing an IPv6 multipath route with "ip route replace", i.e.
NLM_F_CREATE | NLM_F_REPLACE, fib6_add_rt2node() replaces only first
matching route without fixing its siblings, resulting in corrupted
siblings linked list; removing one of the siblings can then end in an
infinite loop.

IPv6 ECMP implementation is a bit different from IPv4 so that route
replacement cannot work in exactly the same way. This should be a
reasonable approximation:

1. If the new route is ECMP-able and there is a matching ECMP-able one
already, replace it and all its siblings (if any).

2. If the new route is ECMP-able and no matching ECMP-able route exists,
replace first matching non-ECMP-able (if any) or just add the new one.

3. If the new route is not ECMP-able, replace first matching
non-ECMP-able route (if any) or add the new route.

We also need to remove the NLM_F_REPLACE flag after replacing old
route(s) by first nexthop of an ECMP route so that each subsequent
nexthop does not replace previous one.

Fixes: 51ebd3181572 ("ipv6: add support of equal cost multipath (ECMP)")
Signed-off-by: Michal Kubecek <mkubecek@suse.cz>
Acked-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_fib.c | 39 +++++++++++++++++++++++++++++++++++++--
 net/ipv6/route.c   | 11 +++++++----
 2 files changed, 44 insertions(+), 6 deletions(-)

(limited to 'net/ipv6')

diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 96dbffff5a24..bde57b113009 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -693,6 +693,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 {
 	struct rt6_info *iter = NULL;
 	struct rt6_info **ins;
+	struct rt6_info **fallback_ins = NULL;
 	int replace = (info->nlh &&
 		       (info->nlh->nlmsg_flags & NLM_F_REPLACE));
 	int add = (!info->nlh ||
@@ -716,8 +717,13 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 			    (info->nlh->nlmsg_flags & NLM_F_EXCL))
 				return -EEXIST;
 			if (replace) {
-				found++;
-				break;
+				if (rt_can_ecmp == rt6_qualify_for_ecmp(iter)) {
+					found++;
+					break;
+				}
+				if (rt_can_ecmp)
+					fallback_ins = fallback_ins ?: ins;
+				goto next_iter;
 			}
 
 			if (iter->dst.dev == rt->dst.dev &&
@@ -753,9 +759,17 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
 		if (iter->rt6i_metric > rt->rt6i_metric)
 			break;
 
+next_iter:
 		ins = &iter->dst.rt6_next;
 	}
 
+	if (fallback_ins && !found) {
+		/* No ECMP-able route found, replace first non-ECMP one */
+		ins = fallback_ins;
+		iter = *ins;
+		found++;
+	}
+
 	/* Reset round-robin state, if necessary */
 	if (ins == &fn->leaf)
 		fn->rr_ptr = NULL;
@@ -815,6 +829,8 @@ add:
 		}
 
 	} else {
+		int nsiblings;
+
 		if (!found) {
 			if (add)
 				goto add;
@@ -835,8 +851,27 @@ add:
 			info->nl_net->ipv6.rt6_stats->fib_route_nodes++;
 			fn->fn_flags |= RTN_RTINFO;
 		}
+		nsiblings = iter->rt6i_nsiblings;
 		fib6_purge_rt(iter, fn, info->nl_net);
 		rt6_release(iter);
+
+		if (nsiblings) {
+			/* Replacing an ECMP route, remove all siblings */
+			ins = &rt->dst.rt6_next;
+			iter = *ins;
+			while (iter) {
+				if (rt6_qualify_for_ecmp(iter)) {
+					*ins = iter->dst.rt6_next;
+					fib6_purge_rt(iter, fn, info->nl_net);
+					rt6_release(iter);
+					nsiblings--;
+				} else {
+					ins = &iter->dst.rt6_next;
+				}
+				iter = *ins;
+			}
+			WARN_ON(nsiblings != 0);
+		}
 	}
 
 	return 0;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 3821a3517478..c73ae5039e46 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -2541,11 +2541,14 @@ beginning:
 			}
 		}
 		/* Because each route is added like a single route we remove
-		 * this flag after the first nexthop (if there is a collision,
-		 * we have already fail to add the first nexthop:
-		 * fib6_add_rt2node() has reject it).
+		 * these flags after the first nexthop: if there is a collision,
+		 * we have already failed to add the first nexthop:
+		 * fib6_add_rt2node() has rejected it; when replacing, old
+		 * nexthops have been replaced by first new, the rest should
+		 * be added to it.
 		 */
-		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL;
+		cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL |
+						     NLM_F_REPLACE);
 		rtnh = rtnh_next(rtnh, &remaining);
 	}
 
-- 
cgit v1.2.3