From 4df98e76cde7c64b5606d82584c65dda4151bd6a Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Mon, 16 Dec 2013 12:36:44 +0100 Subject: ipv6: pmtudisc setting not respected with UFO/CORK Sockets marked with IPV6_PMTUDISC_PROBE (or later IPV6_PMTUDISC_INTERFACE) don't respect this setting when the outgoing interface supports UFO. We had the same problem in IPv4, which was fixed in commit daba287b299ec7a2c61ae3a714920e90e8396ad5 ("ipv4: fix DO and PROBE pmtu mode regarding local fragmentation with UFO/CORK"). Also IPV6_DONTFRAG mode did not care about already corked data, thus it may generate a fragmented frame even if this socket option was specified. It also did not care about the length of the ipv6 header and possible options. In the error path allow the user to receive the pmtu notifications via both, rxpmtu method or error queue. The user may opted in for both, so deliver the notification to both error handlers (the handlers check if the error needs to be enqueued). Also report back consistent pmtu values when sending on an already cork-appended socket. Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 36 +++++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 4acdb63495db..e6f931997996 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1193,11 +1193,35 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len + (opt ? opt->opt_nflen : 0); - maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); + maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - + sizeof(struct frag_hdr); if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) { - if (cork->length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) { - ipv6_local_error(sk, EMSGSIZE, fl6, mtu-exthdrlen); + unsigned int maxnonfragsize, headersize; + + headersize = sizeof(struct ipv6hdr) + + (opt ? opt->tot_len : 0) + + (dst_allfrag(&rt->dst) ? + sizeof(struct frag_hdr) : 0) + + rt->rt6i_nfheader_len; + + maxnonfragsize = (np->pmtudisc >= IPV6_PMTUDISC_DO) ? + mtu : sizeof(struct ipv6hdr) + IPV6_MAXPLEN; + + /* dontfrag active */ + if ((cork->length + length > mtu - headersize) && dontfrag && + (sk->sk_protocol == IPPROTO_UDP || + sk->sk_protocol == IPPROTO_RAW)) { + ipv6_local_rxpmtu(sk, fl6, mtu - headersize + + sizeof(struct ipv6hdr)); + goto emsgsize; + } + + if (cork->length + length > maxnonfragsize - headersize) { +emsgsize: + ipv6_local_error(sk, EMSGSIZE, fl6, + mtu - headersize + + sizeof(struct ipv6hdr)); return -EMSGSIZE; } } @@ -1222,12 +1246,6 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, * --yoshfuji */ - if ((length > mtu) && dontfrag && (sk->sk_protocol == IPPROTO_UDP || - sk->sk_protocol == IPPROTO_RAW)) { - ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen); - return -EMSGSIZE; - } - skb = skb_peek_tail(&sk->sk_write_queue); cork->length += length; if (((length > mtu) || -- cgit v1.2.3 From 58a4782449c5882f61882396ef18cc34c7dc1269 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 16 Dec 2013 06:31:23 -0800 Subject: ipv6: sit: update mtu check to take care of gso packets While testing my changes for TSO support in SIT devices, I was using sit0 tunnel which appears to include nopmtudisc flag. But using : ip tun add sittun mode sit remote $REMOTE_IPV4 local $LOCAL_IPV4 \ dev $IFACE We get a tunnel which rejects too long packets because of the mtu check which is not yet GSO aware. erd:~# ip tunnel sittun: ipv6/ip remote 10.246.17.84 local 10.246.17.83 ttl inherit 6rd-prefix 2002::/16 sit0: ipv6/ip remote any local any ttl 64 nopmtudisc 6rd-prefix 2002::/16 This patch is based on an excellent report from Michal Shmidt. In the future, we probably want to extend the MTU check to do the right thing for GSO packets... Fixes: ("61c1db7fae21 ipv6: sit: add GSO/TSO support") Reported-by: Michal Schmidt Signed-off-by: Eric Dumazet Tested-by: Michal Schmidt Signed-off-by: David S. Miller --- net/ipv6/sit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 366fbba3359a..a710fdec42d3 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -924,7 +924,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, if (tunnel->parms.iph.daddr && skb_dst(skb)) skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); - if (skb->len > mtu) { + if (skb->len > mtu && !skb_is_gso(skb)) { icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); ip_rt_put(rt); goto tx_error; -- cgit v1.2.3 From 24f5b855e17df7e355eacd6c4a12cc4d6a6c9ff0 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Thu, 19 Dec 2013 12:40:26 +0800 Subject: ipv6: always set the new created dst's from in ip6_rt_copy ip6_rt_copy only sets dst.from if ort has flag RTF_ADDRCONF and RTF_DEFAULT. but the prefix routes which did get installed by hand locally can have an expiration, and no any flag combination which can ensure a potential from does never expire, so we should always set the new created dst's from. This also fixes the new created dst is always expired since the ort, which is created by RA, maybe has RTF_EXPIRES and RTF_ADDRCONF, but no RTF_DEFAULT. Suggested-by: Hannes Frederic Sowa CC: Gao feng Signed-off-by: Li RongQing Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/route.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index a0a48ac3403f..4b4944c3e4c4 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1905,9 +1905,7 @@ static struct rt6_info *ip6_rt_copy(struct rt6_info *ort, else rt->rt6i_gateway = *dest; rt->rt6i_flags = ort->rt6i_flags; - if ((ort->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) == - (RTF_DEFAULT | RTF_ADDRCONF)) - rt6_set_from(rt, ort); + rt6_set_from(rt, ort); rt->rt6i_metric = 0; #ifdef CONFIG_IPV6_SUBTREES -- cgit v1.2.3 From 6a9eadccff2926e392173a989042f14c867cffbf Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Fri, 20 Dec 2013 17:20:12 +0800 Subject: ipv6: release dst properly in ipip6_tunnel_xmit if a dst is not attached to anywhere, it should be released before exit ipip6_tunnel_xmit, otherwise cause dst memory leakage. Fixes: 61c1db7fae21 ("ipv6: sit: add GSO/TSO support") Signed-off-by: Li RongQing Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/sit.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index a710fdec42d3..c87482252577 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -966,8 +966,10 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, tos = INET_ECN_encapsulate(tos, ipv6_get_dsfield(iph6)); skb = iptunnel_handle_offloads(skb, false, SKB_GSO_SIT); - if (IS_ERR(skb)) + if (IS_ERR(skb)) { + ip_rt_put(rt); goto out; + } err = iptunnel_xmit(rt, skb, fl4.saddr, fl4.daddr, IPPROTO_IPV6, tos, ttl, df, !net_eq(tunnel->net, dev_net(dev))); -- cgit v1.2.3 From c3ac17cd6af2687d5881184edd310a5f9c4baa98 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Thu, 2 Jan 2014 08:49:36 +0800 Subject: ipv6: fix the use of pcpu_tstats in sit when read/write the 64bit data, the correct lock should be hold. Signed-off-by: Li RongQing Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/sit.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net/ipv6') diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index c87482252577..d3005b34476a 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -702,8 +702,10 @@ static int ipip6_rcv(struct sk_buff *skb) } tstats = this_cpu_ptr(tunnel->dev->tstats); + u64_stats_update_begin(&tstats->syncp); tstats->rx_packets++; tstats->rx_bytes += skb->len; + u64_stats_update_end(&tstats->syncp); netif_rx(skb); -- cgit v1.2.3 From fad8da3e085ddf5e661090033287f1a5d62858fc Mon Sep 17 00:00:00 2001 From: Yasushi Asano Date: Tue, 31 Dec 2013 12:04:19 +0900 Subject: ipv6 addrconf: fix preferred lifetime state-changing behavior while valid_lft is infinity Fixed a problem with setting the lifetime of an IPv6 address. When setting preferred_lft to a value not zero or infinity, while valid_lft is infinity(0xffffffff) preferred lifetime is set to forever and does not update. Therefore preferred lifetime never becomes deprecated. valid lifetime and preferred lifetime should be set independently, even if valid lifetime is infinity, preferred lifetime must expire correctly (meaning it must eventually become deprecated) Signed-off-by: Yasushi Asano Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index d5fa5b8c443e..1a341f74aa3f 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3456,7 +3456,12 @@ restart: &inet6_addr_lst[i], addr_lst) { unsigned long age; - if (ifp->flags & IFA_F_PERMANENT) + /* When setting preferred_lft to a value not zero or + * infinity, while valid_lft is infinity + * IFA_F_PERMANENT has a non-infinity life time. + */ + if ((ifp->flags & IFA_F_PERMANENT) && + (ifp->prefered_lft == INFINITY_LIFE_TIME)) continue; spin_lock(&ifp->lock); @@ -3481,7 +3486,8 @@ restart: ifp->flags |= IFA_F_DEPRECATED; } - if (time_before(ifp->tstamp + ifp->valid_lft * HZ, next)) + if ((ifp->valid_lft != INFINITY_LIFE_TIME) && + (time_before(ifp->tstamp + ifp->valid_lft * HZ, next))) next = ifp->tstamp + ifp->valid_lft * HZ; spin_unlock(&ifp->lock); @@ -3761,7 +3767,8 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, put_ifaddrmsg(nlh, ifa->prefix_len, ifa->flags, rt_scope(ifa->scope), ifa->idev->dev->ifindex); - if (!(ifa->flags&IFA_F_PERMANENT)) { + if (!((ifa->flags&IFA_F_PERMANENT) && + (ifa->prefered_lft == INFINITY_LIFE_TIME))) { preferred = ifa->prefered_lft; valid = ifa->valid_lft; if (preferred != INFINITY_LIFE_TIME) { -- cgit v1.2.3 From abb6013cca147ad940b0e9fee260d2d9e93b7018 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Thu, 2 Jan 2014 13:20:12 +0800 Subject: ipv6: fix the use of pcpu_tstats in ip6_tunnel when read/write the 64bit data, the correct lock should be hold. Fixes: 87b6d218f3adb ("tunnel: implement 64 bits statistics") Cc: Stephen Hemminger Cc: Eric Dumazet Signed-off-by: Li RongQing Signed-off-by: David S. Miller --- net/ipv6/ip6_tunnel.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index d6062325db08..7881965a8248 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -103,16 +103,25 @@ struct ip6_tnl_net { static struct net_device_stats *ip6_get_stats(struct net_device *dev) { - struct pcpu_tstats sum = { 0 }; + struct pcpu_tstats tmp, sum = { 0 }; int i; for_each_possible_cpu(i) { + unsigned int start; const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); - sum.rx_packets += tstats->rx_packets; - sum.rx_bytes += tstats->rx_bytes; - sum.tx_packets += tstats->tx_packets; - sum.tx_bytes += tstats->tx_bytes; + do { + start = u64_stats_fetch_begin_bh(&tstats->syncp); + tmp.rx_packets = tstats->rx_packets; + tmp.rx_bytes = tstats->rx_bytes; + tmp.tx_packets = tstats->tx_packets; + tmp.tx_bytes = tstats->tx_bytes; + } while (u64_stats_fetch_retry_bh(&tstats->syncp, start)); + + sum.rx_packets += tmp.rx_packets; + sum.rx_bytes += tmp.rx_bytes; + sum.tx_packets += tmp.tx_packets; + sum.tx_bytes += tmp.tx_bytes; } dev->stats.rx_packets = sum.rx_packets; dev->stats.rx_bytes = sum.rx_bytes; @@ -824,8 +833,10 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol, } tstats = this_cpu_ptr(t->dev->tstats); + u64_stats_update_begin(&tstats->syncp); tstats->rx_packets++; tstats->rx_bytes += skb->len; + u64_stats_update_end(&tstats->syncp); netif_rx(skb); -- cgit v1.2.3 From 469bdcefdc47a69028029e792ff1e80680c867b9 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Thu, 2 Jan 2014 14:24:36 +0800 Subject: ipv6: fix the use of pcpu_tstats in ip6_vti.c when read/write the 64bit data, the correct lock should be hold. and we can use the generic vti6_get_stats to return stats, and not define a new one in ip6_vti.c Fixes: 87b6d218f3adb ("tunnel: implement 64 bits statistics") Cc: Stephen Hemminger Cc: Eric Dumazet Signed-off-by: Li RongQing Signed-off-by: David S. Miller --- net/ipv6/ip6_vti.c | 24 +++--------------------- 1 file changed, 3 insertions(+), 21 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index ed94ba61dda0..a4564b05c47b 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -75,26 +75,6 @@ struct vti6_net { struct ip6_tnl __rcu **tnls[2]; }; -static struct net_device_stats *vti6_get_stats(struct net_device *dev) -{ - struct pcpu_tstats sum = { 0 }; - int i; - - for_each_possible_cpu(i) { - const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); - - sum.rx_packets += tstats->rx_packets; - sum.rx_bytes += tstats->rx_bytes; - sum.tx_packets += tstats->tx_packets; - sum.tx_bytes += tstats->tx_bytes; - } - dev->stats.rx_packets = sum.rx_packets; - dev->stats.rx_bytes = sum.rx_bytes; - dev->stats.tx_packets = sum.tx_packets; - dev->stats.tx_bytes = sum.tx_bytes; - return &dev->stats; -} - #define for_each_vti6_tunnel_rcu(start) \ for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) @@ -331,8 +311,10 @@ static int vti6_rcv(struct sk_buff *skb) } tstats = this_cpu_ptr(t->dev->tstats); + u64_stats_update_begin(&tstats->syncp); tstats->rx_packets++; tstats->rx_bytes += skb->len; + u64_stats_update_end(&tstats->syncp); skb->mark = 0; secpath_reset(skb); @@ -716,7 +698,7 @@ static const struct net_device_ops vti6_netdev_ops = { .ndo_start_xmit = vti6_tnl_xmit, .ndo_do_ioctl = vti6_ioctl, .ndo_change_mtu = vti6_change_mtu, - .ndo_get_stats = vti6_get_stats, + .ndo_get_stats64 = ip_tunnel_get_stats64, }; /** -- cgit v1.2.3 From 88ad31491e21f5dec347911d9804c673af414a09 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Mon, 6 Jan 2014 17:53:14 +0100 Subject: ipv6: don't install anycast address for /128 addresses on routers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It does not make sense to create an anycast address for an /128-prefix. Suppress it. As 32019e651c6fce ("ipv6: Do not leave router anycast address for /127 prefixes.") shows we also may not leave them, because we could accidentally remove an anycast address the user has allocated or got added via another prefix. Cc: François-Xavier Le Bail Cc: Thomas Haller Cc: Jiri Pirko Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 1a341f74aa3f..f62c72b59f8e 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1671,7 +1671,7 @@ void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr) static void addrconf_join_anycast(struct inet6_ifaddr *ifp) { struct in6_addr addr; - if (ifp->prefix_len == 127) /* RFC 6164 */ + if (ifp->prefix_len >= 127) /* RFC 6164 */ return; ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); if (ipv6_addr_any(&addr)) @@ -1682,7 +1682,7 @@ static void addrconf_join_anycast(struct inet6_ifaddr *ifp) static void addrconf_leave_anycast(struct inet6_ifaddr *ifp) { struct in6_addr addr; - if (ifp->prefix_len == 127) /* RFC 6164 */ + if (ifp->prefix_len >= 127) /* RFC 6164 */ return; ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len); if (ipv6_addr_any(&addr)) -- cgit v1.2.3