diff options
author | Martin KaFai Lau <kafai@fb.com> | 2015-04-28 13:03:06 -0700 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2015-05-01 20:57:06 -0400 |
commit | 4b32b5ad31a68a661f761c76dfd0d076636d3ae9 (patch) | |
tree | 7dfc55612d61e05a3ce15260a6727519eaeeb571 | |
parent | 653437d02f1f12d528c290e64f6dc54be1224db2 (diff) | |
download | linux-4b32b5ad31a68a661f761c76dfd0d076636d3ae9.tar.bz2 |
ipv6: Stop rt6_info from using inet_peer's metrics
inet_peer is indexed by the dst address alone. However, the fib6 tree
could have multiple routing entries (rt6_info) for the same dst. For
example,
1. A /128 dst via multiple gateways.
2. A RTF_CACHE route cloned from a /128 route.
In the above cases, all of them will share the same metrics and
step on each other.
This patch will steer away from inet_peer's metrics and use
dst_cow_metrics_generic() for everything.
Change Highlights:
1. Remove rt6_cow_metrics() which currently acquires metrics from
inet_peer for DST_HOST route (i.e. /128 route).
2. Add rt6i_pmtu to take care of the pmtu update to avoid creating a
full size metrics just to override the RTAX_MTU.
3. After (2), the RTF_CACHE route can also share the metrics with its
dst.from route, by:
dst_init_metrics(&cache_rt->dst, dst_metrics_ptr(cache_rt->dst.from), true);
4. Stop creating RTF_CACHE route by cloning another RTF_CACHE route. Instead,
directly clone from rt->dst.
[ Currently, cloning from another RTF_CACHE is only possible during
rt6_do_redirect(). Also, the old clone is removed from the tree
immediately after the new clone is added. ]
In case of cloning from an older redirect RTF_CACHE, it should work as
before.
In case of cloning from an older pmtu RTF_CACHE, this patch will forget
the pmtu and re-learn it (if there is any) from the redirected route.
The _rt6i_peer and DST_METRICS_FORCE_OVERWRITE will be removed
in the next cleanup patch.
Signed-off-by: Martin KaFai Lau <kafai@fb.com>
Reviewed-by: Hannes Frederic Sowa <hannes@stressinduktion.org>
Cc: Steffen Klassert <steffen.klassert@secunet.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | include/net/ip6_fib.h | 10 | ||||
-rw-r--r-- | net/ipv6/route.c | 102 |
2 files changed, 60 insertions, 52 deletions
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 20e80fa7bbdd..7383a8cc8f84 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -124,6 +124,7 @@ struct rt6_info { unsigned long _rt6i_peer; u32 rt6i_metric; + u32 rt6i_pmtu; /* more non-fragment space at head required */ unsigned short rt6i_nfheader_len; u8 rt6i_protocol; @@ -189,15 +190,6 @@ static inline void rt6_update_expires(struct rt6_info *rt0, int timeout) rt0->rt6i_flags |= RTF_EXPIRES; } -static inline void rt6_set_from(struct rt6_info *rt, struct rt6_info *from) -{ - struct dst_entry *new = (struct dst_entry *) from; - - rt->rt6i_flags &= ~RTF_EXPIRES; - dst_hold(new); - rt->dst.from = new; -} - static inline void ip6_rt_put(struct rt6_info *rt) { /* dst_release() accepts a NULL parameter. diff --git a/net/ipv6/route.c b/net/ipv6/route.c index aa4cfdd13f28..4d6eb5d90cb4 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -92,6 +92,7 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu); static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb); +static void rt6_dst_from_metrics_check(struct rt6_info *rt); static int rt6_score_route(struct rt6_info *rt, int oif, int strict); #ifdef CONFIG_IPV6_ROUTE_INFO @@ -136,33 +137,12 @@ static struct inet_peer *rt6_get_peer_create(struct rt6_info *rt) static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) { - struct rt6_info *rt = (struct rt6_info *) dst; - struct inet_peer *peer; - u32 *p = NULL; + struct rt6_info *rt = (struct rt6_info *)dst; - if (!(rt->dst.flags & DST_HOST)) + if (rt->rt6i_flags & RTF_CACHE) + return NULL; + else return dst_cow_metrics_generic(dst, old); - - peer = rt6_get_peer_create(rt); - if (peer) { - u32 *old_p = __DST_METRICS_PTR(old); - unsigned long prev, new; - - p = peer->metrics; - if (inet_metrics_new(peer) || - (old & DST_METRICS_FORCE_OVERWRITE)) - memcpy(p, old_p, sizeof(u32) * RTAX_MAX); - - new = (unsigned long) p; - prev = cmpxchg(&dst->_metrics, old, new); - - if (prev != old) { - p = __DST_METRICS_PTR(prev); - if (prev & DST_METRICS_READ_ONLY) - p = NULL; - } - } - return p; } static inline const void *choose_neigh_daddr(struct rt6_info *rt, @@ -323,8 +303,7 @@ static void ip6_dst_destroy(struct dst_entry *dst) struct inet6_dev *idev = rt->rt6i_idev; struct dst_entry *from = dst->from; - if (!(rt->dst.flags & DST_HOST)) - dst_destroy_metrics_generic(dst); + dst_destroy_metrics_generic(dst); if (idev) { rt->rt6i_idev = NULL; @@ -333,11 +312,6 @@ static void ip6_dst_destroy(struct dst_entry *dst) dst->from = NULL; dst_release(from); - - if (rt6_has_peer(rt)) { - struct inet_peer *peer = rt6_peer_ptr(rt); - inet_putpeer(peer); - } } static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, @@ -1003,6 +977,7 @@ redo_rt6_select: goto redo_fib6_lookup_lock; out2: + rt6_dst_from_metrics_check(rt); rt->dst.lastuse = jiffies; rt->dst.__use++; @@ -1111,6 +1086,13 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori * Destination cache support functions */ +static void rt6_dst_from_metrics_check(struct rt6_info *rt) +{ + if (rt->dst.from && + dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from)) + dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true); +} + static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) { struct rt6_info *rt; @@ -1127,6 +1109,8 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) if (rt6_check_expired(rt)) return NULL; + rt6_dst_from_metrics_check(rt); + return dst; } @@ -1179,7 +1163,7 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; - dst_metric_set(dst, RTAX_MTU, mtu); + rt6->rt6i_pmtu = mtu; rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires); } } @@ -1359,12 +1343,17 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst) static unsigned int ip6_mtu(const struct dst_entry *dst) { + const struct rt6_info *rt = (const struct rt6_info *)dst; + unsigned int mtu = rt->rt6i_pmtu; struct inet6_dev *idev; - unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); if (mtu) goto out; + mtu = dst_metric_raw(dst, RTAX_MTU); + if (mtu) + goto out; + mtu = IPV6_MIN_MTU; rcu_read_lock(); @@ -1947,12 +1936,27 @@ out: * Misc support functions */ +static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from) +{ + BUG_ON(from->dst.from); + + rt->rt6i_flags &= ~RTF_EXPIRES; + dst_hold(&from->dst); + rt->dst.from = &from->dst; + dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true); +} + static struct rt6_info *ip6_rt_copy(struct rt6_info *ort, const struct in6_addr *dest) { struct net *net = dev_net(ort->dst.dev); - struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0, - ort->rt6i_table); + struct rt6_info *rt; + + if (ort->rt6i_flags & RTF_CACHE) + ort = (struct rt6_info *)ort->dst.from; + + rt = ip6_dst_alloc(net, ort->dst.dev, 0, + ort->rt6i_table); if (rt) { rt->dst.input = ort->dst.input; @@ -1961,7 +1965,6 @@ static struct rt6_info *ip6_rt_copy(struct rt6_info *ort, rt->rt6i_dst.addr = *dest; rt->rt6i_dst.plen = 128; - dst_copy_metrics(&rt->dst, &ort->dst); rt->dst.error = ort->dst.error; rt->rt6i_idev = ort->rt6i_idev; if (rt->rt6i_idev) @@ -2393,11 +2396,20 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) PMTU discouvery. */ if (rt->dst.dev == arg->dev && - !dst_metric_locked(&rt->dst, RTAX_MTU) && - (dst_mtu(&rt->dst) >= arg->mtu || - (dst_mtu(&rt->dst) < arg->mtu && - dst_mtu(&rt->dst) == idev->cnf.mtu6))) { - dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu); + !dst_metric_locked(&rt->dst, RTAX_MTU)) { + if (rt->rt6i_flags & RTF_CACHE) { + /* For RTF_CACHE with rt6i_pmtu == 0 + * (i.e. a redirected route), + * the metrics of its rt->dst.from has already + * been updated. + */ + if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu) + rt->rt6i_pmtu = arg->mtu; + } else if (dst_mtu(&rt->dst) >= arg->mtu || + (dst_mtu(&rt->dst) < arg->mtu && + dst_mtu(&rt->dst) == idev->cnf.mtu6)) { + dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu); + } } return 0; } @@ -2627,6 +2639,7 @@ static int rt6_fill_node(struct net *net, int iif, int type, u32 portid, u32 seq, int prefix, int nowait, unsigned int flags) { + u32 metrics[RTAX_MAX]; struct rtmsg *rtm; struct nlmsghdr *nlh; long expires; @@ -2740,7 +2753,10 @@ static int rt6_fill_node(struct net *net, goto nla_put_failure; } - if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) + memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics)); + if (rt->rt6i_pmtu) + metrics[RTAX_MTU - 1] = rt->rt6i_pmtu; + if (rtnetlink_put_metrics(skb, metrics) < 0) goto nla_put_failure; if (rt->rt6i_flags & RTF_GATEWAY) { |