diff options
author | David S. Miller <davem@davemloft.net> | 2016-06-15 12:34:34 -0700 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2016-06-15 12:34:34 -0700 |
commit | a6e225cad3659ac904c81dbbbc9b5725a3d67ae2 (patch) | |
tree | 35a4899be648232e3a96c47220f08414eb5fd6c3 | |
parent | c9ad5a6568fb9b81c8fc10bd97867def79d2e41d (diff) | |
parent | 9ff74384600aeecba34ebdacbbde0627489ff601 (diff) | |
download | linux-a6e225cad3659ac904c81dbbbc9b5725a3d67ae2.tar.bz2 |
Merge branch 'vrf-ipv6-mcast-link-local'
David Ahern says:
====================
net: vrf: Handle ipv6 multicast and link-local addresses
IPv6 multicast and link-local addresses require special handling by the
VRF driver. Rather than using the VRF device index and full FIB lookups,
packets to/from these addresses should use direct FIB lookups based on
the VRF device table.
Multicast routes do not make sense for the L3 master device directly.
Accordingly, do not add mcast routes for the device, and the VRF driver
should fail attempts to send packets to ipv6 mcast addresses on the
device (e.g., ping6 ff02::1%<vrf> should fail).
With this change, connections into and out of a VRF enslaved device work
for multicast and link-local addresses (ICMP, TCP, and UDP). For example:
1. packets into VM with VRF config:
ping6 -c3 fe80::e0:f9ff:fe1c:b974%br1
ping6 -c3 ff02::1%br1
ssh -6 fe80::e0:f9ff:fe1c:b974%br1
2. packets going out a VRF enslaved device:
ping6 -c3 fe80::18f8:83ff:fe4b:7a2e%eth1
ping6 -c3 ff02::1%eth1
ssh -6 root@fe80::18f8:83ff:fe4b:7a2e%eth1
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- | drivers/net/vrf.c | 100 | ||||
-rw-r--r-- | include/net/ip6_route.h | 2 | ||||
-rw-r--r-- | include/net/l3mdev.h | 6 | ||||
-rw-r--r-- | net/ipv6/addrconf.c | 2 | ||||
-rw-r--r-- | net/ipv6/icmp.c | 2 | ||||
-rw-r--r-- | net/ipv6/route.c | 5 | ||||
-rw-r--r-- | net/l3mdev/l3mdev.c | 2 |
7 files changed, 105 insertions, 14 deletions
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index b4d746943bc5..0b5b3c258c2b 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -785,9 +785,63 @@ out: return rc; } +static struct rt6_info *vrf_ip6_route_lookup(struct net *net, + const struct net_device *dev, + struct flowi6 *fl6, + int ifindex, + int flags) +{ + struct net_vrf *vrf = netdev_priv(dev); + struct fib6_table *table = NULL; + struct rt6_info *rt6; + + rcu_read_lock(); + + /* fib6_table does not have a refcnt and can not be freed */ + rt6 = rcu_dereference(vrf->rt6); + if (likely(rt6)) + table = rt6->rt6i_table; + + rcu_read_unlock(); + + if (!table) + return NULL; + + return ip6_pol_route(net, table, ifindex, fl6, flags); +} + +static void vrf_ip6_input_dst(struct sk_buff *skb, struct net_device *vrf_dev, + int ifindex) +{ + const struct ipv6hdr *iph = ipv6_hdr(skb); + struct flowi6 fl6 = { + .daddr = iph->daddr, + .saddr = iph->saddr, + .flowlabel = ip6_flowinfo(iph), + .flowi6_mark = skb->mark, + .flowi6_proto = iph->nexthdr, + .flowi6_iif = ifindex, + }; + struct net *net = dev_net(vrf_dev); + struct rt6_info *rt6; + + rt6 = vrf_ip6_route_lookup(net, vrf_dev, &fl6, ifindex, + RT6_LOOKUP_F_HAS_SADDR | RT6_LOOKUP_F_IFACE); + if (unlikely(!rt6)) + return; + + if (unlikely(&rt6->dst == &net->ipv6.ip6_null_entry->dst)) + return; + + skb_dst_set(skb, &rt6->dst); +} + static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev, struct sk_buff *skb) { + int orig_iif = skb->skb_iif; + bool need_strict; + /* loopback traffic; do not push through packet taps again. 
* Reset pkt_type for upper layers to process skb */ @@ -798,8 +852,11 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev, goto out; } - /* if packet is NDISC keep the ingress interface */ - if (!ipv6_ndisc_frame(skb)) { + /* if packet is NDISC or addressed to multicast or link-local + * then keep the ingress interface + */ + need_strict = rt6_need_strict(&ipv6_hdr(skb)->daddr); + if (!ipv6_ndisc_frame(skb) && !need_strict) { skb->dev = vrf_dev; skb->skb_iif = vrf_dev->ifindex; @@ -810,6 +867,9 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev, IP6CB(skb)->flags |= IP6SKB_L3SLAVE; } + if (need_strict) + vrf_ip6_input_dst(skb, vrf_dev, orig_iif); + out: return skb; } @@ -861,13 +921,37 @@ static struct sk_buff *vrf_l3_rcv(struct net_device *vrf_dev, #if IS_ENABLED(CONFIG_IPV6) static struct dst_entry *vrf_get_rt6_dst(const struct net_device *dev, - const struct flowi6 *fl6) + struct flowi6 *fl6) { + bool need_strict = rt6_need_strict(&fl6->daddr); + struct net_vrf *vrf = netdev_priv(dev); + struct net *net = dev_net(dev); struct dst_entry *dst = NULL; + struct rt6_info *rt; - if (!(fl6->flowi6_flags & FLOWI_FLAG_L3MDEV_SRC)) { - struct net_vrf *vrf = netdev_priv(dev); - struct rt6_info *rt; + /* send to link-local or multicast address */ + if (need_strict) { + int flags = RT6_LOOKUP_F_IFACE; + + /* VRF device does not have a link-local address and + * sending packets to link-local or mcast addresses over + * a VRF device does not make sense + */ + if (fl6->flowi6_oif == dev->ifindex) { + struct dst_entry *dst = &net->ipv6.ip6_null_entry->dst; + + dst_hold(dst); + return dst; + } + + if (!ipv6_addr_any(&fl6->saddr)) + flags |= RT6_LOOKUP_F_HAS_SADDR; + + rt = vrf_ip6_route_lookup(net, dev, fl6, fl6->flowi6_oif, flags); + if (rt) + dst = &rt->dst; + + } else if (!(fl6->flowi6_flags & FLOWI_FLAG_L3MDEV_SRC)) { rcu_read_lock(); @@ -880,6 +964,10 @@ static struct dst_entry *vrf_get_rt6_dst(const struct net_device *dev, rcu_read_unlock(); } + 
/* make sure oif is set to VRF device for lookup */ + if (!need_strict) + fl6->flowi6_oif = dev->ifindex; + return dst; } #endif diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 54c779416eec..f55bf3d294aa 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -76,6 +76,8 @@ static inline struct dst_entry *ip6_route_output(struct net *net, struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6, int flags); +struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, + int ifindex, struct flowi6 *fl6, int flags); int ip6_route_init(void); void ip6_route_cleanup(void); diff --git a/include/net/l3mdev.h b/include/net/l3mdev.h index 34f33eb96a5e..f8a416ec674c 100644 --- a/include/net/l3mdev.h +++ b/include/net/l3mdev.h @@ -38,7 +38,7 @@ struct l3mdev_ops { /* IPv6 ops */ struct dst_entry * (*l3mdev_get_rt6_dst)(const struct net_device *dev, - const struct flowi6 *fl6); + struct flowi6 *fl6); }; #ifdef CONFIG_NET_L3_MASTER_DEV @@ -139,7 +139,7 @@ static inline bool netif_index_is_l3_master(struct net *net, int ifindex) int l3mdev_get_saddr(struct net *net, int ifindex, struct flowi4 *fl4); -struct dst_entry *l3mdev_get_rt6_dst(struct net *net, const struct flowi6 *fl6); +struct dst_entry *l3mdev_get_rt6_dst(struct net *net, struct flowi6 *fl6); static inline struct sk_buff *l3mdev_l3_rcv(struct sk_buff *skb, u16 proto) @@ -225,7 +225,7 @@ static inline int l3mdev_get_saddr(struct net *net, int ifindex, } static inline -struct dst_entry *l3mdev_get_rt6_dst(struct net *net, const struct flowi6 *fl6) +struct dst_entry *l3mdev_get_rt6_dst(struct net *net, struct flowi6 *fl6) { return NULL; } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 47f837a58e0a..b12553905e42 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2254,7 +2254,7 @@ static struct inet6_dev *addrconf_add_dev(struct net_device *dev) return ERR_PTR(-EACCES); /* Add default multicast route */ - if (!(dev->flags & 
IFF_LOOPBACK)) + if (!(dev->flags & IFF_LOOPBACK) && !netif_is_l3_master(dev)) addrconf_add_mroute(dev); return idev; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 40454bfb534e..e32a72fb9982 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -587,7 +587,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) fl6.daddr = ipv6_hdr(skb)->saddr; if (saddr) fl6.saddr = *saddr; - fl6.flowi6_oif = l3mdev_fib_oif(skb->dev); + fl6.flowi6_oif = skb->dev->ifindex; fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY; fl6.flowi6_mark = mark; security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index c6ae6f9b5fe3..d51a1a48b839 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1042,8 +1042,8 @@ static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt) return pcpu_rt; } -static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif, - struct flowi6 *fl6, int flags) +struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, + int oif, struct flowi6 *fl6, int flags) { struct fib6_node *fn, *saved_fn; struct rt6_info *rt; @@ -1139,6 +1139,7 @@ redo_rt6_select: } } +EXPORT_SYMBOL_GPL(ip6_pol_route); static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table, struct flowi6 *fl6, int flags) diff --git a/net/l3mdev/l3mdev.c b/net/l3mdev/l3mdev.c index 7da97809a7e8..d90e4ef09e85 100644 --- a/net/l3mdev/l3mdev.c +++ b/net/l3mdev/l3mdev.c @@ -108,7 +108,7 @@ EXPORT_SYMBOL_GPL(l3mdev_fib_table_by_index); */ struct dst_entry *l3mdev_get_rt6_dst(struct net *net, - const struct flowi6 *fl6) + struct flowi6 *fl6) { struct dst_entry *dst = NULL; struct net_device *dev; |