summary | refs | log | tree | commit | diff | stats
path: root/drivers/net
diff options
context:
space:
mode:
author: David S. Miller <davem@davemloft.net> 2020-08-04 13:01:46 -0700
committer: David S. Miller <davem@davemloft.net> 2020-08-04 13:01:46 -0700
commit: 2ac24d6d685cb1c0e0fbb189dc607b05720fb5d1 (patch)
tree: 49b6306abbc5fbf8d4a25b42b46ef263fbdde33d /drivers/net
parent: cabf06e5a275dde2a336c71536465b30ccf2ae4d (diff)
parent: 7b53682c94032a7a7adec400400c65d0af7fea5a (diff)
download: linux-2ac24d6d685cb1c0e0fbb189dc607b05720fb5d1.tar.bz2
Merge branch 'Support-PMTU-discovery-with-bridged-UDP-tunnels'
Stefano Brivio says:

====================
Support PMTU discovery with bridged UDP tunnels

Currently, PMTU discovery for UDP tunnels only works if packets are
routed to the encapsulating interfaces, not bridged.

This results from the fact that we generally don't have valid routes
to the senders we can use to relay ICMP and ICMPv6 errors, and makes
PMTU discovery completely non-functional for VXLAN and GENEVE ports of
both regular bridges and Open vSwitch instances.

If the sender is local, and packets are forwarded to the port by a
regular bridge, all it takes is to generate a corresponding route
exception on the encapsulating device. The bridge then finds the route
exception carrying the PMTU value estimate as it forwards frames, and
relays ICMP messages back to the socket of the local sender. Patch 1/6
fixes this case.

If the sender resides on another node, we actually need to reply to
IP and IPv6 packets ourselves and send these ICMP or ICMPv6 errors
back, using the same encapsulating device. Patch 2/6, based on an
original idea by Florian Westphal, adds the needed functionality,
while patches 3/6 and 4/6 add matching support for VXLAN and GENEVE.

Finally, 5/6 and 6/6 introduce selftests for all combinations of
inner and outer IP versions, covering both VXLAN and GENEVE, with
both regular bridges and Open vSwitch instances.

v2: Add helper to check for any bridge port, skip oif check for PMTU
    routes for bridge ports only, split IPv4 and IPv6 helpers and
    functions (all suggested by David Ahern)
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'drivers/net')
-rw-r--r--  drivers/net/bareudp.c   5
-rw-r--r--  drivers/net/geneve.c   55
-rw-r--r--  drivers/net/vxlan.c    47
3 files changed, 95 insertions, 12 deletions
diff --git a/drivers/net/bareudp.c b/drivers/net/bareudp.c
index 3b6664c7e73c..841910f1db65 100644
--- a/drivers/net/bareudp.c
+++ b/drivers/net/bareudp.c
@@ -308,7 +308,7 @@ static int bareudp_xmit_skb(struct sk_buff *skb, struct net_device *dev,
return PTR_ERR(rt);
skb_tunnel_check_pmtu(skb, &rt->dst,
- BAREUDP_IPV4_HLEN + info->options_len);
+ BAREUDP_IPV4_HLEN + info->options_len, false);
sport = udp_flow_src_port(bareudp->net, skb,
bareudp->sport_min, USHRT_MAX,
@@ -369,7 +369,8 @@ static int bareudp6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
if (IS_ERR(dst))
return PTR_ERR(dst);
- skb_tunnel_check_pmtu(skb, dst, BAREUDP_IPV6_HLEN + info->options_len);
+ skb_tunnel_check_pmtu(skb, dst, BAREUDP_IPV6_HLEN + info->options_len,
+ false);
sport = udp_flow_src_port(bareudp->net, skb,
bareudp->sport_min, USHRT_MAX,
diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c
index 017c13acc911..c71f994fbc73 100644
--- a/drivers/net/geneve.c
+++ b/drivers/net/geneve.c
@@ -893,8 +893,31 @@ static int geneve_xmit_skb(struct sk_buff *skb, struct net_device *dev,
if (IS_ERR(rt))
return PTR_ERR(rt);
- skb_tunnel_check_pmtu(skb, &rt->dst,
- GENEVE_IPV4_HLEN + info->options_len);
+ err = skb_tunnel_check_pmtu(skb, &rt->dst,
+ GENEVE_IPV4_HLEN + info->options_len,
+ netif_is_any_bridge_port(dev));
+ if (err < 0) {
+ dst_release(&rt->dst);
+ return err;
+ } else if (err) {
+ struct ip_tunnel_info *info;
+
+ info = skb_tunnel_info(skb);
+ if (info) {
+ info->key.u.ipv4.dst = fl4.saddr;
+ info->key.u.ipv4.src = fl4.daddr;
+ }
+
+ if (!pskb_may_pull(skb, ETH_HLEN)) {
+ dst_release(&rt->dst);
+ return -EINVAL;
+ }
+
+ skb->protocol = eth_type_trans(skb, geneve->dev);
+ netif_rx(skb);
+ dst_release(&rt->dst);
+ return -EMSGSIZE;
+ }
sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
if (geneve->cfg.collect_md) {
@@ -955,7 +978,30 @@ static int geneve6_xmit_skb(struct sk_buff *skb, struct net_device *dev,
if (IS_ERR(dst))
return PTR_ERR(dst);
- skb_tunnel_check_pmtu(skb, dst, GENEVE_IPV6_HLEN + info->options_len);
+ err = skb_tunnel_check_pmtu(skb, dst,
+ GENEVE_IPV6_HLEN + info->options_len,
+ netif_is_any_bridge_port(dev));
+ if (err < 0) {
+ dst_release(dst);
+ return err;
+ } else if (err) {
+ struct ip_tunnel_info *info = skb_tunnel_info(skb);
+
+ if (info) {
+ info->key.u.ipv6.dst = fl6.saddr;
+ info->key.u.ipv6.src = fl6.daddr;
+ }
+
+ if (!pskb_may_pull(skb, ETH_HLEN)) {
+ dst_release(dst);
+ return -EINVAL;
+ }
+
+ skb->protocol = eth_type_trans(skb, geneve->dev);
+ netif_rx(skb);
+ dst_release(dst);
+ return -EMSGSIZE;
+ }
sport = udp_flow_src_port(geneve->net, skb, 1, USHRT_MAX, true);
if (geneve->cfg.collect_md) {
@@ -1012,7 +1058,8 @@ static netdev_tx_t geneve_xmit(struct sk_buff *skb, struct net_device *dev)
if (likely(!err))
return NETDEV_TX_OK;
- dev_kfree_skb(skb);
+ if (err != -EMSGSIZE)
+ dev_kfree_skb(skb);
if (err == -ELOOP)
dev->stats.collisions++;
diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 77658425db8a..6d5816be6131 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -2500,7 +2500,8 @@ static struct dst_entry *vxlan6_get_route(struct vxlan_dev *vxlan,
/* Bypass encapsulation if the destination is local */
static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,
- struct vxlan_dev *dst_vxlan, __be32 vni)
+ struct vxlan_dev *dst_vxlan, __be32 vni,
+ bool snoop)
{
struct pcpu_sw_netstats *tx_stats, *rx_stats;
union vxlan_addr loopback;
@@ -2532,7 +2533,7 @@ static void vxlan_encap_bypass(struct sk_buff *skb, struct vxlan_dev *src_vxlan,
goto drop;
}
- if (dst_vxlan->cfg.flags & VXLAN_F_LEARN)
+ if ((dst_vxlan->cfg.flags & VXLAN_F_LEARN) && snoop)
vxlan_snoop(dev, &loopback, eth_hdr(skb)->h_source, 0, vni);
u64_stats_update_begin(&tx_stats->syncp);
@@ -2581,7 +2582,7 @@ static int encap_bypass_if_local(struct sk_buff *skb, struct net_device *dev,
return -ENOENT;
}
- vxlan_encap_bypass(skb, vxlan, dst_vxlan, vni);
+ vxlan_encap_bypass(skb, vxlan, dst_vxlan, vni, true);
return 1;
}
@@ -2617,7 +2618,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
if (vxlan_addr_any(dst)) {
if (did_rsc) {
/* short-circuited back to local bridge */
- vxlan_encap_bypass(skb, vxlan, vxlan, default_vni);
+ vxlan_encap_bypass(skb, vxlan, vxlan,
+ default_vni, true);
return;
}
goto drop;
@@ -2720,7 +2722,23 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
}
ndst = &rt->dst;
- skb_tunnel_check_pmtu(skb, ndst, VXLAN_HEADROOM);
+ err = skb_tunnel_check_pmtu(skb, ndst, VXLAN_HEADROOM,
+ netif_is_any_bridge_port(dev));
+ if (err < 0) {
+ goto tx_error;
+ } else if (err) {
+ if (info) {
+ struct in_addr src, dst;
+
+ src = remote_ip.sin.sin_addr;
+ dst = local_ip.sin.sin_addr;
+ info->key.u.ipv4.src = src.s_addr;
+ info->key.u.ipv4.dst = dst.s_addr;
+ }
+ vxlan_encap_bypass(skb, vxlan, vxlan, vni, false);
+ dst_release(ndst);
+ goto out_unlock;
+ }
tos = ip_tunnel_ecn_encap(RT_TOS(tos), old_iph, skb);
ttl = ttl ? : ip4_dst_hoplimit(&rt->dst);
@@ -2760,7 +2778,24 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev,
goto out_unlock;
}
- skb_tunnel_check_pmtu(skb, ndst, VXLAN6_HEADROOM);
+ err = skb_tunnel_check_pmtu(skb, ndst, VXLAN6_HEADROOM,
+ netif_is_any_bridge_port(dev));
+ if (err < 0) {
+ goto tx_error;
+ } else if (err) {
+ if (info) {
+ struct in6_addr src, dst;
+
+ src = remote_ip.sin6.sin6_addr;
+ dst = local_ip.sin6.sin6_addr;
+ info->key.u.ipv6.src = src;
+ info->key.u.ipv6.dst = dst;
+ }
+
+ vxlan_encap_bypass(skb, vxlan, vxlan, vni, false);
+ dst_release(ndst);
+ goto out_unlock;
+ }
tos = ip_tunnel_ecn_encap(RT_TOS(tos), old_iph, skb);
ttl = ttl ? : ip6_dst_hoplimit(ndst);