summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Peilin Ye <peilin.ye@bytedance.com> 2022-04-21 15:09:02 -0700
committer David S. Miller <davem@davemloft.net> 2022-04-25 11:40:45 +0100
commit 31c417c948d7f6909cb63f0ac3298f3c38f8ce20 (patch)
tree 99159f3858c374bd6a062d37a0bc2312269eaf1b
parent fde98ae91f79cab4e020f40c35ed23cbdc59661c (diff)
download linux-31c417c948d7f6909cb63f0ac3298f3c38f8ce20.tar.bz2
ip_gre, ip6_gre: Fix race condition on o_seqno in collect_md mode
As pointed out by Jakub Kicinski, currently using TUNNEL_SEQ in
collect_md mode is racy for [IP6]GRE[TAP] devices. Consider the
following sequence of events:

1. An [IP6]GRE[TAP] device is created in collect_md mode using "ip link
   add ... external". "ip" ignores "[o]seq" if "external" is specified,
   so TUNNEL_SEQ is off, and the device is marked as NETIF_F_LLTX (i.e.
   it uses lockless TX);
2. Someone sets TUNNEL_SEQ on outgoing skb's, using e.g.
   bpf_skb_set_tunnel_key() in an eBPF program attached to this device;
3. gre_fb_xmit() or __gre6_xmit() processes these skb's:

       gre_build_header(skb, tun_hlen,
                        flags, protocol,
                        tunnel_id_to_key32(tun_info->key.tun_id),
                        (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0);
                                                     ^^^^^^^^^^^^^^^^^

   Since we are not using the TX lock (&txq->_xmit_lock), multiple CPUs
   may try to do this tunnel->o_seqno++ in parallel, which is racy. Fix
   it by making o_seqno atomic_t.

As mentioned by Eric Dumazet in commit b790e01aee74 ("ip_gre: lockless
xmit"), making o_seqno atomic_t increases "chance for packets being out
of order at receiver" when NETIF_F_LLTX is on.

Maybe a better fix would be:

1. Do not ignore "oseq" in external mode. Users MUST specify "oseq" if
   they want the kernel to allow sequencing of outgoing packets;
2. Reject all outgoing TUNNEL_SEQ packets if the device was not created
   with "oseq".

Unfortunately, that would break userspace.

We could now make [IP6]GRE[TAP] devices always NETIF_F_LLTX, but let us
do it in separate patches to keep this fix minimal.

Suggested-by: Jakub Kicinski <kuba@kernel.org>
Fixes: 77a5196a804e ("gre: add sequence number for collect md mode.")
Signed-off-by: Peilin Ye <peilin.ye@bytedance.com>
Acked-by: William Tu <u9012063@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- include/net/ip6_tunnel.h | 2
-rw-r--r-- include/net/ip_tunnels.h | 2
-rw-r--r-- net/ipv4/ip_gre.c | 6
-rw-r--r-- net/ipv6/ip6_gre.c | 7
4 files changed, 9 insertions, 8 deletions
diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h
index a38c4f1e4e5c..74b369bddf49 100644
--- a/include/net/ip6_tunnel.h
+++ b/include/net/ip6_tunnel.h
@@ -58,7 +58,7 @@ struct ip6_tnl {
/* These fields used only by GRE */
__u32 i_seqno; /* The last seen seqno */
- __u32 o_seqno; /* The last output seqno */
+ atomic_t o_seqno; /* The last output seqno */
int hlen; /* tun_hlen + encap_hlen */
int tun_hlen; /* Precalculated header length */
int encap_hlen; /* Encap header length (FOU,GUE) */
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 88dee57eac8a..c24fa934221d 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -116,7 +116,7 @@ struct ip_tunnel {
/* These four fields used only by GRE */
u32 i_seqno; /* The last seen seqno */
- u32 o_seqno; /* The last output seqno */
+ atomic_t o_seqno; /* The last output seqno */
int tun_hlen; /* Precalculated header length */
/* These four fields used only by ERSPAN */
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 21a8943f6fa4..aacee9dd771b 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -464,7 +464,7 @@ static void __gre_xmit(struct sk_buff *skb, struct net_device *dev,
/* Push GRE header. */
gre_build_header(skb, tunnel->tun_hlen,
flags, proto, tunnel->parms.o_key,
- (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0);
+ (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);
ip_tunnel_xmit(skb, dev, tnl_params, tnl_params->protocol);
}
@@ -502,7 +502,7 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev,
(TUNNEL_CSUM | TUNNEL_KEY | TUNNEL_SEQ);
gre_build_header(skb, tunnel_hlen, flags, proto,
tunnel_id_to_key32(tun_info->key.tun_id),
- (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0);
+ (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno)) : 0);
ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);
@@ -579,7 +579,7 @@ static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev)
}
gre_build_header(skb, 8, TUNNEL_SEQ,
- proto, 0, htonl(tunnel->o_seqno++));
+ proto, 0, htonl(atomic_fetch_inc(&tunnel->o_seqno)));
ip_md_tunnel_xmit(skb, dev, IPPROTO_GRE, tunnel_hlen);
diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c
index d9e4ac94eab4..5136959b3dc5 100644
--- a/net/ipv6/ip6_gre.c
+++ b/net/ipv6/ip6_gre.c
@@ -766,7 +766,7 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
gre_build_header(skb, tun_hlen,
flags, protocol,
tunnel_id_to_key32(tun_info->key.tun_id),
- (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++)
+ (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno))
: 0);
} else {
@@ -777,7 +777,8 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb,
gre_build_header(skb, tunnel->tun_hlen, flags,
protocol, tunnel->parms.o_key,
- (flags & TUNNEL_SEQ) ? htonl(tunnel->o_seqno++) : 0);
+ (flags & TUNNEL_SEQ) ? htonl(atomic_fetch_inc(&tunnel->o_seqno))
+ : 0);
}
return ip6_tnl_xmit(skb, dev, dsfield, fl6, encap_limit, pmtu,
@@ -1055,7 +1056,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
/* Push GRE header. */
proto = (t->parms.erspan_ver == 1) ? htons(ETH_P_ERSPAN)
: htons(ETH_P_ERSPAN2);
- gre_build_header(skb, 8, TUNNEL_SEQ, proto, 0, htonl(t->o_seqno++));
+ gre_build_header(skb, 8, TUNNEL_SEQ, proto, 0, htonl(atomic_fetch_inc(&t->o_seqno)));
/* TooBig packet may have updated dst->dev's mtu */
if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu)