summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/uapi/linux/bpf.h10
-rw-r--r--net/core/filter.c12
-rw-r--r--tools/include/uapi/linux/bpf.h10
-rw-r--r--tools/testing/selftests/bpf/config8
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_tunnel.c321
-rwxr-xr-xtools/testing/selftests/bpf/test_tc_tunnel.sh136
6 files changed, 417 insertions, 80 deletions
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 31a27dd337dc..2e96d0b4bf65 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1523,6 +1523,10 @@ union bpf_attr {
* * **BPF_F_ADJ_ROOM_ENCAP_L4_UDP **:
* Use with ENCAP_L3 flags to further specify the tunnel type.
*
+ * * **BPF_F_ADJ_ROOM_ENCAP_L2(len) **:
+ * Use with ENCAP_L3/L4 flags to further specify the tunnel
+ * type; **len** is the length of the inner MAC header.
+ *
* A call to this helper is susceptible to change the underlaying
* packet buffer. Therefore, at load time, all checks on pointers
* previously done by the verifier are invalidated and must be
@@ -2664,10 +2668,16 @@ enum bpf_func_id {
/* BPF_FUNC_skb_adjust_room flags. */
#define BPF_F_ADJ_ROOM_FIXED_GSO (1ULL << 0)
+#define BPF_ADJ_ROOM_ENCAP_L2_MASK 0xff
+#define BPF_ADJ_ROOM_ENCAP_L2_SHIFT 56
+
#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 (1ULL << 1)
#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 (1ULL << 2)
#define BPF_F_ADJ_ROOM_ENCAP_L4_GRE (1ULL << 3)
#define BPF_F_ADJ_ROOM_ENCAP_L4_UDP (1ULL << 4)
+#define BPF_F_ADJ_ROOM_ENCAP_L2(len) (((__u64)len & \
+ BPF_ADJ_ROOM_ENCAP_L2_MASK) \
+ << BPF_ADJ_ROOM_ENCAP_L2_SHIFT)
/* Mode for BPF_FUNC_skb_adjust_room helper. */
enum bpf_adj_room_mode {
diff --git a/net/core/filter.c b/net/core/filter.c
index 22eb2edf5573..a1654ef62533 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2969,11 +2969,14 @@ static u32 bpf_skb_net_base_len(const struct sk_buff *skb)
#define BPF_F_ADJ_ROOM_MASK (BPF_F_ADJ_ROOM_FIXED_GSO | \
BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \
BPF_F_ADJ_ROOM_ENCAP_L4_GRE | \
- BPF_F_ADJ_ROOM_ENCAP_L4_UDP)
+ BPF_F_ADJ_ROOM_ENCAP_L4_UDP | \
+ BPF_F_ADJ_ROOM_ENCAP_L2( \
+ BPF_ADJ_ROOM_ENCAP_L2_MASK))
static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
u64 flags)
{
+ u8 inner_mac_len = flags >> BPF_ADJ_ROOM_ENCAP_L2_SHIFT;
bool encap = flags & BPF_F_ADJ_ROOM_ENCAP_L3_MASK;
u16 mac_len = 0, inner_net = 0, inner_trans = 0;
unsigned int gso_type = SKB_GSO_DODGY;
@@ -3008,6 +3011,8 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
mac_len = skb->network_header - skb->mac_header;
inner_net = skb->network_header;
+ if (inner_mac_len > len_diff)
+ return -EINVAL;
inner_trans = skb->transport_header;
}
@@ -3016,8 +3021,7 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
return ret;
if (encap) {
- /* inner mac == inner_net on l3 encap */
- skb->inner_mac_header = inner_net;
+ skb->inner_mac_header = inner_net - inner_mac_len;
skb->inner_network_header = inner_net;
skb->inner_transport_header = inner_trans;
skb_set_inner_protocol(skb, skb->protocol);
@@ -3031,7 +3035,7 @@ static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
gso_type |= SKB_GSO_GRE;
else if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
gso_type |= SKB_GSO_IPXIP6;
- else
+ else if (flags & BPF_F_ADJ_ROOM_ENCAP_L3_IPV4)
gso_type |= SKB_GSO_IPXIP4;
if (flags & BPF_F_ADJ_ROOM_ENCAP_L4_GRE ||
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 31a27dd337dc..2e96d0b4bf65 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1523,6 +1523,10 @@ union bpf_attr {
* * **BPF_F_ADJ_ROOM_ENCAP_L4_UDP **:
* Use with ENCAP_L3 flags to further specify the tunnel type.
*
+ * * **BPF_F_ADJ_ROOM_ENCAP_L2(len) **:
+ * Use with ENCAP_L3/L4 flags to further specify the tunnel
+ * type; **len** is the length of the inner MAC header.
+ *
* A call to this helper is susceptible to change the underlaying
* packet buffer. Therefore, at load time, all checks on pointers
* previously done by the verifier are invalidated and must be
@@ -2664,10 +2668,16 @@ enum bpf_func_id {
/* BPF_FUNC_skb_adjust_room flags. */
#define BPF_F_ADJ_ROOM_FIXED_GSO (1ULL << 0)
+#define BPF_ADJ_ROOM_ENCAP_L2_MASK 0xff
+#define BPF_ADJ_ROOM_ENCAP_L2_SHIFT 56
+
#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 (1ULL << 1)
#define BPF_F_ADJ_ROOM_ENCAP_L3_IPV6 (1ULL << 2)
#define BPF_F_ADJ_ROOM_ENCAP_L4_GRE (1ULL << 3)
#define BPF_F_ADJ_ROOM_ENCAP_L4_UDP (1ULL << 4)
+#define BPF_F_ADJ_ROOM_ENCAP_L2(len) (((__u64)len & \
+ BPF_ADJ_ROOM_ENCAP_L2_MASK) \
+ << BPF_ADJ_ROOM_ENCAP_L2_SHIFT)
/* Mode for BPF_FUNC_skb_adjust_room helper. */
enum bpf_adj_room_mode {
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index a42f4fc4dc11..8c976476f6fd 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -25,3 +25,11 @@ CONFIG_XDP_SOCKETS=y
CONFIG_FTRACE_SYSCALLS=y
CONFIG_IPV6_TUNNEL=y
CONFIG_IPV6_GRE=y
+CONFIG_NET_FOU=m
+CONFIG_NET_FOU_IP_TUNNELS=y
+CONFIG_IPV6_FOU=m
+CONFIG_IPV6_FOU_TUNNEL=m
+CONFIG_MPLS=y
+CONFIG_NET_MPLS_GSO=m
+CONFIG_MPLS_ROUTING=m
+CONFIG_MPLS_IPTUNNEL=m
diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
index f541c2de947d..bcb00d737e95 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
@@ -11,7 +11,9 @@
#include <linux/in.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
+#include <linux/mpls.h>
#include <linux/tcp.h>
+#include <linux/udp.h>
#include <linux/pkt_cls.h>
#include <linux/types.h>
@@ -20,16 +22,36 @@
static const int cfg_port = 8000;
-struct grev4hdr {
- struct iphdr ip;
+static const int cfg_udp_src = 20000;
+
+#define UDP_PORT 5555
+#define MPLS_OVER_UDP_PORT 6635
+#define ETH_OVER_UDP_PORT 7777
+
+/* MPLS label 1000 with S bit (last label) set and ttl of 255. */
+static const __u32 mpls_label = __bpf_constant_htonl(1000 << 12 |
+ MPLS_LS_S_MASK | 0xff);
+
+struct gre_hdr {
__be16 flags;
__be16 protocol;
} __attribute__((packed));
-struct grev6hdr {
+union l4hdr {
+ struct udphdr udp;
+ struct gre_hdr gre;
+};
+
+struct v4hdr {
+ struct iphdr ip;
+ union l4hdr l4hdr;
+ __u8 pad[16]; /* enough space for L2 header */
+} __attribute__((packed));
+
+struct v6hdr {
struct ipv6hdr ip;
- __be16 flags;
- __be16 protocol;
+ union l4hdr l4hdr;
+ __u8 pad[16]; /* enough space for L2 header */
} __attribute__((packed));
static __always_inline void set_ipv4_csum(struct iphdr *iph)
@@ -47,13 +69,15 @@ static __always_inline void set_ipv4_csum(struct iphdr *iph)
iph->check = ~((csum & 0xffff) + (csum >> 16));
}
-static __always_inline int encap_ipv4(struct __sk_buff *skb, bool with_gre)
+static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
+ __u16 l2_proto)
{
- struct grev4hdr h_outer;
+ __u16 udp_dst = UDP_PORT;
struct iphdr iph_inner;
+ struct v4hdr h_outer;
struct tcphdr tcph;
+ int olen, l2_len;
__u64 flags;
- int olen;
if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
sizeof(iph_inner)) < 0)
@@ -70,13 +94,58 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, bool with_gre)
if (tcph.dest != __bpf_constant_htons(cfg_port))
return TC_ACT_OK;
+ olen = sizeof(h_outer.ip);
+ l2_len = 0;
+
flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV4;
- if (with_gre) {
+
+ switch (l2_proto) {
+ case ETH_P_MPLS_UC:
+ l2_len = sizeof(mpls_label);
+ udp_dst = MPLS_OVER_UDP_PORT;
+ break;
+ case ETH_P_TEB:
+ l2_len = ETH_HLEN;
+ udp_dst = ETH_OVER_UDP_PORT;
+ break;
+ }
+ flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);
+
+ switch (encap_proto) {
+ case IPPROTO_GRE:
flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
- olen = sizeof(h_outer);
- } else {
- olen = sizeof(h_outer.ip);
+ olen += sizeof(h_outer.l4hdr.gre);
+ h_outer.l4hdr.gre.protocol = bpf_htons(l2_proto);
+ h_outer.l4hdr.gre.flags = 0;
+ break;
+ case IPPROTO_UDP:
+ flags |= BPF_F_ADJ_ROOM_ENCAP_L4_UDP;
+ olen += sizeof(h_outer.l4hdr.udp);
+ h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
+ h_outer.l4hdr.udp.dest = bpf_htons(udp_dst);
+ h_outer.l4hdr.udp.check = 0;
+ h_outer.l4hdr.udp.len = bpf_htons(bpf_ntohs(iph_inner.tot_len) +
+ sizeof(h_outer.l4hdr.udp) +
+ l2_len);
+ break;
+ case IPPROTO_IPIP:
+ break;
+ default:
+ return TC_ACT_OK;
+ }
+
+ /* add L2 encap (if specified) */
+ switch (l2_proto) {
+ case ETH_P_MPLS_UC:
+ *((__u32 *)((__u8 *)&h_outer + olen)) = mpls_label;
+ break;
+ case ETH_P_TEB:
+ if (bpf_skb_load_bytes(skb, 0, (__u8 *)&h_outer + olen,
+ ETH_HLEN))
+ return TC_ACT_SHOT;
+ break;
}
+ olen += l2_len;
/* add room between mac and network header */
if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
@@ -85,16 +154,10 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, bool with_gre)
/* prepare new outer network header */
h_outer.ip = iph_inner;
h_outer.ip.tot_len = bpf_htons(olen +
- bpf_htons(h_outer.ip.tot_len));
- if (with_gre) {
- h_outer.ip.protocol = IPPROTO_GRE;
- h_outer.protocol = bpf_htons(ETH_P_IP);
- h_outer.flags = 0;
- } else {
- h_outer.ip.protocol = IPPROTO_IPIP;
- }
+ bpf_ntohs(h_outer.ip.tot_len));
+ h_outer.ip.protocol = encap_proto;
- set_ipv4_csum((void *)&h_outer.ip);
+ set_ipv4_csum(&h_outer.ip);
/* store new outer network header */
if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
@@ -104,13 +167,16 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, bool with_gre)
return TC_ACT_OK;
}
-static __always_inline int encap_ipv6(struct __sk_buff *skb, bool with_gre)
+static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
+ __u16 l2_proto)
{
+ __u16 udp_dst = UDP_PORT;
struct ipv6hdr iph_inner;
- struct grev6hdr h_outer;
+ struct v6hdr h_outer;
struct tcphdr tcph;
+ int olen, l2_len;
+ __u16 tot_len;
__u64 flags;
- int olen;
if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
sizeof(iph_inner)) < 0)
@@ -124,14 +190,58 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, bool with_gre)
if (tcph.dest != __bpf_constant_htons(cfg_port))
return TC_ACT_OK;
+ olen = sizeof(h_outer.ip);
+ l2_len = 0;
+
flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV6;
- if (with_gre) {
+
+ switch (l2_proto) {
+ case ETH_P_MPLS_UC:
+ l2_len = sizeof(mpls_label);
+ udp_dst = MPLS_OVER_UDP_PORT;
+ break;
+ case ETH_P_TEB:
+ l2_len = ETH_HLEN;
+ udp_dst = ETH_OVER_UDP_PORT;
+ break;
+ }
+ flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);
+
+ switch (encap_proto) {
+ case IPPROTO_GRE:
flags |= BPF_F_ADJ_ROOM_ENCAP_L4_GRE;
- olen = sizeof(h_outer);
- } else {
- olen = sizeof(h_outer.ip);
+ olen += sizeof(h_outer.l4hdr.gre);
+ h_outer.l4hdr.gre.protocol = bpf_htons(l2_proto);
+ h_outer.l4hdr.gre.flags = 0;
+ break;
+ case IPPROTO_UDP:
+ flags |= BPF_F_ADJ_ROOM_ENCAP_L4_UDP;
+ olen += sizeof(h_outer.l4hdr.udp);
+ h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
+ h_outer.l4hdr.udp.dest = bpf_htons(udp_dst);
+ tot_len = bpf_ntohs(iph_inner.payload_len) + sizeof(iph_inner) +
+ sizeof(h_outer.l4hdr.udp);
+ h_outer.l4hdr.udp.check = 0;
+ h_outer.l4hdr.udp.len = bpf_htons(tot_len);
+ break;
+ case IPPROTO_IPV6:
+ break;
+ default:
+ return TC_ACT_OK;
}
+ /* add L2 encap (if specified) */
+ switch (l2_proto) {
+ case ETH_P_MPLS_UC:
+ *((__u32 *)((__u8 *)&h_outer + olen)) = mpls_label;
+ break;
+ case ETH_P_TEB:
+ if (bpf_skb_load_bytes(skb, 0, (__u8 *)&h_outer + olen,
+ ETH_HLEN))
+ return TC_ACT_SHOT;
+ break;
+ }
+ olen += l2_len;
/* add room between mac and network header */
if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
@@ -141,13 +251,8 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, bool with_gre)
h_outer.ip = iph_inner;
h_outer.ip.payload_len = bpf_htons(olen +
bpf_ntohs(h_outer.ip.payload_len));
- if (with_gre) {
- h_outer.ip.nexthdr = IPPROTO_GRE;
- h_outer.protocol = bpf_htons(ETH_P_IPV6);
- h_outer.flags = 0;
- } else {
- h_outer.ip.nexthdr = IPPROTO_IPV6;
- }
+
+ h_outer.ip.nexthdr = encap_proto;
/* store new outer network header */
if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
@@ -157,54 +262,168 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, bool with_gre)
return TC_ACT_OK;
}
-SEC("encap_ipip")
-int __encap_ipip(struct __sk_buff *skb)
+SEC("encap_ipip_none")
+int __encap_ipip_none(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
- return encap_ipv4(skb, false);
+ return encap_ipv4(skb, IPPROTO_IPIP, ETH_P_IP);
else
return TC_ACT_OK;
}
-SEC("encap_gre")
-int __encap_gre(struct __sk_buff *skb)
+SEC("encap_gre_none")
+int __encap_gre_none(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
- return encap_ipv4(skb, true);
+ return encap_ipv4(skb, IPPROTO_GRE, ETH_P_IP);
else
return TC_ACT_OK;
}
-SEC("encap_ip6tnl")
-int __encap_ip6tnl(struct __sk_buff *skb)
+SEC("encap_gre_mpls")
+int __encap_gre_mpls(struct __sk_buff *skb)
+{
+ if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
+ return encap_ipv4(skb, IPPROTO_GRE, ETH_P_MPLS_UC);
+ else
+ return TC_ACT_OK;
+}
+
+SEC("encap_gre_eth")
+int __encap_gre_eth(struct __sk_buff *skb)
+{
+ if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
+ return encap_ipv4(skb, IPPROTO_GRE, ETH_P_TEB);
+ else
+ return TC_ACT_OK;
+}
+
+SEC("encap_udp_none")
+int __encap_udp_none(struct __sk_buff *skb)
+{
+ if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
+ return encap_ipv4(skb, IPPROTO_UDP, ETH_P_IP);
+ else
+ return TC_ACT_OK;
+}
+
+SEC("encap_udp_mpls")
+int __encap_udp_mpls(struct __sk_buff *skb)
+{
+ if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
+ return encap_ipv4(skb, IPPROTO_UDP, ETH_P_MPLS_UC);
+ else
+ return TC_ACT_OK;
+}
+
+SEC("encap_udp_eth")
+int __encap_udp_eth(struct __sk_buff *skb)
+{
+ if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
+ return encap_ipv4(skb, IPPROTO_UDP, ETH_P_TEB);
+ else
+ return TC_ACT_OK;
+}
+
+SEC("encap_ip6tnl_none")
+int __encap_ip6tnl_none(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
- return encap_ipv6(skb, false);
+ return encap_ipv6(skb, IPPROTO_IPV6, ETH_P_IPV6);
else
return TC_ACT_OK;
}
-SEC("encap_ip6gre")
-int __encap_ip6gre(struct __sk_buff *skb)
+SEC("encap_ip6gre_none")
+int __encap_ip6gre_none(struct __sk_buff *skb)
{
if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
- return encap_ipv6(skb, true);
+ return encap_ipv6(skb, IPPROTO_GRE, ETH_P_IPV6);
+ else
+ return TC_ACT_OK;
+}
+
+SEC("encap_ip6gre_mpls")
+int __encap_ip6gre_mpls(struct __sk_buff *skb)
+{
+ if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
+ return encap_ipv6(skb, IPPROTO_GRE, ETH_P_MPLS_UC);
+ else
+ return TC_ACT_OK;
+}
+
+SEC("encap_ip6gre_eth")
+int __encap_ip6gre_eth(struct __sk_buff *skb)
+{
+ if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
+ return encap_ipv6(skb, IPPROTO_GRE, ETH_P_TEB);
+ else
+ return TC_ACT_OK;
+}
+
+SEC("encap_ip6udp_none")
+int __encap_ip6udp_none(struct __sk_buff *skb)
+{
+ if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
+ return encap_ipv6(skb, IPPROTO_UDP, ETH_P_IPV6);
+ else
+ return TC_ACT_OK;
+}
+
+SEC("encap_ip6udp_mpls")
+int __encap_ip6udp_mpls(struct __sk_buff *skb)
+{
+ if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
+ return encap_ipv6(skb, IPPROTO_UDP, ETH_P_MPLS_UC);
+ else
+ return TC_ACT_OK;
+}
+
+SEC("encap_ip6udp_eth")
+int __encap_ip6udp_eth(struct __sk_buff *skb)
+{
+ if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
+ return encap_ipv6(skb, IPPROTO_UDP, ETH_P_TEB);
else
return TC_ACT_OK;
}
static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
{
- char buf[sizeof(struct grev6hdr)];
- int olen;
+ char buf[sizeof(struct v6hdr)];
+ struct gre_hdr greh;
+ struct udphdr udph;
+ int olen = len;
switch (proto) {
case IPPROTO_IPIP:
case IPPROTO_IPV6:
- olen = len;
break;
case IPPROTO_GRE:
- olen = len + 4 /* gre hdr */;
+ olen += sizeof(struct gre_hdr);
+ if (bpf_skb_load_bytes(skb, off + len, &greh, sizeof(greh)) < 0)
+ return TC_ACT_OK;
+ switch (bpf_ntohs(greh.protocol)) {
+ case ETH_P_MPLS_UC:
+ olen += sizeof(mpls_label);
+ break;
+ case ETH_P_TEB:
+ olen += ETH_HLEN;
+ break;
+ }
+ break;
+ case IPPROTO_UDP:
+ olen += sizeof(struct udphdr);
+ if (bpf_skb_load_bytes(skb, off + len, &udph, sizeof(udph)) < 0)
+ return TC_ACT_OK;
+ switch (bpf_ntohs(udph.dest)) {
+ case MPLS_OVER_UDP_PORT:
+ olen += sizeof(mpls_label);
+ break;
+ case ETH_OVER_UDP_PORT:
+ olen += ETH_HLEN;
+ break;
+ }
break;
default:
return TC_ACT_OK;
diff --git a/tools/testing/selftests/bpf/test_tc_tunnel.sh b/tools/testing/selftests/bpf/test_tc_tunnel.sh
index c805adb88f3a..d4d8d5d3b06e 100755
--- a/tools/testing/selftests/bpf/test_tc_tunnel.sh
+++ b/tools/testing/selftests/bpf/test_tc_tunnel.sh
@@ -15,6 +15,12 @@ readonly ns2_v4=192.168.1.2
readonly ns1_v6=fd::1
readonly ns2_v6=fd::2
+# Must match port used by bpf program
+readonly udpport=5555
+# MPLSoverUDP
+readonly mplsudpport=6635
+readonly mplsproto=137
+
readonly infile="$(mktemp)"
readonly outfile="$(mktemp)"
@@ -38,8 +44,8 @@ setup() {
# clamp route to reserve room for tunnel headers
ip -netns "${ns1}" -4 route flush table main
ip -netns "${ns1}" -6 route flush table main
- ip -netns "${ns1}" -4 route add "${ns2_v4}" mtu 1476 dev veth1
- ip -netns "${ns1}" -6 route add "${ns2_v6}" mtu 1456 dev veth1
+ ip -netns "${ns1}" -4 route add "${ns2_v4}" mtu 1458 dev veth1
+ ip -netns "${ns1}" -6 route add "${ns2_v6}" mtu 1438 dev veth1
sleep 1
@@ -86,30 +92,44 @@ set -e
# no arguments: automated test, run all
if [[ "$#" -eq "0" ]]; then
echo "ipip"
- $0 ipv4 ipip 100
+ $0 ipv4 ipip none 100
echo "ip6ip6"
- $0 ipv6 ip6tnl 100
+ $0 ipv6 ip6tnl none 100
+
+ for mac in none mpls eth ; do
+ echo "ip gre $mac"
+ $0 ipv4 gre $mac 100
+
+ echo "ip6 gre $mac"
+ $0 ipv6 ip6gre $mac 100
+
+ echo "ip gre $mac gso"
+ $0 ipv4 gre $mac 2000
- echo "ip gre"
- $0 ipv4 gre 100
+ echo "ip6 gre $mac gso"
+ $0 ipv6 ip6gre $mac 2000
- echo "ip6 gre"
- $0 ipv6 ip6gre 100
+ echo "ip udp $mac"
+ $0 ipv4 udp $mac 100
- echo "ip gre gso"
- $0 ipv4 gre 2000
+ echo "ip6 udp $mac"
+ $0 ipv6 ip6udp $mac 100
- echo "ip6 gre gso"
- $0 ipv6 ip6gre 2000
+ echo "ip udp $mac gso"
+ $0 ipv4 udp $mac 2000
+
+ echo "ip6 udp $mac gso"
+ $0 ipv6 ip6udp $mac 2000
+ done
echo "OK. All tests passed"
exit 0
fi
-if [[ "$#" -ne "3" ]]; then
+if [[ "$#" -ne "4" ]]; then
echo "Usage: $0"
- echo " or: $0 <ipv4|ipv6> <tuntype> <data_len>"
+ echo " or: $0 <ipv4|ipv6> <tuntype> <none|mpls|eth> <data_len>"
exit 1
fi
@@ -117,12 +137,24 @@ case "$1" in
"ipv4")
readonly addr1="${ns1_v4}"
readonly addr2="${ns2_v4}"
- readonly netcat_opt=-4
+ readonly ipproto=4
+ readonly netcat_opt=-${ipproto}
+ readonly foumod=fou
+ readonly foutype=ipip
+ readonly fouproto=4
+ readonly fouproto_mpls=${mplsproto}
+ readonly gretaptype=gretap
;;
"ipv6")
readonly addr1="${ns1_v6}"
readonly addr2="${ns2_v6}"
- readonly netcat_opt=-6
+ readonly ipproto=6
+ readonly netcat_opt=-${ipproto}
+ readonly foumod=fou6
+ readonly foutype=ip6tnl
+ readonly fouproto="41 -6"
+ readonly fouproto_mpls="${mplsproto} -6"
+ readonly gretaptype=ip6gretap
;;
*)
echo "unknown arg: $1"
@@ -131,9 +163,10 @@ case "$1" in
esac
readonly tuntype=$2
-readonly datalen=$3
+readonly mac=$3
+readonly datalen=$4
-echo "encap ${addr1} to ${addr2}, type ${tuntype}, len ${datalen}"
+echo "encap ${addr1} to ${addr2}, type ${tuntype}, mac ${mac} len ${datalen}"
trap cleanup EXIT
@@ -150,16 +183,63 @@ verify_data
ip netns exec "${ns1}" tc qdisc add dev veth1 clsact
ip netns exec "${ns1}" tc filter add dev veth1 egress \
bpf direct-action object-file ./test_tc_tunnel.o \
- section "encap_${tuntype}"
+ section "encap_${tuntype}_${mac}"
echo "test bpf encap without decap (expect failure)"
server_listen
! client_connect
+if [[ "$tuntype" =~ "udp" ]]; then
+ # Set up fou tunnel.
+ ttype="${foutype}"
+ targs="encap fou encap-sport auto encap-dport $udpport"
+ # fou may be a module; allow this to fail.
+ modprobe "${foumod}" ||true
+ if [[ "$mac" == "mpls" ]]; then
+ dport=${mplsudpport}
+ dproto=${fouproto_mpls}
+ tmode="mode any ttl 255"
+ else
+ dport=${udpport}
+ dproto=${fouproto}
+ fi
+ ip netns exec "${ns2}" ip fou add port $dport ipproto ${dproto}
+ targs="encap fou encap-sport auto encap-dport $dport"
+elif [[ "$tuntype" =~ "gre" && "$mac" == "eth" ]]; then
+ ttype=$gretaptype
+else
+ ttype=$tuntype
+ targs=""
+fi
+
# serverside, insert decap module
# server is still running
# client can connect again
-ip netns exec "${ns2}" ip link add dev testtun0 type "${tuntype}" \
- remote "${addr1}" local "${addr2}"
+ip netns exec "${ns2}" ip link add name testtun0 type "${ttype}" \
+ ${tmode} remote "${addr1}" local "${addr2}" $targs
+
+expect_tun_fail=0
+
+if [[ "$tuntype" == "ip6udp" && "$mac" == "mpls" ]]; then
+ # No support for MPLS IPv6 fou tunnel; expect failure.
+ expect_tun_fail=1
+elif [[ "$tuntype" =~ "udp" && "$mac" == "eth" ]]; then
+ # No support for TEB fou tunnel; expect failure.
+ expect_tun_fail=1
+elif [[ "$tuntype" =~ "gre" && "$mac" == "eth" ]]; then
+ # Share ethernet address between tunnel/veth2 so L2 decap works.
+ ethaddr=$(ip netns exec "${ns2}" ip link show veth2 | \
+ awk '/ether/ { print $2 }')
+ ip netns exec "${ns2}" ip link set testtun0 address $ethaddr
+elif [[ "$mac" == "mpls" ]]; then
+ modprobe mpls_iptunnel ||true
+ modprobe mpls_gso ||true
+ ip netns exec "${ns2}" sysctl -qw net.mpls.platform_labels=65536
+ ip netns exec "${ns2}" ip -f mpls route add 1000 dev lo
+ ip netns exec "${ns2}" ip link set lo up
+ ip netns exec "${ns2}" sysctl -qw net.mpls.conf.testtun0.input=1
+ ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.lo.rp_filter=0
+fi
+
# Because packets are decapped by the tunnel they arrive on testtun0 from
# the IP stack perspective. Ensure reverse path filtering is disabled
# otherwise we drop the TCP SYN as arriving on testtun0 instead of the
@@ -169,16 +249,22 @@ ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.rp_filter=0
# selected as the max of the "all" and device-specific values.
ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.testtun0.rp_filter=0
ip netns exec "${ns2}" ip link set dev testtun0 up
-echo "test bpf encap with tunnel device decap"
-client_connect
-verify_data
+if [[ "$expect_tun_fail" == 1 ]]; then
+ # This tunnel mode is not supported, so we expect failure.
+ echo "test bpf encap with tunnel device decap (expect failure)"
+ ! client_connect
+else
+ echo "test bpf encap with tunnel device decap"
+ client_connect
+ verify_data
+ server_listen
+fi
# serverside, use BPF for decap
ip netns exec "${ns2}" ip link del dev testtun0
ip netns exec "${ns2}" tc qdisc add dev veth2 clsact
ip netns exec "${ns2}" tc filter add dev veth2 ingress \
bpf direct-action object-file ./test_tc_tunnel.o section decap
-server_listen
echo "test bpf encap with bpf decap"
client_connect
verify_data