summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- include/net/seg6.h 3
-rw-r--r-- include/uapi/linux/seg6_iptunnel.h 18
-rw-r--r-- net/ipv6/Kconfig 1
-rw-r--r-- net/ipv6/seg6_iptunnel.c 72
-rw-r--r-- net/ipv6/seg6_local.c 314
5 files changed, 317 insertions, 91 deletions
diff --git a/include/net/seg6.h b/include/net/seg6.h
index 5379f550f521..099bad59dc90 100644
--- a/include/net/seg6.h
+++ b/include/net/seg6.h
@@ -60,7 +60,8 @@ extern int seg6_local_init(void);
extern void seg6_local_exit(void);
extern bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len);
-extern int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh);
+extern int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh,
+ int proto);
extern int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh);
#endif
diff --git a/include/uapi/linux/seg6_iptunnel.h b/include/uapi/linux/seg6_iptunnel.h
index b6e5a0a1afd7..b23df9f58354 100644
--- a/include/uapi/linux/seg6_iptunnel.h
+++ b/include/uapi/linux/seg6_iptunnel.h
@@ -33,16 +33,26 @@ struct seg6_iptunnel_encap {
enum {
SEG6_IPTUN_MODE_INLINE,
SEG6_IPTUN_MODE_ENCAP,
+ SEG6_IPTUN_MODE_L2ENCAP,
};
#ifdef __KERNEL__
+/* Headroom, in bytes, reported for a seg6 tunnel configuration:
+ * (hdrlen + 1) * 8 bytes for the SRH, plus the size of an IPv6 header in
+ * ENCAP mode. L2ENCAP mode reports 0.
+ */
static inline size_t seg6_lwt_headroom(struct seg6_iptunnel_encap *tuninfo)
{
- int encap = (tuninfo->mode == SEG6_IPTUN_MODE_ENCAP);
-
- return ((tuninfo->srh->hdrlen + 1) << 3) +
- (encap * sizeof(struct ipv6hdr));
+ int head = 0;
+
+ switch (tuninfo->mode) {
+ case SEG6_IPTUN_MODE_INLINE:
+ /* inline mode: only the SRH is added, no outer header */
+ break;
+ case SEG6_IPTUN_MODE_ENCAP:
+ head = sizeof(struct ipv6hdr);
+ break;
+ case SEG6_IPTUN_MODE_L2ENCAP:
+ /* no headroom is reported for L2 encapsulation */
+ return 0;
+ }
+
+ /* (hdrlen + 1) << 3 converts the SRH length field to bytes */
+ return ((tuninfo->srh->hdrlen + 1) << 3) + head;
}
#endif
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 0d722396dce6..ea71e4b0ab7a 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -308,6 +308,7 @@ config IPV6_SEG6_LWTUNNEL
depends on IPV6
select LWTUNNEL
select DST_CACHE
+ select IPV6_MULTIPLE_TABLES
---help---
Support for encapsulation of packets within an outer IPv6
header and a Segment Routing Header using the lightweight
diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c
index 501233040570..bd6cc688bd19 100644
--- a/net/ipv6/seg6_iptunnel.c
+++ b/net/ipv6/seg6_iptunnel.c
@@ -91,7 +91,7 @@ static void set_tun_src(struct net *net, struct net_device *dev,
}
/* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */
-int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
+int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto)
{
struct net *net = dev_net(skb_dst(skb)->dev);
struct ipv6hdr *hdr, *inner_hdr;
@@ -116,15 +116,22 @@ int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh)
* hlim will be decremented in ip6_forward() afterwards and
* decapsulation will overwrite inner hlim with outer hlim
*/
- ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)),
- ip6_flowlabel(inner_hdr));
- hdr->hop_limit = inner_hdr->hop_limit;
+
+ if (skb->protocol == htons(ETH_P_IPV6)) {
+ ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)),
+ ip6_flowlabel(inner_hdr));
+ hdr->hop_limit = inner_hdr->hop_limit;
+ } else {
+ ip6_flow_hdr(hdr, 0, 0);
+ hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb));
+ }
+
hdr->nexthdr = NEXTHDR_ROUTING;
isrh = (void *)hdr + sizeof(*hdr);
memcpy(isrh, osrh, hdrlen);
- isrh->nexthdr = NEXTHDR_IPV6;
+ isrh->nexthdr = proto;
hdr->daddr = isrh->segments[isrh->first_segment];
set_tun_src(net, skb->dev, &hdr->daddr, &hdr->saddr);
@@ -199,7 +206,7 @@ static int seg6_do_srh(struct sk_buff *skb)
{
struct dst_entry *dst = skb_dst(skb);
struct seg6_iptunnel_encap *tinfo;
- int err = 0;
+ int proto, err = 0;
tinfo = seg6_encap_lwtunnel(dst->lwtstate);
@@ -210,16 +217,46 @@ static int seg6_do_srh(struct sk_buff *skb)
switch (tinfo->mode) {
case SEG6_IPTUN_MODE_INLINE:
+ if (skb->protocol != htons(ETH_P_IPV6))
+ return -EINVAL;
+
err = seg6_do_srh_inline(skb, tinfo->srh);
+ if (err)
+ return err;
+
skb_reset_inner_headers(skb);
break;
case SEG6_IPTUN_MODE_ENCAP:
- err = seg6_do_srh_encap(skb, tinfo->srh);
+ if (skb->protocol == htons(ETH_P_IPV6))
+ proto = IPPROTO_IPV6;
+ else if (skb->protocol == htons(ETH_P_IP))
+ proto = IPPROTO_IPIP;
+ else
+ return -EINVAL;
+
+ err = seg6_do_srh_encap(skb, tinfo->srh, proto);
+ if (err)
+ return err;
+
+ skb->protocol = htons(ETH_P_IPV6);
break;
- }
+ case SEG6_IPTUN_MODE_L2ENCAP:
+ if (!skb_mac_header_was_set(skb))
+ return -EINVAL;
- if (err)
- return err;
+ if (pskb_expand_head(skb, skb->mac_len, 0, GFP_ATOMIC) < 0)
+ return -ENOMEM;
+
+ skb_mac_header_rebuild(skb);
+ skb_push(skb, skb->mac_len);
+
+ err = seg6_do_srh_encap(skb, tinfo->srh, NEXTHDR_NONE);
+ if (err)
+ return err;
+
+ skb->protocol = htons(ETH_P_IPV6);
+ break;
+ }
ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
@@ -334,6 +371,9 @@ static int seg6_build_state(struct nlattr *nla,
struct seg6_lwt *slwt;
int err;
+ if (family != AF_INET && family != AF_INET6)
+ return -EINVAL;
+
err = nla_parse_nested(tb, SEG6_IPTUNNEL_MAX, nla,
seg6_iptunnel_policy, extack);
@@ -356,9 +396,14 @@ static int seg6_build_state(struct nlattr *nla,
switch (tuninfo->mode) {
case SEG6_IPTUN_MODE_INLINE:
+ if (family != AF_INET6)
+ return -EINVAL;
+
break;
case SEG6_IPTUN_MODE_ENCAP:
break;
+ case SEG6_IPTUN_MODE_L2ENCAP:
+ break;
default:
return -EINVAL;
}
@@ -382,8 +427,11 @@ static int seg6_build_state(struct nlattr *nla,
memcpy(&slwt->tuninfo, tuninfo, tuninfo_len);
newts->type = LWTUNNEL_ENCAP_SEG6;
- newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT |
- LWTUNNEL_STATE_INPUT_REDIRECT;
+ newts->flags |= LWTUNNEL_STATE_INPUT_REDIRECT;
+
+ if (tuninfo->mode != SEG6_IPTUN_MODE_L2ENCAP)
+ newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT;
+
newts->headroom = seg6_lwt_headroom(tuninfo);
*ts = newts;
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index 147680e7a00c..9c1a885ee482 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -30,6 +30,7 @@
#ifdef CONFIG_IPV6_SEG6_HMAC
#include <net/seg6_hmac.h>
#endif
+#include <linux/etherdevice.h>
struct seg6_local_lwt;
@@ -99,23 +100,105 @@ static struct ipv6_sr_hdr *get_and_validate_srh(struct sk_buff *skb)
return srh;
}
+/* Validate an encapsulated packet and strip everything in front of the
+ * header of type @proto.
+ *
+ * A NULL result from get_srh() is tolerated; if an SRH is returned, its
+ * segments_left must be 0 and, with CONFIG_IPV6_SEG6_HMAC, the HMAC check
+ * must pass. ipv6_find_hdr() must then locate @proto; the offset it reports
+ * is pulled from @skb and the network/transport header offsets are reset to
+ * the new start of data.
+ *
+ * Returns true on success and false otherwise. The skb is not freed here.
+ */
+static bool decap_and_validate(struct sk_buff *skb, int proto)
+{
+ struct ipv6_sr_hdr *srh;
+ unsigned int off = 0;
+
+ srh = get_srh(skb);
+ /* segments still pending: not acceptable for decapsulation */
+ if (srh && srh->segments_left > 0)
+ return false;
+
+#ifdef CONFIG_IPV6_SEG6_HMAC
+ if (srh && !seg6_hmac_validate_skb(skb))
+ return false;
+#endif
+
+ /* a negative return means no header of type @proto was found */
+ if (ipv6_find_hdr(skb, &off, proto, NULL, NULL) < 0)
+ return false;
+
+ if (!pskb_pull(skb, off))
+ return false;
+
+ /* account for the pulled bytes in the skb checksum state */
+ skb_postpull_rcsum(skb, skb_network_header(skb), off);
+
+ skb_reset_network_header(skb);
+ skb_reset_transport_header(skb);
+ /* clear the encapsulation marker now that the outer headers are gone */
+ skb->encapsulation = 0;
+
+ return true;
+}
+
+/* Consume one segment: decrement the SRH's segments_left and copy the
+ * segment found at the new index into @daddr.
+ */
+static void advance_nextseg(struct ipv6_sr_hdr *srh, struct in6_addr *daddr)
+{
+	srh->segments_left--;
+	*daddr = srh->segments[srh->segments_left];
+}
+
+/* Perform the IPv6 route lookup for @skb and install the result as its dst.
+ *
+ * @nhaddr: if non-NULL, route towards this address instead of the packet's
+ *          own destination address; FLOWI_FLAG_KNOWN_NH is set in that case.
+ * @tbl_id: if non-zero, look up in that FIB table only; if zero, use the
+ *          regular input route lookup.
+ *
+ * If the requested table does not exist, or the lookup yields a non-error
+ * route through a loopback device, the namespace's blackhole entry is
+ * attached instead, so the skb always leaves this function with a dst set.
+ */
+static void lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr,
+			   u32 tbl_id)
+{
+	struct net *net = dev_net(skb->dev);
+	struct ipv6hdr *hdr = ipv6_hdr(skb);
+	int flags = RT6_LOOKUP_F_HAS_SADDR;
+	struct dst_entry *dst = NULL;
+	struct rt6_info *rt;
+	struct flowi6 fl6;
+
+	/* Zero the whole flow key first: only some of its fields are filled
+	 * in below, and the lookup must not see stack garbage in the others
+	 * (e.g. flowi6_flags when @nhaddr is NULL, or flowi6_oif).
+	 */
+	memset(&fl6, 0, sizeof(fl6));
+	fl6.flowi6_iif = skb->dev->ifindex;
+	fl6.daddr = nhaddr ? *nhaddr : hdr->daddr;
+	fl6.saddr = hdr->saddr;
+	fl6.flowlabel = ip6_flowinfo(hdr);
+	fl6.flowi6_mark = skb->mark;
+	fl6.flowi6_proto = hdr->nexthdr;
+
+	if (nhaddr)
+		fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH;
+
+	if (!tbl_id) {
+		dst = ip6_route_input_lookup(net, skb->dev, &fl6, flags);
+	} else {
+		struct fib6_table *table;
+
+		table = fib6_get_table(net, tbl_id);
+		if (!table)
+			goto out;
+
+		rt = ip6_pol_route(net, table, 0, &fl6, flags);
+		dst = &rt->dst;
+	}
+
+	/* A usable (non-error) route through loopback is rejected. */
+	if (dst && (dst->dev->flags & IFF_LOOPBACK) && !dst->error) {
+		dst_release(dst);
+		dst = NULL;
+	}
+
+out:
+	/* Fall back to the blackhole entry when nothing usable was found. */
+	if (!dst) {
+		rt = net->ipv6.ip6_blk_hole_entry;
+		dst = &rt->dst;
+		dst_hold(dst);
+	}
+
+	skb_dst_drop(skb);
+	skb_dst_set(skb, dst);
+}
+
/* regular endpoint function */
static int input_action_end(struct sk_buff *skb, struct seg6_local_lwt *slwt)
{
struct ipv6_sr_hdr *srh;
- struct in6_addr *addr;
srh = get_and_validate_srh(skb);
if (!srh)
goto drop;
- srh->segments_left--;
- addr = srh->segments + srh->segments_left;
-
- ipv6_hdr(skb)->daddr = *addr;
+ advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
- skb_dst_drop(skb);
- ip6_route_input(skb);
+ lookup_nexthop(skb, NULL, 0);
return dst_input(skb);
@@ -127,41 +210,34 @@ drop:
/* regular endpoint, and forward to specified nexthop */
static int input_action_end_x(struct sk_buff *skb, struct seg6_local_lwt *slwt)
{
- struct net *net = dev_net(skb->dev);
struct ipv6_sr_hdr *srh;
- struct dst_entry *dst;
- struct in6_addr *addr;
- struct ipv6hdr *hdr;
- struct flowi6 fl6;
- int flags;
srh = get_and_validate_srh(skb);
if (!srh)
goto drop;
- srh->segments_left--;
- addr = srh->segments + srh->segments_left;
+ /* step to the next segment and write it into the packet's IPv6 DA */
+ advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
- hdr = ipv6_hdr(skb);
- hdr->daddr = *addr;
+ /* route via the configured nexthop slwt->nh6, with table id 0 */
+ lookup_nexthop(skb, &slwt->nh6, 0);
- skb_dst_drop(skb);
+ return dst_input(skb);
- fl6.flowi6_iif = skb->dev->ifindex;
- fl6.daddr = slwt->nh6;
- fl6.saddr = hdr->saddr;
- fl6.flowlabel = ip6_flowinfo(hdr);
- fl6.flowi6_mark = skb->mark;
- fl6.flowi6_proto = hdr->nexthdr;
+drop:
+ /* get_and_validate_srh() returned NULL: free the skb and fail */
+ kfree_skb(skb);
+ return -EINVAL;
+}
- flags = RT6_LOOKUP_F_HAS_SADDR | RT6_LOOKUP_F_IFACE |
- RT6_LOOKUP_F_REACHABLE;
+static int input_action_end_t(struct sk_buff *skb, struct seg6_local_lwt *slwt)
+{
+ struct ipv6_sr_hdr *srh;
- dst = ip6_route_input_lookup(net, skb->dev, &fl6, flags);
- if (dst->dev->flags & IFF_LOOPBACK)
+ srh = get_and_validate_srh(skb);
+ if (!srh)
goto drop;
- skb_dst_set(skb, dst);
+ advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
+
+ lookup_nexthop(skb, NULL, slwt->table);
return dst_input(skb);
@@ -170,45 +246,78 @@ drop:
return -EINVAL;
}
-/* decapsulate and forward to specified nexthop */
-static int input_action_end_dx6(struct sk_buff *skb,
+/* decapsulate and forward inner L2 frame on specified interface */
+static int input_action_end_dx2(struct sk_buff *skb,
struct seg6_local_lwt *slwt)
{
struct net *net = dev_net(skb->dev);
- struct ipv6hdr *inner_hdr;
- struct ipv6_sr_hdr *srh;
- struct dst_entry *dst;
- unsigned int off = 0;
- struct flowi6 fl6;
- bool use_nh;
- int flags;
+ struct net_device *odev;
+ struct ethhdr *eth;
- /* this function accepts IPv6 encapsulated packets, with either
- * an SRH with SL=0, or no SRH.
+ /* strip the outer headers up to the NEXTHDR_NONE payload */
+ if (!decap_and_validate(skb, NEXTHDR_NONE))
+ goto drop;
+
+ /* a complete Ethernet header must be readable at skb->data */
+ if (!pskb_may_pull(skb, ETH_HLEN))
+ goto drop;
+
+ skb_reset_mac_header(skb);
+ eth = (struct ethhdr *)skb->data;
+
+ /* To determine the frame's protocol, we assume it is 802.3. This avoids
+ * a call to eth_type_trans(), which is not really relevant for our
+ * use case.
*/
+ if (!eth_proto_is_802_3(eth->h_proto))
+ goto drop;
- srh = get_srh(skb);
- if (srh && srh->segments_left > 0)
+ /* the egress device is identified by the configured ifindex slwt->oif */
+ odev = dev_get_by_index_rcu(net, slwt->oif);
+ if (!odev)
goto drop;
-#ifdef CONFIG_IPV6_SEG6_HMAC
- if (srh && !seg6_hmac_validate_skb(skb))
+ /* As we accept Ethernet frames, make sure the egress device is of
+ * the correct type.
+ */
+ if (odev->type != ARPHRD_ETHER)
goto drop;
-#endif
- if (ipv6_find_hdr(skb, &off, IPPROTO_IPV6, NULL, NULL) < 0)
+ /* the egress device must be up and have carrier */
+ if (!(odev->flags & IFF_UP) || !netif_carrier_ok(odev))
goto drop;
- if (!pskb_pull(skb, off))
+ skb_orphan(skb);
+
+ if (skb_warn_if_lro(skb))
goto drop;
- skb_postpull_rcsum(skb, skb_network_header(skb), off);
+ skb_forward_csum(skb);
- skb_reset_network_header(skb);
- skb_reset_transport_header(skb);
- skb->encapsulation = 0;
+ /* drop frames whose length without the Ethernet header exceeds the
+ * egress device MTU
+ */
+ if (skb->len - ETH_HLEN > odev->mtu)
+ goto drop;
- inner_hdr = ipv6_hdr(skb);
+ /* transmit on the egress device, tagged with the frame's own h_proto */
+ skb->dev = odev;
+ skb->protocol = eth->h_proto;
+
+ return dev_queue_xmit(skb);
+
+drop:
+ kfree_skb(skb);
+ return -EINVAL;
+}
+
+/* decapsulate and forward to specified nexthop */
+static int input_action_end_dx6(struct sk_buff *skb,
+ struct seg6_local_lwt *slwt)
+{
+ struct in6_addr *nhaddr = NULL;
+
+ /* this function accepts IPv6 encapsulated packets, with either
+ * an SRH with SL=0, or no SRH.
+ */
+
+ if (!decap_and_validate(skb, IPPROTO_IPV6))
+ goto drop;
+
+ if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
+ goto drop;
/* The inner packet is not associated to any local interface,
* so we do not call netif_rx().
@@ -217,28 +326,62 @@ static int input_action_end_dx6(struct sk_buff *skb,
* inner packet's DA. Otherwise, use the specified nexthop.
*/
- use_nh = !ipv6_addr_any(&slwt->nh6);
+ if (!ipv6_addr_any(&slwt->nh6))
+ nhaddr = &slwt->nh6;
+
+ lookup_nexthop(skb, nhaddr, 0);
+
+ return dst_input(skb);
+drop:
+ kfree_skb(skb);
+ return -EINVAL;
+}
+
+/* decapsulate the inner IPv4 packet and route it, optionally via a
+ * configured IPv4 nexthop
+ */
+static int input_action_end_dx4(struct sk_buff *skb,
+ struct seg6_local_lwt *slwt)
+{
+ struct iphdr *iph;
+ __be32 nhaddr;
+ int err;
+
+ /* strip the outer headers up to the IPPROTO_IPIP payload */
+ if (!decap_and_validate(skb, IPPROTO_IPIP))
+ goto drop;
+
+ /* a complete IPv4 header must be readable before it is used */
+ if (!pskb_may_pull(skb, sizeof(struct iphdr)))
+ goto drop;
+
+ skb->protocol = htons(ETH_P_IP);
+
+ iph = ip_hdr(skb);
+
+ /* use the configured nexthop if non-zero, else the inner packet's DA */
+ nhaddr = slwt->nh4.s_addr ?: iph->daddr;
skb_dst_drop(skb);
- fl6.flowi6_iif = skb->dev->ifindex;
- fl6.daddr = use_nh ? slwt->nh6 : inner_hdr->daddr;
- fl6.saddr = inner_hdr->saddr;
- fl6.flowlabel = ip6_flowinfo(inner_hdr);
- fl6.flowi6_mark = skb->mark;
- fl6.flowi6_proto = inner_hdr->nexthdr;
+ /* IPv4 input route lookup towards nhaddr; a non-zero result drops */
+ err = ip_route_input(skb, nhaddr, iph->saddr, 0, skb->dev);
+ if (err)
+ goto drop;
- flags = RT6_LOOKUP_F_HAS_SADDR | RT6_LOOKUP_F_REACHABLE;
- if (use_nh)
- flags |= RT6_LOOKUP_F_IFACE;
+ return dst_input(skb);
+
+drop:
+ kfree_skb(skb);
+ return -EINVAL;
+}
+
+static int input_action_end_dt6(struct sk_buff *skb,
+ struct seg6_local_lwt *slwt)
+{
+ if (!decap_and_validate(skb, IPPROTO_IPV6))
+ goto drop;
- dst = ip6_route_input_lookup(net, skb->dev, &fl6, flags);
- if (dst->dev->flags & IFF_LOOPBACK)
+ if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
goto drop;
- skb_dst_set(skb, dst);
+ lookup_nexthop(skb, NULL, slwt->table);
return dst_input(skb);
+
drop:
kfree_skb(skb);
return -EINVAL;
@@ -261,8 +404,7 @@ static int input_action_end_b6(struct sk_buff *skb, struct seg6_local_lwt *slwt)
ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
- skb_dst_drop(skb);
- ip6_route_input(skb);
+ lookup_nexthop(skb, NULL, 0);
return dst_input(skb);
@@ -276,29 +418,25 @@ static int input_action_end_b6_encap(struct sk_buff *skb,
struct seg6_local_lwt *slwt)
{
struct ipv6_sr_hdr *srh;
- struct in6_addr *addr;
int err = -EINVAL;
srh = get_and_validate_srh(skb);
if (!srh)
goto drop;
- srh->segments_left--;
- addr = srh->segments + srh->segments_left;
- ipv6_hdr(skb)->daddr = *addr;
+ advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
skb_reset_inner_headers(skb);
skb->encapsulation = 1;
- err = seg6_do_srh_encap(skb, slwt->srh);
+ err = seg6_do_srh_encap(skb, slwt->srh, IPPROTO_IPV6);
if (err)
goto drop;
ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
skb_set_transport_header(skb, sizeof(struct ipv6hdr));
- skb_dst_drop(skb);
- ip6_route_input(skb);
+ lookup_nexthop(skb, NULL, 0);
return dst_input(skb);
@@ -319,11 +457,31 @@ static struct seg6_action_desc seg6_action_table[] = {
.input = input_action_end_x,
},
{
+ .action = SEG6_LOCAL_ACTION_END_T,
+ .attrs = (1 << SEG6_LOCAL_TABLE),
+ .input = input_action_end_t,
+ },
+ {
+ .action = SEG6_LOCAL_ACTION_END_DX2,
+ .attrs = (1 << SEG6_LOCAL_OIF),
+ .input = input_action_end_dx2,
+ },
+ {
.action = SEG6_LOCAL_ACTION_END_DX6,
.attrs = (1 << SEG6_LOCAL_NH6),
.input = input_action_end_dx6,
},
{
+ .action = SEG6_LOCAL_ACTION_END_DX4,
+ .attrs = (1 << SEG6_LOCAL_NH4),
+ .input = input_action_end_dx4,
+ },
+ {
+ .action = SEG6_LOCAL_ACTION_END_DT6,
+ .attrs = (1 << SEG6_LOCAL_TABLE),
+ .input = input_action_end_dt6,
+ },
+ {
.action = SEG6_LOCAL_ACTION_END_B6,
.attrs = (1 << SEG6_LOCAL_SRH),
.input = input_action_end_b6,
@@ -357,6 +515,11 @@ static int seg6_local_input(struct sk_buff *skb)
struct seg6_action_desc *desc;
struct seg6_local_lwt *slwt;
+ if (skb->protocol != htons(ETH_P_IPV6)) {
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+
slwt = seg6_local_lwtunnel(orig_dst->lwtstate);
desc = slwt->desc;
@@ -623,6 +786,9 @@ static int seg6_local_build_state(struct nlattr *nla, unsigned int family,
struct seg6_local_lwt *slwt;
int err;
+ if (family != AF_INET6)
+ return -EINVAL;
+
err = nla_parse_nested(tb, SEG6_LOCAL_MAX, nla, seg6_local_policy,
extack);