From 744676e777207f4992ba4cc728a8a71352963c5b Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Sat, 15 Feb 2020 14:20:56 +0100 Subject: openvswitch: add TTL decrement action New action to decrement TTL instead of setting it to a fixed value. This action will decrement the TTL and, in case of expired TTL, drop it or execute an action passed via a nested attribute. The default TTL expired action is to drop the packet. Supports both IPv4 and IPv6 via the ttl and hop_limit fields, respectively. Tested with a corresponding change in the userspace: # ovs-dpctl dump-flows in_port(2),eth(),eth_type(0x0800), packets:0, bytes:0, used:never, actions:dec_ttl{ttl<=1 action:(drop)},1 in_port(1),eth(),eth_type(0x0800), packets:0, bytes:0, used:never, actions:dec_ttl{ttl<=1 action:(drop)},2 in_port(1),eth(),eth_type(0x0806), packets:0, bytes:0, used:never, actions:2 in_port(2),eth(),eth_type(0x0806), packets:0, bytes:0, used:never, actions:1 # ping -c1 192.168.0.2 -t 42 IP (tos 0x0, ttl 41, id 61647, offset 0, flags [DF], proto ICMP (1), length 84) 192.168.0.1 > 192.168.0.2: ICMP echo request, id 386, seq 1, length 64 # ping -c1 192.168.0.2 -t 120 IP (tos 0x0, ttl 119, id 62070, offset 0, flags [DF], proto ICMP (1), length 84) 192.168.0.1 > 192.168.0.2: ICMP echo request, id 388, seq 1, length 64 # ping -c1 192.168.0.2 -t 1 # Co-developed-by: Bindiya Kurle Signed-off-by: Bindiya Kurle Signed-off-by: Matteo Croce Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/actions.c | 67 ++++++++++++++++++++++++++++++++++++++++ net/openvswitch/flow_netlink.c | 70 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 137 insertions(+) (limited to 'net/openvswitch') diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 7fbfe2adfffa..fc0efd8833c8 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -964,6 +964,25 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb, return ovs_dp_upcall(dp, skb, key, &upcall, cutlen); } +static int dec_ttl_exception_handler(struct datapath *dp, struct sk_buff *skb, + struct sw_flow_key *key, + const struct nlattr *attr, bool last) +{ + /* The first action is always 'OVS_DEC_TTL_ATTR_ARG'. */ + struct nlattr *dec_ttl_arg = nla_data(attr); + int rem = nla_len(attr); + + if (nla_len(dec_ttl_arg)) { + struct nlattr *actions = nla_next(dec_ttl_arg, &rem); + + if (actions) + return clone_execute(dp, skb, key, 0, actions, rem, + last, false); + } + consume_skb(skb); + return 0; +} + /* When 'last' is true, sample() should always consume the 'skb'. * Otherwise, sample() should keep 'skb' intact regardless what * actions are executed within sample(). @@ -1180,6 +1199,45 @@ static int execute_check_pkt_len(struct datapath *dp, struct sk_buff *skb, nla_len(actions), last, clone_flow_key); } +static int execute_dec_ttl(struct sk_buff *skb, struct sw_flow_key *key) +{ + int err; + + if (skb->protocol == htons(ETH_P_IPV6)) { + struct ipv6hdr *nh; + + err = skb_ensure_writable(skb, skb_network_offset(skb) + + sizeof(*nh)); + if (unlikely(err)) + return err; + + nh = ipv6_hdr(skb); + + if (nh->hop_limit <= 1) + return -EHOSTUNREACH; + + key->ip.ttl = --nh->hop_limit; + } else { + struct iphdr *nh; + u8 old_ttl; + + err = skb_ensure_writable(skb, skb_network_offset(skb) + + sizeof(*nh)); + if (unlikely(err)) + return err; + + nh = ip_hdr(skb); + if (nh->ttl <= 1) + return -EHOSTUNREACH; + + old_ttl = nh->ttl--; + csum_replace2(&nh->check, htons(old_ttl << 8), + htons(nh->ttl << 8)); + key->ip.ttl = nh->ttl; + } + return 0; +} + /* Execute a list of actions against 'skb'. */ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, struct sw_flow_key *key, @@ -1365,6 +1423,15 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, break; } + + case OVS_ACTION_ATTR_DEC_TTL: + err = execute_dec_ttl(skb, key); + if (err == -EHOSTUNREACH) { + err = dec_ttl_exception_handler(dp, skb, key, + a, true); + return err; + } + break; } if (unlikely(err)) { diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 7da4230627f5..1ccd5e092bca 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -80,6 +80,7 @@ static bool actions_may_change_flow(const struct nlattr *actions) case OVS_ACTION_ATTR_METER: case OVS_ACTION_ATTR_CHECK_PKT_LEN: case OVS_ACTION_ATTR_ADD_MPLS: + case OVS_ACTION_ATTR_DEC_TTL: default: return true; } @@ -2495,6 +2496,39 @@ static int validate_and_copy_sample(struct net *net, const struct nlattr *attr, return 0; } +static int validate_and_copy_dec_ttl(struct net *net, + const struct nlattr *attr, + const struct sw_flow_key *key, + struct sw_flow_actions **sfa, + __be16 eth_type, __be16 vlan_tci, + u32 mpls_label_count, bool log) +{ + int start, err; + u32 nested = true; + + if (!nla_len(attr)) + return ovs_nla_add_action(sfa, OVS_ACTION_ATTR_DEC_TTL, + NULL, 0, log); + + start = add_nested_action_start(sfa, OVS_ACTION_ATTR_DEC_TTL, log); + if (start < 0) + return start; + + err = ovs_nla_add_action(sfa, OVS_DEC_TTL_ATTR_ACTION, &nested, + sizeof(nested), log); + + if (err) + return err; + + err = __ovs_nla_copy_actions(net, attr, key, sfa, eth_type, + vlan_tci, mpls_label_count, log); + if (err) + return err; + + add_nested_action_end(*sfa, start); + return 0; +} + static int validate_and_copy_clone(struct net *net, const struct nlattr *attr, const struct sw_flow_key *key, @@ -3007,6 +3041,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, [OVS_ACTION_ATTR_CLONE] = (u32)-1, [OVS_ACTION_ATTR_CHECK_PKT_LEN] = (u32)-1, [OVS_ACTION_ATTR_ADD_MPLS] = sizeof(struct ovs_action_add_mpls), + [OVS_ACTION_ATTR_DEC_TTL] = (u32)-1, }; const struct ovs_action_push_vlan *vlan; int type = nla_type(a); @@ -3267,6 +3302,15 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, break; } + case OVS_ACTION_ATTR_DEC_TTL: + err = validate_and_copy_dec_ttl(net, a, key, sfa, + eth_type, vlan_tci, + mpls_label_count, log); + if (err) + return err; + skip_copy = true; + break; + default: OVS_NLERR(log, "Unknown Action type %d", type); return -EINVAL; @@ -3438,6 +3482,26 @@ out: return err; } +static int dec_ttl_action_to_attr(const struct nlattr *attr, + struct sk_buff *skb) +{ + int err = 0, rem = nla_len(attr); + struct nlattr *start; + + start = nla_nest_start_noflag(skb, OVS_ACTION_ATTR_DEC_TTL); + + if (!start) + return -EMSGSIZE; + + err = ovs_nla_put_actions(nla_data(attr), rem, skb); + if (err) + nla_nest_cancel(skb, start); + else + nla_nest_end(skb, start); + + return err; +} + static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb) { const struct nlattr *ovs_key = nla_data(a); @@ -3538,6 +3602,12 @@ int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb) return err; break; + case OVS_ACTION_ATTR_DEC_TTL: + err = dec_ttl_action_to_attr(a, skb); + if (err) + return err; + break; + default: if (nla_put(skb, type, nla_len(a), nla_data(a))) return -EMSGSIZE; -- cgit v1.2.3 From a08e7fd9123d85dfdf8d1dc61dbe321c8359d25f Mon Sep 17 00:00:00 2001 From: Cambda Zhu Date: Thu, 26 Mar 2020 15:33:14 +0800 Subject: net: Fix typo of SKB_SGO_CB_OFFSET The SKB_SGO_CB_OFFSET should be SKB_GSO_CB_OFFSET which means the offset of the GSO in skb cb. This patch fixes the typo. Fixes: 9207f9d45b0a ("net: preserve IP control block during GSO segmentation") Signed-off-by: Cambda Zhu Signed-off-by: David S. Miller --- include/linux/skbuff.h | 4 ++-- net/core/dev.c | 4 ++-- net/ipv4/ip_output.c | 2 +- net/ipv4/udp.c | 2 +- net/openvswitch/datapath.c | 2 +- net/xfrm/xfrm_output.c | 4 ++-- 6 files changed, 9 insertions(+), 9 deletions(-) (limited to 'net/openvswitch') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index e47895082b4b..28b1a2b4459e 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -4389,8 +4389,8 @@ struct skb_gso_cb { __wsum csum; __u16 csum_start; }; -#define SKB_SGO_CB_OFFSET 32 -#define SKB_GSO_CB(skb) ((struct skb_gso_cb *)((skb)->cb + SKB_SGO_CB_OFFSET)) +#define SKB_GSO_CB_OFFSET 32 +#define SKB_GSO_CB(skb) ((struct skb_gso_cb *)((skb)->cb + SKB_GSO_CB_OFFSET)) static inline int skb_tnl_header_len(const struct sk_buff *inner_skb) { diff --git a/net/core/dev.c b/net/core/dev.c index d760dcc47978..dee392f21466 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3266,7 +3266,7 @@ static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path) * It may return NULL if the skb requires no segmentation. This is * only possible when GSO is used for verifying header integrity. * - * Segmentation preserves SKB_SGO_CB_OFFSET bytes of previous skb cb. + * Segmentation preserves SKB_GSO_CB_OFFSET bytes of previous skb cb. */ struct sk_buff *__skb_gso_segment(struct sk_buff *skb, netdev_features_t features, bool tx_path) @@ -3295,7 +3295,7 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb, features &= ~NETIF_F_GSO_PARTIAL; } - BUILD_BUG_ON(SKB_SGO_CB_OFFSET + + BUILD_BUG_ON(SKB_GSO_CB_OFFSET + sizeof(*SKB_GSO_CB(skb)) > sizeof(skb->cb)); SKB_GSO_CB(skb)->mac_offset = skb_headroom(skb); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index aaaaf907e0d8..090d3097ee15 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -263,7 +263,7 @@ static int ip_finish_output_gso(struct net *net, struct sock *sk, * insufficent MTU. */ features = netif_skb_features(skb); - BUILD_BUG_ON(sizeof(*IPCB(skb)) > SKB_SGO_CB_OFFSET); + BUILD_BUG_ON(sizeof(*IPCB(skb)) > SKB_GSO_CB_OFFSET); segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK); if (IS_ERR_OR_NULL(segs)) { kfree_skb(skb); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 2633fc231593..7ea90eb4a1ba 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2109,7 +2109,7 @@ static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) if (likely(!udp_unexpected_gso(sk, skb))) return udp_queue_rcv_one_skb(sk, skb); - BUILD_BUG_ON(sizeof(struct udp_skb_cb) > SKB_SGO_CB_OFFSET); + BUILD_BUG_ON(sizeof(struct udp_skb_cb) > SKB_GSO_CB_OFFSET); __skb_push(skb, -skb_mac_offset(skb)); segs = udp_rcv_segment(sk, skb, true); skb_list_walk_safe(segs, skb, next) { diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 07a7dd185995..d8ae541d22a8 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -305,7 +305,7 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb, struct sk_buff *segs, *nskb; int err; - BUILD_BUG_ON(sizeof(*OVS_CB(skb)) > SKB_SGO_CB_OFFSET); + BUILD_BUG_ON(sizeof(*OVS_CB(skb)) > SKB_GSO_CB_OFFSET); segs = __skb_gso_segment(skb, NETIF_F_SG, false); if (IS_ERR(segs)) return PTR_ERR(segs); diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index fafc7aba705f..2fd3d990d992 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -535,8 +535,8 @@ static int xfrm_output_gso(struct net *net, struct sock *sk, struct sk_buff *skb { struct sk_buff *segs, *nskb; - BUILD_BUG_ON(sizeof(*IPCB(skb)) > SKB_SGO_CB_OFFSET); - BUILD_BUG_ON(sizeof(*IP6CB(skb)) > SKB_SGO_CB_OFFSET); + BUILD_BUG_ON(sizeof(*IPCB(skb)) > SKB_GSO_CB_OFFSET); + BUILD_BUG_ON(sizeof(*IP6CB(skb)) > SKB_GSO_CB_OFFSET); segs = skb_gso_segment(skb, 0); kfree_skb(skb); if (IS_ERR(segs)) -- cgit v1.2.3