From 2e68ea92684181412b73979baf1af7d04619c52c Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 17 May 2021 21:15:17 +0300 Subject: ipv4: Calculate multipath hash inside switch statement A subsequent patch will add another multipath hash policy where the multipath hash is calculated directly by the policy specific code and not outside of the switch statement. Prepare for this change by moving the multipath hash calculation inside the switch statement. No functional changes intended. Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/ipv4/route.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net/ipv4') diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f6787c55f6ab..9d61e969446e 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1912,7 +1912,7 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, { u32 multipath_hash = fl4 ? fl4->flowi4_multipath_hash : 0; struct flow_keys hash_keys; - u32 mhash; + u32 mhash = 0; switch (net->ipv4.sysctl_fib_multipath_hash_policy) { case 0: @@ -1924,6 +1924,7 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, hash_keys.addrs.v4addrs.src = fl4->saddr; hash_keys.addrs.v4addrs.dst = fl4->daddr; } + mhash = flow_hash_from_keys(&hash_keys); break; case 1: /* skb is currently provided only when forwarding */ @@ -1957,6 +1958,7 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, hash_keys.ports.dst = fl4->fl4_dport; hash_keys.basic.ip_proto = fl4->flowi4_proto; } + mhash = flow_hash_from_keys(&hash_keys); break; case 2: memset(&hash_keys, 0, sizeof(hash_keys)); @@ -1987,9 +1989,9 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, hash_keys.addrs.v4addrs.src = fl4->saddr; hash_keys.addrs.v4addrs.dst = fl4->daddr; } + mhash = flow_hash_from_keys(&hash_keys); break; } - mhash = flow_hash_from_keys(&hash_keys); if (multipath_hash) mhash = jhash_2words(mhash, multipath_hash, 0); -- cgit v1.2.3 From ce5c9c20d364f156c885efed8c71fca2945db00f Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 17 May 2021 21:15:18 +0300 Subject: ipv4: Add a sysctl to control multipath hash fields A subsequent patch will add a new multipath hash policy where the packet fields used for multipath hash calculation are determined by user space. This patch adds a sysctl that allows user space to set these fields. The packet fields are represented using a bitmask and are common between IPv4 and IPv6 to allow user space to use the same numbering across both protocols. For example, to hash based on standard 5-tuple: # sysctl -w net.ipv4.fib_multipath_hash_fields=0x0037 net.ipv4.fib_multipath_hash_fields = 0x0037 The kernel rejects unknown fields, for example: # sysctl -w net.ipv4.fib_multipath_hash_fields=0x1000 sysctl: setting key "net.ipv4.fib_multipath_hash_fields": Invalid argument More fields can be added in the future, if needed. Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.rst | 27 +++++++++++++++++++++ include/net/ip_fib.h | 43 ++++++++++++++++++++++++++++++++++ include/net/netns/ipv4.h | 1 + net/ipv4/fib_frontend.c | 6 +++++ net/ipv4/sysctl_net_ipv4.c | 12 ++++++++++ 5 files changed, 89 insertions(+) (limited to 'net/ipv4') diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index c2ecc9894fd0..47494798d03b 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -100,6 +100,33 @@ fib_multipath_hash_policy - INTEGER - 1 - Layer 4 - 2 - Layer 3 or inner Layer 3 if present +fib_multipath_hash_fields - UNSIGNED INTEGER + When fib_multipath_hash_policy is set to 3 (custom multipath hash), the + fields used for multipath hash calculation are determined by this + sysctl. + + This value is a bitmask which enables various fields for multipath hash + calculation. + + Possible fields are: + + ====== ============================ + 0x0001 Source IP address + 0x0002 Destination IP address + 0x0004 IP protocol + 0x0008 Unused (Flow Label) + 0x0010 Source port + 0x0020 Destination port + 0x0040 Inner source IP address + 0x0080 Inner destination IP address + 0x0100 Inner IP protocol + 0x0200 Inner Flow Label + 0x0400 Inner source port + 0x0800 Inner destination port + ====== ============================ + + Default: 0x0007 (source IP, destination IP and IP protocol) + fib_sync_mem - UNSIGNED INTEGER Amount of dirty memory from fib entries that can be backlogged before synchronize_rcu is forced. diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index a914f33f3ed5..3ab2563b1a23 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -466,6 +466,49 @@ int fib_sync_up(struct net_device *dev, unsigned char nh_flags); void fib_sync_mtu(struct net_device *dev, u32 orig_mtu); void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig); +/* Fields used for sysctl_fib_multipath_hash_fields. + * Common to IPv4 and IPv6. + * + * Add new fields at the end. This is user API. + */ +#define FIB_MULTIPATH_HASH_FIELD_SRC_IP BIT(0) +#define FIB_MULTIPATH_HASH_FIELD_DST_IP BIT(1) +#define FIB_MULTIPATH_HASH_FIELD_IP_PROTO BIT(2) +#define FIB_MULTIPATH_HASH_FIELD_FLOWLABEL BIT(3) +#define FIB_MULTIPATH_HASH_FIELD_SRC_PORT BIT(4) +#define FIB_MULTIPATH_HASH_FIELD_DST_PORT BIT(5) +#define FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP BIT(6) +#define FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP BIT(7) +#define FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO BIT(8) +#define FIB_MULTIPATH_HASH_FIELD_INNER_FLOWLABEL BIT(9) +#define FIB_MULTIPATH_HASH_FIELD_INNER_SRC_PORT BIT(10) +#define FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT BIT(11) + +#define FIB_MULTIPATH_HASH_FIELD_OUTER_MASK \ + (FIB_MULTIPATH_HASH_FIELD_SRC_IP | \ + FIB_MULTIPATH_HASH_FIELD_DST_IP | \ + FIB_MULTIPATH_HASH_FIELD_IP_PROTO | \ + FIB_MULTIPATH_HASH_FIELD_FLOWLABEL | \ + FIB_MULTIPATH_HASH_FIELD_SRC_PORT | \ + FIB_MULTIPATH_HASH_FIELD_DST_PORT) + +#define FIB_MULTIPATH_HASH_FIELD_INNER_MASK \ + (FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP | \ + FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP | \ + FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO | \ + FIB_MULTIPATH_HASH_FIELD_INNER_FLOWLABEL | \ + FIB_MULTIPATH_HASH_FIELD_INNER_SRC_PORT | \ + FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT) + +#define FIB_MULTIPATH_HASH_FIELD_ALL_MASK \ + (FIB_MULTIPATH_HASH_FIELD_OUTER_MASK | \ + FIB_MULTIPATH_HASH_FIELD_INNER_MASK) + +#define FIB_MULTIPATH_HASH_FIELD_DEFAULT_MASK \ + (FIB_MULTIPATH_HASH_FIELD_SRC_IP | \ + FIB_MULTIPATH_HASH_FIELD_DST_IP | \ + FIB_MULTIPATH_HASH_FIELD_IP_PROTO) + #ifdef CONFIG_IP_ROUTE_MULTIPATH int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, const struct sk_buff *skb, struct flow_keys *flkeys); diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index f6af8d96d3c6..746c80cd4257 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -210,6 +210,7 @@ struct netns_ipv4 { #endif #endif #ifdef CONFIG_IP_ROUTE_MULTIPATH + u32 sysctl_fib_multipath_hash_fields; u8 sysctl_fib_multipath_use_neigh; u8 sysctl_fib_multipath_hash_policy; #endif diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index bfb345c88271..af8814a11378 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1514,6 +1514,12 @@ static int __net_init ip_fib_net_init(struct net *net) if (err) return err; +#ifdef CONFIG_IP_ROUTE_MULTIPATH + /* Default to 3-tuple */ + net->ipv4.sysctl_fib_multipath_hash_fields = + FIB_MULTIPATH_HASH_FIELD_DEFAULT_MASK; +#endif + /* Avoid false sharing : Use at least a full cache line */ size = max_t(size_t, size, L1_CACHE_BYTES); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index a62934b9f15a..45bab3733621 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -48,6 +49,8 @@ static int ip_ping_group_range_min[] = { 0, 0 }; static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX }; static u32 u32_max_div_HZ = UINT_MAX / HZ; static int one_day_secs = 24 * 3600; +static u32 fib_multipath_hash_fields_all_mask __maybe_unused = + FIB_MULTIPATH_HASH_FIELD_ALL_MASK; /* obsolete */ static int sysctl_tcp_low_latency __read_mostly; @@ -1052,6 +1055,15 @@ static struct ctl_table ipv4_net_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = &two, }, + { + .procname = "fib_multipath_hash_fields", + .data = &init_net.ipv4.sysctl_fib_multipath_hash_fields, + .maxlen = sizeof(u32), + .mode = 0644, + .proc_handler = proc_douintvec_minmax, + .extra1 = SYSCTL_ONE, + .extra2 = &fib_multipath_hash_fields_all_mask, + }, #endif { .procname = "ip_unprivileged_port_start", -- cgit v1.2.3 From 4253b4986f98da4bfcb6a24d3fc6ff19f28e8420 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 17 May 2021 21:15:19 +0300 Subject: ipv4: Add custom multipath hash policy Add a new multipath hash policy where the packet fields used for hash calculation are determined by user space via the fib_multipath_hash_fields sysctl that was introduced in the previous patch. The current set of available packet fields includes both outer and inner fields, which requires two invocations of the flow dissector. Avoid unnecessary dissection of the outer or inner flows by skipping dissection if none of the outer or inner fields are required. In accordance with the existing policies, when an skb is not available, packet fields are extracted from the provided flow key. In which case, only outer fields are considered. Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.rst | 2 + net/ipv4/route.c | 121 +++++++++++++++++++++++++++++++++ net/ipv4/sysctl_net_ipv4.c | 3 +- 3 files changed, 125 insertions(+), 1 deletion(-) (limited to 'net/ipv4') diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 47494798d03b..afdcdc0691d6 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -99,6 +99,8 @@ fib_multipath_hash_policy - INTEGER - 0 - Layer 3 - 1 - Layer 4 - 2 - Layer 3 or inner Layer 3 if present + - 3 - Custom multipath hash. Fields used for multipath hash calculation + are determined by fib_multipath_hash_fields sysctl fib_multipath_hash_fields - UNSIGNED INTEGER When fib_multipath_hash_policy is set to 3 (custom multipath hash), the diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 9d61e969446e..a4c477475f4c 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1906,6 +1906,121 @@ out: hash_keys->addrs.v4addrs.dst = key_iph->daddr; } +static u32 fib_multipath_custom_hash_outer(const struct net *net, + const struct sk_buff *skb, + bool *p_has_inner) +{ + u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields; + struct flow_keys keys, hash_keys; + + if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK)) + return 0; + + memset(&hash_keys, 0, sizeof(hash_keys)); + skb_flow_dissect_flow_keys(skb, &keys, FLOW_DISSECTOR_F_STOP_AT_ENCAP); + + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP) + hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP) + hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO) + hash_keys.basic.ip_proto = keys.basic.ip_proto; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT) + hash_keys.ports.src = keys.ports.src; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT) + hash_keys.ports.dst = keys.ports.dst; + + *p_has_inner = !!(keys.control.flags & FLOW_DIS_ENCAPSULATION); + return flow_hash_from_keys(&hash_keys); +} + +static u32 fib_multipath_custom_hash_inner(const struct net *net, + const struct sk_buff *skb, + bool has_inner) +{ + u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields; + struct flow_keys keys, hash_keys; + + /* We assume the packet carries an encapsulation, but if none was + * encountered during dissection of the outer flow, then there is no + * point in calling the flow dissector again. + */ + if (!has_inner) + return 0; + + if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_MASK)) + return 0; + + memset(&hash_keys, 0, sizeof(hash_keys)); + skb_flow_dissect_flow_keys(skb, &keys, 0); + + if (!(keys.control.flags & FLOW_DIS_ENCAPSULATION)) + return 0; + + if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP) + hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP) + hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst; + } else if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP) + hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP) + hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_FLOWLABEL) + hash_keys.tags.flow_label = keys.tags.flow_label; + } + + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO) + hash_keys.basic.ip_proto = keys.basic.ip_proto; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_PORT) + hash_keys.ports.src = keys.ports.src; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT) + hash_keys.ports.dst = keys.ports.dst; + + return flow_hash_from_keys(&hash_keys); +} + +static u32 fib_multipath_custom_hash_skb(const struct net *net, + const struct sk_buff *skb) +{ + u32 mhash, mhash_inner; + bool has_inner = true; + + mhash = fib_multipath_custom_hash_outer(net, skb, &has_inner); + mhash_inner = fib_multipath_custom_hash_inner(net, skb, has_inner); + + return jhash_2words(mhash, mhash_inner, 0); +} + +static u32 fib_multipath_custom_hash_fl4(const struct net *net, + const struct flowi4 *fl4) +{ + u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields; + struct flow_keys hash_keys; + + if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK)) + return 0; + + memset(&hash_keys, 0, sizeof(hash_keys)); + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP) + hash_keys.addrs.v4addrs.src = fl4->saddr; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP) + hash_keys.addrs.v4addrs.dst = fl4->daddr; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO) + hash_keys.basic.ip_proto = fl4->flowi4_proto; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT) + hash_keys.ports.src = fl4->fl4_sport; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT) + hash_keys.ports.dst = fl4->fl4_dport; + + return flow_hash_from_keys(&hash_keys); +} + /* if skb is set it will be used and fl4 can be NULL */ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, const struct sk_buff *skb, struct flow_keys *flkeys) @@ -1991,6 +2106,12 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, } mhash = flow_hash_from_keys(&hash_keys); break; + case 3: + if (skb) + mhash = fib_multipath_custom_hash_skb(net, skb); + else + mhash = fib_multipath_custom_hash_fl4(net, fl4); + break; } if (multipath_hash) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 45bab3733621..ffb38ea06841 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -30,6 +30,7 @@ #include static int two = 2; +static int three __maybe_unused = 3; static int four = 4; static int thousand = 1000; static int tcp_retr1_max = 255; @@ -1053,7 +1054,7 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_fib_multipath_hash_policy, .extra1 = SYSCTL_ZERO, - .extra2 = &two, + .extra2 = &three, }, { .procname = "fib_multipath_hash_fields", -- cgit v1.2.3