From 2cc8c910f51594dde79764a52b2974ddc8f70509 Mon Sep 17 00:00:00 2001 From: Tobias Schramm Date: Fri, 14 May 2021 13:38:11 +0200 Subject: dt-bindings: net: rockchip-dwmac: add rk3308 gmac compatible The Rockchip RK3308 has a gmac that is not fully compatible with any of the other Rockchip gmacs. This patch adds a compatible string for it. Signed-off-by: Tobias Schramm Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/rockchip-dwmac.yaml | 2 ++ 1 file changed, 2 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/rockchip-dwmac.yaml b/Documentation/devicetree/bindings/net/rockchip-dwmac.yaml index 5acddb6171bf..34a660ad6b30 100644 --- a/Documentation/devicetree/bindings/net/rockchip-dwmac.yaml +++ b/Documentation/devicetree/bindings/net/rockchip-dwmac.yaml @@ -19,6 +19,7 @@ select: - rockchip,rk3128-gmac - rockchip,rk3228-gmac - rockchip,rk3288-gmac + - rockchip,rk3308-gmac - rockchip,rk3328-gmac - rockchip,rk3366-gmac - rockchip,rk3368-gmac @@ -38,6 +39,7 @@ properties: - rockchip,rk3128-gmac - rockchip,rk3228-gmac - rockchip,rk3288-gmac + - rockchip,rk3308-gmac - rockchip,rk3328-gmac - rockchip,rk3366-gmac - rockchip,rk3368-gmac -- cgit v1.2.3 From 227a9ffc1bc77037339530607fe129af3824620e Mon Sep 17 00:00:00 2001 From: Ansuel Smith Date: Fri, 14 May 2021 23:00:00 +0200 Subject: devicetree: net: dsa: qca8k: Document new compatible qca8327 Add support for qca8327 in the compatible list. Signed-off-by: Ansuel Smith Reviewed-by: Andrew Lunn Acked-by: Rob Herring Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/dsa/qca8k.txt | 1 + 1 file changed, 1 insertion(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/dsa/qca8k.txt b/Documentation/devicetree/bindings/net/dsa/qca8k.txt index ccbc6d89325d..1daf68e7ae19 100644 --- a/Documentation/devicetree/bindings/net/dsa/qca8k.txt +++ b/Documentation/devicetree/bindings/net/dsa/qca8k.txt @@ -3,6 +3,7 @@ Required properties: - compatible: should be one of: + "qca,qca8327" "qca,qca8334" "qca,qca8337" -- cgit v1.2.3 From 0c994a28e7518f098c84a3049cb2915780db873a Mon Sep 17 00:00:00 2001 From: Ansuel Smith Date: Fri, 14 May 2021 23:00:11 +0200 Subject: devicetree: bindings: dsa: qca8k: Document internal mdio definition Document new way of declare mapping of internal PHY to port. The new implementation directly declare the PHY connected to the port by adding a node in the switch node. The driver detect this and register an internal mdiobus using the mapping defined in the mdio node. Signed-off-by: Ansuel Smith Reviewed-by: Rob Herring Signed-off-by: David S. Miller --- .../devicetree/bindings/net/dsa/qca8k.txt | 39 ++++++++++++++++++++++ 1 file changed, 39 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/dsa/qca8k.txt b/Documentation/devicetree/bindings/net/dsa/qca8k.txt index 1daf68e7ae19..8c73f67c43ca 100644 --- a/Documentation/devicetree/bindings/net/dsa/qca8k.txt +++ b/Documentation/devicetree/bindings/net/dsa/qca8k.txt @@ -21,6 +21,10 @@ described in dsa/dsa.txt. If the QCA8K switch is connect to a SoC's external mdio-bus each subnode describing a port needs to have a valid phandle referencing the internal PHY it is connected to. This is because there's no N:N mapping of port and PHY id. +To declare the internal mdio-bus configuration, declare a mdio node in the +switch node and declare the phandle for the port referencing the internal +PHY is connected to. In this config a internal mdio-bus is registered and +the mdio MASTER is used as communication. Don't use mixed external and internal mdio-bus configurations, as this is not supported by the hardware. @@ -150,26 +154,61 @@ for the internal master mdio-bus configuration: port@1 { reg = <1>; label = "lan1"; + phy-mode = "internal"; + phy-handle = <&phy_port1>; }; port@2 { reg = <2>; label = "lan2"; + phy-mode = "internal"; + phy-handle = <&phy_port2>; }; port@3 { reg = <3>; label = "lan3"; + phy-mode = "internal"; + phy-handle = <&phy_port3>; }; port@4 { reg = <4>; label = "lan4"; + phy-mode = "internal"; + phy-handle = <&phy_port4>; }; port@5 { reg = <5>; label = "wan"; + phy-mode = "internal"; + phy-handle = <&phy_port5>; + }; + }; + + mdio { + #address-cells = <1>; + #size-cells = <0>; + + phy_port1: phy@0 { + reg = <0>; + }; + + phy_port2: phy@1 { + reg = <1>; + }; + + phy_port3: phy@2 { + reg = <2>; + }; + + phy_port4: phy@3 { + reg = <3>; + }; + + phy_port5: phy@4 { + reg = <4>; }; }; }; -- cgit v1.2.3 From f9da1c9d7fb5e26272a060089c19823f748aab73 Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Mon, 17 May 2021 12:40:36 -0300 Subject: dt-bindings: net: rockchip-dwmac: add rk3568 compatible string Add compatible string for RK3568 gmac, and constrain it to be compatible with Synopsys dwmac 4.20a. Signed-off-by: Ezequiel Garcia Signed-off-by: David S. Miller --- .../devicetree/bindings/net/rockchip-dwmac.yaml | 30 +++++++++++++--------- 1 file changed, 18 insertions(+), 12 deletions(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/rockchip-dwmac.yaml b/Documentation/devicetree/bindings/net/rockchip-dwmac.yaml index 34a660ad6b30..083623c8d718 100644 --- a/Documentation/devicetree/bindings/net/rockchip-dwmac.yaml +++ b/Documentation/devicetree/bindings/net/rockchip-dwmac.yaml @@ -24,6 +24,7 @@ select: - rockchip,rk3366-gmac - rockchip,rk3368-gmac - rockchip,rk3399-gmac + - rockchip,rk3568-gmac - rockchip,rv1108-gmac required: - compatible @@ -33,18 +34,23 @@ allOf: properties: compatible: - items: - - enum: - - rockchip,px30-gmac - - rockchip,rk3128-gmac - - rockchip,rk3228-gmac - - rockchip,rk3288-gmac - - rockchip,rk3308-gmac - - rockchip,rk3328-gmac - - rockchip,rk3366-gmac - - rockchip,rk3368-gmac - - rockchip,rk3399-gmac - - rockchip,rv1108-gmac + oneOf: + - items: + - enum: + - rockchip,px30-gmac + - rockchip,rk3128-gmac + - rockchip,rk3228-gmac + - rockchip,rk3288-gmac + - rockchip,rk3308-gmac + - rockchip,rk3328-gmac + - rockchip,rk3366-gmac + - rockchip,rk3368-gmac + - rockchip,rk3399-gmac + - rockchip,rv1108-gmac + - items: + - enum: + - rockchip,rk3568-gmac + - const: snps,dwmac-4.20a clocks: minItems: 5 -- cgit v1.2.3 From ce5c9c20d364f156c885efed8c71fca2945db00f Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 17 May 2021 21:15:18 +0300 Subject: ipv4: Add a sysctl to control multipath hash fields A subsequent patch will add a new multipath hash policy where the packet fields used for multipath hash calculation are determined by user space. This patch adds a sysctl that allows user space to set these fields. The packet fields are represented using a bitmask and are common between IPv4 and IPv6 to allow user space to use the same numbering across both protocols. For example, to hash based on standard 5-tuple: # sysctl -w net.ipv4.fib_multipath_hash_fields=0x0037 net.ipv4.fib_multipath_hash_fields = 0x0037 The kernel rejects unknown fields, for example: # sysctl -w net.ipv4.fib_multipath_hash_fields=0x1000 sysctl: setting key "net.ipv4.fib_multipath_hash_fields": Invalid argument More fields can be added in the future, if needed. Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.rst | 27 +++++++++++++++++++++ include/net/ip_fib.h | 43 ++++++++++++++++++++++++++++++++++ include/net/netns/ipv4.h | 1 + net/ipv4/fib_frontend.c | 6 +++++ net/ipv4/sysctl_net_ipv4.c | 12 ++++++++++ 5 files changed, 89 insertions(+) (limited to 'Documentation') diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index c2ecc9894fd0..47494798d03b 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -100,6 +100,33 @@ fib_multipath_hash_policy - INTEGER - 1 - Layer 4 - 2 - Layer 3 or inner Layer 3 if present +fib_multipath_hash_fields - UNSIGNED INTEGER + When fib_multipath_hash_policy is set to 3 (custom multipath hash), the + fields used for multipath hash calculation are determined by this + sysctl. + + This value is a bitmask which enables various fields for multipath hash + calculation. + + Possible fields are: + + ====== ============================ + 0x0001 Source IP address + 0x0002 Destination IP address + 0x0004 IP protocol + 0x0008 Unused (Flow Label) + 0x0010 Source port + 0x0020 Destination port + 0x0040 Inner source IP address + 0x0080 Inner destination IP address + 0x0100 Inner IP protocol + 0x0200 Inner Flow Label + 0x0400 Inner source port + 0x0800 Inner destination port + ====== ============================ + + Default: 0x0007 (source IP, destination IP and IP protocol) + fib_sync_mem - UNSIGNED INTEGER Amount of dirty memory from fib entries that can be backlogged before synchronize_rcu is forced. diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index a914f33f3ed5..3ab2563b1a23 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -466,6 +466,49 @@ int fib_sync_up(struct net_device *dev, unsigned char nh_flags); void fib_sync_mtu(struct net_device *dev, u32 orig_mtu); void fib_nhc_update_mtu(struct fib_nh_common *nhc, u32 new, u32 orig); +/* Fields used for sysctl_fib_multipath_hash_fields. + * Common to IPv4 and IPv6. + * + * Add new fields at the end. This is user API. + */ +#define FIB_MULTIPATH_HASH_FIELD_SRC_IP BIT(0) +#define FIB_MULTIPATH_HASH_FIELD_DST_IP BIT(1) +#define FIB_MULTIPATH_HASH_FIELD_IP_PROTO BIT(2) +#define FIB_MULTIPATH_HASH_FIELD_FLOWLABEL BIT(3) +#define FIB_MULTIPATH_HASH_FIELD_SRC_PORT BIT(4) +#define FIB_MULTIPATH_HASH_FIELD_DST_PORT BIT(5) +#define FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP BIT(6) +#define FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP BIT(7) +#define FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO BIT(8) +#define FIB_MULTIPATH_HASH_FIELD_INNER_FLOWLABEL BIT(9) +#define FIB_MULTIPATH_HASH_FIELD_INNER_SRC_PORT BIT(10) +#define FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT BIT(11) + +#define FIB_MULTIPATH_HASH_FIELD_OUTER_MASK \ + (FIB_MULTIPATH_HASH_FIELD_SRC_IP | \ + FIB_MULTIPATH_HASH_FIELD_DST_IP | \ + FIB_MULTIPATH_HASH_FIELD_IP_PROTO | \ + FIB_MULTIPATH_HASH_FIELD_FLOWLABEL | \ + FIB_MULTIPATH_HASH_FIELD_SRC_PORT | \ + FIB_MULTIPATH_HASH_FIELD_DST_PORT) + +#define FIB_MULTIPATH_HASH_FIELD_INNER_MASK \ + (FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP | \ + FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP | \ + FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO | \ + FIB_MULTIPATH_HASH_FIELD_INNER_FLOWLABEL | \ + FIB_MULTIPATH_HASH_FIELD_INNER_SRC_PORT | \ + FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT) + +#define FIB_MULTIPATH_HASH_FIELD_ALL_MASK \ + (FIB_MULTIPATH_HASH_FIELD_OUTER_MASK | \ + FIB_MULTIPATH_HASH_FIELD_INNER_MASK) + +#define FIB_MULTIPATH_HASH_FIELD_DEFAULT_MASK \ + (FIB_MULTIPATH_HASH_FIELD_SRC_IP | \ + FIB_MULTIPATH_HASH_FIELD_DST_IP | \ + FIB_MULTIPATH_HASH_FIELD_IP_PROTO) + #ifdef CONFIG_IP_ROUTE_MULTIPATH int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, const struct sk_buff *skb, struct flow_keys *flkeys); diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index f6af8d96d3c6..746c80cd4257 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -210,6 +210,7 @@ struct netns_ipv4 { #endif #endif #ifdef CONFIG_IP_ROUTE_MULTIPATH + u32 sysctl_fib_multipath_hash_fields; u8 sysctl_fib_multipath_use_neigh; u8 sysctl_fib_multipath_hash_policy; #endif diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index bfb345c88271..af8814a11378 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1514,6 +1514,12 @@ static int __net_init ip_fib_net_init(struct net *net) if (err) return err; +#ifdef CONFIG_IP_ROUTE_MULTIPATH + /* Default to 3-tuple */ + net->ipv4.sysctl_fib_multipath_hash_fields = + FIB_MULTIPATH_HASH_FIELD_DEFAULT_MASK; +#endif + /* Avoid false sharing : Use at least a full cache line */ size = max_t(size_t, size, L1_CACHE_BYTES); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index a62934b9f15a..45bab3733621 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -48,6 +49,8 @@ static int ip_ping_group_range_min[] = { 0, 0 }; static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX }; static u32 u32_max_div_HZ = UINT_MAX / HZ; static int one_day_secs = 24 * 3600; +static u32 fib_multipath_hash_fields_all_mask __maybe_unused = + FIB_MULTIPATH_HASH_FIELD_ALL_MASK; /* obsolete */ static int sysctl_tcp_low_latency __read_mostly; @@ -1052,6 +1055,15 @@ static struct ctl_table ipv4_net_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = &two, }, + { + .procname = "fib_multipath_hash_fields", + .data = &init_net.ipv4.sysctl_fib_multipath_hash_fields, + .maxlen = sizeof(u32), + .mode = 0644, + .proc_handler = proc_douintvec_minmax, + .extra1 = SYSCTL_ONE, + .extra2 = &fib_multipath_hash_fields_all_mask, + }, #endif { .procname = "ip_unprivileged_port_start", -- cgit v1.2.3 From 4253b4986f98da4bfcb6a24d3fc6ff19f28e8420 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 17 May 2021 21:15:19 +0300 Subject: ipv4: Add custom multipath hash policy Add a new multipath hash policy where the packet fields used for hash calculation are determined by user space via the fib_multipath_hash_fields sysctl that was introduced in the previous patch. The current set of available packet fields includes both outer and inner fields, which requires two invocations of the flow dissector. Avoid unnecessary dissection of the outer or inner flows by skipping dissection if none of the outer or inner fields are required. In accordance with the existing policies, when an skb is not available, packet fields are extracted from the provided flow key. In which case, only outer fields are considered. Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.rst | 2 + net/ipv4/route.c | 121 +++++++++++++++++++++++++++++++++ net/ipv4/sysctl_net_ipv4.c | 3 +- 3 files changed, 125 insertions(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 47494798d03b..afdcdc0691d6 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -99,6 +99,8 @@ fib_multipath_hash_policy - INTEGER - 0 - Layer 3 - 1 - Layer 4 - 2 - Layer 3 or inner Layer 3 if present + - 3 - Custom multipath hash. Fields used for multipath hash calculation + are determined by fib_multipath_hash_fields sysctl fib_multipath_hash_fields - UNSIGNED INTEGER When fib_multipath_hash_policy is set to 3 (custom multipath hash), the diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 9d61e969446e..a4c477475f4c 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1906,6 +1906,121 @@ out: hash_keys->addrs.v4addrs.dst = key_iph->daddr; } +static u32 fib_multipath_custom_hash_outer(const struct net *net, + const struct sk_buff *skb, + bool *p_has_inner) +{ + u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields; + struct flow_keys keys, hash_keys; + + if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK)) + return 0; + + memset(&hash_keys, 0, sizeof(hash_keys)); + skb_flow_dissect_flow_keys(skb, &keys, FLOW_DISSECTOR_F_STOP_AT_ENCAP); + + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP) + hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP) + hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO) + hash_keys.basic.ip_proto = keys.basic.ip_proto; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT) + hash_keys.ports.src = keys.ports.src; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT) + hash_keys.ports.dst = keys.ports.dst; + + *p_has_inner = !!(keys.control.flags & FLOW_DIS_ENCAPSULATION); + return flow_hash_from_keys(&hash_keys); +} + +static u32 fib_multipath_custom_hash_inner(const struct net *net, + const struct sk_buff *skb, + bool has_inner) +{ + u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields; + struct flow_keys keys, hash_keys; + + /* We assume the packet carries an encapsulation, but if none was + * encountered during dissection of the outer flow, then there is no + * point in calling the flow dissector again. + */ + if (!has_inner) + return 0; + + if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_MASK)) + return 0; + + memset(&hash_keys, 0, sizeof(hash_keys)); + skb_flow_dissect_flow_keys(skb, &keys, 0); + + if (!(keys.control.flags & FLOW_DIS_ENCAPSULATION)) + return 0; + + if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP) + hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP) + hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst; + } else if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP) + hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP) + hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_FLOWLABEL) + hash_keys.tags.flow_label = keys.tags.flow_label; + } + + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO) + hash_keys.basic.ip_proto = keys.basic.ip_proto; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_PORT) + hash_keys.ports.src = keys.ports.src; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT) + hash_keys.ports.dst = keys.ports.dst; + + return flow_hash_from_keys(&hash_keys); +} + +static u32 fib_multipath_custom_hash_skb(const struct net *net, + const struct sk_buff *skb) +{ + u32 mhash, mhash_inner; + bool has_inner = true; + + mhash = fib_multipath_custom_hash_outer(net, skb, &has_inner); + mhash_inner = fib_multipath_custom_hash_inner(net, skb, has_inner); + + return jhash_2words(mhash, mhash_inner, 0); +} + +static u32 fib_multipath_custom_hash_fl4(const struct net *net, + const struct flowi4 *fl4) +{ + u32 hash_fields = net->ipv4.sysctl_fib_multipath_hash_fields; + struct flow_keys hash_keys; + + if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK)) + return 0; + + memset(&hash_keys, 0, sizeof(hash_keys)); + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP) + hash_keys.addrs.v4addrs.src = fl4->saddr; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP) + hash_keys.addrs.v4addrs.dst = fl4->daddr; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO) + hash_keys.basic.ip_proto = fl4->flowi4_proto; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT) + hash_keys.ports.src = fl4->fl4_sport; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT) + hash_keys.ports.dst = fl4->fl4_dport; + + return flow_hash_from_keys(&hash_keys); +} + /* if skb is set it will be used and fl4 can be NULL */ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, const struct sk_buff *skb, struct flow_keys *flkeys) @@ -1991,6 +2106,12 @@ int fib_multipath_hash(const struct net *net, const struct flowi4 *fl4, } mhash = flow_hash_from_keys(&hash_keys); break; + case 3: + if (skb) + mhash = fib_multipath_custom_hash_skb(net, skb); + else + mhash = fib_multipath_custom_hash_fl4(net, fl4); + break; } if (multipath_hash) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 45bab3733621..ffb38ea06841 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -30,6 +30,7 @@ #include static int two = 2; +static int three __maybe_unused = 3; static int four = 4; static int thousand = 1000; static int tcp_retr1_max = 255; @@ -1053,7 +1054,7 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_fib_multipath_hash_policy, .extra1 = SYSCTL_ZERO, - .extra2 = &two, + .extra2 = &three, }, { .procname = "fib_multipath_hash_fields", -- cgit v1.2.3 From ed13923f980ef84dde0b9010b9e09052dc31a909 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 17 May 2021 21:15:22 +0300 Subject: ipv6: Add a sysctl to control multipath hash fields A subsequent patch will add a new multipath hash policy where the packet fields used for multipath hash calculation are determined by user space. This patch adds a sysctl that allows user space to set these fields. The packet fields are represented using a bitmask and are common between IPv4 and IPv6 to allow user space to use the same numbering across both protocols. For example, to hash based on standard 5-tuple: # sysctl -w net.ipv6.fib_multipath_hash_fields=0x0037 net.ipv6.fib_multipath_hash_fields = 0x0037 To avoid introducing holes in 'struct netns_sysctl_ipv6', move the 'bindv6only' field after the multipath hash fields. The kernel rejects unknown fields, for example: # sysctl -w net.ipv6.fib_multipath_hash_fields=0x1000 sysctl: setting key "net.ipv6.fib_multipath_hash_fields": Invalid argument Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.rst | 27 +++++++++++++++++++++++++++ include/net/ipv6.h | 8 ++++++++ include/net/netns/ipv6.h | 3 ++- net/ipv6/ip6_fib.c | 5 +++++ net/ipv6/sysctl_net_ipv6.c | 12 ++++++++++++ 5 files changed, 54 insertions(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index afdcdc0691d6..4246cc4ae35b 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -1773,6 +1773,33 @@ fib_multipath_hash_policy - INTEGER - 1 - Layer 4 (standard 5-tuple) - 2 - Layer 3 or inner Layer 3 if present +fib_multipath_hash_fields - UNSIGNED INTEGER + When fib_multipath_hash_policy is set to 3 (custom multipath hash), the + fields used for multipath hash calculation are determined by this + sysctl. + + This value is a bitmask which enables various fields for multipath hash + calculation. + + Possible fields are: + + ====== ============================ + 0x0001 Source IP address + 0x0002 Destination IP address + 0x0004 IP protocol + 0x0008 Flow Label + 0x0010 Source port + 0x0020 Destination port + 0x0040 Inner source IP address + 0x0080 Inner destination IP address + 0x0100 Inner IP protocol + 0x0200 Inner Flow Label + 0x0400 Inner source port + 0x0800 Inner destination port + ====== ============================ + + Default: 0x0007 (source IP, destination IP and IP protocol) + anycast_src_echo_reply - BOOLEAN Controls the use of anycast addresses as source addresses for ICMPv6 echo reply diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 448bf2b34759..f2d0ecc257bb 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -926,11 +926,19 @@ static inline int ip6_multipath_hash_policy(const struct net *net) { return net->ipv6.sysctl.multipath_hash_policy; } +static inline u32 ip6_multipath_hash_fields(const struct net *net) +{ + return net->ipv6.sysctl.multipath_hash_fields; +} #else static inline int ip6_multipath_hash_policy(const struct net *net) { return 0; } +static inline u32 ip6_multipath_hash_fields(const struct net *net) +{ + return 0; +} #endif /* diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 6153c8067009..bde0b7adb4a3 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -28,8 +28,9 @@ struct netns_sysctl_ipv6 { int ip6_rt_gc_elasticity; int ip6_rt_mtu_expires; int ip6_rt_min_advmss; - u8 bindv6only; + u32 multipath_hash_fields; u8 multipath_hash_policy; + u8 bindv6only; u8 flowlabel_consistency; u8 auto_flowlabels; int icmpv6_time; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 33d2d6a4e28c..2d650dc24349 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -32,6 +32,7 @@ #include #include +#include #include #include @@ -2355,6 +2356,10 @@ static int __net_init fib6_net_init(struct net *net) if (err) return err; + /* Default to 3-tuple */ + net->ipv6.sysctl.multipath_hash_fields = + FIB_MULTIPATH_HASH_FIELD_DEFAULT_MASK; + spin_lock_init(&net->ipv6.fib6_gc_lock); rwlock_init(&net->ipv6.fib6_walker_lock); INIT_LIST_HEAD(&net->ipv6.fib6_walkers); diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 27102c3d6e1d..ce23c8f7ceb3 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -17,6 +17,7 @@ #include #include #include +#include #ifdef CONFIG_NETLABEL #include #endif @@ -24,6 +25,8 @@ static int two = 2; static int flowlabel_reflect_max = 0x7; static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX; +static u32 rt6_multipath_hash_fields_all_mask = + FIB_MULTIPATH_HASH_FIELD_ALL_MASK; static int proc_rt6_multipath_hash_policy(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) @@ -151,6 +154,15 @@ static struct ctl_table ipv6_table_template[] = { .extra1 = SYSCTL_ZERO, .extra2 = &two, }, + { + .procname = "fib_multipath_hash_fields", + .data = &init_net.ipv6.sysctl.multipath_hash_fields, + .maxlen = sizeof(u32), + .mode = 0644, + .proc_handler = proc_douintvec_minmax, + .extra1 = SYSCTL_ONE, + .extra2 = &rt6_multipath_hash_fields_all_mask, + }, { .procname = "seg6_flowlabel", .data = &init_net.ipv6.sysctl.seg6_flowlabel, -- cgit v1.2.3 From 73c2c5cbb15a8a82d5bea52594b0beb038963bcc Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 17 May 2021 21:15:23 +0300 Subject: ipv6: Add custom multipath hash policy Add a new multipath hash policy where the packet fields used for hash calculation are determined by user space via the fib_multipath_hash_fields sysctl that was introduced in the previous patch. The current set of available packet fields includes both outer and inner fields, which requires two invocations of the flow dissector. Avoid unnecessary dissection of the outer or inner flows by skipping dissection if none of the outer or inner fields are required. In accordance with the existing policies, when an skb is not available, packet fields are extracted from the provided flow key. In which case, only outer fields are considered. Signed-off-by: Ido Schimmel Reviewed-by: David Ahern Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.rst | 2 + net/ipv6/route.c | 125 +++++++++++++++++++++++++++++++++ net/ipv6/sysctl_net_ipv6.c | 3 +- 3 files changed, 129 insertions(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 4246cc4ae35b..a5c250044500 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -1772,6 +1772,8 @@ fib_multipath_hash_policy - INTEGER - 0 - Layer 3 (source and destination addresses plus flow label) - 1 - Layer 4 (standard 5-tuple) - 2 - Layer 3 or inner Layer 3 if present + - 3 - Custom multipath hash. Fields used for multipath hash calculation + are determined by fib_multipath_hash_fields sysctl fib_multipath_hash_fields - UNSIGNED INTEGER When fib_multipath_hash_policy is set to 3 (custom multipath hash), the diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 9935e18146e5..c46889381ae4 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2326,6 +2326,125 @@ out: } } +static u32 rt6_multipath_custom_hash_outer(const struct net *net, + const struct sk_buff *skb, + bool *p_has_inner) +{ + u32 hash_fields = ip6_multipath_hash_fields(net); + struct flow_keys keys, hash_keys; + + if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK)) + return 0; + + memset(&hash_keys, 0, sizeof(hash_keys)); + skb_flow_dissect_flow_keys(skb, &keys, FLOW_DISSECTOR_F_STOP_AT_ENCAP); + + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP) + hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP) + hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO) + hash_keys.basic.ip_proto = keys.basic.ip_proto; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_FLOWLABEL) + hash_keys.tags.flow_label = keys.tags.flow_label; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT) + hash_keys.ports.src = keys.ports.src; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT) + hash_keys.ports.dst = keys.ports.dst; + + *p_has_inner = !!(keys.control.flags & FLOW_DIS_ENCAPSULATION); + return flow_hash_from_keys(&hash_keys); +} + +static u32 rt6_multipath_custom_hash_inner(const struct net *net, + const struct sk_buff *skb, + bool has_inner) +{ + u32 hash_fields = ip6_multipath_hash_fields(net); + struct flow_keys keys, hash_keys; + + /* We assume the packet carries an encapsulation, but if none was + * encountered during dissection of the outer flow, then there is no + * point in calling the flow dissector again. + */ + if (!has_inner) + return 0; + + if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_MASK)) + return 0; + + memset(&hash_keys, 0, sizeof(hash_keys)); + skb_flow_dissect_flow_keys(skb, &keys, 0); + + if (!(keys.control.flags & FLOW_DIS_ENCAPSULATION)) + return 0; + + if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP) + hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP) + hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst; + } else if (keys.control.addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_IP) + hash_keys.addrs.v6addrs.src = keys.addrs.v6addrs.src; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_IP) + hash_keys.addrs.v6addrs.dst = keys.addrs.v6addrs.dst; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_FLOWLABEL) + hash_keys.tags.flow_label = keys.tags.flow_label; + } + + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_IP_PROTO) + hash_keys.basic.ip_proto = keys.basic.ip_proto; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_SRC_PORT) + hash_keys.ports.src = keys.ports.src; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_INNER_DST_PORT) + hash_keys.ports.dst = keys.ports.dst; + + return flow_hash_from_keys(&hash_keys); +} + +static u32 rt6_multipath_custom_hash_skb(const struct net *net, + const struct sk_buff *skb) +{ + u32 mhash, mhash_inner; + bool has_inner = true; + + mhash = rt6_multipath_custom_hash_outer(net, skb, &has_inner); + mhash_inner = rt6_multipath_custom_hash_inner(net, skb, has_inner); + + return jhash_2words(mhash, mhash_inner, 0); +} + +static u32 rt6_multipath_custom_hash_fl6(const struct net *net, + const struct flowi6 *fl6) +{ + u32 hash_fields = ip6_multipath_hash_fields(net); + struct flow_keys hash_keys; + + if (!(hash_fields & FIB_MULTIPATH_HASH_FIELD_OUTER_MASK)) + return 0; + + memset(&hash_keys, 0, sizeof(hash_keys)); + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_IP) + hash_keys.addrs.v6addrs.src = fl6->saddr; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_IP) + hash_keys.addrs.v6addrs.dst = fl6->daddr; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_IP_PROTO) + hash_keys.basic.ip_proto = fl6->flowi6_proto; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_FLOWLABEL) + hash_keys.tags.flow_label = (__force u32)flowi6_get_flowlabel(fl6); + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_SRC_PORT) + hash_keys.ports.src = fl6->fl6_sport; + if (hash_fields & FIB_MULTIPATH_HASH_FIELD_DST_PORT) + hash_keys.ports.dst = fl6->fl6_dport; + + return flow_hash_from_keys(&hash_keys); +} + /* if skb is set it will be used and fl6 can be NULL */ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6, const struct sk_buff *skb, struct flow_keys *flkeys) @@ -2416,6 +2535,12 @@ u32 rt6_multipath_hash(const struct net *net, const struct flowi6 *fl6, } mhash = flow_hash_from_keys(&hash_keys); break; + case 3: + if (skb) + mhash = rt6_multipath_custom_hash_skb(net, skb); + else + mhash = rt6_multipath_custom_hash_fl6(net, fl6); + break; } return mhash >> 1; diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index ce23c8f7ceb3..160bea5db973 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -23,6 +23,7 @@ #endif static int two = 2; +static int three = 3; static int flowlabel_reflect_max = 0x7; static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX; static u32 rt6_multipath_hash_fields_all_mask = @@ -152,7 +153,7 @@ static struct ctl_table ipv6_table_template[] = { .mode = 0644, .proc_handler = proc_rt6_multipath_hash_policy, .extra1 = SYSCTL_ZERO, - .extra2 = &two, + .extra2 = &three, }, { .procname = "fib_multipath_hash_fields", -- cgit v1.2.3 From 9cc52f5a533a321136b9e447042ad9f8224f738c Mon Sep 17 00:00:00 2001 From: Stephan Gerhold Date: Wed, 19 May 2021 11:16:12 +0200 Subject: dt-bindings: net: nfc: s3fwrn5: Add optional clock On some systems, S3FWRN5 depends on having an external clock enabled to function correctly. Allow declaring that clock in the device tree. Signed-off-by: Stephan Gerhold Reviewed-by: Krzysztof Kozlowski Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/nfc/samsung,s3fwrn5.yaml | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/nfc/samsung,s3fwrn5.yaml b/Documentation/devicetree/bindings/net/nfc/samsung,s3fwrn5.yaml index 477066e2b821..081742c2b726 100644 --- a/Documentation/devicetree/bindings/net/nfc/samsung,s3fwrn5.yaml +++ b/Documentation/devicetree/bindings/net/nfc/samsung,s3fwrn5.yaml @@ -27,6 +27,9 @@ properties: reg: maxItems: 1 + clocks: + maxItems: 1 + wake-gpios: maxItems: 1 description: @@ -80,6 +83,8 @@ examples: en-gpios = <&gpf1 4 GPIO_ACTIVE_HIGH>; wake-gpios = <&gpj0 2 GPIO_ACTIVE_HIGH>; + + clocks = <&rpmcc 20>; }; }; # UART example on Raspberry Pi -- cgit v1.2.3 From 4006f986c091cda1a66067f77b6f5704a9618562 Mon Sep 17 00:00:00 2001 From: DENG Qingfang Date: Wed, 19 May 2021 11:32:01 +0800 Subject: dt-bindings: net: dsa: add MT7530 interrupt controller binding Add device tree binding to support MT7530 interrupt controller. Signed-off-by: DENG Qingfang Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Acked-by: Rob Herring Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/dsa/mt7530.txt | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/dsa/mt7530.txt b/Documentation/devicetree/bindings/net/dsa/mt7530.txt index de04626a8e9d..18247ebfc487 100644 --- a/Documentation/devicetree/bindings/net/dsa/mt7530.txt +++ b/Documentation/devicetree/bindings/net/dsa/mt7530.txt @@ -81,6 +81,12 @@ Optional properties: - gpio-controller: Boolean; if defined, MT7530's LED controller will run on GPIO mode. - #gpio-cells: Must be 2 if gpio-controller is defined. +- interrupt-controller: Boolean; Enables the internal interrupt controller. + +If interrupt-controller is defined, the following properties are required. + +- #interrupt-cells: Must be 1. +- interrupts: Parent interrupt for the interrupt controller. See Documentation/devicetree/bindings/net/dsa/dsa.txt for a list of additional required, optional properties and how the integrated switch subnodes must -- cgit v1.2.3 From 75a78026ea1307ef6d6924cc22be3ce9bf453c63 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 6 May 2021 19:55:53 +0200 Subject: dt-bindings: can: rcar_can: Convert to json-schema Convert the Renesas R-Car CAN Controller Device Tree binding documentation to json-schema. Document missing properties. Update the example to match reality. Link: https://lore.kernel.org/r/561c35648e22a3c1e3b5477ae27fd1a50da7fe98.1620323639.git.geert+renesas@glider.be Signed-off-by: Geert Uytterhoeven Reviewed-by: Ulrich Hecht Signed-off-by: Marc Kleine-Budde --- .../devicetree/bindings/net/can/rcar_can.txt | 80 ------------ .../bindings/net/can/renesas,rcar-can.yaml | 139 +++++++++++++++++++++ 2 files changed, 139 insertions(+), 80 deletions(-) delete mode 100644 Documentation/devicetree/bindings/net/can/rcar_can.txt create mode 100644 Documentation/devicetree/bindings/net/can/renesas,rcar-can.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/can/rcar_can.txt b/Documentation/devicetree/bindings/net/can/rcar_can.txt deleted file mode 100644 index 90ac4fef23f5..000000000000 --- a/Documentation/devicetree/bindings/net/can/rcar_can.txt +++ /dev/null @@ -1,80 +0,0 @@ -Renesas R-Car CAN controller Device Tree Bindings -------------------------------------------------- - -Required properties: -- compatible: "renesas,can-r8a7742" if CAN controller is a part of R8A7742 SoC. - "renesas,can-r8a7743" if CAN controller is a part of R8A7743 SoC. - "renesas,can-r8a7744" if CAN controller is a part of R8A7744 SoC. - "renesas,can-r8a7745" if CAN controller is a part of R8A7745 SoC. - "renesas,can-r8a77470" if CAN controller is a part of R8A77470 SoC. - "renesas,can-r8a774a1" if CAN controller is a part of R8A774A1 SoC. - "renesas,can-r8a774b1" if CAN controller is a part of R8A774B1 SoC. - "renesas,can-r8a774c0" if CAN controller is a part of R8A774C0 SoC. - "renesas,can-r8a774e1" if CAN controller is a part of R8A774E1 SoC. - "renesas,can-r8a7778" if CAN controller is a part of R8A7778 SoC. - "renesas,can-r8a7779" if CAN controller is a part of R8A7779 SoC. - "renesas,can-r8a7790" if CAN controller is a part of R8A7790 SoC. - "renesas,can-r8a7791" if CAN controller is a part of R8A7791 SoC. - "renesas,can-r8a7792" if CAN controller is a part of R8A7792 SoC. - "renesas,can-r8a7793" if CAN controller is a part of R8A7793 SoC. - "renesas,can-r8a7794" if CAN controller is a part of R8A7794 SoC. - "renesas,can-r8a7795" if CAN controller is a part of R8A7795 SoC. - "renesas,can-r8a7796" if CAN controller is a part of R8A77960 SoC. - "renesas,can-r8a77961" if CAN controller is a part of R8A77961 SoC. - "renesas,can-r8a77965" if CAN controller is a part of R8A77965 SoC. - "renesas,can-r8a77990" if CAN controller is a part of R8A77990 SoC. - "renesas,can-r8a77995" if CAN controller is a part of R8A77995 SoC. - "renesas,rcar-gen1-can" for a generic R-Car Gen1 compatible device. - "renesas,rcar-gen2-can" for a generic R-Car Gen2 or RZ/G1 - compatible device. - "renesas,rcar-gen3-can" for a generic R-Car Gen3 or RZ/G2 - compatible device. - When compatible with the generic version, nodes must list the - SoC-specific version corresponding to the platform first - followed by the generic version. - -- reg: physical base address and size of the R-Car CAN register map. -- interrupts: interrupt specifier for the sole interrupt. -- clocks: phandles and clock specifiers for 3 CAN clock inputs. -- clock-names: 3 clock input name strings: "clkp1", "clkp2", and "can_clk". -- pinctrl-0: pin control group to be used for this controller. -- pinctrl-names: must be "default". - -Required properties for R8A774A1, R8A774B1, R8A774C0, R8A774E1, R8A7795, -R8A77960, R8A77961, R8A77965, R8A77990, and R8A77995: -For the denoted SoCs, "clkp2" can be CANFD clock. This is a div6 clock and can -be used by both CAN and CAN FD controller at the same time. It needs to be -scaled to maximum frequency if any of these controllers use it. This is done -using the below properties: - -- assigned-clocks: phandle of clkp2(CANFD) clock. -- assigned-clock-rates: maximum frequency of this clock. - -Optional properties: -- renesas,can-clock-select: R-Car CAN Clock Source Select. Valid values are: - <0x0> (default) : Peripheral clock (clkp1) - <0x1> : Peripheral clock (clkp2) - <0x3> : External input clock - -Example -------- - -SoC common .dtsi file: - - can0: can@e6e80000 { - compatible = "renesas,can-r8a7791", "renesas,rcar-gen2-can"; - reg = <0 0xe6e80000 0 0x1000>; - interrupts = <0 186 IRQ_TYPE_LEVEL_HIGH>; - clocks = <&mstp9_clks R8A7791_CLK_RCAN0>, - <&cpg_clocks R8A7791_CLK_RCAN>, <&can_clk>; - clock-names = "clkp1", "clkp2", "can_clk"; - status = "disabled"; - }; - -Board specific .dts file: - -&can0 { - pinctrl-0 = <&can0_pins>; - pinctrl-names = "default"; - status = "okay"; -}; diff --git a/Documentation/devicetree/bindings/net/can/renesas,rcar-can.yaml b/Documentation/devicetree/bindings/net/can/renesas,rcar-can.yaml new file mode 100644 index 000000000000..fadc871fd6b0 --- /dev/null +++ b/Documentation/devicetree/bindings/net/can/renesas,rcar-can.yaml @@ -0,0 +1,139 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/can/renesas,rcar-can.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Renesas R-Car CAN Controller + +maintainers: + - Sergei Shtylyov + +properties: + compatible: + oneOf: + - items: + - enum: + - renesas,can-r8a7778 # R-Car M1-A + - renesas,can-r8a7779 # R-Car H1 + - const: renesas,rcar-gen1-can # R-Car Gen1 + + - items: + - enum: + - renesas,can-r8a7742 # RZ/G1H + - renesas,can-r8a7743 # RZ/G1M + - renesas,can-r8a7744 # RZ/G1N + - renesas,can-r8a7745 # RZ/G1E + - renesas,can-r8a77470 # RZ/G1C + - renesas,can-r8a7790 # R-Car H2 + - renesas,can-r8a7791 # R-Car M2-W + - renesas,can-r8a7792 # R-Car V2H + - renesas,can-r8a7793 # R-Car M2-N + - renesas,can-r8a7794 # R-Car E2 + - const: renesas,rcar-gen2-can # R-Car Gen2 and RZ/G1 + + - items: + - enum: + - renesas,can-r8a774a1 # RZ/G2M + - renesas,can-r8a774b1 # RZ/G2N + - renesas,can-r8a774c0 # RZ/G2E + - renesas,can-r8a774e1 # RZ/G2H + - renesas,can-r8a7795 # R-Car H3 + - renesas,can-r8a7796 # R-Car M3-W + - renesas,can-r8a77961 # R-Car M3-W+ + - renesas,can-r8a77965 # R-Car M3-N + - renesas,can-r8a77990 # R-Car E3 + - renesas,can-r8a77995 # R-Car D3 + - const: renesas,rcar-gen3-can # R-Car Gen3 and RZ/G2 + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + clocks: + maxItems: 3 + + clock-names: + items: + - const: clkp1 + - const: clkp2 + - const: can_clk + + power-domains: + maxItems: 1 + + resets: + maxItems: 1 + + renesas,can-clock-select: + $ref: /schemas/types.yaml#/definitions/uint32 + enum: [ 0, 1, 3 ] + default: 0 + description: | + R-Car CAN Clock Source Select. Valid values are: + <0x0> (default) : Peripheral clock (clkp1) + <0x1> : Peripheral clock (clkp2) + <0x3> : External input clock + + assigned-clocks: + description: + Reference to the clkp2 (CANFD) clock. + On R-Car Gen3 and RZ/G2 SoCs, "clkp2" is the CANFD clock. This is a div6 + clock and can be used by both CAN and CAN FD controllers at the same + time. It needs to be scaled to maximum frequency if any of these + controllers use it. + + assigned-clock-rates: + description: Maximum frequency of the CANFD clock. + +required: + - compatible + - reg + - interrupts + - clocks + - clock-names + - power-domains + +allOf: + - $ref: can-controller.yaml# + + - if: + not: + properties: + compatible: + contains: + const: renesas,rcar-gen1-can + then: + required: + - resets + + - if: + properties: + compatible: + contains: + const: renesas,rcar-gen3-can + then: + required: + - assigned-clocks + - assigned-clock-rates + +unevaluatedProperties: false + +examples: + - | + #include + #include + #include + + can0: can@e6e80000 { + compatible = "renesas,can-r8a7791", "renesas,rcar-gen2-can"; + reg = <0xe6e80000 0x1000>; + interrupts = ; + clocks = <&cpg CPG_MOD 916>, + <&cpg CPG_CORE R8A7791_CLK_RCAN>, <&can_clk>; + clock-names = "clkp1", "clkp2", "can_clk"; + power-domains = <&sysc R8A7791_PD_ALWAYS_ON>; + resets = <&cpg 916>; + }; -- cgit v1.2.3 From 8a5e7d19c8c747e3e7bfa0283a54742b103afcb5 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 6 May 2021 19:55:54 +0200 Subject: dt-bindings: can: rcar_canfd: Convert to json-schema Convert the Renesas R-Car CAN FD Controller Device Tree binding documentation to json-schema. Document missing properties. The CANFD clock needs to be configured for the maximum frequency on R-Car V3M and V3H, too. Update the example to match reality. Link: https://lore.kernel.org/r/905134c87f72e2d8e37c309e0ce28ecd7d4f3992.1620323639.git.geert+renesas@glider.be Signed-off-by: Geert Uytterhoeven Reviewed-by: Ulrich Hecht Signed-off-by: Marc Kleine-Budde --- .../devicetree/bindings/net/can/rcar_canfd.txt | 107 ------------------ .../bindings/net/can/renesas,rcar-canfd.yaml | 122 +++++++++++++++++++++ 2 files changed, 122 insertions(+), 107 deletions(-) delete mode 100644 Documentation/devicetree/bindings/net/can/rcar_canfd.txt create mode 100644 Documentation/devicetree/bindings/net/can/renesas,rcar-canfd.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/can/rcar_canfd.txt b/Documentation/devicetree/bindings/net/can/rcar_canfd.txt deleted file mode 100644 index 248c4ed97a0a..000000000000 --- a/Documentation/devicetree/bindings/net/can/rcar_canfd.txt +++ /dev/null @@ -1,107 +0,0 @@ -Renesas R-Car CAN FD controller Device Tree Bindings ----------------------------------------------------- - -Required properties: -- compatible: Must contain one or more of the following: - - "renesas,rcar-gen3-canfd" for R-Car Gen3 and RZ/G2 compatible controllers. - - "renesas,r8a774a1-canfd" for R8A774A1 (RZ/G2M) compatible controller. - - "renesas,r8a774b1-canfd" for R8A774B1 (RZ/G2N) compatible controller. - - "renesas,r8a774c0-canfd" for R8A774C0 (RZ/G2E) compatible controller. - - "renesas,r8a774e1-canfd" for R8A774E1 (RZ/G2H) compatible controller. - - "renesas,r8a7795-canfd" for R8A7795 (R-Car H3) compatible controller. - - "renesas,r8a7796-canfd" for R8A7796 (R-Car M3-W) compatible controller. - - "renesas,r8a77965-canfd" for R8A77965 (R-Car M3-N) compatible controller. - - "renesas,r8a77970-canfd" for R8A77970 (R-Car V3M) compatible controller. - - "renesas,r8a77980-canfd" for R8A77980 (R-Car V3H) compatible controller. - - "renesas,r8a77990-canfd" for R8A77990 (R-Car E3) compatible controller. - - "renesas,r8a77995-canfd" for R8A77995 (R-Car D3) compatible controller. - - When compatible with the generic version, nodes must list the - SoC-specific version corresponding to the platform first, followed by the - family-specific and/or generic versions. - -- reg: physical base address and size of the R-Car CAN FD register map. -- interrupts: interrupt specifiers for the Channel & Global interrupts -- clocks: phandles and clock specifiers for 3 clock inputs. -- clock-names: 3 clock input name strings: "fck", "canfd", "can_clk". -- pinctrl-0: pin control group to be used for this controller. -- pinctrl-names: must be "default". - -Required child nodes: -The controller supports two channels and each is represented as a child node. -The name of the child nodes are "channel0" and "channel1" respectively. Each -child node supports the "status" property only, which is used to -enable/disable the respective channel. - -Required properties for R8A774A1, R8A774B1, R8A774C0, R8A774E1, R8A7795, -R8A7796, R8A77965, R8A77990, and R8A77995: -In the denoted SoCs, canfd clock is a div6 clock and can be used by both CAN -and CAN FD controller at the same time. It needs to be scaled to maximum -frequency if any of these controllers use it. This is done using the below -properties: - -- assigned-clocks: phandle of canfd clock. -- assigned-clock-rates: maximum frequency of this clock. - -Optional property: -The controller can operate in either CAN FD only mode (default) or -Classical CAN only mode. The mode is global to both the channels. In order to -enable the later, define the following optional property. - - renesas,no-can-fd: puts the controller in Classical CAN only mode. - -Example -------- - -SoC common .dtsi file: - - canfd: can@e66c0000 { - compatible = "renesas,r8a7795-canfd", - "renesas,rcar-gen3-canfd"; - reg = <0 0xe66c0000 0 0x8000>; - interrupts = , - ; - clocks = <&cpg CPG_MOD 914>, - <&cpg CPG_CORE R8A7795_CLK_CANFD>, - <&can_clk>; - clock-names = "fck", "canfd", "can_clk"; - assigned-clocks = <&cpg CPG_CORE R8A7795_CLK_CANFD>; - assigned-clock-rates = <40000000>; - power-domains = <&cpg>; - status = "disabled"; - - channel0 { - status = "disabled"; - }; - - channel1 { - status = "disabled"; - }; - }; - -Board specific .dts file: - -E.g. below enables Channel 1 alone in the board in Classical CAN only mode. - -&canfd { - pinctrl-0 = <&canfd1_pins>; - pinctrl-names = "default"; - renesas,no-can-fd; - status = "okay"; - - channel1 { - status = "okay"; - }; -}; - -E.g. below enables Channel 0 alone in the board using External clock -as fCAN clock. - -&canfd { - pinctrl-0 = <&canfd0_pins>, <&can_clk_pins>; - pinctrl-names = "default"; - status = "okay"; - - channel0 { - status = "okay"; - }; -}; diff --git a/Documentation/devicetree/bindings/net/can/renesas,rcar-canfd.yaml b/Documentation/devicetree/bindings/net/can/renesas,rcar-canfd.yaml new file mode 100644 index 000000000000..0b33ba9ccb47 --- /dev/null +++ b/Documentation/devicetree/bindings/net/can/renesas,rcar-canfd.yaml @@ -0,0 +1,122 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/can/renesas,rcar-canfd.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Renesas R-Car CAN FD Controller + +maintainers: + - Fabrizio Castro + +allOf: + - $ref: can-controller.yaml# + +properties: + compatible: + oneOf: + - items: + - enum: + - renesas,r8a774a1-canfd # RZ/G2M + - renesas,r8a774b1-canfd # RZ/G2N + - renesas,r8a774c0-canfd # RZ/G2E + - renesas,r8a774e1-canfd # RZ/G2H + - renesas,r8a7795-canfd # R-Car H3 + - renesas,r8a7796-canfd # R-Car M3-W + - renesas,r8a77965-canfd # R-Car M3-N + - renesas,r8a77970-canfd # R-Car V3M + - renesas,r8a77980-canfd # R-Car V3H + - renesas,r8a77990-canfd # R-Car E3 + - renesas,r8a77995-canfd # R-Car D3 + - const: renesas,rcar-gen3-canfd # R-Car Gen3 and RZ/G2 + + reg: + maxItems: 1 + + interrupts: + items: + - description: Channel interrupt + - description: Global interrupt + + clocks: + maxItems: 3 + + clock-names: + items: + - const: fck + - const: canfd + - const: can_clk + + power-domains: + maxItems: 1 + + resets: + maxItems: 1 + + renesas,no-can-fd: + $ref: /schemas/types.yaml#/definitions/flag + description: + The controller can operate in either CAN FD only mode (default) or + Classical CAN only mode. The mode is global to both the channels. + Specify this property to put the controller in Classical CAN only mode. + + assigned-clocks: + description: + Reference to the CANFD clock. The CANFD clock is a div6 clock and can be + used by both CAN (if present) and CAN FD controllers at the same time. + It needs to be scaled to maximum frequency if any of these controllers + use it. + + assigned-clock-rates: + description: Maximum frequency of the CANFD clock. + +patternProperties: + "^channel[01]$": + type: object + description: + The controller supports two channels and each is represented as a child + node. Each child node supports the "status" property only, which + is used to enable/disable the respective channel. + +required: + - compatible + - reg + - interrupts + - clocks + - clock-names + - power-domains + - resets + - assigned-clocks + - assigned-clock-rates + - channel0 + - channel1 + +unevaluatedProperties: false + +examples: + - | + #include + #include + #include + + canfd: can@e66c0000 { + compatible = "renesas,r8a7795-canfd", + "renesas,rcar-gen3-canfd"; + reg = <0xe66c0000 0x8000>; + interrupts = , + ; + clocks = <&cpg CPG_MOD 914>, + <&cpg CPG_CORE R8A7795_CLK_CANFD>, + <&can_clk>; + clock-names = "fck", "canfd", "can_clk"; + assigned-clocks = <&cpg CPG_CORE R8A7795_CLK_CANFD>; + assigned-clock-rates = <40000000>; + power-domains = <&sysc R8A7795_PD_ALWAYS_ON>; + resets = <&cpg 914>; + + channel0 { + }; + + channel1 { + }; + }; -- cgit v1.2.3 From fc8c262e0eb5aa248af5051377e4ff3348841ac5 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 26 May 2021 08:24:57 -0700 Subject: bpf, docs: Add llvm_reloc.rst to explain llvm bpf relocations LLVM upstream commit https://reviews.llvm.org/D102712 made some changes to bpf relocations to make them llvm linker lld friendly. The scope of existing relocations R_BPF_64_{64,32} is narrowed and new relocations R_BPF_64_{ABS32,ABS64,NODYLD32} are introduced. Let us add some documentation about llvm bpf relocations so people can understand how to resolve them properly in their respective tools. Signed-off-by: Yonghong Song Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20210526152457.335210-1-yhs@fb.com --- Documentation/bpf/index.rst | 1 + Documentation/bpf/llvm_reloc.rst | 240 +++++++++++++++++++++++++++++++++ tools/testing/selftests/bpf/README.rst | 19 +++ 3 files changed, 260 insertions(+) create mode 100644 Documentation/bpf/llvm_reloc.rst (limited to 'Documentation') diff --git a/Documentation/bpf/index.rst b/Documentation/bpf/index.rst index a702f67dd45f..93e8cf12a6d4 100644 --- a/Documentation/bpf/index.rst +++ b/Documentation/bpf/index.rst @@ -84,6 +84,7 @@ Other :maxdepth: 1 ringbuf + llvm_reloc .. Links: .. _networking-filter: ../networking/filter.rst diff --git a/Documentation/bpf/llvm_reloc.rst b/Documentation/bpf/llvm_reloc.rst new file mode 100644 index 000000000000..ca8957d5b671 --- /dev/null +++ b/Documentation/bpf/llvm_reloc.rst @@ -0,0 +1,240 @@ +.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) + +==================== +BPF LLVM Relocations +==================== + +This document describes LLVM BPF backend relocation types. + +Relocation Record +================= + +LLVM BPF backend records each relocation with the following 16-byte +ELF structure:: + + typedef struct + { + Elf64_Addr r_offset; // Offset from the beginning of section. + Elf64_Xword r_info; // Relocation type and symbol index. + } Elf64_Rel; + +For example, for the following code:: + + int g1 __attribute__((section("sec"))); + int g2 __attribute__((section("sec"))); + static volatile int l1 __attribute__((section("sec"))); + static volatile int l2 __attribute__((section("sec"))); + int test() { + return g1 + g2 + l1 + l2; + } + +Compiled with ``clang -target bpf -O2 -c test.c``, the following is +the code with ``llvm-objdump -dr test.o``:: + + 0: 18 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r1 = 0 ll + 0000000000000000: R_BPF_64_64 g1 + 2: 61 11 00 00 00 00 00 00 r1 = *(u32 *)(r1 + 0) + 3: 18 02 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r2 = 0 ll + 0000000000000018: R_BPF_64_64 g2 + 5: 61 20 00 00 00 00 00 00 r0 = *(u32 *)(r2 + 0) + 6: 0f 10 00 00 00 00 00 00 r0 += r1 + 7: 18 01 00 00 08 00 00 00 00 00 00 00 00 00 00 00 r1 = 8 ll + 0000000000000038: R_BPF_64_64 sec + 9: 61 11 00 00 00 00 00 00 r1 = *(u32 *)(r1 + 0) + 10: 0f 10 00 00 00 00 00 00 r0 += r1 + 11: 18 01 00 00 0c 00 00 00 00 00 00 00 00 00 00 00 r1 = 12 ll + 0000000000000058: R_BPF_64_64 sec + 13: 61 11 00 00 00 00 00 00 r1 = *(u32 *)(r1 + 0) + 14: 0f 10 00 00 00 00 00 00 r0 += r1 + 15: 95 00 00 00 00 00 00 00 exit + +There are four relations in the above for four ``LD_imm64`` instructions. +The following ``llvm-readelf -r test.o`` shows the binary values of the four +relocations:: + + Relocation section '.rel.text' at offset 0x190 contains 4 entries: + Offset Info Type Symbol's Value Symbol's Name + 0000000000000000 0000000600000001 R_BPF_64_64 0000000000000000 g1 + 0000000000000018 0000000700000001 R_BPF_64_64 0000000000000004 g2 + 0000000000000038 0000000400000001 R_BPF_64_64 0000000000000000 sec + 0000000000000058 0000000400000001 R_BPF_64_64 0000000000000000 sec + +Each relocation is represented by ``Offset`` (8 bytes) and ``Info`` (8 bytes). +For example, the first relocation corresponds to the first instruction +(Offset 0x0) and the corresponding ``Info`` indicates the relocation type +of ``R_BPF_64_64`` (type 1) and the entry in the symbol table (entry 6). +The following is the symbol table with ``llvm-readelf -s test.o``:: + + Symbol table '.symtab' contains 8 entries: + Num: Value Size Type Bind Vis Ndx Name + 0: 0000000000000000 0 NOTYPE LOCAL DEFAULT UND + 1: 0000000000000000 0 FILE LOCAL DEFAULT ABS test.c + 2: 0000000000000008 4 OBJECT LOCAL DEFAULT 4 l1 + 3: 000000000000000c 4 OBJECT LOCAL DEFAULT 4 l2 + 4: 0000000000000000 0 SECTION LOCAL DEFAULT 4 sec + 5: 0000000000000000 128 FUNC GLOBAL DEFAULT 2 test + 6: 0000000000000000 4 OBJECT GLOBAL DEFAULT 4 g1 + 7: 0000000000000004 4 OBJECT GLOBAL DEFAULT 4 g2 + +The 6th entry is global variable ``g1`` with value 0. + +Similarly, the second relocation is at ``.text`` offset ``0x18``, instruction 3, +for global variable ``g2`` which has a symbol value 4, the offset +from the start of ``.data`` section. + +The third and fourth relocations refers to static variables ``l1`` +and ``l2``. From ``.rel.text`` section above, it is not clear +which symbols they really refers to as they both refers to +symbol table entry 4, symbol ``sec``, which has ``STT_SECTION`` type +and represents a section. So for static variable or function, +the section offset is written to the original insn +buffer, which is called ``A`` (addend). Looking at +above insn ``7`` and ``11``, they have section offset ``8`` and ``12``. +From symbol table, we can find that they correspond to entries ``2`` +and ``3`` for ``l1`` and ``l2``. + +In general, the ``A`` is 0 for global variables and functions, +and is the section offset or some computation result based on +section offset for static variables/functions. The non-section-offset +case refers to function calls. See below for more details. + +Different Relocation Types +========================== + +Six relocation types are supported. The following is an overview and +``S`` represents the value of the symbol in the symbol table:: + + Enum ELF Reloc Type Description BitSize Offset Calculation + 0 R_BPF_NONE None + 1 R_BPF_64_64 ld_imm64 insn 32 r_offset + 4 S + A + 2 R_BPF_64_ABS64 normal data 64 r_offset S + A + 3 R_BPF_64_ABS32 normal data 32 r_offset S + A + 4 R_BPF_64_NODYLD32 .BTF[.ext] data 32 r_offset S + A + 10 R_BPF_64_32 call insn 32 r_offset + 4 (S + A) / 8 - 1 + +For example, ``R_BPF_64_64`` relocation type is used for ``ld_imm64`` instruction. +The actual to-be-relocated data (0 or section offset) +is stored at ``r_offset + 4`` and the read/write +data bitsize is 32 (4 bytes). The relocation can be resolved with +the symbol value plus implicit addend. Note that the ``BitSize`` is 32 which +means the section offset must be less than or equal to ``UINT32_MAX`` and this +is enforced by LLVM BPF backend. + +In another case, ``R_BPF_64_ABS64`` relocation type is used for normal 64-bit data. +The actual to-be-relocated data is stored at ``r_offset`` and the read/write data +bitsize is 64 (8 bytes). The relocation can be resolved with +the symbol value plus implicit addend. + +Both ``R_BPF_64_ABS32`` and ``R_BPF_64_NODYLD32`` types are for 32-bit data. +But ``R_BPF_64_NODYLD32`` specifically refers to relocations in ``.BTF`` and +``.BTF.ext`` sections. For cases like bcc where llvm ``ExecutionEngine RuntimeDyld`` +is involved, ``R_BPF_64_NODYLD32`` types of relocations should not be resolved +to actual function/variable address. Otherwise, ``.BTF`` and ``.BTF.ext`` +become unusable by bcc and kernel. + +Type ``R_BPF_64_32`` is used for call instruction. The call target section +offset is stored at ``r_offset + 4`` (32bit) and calculated as +``(S + A) / 8 - 1``. + +Examples +======== + +Types ``R_BPF_64_64`` and ``R_BPF_64_32`` are used to resolve ``ld_imm64`` +and ``call`` instructions. For example:: + + __attribute__((noinline)) __attribute__((section("sec1"))) + int gfunc(int a, int b) { + return a * b; + } + static __attribute__((noinline)) __attribute__((section("sec1"))) + int lfunc(int a, int b) { + return a + b; + } + int global __attribute__((section("sec2"))); + int test(int a, int b) { + return gfunc(a, b) + lfunc(a, b) + global; + } + +Compiled with ``clang -target bpf -O2 -c test.c``, we will have +following code with `llvm-objdump -dr test.o``:: + + Disassembly of section .text: + + 0000000000000000 : + 0: bf 26 00 00 00 00 00 00 r6 = r2 + 1: bf 17 00 00 00 00 00 00 r7 = r1 + 2: 85 10 00 00 ff ff ff ff call -1 + 0000000000000010: R_BPF_64_32 gfunc + 3: bf 08 00 00 00 00 00 00 r8 = r0 + 4: bf 71 00 00 00 00 00 00 r1 = r7 + 5: bf 62 00 00 00 00 00 00 r2 = r6 + 6: 85 10 00 00 02 00 00 00 call 2 + 0000000000000030: R_BPF_64_32 sec1 + 7: 0f 80 00 00 00 00 00 00 r0 += r8 + 8: 18 01 00 00 00 00 00 00 00 00 00 00 00 00 00 00 r1 = 0 ll + 0000000000000040: R_BPF_64_64 global + 10: 61 11 00 00 00 00 00 00 r1 = *(u32 *)(r1 + 0) + 11: 0f 10 00 00 00 00 00 00 r0 += r1 + 12: 95 00 00 00 00 00 00 00 exit + + Disassembly of section sec1: + + 0000000000000000 : + 0: bf 20 00 00 00 00 00 00 r0 = r2 + 1: 2f 10 00 00 00 00 00 00 r0 *= r1 + 2: 95 00 00 00 00 00 00 00 exit + + 0000000000000018 : + 3: bf 20 00 00 00 00 00 00 r0 = r2 + 4: 0f 10 00 00 00 00 00 00 r0 += r1 + 5: 95 00 00 00 00 00 00 00 exit + +The first relocation corresponds to ``gfunc(a, b)`` where ``gfunc`` has a value of 0, +so the ``call`` instruction offset is ``(0 + 0)/8 - 1 = -1``. +The second relocation corresponds to ``lfunc(a, b)`` where ``lfunc`` has a section +offset ``0x18``, so the ``call`` instruction offset is ``(0 + 0x18)/8 - 1 = 2``. +The third relocation corresponds to ld_imm64 of ``global``, which has a section +offset ``0``. + +The following is an example to show how R_BPF_64_ABS64 could be generated:: + + int global() { return 0; } + struct t { void *g; } gbl = { global }; + +Compiled with ``clang -target bpf -O2 -g -c test.c``, we will see a +relocation below in ``.data`` section with command +``llvm-readelf -r test.o``:: + + Relocation section '.rel.data' at offset 0x458 contains 1 entries: + Offset Info Type Symbol's Value Symbol's Name + 0000000000000000 0000000700000002 R_BPF_64_ABS64 0000000000000000 global + +The relocation says the first 8-byte of ``.data`` section should be +filled with address of ``global`` variable. + +With ``llvm-readelf`` output, we can see that dwarf sections have a bunch of +``R_BPF_64_ABS32`` and ``R_BPF_64_ABS64`` relocations:: + + Relocation section '.rel.debug_info' at offset 0x468 contains 13 entries: + Offset Info Type Symbol's Value Symbol's Name + 0000000000000006 0000000300000003 R_BPF_64_ABS32 0000000000000000 .debug_abbrev + 000000000000000c 0000000400000003 R_BPF_64_ABS32 0000000000000000 .debug_str + 0000000000000012 0000000400000003 R_BPF_64_ABS32 0000000000000000 .debug_str + 0000000000000016 0000000600000003 R_BPF_64_ABS32 0000000000000000 .debug_line + 000000000000001a 0000000400000003 R_BPF_64_ABS32 0000000000000000 .debug_str + 000000000000001e 0000000200000002 R_BPF_64_ABS64 0000000000000000 .text + 000000000000002b 0000000400000003 R_BPF_64_ABS32 0000000000000000 .debug_str + 0000000000000037 0000000800000002 R_BPF_64_ABS64 0000000000000000 gbl + 0000000000000040 0000000400000003 R_BPF_64_ABS32 0000000000000000 .debug_str + ...... + +The .BTF/.BTF.ext sections has R_BPF_64_NODYLD32 relocations:: + + Relocation section '.rel.BTF' at offset 0x538 contains 1 entries: + Offset Info Type Symbol's Value Symbol's Name + 0000000000000084 0000000800000004 R_BPF_64_NODYLD32 0000000000000000 gbl + + Relocation section '.rel.BTF.ext' at offset 0x548 contains 2 entries: + Offset Info Type Symbol's Value Symbol's Name + 000000000000002c 0000000200000004 R_BPF_64_NODYLD32 0000000000000000 .text + 0000000000000040 0000000200000004 R_BPF_64_NODYLD32 0000000000000000 .text diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst index 3353778c30f8..8deec1ca9150 100644 --- a/tools/testing/selftests/bpf/README.rst +++ b/tools/testing/selftests/bpf/README.rst @@ -202,3 +202,22 @@ generate valid BTF information for weak variables. Please make sure you use Clang that contains the fix. __ https://reviews.llvm.org/D100362 + +Clang relocation changes +======================== + +Clang 13 patch `clang reloc patch`_ made some changes on relocations such +that existing relocation types are broken into more types and +each new type corresponds to only one way to resolve relocation. +See `kernel llvm reloc`_ for more explanation and some examples. +Using clang 13 to compile old libbpf which has static linker support, +there will be a compilation failure:: + + libbpf: ELF relo #0 in section #6 has unexpected type 2 in .../bpf_tcp_nogpl.o + +Here, ``type 2`` refers to new relocation type ``R_BPF_64_ABS64``. +To fix this issue, user newer libbpf. + +.. Links +.. _clang reloc patch: https://reviews.llvm.org/D102712 +.. _kernel llvm reloc: /Documentation/bpf/llvm_reloc.rst -- cgit v1.2.3 From 744ee14054c8ca5ad0fe3ab9172709c17d8a240a Mon Sep 17 00:00:00 2001 From: Matthieu Baerts Date: Thu, 27 May 2021 16:54:30 -0700 Subject: mptcp: restrict values of 'enabled' sysctl To avoid confusions, it seems better to parse this sysctl parameter as a boolean. We use it as a boolean, no need to parse an integer and bring confusions if we see a value different from 0 and 1, especially with this parameter name: enabled. It seems fine to do this modification because the default value is 1 (enabled). Then the only other interesting value to set is 0 (disabled). All other values would not have changed the default behaviour. Suggested-by: Florian Westphal Acked-by: Florian Westphal Signed-off-by: Matthieu Baerts Signed-off-by: Mat Martineau Signed-off-by: Jakub Kicinski --- Documentation/networking/mptcp-sysctl.rst | 8 ++++---- net/mptcp/ctrl.c | 8 +++++--- 2 files changed, 9 insertions(+), 7 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/mptcp-sysctl.rst b/Documentation/networking/mptcp-sysctl.rst index 6af0196c4297..3b352e5f6300 100644 --- a/Documentation/networking/mptcp-sysctl.rst +++ b/Documentation/networking/mptcp-sysctl.rst @@ -7,13 +7,13 @@ MPTCP Sysfs variables /proc/sys/net/mptcp/* Variables =============================== -enabled - INTEGER +enabled - BOOLEAN Control whether MPTCP sockets can be created. - MPTCP sockets can be created if the value is nonzero. This is - a per-namespace sysctl. + MPTCP sockets can be created if the value is 1. This is a + per-namespace sysctl. - Default: 1 + Default: 1 (enabled) add_addr_timeout - INTEGER (seconds) Set the timeout after which an ADD_ADDR control message will be diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c index a3b15ed60b77..1ec4d36a39f0 100644 --- a/net/mptcp/ctrl.c +++ b/net/mptcp/ctrl.c @@ -21,7 +21,7 @@ struct mptcp_pernet { struct ctl_table_header *ctl_table_hdr; #endif - int mptcp_enabled; + u8 mptcp_enabled; unsigned int add_addr_timeout; }; @@ -50,12 +50,14 @@ static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet) static struct ctl_table mptcp_sysctl_table[] = { { .procname = "enabled", - .maxlen = sizeof(int), + .maxlen = sizeof(u8), .mode = 0644, /* users with CAP_NET_ADMIN or root (not and) can change this * value, same as other sysctl or the 'net' tree. */ - .proc_handler = proc_dointvec, + .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE }, { .procname = "add_addr_timeout", -- cgit v1.2.3 From 710b797cf61b318995db6d31767a532162db113d Mon Sep 17 00:00:00 2001 From: Sharath Chandra Vurukala Date: Wed, 2 Jun 2021 00:58:34 +0530 Subject: docs: networking: Add documentation for MAPv5 Adding documentation explaining the new MAPv4/v5 packet formats and the corresponding checksum offload headers. Signed-off-by: Sharath Chandra Vurukala Signed-off-by: David S. Miller --- .../device_drivers/cellular/qualcomm/rmnet.rst | 126 +++++++++++++++++++-- 1 file changed, 114 insertions(+), 12 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/device_drivers/cellular/qualcomm/rmnet.rst b/Documentation/networking/device_drivers/cellular/qualcomm/rmnet.rst index 70643b58de05..4118384cf8eb 100644 --- a/Documentation/networking/device_drivers/cellular/qualcomm/rmnet.rst +++ b/Documentation/networking/device_drivers/cellular/qualcomm/rmnet.rst @@ -27,34 +27,136 @@ these MAP frames and send them to appropriate PDN's. 2. Packet format ================ -a. MAP packet (data / control) +a. MAP packet v1 (data / control) -MAP header has the same endianness of the IP packet. +MAP header fields are in big endian format. Packet format:: - Bit 0 1 2-7 8 - 15 16 - 31 + Bit 0 1 2-7 8-15 16-31 Function Command / Data Reserved Pad Multiplexer ID Payload length - Bit 32 - x - Function Raw Bytes + + Bit 32-x + Function Raw bytes Command (1)/ Data (0) bit value is to indicate if the packet is a MAP command -or data packet. Control packet is used for transport level flow control. Data +or data packet. Command packet is used for transport level flow control. Data packets are standard IP packets. -Reserved bits are usually zeroed out and to be ignored by receiver. +Reserved bits must be zero when sent and ignored when received. -Padding is number of bytes to be added for 4 byte alignment if required by -hardware. +Padding is the number of bytes to be appended to the payload to +ensure 4 byte alignment. Multiplexer ID is to indicate the PDN on which data has to be sent. Payload length includes the padding length but does not include MAP header length. -b. MAP packet (command specific):: +b. Map packet v4 (data / control) + +MAP header fields are in big endian format. + +Packet format:: + + Bit 0 1 2-7 8-15 16-31 + Function Command / Data Reserved Pad Multiplexer ID Payload length + + Bit 32-(x-33) (x-32)-x + Function Raw bytes Checksum offload header + +Command (1)/ Data (0) bit value is to indicate if the packet is a MAP command +or data packet. Command packet is used for transport level flow control. Data +packets are standard IP packets. + +Reserved bits must be zero when sent and ignored when received. + +Padding is the number of bytes to be appended to the payload to +ensure 4 byte alignment. + +Multiplexer ID is to indicate the PDN on which data has to be sent. + +Payload length includes the padding length but does not include MAP header +length. + +Checksum offload header, has the information about the checksum processing done +by the hardware.Checksum offload header fields are in big endian format. + +Packet format:: + + Bit 0-14 15 16-31 + Function Reserved Valid Checksum start offset + + Bit 31-47 48-64 + Function Checksum length Checksum value + +Reserved bits must be zero when sent and ignored when received. + +Valid bit indicates whether the partial checksum is calculated and is valid. +Set to 1, if its is valid. Set to 0 otherwise. + +Padding is the number of bytes to be appended to the payload to +ensure 4 byte alignment. + +Checksum start offset, Indicates the offset in bytes from the beginning of the +IP header, from which modem computed checksum. + +Checksum length is the Length in bytes starting from CKSUM_START_OFFSET, +over which checksum is computed. + +Checksum value, indicates the checksum computed. + +c. MAP packet v5 (data / control) + +MAP header fields are in big endian format. + +Packet format:: + + Bit 0 1 2-7 8-15 16-31 + Function Command / Data Next header Pad Multiplexer ID Payload length + + Bit 32-x + Function Raw bytes + +Command (1)/ Data (0) bit value is to indicate if the packet is a MAP command +or data packet. Command packet is used for transport level flow control. Data +packets are standard IP packets. + +Next header is used to indicate the presence of another header, currently is +limited to checksum header. + +Padding is the number of bytes to be appended to the payload to +ensure 4 byte alignment. + +Multiplexer ID is to indicate the PDN on which data has to be sent. + +Payload length includes the padding length but does not include MAP header +length. + +d. Checksum offload header v5 + +Checksum offload header fields are in big endian format. + + Bit 0 - 6 7 8-15 16-31 + Function Header Type Next Header Checksum Valid Reserved + +Header Type is to indicate the type of header, this usually is set to CHECKSUM + +Header types += ========================================== +0 Reserved +1 Reserved +2 checksum header + +Checksum Valid is to indicate whether the header checksum is valid. Value of 1 +implies that checksum is calculated on this packet and is valid, value of 0 +indicates that the calculated packet checksum is invalid. + +Reserved bits must be zero when sent and ignored when received. + +e. MAP packet v1/v5 (command specific):: - Bit 0 1 2-7 8 - 15 16 - 31 + Bit 0 1 2-7 8 - 15 16 - 31 Function Command Reserved Pad Multiplexer ID Payload length Bit 32 - 39 40 - 45 46 - 47 48 - 63 Function Command name Reserved Command Type Reserved @@ -74,7 +176,7 @@ Command types 3 is for error during processing of commands = ========================================== -c. Aggregation +f. Aggregation Aggregation is multiple MAP packets (can be data or command) delivered to rmnet in a single linear skb. rmnet will process the individual -- cgit v1.2.3 From 011ab4dffe965c16c2ba27c0b97d42d41a97b4da Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Thu, 27 May 2021 22:28:15 +0200 Subject: dt-bindings: net: brcm,iproc-mdio: convert to the json-schema MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This helps validating DTS files. Introduced changes: 1. Swapped #address-cells and #size-cells values 2. Renamed node: s/enet-gphy/ethernet-phy@/ Signed-off-by: Rafał Miłecki Signed-off-by: David S. Miller --- .../devicetree/bindings/net/brcm,iproc-mdio.txt | 23 ------------- .../devicetree/bindings/net/brcm,iproc-mdio.yaml | 38 ++++++++++++++++++++++ 2 files changed, 38 insertions(+), 23 deletions(-) delete mode 100644 Documentation/devicetree/bindings/net/brcm,iproc-mdio.txt create mode 100644 Documentation/devicetree/bindings/net/brcm,iproc-mdio.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/brcm,iproc-mdio.txt b/Documentation/devicetree/bindings/net/brcm,iproc-mdio.txt deleted file mode 100644 index 8ba9ed11d716..000000000000 --- a/Documentation/devicetree/bindings/net/brcm,iproc-mdio.txt +++ /dev/null @@ -1,23 +0,0 @@ -* Broadcom iProc MDIO bus controller - -Required properties: -- compatible: should be "brcm,iproc-mdio" -- reg: address and length of the register set for the MDIO interface -- #size-cells: must be 1 -- #address-cells: must be 0 - -Child nodes of this MDIO bus controller node are standard Ethernet PHY device -nodes as described in Documentation/devicetree/bindings/net/phy.txt - -Example: - -mdio@18002000 { - compatible = "brcm,iproc-mdio"; - reg = <0x18002000 0x8>; - #size-cells = <1>; - #address-cells = <0>; - - enet-gphy@0 { - reg = <0>; - }; -}; diff --git a/Documentation/devicetree/bindings/net/brcm,iproc-mdio.yaml b/Documentation/devicetree/bindings/net/brcm,iproc-mdio.yaml new file mode 100644 index 000000000000..3031395f7e6e --- /dev/null +++ b/Documentation/devicetree/bindings/net/brcm,iproc-mdio.yaml @@ -0,0 +1,38 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/brcm,iproc-mdio.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Broadcom iProc MDIO bus controller + +maintainers: + - Rafał Miłecki + +allOf: + - $ref: mdio.yaml# + +properties: + compatible: + const: brcm,iproc-mdio + + reg: + maxItems: 1 + +unevaluatedProperties: false + +required: + - reg + +examples: + - | + mdio@18002000 { + compatible = "brcm,iproc-mdio"; + reg = <0x18002000 0x8>; + #address-cells = <1>; + #size-cells = <0>; + + ethernet-phy@0 { + reg = <0>; + }; + }; -- cgit v1.2.3 From b62767e7bab3a397166a2fa36b409e5e2859f100 Mon Sep 17 00:00:00 2001 From: Dmytro Linkin Date: Wed, 2 Jun 2021 15:17:31 +0300 Subject: Documentation: devlink rate objects Add devlink rate objects section at devlink port documentation. Add devlink rate support info at netdevsim devlink documentation. Signed-off-by: Dmytro Linkin Reviewed-by: Jiri Pirko Signed-off-by: David S. Miller --- Documentation/networking/devlink/devlink-port.rst | 35 +++++++++++++++++++++++ Documentation/networking/devlink/netdevsim.rst | 26 +++++++++++++++++ 2 files changed, 61 insertions(+) (limited to 'Documentation') diff --git a/Documentation/networking/devlink/devlink-port.rst b/Documentation/networking/devlink/devlink-port.rst index ab790e7980b8..7627b1da01f2 100644 --- a/Documentation/networking/devlink/devlink-port.rst +++ b/Documentation/networking/devlink/devlink-port.rst @@ -164,6 +164,41 @@ device to instantiate the subfunction device on particular PCI function. A subfunction device is created on the :ref:`Documentation/driver-api/auxiliary_bus.rst `. At this point a matching subfunction driver binds to the subfunction's auxiliary device. +Rate object management +====================== + +Devlink provides API to manage tx rates of single devlink port or a group. +This is done through rate objects, which can be one of the two types: + +``leaf`` + Represents a single devlink port; created/destroyed by the driver. Since leaf + have 1to1 mapping to its devlink port, in user space it is referred as + ``pci//``; + +``node`` + Represents a group of rate objects (leafs and/or nodes); created/deleted by + request from the userspace; initially empty (no rate objects added). In + userspace it is referred as ``pci//``, where + ``node_name`` can be any identifier, except decimal number, to avoid + collisions with leafs. + +API allows to configure following rate object's parameters: + +``tx_share`` + Minimum TX rate value shared among all other rate objects, or rate objects + that parts of the parent group, if it is a part of the same group. + +``tx_max`` + Maximum TX rate value. + +``parent`` + Parent node name. Parent node rate limits are considered as additional limits + to all node children limits. ``tx_max`` is an upper limit for children. + ``tx_share`` is a total bandwidth distributed among children. + +Driver implementations are allowed to support both or either rate object types +and setting methods of their parameters. + Terms and Definitions ===================== diff --git a/Documentation/networking/devlink/netdevsim.rst b/Documentation/networking/devlink/netdevsim.rst index 02c2d20dc673..8a292fb5aaea 100644 --- a/Documentation/networking/devlink/netdevsim.rst +++ b/Documentation/networking/devlink/netdevsim.rst @@ -57,6 +57,32 @@ entries, FIB rule entries and nexthops that the driver will allow. $ devlink resource set netdevsim/netdevsim0 path /nexthops size 16 $ devlink dev reload netdevsim/netdevsim0 +Rate objects +============ + +The ``netdevsim`` driver supports rate objects management, which includes: + +- registerging/unregistering leaf rate objects per VF devlink port; +- creation/deletion node rate objects; +- setting tx_share and tx_max rate values for any rate object type; +- setting parent node for any rate object type. + +Rate nodes and it's parameters are exposed in ``netdevsim`` debugfs in RO mode. +For example created rate node with name ``some_group``: + +.. code:: shell + + $ ls /sys/kernel/debug/netdevsim/netdevsim0/rate_groups/some_group + rate_parent tx_max tx_share + +Same parameters are exposed for leaf objects in corresponding ports directories. +For ex.: + +.. code:: shell + + $ ls /sys/kernel/debug/netdevsim/netdevsim0/ports/1 + dev ethtool rate_parent tx_max tx_share + Driver-specific Traps ===================== -- cgit v1.2.3 From c858d436be8b949c368de0e079084acaff3d4aaf Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 4 Jun 2021 17:01:48 +0300 Subject: net: phy: introduce PHY_INTERFACE_MODE_REVRMII The "reverse RMII" protocol name is a personal invention, derived from "reverse MII". Just like MII, RMII is an asymmetric protocol in that a PHY behaves differently than a MAC. In the case of RMII, for example: - the 50 MHz clock signals are either driven by the MAC or by an external oscillator (but never by the PHY). - the PHY can transmit extra in-band control symbols via RXD[1:0] which the MAC is supposed to understand, but a PHY isn't. The "reverse MII" protocol is not standardized either, except for this web document: https://www.eetimes.com/reverse-media-independent-interface-revmii-block-architecture/# In short, it means that the Ethernet controller speaks the 4-bit data parallel protocol from the perspective of a PHY (it acts like a PHY). This might mean that it implements clause 22 compatible registers, although that is optional - the important bit is that its pins can be connected to an MII MAC and it will 'just work'. In this discussion thread: https://lore.kernel.org/netdev/20210201214515.cx6ivvme2tlquge2@skbuf/ we agreed that it would be an abuse of terms to use the "RevMII" name for anything than the 4-bit parallel MII protocol. But since all the same concepts can be applied to the 2-bit Reduced MII protocol as well, here we are introducing a "Reverse RMII" protocol. This means: "behave like an RMII PHY". Signed-off-by: Vladimir Oltean Acked-by: Florian Fainelli Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/ethernet-controller.yaml | 1 + include/linux/phy.h | 4 ++++ 2 files changed, 5 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/ethernet-controller.yaml b/Documentation/devicetree/bindings/net/ethernet-controller.yaml index e8f04687a3e0..d97b561003ed 100644 --- a/Documentation/devicetree/bindings/net/ethernet-controller.yaml +++ b/Documentation/devicetree/bindings/net/ethernet-controller.yaml @@ -68,6 +68,7 @@ properties: - tbi - rev-mii - rmii + - rev-rmii # RX and TX delays are added by the MAC when required - rgmii diff --git a/include/linux/phy.h b/include/linux/phy.h index 852743f07e3e..ed332ac92e25 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -93,6 +93,7 @@ extern const int phy_10gbit_features_array[1]; * @PHY_INTERFACE_MODE_TBI: Ten Bit Interface * @PHY_INTERFACE_MODE_REVMII: Reverse Media Independent Interface * @PHY_INTERFACE_MODE_RMII: Reduced Media Independent Interface + * @PHY_INTERFACE_MODE_REVRMII: Reduced Media Independent Interface in PHY role * @PHY_INTERFACE_MODE_RGMII: Reduced gigabit media-independent interface * @PHY_INTERFACE_MODE_RGMII_ID: RGMII with Internal RX+TX delay * @PHY_INTERFACE_MODE_RGMII_RXID: RGMII with Internal RX delay @@ -126,6 +127,7 @@ typedef enum { PHY_INTERFACE_MODE_TBI, PHY_INTERFACE_MODE_REVMII, PHY_INTERFACE_MODE_RMII, + PHY_INTERFACE_MODE_REVRMII, PHY_INTERFACE_MODE_RGMII, PHY_INTERFACE_MODE_RGMII_ID, PHY_INTERFACE_MODE_RGMII_RXID, @@ -185,6 +187,8 @@ static inline const char *phy_modes(phy_interface_t interface) return "rev-mii"; case PHY_INTERFACE_MODE_RMII: return "rmii"; + case PHY_INTERFACE_MODE_REVRMII: + return "rev-rmii"; case PHY_INTERFACE_MODE_RGMII: return "rgmii"; case PHY_INTERFACE_MODE_RGMII_ID: -- cgit v1.2.3 From 5d645df99ac60fab5368e01f1ddf4a57fa4f719f Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 4 Jun 2021 17:01:50 +0300 Subject: net: dsa: sja1105: determine PHY/MAC role from PHY interface type Now that both RevMII as well as RevRMII exist, we can deprecate the sja1105,role-mac and sja1105,role-phy properties and simply let the user select that a port operates in MII PHY role by using phy-mode = "rev-mii"; or in RMII PHY role by using phy-mode = "rev-rmii"; There are no fixed-link MII or RMII properties in mainline device trees, and the setup itself is fairly uncommon, so there shouldn't be risks of breaking compatibility. Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- .../devicetree/bindings/net/dsa/sja1105.txt | 37 +------------ drivers/net/dsa/sja1105/sja1105_main.c | 64 ++++++---------------- 2 files changed, 19 insertions(+), 82 deletions(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/dsa/sja1105.txt b/Documentation/devicetree/bindings/net/dsa/sja1105.txt index 13fd21074d48..dcf3b2c1d26b 100644 --- a/Documentation/devicetree/bindings/net/dsa/sja1105.txt +++ b/Documentation/devicetree/bindings/net/dsa/sja1105.txt @@ -19,37 +19,6 @@ Required properties: of support for RGMII internal delays (supported on P/Q/R/S, but not on E/T). -Optional properties: - -- sja1105,role-mac: -- sja1105,role-phy: - Boolean properties that can be assigned under each port node. By - default (unless otherwise specified) a port is configured as MAC if it - is driving a PHY (phy-handle is present) or as PHY if it is PHY-less - (fixed-link specified, presumably because it is connected to a MAC). - The effect of this property (in either its implicit or explicit form) - is: - - In the case of MII or RMII it specifies whether the SJA1105 port is a - clock source or sink for this interface (not applicable for RGMII - where there is a Tx and an Rx clock). - - In the case of RGMII it affects the behavior regarding internal - delays: - 1. If sja1105,role-mac is specified, and the phy-mode property is one - of "rgmii-id", "rgmii-txid" or "rgmii-rxid", then the entity - designated to apply the delay/clock skew necessary for RGMII - is the PHY. The SJA1105 MAC does not apply any internal delays. - 2. If sja1105,role-phy is specified, and the phy-mode property is one - of the above, the designated entity to apply the internal delays - is the SJA1105 MAC (if hardware-supported). This is only supported - by the second-generation (P/Q/R/S) hardware. On a first-generation - E or T device, it is an error to specify an RGMII phy-mode other - than "rgmii" for a port that is in fixed-link mode. In that case, - the clock skew must either be added by the MAC at the other end of - the fixed-link, or by PCB serpentine traces on the board. - These properties are required, for example, in the case where SJA1105 - ports are at both ends of a MII/RMII PHY-less setup. One end would need - to have sja1105,role-mac, while the other sja1105,role-phy. - See Documentation/devicetree/bindings/net/dsa/dsa.txt for the list of standard DSA required and optional properties. @@ -87,7 +56,6 @@ arch/arm/boot/dts/ls1021a-tsn.dts: phy-handle = <&rgmii_phy6>; phy-mode = "rgmii-id"; reg = <0>; - /* Implicit "sja1105,role-mac;" */ }; port@1 { /* ETH2 written on chassis */ @@ -95,7 +63,6 @@ arch/arm/boot/dts/ls1021a-tsn.dts: phy-handle = <&rgmii_phy3>; phy-mode = "rgmii-id"; reg = <1>; - /* Implicit "sja1105,role-mac;" */ }; port@2 { /* ETH3 written on chassis */ @@ -103,7 +70,6 @@ arch/arm/boot/dts/ls1021a-tsn.dts: phy-handle = <&rgmii_phy4>; phy-mode = "rgmii-id"; reg = <2>; - /* Implicit "sja1105,role-mac;" */ }; port@3 { /* ETH4 written on chassis */ @@ -111,14 +77,13 @@ arch/arm/boot/dts/ls1021a-tsn.dts: label = "swp4"; phy-mode = "rgmii-id"; reg = <3>; - /* Implicit "sja1105,role-mac;" */ }; port@4 { /* Internal port connected to eth2 */ ethernet = <&enet2>; phy-mode = "rgmii"; reg = <4>; - /* Implicit "sja1105,role-phy;" */ + fixed-link { speed = <1000>; full-duplex; diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index 5839c1e0475a..cbce6e90dc63 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -57,14 +57,6 @@ static bool sja1105_can_forward(struct sja1105_l2_forwarding_entry *l2_fwd, return !!(l2_fwd[from].reach_port & BIT(to)); } -/* Structure used to temporarily transport device tree - * settings into sja1105_setup - */ -struct sja1105_dt_port { - phy_interface_t phy_mode; - sja1105_mii_role_t role; -}; - static int sja1105_init_mac_settings(struct sja1105_private *priv) { struct sja1105_mac_config_entry default_mac = { @@ -143,8 +135,7 @@ static int sja1105_init_mac_settings(struct sja1105_private *priv) return 0; } -static int sja1105_init_mii_settings(struct sja1105_private *priv, - struct sja1105_dt_port *ports) +static int sja1105_init_mii_settings(struct sja1105_private *priv) { struct device *dev = &priv->spidev->dev; struct sja1105_xmii_params_entry *mii; @@ -171,16 +162,24 @@ static int sja1105_init_mii_settings(struct sja1105_private *priv, mii = table->entries; for (i = 0; i < ds->num_ports; i++) { + sja1105_mii_role_t role = XMII_MAC; + if (dsa_is_unused_port(priv->ds, i)) continue; - switch (ports[i].phy_mode) { + switch (priv->phy_mode[i]) { + case PHY_INTERFACE_MODE_REVMII: + role = XMII_PHY; + fallthrough; case PHY_INTERFACE_MODE_MII: if (!priv->info->supports_mii[i]) goto unsupported; mii->xmii_mode[i] = XMII_MODE_MII; break; + case PHY_INTERFACE_MODE_REVRMII: + role = XMII_PHY; + fallthrough; case PHY_INTERFACE_MODE_RMII: if (!priv->info->supports_rmii[i]) goto unsupported; @@ -211,24 +210,11 @@ static int sja1105_init_mii_settings(struct sja1105_private *priv, unsupported: default: dev_err(dev, "Unsupported PHY mode %s on port %d!\n", - phy_modes(ports[i].phy_mode), i); + phy_modes(priv->phy_mode[i]), i); return -EINVAL; } - /* Even though the SerDes port is able to drive SGMII autoneg - * like a PHY would, from the perspective of the XMII tables, - * the SGMII port should always be put in MAC mode. - * Similarly, RGMII is a symmetric protocol electrically - * speaking, and the 'RGMII PHY' role does not mean anything to - * hardware. Just keep the 'PHY role' notation relevant to the - * driver to mean 'the switch port should apply RGMII delays', - * but unconditionally put the port in the MAC role. - */ - if (ports[i].phy_mode == PHY_INTERFACE_MODE_SGMII || - phy_interface_mode_is_rgmii(ports[i].phy_mode)) - mii->phy_mac[i] = XMII_MAC; - else - mii->phy_mac[i] = ports[i].role; + mii->phy_mac[i] = role; } return 0; } @@ -751,8 +737,7 @@ static int sja1105_init_l2_policing(struct sja1105_private *priv) return 0; } -static int sja1105_static_config_load(struct sja1105_private *priv, - struct sja1105_dt_port *ports) +static int sja1105_static_config_load(struct sja1105_private *priv) { int rc; @@ -767,7 +752,7 @@ static int sja1105_static_config_load(struct sja1105_private *priv, rc = sja1105_init_mac_settings(priv); if (rc < 0) return rc; - rc = sja1105_init_mii_settings(priv, ports); + rc = sja1105_init_mii_settings(priv); if (rc < 0) return rc; rc = sja1105_init_static_fdb(priv); @@ -824,7 +809,6 @@ static int sja1105_parse_rgmii_delays(struct sja1105_private *priv) } static int sja1105_parse_ports_node(struct sja1105_private *priv, - struct sja1105_dt_port *ports, struct device_node *ports_node) { struct device *dev = &priv->spidev->dev; @@ -853,7 +837,6 @@ static int sja1105_parse_ports_node(struct sja1105_private *priv, of_node_put(child); return -ENODEV; } - ports[index].phy_mode = phy_mode; phy_node = of_parse_phandle(child, "phy-handle", 0); if (!phy_node) { @@ -867,27 +850,17 @@ static int sja1105_parse_ports_node(struct sja1105_private *priv, * So it's a fixed link. Default to PHY role. */ priv->fixed_link[index] = true; - ports[index].role = XMII_PHY; } else { - /* phy-handle present => put port in MAC role */ - ports[index].role = XMII_MAC; of_node_put(phy_node); } - /* The MAC/PHY role can be overridden with explicit bindings */ - if (of_property_read_bool(child, "sja1105,role-mac")) - ports[index].role = XMII_MAC; - else if (of_property_read_bool(child, "sja1105,role-phy")) - ports[index].role = XMII_PHY; - priv->phy_mode[index] = phy_mode; } return 0; } -static int sja1105_parse_dt(struct sja1105_private *priv, - struct sja1105_dt_port *ports) +static int sja1105_parse_dt(struct sja1105_private *priv) { struct device *dev = &priv->spidev->dev; struct device_node *switch_node = dev->of_node; @@ -902,7 +875,7 @@ static int sja1105_parse_dt(struct sja1105_private *priv, return -ENODEV; } - rc = sja1105_parse_ports_node(priv, ports, ports_node); + rc = sja1105_parse_ports_node(priv, ports_node); of_node_put(ports_node); return rc; @@ -3008,11 +2981,10 @@ static const struct dsa_8021q_ops sja1105_dsa_8021q_ops = { */ static int sja1105_setup(struct dsa_switch *ds) { - struct sja1105_dt_port ports[SJA1105_MAX_NUM_PORTS]; struct sja1105_private *priv = ds->priv; int rc; - rc = sja1105_parse_dt(priv, ports); + rc = sja1105_parse_dt(priv); if (rc < 0) { dev_err(ds->dev, "Failed to parse DT: %d\n", rc); return rc; @@ -3033,7 +3005,7 @@ static int sja1105_setup(struct dsa_switch *ds) return rc; } /* Create and send configuration down to device */ - rc = sja1105_static_config_load(priv, ports); + rc = sja1105_static_config_load(priv); if (rc < 0) { dev_err(ds->dev, "Failed to load static config: %d\n", rc); goto out_ptp_clock_unregister; -- cgit v1.2.3 From 62568bdbe6f6293c955fbd98db15adf7ee6aca1c Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 4 Jun 2021 17:01:51 +0300 Subject: dt-bindings: net: dsa: sja1105: convert to YAML schema Since the sja1105 driver no longer has any custom device tree properties, the conversion is trivial. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- .../devicetree/bindings/net/dsa/nxp,sja1105.yaml | 89 +++++++++++++++ .../devicetree/bindings/net/dsa/sja1105.txt | 121 --------------------- 2 files changed, 89 insertions(+), 121 deletions(-) create mode 100644 Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml delete mode 100644 Documentation/devicetree/bindings/net/dsa/sja1105.txt (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml b/Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml new file mode 100644 index 000000000000..d6ac9a0c1b04 --- /dev/null +++ b/Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml @@ -0,0 +1,89 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/dsa/nxp,sja1105.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: NXP SJA1105 Automotive Ethernet Switch Family Device Tree Bindings + +description: + The SJA1105 SPI interface requires a CS-to-CLK time (t2 in UM10944.pdf) of at + least one half of t_CLK. At an SPI frequency of 1MHz, this means a minimum + cs_sck_delay of 500ns. Ensuring that this SPI timing requirement is observed + depends on the SPI bus master driver. + +allOf: + - $ref: "dsa.yaml#" + +maintainers: + - Vladimir Oltean + +properties: + compatible: + enum: + - nxp,sja1105e + - nxp,sja1105t + - nxp,sja1105p + - nxp,sja1105q + - nxp,sja1105r + - nxp,sja1105s + + reg: + maxItems: 1 + +required: + - compatible + - reg + +unevaluatedProperties: false + +examples: + - | + spi { + #address-cells = <1>; + #size-cells = <0>; + + ethernet-switch@1 { + reg = <0x1>; + compatible = "nxp,sja1105t"; + + ethernet-ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + phy-handle = <&rgmii_phy6>; + phy-mode = "rgmii-id"; + reg = <0>; + }; + + port@1 { + phy-handle = <&rgmii_phy3>; + phy-mode = "rgmii-id"; + reg = <1>; + }; + + port@2 { + phy-handle = <&rgmii_phy4>; + phy-mode = "rgmii-id"; + reg = <2>; + }; + + port@3 { + phy-mode = "rgmii-id"; + reg = <3>; + }; + + port@4 { + ethernet = <&enet2>; + phy-mode = "rgmii"; + reg = <4>; + + fixed-link { + speed = <1000>; + full-duplex; + }; + }; + }; + }; + }; diff --git a/Documentation/devicetree/bindings/net/dsa/sja1105.txt b/Documentation/devicetree/bindings/net/dsa/sja1105.txt deleted file mode 100644 index dcf3b2c1d26b..000000000000 --- a/Documentation/devicetree/bindings/net/dsa/sja1105.txt +++ /dev/null @@ -1,121 +0,0 @@ -NXP SJA1105 switch driver -========================= - -Required properties: - -- compatible: - Must be one of: - - "nxp,sja1105e" - - "nxp,sja1105t" - - "nxp,sja1105p" - - "nxp,sja1105q" - - "nxp,sja1105r" - - "nxp,sja1105s" - - Although the device ID could be detected at runtime, explicit bindings - are required in order to be able to statically check their validity. - For example, SGMII can only be specified on port 4 of R and S devices, - and the non-SGMII devices, while pin-compatible, are not equal in terms - of support for RGMII internal delays (supported on P/Q/R/S, but not on - E/T). - -See Documentation/devicetree/bindings/net/dsa/dsa.txt for the list of standard -DSA required and optional properties. - -Other observations ------------------- - -The SJA1105 SPI interface requires a CS-to-CLK time (t2 in UM10944) of at least -one half of t_CLK. At an SPI frequency of 1MHz, this means a minimum -cs_sck_delay of 500ns. Ensuring that this SPI timing requirement is observed -depends on the SPI bus master driver. - -Example -------- - -Ethernet switch connected via SPI to the host, CPU port wired to enet2: - -arch/arm/boot/dts/ls1021a-tsn.dts: - -/* SPI controller of the LS1021 */ -&dspi0 { - sja1105@1 { - reg = <0x1>; - #address-cells = <1>; - #size-cells = <0>; - compatible = "nxp,sja1105t"; - spi-max-frequency = <4000000>; - fsl,spi-cs-sck-delay = <1000>; - fsl,spi-sck-cs-delay = <1000>; - ports { - #address-cells = <1>; - #size-cells = <0>; - port@0 { - /* ETH5 written on chassis */ - label = "swp5"; - phy-handle = <&rgmii_phy6>; - phy-mode = "rgmii-id"; - reg = <0>; - }; - port@1 { - /* ETH2 written on chassis */ - label = "swp2"; - phy-handle = <&rgmii_phy3>; - phy-mode = "rgmii-id"; - reg = <1>; - }; - port@2 { - /* ETH3 written on chassis */ - label = "swp3"; - phy-handle = <&rgmii_phy4>; - phy-mode = "rgmii-id"; - reg = <2>; - }; - port@3 { - /* ETH4 written on chassis */ - phy-handle = <&rgmii_phy5>; - label = "swp4"; - phy-mode = "rgmii-id"; - reg = <3>; - }; - port@4 { - /* Internal port connected to eth2 */ - ethernet = <&enet2>; - phy-mode = "rgmii"; - reg = <4>; - - fixed-link { - speed = <1000>; - full-duplex; - }; - }; - }; - }; -}; - -/* MDIO controller of the LS1021 */ -&mdio0 { - /* BCM5464 */ - rgmii_phy3: ethernet-phy@3 { - reg = <0x3>; - }; - rgmii_phy4: ethernet-phy@4 { - reg = <0x4>; - }; - rgmii_phy5: ethernet-phy@5 { - reg = <0x5>; - }; - rgmii_phy6: ethernet-phy@6 { - reg = <0x6>; - }; -}; - -/* Ethernet master port of the LS1021 */ -&enet2 { - phy-connection-type = "rgmii"; - status = "ok"; - fixed-link { - speed = <1000>; - full-duplex; - }; -}; -- cgit v1.2.3 From a9f15dc2b9733cb5870e655e6b77a4ec2cc51b8b Mon Sep 17 00:00:00 2001 From: Joakim Zhang Date: Tue, 8 Jun 2021 11:15:32 +0800 Subject: dt-bindings: net: add dt binding for realtek rtl82xx phy Add binding for realtek rtl82xx phy. Signed-off-by: Joakim Zhang Signed-off-by: David S. Miller --- .../devicetree/bindings/net/realtek,rtl82xx.yaml | 45 ++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 Documentation/devicetree/bindings/net/realtek,rtl82xx.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/realtek,rtl82xx.yaml b/Documentation/devicetree/bindings/net/realtek,rtl82xx.yaml new file mode 100644 index 000000000000..bb94a2388520 --- /dev/null +++ b/Documentation/devicetree/bindings/net/realtek,rtl82xx.yaml @@ -0,0 +1,45 @@ +# SPDX-License-Identifier: GPL-2.0+ +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/realtek,rtl82xx.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Realtek RTL82xx PHY + +maintainers: + - Andrew Lunn + - Florian Fainelli + - Heiner Kallweit + +description: + Bindings for Realtek RTL82xx PHYs + +allOf: + - $ref: ethernet-phy.yaml# + +properties: + realtek,clkout-disable: + type: boolean + description: + Disable CLKOUT clock, CLKOUT clock default is enabled after hardware reset. + + + realtek,aldps-enable: + type: boolean + description: + Enable ALDPS mode, ALDPS mode default is disabled after hardware reset. + +unevaluatedProperties: false + +examples: + - | + mdio { + #address-cells = <1>; + #size-cells = <0>; + + ethphy1: ethernet-phy@1 { + reg = <1>; + realtek,clkout-disable; + realtek,aldps-enable; + }; + }; -- cgit v1.2.3 From 070f5b701d559ae139b348fb19145269b58b68c3 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 8 Jun 2021 12:25:35 +0300 Subject: dt-bindings: net: dsa: sja1105: add SJA1110 bindings There are 4 variations of the SJA1110 switch which have a different set of MII protocols supported per port. Document the compatible strings. Also, the SJA1110 optionally supports 2 internal MDIO buses for 2 different types of Ethernet PHYs. Document a container node called "mdios" which has 2 subnodes "mdio@0" and "mdio@1", identifiable via compatible string, under which the driver finds the internal PHYs. Cc: Rob Herring Cc: devicetree@vger.kernel.org Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- .../devicetree/bindings/net/dsa/nxp,sja1105.yaml | 43 ++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml b/Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml index d6ac9a0c1b04..0b8a05dd52e6 100644 --- a/Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml +++ b/Documentation/devicetree/bindings/net/dsa/nxp,sja1105.yaml @@ -27,10 +27,53 @@ properties: - nxp,sja1105q - nxp,sja1105r - nxp,sja1105s + - nxp,sja1110a + - nxp,sja1110b + - nxp,sja1110c + - nxp,sja1110d reg: maxItems: 1 + # Optional container node for the 2 internal MDIO buses of the SJA1110 + # (one for the internal 100base-T1 PHYs and the other for the single + # 100base-TX PHY). The "reg" property does not have physical significance. + # The PHY addresses to port correspondence is as follows: for 100base-T1, + # port 5 has PHY 1, port 6 has PHY 2 etc, while for 100base-TX, port 1 has + # PHY 1. + mdios: + type: object + + properties: + '#address-cells': + const: 1 + '#size-cells': + const: 0 + + patternProperties: + "^mdio@[0-1]$": + type: object + + allOf: + - $ref: "http://devicetree.org/schemas/net/mdio.yaml#" + + properties: + compatible: + oneOf: + - enum: + - nxp,sja1110-base-t1-mdio + - nxp,sja1110-base-tx-mdio + + reg: + oneOf: + - enum: + - 0 + - 1 + + required: + - compatible + - reg + required: - compatible - reg -- cgit v1.2.3 From 511c537bb5647662ff7df7a41180a1721c078720 Mon Sep 17 00:00:00 2001 From: Shay Agroskin Date: Tue, 8 Jun 2021 19:01:15 +0300 Subject: net: ena: fix RST format in ENA documentation file The documentation file used to be written in markdown format but was converted to reStructuredText (rst). The converted file doesn't keep up with rst format requirements which results in hard-to-read text. This patch fixes the formatting of the file. The patch also * Highlights and emphasizes some lines to improve readability * Rephrases some hard-to-understand text * Updates outdated function descriptions. * Removes TSO description which falsely claims the driver supports it Signed-off-by: Shay Agroskin Signed-off-by: David S. Miller --- .../device_drivers/ethernet/amazon/ena.rst | 164 ++++++++++----------- 1 file changed, 78 insertions(+), 86 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/device_drivers/ethernet/amazon/ena.rst b/Documentation/networking/device_drivers/ethernet/amazon/ena.rst index f8c6469f2bd2..01b2a69b0cb0 100644 --- a/Documentation/networking/device_drivers/ethernet/amazon/ena.rst +++ b/Documentation/networking/device_drivers/ethernet/amazon/ena.rst @@ -11,12 +11,12 @@ ENA is a networking interface designed to make good use of modern CPU features and system architectures. The ENA device exposes a lightweight management interface with a -minimal set of memory mapped registers and extendable command set +minimal set of memory mapped registers and extendible command set through an Admin Queue. The driver supports a range of ENA devices, is link-speed independent -(i.e., the same driver is used for 10GbE, 25GbE, 40GbE, etc.), and has -a negotiated and extendable feature set. +(i.e., the same driver is used for 10GbE, 25GbE, 40GbE, etc), and has +a negotiated and extendible feature set. Some ENA devices support SR-IOV. This driver is used for both the SR-IOV Physical Function (PF) and Virtual Function (VF) devices. @@ -27,9 +27,9 @@ is advertised by the device via the Admin Queue), a dedicated MSI-X interrupt vector per Tx/Rx queue pair, adaptive interrupt moderation, and CPU cacheline optimized data placement. -The ENA driver supports industry standard TCP/IP offload features such -as checksum offload and TCP transmit segmentation offload (TSO). -Receive-side scaling (RSS) is supported for multi-core scaling. +The ENA driver supports industry standard TCP/IP offload features such as +checksum offload. Receive-side scaling (RSS) is supported for multi-core +scaling. The ENA driver and its corresponding devices implement health monitoring mechanisms such as watchdog, enabling the device and driver @@ -38,22 +38,20 @@ debug logs. Some of the ENA devices support a working mode called Low-latency Queue (LLQ), which saves several more microseconds. - ENA Source Code Directory Structure =================================== ================= ====================================================== ena_com.[ch] Management communication layer. This layer is - responsible for the handling all the management - (admin) communication between the device and the - driver. + responsible for the handling all the management + (admin) communication between the device and the + driver. ena_eth_com.[ch] Tx/Rx data path. ena_admin_defs.h Definition of ENA management interface. ena_eth_io_defs.h Definition of ENA data path interface. ena_common_defs.h Common definitions for ena_com layer. ena_regs_defs.h Definition of ENA PCI memory-mapped (MMIO) registers. ena_netdev.[ch] Main Linux kernel driver. -ena_syfsfs.[ch] Sysfs files. ena_ethtool.c ethtool callbacks. ena_pci_id_tbl.h Supported device IDs. ================= ====================================================== @@ -69,7 +67,7 @@ ENA management interface is exposed by means of: - Asynchronous Event Notification Queue (AENQ) ENA device MMIO Registers are accessed only during driver -initialization and are not involved in further normal device +initialization and are not used during further normal device operation. AQ is used for submitting management commands, and the @@ -100,28 +98,27 @@ group may have multiple syndromes, as shown below The events are: - ==================== =============== - Group Syndrome - ==================== =============== - Link state change **X** - Fatal error **X** - Notification Suspend traffic - Notification Resume traffic - Keep-Alive **X** - ==================== =============== +==================== =============== +Group Syndrome +==================== =============== +Link state change **X** +Fatal error **X** +Notification Suspend traffic +Notification Resume traffic +Keep-Alive **X** +==================== =============== ACQ and AENQ share the same MSI-X vector. -Keep-Alive is a special mechanism that allows monitoring of the -device's health. The driver maintains a watchdog (WD) handler which, -if fired, logs the current state and statistics then resets and -restarts the ENA device and driver. A Keep-Alive event is delivered by -the device every second. The driver re-arms the WD upon reception of a -Keep-Alive event. A missed Keep-Alive event causes the WD handler to -fire. +Keep-Alive is a special mechanism that allows monitoring the device's health. +A Keep-Alive event is delivered by the device every second. +The driver maintains a watchdog (WD) handler which logs the current state and +statistics. If the keep-alive events aren't delivered as expected the WD resets +the device and the driver. Data Path Interface =================== + I/O operations are based on Tx and Rx Submission Queues (Tx SQ and Rx SQ correspondingly). Each SQ has a completion queue (CQ) associated with it. @@ -131,26 +128,24 @@ physical memory. The ENA driver supports two Queue Operation modes for Tx SQs: -- Regular mode +- **Regular mode:** + In this mode the Tx SQs reside in the host's memory. The ENA + device fetches the ENA Tx descriptors and packet data from host + memory. - * In this mode the Tx SQs reside in the host's memory. The ENA - device fetches the ENA Tx descriptors and packet data from host - memory. +- **Low Latency Queue (LLQ) mode or "push-mode":** + In this mode the driver pushes the transmit descriptors and the + first 128 bytes of the packet directly to the ENA device memory + space. The rest of the packet payload is fetched by the + device. For this operation mode, the driver uses a dedicated PCI + device memory BAR, which is mapped with write-combine capability. -- Low Latency Queue (LLQ) mode or "push-mode". - - * In this mode the driver pushes the transmit descriptors and the - first 128 bytes of the packet directly to the ENA device memory - space. The rest of the packet payload is fetched by the - device. For this operation mode, the driver uses a dedicated PCI - device memory BAR, which is mapped with write-combine capability. + **Note that** not all ENA devices support LLQ, and this feature is negotiated + with the device upon initialization. If the ENA device does not + support LLQ mode, the driver falls back to the regular mode. The Rx SQs support only the regular mode. -Note: Not all ENA devices support LLQ, and this feature is negotiated - with the device upon initialization. If the ENA device does not - support LLQ mode, the driver falls back to the regular mode. - The driver supports multi-queue for both Tx and Rx. This has various benefits: @@ -165,6 +160,7 @@ benefits: Interrupt Modes =============== + The driver assigns a single MSI-X vector per queue pair (for both Tx and Rx directions). The driver assigns an additional dedicated MSI-X vector for management (for ACQ and AENQ). @@ -190,20 +186,21 @@ unmasked by the driver after NAPI processing is complete. Interrupt Moderation ==================== + ENA driver and device can operate in conventional or adaptive interrupt moderation mode. -In conventional mode the driver instructs device to postpone interrupt +**In conventional mode** the driver instructs device to postpone interrupt posting according to static interrupt delay value. The interrupt delay -value can be configured through ethtool(8). The following ethtool -parameters are supported by the driver: tx-usecs, rx-usecs +value can be configured through `ethtool(8)`. The following `ethtool` +parameters are supported by the driver: ``tx-usecs``, ``rx-usecs`` -In adaptive interrupt moderation mode the interrupt delay value is +**In adaptive interrupt** moderation mode the interrupt delay value is updated by the driver dynamically and adjusted every NAPI cycle according to the traffic nature. -Adaptive coalescing can be switched on/off through ethtool(8) -adaptive_rx on|off parameter. +Adaptive coalescing can be switched on/off through `ethtool(8)`'s +:code:`adaptive_rx on|off` parameter. More information about Adaptive Interrupt Moderation (DIM) can be found in Documentation/networking/net_dim.rst @@ -214,17 +211,10 @@ The rx_copybreak is initialized by default to ENA_DEFAULT_RX_COPYBREAK and can be configured by the ETHTOOL_STUNABLE command of the SIOCETHTOOL ioctl. -SKB -=== -The driver-allocated SKB for frames received from Rx handling using -NAPI context. The allocation method depends on the size of the packet. -If the frame length is larger than rx_copybreak, napi_get_frags() -is used, otherwise netdev_alloc_skb_ip_align() is used, the buffer -content is copied (by CPU) to the SKB, and the buffer is recycled. - Statistics ========== -The user can obtain ENA device and driver statistics using ethtool. + +The user can obtain ENA device and driver statistics using `ethtool`. The driver can collect regular or extended statistics (including per-queue stats) from the device. @@ -232,22 +222,23 @@ In addition the driver logs the stats to syslog upon device reset. MTU === + The driver supports an arbitrarily large MTU with a maximum that is negotiated with the device. The driver configures MTU using the SetFeature command (ENA_ADMIN_MTU property). The user can change MTU -via ip(8) and similar legacy tools. +via `ip(8)` and similar legacy tools. Stateless Offloads ================== + The ENA driver supports: -- TSO over IPv4/IPv6 -- TSO with ECN - IPv4 header checksum offload - TCP/UDP over IPv4/IPv6 checksum offloads RSS === + - The ENA device supports RSS that allows flexible Rx traffic steering. - Toeplitz and CRC32 hash functions are supported. @@ -260,41 +251,42 @@ RSS function delivered in the Rx CQ descriptor is set in the received SKB. - The user can provide a hash key, hash function, and configure the - indirection table through ethtool(8). + indirection table through `ethtool(8)`. DATA PATH ========= + Tx -- -ena_start_xmit() is called by the stack. This function does the following: +:code:`ena_start_xmit()` is called by the stack. This function does the following: -- Maps data buffers (skb->data and frags). -- Populates ena_buf for the push buffer (if the driver and device are - in push mode.) +- Maps data buffers (``skb->data`` and frags). +- Populates ``ena_buf`` for the push buffer (if the driver and device are + in push mode). - Prepares ENA bufs for the remaining frags. -- Allocates a new request ID from the empty req_id ring. The request +- Allocates a new request ID from the empty ``req_id`` ring. The request ID is the index of the packet in the Tx info. This is used for - out-of-order TX completions. + out-of-order Tx completions. - Adds the packet to the proper place in the Tx ring. -- Calls ena_com_prepare_tx(), an ENA communication layer that converts - the ena_bufs to ENA descriptors (and adds meta ENA descriptors as - needed.) +- Calls :code:`ena_com_prepare_tx()`, an ENA communication layer that converts + the ``ena_bufs`` to ENA descriptors (and adds meta ENA descriptors as + needed). * This function also copies the ENA descriptors and the push buffer - to the Device memory space (if in push mode.) + to the Device memory space (if in push mode). -- Writes doorbell to the ENA device. +- Writes a doorbell to the ENA device. - When the ENA device finishes sending the packet, a completion interrupt is raised. - The interrupt handler schedules NAPI. -- The ena_clean_tx_irq() function is called. This function handles the +- The :code:`ena_clean_tx_irq()` function is called. This function handles the completion descriptors generated by the ENA, with a single completion descriptor per completed packet. - * req_id is retrieved from the completion descriptor. The tx_info of - the packet is retrieved via the req_id. The data buffers are - unmapped and req_id is returned to the empty req_id ring. + * ``req_id`` is retrieved from the completion descriptor. The ``tx_info`` of + the packet is retrieved via the ``req_id``. The data buffers are + unmapped and ``req_id`` is returned to the empty ``req_id`` ring. * The function stops when the completion descriptors are completed or the budget is reached. @@ -303,12 +295,11 @@ Rx - When a packet is received from the ENA device. - The interrupt handler schedules NAPI. -- The ena_clean_rx_irq() function is called. This function calls - ena_rx_pkt(), an ENA communication layer function, which returns the - number of descriptors used for a new unhandled packet, and zero if +- The :code:`ena_clean_rx_irq()` function is called. This function calls + :code:`ena_com_rx_pkt()`, an ENA communication layer function, which returns the + number of descriptors used for a new packet, and zero if no new packet is found. -- Then it calls the ena_clean_rx_irq() function. -- ena_eth_rx_skb() checks packet length: +- :code:`ena_rx_skb()` checks packet length: * If the packet is small (len < rx_copybreak), the driver allocates a SKB for the new packet, and copies the packet payload into the @@ -317,9 +308,10 @@ Rx - In this way the original data buffer is not passed to the stack and is reused for future Rx packets. - * Otherwise the function unmaps the Rx buffer, then allocates the - new SKB structure and hooks the Rx buffer to the SKB frags. + * Otherwise the function unmaps the Rx buffer, sets the first + descriptor as `skb`'s linear part and the other descriptors as the + `skb`'s frags. - The new SKB is updated with the necessary information (protocol, - checksum hw verify result, etc.), and then passed to the network - stack, using the NAPI interface function napi_gro_receive(). + checksum hw verify result, etc), and then passed to the network + stack, using the NAPI interface function :code:`napi_gro_receive()`. -- cgit v1.2.3 From 7cd6a54a828558c02ee2117eeba43593de54c448 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Mon, 7 Jun 2021 17:42:49 +0300 Subject: net/mlx5: Bridge, handle FDB events Hardware supported by mlx5 driver doesn't provide learning and requires the driver to emulate all switch-like behavior in software. As such, all packets by default go through miss path, appear on representor and get to software bridge, if it is the upper device of the representor. This causes bridge to process packet in software, learn the MAC address to FDB and send SWITCHDEV_FDB_ADD_TO_DEVICE event to all subscribers. In order to offload FDB entries in mlx5, register switchdev notifier callback and implement support for both 'added_by_user' and dynamic FDB entry SWITCHDEV_FDB_ADD_TO_DEVICE events asynchronously using new mlx5_esw_bridge_offloads->wq ordered workqueue. In workqueue callback offload the ingress rule (matching FDB entry MAC as packet source MAC) and egress table rule (matching FDB entry MAC as destination MAC). For ingress table rule also match source vport to ensure that only traffic coming from expected bridge port is matched by offloaded rule. Save all the relevant FDB entry data in struct mlx5_esw_bridge_fdb_entry instance and insert the instance in new mlx5_esw_bridge->fdb_list list (for traversing all entries by software ageing implementation in following patch) and in new mlx5_esw_bridge->fdb_ht hash table for fast retrieval. Notify the bridge that FDB entry has been offloaded by sending SWITCHDEV_FDB_OFFLOADED notification. Delete FDB entry on reception of SWITCHDEV_FDB_DEL_TO_DEVICE event. Signed-off-by: Vlad Buslov Reviewed-by: Jianbo Liu Signed-off-by: Saeed Mahameed --- .../device_drivers/ethernet/mellanox/mlx5.rst | 15 ++ .../ethernet/mellanox/mlx5/core/en/rep/bridge.c | 150 +++++++++++- .../net/ethernet/mellanox/mlx5/core/esw/bridge.c | 254 ++++++++++++++++++++- .../net/ethernet/mellanox/mlx5/core/esw/bridge.h | 9 + 4 files changed, 424 insertions(+), 4 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst index 936a10f1942c..ea32136b30e7 100644 --- a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst +++ b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst @@ -12,6 +12,7 @@ Contents - `Enabling the driver and kconfig options`_ - `Devlink info`_ - `Devlink parameters`_ +- `Bridge offload`_ - `mlx5 subfunction`_ - `mlx5 function attributes`_ - `Devlink health reporters`_ @@ -217,6 +218,20 @@ users try to enable them. $ devlink dev eswitch set pci/0000:06:00.0 mode switchdev +Bridge offload +============== +The mlx5 driver implements support for offloading bridge rules when in switchdev +mode. Linux bridge FDBs are automatically offloaded when mlx5 switchdev +representor is attached to bridge. + +- Change device to switchdev mode:: + + $ devlink dev eswitch set pci/0000:06:00.0 mode switchdev + +- Attach mlx5 switchdev representor 'enp8s0f0' to bridge netdev 'bridge1':: + + $ ip link set enp8s0f0 master bridge1 + mlx5 subfunction ================ mlx5 supports subfunction management using devlink port (see :ref:`Documentation/networking/devlink/devlink-port.rst `) interface. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c index de7a68488a9d..b34e9cb686e3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c @@ -8,6 +8,13 @@ #include "esw/bridge.h" #include "en_rep.h" +struct mlx5_bridge_switchdev_fdb_work { + struct work_struct work; + struct switchdev_notifier_fdb_info fdb_info; + struct net_device *dev; + bool add; +}; + static int mlx5_esw_bridge_port_changeupper(struct notifier_block *nb, void *ptr) { struct mlx5_esw_bridge_offloads *br_offloads = container_of(nb, @@ -65,6 +72,124 @@ static int mlx5_esw_bridge_switchdev_port_event(struct notifier_block *nb, return notifier_from_errno(err); } +static void +mlx5_esw_bridge_cleanup_switchdev_fdb_work(struct mlx5_bridge_switchdev_fdb_work *fdb_work) +{ + dev_put(fdb_work->dev); + kfree(fdb_work->fdb_info.addr); + kfree(fdb_work); +} + +static void mlx5_esw_bridge_switchdev_fdb_event_work(struct work_struct *work) +{ + struct mlx5_bridge_switchdev_fdb_work *fdb_work = + container_of(work, struct mlx5_bridge_switchdev_fdb_work, work); + struct switchdev_notifier_fdb_info *fdb_info = + &fdb_work->fdb_info; + struct net_device *dev = fdb_work->dev; + struct mlx5e_rep_priv *rpriv; + struct mlx5_eswitch *esw; + struct mlx5_vport *vport; + struct mlx5e_priv *priv; + u16 vport_num; + + rtnl_lock(); + + priv = netdev_priv(dev); + rpriv = priv->ppriv; + vport_num = rpriv->rep->vport; + esw = priv->mdev->priv.eswitch; + vport = mlx5_eswitch_get_vport(esw, vport_num); + if (IS_ERR(vport)) + goto out; + + if (fdb_work->add) + mlx5_esw_bridge_fdb_create(dev, esw, vport, fdb_info); + else + mlx5_esw_bridge_fdb_remove(dev, esw, vport, fdb_info); + +out: + rtnl_unlock(); + mlx5_esw_bridge_cleanup_switchdev_fdb_work(fdb_work); +} + +static struct mlx5_bridge_switchdev_fdb_work * +mlx5_esw_bridge_init_switchdev_fdb_work(struct net_device *dev, bool add, + struct switchdev_notifier_fdb_info *fdb_info) +{ + struct mlx5_bridge_switchdev_fdb_work *work; + u8 *addr; + + work = kzalloc(sizeof(*work), GFP_ATOMIC); + if (!work) + return ERR_PTR(-ENOMEM); + + INIT_WORK(&work->work, mlx5_esw_bridge_switchdev_fdb_event_work); + memcpy(&work->fdb_info, fdb_info, sizeof(work->fdb_info)); + + addr = kzalloc(ETH_ALEN, GFP_ATOMIC); + if (!addr) { + kfree(work); + return ERR_PTR(-ENOMEM); + } + ether_addr_copy(addr, fdb_info->addr); + work->fdb_info.addr = addr; + + dev_hold(dev); + work->dev = dev; + work->add = add; + return work; +} + +static int mlx5_esw_bridge_switchdev_event(struct notifier_block *nb, + unsigned long event, void *ptr) +{ + struct mlx5_esw_bridge_offloads *br_offloads = container_of(nb, + struct mlx5_esw_bridge_offloads, + nb); + struct net_device *dev = switchdev_notifier_info_to_dev(ptr); + struct switchdev_notifier_fdb_info *fdb_info; + struct mlx5_bridge_switchdev_fdb_work *work; + struct switchdev_notifier_info *info = ptr; + struct net_device *upper; + struct mlx5e_priv *priv; + + if (!mlx5e_eswitch_rep(dev)) + return NOTIFY_DONE; + priv = netdev_priv(dev); + if (priv->mdev->priv.eswitch != br_offloads->esw) + return NOTIFY_DONE; + + upper = netdev_master_upper_dev_get_rcu(dev); + if (!upper) + return NOTIFY_DONE; + if (!netif_is_bridge_master(upper)) + return NOTIFY_DONE; + + switch (event) { + case SWITCHDEV_FDB_ADD_TO_DEVICE: + case SWITCHDEV_FDB_DEL_TO_DEVICE: + fdb_info = container_of(info, + struct switchdev_notifier_fdb_info, + info); + + work = mlx5_esw_bridge_init_switchdev_fdb_work(dev, + event == SWITCHDEV_FDB_ADD_TO_DEVICE, + fdb_info); + if (IS_ERR(work)) { + WARN_ONCE(1, "Failed to init switchdev work, err=%ld", + PTR_ERR(work)); + return notifier_from_errno(PTR_ERR(work)); + } + + queue_work(br_offloads->wq, &work->work); + break; + default: + break; + } + return NOTIFY_DONE; +} + void mlx5e_rep_bridge_init(struct mlx5e_priv *priv) { struct mlx5_esw_bridge_offloads *br_offloads; @@ -81,13 +206,34 @@ void mlx5e_rep_bridge_init(struct mlx5e_priv *priv) return; } + br_offloads->wq = alloc_ordered_workqueue("mlx5_bridge_wq", 0); + if (!br_offloads->wq) { + esw_warn(mdev, "Failed to allocate bridge offloads workqueue\n"); + goto err_alloc_wq; + } + + br_offloads->nb.notifier_call = mlx5_esw_bridge_switchdev_event; + err = register_switchdev_notifier(&br_offloads->nb); + if (err) { + esw_warn(mdev, "Failed to register switchdev notifier (err=%d)\n", err); + goto err_register_swdev; + } + br_offloads->netdev_nb.notifier_call = mlx5_esw_bridge_switchdev_port_event; err = register_netdevice_notifier(&br_offloads->netdev_nb); if (err) { esw_warn(mdev, "Failed to register bridge offloads netdevice notifier (err=%d)\n", err); - mlx5_esw_bridge_cleanup(esw); + goto err_register_netdev; } + return; + +err_register_netdev: + unregister_switchdev_notifier(&br_offloads->nb); +err_register_swdev: + destroy_workqueue(br_offloads->wq); +err_alloc_wq: + mlx5_esw_bridge_cleanup(esw); } void mlx5e_rep_bridge_cleanup(struct mlx5e_priv *priv) @@ -102,6 +248,8 @@ void mlx5e_rep_bridge_cleanup(struct mlx5e_priv *priv) return; unregister_netdevice_notifier(&br_offloads->netdev_nb); + unregister_switchdev_notifier(&br_offloads->nb); + destroy_workqueue(br_offloads->wq); rtnl_lock(); mlx5_esw_bridge_cleanup(esw); rtnl_unlock(); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c index b503562f97d0..6dd47891189c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c @@ -3,6 +3,7 @@ #include #include +#include #include #include "bridge.h" #include "eswitch.h" @@ -21,15 +22,53 @@ enum { MLX5_ESW_BRIDGE_LEVEL_EGRESS_TABLE, }; +struct mlx5_esw_bridge_fdb_key { + unsigned char addr[ETH_ALEN]; + u16 vid; +}; + +struct mlx5_esw_bridge_fdb_entry { + struct mlx5_esw_bridge_fdb_key key; + struct rhash_head ht_node; + struct list_head list; + u16 vport_num; + + struct mlx5_flow_handle *ingress_handle; + struct mlx5_flow_handle *egress_handle; +}; + +static const struct rhashtable_params fdb_ht_params = { + .key_offset = offsetof(struct mlx5_esw_bridge_fdb_entry, key), + .key_len = sizeof(struct mlx5_esw_bridge_fdb_key), + .head_offset = offsetof(struct mlx5_esw_bridge_fdb_entry, ht_node), + .automatic_shrinking = true, +}; + struct mlx5_esw_bridge { int ifindex; int refcnt; struct list_head list; + struct mlx5_esw_bridge_offloads *br_offloads; + + struct list_head fdb_list; + struct rhashtable fdb_ht; struct mlx5_flow_table *egress_ft; struct mlx5_flow_group *egress_mac_fg; }; +static void +mlx5_esw_bridge_fdb_offload_notify(struct net_device *dev, const unsigned char *addr, u16 vid, + unsigned long val) +{ + struct switchdev_notifier_fdb_info send_info; + + send_info.addr = addr; + send_info.vid = vid; + send_info.offloaded = true; + call_switchdev_notifiers(val, dev, &send_info.info, NULL); +} + static struct mlx5_flow_table * mlx5_esw_bridge_table_create(int max_fte, u32 level, struct mlx5_eswitch *esw) { @@ -128,6 +167,9 @@ mlx5_esw_bridge_ingress_table_init(struct mlx5_esw_bridge_offloads *br_offloads) struct mlx5_flow_group *mac_fg; int err; + if (!mlx5_eswitch_vport_match_metadata_enabled(br_offloads->esw)) + return -EOPNOTSUPP; + ingress_ft = mlx5_esw_bridge_table_create(MLX5_ESW_BRIDGE_INGRESS_TABLE_SIZE, MLX5_ESW_BRIDGE_LEVEL_INGRESS_TABLE, br_offloads->esw); @@ -194,6 +236,82 @@ mlx5_esw_bridge_egress_table_cleanup(struct mlx5_esw_bridge *bridge) mlx5_destroy_flow_table(bridge->egress_ft); } +static struct mlx5_flow_handle * +mlx5_esw_bridge_ingress_flow_create(u16 vport_num, const unsigned char *addr, u16 vid, + struct mlx5_esw_bridge *bridge) +{ + struct mlx5_esw_bridge_offloads *br_offloads = bridge->br_offloads; + struct mlx5_flow_destination dest = { + .type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE, + .ft = bridge->egress_ft, + }; + struct mlx5_flow_act flow_act = { + .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + .flags = FLOW_ACT_NO_APPEND, + }; + struct mlx5_flow_spec *rule_spec; + struct mlx5_flow_handle *handle; + u8 *smac_v, *smac_c; + + rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL); + if (!rule_spec) + return ERR_PTR(-ENOMEM); + + rule_spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS_2; + + smac_v = MLX5_ADDR_OF(fte_match_param, rule_spec->match_value, + outer_headers.smac_47_16); + ether_addr_copy(smac_v, addr); + smac_c = MLX5_ADDR_OF(fte_match_param, rule_spec->match_criteria, + outer_headers.smac_47_16); + eth_broadcast_addr(smac_c); + + MLX5_SET(fte_match_param, rule_spec->match_criteria, + misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_mask()); + MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters_2.metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_for_match(br_offloads->esw, vport_num)); + + handle = mlx5_add_flow_rules(br_offloads->ingress_ft, rule_spec, &flow_act, &dest, 1); + + kvfree(rule_spec); + return handle; +} + +static struct mlx5_flow_handle * +mlx5_esw_bridge_egress_flow_create(u16 vport_num, const unsigned char *addr, u16 vid, + struct mlx5_esw_bridge *bridge) +{ + struct mlx5_flow_destination dest = { + .type = MLX5_FLOW_DESTINATION_TYPE_VPORT, + .vport.num = vport_num, + }; + struct mlx5_flow_act flow_act = { + .action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST, + .flags = FLOW_ACT_NO_APPEND, + }; + struct mlx5_flow_spec *rule_spec; + struct mlx5_flow_handle *handle; + u8 *dmac_v, *dmac_c; + + rule_spec = kvzalloc(sizeof(*rule_spec), GFP_KERNEL); + if (!rule_spec) + return ERR_PTR(-ENOMEM); + + rule_spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS; + + dmac_v = MLX5_ADDR_OF(fte_match_param, rule_spec->match_value, + outer_headers.dmac_47_16); + ether_addr_copy(dmac_v, addr); + dmac_c = MLX5_ADDR_OF(fte_match_param, rule_spec->match_criteria, + outer_headers.dmac_47_16); + eth_broadcast_addr(dmac_c); + + handle = mlx5_add_flow_rules(bridge->egress_ft, rule_spec, &flow_act, &dest, 1); + + kvfree(rule_spec); + return handle; +} + static struct mlx5_esw_bridge *mlx5_esw_bridge_create(int ifindex, struct mlx5_esw_bridge_offloads *br_offloads) { @@ -204,16 +322,24 @@ static struct mlx5_esw_bridge *mlx5_esw_bridge_create(int ifindex, if (!bridge) return ERR_PTR(-ENOMEM); + bridge->br_offloads = br_offloads; err = mlx5_esw_bridge_egress_table_init(br_offloads, bridge); if (err) goto err_egress_tbl; + err = rhashtable_init(&bridge->fdb_ht, &fdb_ht_params); + if (err) + goto err_fdb_ht; + + INIT_LIST_HEAD(&bridge->fdb_list); bridge->ifindex = ifindex; bridge->refcnt = 1; list_add(&bridge->list, &br_offloads->bridges); return bridge; +err_fdb_ht: + mlx5_esw_bridge_egress_table_cleanup(bridge); err_egress_tbl: kvfree(bridge); return ERR_PTR(err); @@ -232,6 +358,7 @@ static void mlx5_esw_bridge_put(struct mlx5_esw_bridge_offloads *br_offloads, mlx5_esw_bridge_egress_table_cleanup(bridge); list_del(&bridge->list); + rhashtable_destroy(&bridge->fdb_ht); kvfree(bridge); if (list_empty(&br_offloads->bridges)) @@ -265,6 +392,69 @@ mlx5_esw_bridge_lookup(int ifindex, struct mlx5_esw_bridge_offloads *br_offloads return bridge; } +static void +mlx5_esw_bridge_fdb_entry_cleanup(struct mlx5_esw_bridge_fdb_entry *entry, + struct mlx5_esw_bridge *bridge) +{ + rhashtable_remove_fast(&bridge->fdb_ht, &entry->ht_node, fdb_ht_params); + mlx5_del_flow_rules(entry->egress_handle); + mlx5_del_flow_rules(entry->ingress_handle); + list_del(&entry->list); + kvfree(entry); +} + +static struct mlx5_esw_bridge_fdb_entry * +mlx5_esw_bridge_fdb_entry_init(struct net_device *dev, u16 vport_num, const unsigned char *addr, + u16 vid, struct mlx5_eswitch *esw, struct mlx5_esw_bridge *bridge) +{ + struct mlx5_esw_bridge_fdb_entry *entry; + struct mlx5_flow_handle *handle; + int err; + + entry = kvzalloc(sizeof(*entry), GFP_KERNEL); + if (!entry) + return ERR_PTR(-ENOMEM); + + ether_addr_copy(entry->key.addr, addr); + entry->key.vid = vid; + entry->vport_num = vport_num; + + handle = mlx5_esw_bridge_ingress_flow_create(vport_num, addr, vid, bridge); + if (IS_ERR(handle)) { + err = PTR_ERR(handle); + esw_warn(esw->dev, "Failed to create ingress flow(vport=%u,err=%d)\n", + vport_num, err); + goto err_ingress_flow_create; + } + entry->ingress_handle = handle; + + handle = mlx5_esw_bridge_egress_flow_create(vport_num, addr, vid, bridge); + if (IS_ERR(handle)) { + err = PTR_ERR(handle); + esw_warn(esw->dev, "Failed to create egress flow(vport=%u,err=%d)\n", + vport_num, err); + goto err_egress_flow_create; + } + entry->egress_handle = handle; + + err = rhashtable_insert_fast(&bridge->fdb_ht, &entry->ht_node, fdb_ht_params); + if (err) { + esw_warn(esw->dev, "Failed to insert FDB flow(vport=%u,err=%d)\n", vport_num, err); + goto err_ht_init; + } + + list_add(&entry->list, &bridge->fdb_list); + return entry; + +err_ht_init: + mlx5_del_flow_rules(entry->egress_handle); +err_egress_flow_create: + mlx5_del_flow_rules(entry->ingress_handle); +err_ingress_flow_create: + kvfree(entry); + return ERR_PTR(err); +} + static int mlx5_esw_bridge_vport_init(struct mlx5_esw_bridge *bridge, struct mlx5_vport *vport) { @@ -275,7 +465,14 @@ static int mlx5_esw_bridge_vport_init(struct mlx5_esw_bridge *bridge, static int mlx5_esw_bridge_vport_cleanup(struct mlx5_esw_bridge_offloads *br_offloads, struct mlx5_vport *vport) { - mlx5_esw_bridge_put(br_offloads, vport->bridge); + struct mlx5_esw_bridge *bridge = vport->bridge; + struct mlx5_esw_bridge_fdb_entry *entry, *tmp; + + list_for_each_entry_safe(entry, tmp, &bridge->fdb_list, list) + if (entry->vport_num == vport->vport) + mlx5_esw_bridge_fdb_entry_cleanup(entry, bridge); + + mlx5_esw_bridge_put(br_offloads, bridge); vport->bridge = NULL; return 0; } @@ -299,11 +496,13 @@ int mlx5_esw_bridge_vport_link(int ifindex, struct mlx5_esw_bridge_offloads *br_ int mlx5_esw_bridge_vport_unlink(int ifindex, struct mlx5_esw_bridge_offloads *br_offloads, struct mlx5_vport *vport, struct netlink_ext_ack *extack) { - if (!vport->bridge) { + struct mlx5_esw_bridge *bridge = vport->bridge; + + if (!bridge) { NL_SET_ERR_MSG_MOD(extack, "Port is not attached to any bridge"); return -EINVAL; } - if (vport->bridge->ifindex != ifindex) { + if (bridge->ifindex != ifindex) { NL_SET_ERR_MSG_MOD(extack, "Port is attached to another bridge"); return -EINVAL; } @@ -311,6 +510,55 @@ int mlx5_esw_bridge_vport_unlink(int ifindex, struct mlx5_esw_bridge_offloads *b return mlx5_esw_bridge_vport_cleanup(br_offloads, vport); } +void mlx5_esw_bridge_fdb_create(struct net_device *dev, struct mlx5_eswitch *esw, + struct mlx5_vport *vport, + struct switchdev_notifier_fdb_info *fdb_info) +{ + struct mlx5_esw_bridge *bridge = vport->bridge; + struct mlx5_esw_bridge_fdb_entry *entry; + u16 vport_num = vport->vport; + + if (!bridge) { + esw_info(esw->dev, "Vport is not assigned to bridge (vport=%u)\n", vport_num); + return; + } + + entry = mlx5_esw_bridge_fdb_entry_init(dev, vport_num, fdb_info->addr, fdb_info->vid, + esw, bridge); + if (IS_ERR(entry)) + return; + + mlx5_esw_bridge_fdb_offload_notify(dev, entry->key.addr, entry->key.vid, + SWITCHDEV_FDB_OFFLOADED); +} + +void mlx5_esw_bridge_fdb_remove(struct net_device *dev, struct mlx5_eswitch *esw, + struct mlx5_vport *vport, + struct switchdev_notifier_fdb_info *fdb_info) +{ + struct mlx5_esw_bridge *bridge = vport->bridge; + struct mlx5_esw_bridge_fdb_entry *entry; + struct mlx5_esw_bridge_fdb_key key; + u16 vport_num = vport->vport; + + if (!bridge) { + esw_warn(esw->dev, "Vport is not assigned to bridge (vport=%u)\n", vport_num); + return; + } + + ether_addr_copy(key.addr, fdb_info->addr); + key.vid = fdb_info->vid; + entry = rhashtable_lookup_fast(&bridge->fdb_ht, &key, fdb_ht_params); + if (!entry) { + esw_warn(esw->dev, + "FDB entry with specified key not found (MAC=%pM,vid=%u,vport=%u)\n", + key.addr, key.vid, vport_num); + return; + } + + mlx5_esw_bridge_fdb_entry_cleanup(entry, bridge); +} + static void mlx5_esw_bridge_flush(struct mlx5_esw_bridge_offloads *br_offloads) { struct mlx5_eswitch *esw = br_offloads->esw; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h index 319b6f1db0ba..cec118c0b733 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h @@ -10,11 +10,14 @@ struct mlx5_flow_table; struct mlx5_flow_group; +struct workqueue_struct; struct mlx5_esw_bridge_offloads { struct mlx5_eswitch *esw; struct list_head bridges; struct notifier_block netdev_nb; + struct notifier_block nb; + struct workqueue_struct *wq; struct mlx5_flow_table *ingress_ft; struct mlx5_flow_group *ingress_mac_fg; @@ -26,5 +29,11 @@ int mlx5_esw_bridge_vport_link(int ifindex, struct mlx5_esw_bridge_offloads *br_ struct mlx5_vport *vport, struct netlink_ext_ack *extack); int mlx5_esw_bridge_vport_unlink(int ifindex, struct mlx5_esw_bridge_offloads *br_offloads, struct mlx5_vport *vport, struct netlink_ext_ack *extack); +void mlx5_esw_bridge_fdb_create(struct net_device *dev, struct mlx5_eswitch *esw, + struct mlx5_vport *vport, + struct switchdev_notifier_fdb_info *fdb_info); +void mlx5_esw_bridge_fdb_remove(struct net_device *dev, struct mlx5_eswitch *esw, + struct mlx5_vport *vport, + struct switchdev_notifier_fdb_info *fdb_info); #endif /* __MLX5_ESW_BRIDGE_H__ */ -- cgit v1.2.3 From ffc89ee5e5e88aa5924034c28d5e5aae75229e0f Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Fri, 12 Mar 2021 13:37:46 +0200 Subject: net/mlx5: Bridge, match FDB entry vlan tag Add support for FDB vlan-tagged entries. Extend ingress and egress flow tables with flow groups to match packet vlan tag. Modify the flow creation code to include vlan tag, if vlan is configured on port and vlan configuration is supported for offload. Signed-off-by: Vlad Buslov Reviewed-by: Jianbo Liu Signed-off-by: Saeed Mahameed --- .../device_drivers/ethernet/mellanox/mlx5.rst | 9 + .../net/ethernet/mellanox/mlx5/core/esw/bridge.c | 181 +++++++++++++++++++-- .../net/ethernet/mellanox/mlx5/core/esw/bridge.h | 1 + 3 files changed, 181 insertions(+), 10 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst index ea32136b30e7..a0c91fe5574d 100644 --- a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst +++ b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst @@ -232,6 +232,15 @@ representor is attached to bridge. $ ip link set enp8s0f0 master bridge1 +VLANs +----- +Following bridge VLAN functions are supported by mlx5: + +- VLAN filtering (including multiple VLANs per port):: + + $ ip link set bridge1 type bridge vlan_filtering 1 + $ bridge vlan add dev enp8s0f0 vid 2-3 + mlx5 subfunction ================ mlx5 supports subfunction management using devlink port (see :ref:`Documentation/networking/devlink/devlink-port.rst `) interface. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c index eec5897c6b79..e1467dbe80dc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c @@ -12,11 +12,17 @@ #include "fs_core.h" #define MLX5_ESW_BRIDGE_INGRESS_TABLE_SIZE 64000 -#define MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_FROM 0 +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_FROM 0 +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_TO (MLX5_ESW_BRIDGE_INGRESS_TABLE_SIZE / 2 - 1) +#define MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_FROM \ + (MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_TO + 1) #define MLX5_ESW_BRIDGE_INGRESS_TABLE_MAC_GRP_IDX_TO (MLX5_ESW_BRIDGE_INGRESS_TABLE_SIZE - 1) #define MLX5_ESW_BRIDGE_EGRESS_TABLE_SIZE 64000 -#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_FROM 0 +#define MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_FROM 0 +#define MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_TO (MLX5_ESW_BRIDGE_EGRESS_TABLE_SIZE / 2 - 1) +#define MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_FROM \ + (MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_TO + 1) #define MLX5_ESW_BRIDGE_EGRESS_TABLE_MAC_GRP_IDX_TO (MLX5_ESW_BRIDGE_EGRESS_TABLE_SIZE - 1) enum { @@ -79,6 +85,7 @@ struct mlx5_esw_bridge { struct xarray vports; struct mlx5_flow_table *egress_ft; + struct mlx5_flow_group *egress_vlan_fg; struct mlx5_flow_group *egress_mac_fg; unsigned long ageing_time; u32 flags; @@ -120,6 +127,44 @@ mlx5_esw_bridge_table_create(int max_fte, u32 level, struct mlx5_eswitch *esw) return fdb; } +static struct mlx5_flow_group * +mlx5_esw_bridge_ingress_vlan_fg_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *ingress_ft) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *fg; + u32 *in, *match; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return ERR_PTR(-ENOMEM); + + MLX5_SET(create_flow_group_in, in, match_criteria_enable, + MLX5_MATCH_OUTER_HEADERS | MLX5_MATCH_MISC_PARAMETERS_2); + match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.smac_47_16); + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.smac_15_0); + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.cvlan_tag); + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.first_vid); + + MLX5_SET(fte_match_param, match, misc_parameters_2.metadata_reg_c_0, + mlx5_eswitch_get_vport_metadata_mask()); + + MLX5_SET(create_flow_group_in, in, start_flow_index, + MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_FROM); + MLX5_SET(create_flow_group_in, in, end_flow_index, + MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_TO); + + fg = mlx5_create_flow_group(ingress_ft, in); + kvfree(in); + if (IS_ERR(fg)) + esw_warn(esw->dev, + "Failed to create VLAN flow group for bridge ingress table (err=%ld)\n", + PTR_ERR(fg)); + + return fg; +} + static struct mlx5_flow_group * mlx5_esw_bridge_ingress_mac_fg_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *ingress_ft) { @@ -149,13 +194,46 @@ mlx5_esw_bridge_ingress_mac_fg_create(struct mlx5_eswitch *esw, struct mlx5_flow fg = mlx5_create_flow_group(ingress_ft, in); if (IS_ERR(fg)) esw_warn(esw->dev, - "Failed to create bridge ingress table MAC flow group (err=%ld)\n", + "Failed to create MAC flow group for bridge ingress table (err=%ld)\n", PTR_ERR(fg)); kvfree(in); return fg; } +static struct mlx5_flow_group * +mlx5_esw_bridge_egress_vlan_fg_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *egress_ft) +{ + int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in); + struct mlx5_flow_group *fg; + u32 *in, *match; + + in = kvzalloc(inlen, GFP_KERNEL); + if (!in) + return ERR_PTR(-ENOMEM); + + MLX5_SET(create_flow_group_in, in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS); + match = MLX5_ADDR_OF(create_flow_group_in, in, match_criteria); + + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.dmac_47_16); + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.dmac_15_0); + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.cvlan_tag); + MLX5_SET_TO_ONES(fte_match_param, match, outer_headers.first_vid); + + MLX5_SET(create_flow_group_in, in, start_flow_index, + MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_FROM); + MLX5_SET(create_flow_group_in, in, end_flow_index, + MLX5_ESW_BRIDGE_EGRESS_TABLE_VLAN_GRP_IDX_TO); + + fg = mlx5_create_flow_group(egress_ft, in); + if (IS_ERR(fg)) + esw_warn(esw->dev, + "Failed to create VLAN flow group for bridge egress table (err=%ld)\n", + PTR_ERR(fg)); + kvfree(in); + return fg; +} + static struct mlx5_flow_group * mlx5_esw_bridge_egress_mac_fg_create(struct mlx5_eswitch *esw, struct mlx5_flow_table *egress_ft) { @@ -190,8 +268,8 @@ mlx5_esw_bridge_egress_mac_fg_create(struct mlx5_eswitch *esw, struct mlx5_flow_ static int mlx5_esw_bridge_ingress_table_init(struct mlx5_esw_bridge_offloads *br_offloads) { + struct mlx5_flow_group *mac_fg, *vlan_fg; struct mlx5_flow_table *ingress_ft; - struct mlx5_flow_group *mac_fg; int err; if (!mlx5_eswitch_vport_match_metadata_enabled(br_offloads->esw)) @@ -203,6 +281,12 @@ mlx5_esw_bridge_ingress_table_init(struct mlx5_esw_bridge_offloads *br_offloads) if (IS_ERR(ingress_ft)) return PTR_ERR(ingress_ft); + vlan_fg = mlx5_esw_bridge_ingress_vlan_fg_create(br_offloads->esw, ingress_ft); + if (IS_ERR(vlan_fg)) { + err = PTR_ERR(vlan_fg); + goto err_vlan_fg; + } + mac_fg = mlx5_esw_bridge_ingress_mac_fg_create(br_offloads->esw, ingress_ft); if (IS_ERR(mac_fg)) { err = PTR_ERR(mac_fg); @@ -210,10 +294,13 @@ mlx5_esw_bridge_ingress_table_init(struct mlx5_esw_bridge_offloads *br_offloads) } br_offloads->ingress_ft = ingress_ft; + br_offloads->ingress_vlan_fg = vlan_fg; br_offloads->ingress_mac_fg = mac_fg; return 0; err_mac_fg: + mlx5_destroy_flow_group(vlan_fg); +err_vlan_fg: mlx5_destroy_flow_table(ingress_ft); return err; } @@ -223,6 +310,8 @@ mlx5_esw_bridge_ingress_table_cleanup(struct mlx5_esw_bridge_offloads *br_offloa { mlx5_destroy_flow_group(br_offloads->ingress_mac_fg); br_offloads->ingress_mac_fg = NULL; + mlx5_destroy_flow_group(br_offloads->ingress_vlan_fg); + br_offloads->ingress_vlan_fg = NULL; mlx5_destroy_flow_table(br_offloads->ingress_ft); br_offloads->ingress_ft = NULL; } @@ -231,8 +320,8 @@ static int mlx5_esw_bridge_egress_table_init(struct mlx5_esw_bridge_offloads *br_offloads, struct mlx5_esw_bridge *bridge) { + struct mlx5_flow_group *mac_fg, *vlan_fg; struct mlx5_flow_table *egress_ft; - struct mlx5_flow_group *mac_fg; int err; egress_ft = mlx5_esw_bridge_table_create(MLX5_ESW_BRIDGE_EGRESS_TABLE_SIZE, @@ -241,6 +330,12 @@ mlx5_esw_bridge_egress_table_init(struct mlx5_esw_bridge_offloads *br_offloads, if (IS_ERR(egress_ft)) return PTR_ERR(egress_ft); + vlan_fg = mlx5_esw_bridge_egress_vlan_fg_create(br_offloads->esw, egress_ft); + if (IS_ERR(vlan_fg)) { + err = PTR_ERR(vlan_fg); + goto err_vlan_fg; + } + mac_fg = mlx5_esw_bridge_egress_mac_fg_create(br_offloads->esw, egress_ft); if (IS_ERR(mac_fg)) { err = PTR_ERR(mac_fg); @@ -248,10 +343,13 @@ mlx5_esw_bridge_egress_table_init(struct mlx5_esw_bridge_offloads *br_offloads, } bridge->egress_ft = egress_ft; + bridge->egress_vlan_fg = vlan_fg; bridge->egress_mac_fg = mac_fg; return 0; err_mac_fg: + mlx5_destroy_flow_group(vlan_fg); +err_vlan_fg: mlx5_destroy_flow_table(egress_ft); return err; } @@ -260,12 +358,14 @@ static void mlx5_esw_bridge_egress_table_cleanup(struct mlx5_esw_bridge *bridge) { mlx5_destroy_flow_group(bridge->egress_mac_fg); + mlx5_destroy_flow_group(bridge->egress_vlan_fg); mlx5_destroy_flow_table(bridge->egress_ft); } static struct mlx5_flow_handle * -mlx5_esw_bridge_ingress_flow_create(u16 vport_num, const unsigned char *addr, u16 vid, - u32 counter_id, struct mlx5_esw_bridge *bridge) +mlx5_esw_bridge_ingress_flow_create(u16 vport_num, const unsigned char *addr, + struct mlx5_esw_bridge_vlan *vlan, u32 counter_id, + struct mlx5_esw_bridge *bridge) { struct mlx5_esw_bridge_offloads *br_offloads = bridge->br_offloads; struct mlx5_flow_act flow_act = { @@ -295,6 +395,17 @@ mlx5_esw_bridge_ingress_flow_create(u16 vport_num, const unsigned char *addr, u1 MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_for_match(br_offloads->esw, vport_num)); + if (vlan) { + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, + outer_headers.cvlan_tag); + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value, + outer_headers.cvlan_tag); + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, + outer_headers.first_vid); + MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.first_vid, + vlan->vid); + } + dests[0].type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE; dests[0].ft = bridge->egress_ft; dests[1].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER; @@ -308,7 +419,8 @@ mlx5_esw_bridge_ingress_flow_create(u16 vport_num, const unsigned char *addr, u1 } static struct mlx5_flow_handle * -mlx5_esw_bridge_egress_flow_create(u16 vport_num, const unsigned char *addr, u16 vid, +mlx5_esw_bridge_egress_flow_create(u16 vport_num, const unsigned char *addr, + struct mlx5_esw_bridge_vlan *vlan, struct mlx5_esw_bridge *bridge) { struct mlx5_flow_destination dest = { @@ -336,6 +448,17 @@ mlx5_esw_bridge_egress_flow_create(u16 vport_num, const unsigned char *addr, u16 outer_headers.dmac_47_16); eth_broadcast_addr(dmac_c); + if (vlan) { + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, + outer_headers.cvlan_tag); + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value, + outer_headers.cvlan_tag); + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, + outer_headers.first_vid); + MLX5_SET(fte_match_param, rule_spec->match_value, outer_headers.first_vid, + vlan->vid); + } + handle = mlx5_add_flow_rules(bridge->egress_ft, rule_spec, &flow_act, &dest, 1); kvfree(rule_spec); @@ -517,17 +640,55 @@ static void mlx5_esw_bridge_port_vlans_flush(struct mlx5_esw_bridge_port *port) mlx5_esw_bridge_vlan_cleanup(port, vlan); } +static struct mlx5_esw_bridge_vlan * +mlx5_esw_bridge_port_vlan_lookup(u16 vid, u16 vport_num, struct mlx5_esw_bridge *bridge, + struct mlx5_eswitch *esw) +{ + struct mlx5_esw_bridge_port *port; + struct mlx5_esw_bridge_vlan *vlan; + + port = mlx5_esw_bridge_port_lookup(vport_num, bridge); + if (!port) { + /* FDB is added asynchronously on wq while port might have been deleted + * concurrently. Report on 'info' logging level and skip the FDB offload. + */ + esw_info(esw->dev, "Failed to lookup bridge port (vport=%u)\n", vport_num); + return ERR_PTR(-EINVAL); + } + + vlan = mlx5_esw_bridge_vlan_lookup(vid, port); + if (!vlan) { + /* FDB is added asynchronously on wq while vlan might have been deleted + * concurrently. Report on 'info' logging level and skip the FDB offload. + */ + esw_info(esw->dev, "Failed to lookup bridge port vlan metadata (vport=%u)\n", + vport_num); + return ERR_PTR(-EINVAL); + } + + return vlan; +} + static struct mlx5_esw_bridge_fdb_entry * mlx5_esw_bridge_fdb_entry_init(struct net_device *dev, u16 vport_num, const unsigned char *addr, u16 vid, bool added_by_user, struct mlx5_eswitch *esw, struct mlx5_esw_bridge *bridge) { + struct mlx5_esw_bridge_vlan *vlan = NULL; struct mlx5_esw_bridge_fdb_entry *entry; struct mlx5_flow_handle *handle; struct mlx5_fc *counter; struct mlx5e_priv *priv; int err; + if (bridge->flags & MLX5_ESW_BRIDGE_VLAN_FILTERING_FLAG && vid) { + vlan = mlx5_esw_bridge_port_vlan_lookup(vid, vport_num, bridge, esw); + if (IS_ERR(vlan)) + return ERR_CAST(vlan); + if (vlan->flags & (BRIDGE_VLAN_INFO_PVID | BRIDGE_VLAN_INFO_UNTAGGED)) + return ERR_PTR(-EOPNOTSUPP); /* can't offload vlan push/pop */ + } + priv = netdev_priv(dev); entry = kvzalloc(sizeof(*entry), GFP_KERNEL); if (!entry) @@ -548,7 +709,7 @@ mlx5_esw_bridge_fdb_entry_init(struct net_device *dev, u16 vport_num, const unsi } entry->ingress_counter = counter; - handle = mlx5_esw_bridge_ingress_flow_create(vport_num, addr, vid, mlx5_fc_id(counter), + handle = mlx5_esw_bridge_ingress_flow_create(vport_num, addr, vlan, mlx5_fc_id(counter), bridge); if (IS_ERR(handle)) { err = PTR_ERR(handle); @@ -558,7 +719,7 @@ mlx5_esw_bridge_fdb_entry_init(struct net_device *dev, u16 vport_num, const unsi } entry->ingress_handle = handle; - handle = mlx5_esw_bridge_egress_flow_create(vport_num, addr, vid, bridge); + handle = mlx5_esw_bridge_egress_flow_create(vport_num, addr, vlan, bridge); if (IS_ERR(handle)) { err = PTR_ERR(handle); esw_warn(esw->dev, "Failed to create egress flow(vport=%u,err=%d)\n", diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h index 276ed0392607..bedbda57cdb3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h @@ -22,6 +22,7 @@ struct mlx5_esw_bridge_offloads { struct delayed_work update_work; struct mlx5_flow_table *ingress_ft; + struct mlx5_flow_group *ingress_vlan_fg; struct mlx5_flow_group *ingress_mac_fg; }; -- cgit v1.2.3 From 36e55079e54955a70b2c340eedd6125f794a911d Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Fri, 2 Apr 2021 15:09:06 +0300 Subject: net/mlx5: Bridge, support pvid and untagged vlan configurations Implement support for pushing vlan header into untagged packet on ingress of port that has pvid configured and support for popping vlan on egress of port that has the matching vlan configured as untagged. To support such configurations packet reformat contexts of {INSERT|REMOVE}_HEADER types are created per such vlan and saved to struct mlx5_esw_bridge_vlan which allows all FDB entries on particular vlan to share single packet reformat instance. When initializing FDB entries with pvid or untagged vlan type set its mlx5_flow_act->pkt_reformat action accordingly. Flush all flows when removing vlan from port. This is necessary because even though software bridge removes all FDB entries before removing their vlan, mlx5 bridge implementation deletes their corresponding flow entries from hardware in asynchronous workqueue task, which will cause firmware error if vlan packet reformat context is deleted before all flows that point to it. Signed-off-by: Vlad Buslov Reviewed-by: Jianbo Liu Signed-off-by: Saeed Mahameed --- .../device_drivers/ethernet/mellanox/mlx5.rst | 8 + .../net/ethernet/mellanox/mlx5/core/esw/bridge.c | 175 +++++++++++++++++++-- 2 files changed, 167 insertions(+), 16 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst index a0c91fe5574d..058882dca17b 100644 --- a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst +++ b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst @@ -241,6 +241,14 @@ Following bridge VLAN functions are supported by mlx5: $ ip link set bridge1 type bridge vlan_filtering 1 $ bridge vlan add dev enp8s0f0 vid 2-3 +- VLAN push on bridge ingress:: + + $ bridge vlan add dev enp8s0f0 vid 3 pvid + +- VLAN pop on bridge egress:: + + $ bridge vlan add dev enp8s0f0 vid 3 untagged + mlx5 subfunction ================ mlx5 supports subfunction management using devlink port (see :ref:`Documentation/networking/devlink/devlink-port.rst `) interface. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c index e1467dbe80dc..442a62ff7b43 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c @@ -6,6 +6,8 @@ #include #include #include +#include +#include #include #include "bridge.h" #include "eswitch.h" @@ -44,6 +46,7 @@ struct mlx5_esw_bridge_fdb_entry { struct rhash_head ht_node; struct net_device *dev; struct list_head list; + struct list_head vlan_list; u16 vport_num; u16 flags; @@ -63,6 +66,9 @@ static const struct rhashtable_params fdb_ht_params = { struct mlx5_esw_bridge_vlan { u16 vid; u16 flags; + struct list_head fdb_list; + struct mlx5_pkt_reformat *pkt_reformat_push; + struct mlx5_pkt_reformat *pkt_reformat_pop; }; struct mlx5_esw_bridge_port { @@ -117,6 +123,7 @@ mlx5_esw_bridge_table_create(int max_fte, u32 level, struct mlx5_eswitch *esw) return ERR_PTR(-ENOENT); } + ft_attr.flags = MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT; ft_attr.max_fte = max_fte; ft_attr.level = level; ft_attr.prio = FDB_BR_OFFLOAD; @@ -395,7 +402,10 @@ mlx5_esw_bridge_ingress_flow_create(u16 vport_num, const unsigned char *addr, MLX5_SET(fte_match_param, rule_spec->match_value, misc_parameters_2.metadata_reg_c_0, mlx5_eswitch_get_vport_metadata_for_match(br_offloads->esw, vport_num)); - if (vlan) { + if (vlan && vlan->pkt_reformat_push) { + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + flow_act.pkt_reformat = vlan->pkt_reformat_push; + } else if (vlan) { MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, outer_headers.cvlan_tag); MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value, @@ -449,6 +459,11 @@ mlx5_esw_bridge_egress_flow_create(u16 vport_num, const unsigned char *addr, eth_broadcast_addr(dmac_c); if (vlan) { + if (vlan->pkt_reformat_pop) { + flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT; + flow_act.pkt_reformat = vlan->pkt_reformat_pop; + } + MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_criteria, outer_headers.cvlan_tag); MLX5_SET_TO_ONES(fte_match_param, rule_spec->match_value, @@ -597,8 +612,90 @@ mlx5_esw_bridge_vlan_lookup(u16 vid, struct mlx5_esw_bridge_port *port) return xa_load(&port->vlans, vid); } +static int +mlx5_esw_bridge_vlan_push_create(struct mlx5_esw_bridge_vlan *vlan, struct mlx5_eswitch *esw) +{ + struct { + __be16 h_vlan_proto; + __be16 h_vlan_TCI; + } vlan_hdr = { htons(ETH_P_8021Q), htons(vlan->vid) }; + struct mlx5_pkt_reformat_params reformat_params = {}; + struct mlx5_pkt_reformat *pkt_reformat; + + if (!BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, reformat_insert)) || + MLX5_CAP_GEN_2(esw->dev, max_reformat_insert_size) < sizeof(vlan_hdr) || + MLX5_CAP_GEN_2(esw->dev, max_reformat_insert_offset) < + offsetof(struct vlan_ethhdr, h_vlan_proto)) { + esw_warn(esw->dev, "Packet reformat INSERT_HEADER is not supported\n"); + return -EOPNOTSUPP; + } + + reformat_params.type = MLX5_REFORMAT_TYPE_INSERT_HDR; + reformat_params.param_0 = MLX5_REFORMAT_CONTEXT_ANCHOR_MAC_START; + reformat_params.param_1 = offsetof(struct vlan_ethhdr, h_vlan_proto); + reformat_params.size = sizeof(vlan_hdr); + reformat_params.data = &vlan_hdr; + pkt_reformat = mlx5_packet_reformat_alloc(esw->dev, + &reformat_params, + MLX5_FLOW_NAMESPACE_FDB); + if (IS_ERR(pkt_reformat)) { + esw_warn(esw->dev, "Failed to alloc packet reformat INSERT_HEADER (err=%ld)\n", + PTR_ERR(pkt_reformat)); + return PTR_ERR(pkt_reformat); + } + + vlan->pkt_reformat_push = pkt_reformat; + return 0; +} + +static void +mlx5_esw_bridge_vlan_push_cleanup(struct mlx5_esw_bridge_vlan *vlan, struct mlx5_eswitch *esw) +{ + mlx5_packet_reformat_dealloc(esw->dev, vlan->pkt_reformat_push); + vlan->pkt_reformat_push = NULL; +} + +static int +mlx5_esw_bridge_vlan_pop_create(struct mlx5_esw_bridge_vlan *vlan, struct mlx5_eswitch *esw) +{ + struct mlx5_pkt_reformat_params reformat_params = {}; + struct mlx5_pkt_reformat *pkt_reformat; + + if (!BIT(MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, reformat_remove)) || + MLX5_CAP_GEN_2(esw->dev, max_reformat_remove_size) < sizeof(struct vlan_hdr) || + MLX5_CAP_GEN_2(esw->dev, max_reformat_remove_offset) < + offsetof(struct vlan_ethhdr, h_vlan_proto)) { + esw_warn(esw->dev, "Packet reformat REMOVE_HEADER is not supported\n"); + return -EOPNOTSUPP; + } + + reformat_params.type = MLX5_REFORMAT_TYPE_REMOVE_HDR; + reformat_params.param_0 = MLX5_REFORMAT_CONTEXT_ANCHOR_MAC_START; + reformat_params.param_1 = offsetof(struct vlan_ethhdr, h_vlan_proto); + reformat_params.size = sizeof(struct vlan_hdr); + pkt_reformat = mlx5_packet_reformat_alloc(esw->dev, + &reformat_params, + MLX5_FLOW_NAMESPACE_FDB); + if (IS_ERR(pkt_reformat)) { + esw_warn(esw->dev, "Failed to alloc packet reformat REMOVE_HEADER (err=%ld)\n", + PTR_ERR(pkt_reformat)); + return PTR_ERR(pkt_reformat); + } + + vlan->pkt_reformat_pop = pkt_reformat; + return 0; +} + +static void +mlx5_esw_bridge_vlan_pop_cleanup(struct mlx5_esw_bridge_vlan *vlan, struct mlx5_eswitch *esw) +{ + mlx5_packet_reformat_dealloc(esw->dev, vlan->pkt_reformat_pop); + vlan->pkt_reformat_pop = NULL; +} + static struct mlx5_esw_bridge_vlan * -mlx5_esw_bridge_vlan_create(u16 vid, u16 flags, struct mlx5_esw_bridge_port *port) +mlx5_esw_bridge_vlan_create(u16 vid, u16 flags, struct mlx5_esw_bridge_port *port, + struct mlx5_eswitch *esw) { struct mlx5_esw_bridge_vlan *vlan; int err; @@ -609,13 +706,34 @@ mlx5_esw_bridge_vlan_create(u16 vid, u16 flags, struct mlx5_esw_bridge_port *por vlan->vid = vid; vlan->flags = flags; - err = xa_insert(&port->vlans, vid, vlan, GFP_KERNEL); - if (err) { - kvfree(vlan); - return ERR_PTR(err); + INIT_LIST_HEAD(&vlan->fdb_list); + + if (flags & BRIDGE_VLAN_INFO_PVID) { + err = mlx5_esw_bridge_vlan_push_create(vlan, esw); + if (err) + goto err_vlan_push; + } + if (flags & BRIDGE_VLAN_INFO_UNTAGGED) { + err = mlx5_esw_bridge_vlan_pop_create(vlan, esw); + if (err) + goto err_vlan_pop; } + err = xa_insert(&port->vlans, vid, vlan, GFP_KERNEL); + if (err) + goto err_xa_insert; + return vlan; + +err_xa_insert: + if (vlan->pkt_reformat_pop) + mlx5_esw_bridge_vlan_pop_cleanup(vlan, esw); +err_vlan_pop: + if (vlan->pkt_reformat_push) + mlx5_esw_bridge_vlan_push_cleanup(vlan, esw); +err_vlan_push: + kvfree(vlan); + return ERR_PTR(err); } static void mlx5_esw_bridge_vlan_erase(struct mlx5_esw_bridge_port *port, @@ -624,20 +742,42 @@ static void mlx5_esw_bridge_vlan_erase(struct mlx5_esw_bridge_port *port, xa_erase(&port->vlans, vlan->vid); } +static void mlx5_esw_bridge_vlan_flush(struct mlx5_esw_bridge_vlan *vlan, + struct mlx5_esw_bridge *bridge) +{ + struct mlx5_esw_bridge_fdb_entry *entry, *tmp; + + list_for_each_entry_safe(entry, tmp, &vlan->fdb_list, vlan_list) { + if (!(entry->flags & MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER)) + mlx5_esw_bridge_fdb_offload_notify(entry->dev, entry->key.addr, + entry->key.vid, + SWITCHDEV_FDB_DEL_TO_BRIDGE); + mlx5_esw_bridge_fdb_entry_cleanup(entry, bridge); + } + + if (vlan->pkt_reformat_pop) + mlx5_esw_bridge_vlan_pop_cleanup(vlan, bridge->br_offloads->esw); + if (vlan->pkt_reformat_push) + mlx5_esw_bridge_vlan_push_cleanup(vlan, bridge->br_offloads->esw); +} + static void mlx5_esw_bridge_vlan_cleanup(struct mlx5_esw_bridge_port *port, - struct mlx5_esw_bridge_vlan *vlan) + struct mlx5_esw_bridge_vlan *vlan, + struct mlx5_esw_bridge *bridge) { + mlx5_esw_bridge_vlan_flush(vlan, bridge); mlx5_esw_bridge_vlan_erase(port, vlan); kvfree(vlan); } -static void mlx5_esw_bridge_port_vlans_flush(struct mlx5_esw_bridge_port *port) +static void mlx5_esw_bridge_port_vlans_flush(struct mlx5_esw_bridge_port *port, + struct mlx5_esw_bridge *bridge) { struct mlx5_esw_bridge_vlan *vlan; unsigned long index; xa_for_each(&port->vlans, index, vlan) - mlx5_esw_bridge_vlan_cleanup(port, vlan); + mlx5_esw_bridge_vlan_cleanup(port, vlan, bridge); } static struct mlx5_esw_bridge_vlan * @@ -685,8 +825,6 @@ mlx5_esw_bridge_fdb_entry_init(struct net_device *dev, u16 vport_num, const unsi vlan = mlx5_esw_bridge_port_vlan_lookup(vid, vport_num, bridge, esw); if (IS_ERR(vlan)) return ERR_CAST(vlan); - if (vlan->flags & (BRIDGE_VLAN_INFO_PVID | BRIDGE_VLAN_INFO_UNTAGGED)) - return ERR_PTR(-EOPNOTSUPP); /* can't offload vlan push/pop */ } priv = netdev_priv(dev); @@ -734,6 +872,10 @@ mlx5_esw_bridge_fdb_entry_init(struct net_device *dev, u16 vport_num, const unsi goto err_ht_init; } + if (vlan) + list_add(&entry->vlan_list, &vlan->fdb_list); + else + INIT_LIST_HEAD(&entry->vlan_list); list_add(&entry->list, &bridge->fdb_list); return entry; @@ -831,7 +973,7 @@ static int mlx5_esw_bridge_vport_cleanup(struct mlx5_esw_bridge_offloads *br_off return -EINVAL; } - mlx5_esw_bridge_port_vlans_flush(port); + mlx5_esw_bridge_port_vlans_flush(port, bridge); mlx5_esw_bridge_port_erase(port, bridge); kvfree(port); mlx5_esw_bridge_put(br_offloads, bridge); @@ -892,11 +1034,12 @@ int mlx5_esw_bridge_port_vlan_add(u16 vid, u16 flags, struct mlx5_eswitch *esw, vlan = mlx5_esw_bridge_vlan_lookup(vid, port); if (vlan) { - vlan->flags = flags; - return 0; + if (vlan->flags == flags) + return 0; + mlx5_esw_bridge_vlan_cleanup(port, vlan, vport->bridge); } - vlan = mlx5_esw_bridge_vlan_create(vid, flags, port); + vlan = mlx5_esw_bridge_vlan_create(vid, flags, port, esw); if (IS_ERR(vlan)) { NL_SET_ERR_MSG_MOD(extack, "Failed to create VLAN entry"); return PTR_ERR(vlan); @@ -916,7 +1059,7 @@ void mlx5_esw_bridge_port_vlan_del(u16 vid, struct mlx5_eswitch *esw, struct mlx vlan = mlx5_esw_bridge_vlan_lookup(vid, port); if (!vlan) return; - mlx5_esw_bridge_vlan_cleanup(port, vlan); + mlx5_esw_bridge_vlan_cleanup(port, vlan, vport->bridge); } void mlx5_esw_bridge_fdb_create(struct net_device *dev, struct mlx5_eswitch *esw, -- cgit v1.2.3 From 9724fd5d9c2a0d3686b799ed5ca90cb9378ca4f2 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Fri, 2 Apr 2021 21:16:13 +0300 Subject: net/mlx5: Bridge, add tracepoints Move private bridge structures to dedicated headers that is accessible to bridge tracepoint header. Implemented following tracepoints: - Initialize FDB entry. - Refresh FDB entry. - Cleanup FDB entry. - Create VLAN. - Cleanup VLAN. - Attach port to bridge. - Detach port from bridge. Usage example: ># cd /sys/kernel/debug/tracing ># echo mlx5:mlx5_esw_bridge_fdb_entry_init >> set_event ># cat trace ... kworker/u20:1-96 [001] .... 231.892503: mlx5_esw_bridge_fdb_entry_init: net_device=enp8s0f0_0 addr=e4:fd:05:08:00:02 vid=3 flags=0 lastuse=4294895695 Signed-off-by: Vlad Buslov Reviewed-by: Jianbo Liu Signed-off-by: Saeed Mahameed --- .../device_drivers/ethernet/mellanox/mlx5.rst | 56 ++++++++++ .../net/ethernet/mellanox/mlx5/core/esw/bridge.c | 75 +++++--------- .../ethernet/mellanox/mlx5/core/esw/bridge_priv.h | 53 ++++++++++ .../mlx5/core/esw/diag/bridge_tracepoint.h | 113 +++++++++++++++++++++ 4 files changed, 247 insertions(+), 50 deletions(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h (limited to 'Documentation') diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst index 058882dca17b..ef8cb62e82a1 100644 --- a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst +++ b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5.rst @@ -600,3 +600,59 @@ tc and eswitch offloads tracepoints: $ cat /sys/kernel/debug/tracing/trace ... kworker/u48:7-2221 [009] ...1 1475.387435: mlx5e_rep_neigh_update: netdev: ens1f0 MAC: 24:8a:07:9a:17:9a IPv4: 1.1.1.10 IPv6: ::ffff:1.1.1.10 neigh_connected=1 + +Bridge offloads tracepoints: + +- mlx5_esw_bridge_fdb_entry_init: trace bridge FDB entry offloaded to mlx5:: + + $ echo mlx5:mlx5_esw_bridge_fdb_entry_init >> set_event + $ cat /sys/kernel/debug/tracing/trace + ... + kworker/u20:9-2217 [003] ...1 318.582243: mlx5_esw_bridge_fdb_entry_init: net_device=enp8s0f0_0 addr=e4:fd:05:08:00:02 vid=0 flags=0 used=0 + +- mlx5_esw_bridge_fdb_entry_cleanup: trace bridge FDB entry deleted from mlx5:: + + $ echo mlx5:mlx5_esw_bridge_fdb_entry_cleanup >> set_event + $ cat /sys/kernel/debug/tracing/trace + ... + ip-2581 [005] ...1 318.629871: mlx5_esw_bridge_fdb_entry_cleanup: net_device=enp8s0f0_1 addr=e4:fd:05:08:00:03 vid=0 flags=0 used=16 + +- mlx5_esw_bridge_fdb_entry_refresh: trace bridge FDB entry offload refreshed in + mlx5:: + + $ echo mlx5:mlx5_esw_bridge_fdb_entry_refresh >> set_event + $ cat /sys/kernel/debug/tracing/trace + ... + kworker/u20:8-3849 [003] ...1 466716: mlx5_esw_bridge_fdb_entry_refresh: net_device=enp8s0f0_0 addr=e4:fd:05:08:00:02 vid=3 flags=0 used=0 + +- mlx5_esw_bridge_vlan_create: trace bridge VLAN object add on mlx5 + representor:: + + $ echo mlx5:mlx5_esw_bridge_vlan_create >> set_event + $ cat /sys/kernel/debug/tracing/trace + ... + ip-2560 [007] ...1 318.460258: mlx5_esw_bridge_vlan_create: vid=1 flags=6 + +- mlx5_esw_bridge_vlan_cleanup: trace bridge VLAN object delete from mlx5 + representor:: + + $ echo mlx5:mlx5_esw_bridge_vlan_cleanup >> set_event + $ cat /sys/kernel/debug/tracing/trace + ... + bridge-2582 [007] ...1 318.653496: mlx5_esw_bridge_vlan_cleanup: vid=2 flags=8 + +- mlx5_esw_bridge_vport_init: trace mlx5 vport assigned with bridge upper + device:: + + $ echo mlx5:mlx5_esw_bridge_vport_init >> set_event + $ cat /sys/kernel/debug/tracing/trace + ... + ip-2560 [007] ...1 318.458915: mlx5_esw_bridge_vport_init: vport_num=1 + +- mlx5_esw_bridge_vport_cleanup: trace mlx5 vport removed from bridge upper + device:: + + $ echo mlx5:mlx5_esw_bridge_vport_cleanup >> set_event + $ cat /sys/kernel/debug/tracing/trace + ... + ip-5387 [000] ...1 573713: mlx5_esw_bridge_vport_cleanup: vport_num=1 diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c index b6345619cbfe..a6e1d4f78268 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c @@ -1,17 +1,15 @@ // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB /* Copyright (c) 2021 Mellanox Technologies. */ -#include #include -#include -#include -#include -#include -#include +#include +#include #include #include "bridge.h" #include "eswitch.h" -#include "fs_core.h" +#include "bridge_priv.h" +#define CREATE_TRACE_POINTS +#include "diag/bridge_tracepoint.h" #define MLX5_ESW_BRIDGE_INGRESS_TABLE_SIZE 64000 #define MLX5_ESW_BRIDGE_INGRESS_TABLE_VLAN_GRP_IDX_FROM 0 @@ -39,31 +37,6 @@ enum { MLX5_ESW_BRIDGE_LEVEL_SKIP_TABLE, }; -struct mlx5_esw_bridge_fdb_key { - unsigned char addr[ETH_ALEN]; - u16 vid; -}; - -enum { - MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER = BIT(0), -}; - -struct mlx5_esw_bridge_fdb_entry { - struct mlx5_esw_bridge_fdb_key key; - struct rhash_head ht_node; - struct net_device *dev; - struct list_head list; - struct list_head vlan_list; - u16 vport_num; - u16 flags; - - struct mlx5_flow_handle *ingress_handle; - struct mlx5_fc *ingress_counter; - unsigned long lastuse; - struct mlx5_flow_handle *egress_handle; - struct mlx5_flow_handle *filter_handle; -}; - static const struct rhashtable_params fdb_ht_params = { .key_offset = offsetof(struct mlx5_esw_bridge_fdb_entry, key), .key_len = sizeof(struct mlx5_esw_bridge_fdb_key), @@ -71,19 +44,6 @@ static const struct rhashtable_params fdb_ht_params = { .automatic_shrinking = true, }; -struct mlx5_esw_bridge_vlan { - u16 vid; - u16 flags; - struct list_head fdb_list; - struct mlx5_pkt_reformat *pkt_reformat_push; - struct mlx5_pkt_reformat *pkt_reformat_pop; -}; - -struct mlx5_esw_bridge_port { - u16 vport_num; - struct xarray vlans; -}; - enum { MLX5_ESW_BRIDGE_VLAN_FILTERING_FLAG = BIT(0), }; @@ -697,10 +657,23 @@ static void mlx5_esw_bridge_port_erase(struct mlx5_esw_bridge_port *port, xa_erase(&bridge->vports, port->vport_num); } +static void mlx5_esw_bridge_fdb_entry_refresh(unsigned long lastuse, + struct mlx5_esw_bridge_fdb_entry *entry) +{ + trace_mlx5_esw_bridge_fdb_entry_refresh(entry); + + entry->lastuse = lastuse; + mlx5_esw_bridge_fdb_offload_notify(entry->dev, entry->key.addr, + entry->key.vid, + SWITCHDEV_FDB_ADD_TO_BRIDGE); +} + static void mlx5_esw_bridge_fdb_entry_cleanup(struct mlx5_esw_bridge_fdb_entry *entry, struct mlx5_esw_bridge *bridge) { + trace_mlx5_esw_bridge_fdb_entry_cleanup(entry); + rhashtable_remove_fast(&bridge->fdb_ht, &entry->ht_node, fdb_ht_params); mlx5_del_flow_rules(entry->egress_handle); if (entry->filter_handle) @@ -842,6 +815,7 @@ mlx5_esw_bridge_vlan_create(u16 vid, u16 flags, struct mlx5_esw_bridge_port *por if (err) goto err_xa_insert; + trace_mlx5_esw_bridge_vlan_create(vlan); return vlan; err_xa_insert: @@ -884,6 +858,7 @@ static void mlx5_esw_bridge_vlan_cleanup(struct mlx5_esw_bridge_port *port, struct mlx5_esw_bridge_vlan *vlan, struct mlx5_esw_bridge *bridge) { + trace_mlx5_esw_bridge_vlan_cleanup(vlan); mlx5_esw_bridge_vlan_flush(vlan, bridge); mlx5_esw_bridge_vlan_erase(port, vlan); kvfree(vlan); @@ -1007,6 +982,8 @@ mlx5_esw_bridge_fdb_entry_init(struct net_device *dev, u16 vport_num, const unsi else INIT_LIST_HEAD(&entry->vlan_list); list_add(&entry->list, &bridge->fdb_list); + + trace_mlx5_esw_bridge_fdb_entry_init(entry); return entry; err_ht_init: @@ -1078,6 +1055,7 @@ static int mlx5_esw_bridge_vport_init(struct mlx5_esw_bridge_offloads *br_offloa vport->vport, err); goto err_port_insert; } + trace_mlx5_esw_bridge_vport_init(port); vport->bridge = bridge; return 0; @@ -1106,6 +1084,7 @@ static int mlx5_esw_bridge_vport_cleanup(struct mlx5_esw_bridge_offloads *br_off return -EINVAL; } + trace_mlx5_esw_bridge_vport_cleanup(port); mlx5_esw_bridge_port_vlans_flush(port, bridge); mlx5_esw_bridge_port_erase(port, bridge); kvfree(port); @@ -1266,11 +1245,7 @@ void mlx5_esw_bridge_update(struct mlx5_esw_bridge_offloads *br_offloads) continue; if (time_after(lastuse, entry->lastuse)) { - entry->lastuse = lastuse; - /* refresh existing bridge entry */ - mlx5_esw_bridge_fdb_offload_notify(entry->dev, entry->key.addr, - entry->key.vid, - SWITCHDEV_FDB_ADD_TO_BRIDGE); + mlx5_esw_bridge_fdb_entry_refresh(lastuse, entry); } else if (time_is_before_jiffies(entry->lastuse + bridge->ageing_time)) { mlx5_esw_bridge_fdb_offload_notify(entry->dev, entry->key.addr, entry->key.vid, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h new file mode 100644 index 000000000000..d9ab2e8bc2cb --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h @@ -0,0 +1,53 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021 Mellanox Technologies. */ + +#ifndef _MLX5_ESW_BRIDGE_PRIVATE_ +#define _MLX5_ESW_BRIDGE_PRIVATE_ + +#include +#include +#include +#include +#include +#include +#include "fs_core.h" + +struct mlx5_esw_bridge_fdb_key { + unsigned char addr[ETH_ALEN]; + u16 vid; +}; + +enum { + MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER = BIT(0), +}; + +struct mlx5_esw_bridge_fdb_entry { + struct mlx5_esw_bridge_fdb_key key; + struct rhash_head ht_node; + struct net_device *dev; + struct list_head list; + struct list_head vlan_list; + u16 vport_num; + u16 flags; + + struct mlx5_flow_handle *ingress_handle; + struct mlx5_fc *ingress_counter; + unsigned long lastuse; + struct mlx5_flow_handle *egress_handle; + struct mlx5_flow_handle *filter_handle; +}; + +struct mlx5_esw_bridge_vlan { + u16 vid; + u16 flags; + struct list_head fdb_list; + struct mlx5_pkt_reformat *pkt_reformat_push; + struct mlx5_pkt_reformat *pkt_reformat_pop; +}; + +struct mlx5_esw_bridge_port { + u16 vport_num; + struct xarray vlans; +}; + +#endif /* _MLX5_ESW_BRIDGE_PRIVATE_ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h new file mode 100644 index 000000000000..227964b7d3b9 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/diag/bridge_tracepoint.h @@ -0,0 +1,113 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */ +/* Copyright (c) 2021 Mellanox Technologies. */ + +#undef TRACE_SYSTEM +#define TRACE_SYSTEM mlx5 + +#if !defined(_MLX5_ESW_BRIDGE_TRACEPOINT_) || defined(TRACE_HEADER_MULTI_READ) +#define _MLX5_ESW_BRIDGE_TRACEPOINT_ + +#include +#include "../bridge_priv.h" + +DECLARE_EVENT_CLASS(mlx5_esw_bridge_fdb_template, + TP_PROTO(const struct mlx5_esw_bridge_fdb_entry *fdb), + TP_ARGS(fdb), + TP_STRUCT__entry( + __array(char, dev_name, IFNAMSIZ) + __array(unsigned char, addr, ETH_ALEN) + __field(u16, vid) + __field(u16, flags) + __field(unsigned int, used) + ), + TP_fast_assign( + strncpy(__entry->dev_name, + netdev_name(fdb->dev), + IFNAMSIZ); + memcpy(__entry->addr, fdb->key.addr, ETH_ALEN); + __entry->vid = fdb->key.vid; + __entry->flags = fdb->flags; + __entry->used = jiffies_to_msecs(jiffies - fdb->lastuse) + ), + TP_printk("net_device=%s addr=%pM vid=%hu flags=%hx used=%u", + __entry->dev_name, + __entry->addr, + __entry->vid, + __entry->flags, + __entry->used / 1000) + ); + +DEFINE_EVENT(mlx5_esw_bridge_fdb_template, + mlx5_esw_bridge_fdb_entry_init, + TP_PROTO(const struct mlx5_esw_bridge_fdb_entry *fdb), + TP_ARGS(fdb) + ); +DEFINE_EVENT(mlx5_esw_bridge_fdb_template, + mlx5_esw_bridge_fdb_entry_refresh, + TP_PROTO(const struct mlx5_esw_bridge_fdb_entry *fdb), + TP_ARGS(fdb) + ); +DEFINE_EVENT(mlx5_esw_bridge_fdb_template, + mlx5_esw_bridge_fdb_entry_cleanup, + TP_PROTO(const struct mlx5_esw_bridge_fdb_entry *fdb), + TP_ARGS(fdb) + ); + +DECLARE_EVENT_CLASS(mlx5_esw_bridge_vlan_template, + TP_PROTO(const struct mlx5_esw_bridge_vlan *vlan), + TP_ARGS(vlan), + TP_STRUCT__entry( + __field(u16, vid) + __field(u16, flags) + ), + TP_fast_assign( + __entry->vid = vlan->vid; + __entry->flags = vlan->flags; + ), + TP_printk("vid=%hu flags=%hx", + __entry->vid, + __entry->flags) + ); + +DEFINE_EVENT(mlx5_esw_bridge_vlan_template, + mlx5_esw_bridge_vlan_create, + TP_PROTO(const struct mlx5_esw_bridge_vlan *vlan), + TP_ARGS(vlan) + ); +DEFINE_EVENT(mlx5_esw_bridge_vlan_template, + mlx5_esw_bridge_vlan_cleanup, + TP_PROTO(const struct mlx5_esw_bridge_vlan *vlan), + TP_ARGS(vlan) + ); + +DECLARE_EVENT_CLASS(mlx5_esw_bridge_port_template, + TP_PROTO(const struct mlx5_esw_bridge_port *port), + TP_ARGS(port), + TP_STRUCT__entry( + __field(u16, vport_num) + ), + TP_fast_assign( + __entry->vport_num = port->vport_num; + ), + TP_printk("vport_num=%hu", __entry->vport_num) + ); + +DEFINE_EVENT(mlx5_esw_bridge_port_template, + mlx5_esw_bridge_vport_init, + TP_PROTO(const struct mlx5_esw_bridge_port *port), + TP_ARGS(port) + ); +DEFINE_EVENT(mlx5_esw_bridge_port_template, + mlx5_esw_bridge_vport_cleanup, + TP_PROTO(const struct mlx5_esw_bridge_port *port), + TP_ARGS(port) + ); + +#endif + +/* This part must be outside protection */ +#undef TRACE_INCLUDE_PATH +#define TRACE_INCLUDE_PATH esw/diag +#undef TRACE_INCLUDE_FILE +#define TRACE_INCLUDE_FILE bridge_tracepoint +#include -- cgit v1.2.3 From 4e50025129efabb07714c1f27a80526897da374b Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 11 Jun 2021 22:01:24 +0300 Subject: net: dsa: generalize overhead for taggers that use both headers and trailers Some really really weird switches just couldn't decide whether to use a normal or a tail tagger, so they just did both. This creates problems for DSA, because we only have the concept of an 'overhead' which can be applied to the headroom or to the tailroom of the skb (like for example during the central TX reallocation procedure), depending on the value of bool tail_tag, but not to both. We need to generalize DSA to cater for these odd switches by transforming the 'overhead / tail_tag' pair into 'needed_headroom / needed_tailroom'. The DSA master's MTU is increased to account for both. The flow dissector code is modified such that it only calls the DSA adjustment callback if the tagger has a non-zero header length. Taggers are trivially modified to declare either needed_headroom or needed_tailroom, based on the tail_tag value that they currently declare. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- Documentation/networking/dsa/dsa.rst | 21 +++++++++++---------- include/net/dsa.h | 6 +++--- net/core/flow_dissector.c | 2 +- net/dsa/dsa_priv.h | 5 +++++ net/dsa/master.c | 6 ++++-- net/dsa/slave.c | 10 ++++------ net/dsa/tag_ar9331.c | 2 +- net/dsa/tag_brcm.c | 6 +++--- net/dsa/tag_dsa.c | 4 ++-- net/dsa/tag_gswip.c | 2 +- net/dsa/tag_hellcreek.c | 3 +-- net/dsa/tag_ksz.c | 9 +++------ net/dsa/tag_lan9303.c | 2 +- net/dsa/tag_mtk.c | 2 +- net/dsa/tag_ocelot.c | 4 ++-- net/dsa/tag_ocelot_8021q.c | 2 +- net/dsa/tag_qca.c | 2 +- net/dsa/tag_rtl4_a.c | 2 +- net/dsa/tag_sja1105.c | 2 +- net/dsa/tag_trailer.c | 3 +-- net/dsa/tag_xrs700x.c | 3 +-- 21 files changed, 49 insertions(+), 49 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/dsa/dsa.rst b/Documentation/networking/dsa/dsa.rst index 8688009514cc..20baacf2bc5c 100644 --- a/Documentation/networking/dsa/dsa.rst +++ b/Documentation/networking/dsa/dsa.rst @@ -93,14 +93,15 @@ A tagging protocol may tag all packets with switch tags of the same length, or the tag length might vary (for example packets with PTP timestamps might require an extended switch tag, or there might be one tag length on TX and a different one on RX). Either way, the tagging protocol driver must populate the -``struct dsa_device_ops::overhead`` with the length in octets of the longest -switch frame header. The DSA framework will automatically adjust the MTU of the -master interface to accomodate for this extra size in order for DSA user ports -to support the standard MTU (L2 payload length) of 1500 octets. The ``overhead`` -is also used to request from the network stack, on a best-effort basis, the -allocation of packets with a ``needed_headroom`` or ``needed_tailroom`` -sufficient such that the act of pushing the switch tag on transmission of a -packet does not cause it to reallocate due to lack of memory. +``struct dsa_device_ops::needed_headroom`` and/or ``struct dsa_device_ops::needed_tailroom`` +with the length in octets of the longest switch frame header/trailer. The DSA +framework will automatically adjust the MTU of the master interface to +accommodate for this extra size in order for DSA user ports to support the +standard MTU (L2 payload length) of 1500 octets. The ``needed_headroom`` and +``needed_tailroom`` properties are also used to request from the network stack, +on a best-effort basis, the allocation of packets with enough extra space such +that the act of pushing the switch tag on transmission of a packet does not +cause it to reallocate due to lack of memory. Even though applications are not expected to parse DSA-specific frame headers, the format on the wire of the tagging protocol represents an Application Binary @@ -169,8 +170,8 @@ The job of this method is to prepare the skb in a way that the switch will understand what egress port the packet is for (and not deliver it towards other ports). Typically this is fulfilled by pushing a frame header. Checking for insufficient size in the skb headroom or tailroom is unnecessary provided that -the ``overhead`` and ``tail_tag`` properties were filled out properly, because -DSA ensures there is enough space before calling this method. +the ``needed_headroom`` and ``needed_tailroom`` properties were filled out +properly, because DSA ensures there is enough space before calling this method. The reception of a packet goes through the tagger's ``rcv`` function. The passed ``struct sk_buff *skb`` has ``skb->data`` pointing at diff --git a/include/net/dsa.h b/include/net/dsa.h index e1a2610a0e06..0a10f6fffc3d 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -91,7 +91,8 @@ struct dsa_device_ops { * as regular on the master net device. */ bool (*filter)(const struct sk_buff *skb, struct net_device *dev); - unsigned int overhead; + unsigned int needed_headroom; + unsigned int needed_tailroom; const char *name; enum dsa_tag_protocol proto; /* Some tagging protocols either mangle or shift the destination MAC @@ -100,7 +101,6 @@ struct dsa_device_ops { * its RX filter. */ bool promisc_on_master; - bool tail_tag; }; /* This structure defines the control interfaces that are overlayed by the @@ -926,7 +926,7 @@ static inline void dsa_tag_generic_flow_dissect(const struct sk_buff *skb, { #if IS_ENABLED(CONFIG_NET_DSA) const struct dsa_device_ops *ops = skb->dev->dsa_ptr->tag_ops; - int tag_len = ops->overhead; + int tag_len = ops->needed_headroom; *offset = tag_len; *proto = ((__be16 *)skb->data)[(tag_len / 2) - 1]; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 3ed7c98a98e1..c04455981c1e 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -944,7 +944,7 @@ bool __skb_flow_dissect(const struct net *net, ops = skb->dev->dsa_ptr->tag_ops; /* Tail taggers don't break flow dissection */ - if (!ops->tail_tag) { + if (!ops->needed_headroom) { if (ops->flow_dissect) ops->flow_dissect(skb, &proto, &offset); else diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 92282de54230..b8b17474b72b 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -154,6 +154,11 @@ const struct dsa_device_ops *dsa_find_tagger_by_name(const char *buf); bool dsa_schedule_work(struct work_struct *work); const char *dsa_tag_protocol_to_str(const struct dsa_device_ops *ops); +static inline int dsa_tag_protocol_overhead(const struct dsa_device_ops *ops) +{ + return ops->needed_headroom + ops->needed_tailroom; +} + /* master.c */ int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp); void dsa_master_teardown(struct net_device *dev); diff --git a/net/dsa/master.c b/net/dsa/master.c index 63adbc21a735..3fc90e36772d 100644 --- a/net/dsa/master.c +++ b/net/dsa/master.c @@ -346,10 +346,12 @@ static struct lock_class_key dsa_master_addr_list_lock_key; int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp) { - int mtu = ETH_DATA_LEN + cpu_dp->tag_ops->overhead; + const struct dsa_device_ops *tag_ops = cpu_dp->tag_ops; struct dsa_switch *ds = cpu_dp->ds; struct device_link *consumer_link; - int ret; + int mtu, ret; + + mtu = ETH_DATA_LEN + dsa_tag_protocol_overhead(tag_ops); /* The DSA master must use SET_NETDEV_DEV for this to work. */ consumer_link = device_link_add(ds->dev, dev->dev.parent, diff --git a/net/dsa/slave.c b/net/dsa/slave.c index d4756b920108..3ca509eb284d 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1569,7 +1569,7 @@ int dsa_slave_change_mtu(struct net_device *dev, int new_mtu) mtu_limit = min_t(int, master->max_mtu, dev->max_mtu); old_master_mtu = master->mtu; - new_master_mtu = largest_mtu + cpu_dp->tag_ops->overhead; + new_master_mtu = largest_mtu + dsa_tag_protocol_overhead(cpu_dp->tag_ops); if (new_master_mtu > mtu_limit) return -ERANGE; @@ -1605,7 +1605,7 @@ int dsa_slave_change_mtu(struct net_device *dev, int new_mtu) out_port_failed: if (new_master_mtu != old_master_mtu) dsa_port_mtu_change(cpu_dp, old_master_mtu - - cpu_dp->tag_ops->overhead, + dsa_tag_protocol_overhead(cpu_dp->tag_ops), true); out_cpu_failed: if (new_master_mtu != old_master_mtu) @@ -1824,10 +1824,8 @@ void dsa_slave_setup_tagger(struct net_device *slave) const struct dsa_port *cpu_dp = dp->cpu_dp; struct net_device *master = cpu_dp->master; - if (cpu_dp->tag_ops->tail_tag) - slave->needed_tailroom = cpu_dp->tag_ops->overhead; - else - slave->needed_headroom = cpu_dp->tag_ops->overhead; + slave->needed_headroom = cpu_dp->tag_ops->needed_headroom; + slave->needed_tailroom = cpu_dp->tag_ops->needed_tailroom; /* Try to save one extra realloc later in the TX path (in the master) * by also inheriting the master's needed headroom and tailroom. * The 8021q driver also does this. diff --git a/net/dsa/tag_ar9331.c b/net/dsa/tag_ar9331.c index 002cf7f952e2..0efae1a372b3 100644 --- a/net/dsa/tag_ar9331.c +++ b/net/dsa/tag_ar9331.c @@ -85,7 +85,7 @@ static const struct dsa_device_ops ar9331_netdev_ops = { .proto = DSA_TAG_PROTO_AR9331, .xmit = ar9331_tag_xmit, .rcv = ar9331_tag_rcv, - .overhead = AR9331_HDR_LEN, + .needed_headroom = AR9331_HDR_LEN, }; MODULE_LICENSE("GPL v2"); diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c index 40e9f3098c8d..0750af951fc9 100644 --- a/net/dsa/tag_brcm.c +++ b/net/dsa/tag_brcm.c @@ -205,7 +205,7 @@ static const struct dsa_device_ops brcm_netdev_ops = { .proto = DSA_TAG_PROTO_BRCM, .xmit = brcm_tag_xmit, .rcv = brcm_tag_rcv, - .overhead = BRCM_TAG_LEN, + .needed_headroom = BRCM_TAG_LEN, }; DSA_TAG_DRIVER(brcm_netdev_ops); @@ -286,7 +286,7 @@ static const struct dsa_device_ops brcm_legacy_netdev_ops = { .proto = DSA_TAG_PROTO_BRCM_LEGACY, .xmit = brcm_leg_tag_xmit, .rcv = brcm_leg_tag_rcv, - .overhead = BRCM_LEG_TAG_LEN, + .needed_headroom = BRCM_LEG_TAG_LEN, }; DSA_TAG_DRIVER(brcm_legacy_netdev_ops); @@ -314,7 +314,7 @@ static const struct dsa_device_ops brcm_prepend_netdev_ops = { .proto = DSA_TAG_PROTO_BRCM_PREPEND, .xmit = brcm_tag_xmit_prepend, .rcv = brcm_tag_rcv_prepend, - .overhead = BRCM_TAG_LEN, + .needed_headroom = BRCM_TAG_LEN, }; DSA_TAG_DRIVER(brcm_prepend_netdev_ops); diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c index 7e7b7decdf39..a822355afc90 100644 --- a/net/dsa/tag_dsa.c +++ b/net/dsa/tag_dsa.c @@ -303,7 +303,7 @@ static const struct dsa_device_ops dsa_netdev_ops = { .proto = DSA_TAG_PROTO_DSA, .xmit = dsa_xmit, .rcv = dsa_rcv, - .overhead = DSA_HLEN, + .needed_headroom = DSA_HLEN, }; DSA_TAG_DRIVER(dsa_netdev_ops); @@ -346,7 +346,7 @@ static const struct dsa_device_ops edsa_netdev_ops = { .proto = DSA_TAG_PROTO_EDSA, .xmit = edsa_xmit, .rcv = edsa_rcv, - .overhead = EDSA_HLEN, + .needed_headroom = EDSA_HLEN, }; DSA_TAG_DRIVER(edsa_netdev_ops); diff --git a/net/dsa/tag_gswip.c b/net/dsa/tag_gswip.c index 2f5bd5e338ab..5985dab06ab8 100644 --- a/net/dsa/tag_gswip.c +++ b/net/dsa/tag_gswip.c @@ -103,7 +103,7 @@ static const struct dsa_device_ops gswip_netdev_ops = { .proto = DSA_TAG_PROTO_GSWIP, .xmit = gswip_tag_xmit, .rcv = gswip_tag_rcv, - .overhead = GSWIP_RX_HEADER_LEN, + .needed_headroom = GSWIP_RX_HEADER_LEN, }; MODULE_LICENSE("GPL"); diff --git a/net/dsa/tag_hellcreek.c b/net/dsa/tag_hellcreek.c index a09805c8e1ab..424130f85f59 100644 --- a/net/dsa/tag_hellcreek.c +++ b/net/dsa/tag_hellcreek.c @@ -54,8 +54,7 @@ static const struct dsa_device_ops hellcreek_netdev_ops = { .proto = DSA_TAG_PROTO_HELLCREEK, .xmit = hellcreek_xmit, .rcv = hellcreek_rcv, - .overhead = HELLCREEK_TAG_LEN, - .tail_tag = true, + .needed_tailroom = HELLCREEK_TAG_LEN, }; MODULE_LICENSE("Dual MIT/GPL"); diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c index 4820dbcedfa2..53565f48934c 100644 --- a/net/dsa/tag_ksz.c +++ b/net/dsa/tag_ksz.c @@ -77,8 +77,7 @@ static const struct dsa_device_ops ksz8795_netdev_ops = { .proto = DSA_TAG_PROTO_KSZ8795, .xmit = ksz8795_xmit, .rcv = ksz8795_rcv, - .overhead = KSZ_INGRESS_TAG_LEN, - .tail_tag = true, + .needed_tailroom = KSZ_INGRESS_TAG_LEN, }; DSA_TAG_DRIVER(ksz8795_netdev_ops); @@ -149,8 +148,7 @@ static const struct dsa_device_ops ksz9477_netdev_ops = { .proto = DSA_TAG_PROTO_KSZ9477, .xmit = ksz9477_xmit, .rcv = ksz9477_rcv, - .overhead = KSZ9477_INGRESS_TAG_LEN, - .tail_tag = true, + .needed_tailroom = KSZ9477_INGRESS_TAG_LEN, }; DSA_TAG_DRIVER(ksz9477_netdev_ops); @@ -183,8 +181,7 @@ static const struct dsa_device_ops ksz9893_netdev_ops = { .proto = DSA_TAG_PROTO_KSZ9893, .xmit = ksz9893_xmit, .rcv = ksz9477_rcv, - .overhead = KSZ_INGRESS_TAG_LEN, - .tail_tag = true, + .needed_tailroom = KSZ_INGRESS_TAG_LEN, }; DSA_TAG_DRIVER(ksz9893_netdev_ops); diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c index aa1318dccaf0..26207ef39ebc 100644 --- a/net/dsa/tag_lan9303.c +++ b/net/dsa/tag_lan9303.c @@ -125,7 +125,7 @@ static const struct dsa_device_ops lan9303_netdev_ops = { .proto = DSA_TAG_PROTO_LAN9303, .xmit = lan9303_xmit, .rcv = lan9303_rcv, - .overhead = LAN9303_TAG_LEN, + .needed_headroom = LAN9303_TAG_LEN, }; MODULE_LICENSE("GPL"); diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c index f9b2966d1936..cc3ba864ad5b 100644 --- a/net/dsa/tag_mtk.c +++ b/net/dsa/tag_mtk.c @@ -102,7 +102,7 @@ static const struct dsa_device_ops mtk_netdev_ops = { .proto = DSA_TAG_PROTO_MTK, .xmit = mtk_tag_xmit, .rcv = mtk_tag_rcv, - .overhead = MTK_HDR_LEN, + .needed_headroom = MTK_HDR_LEN, }; MODULE_LICENSE("GPL"); diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c index 91f0fd1242cd..190f4bfd3bef 100644 --- a/net/dsa/tag_ocelot.c +++ b/net/dsa/tag_ocelot.c @@ -143,7 +143,7 @@ static const struct dsa_device_ops ocelot_netdev_ops = { .proto = DSA_TAG_PROTO_OCELOT, .xmit = ocelot_xmit, .rcv = ocelot_rcv, - .overhead = OCELOT_TOTAL_TAG_LEN, + .needed_headroom = OCELOT_TOTAL_TAG_LEN, .promisc_on_master = true, }; @@ -155,7 +155,7 @@ static const struct dsa_device_ops seville_netdev_ops = { .proto = DSA_TAG_PROTO_SEVILLE, .xmit = seville_xmit, .rcv = ocelot_rcv, - .overhead = OCELOT_TOTAL_TAG_LEN, + .needed_headroom = OCELOT_TOTAL_TAG_LEN, .promisc_on_master = true, }; diff --git a/net/dsa/tag_ocelot_8021q.c b/net/dsa/tag_ocelot_8021q.c index 62a93303bd63..663b74793cfc 100644 --- a/net/dsa/tag_ocelot_8021q.c +++ b/net/dsa/tag_ocelot_8021q.c @@ -73,7 +73,7 @@ static const struct dsa_device_ops ocelot_8021q_netdev_ops = { .proto = DSA_TAG_PROTO_OCELOT_8021Q, .xmit = ocelot_xmit, .rcv = ocelot_rcv, - .overhead = VLAN_HLEN, + .needed_headroom = VLAN_HLEN, .promisc_on_master = true, }; diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c index 88181b52f480..693bda013065 100644 --- a/net/dsa/tag_qca.c +++ b/net/dsa/tag_qca.c @@ -91,7 +91,7 @@ static const struct dsa_device_ops qca_netdev_ops = { .proto = DSA_TAG_PROTO_QCA, .xmit = qca_tag_xmit, .rcv = qca_tag_rcv, - .overhead = QCA_HDR_LEN, + .needed_headroom = QCA_HDR_LEN, }; MODULE_LICENSE("GPL"); diff --git a/net/dsa/tag_rtl4_a.c b/net/dsa/tag_rtl4_a.c index cf8ac316f4c7..57c46b4ab2b3 100644 --- a/net/dsa/tag_rtl4_a.c +++ b/net/dsa/tag_rtl4_a.c @@ -124,7 +124,7 @@ static const struct dsa_device_ops rtl4a_netdev_ops = { .proto = DSA_TAG_PROTO_RTL4_A, .xmit = rtl4a_tag_xmit, .rcv = rtl4a_tag_rcv, - .overhead = RTL4_A_HDR_LEN, + .needed_headroom = RTL4_A_HDR_LEN, }; module_dsa_tag_driver(rtl4a_netdev_ops); diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c index 50496013cdb7..ff4a81eae16f 100644 --- a/net/dsa/tag_sja1105.c +++ b/net/dsa/tag_sja1105.c @@ -362,7 +362,7 @@ static const struct dsa_device_ops sja1105_netdev_ops = { .xmit = sja1105_xmit, .rcv = sja1105_rcv, .filter = sja1105_filter, - .overhead = VLAN_HLEN, + .needed_headroom = VLAN_HLEN, .flow_dissect = sja1105_flow_dissect, .promisc_on_master = true, }; diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c index 5b97ede56a0f..ba73804340a5 100644 --- a/net/dsa/tag_trailer.c +++ b/net/dsa/tag_trailer.c @@ -55,8 +55,7 @@ static const struct dsa_device_ops trailer_netdev_ops = { .proto = DSA_TAG_PROTO_TRAILER, .xmit = trailer_xmit, .rcv = trailer_rcv, - .overhead = 4, - .tail_tag = true, + .needed_tailroom = 4, }; MODULE_LICENSE("GPL"); diff --git a/net/dsa/tag_xrs700x.c b/net/dsa/tag_xrs700x.c index 858cdf9d2913..a31ff7fcb45f 100644 --- a/net/dsa/tag_xrs700x.c +++ b/net/dsa/tag_xrs700x.c @@ -56,8 +56,7 @@ static const struct dsa_device_ops xrs700x_netdev_ops = { .proto = DSA_TAG_PROTO_XRS700X, .xmit = xrs700x_xmit, .rcv = xrs700x_rcv, - .overhead = 1, - .tail_tag = true, + .needed_tailroom = 1, }; MODULE_LICENSE("GPL"); -- cgit v1.2.3 From e71305acd81cac222c41849e538c5c661b12c584 Mon Sep 17 00:00:00 2001 From: Calvin Johnson Date: Fri, 11 Jun 2021 13:53:47 +0300 Subject: Documentation: ACPI: DSD: Document MDIO PHY Introduce a mechanism based on generic ACPI _DSD device properties definition [1] to get PHYs registered on a MDIO bus and provide them to be connected to MAC. [1] http://www.uefi.org/sites/default/files/resources/_DSD-device-properties-UUID.pdf Describe properties "phy-handle" and "phy-mode". Signed-off-by: Calvin Johnson Signed-off-by: Ioana Ciornei Acked-by: Rafael J. Wysocki Acked-by: Grant Likely Signed-off-by: David S. Miller --- Documentation/firmware-guide/acpi/dsd/phy.rst | 133 ++++++++++++++++++++++++++ 1 file changed, 133 insertions(+) create mode 100644 Documentation/firmware-guide/acpi/dsd/phy.rst (limited to 'Documentation') diff --git a/Documentation/firmware-guide/acpi/dsd/phy.rst b/Documentation/firmware-guide/acpi/dsd/phy.rst new file mode 100644 index 000000000000..7d01ae8b3cc6 --- /dev/null +++ b/Documentation/firmware-guide/acpi/dsd/phy.rst @@ -0,0 +1,133 @@ +.. SPDX-License-Identifier: GPL-2.0 + +========================= +MDIO bus and PHYs in ACPI +========================= + +The PHYs on an MDIO bus [1] are probed and registered using +fwnode_mdiobus_register_phy(). + +Later, for connecting these PHYs to their respective MACs, the PHYs registered +on the MDIO bus have to be referenced. + +This document introduces two _DSD properties that are to be used +for connecting PHYs on the MDIO bus [3] to the MAC layer. + +These properties are defined in accordance with the "Device +Properties UUID For _DSD" [2] document and the +daffd814-6eba-4d8c-8a91-bc9bbf4aa301 UUID must be used in the Device +Data Descriptors containing them. + +phy-handle +---------- +For each MAC node, a device property "phy-handle" is used to reference +the PHY that is registered on an MDIO bus. This is mandatory for +network interfaces that have PHYs connected to MAC via MDIO bus. + +During the MDIO bus driver initialization, PHYs on this bus are probed +using the _ADR object as shown below and are registered on the MDIO bus. + +:: + Scope(\_SB.MDI0) + { + Device(PHY1) { + Name (_ADR, 0x1) + } // end of PHY1 + + Device(PHY2) { + Name (_ADR, 0x2) + } // end of PHY2 + } + +Later, during the MAC driver initialization, the registered PHY devices +have to be retrieved from the MDIO bus. For this, the MAC driver needs +references to the previously registered PHYs which are provided +as device object references (e.g. \_SB.MDI0.PHY1). + +phy-mode +-------- +The "phy-mode" _DSD property is used to describe the connection to +the PHY. The valid values for "phy-mode" are defined in [4]. + +The following ASL example illustrates the usage of these properties. + +DSDT entry for MDIO node +------------------------ + +The MDIO bus has an SoC component (MDIO controller) and a platform +component (PHYs on the MDIO bus). + +a) Silicon Component +This node describes the MDIO controller, MDI0 +--------------------------------------------- +:: + Scope(_SB) + { + Device(MDI0) { + Name(_HID, "NXP0006") + Name(_CCA, 1) + Name(_UID, 0) + Name(_CRS, ResourceTemplate() { + Memory32Fixed(ReadWrite, MDI0_BASE, MDI_LEN) + Interrupt(ResourceConsumer, Level, ActiveHigh, Shared) + { + MDI0_IT + } + }) // end of _CRS for MDI0 + } // end of MDI0 + } + +b) Platform Component +The PHY1 and PHY2 nodes represent the PHYs connected to MDIO bus MDI0 +--------------------------------------------------------------------- +:: + Scope(\_SB.MDI0) + { + Device(PHY1) { + Name (_ADR, 0x1) + } // end of PHY1 + + Device(PHY2) { + Name (_ADR, 0x2) + } // end of PHY2 + } + +DSDT entries representing MAC nodes +----------------------------------- + +Below are the MAC nodes where PHY nodes are referenced. +phy-mode and phy-handle are used as explained earlier. +------------------------------------------------------ +:: + Scope(\_SB.MCE0.PR17) + { + Name (_DSD, Package () { + ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"), + Package () { + Package (2) {"phy-mode", "rgmii-id"}, + Package (2) {"phy-handle", \_SB.MDI0.PHY1} + } + }) + } + + Scope(\_SB.MCE0.PR18) + { + Name (_DSD, Package () { + ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"), + Package () { + Package (2) {"phy-mode", "rgmii-id"}, + Package (2) {"phy-handle", \_SB.MDI0.PHY2}} + } + }) + } + +References +========== + +[1] Documentation/networking/phy.rst + +[2] https://www.uefi.org/sites/default/files/resources/_DSD-device-properties-UUID.pdf + +[3] Documentation/firmware-guide/acpi/DSD-properties-rules.rst + +[4] Documentation/devicetree/bindings/net/ethernet-controller.yaml -- cgit v1.2.3 From 2e3cf97f4741b320e8f4639fcca732b17614a55f Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Fri, 11 Jun 2021 15:39:40 -0500 Subject: net: ipa: introduce sysfs code Add IPA device attributes to expose information known by the IPA driver about the hardware and its configuration. All pointers used to display these attribute values (i.e., IPA pointer and endpoint pointers) will have been initialized by the time IPA probe has completed, so they may be safely dereferenced. Signed-off-by: Alex Elder Signed-off-by: David S. Miller --- .../ABI/testing/sysfs-devices-platform-soc-ipa | 78 ++++++++++++ drivers/net/ipa/Makefile | 3 +- drivers/net/ipa/ipa_main.c | 9 ++ drivers/net/ipa/ipa_sysfs.c | 136 +++++++++++++++++++++ drivers/net/ipa/ipa_sysfs.h | 15 +++ drivers/net/ipa/ipa_version.h | 2 + 6 files changed, 242 insertions(+), 1 deletion(-) create mode 100644 Documentation/ABI/testing/sysfs-devices-platform-soc-ipa create mode 100644 drivers/net/ipa/ipa_sysfs.c create mode 100644 drivers/net/ipa/ipa_sysfs.h (limited to 'Documentation') diff --git a/Documentation/ABI/testing/sysfs-devices-platform-soc-ipa b/Documentation/ABI/testing/sysfs-devices-platform-soc-ipa new file mode 100644 index 000000000000..c56dcf15bf29 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-devices-platform-soc-ipa @@ -0,0 +1,78 @@ +What: /sys/devices/platform/soc@X/XXXXXXX.ipa/ +Date: June 2021 +KernelVersion: v5.14 +Contact: Alex Elder +Description: + The /sys/devices/platform/soc@X/XXXXXXX.ipa/ directory + contains read-only attributes exposing information about + an IPA device. The X values could vary, but are typically + "soc@0/1e40000.ipa". + +What: .../XXXXXXX.ipa/version +Date: June 2021 +KernelVersion: v5.14 +Contact: Alex Elder +Description: + The .../XXXXXXX.ipa/version file contains the IPA hardware + version, as a period-separated set of two or three integers + (e.g., "3.5.1" or "4.2"). + +What: .../XXXXXXX.ipa/feature/ +Date: June 2021 +KernelVersion: v5.14 +Contact: Alex Elder +Description: + The .../XXXXXXX.ipa/feature/ directory contains a set of + attributes describing features implemented by the IPA + hardware. + +What: .../XXXXXXX.ipa/feature/rx_offload +Date: June 2021 +KernelVersion: v5.14 +Contact: Alex Elder +Description: + The .../XXXXXXX.ipa/feature/rx_offload file contains a + string indicating the type of receive checksum offload + that is supported by the hardware. The possible values + are "MAPv4" or "MAPv5". + +What: .../XXXXXXX.ipa/feature/tx_offload +Date: June 2021 +KernelVersion: v5.14 +Contact: Alex Elder +Description: + The .../XXXXXXX.ipa/feature/tx_offload file contains a + string indicating the type of transmit checksum offload + that is supported by the hardware. The possible values + are "MAPv4" or "MAPv5". + +What: .../XXXXXXX.ipa/modem/ +Date: June 2021 +KernelVersion: v5.14 +Contact: Alex Elder +Description: + The .../XXXXXXX.ipa/modem/ directory contains a set of + attributes describing properties of the modem execution + environment reachable by the IPA hardware. + +What: .../XXXXXXX.ipa/modem/rx_endpoint_id +Date: June 2021 +KernelVersion: v5.14 +Contact: Alex Elder +Description: + The .../XXXXXXX.ipa/feature/rx_endpoint_id file contains + the AP endpoint ID that receives packets originating from + the modem execution environment. The "rx" is from the + perspective of the AP; this endpoint is considered an "IPA + producer". An endpoint ID is a small unsigned integer. + +What: .../XXXXXXX.ipa/modem/tx_endpoint_id +Date: June 2021 +KernelVersion: v5.14 +Contact: Alex Elder +Description: + The .../XXXXXXX.ipa/feature/tx_endpoint_id file contains + the AP endpoint ID used to transmit packets destined for + the modem execution environment. The "tx" is from the + perspective of the AP; this endpoint is considered an "IPA + consumer". An endpoint ID is a small unsigned integer. diff --git a/drivers/net/ipa/Makefile b/drivers/net/ipa/Makefile index 1efe1a88104b..bd34fce8f6e6 100644 --- a/drivers/net/ipa/Makefile +++ b/drivers/net/ipa/Makefile @@ -7,7 +7,8 @@ ipa-y := ipa_main.o ipa_clock.o ipa_reg.o ipa_mem.o \ ipa_table.o ipa_interrupt.o gsi.o gsi_trans.o \ ipa_gsi.o ipa_smp2p.o ipa_uc.o \ ipa_endpoint.o ipa_cmd.o ipa_modem.o \ - ipa_resource.o ipa_qmi.o ipa_qmi_msg.o + ipa_resource.o ipa_qmi.o ipa_qmi_msg.o \ + ipa_sysfs.o ipa-y += ipa_data-v3.5.1.o ipa_data-v4.2.o \ ipa_data-v4.5.o ipa_data-v4.9.o \ diff --git a/drivers/net/ipa/ipa_main.c b/drivers/net/ipa/ipa_main.c index cbd39e4667a3..2243e3e5b7ea 100644 --- a/drivers/net/ipa/ipa_main.c +++ b/drivers/net/ipa/ipa_main.c @@ -31,6 +31,7 @@ #include "ipa_uc.h" #include "ipa_interrupt.h" #include "gsi_trans.h" +#include "ipa_sysfs.h" /** * DOC: The IP Accelerator @@ -906,6 +907,13 @@ static const struct dev_pm_ops ipa_pm_ops = { .resume = ipa_resume, }; +static const struct attribute_group *ipa_attribute_groups[] = { + &ipa_attribute_group, + &ipa_feature_attribute_group, + &ipa_modem_attribute_group, + NULL, +}; + static struct platform_driver ipa_driver = { .probe = ipa_probe, .remove = ipa_remove, @@ -914,6 +922,7 @@ static struct platform_driver ipa_driver = { .name = "ipa", .pm = &ipa_pm_ops, .of_match_table = ipa_match, + .dev_groups = ipa_attribute_groups, }, }; diff --git a/drivers/net/ipa/ipa_sysfs.c b/drivers/net/ipa/ipa_sysfs.c new file mode 100644 index 000000000000..ff61dbdd70d8 --- /dev/null +++ b/drivers/net/ipa/ipa_sysfs.c @@ -0,0 +1,136 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* Copyright (C) 2021 Linaro Ltd. */ + +#include +#include +#include +#include + +#include "ipa.h" +#include "ipa_version.h" +#include "ipa_sysfs.h" + +static const char *ipa_version_string(struct ipa *ipa) +{ + switch (ipa->version) { + case IPA_VERSION_3_0: + return "3.0"; + case IPA_VERSION_3_1: + return "3.1"; + case IPA_VERSION_3_5: + return "3.5"; + case IPA_VERSION_3_5_1: + return "3.5.1"; + case IPA_VERSION_4_0: + return "4.0"; + case IPA_VERSION_4_1: + return "4.1"; + case IPA_VERSION_4_2: + return "4.2"; + case IPA_VERSION_4_5: + return "4.5"; + case IPA_VERSION_4_7: + return "4.7"; + case IPA_VERSION_4_9: + return "4.9"; + case IPA_VERSION_4_11: + return "4.11"; + default: + return "0.0"; /* Won't happen (checked at probe time) */ + } +} + +static ssize_t +version_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct ipa *ipa = dev_get_drvdata(dev); + + return scnprintf(buf, PAGE_SIZE, "%s\n", ipa_version_string(ipa)); +} + +static DEVICE_ATTR_RO(version); + +static struct attribute *ipa_attrs[] = { + &dev_attr_version.attr, + NULL +}; + +const struct attribute_group ipa_attribute_group = { + .attrs = ipa_attrs, +}; + +static const char *ipa_offload_string(struct ipa *ipa) +{ + return ipa->version < IPA_VERSION_4_5 ? "MAPv4" : "MAPv5"; +} + +static ssize_t rx_offload_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct ipa *ipa = dev_get_drvdata(dev); + + return scnprintf(buf, PAGE_SIZE, "%s\n", ipa_offload_string(ipa)); +} + +static DEVICE_ATTR_RO(rx_offload); + +static ssize_t tx_offload_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct ipa *ipa = dev_get_drvdata(dev); + + return scnprintf(buf, PAGE_SIZE, "%s\n", ipa_offload_string(ipa)); +} + +static DEVICE_ATTR_RO(tx_offload); + +static struct attribute *ipa_feature_attrs[] = { + &dev_attr_rx_offload.attr, + &dev_attr_tx_offload.attr, + NULL +}; + +const struct attribute_group ipa_feature_attribute_group = { + .name = "feature", + .attrs = ipa_feature_attrs, +}; + +static ssize_t +ipa_endpoint_id_show(struct ipa *ipa, char *buf, enum ipa_endpoint_name name) +{ + u32 endpoint_id = ipa->name_map[name]->endpoint_id; + + return scnprintf(buf, PAGE_SIZE, "%u\n", endpoint_id); +} + +static ssize_t rx_endpoint_id_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct ipa *ipa = dev_get_drvdata(dev); + + return ipa_endpoint_id_show(ipa, buf, IPA_ENDPOINT_AP_MODEM_RX); +} + +static DEVICE_ATTR_RO(rx_endpoint_id); + +static ssize_t tx_endpoint_id_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct ipa *ipa = dev_get_drvdata(dev); + + return ipa_endpoint_id_show(ipa, buf, IPA_ENDPOINT_AP_MODEM_TX); +} + +static DEVICE_ATTR_RO(tx_endpoint_id); + +static struct attribute *ipa_modem_attrs[] = { + &dev_attr_rx_endpoint_id.attr, + &dev_attr_tx_endpoint_id.attr, + NULL +}; + +const struct attribute_group ipa_modem_attribute_group = { + .name = "modem", + .attrs = ipa_modem_attrs, +}; diff --git a/drivers/net/ipa/ipa_sysfs.h b/drivers/net/ipa/ipa_sysfs.h new file mode 100644 index 000000000000..b34e5650bf8c --- /dev/null +++ b/drivers/net/ipa/ipa_sysfs.h @@ -0,0 +1,15 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +/* Copyright (c) 2012-2018, The Linux Foundation. All rights reserved. + * Copyright (C) 2019-2021 Linaro Ltd. + */ +#ifndef _IPA_SYSFS_H_ +#define _IPA_SYSFS_H_ + +struct attribute_group; + +extern const struct attribute_group ipa_attribute_group; +extern const struct attribute_group ipa_feature_attribute_group; +extern const struct attribute_group ipa_modem_attribute_group; + +#endif /* _IPA_SYSFS_H_ */ diff --git a/drivers/net/ipa/ipa_version.h b/drivers/net/ipa/ipa_version.h index ee2b3d02f3cd..6c16c895d842 100644 --- a/drivers/net/ipa/ipa_version.h +++ b/drivers/net/ipa/ipa_version.h @@ -21,6 +21,8 @@ * @IPA_VERSION_4_11: IPA version 4.11/GSI version 2.11 (2.1.1) * * Defines the version of IPA (and GSI) hardware present on the platform. + * Please update ipa_version_valid() and ipa_version_string() whenever a + * new version is added. */ enum ipa_version { IPA_VERSION_3_0, -- cgit v1.2.3 From 858252c9c3463abc3f7b13e42aae3b8845f0479d Mon Sep 17 00:00:00 2001 From: Steen Hegelund Date: Fri, 11 Jun 2021 14:54:50 +0200 Subject: dt-bindings: net: Add 25G BASE-R phy interface Add 25gbase-r PHY interface mode. Signed-off-by: Steen Hegelund Signed-off-by: Bjarni Jonasson Reviewed-by: Andrew Lunn Reviewed-by: Russell King (Oracle) Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/ethernet-controller.yaml | 1 + 1 file changed, 1 insertion(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/ethernet-controller.yaml b/Documentation/devicetree/bindings/net/ethernet-controller.yaml index d97b561003ed..b0933a8c295a 100644 --- a/Documentation/devicetree/bindings/net/ethernet-controller.yaml +++ b/Documentation/devicetree/bindings/net/ethernet-controller.yaml @@ -98,6 +98,7 @@ properties: - 10gbase-kr - usxgmii - 10gbase-r + - 25gbase-r phy-mode: $ref: "#/properties/phy-connection-type" -- cgit v1.2.3 From a56c286865692ac12291afe4c66198915c6b08f9 Mon Sep 17 00:00:00 2001 From: Steen Hegelund Date: Fri, 11 Jun 2021 14:54:51 +0200 Subject: net: phy: Add 25G BASE-R interface mode Add 25gbase-r phy interface mode Signed-off-by: Steen Hegelund Signed-off-by: Bjarni Jonasson Reviewed-by: Andrew Lunn Reviewed-by: Russell King (Oracle) Signed-off-by: David S. Miller --- Documentation/networking/phy.rst | 6 ++++++ include/linux/phy.h | 4 ++++ 2 files changed, 10 insertions(+) (limited to 'Documentation') diff --git a/Documentation/networking/phy.rst b/Documentation/networking/phy.rst index 3f05d50ecd6e..571ba08386e7 100644 --- a/Documentation/networking/phy.rst +++ b/Documentation/networking/phy.rst @@ -292,6 +292,12 @@ Some of the interface modes are described below: Note: due to legacy usage, some 10GBASE-R usage incorrectly makes use of this definition. +``PHY_INTERFACE_MODE_25GBASER`` + This is the IEEE 802.3 PCS Clause 107 defined 25GBASE-R protocol. + The PCS is identical to 10GBASE-R, i.e. 64B/66B encoded + running 2.5 as fast, giving a fixed bit rate of 25.78125 Gbaud. + Please refer to the IEEE standard for further information. + ``PHY_INTERFACE_MODE_100BASEX`` This defines IEEE 802.3 Clause 24. The link operates at a fixed data rate of 125Mpbs using a 4B/5B encoding scheme, resulting in an underlying diff --git a/include/linux/phy.h b/include/linux/phy.h index b60694734b07..3b80dc3ed68b 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -112,6 +112,7 @@ extern const int phy_10gbit_features_array[1]; * @PHY_INTERFACE_MODE_RXAUI: Reduced XAUI * @PHY_INTERFACE_MODE_XAUI: 10 Gigabit Attachment Unit Interface * @PHY_INTERFACE_MODE_10GBASER: 10G BaseR + * @PHY_INTERFACE_MODE_25GBASER: 25G BaseR * @PHY_INTERFACE_MODE_USXGMII: Universal Serial 10GE MII * @PHY_INTERFACE_MODE_10GKR: 10GBASE-KR - with Clause 73 AN * @PHY_INTERFACE_MODE_MAX: Book keeping @@ -147,6 +148,7 @@ typedef enum { PHY_INTERFACE_MODE_XAUI, /* 10GBASE-R, XFI, SFI - single lane 10G Serdes */ PHY_INTERFACE_MODE_10GBASER, + PHY_INTERFACE_MODE_25GBASER, PHY_INTERFACE_MODE_USXGMII, /* 10GBASE-KR - with Clause 73 AN */ PHY_INTERFACE_MODE_10GKR, @@ -223,6 +225,8 @@ static inline const char *phy_modes(phy_interface_t interface) return "xaui"; case PHY_INTERFACE_MODE_10GBASER: return "10gbase-r"; + case PHY_INTERFACE_MODE_25GBASER: + return "25gbase-r"; case PHY_INTERFACE_MODE_USXGMII: return "usxgmii"; case PHY_INTERFACE_MODE_10GKR: -- cgit v1.2.3 From f7af616c632ee2ac3af0876fe33bf9e0232e665a Mon Sep 17 00:00:00 2001 From: M Chetan Kumar Date: Sun, 13 Jun 2021 18:20:23 +0530 Subject: net: iosm: infrastructure 1) Kconfig & Makefile changes for IOSM Driver compilation. 2) Add IOSM Driver documentation. 3) Modified MAINTAINER file for IOSM Driver addition. Signed-off-by: M Chetan Kumar Signed-off-by: David S. Miller --- Documentation/networking/device_drivers/index.rst | 1 + .../networking/device_drivers/wwan/index.rst | 18 ++++ .../networking/device_drivers/wwan/iosm.rst | 96 ++++++++++++++++++++++ MAINTAINERS | 7 ++ drivers/net/wwan/Kconfig | 12 +++ drivers/net/wwan/Makefile | 1 + drivers/net/wwan/iosm/Makefile | 26 ++++++ 7 files changed, 161 insertions(+) create mode 100644 Documentation/networking/device_drivers/wwan/index.rst create mode 100644 Documentation/networking/device_drivers/wwan/iosm.rst create mode 100644 drivers/net/wwan/iosm/Makefile (limited to 'Documentation') diff --git a/Documentation/networking/device_drivers/index.rst b/Documentation/networking/device_drivers/index.rst index d8279de7bf25..3a5a1d46e77e 100644 --- a/Documentation/networking/device_drivers/index.rst +++ b/Documentation/networking/device_drivers/index.rst @@ -18,6 +18,7 @@ Contents: qlogic/index wan/index wifi/index + wwan/index .. only:: subproject and html diff --git a/Documentation/networking/device_drivers/wwan/index.rst b/Documentation/networking/device_drivers/wwan/index.rst new file mode 100644 index 000000000000..1cb8c7371401 --- /dev/null +++ b/Documentation/networking/device_drivers/wwan/index.rst @@ -0,0 +1,18 @@ +.. SPDX-License-Identifier: GPL-2.0-only + +WWAN Device Drivers +=================== + +Contents: + +.. toctree:: + :maxdepth: 2 + + iosm + +.. only:: subproject and html + + Indices + ======= + + * :ref:`genindex` diff --git a/Documentation/networking/device_drivers/wwan/iosm.rst b/Documentation/networking/device_drivers/wwan/iosm.rst new file mode 100644 index 000000000000..cd12f57d980a --- /dev/null +++ b/Documentation/networking/device_drivers/wwan/iosm.rst @@ -0,0 +1,96 @@ +.. SPDX-License-Identifier: GPL-2.0-only + +.. Copyright (C) 2020-21 Intel Corporation + +.. _iosm_driver_doc: + +=========================================== +IOSM Driver for Intel M.2 PCIe based Modems +=========================================== +The IOSM (IPC over Shared Memory) driver is a WWAN PCIe host driver developed +for linux or chrome platform for data exchange over PCIe interface between +Host platform & Intel M.2 Modem. The driver exposes interface conforming to the +MBIM protocol [1]. Any front end application ( eg: Modem Manager) could easily +manage the MBIM interface to enable data communication towards WWAN. + +Basic usage +=========== +MBIM functions are inactive when unmanaged. The IOSM driver only provides a +userspace interface MBIM "WWAN PORT" representing MBIM control channel and does +not play any role in managing the functionality. It is the job of a userspace +application to detect port enumeration and enable MBIM functionality. + +Examples of few such userspace application are: +- mbimcli (included with the libmbim [2] library), and +- Modem Manager [3] + +Management Applications to carry out below required actions for establishing +MBIM IP session: +- open the MBIM control channel +- configure network connection settings +- connect to network +- configure IP network interface + +Management application development +================================== +The driver and userspace interfaces are described below. The MBIM protocol is +described in [1] Mobile Broadband Interface Model v1.0 Errata-1. + +MBIM control channel userspace ABI +---------------------------------- + +/dev/wwan0mbim0 character device +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The driver exposes an MBIM interface to the MBIM function by implementing +MBIM WWAN Port. The userspace end of the control channel pipe is a +/dev/wwan0mbim0 character device. Application shall use this interface for +MBIM protocol communication. + +Fragmentation +~~~~~~~~~~~~~ +The userspace application is responsible for all control message fragmentation +and defragmentation as per MBIM specification. + +/dev/wwan0mbim0 write() +~~~~~~~~~~~~~~~~~~~~~ +The MBIM control messages from the management application must not exceed the +negotiated control message size. + +/dev/wwan0mbim0 read() +~~~~~~~~~~~~~~~~~~~~ +The management application must accept control messages of up the negotiated +control message size. + +MBIM data channel userspace ABI +------------------------------- + +wwan0-X network device +~~~~~~~~~~~~~~~~~~~~ +The IOSM driver exposes IP link interface "wwan0-X" of type "wwan" for IP +traffic. Iproute network utility is used for creating "wwan0-X" network +interface and for associating it with MBIM IP session. The Driver supports +upto 8 IP sessions for simultaneous IP communication. + +The userspace management application is responsible for creating new IP link +prior to establishing MBIM IP session where the SessionId is greater than 0. + +For example, creating new IP link for a MBIM IP session with SessionId 1: + + ip link add dev wwan0-1 parentdev-name wwan0 type wwan linkid 1 + +The driver will automatically map the "wwan0-1" network device to MBIM IP +session 1. + +References +========== +[1] "MBIM (Mobile Broadband Interface Model) Errata-1" + - https://www.usb.org/document-library/ + +[2] libmbim - "a glib-based library for talking to WWAN modems and + devices which speak the Mobile Interface Broadband Model (MBIM) + protocol" + - http://www.freedesktop.org/wiki/Software/libmbim/ + +[3] Modem Manager - "a DBus-activated daemon which controls mobile + broadband (2G/3G/4G) devices and connections" + - http://www.freedesktop.org/wiki/Software/ModemManager/ diff --git a/MAINTAINERS b/MAINTAINERS index 349a87b42d3c..183cc61e2dc0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9453,6 +9453,13 @@ L: Dell.Client.Kernel@dell.com S: Maintained F: drivers/platform/x86/intel-wmi-thunderbolt.c +INTEL WWAN IOSM DRIVER +M: M Chetan Kumar +M: Intel Corporation +L: netdev@vger.kernel.org +S: Maintained +F: drivers/net/wwan/iosm/ + INTEL(R) TRACE HUB M: Alexander Shishkin S: Supported diff --git a/drivers/net/wwan/Kconfig b/drivers/net/wwan/Kconfig index ec0b194a373c..13613a4f53d8 100644 --- a/drivers/net/wwan/Kconfig +++ b/drivers/net/wwan/Kconfig @@ -44,4 +44,16 @@ config MHI_WWAN_CTRL To compile this driver as a module, choose M here: the module will be called mhi_wwan_ctrl. +config IOSM + tristate "IOSM Driver for Intel M.2 WWAN Device" + select WWAN_CORE + depends on INTEL_IOMMU + help + This driver enables Intel M.2 WWAN Device communication. + + If you have one of those Intel M.2 WWAN Modules and wish to use it in + Linux say Y/M here. + + If unsure, say N. + endif # WWAN diff --git a/drivers/net/wwan/Makefile b/drivers/net/wwan/Makefile index f33f77ca1021..3e565d3f984f 100644 --- a/drivers/net/wwan/Makefile +++ b/drivers/net/wwan/Makefile @@ -9,3 +9,4 @@ wwan-objs += wwan_core.o obj-$(CONFIG_WWAN_HWSIM) += wwan_hwsim.o obj-$(CONFIG_MHI_WWAN_CTRL) += mhi_wwan_ctrl.o +obj-$(CONFIG_IOSM) += iosm/ diff --git a/drivers/net/wwan/iosm/Makefile b/drivers/net/wwan/iosm/Makefile new file mode 100644 index 000000000000..cdeeb9357af6 --- /dev/null +++ b/drivers/net/wwan/iosm/Makefile @@ -0,0 +1,26 @@ +# SPDX-License-Identifier: (GPL-2.0-only) +# +# Copyright (C) 2020-21 Intel Corporation. +# + +iosm-y = \ + iosm_ipc_task_queue.o \ + iosm_ipc_imem.o \ + iosm_ipc_imem_ops.o \ + iosm_ipc_mmio.o \ + iosm_ipc_port.o \ + iosm_ipc_wwan.o \ + iosm_ipc_uevent.o \ + iosm_ipc_pm.o \ + iosm_ipc_pcie.o \ + iosm_ipc_irq.o \ + iosm_ipc_chnl_cfg.o \ + iosm_ipc_protocol.o \ + iosm_ipc_protocol_ops.o \ + iosm_ipc_mux.o \ + iosm_ipc_mux_codec.o + +obj-$(CONFIG_IOSM) := iosm.o + +# compilation flags +ccflags-y += -DDEBUG -- cgit v1.2.3 From 66826c43e63d5c5e8307bd36862d6334db9d98b7 Mon Sep 17 00:00:00 2001 From: Oleksandr Mazur Date: Mon, 14 Jun 2021 16:01:18 +0300 Subject: documentation: networking: devlink: add prestera switched driver Documentation Add documentation for the devlink feature prestera switchdev driver supports: add description for the support of the driver-specific devlink traps (include both traps with action TRAP and action DROP); Signed-off-by: Oleksandr Mazur Signed-off-by: David S. Miller --- Documentation/networking/devlink/prestera.rst | 141 ++++++++++++++++++++++++++ 1 file changed, 141 insertions(+) create mode 100644 Documentation/networking/devlink/prestera.rst (limited to 'Documentation') diff --git a/Documentation/networking/devlink/prestera.rst b/Documentation/networking/devlink/prestera.rst new file mode 100644 index 000000000000..e8b52ffd4707 --- /dev/null +++ b/Documentation/networking/devlink/prestera.rst @@ -0,0 +1,141 @@ +.. SPDX-License-Identifier: GPL-2.0 + +===================== +prestera devlink support +===================== + +This document describes the devlink features implemented by the ``prestera`` +device driver. + +Driver-specific Traps +===================== + +.. list-table:: List of Driver-specific Traps Registered by ``prestera`` + :widths: 5 5 90 + + * - Name + - Type + - Description +.. list-table:: List of Driver-specific Traps Registered by ``prestera`` + :widths: 5 5 90 + + * - Name + - Type + - Description + * - ``arp_bc`` + - ``trap`` + - Traps ARP broadcast packets (both requests/responses) + * - ``is_is`` + - ``trap`` + - Traps IS-IS packets + * - ``ospf`` + - ``trap`` + - Traps OSPF packets + * - ``ip_bc_mac`` + - ``trap`` + - Traps IPv4 packets with broadcast DA Mac address + * - ``stp`` + - ``trap`` + - Traps STP BPDU + * - ``lacp`` + - ``trap`` + - Traps LACP packets + * - ``lldp`` + - ``trap`` + - Traps LLDP packets + * - ``router_mc`` + - ``trap`` + - Traps multicast packets + * - ``vrrp`` + - ``trap`` + - Traps VRRP packets + * - ``dhcp`` + - ``trap`` + - Traps DHCP packets + * - ``mtu_error`` + - ``trap`` + - Traps (exception) packets that exceeded port's MTU + * - ``mac_to_me`` + - ``trap`` + - Traps packets with switch-port's DA Mac address + * - ``ttl_error`` + - ``trap`` + - Traps (exception) IPv4 packets whose TTL exceeded + * - ``ipv4_options`` + - ``trap`` + - Traps (exception) packets due to the malformed IPV4 header options + * - ``ip_default_route`` + - ``trap`` + - Traps packets that have no specific IP interface (IP to me) and no forwarding prefix + * - ``local_route`` + - ``trap`` + - Traps packets that have been send to one of switch IP interfaces addresses + * - ``ipv4_icmp_redirect`` + - ``trap`` + - Traps (exception) IPV4 ICMP redirect packets + * - ``arp_response`` + - ``trap`` + - Traps ARP replies packets that have switch-port's DA Mac address + * - ``acl_code_0`` + - ``trap`` + - Traps packets that have ACL priority set to 0 (tc pref 0) + * - ``acl_code_1`` + - ``trap`` + - Traps packets that have ACL priority set to 1 (tc pref 1) + * - ``acl_code_2`` + - ``trap`` + - Traps packets that have ACL priority set to 2 (tc pref 2) + * - ``acl_code_3`` + - ``trap`` + - Traps packets that have ACL priority set to 3 (tc pref 3) + * - ``acl_code_4`` + - ``trap`` + - Traps packets that have ACL priority set to 4 (tc pref 4) + * - ``acl_code_5`` + - ``trap`` + - Traps packets that have ACL priority set to 5 (tc pref 5) + * - ``acl_code_6`` + - ``trap`` + - Traps packets that have ACL priority set to 6 (tc pref 6) + * - ``acl_code_7`` + - ``trap`` + - Traps packets that have ACL priority set to 7 (tc pref 7) + * - ``ipv4_bgp`` + - ``trap`` + - Traps IPv4 BGP packets + * - ``ssh`` + - ``trap`` + - Traps SSH packets + * - ``telnet`` + - ``trap`` + - Traps Telnet packets + * - ``icmp`` + - ``trap`` + - Traps ICMP packets + * - ``rxdma_drop`` + - ``drop`` + - Drops packets (RxDMA) due to the lack of ingress buffers etc. + * - ``port_no_vlan`` + - ``drop`` + - Drops packets due to faulty-configured network or due to internal bug (config issue). + * - ``local_port`` + - ``drop`` + - Drops packets whose decision (FDB entry) is to bridge packet back to the incoming port/trunk. + * - ``invalid_sa`` + - ``drop`` + - Drops packets with multicast source MAC address. + * - ``illegal_ip_addr`` + - ``drop`` + - Drops packets with illegal SIP/DIP multicast/unicast addresses. + * - ``illegal_ipv4_hdr`` + - ``drop`` + - Drops packets with illegal IPV4 header. + * - ``ip_uc_dip_da_mismatch`` + - ``drop`` + - Drops packets with destination MAC being unicast, but destination IP address being multicast. + * - ``ip_sip_is_zero`` + - ``drop`` + - Drops packets with zero (0) IPV4 source address. + * - ``met_red`` + - ``drop`` + - Drops non-conforming packets (dropped by Ingress policer, metering drop), e.g. packet rate exceeded configured bandwith. -- cgit v1.2.3 From 3b8401066e5a8ee465891cc8bad614c797701348 Mon Sep 17 00:00:00 2001 From: "周琰杰 (Zhou Yanjie)" Date: Tue, 15 Jun 2021 01:15:36 +0800 Subject: dt-bindings: dwmac: Add bindings for new Ingenic SoCs. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add the dwmac bindings for the JZ4775 SoC, the X1000 SoC, the X1600 SoC, the X1830 SoC and the X2000 SoC from Ingenic. Signed-off-by: 周琰杰 (Zhou Yanjie) Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- .../devicetree/bindings/net/ingenic,mac.yaml | 76 ++++++++++++++++++++++ .../devicetree/bindings/net/snps,dwmac.yaml | 15 +++++ 2 files changed, 91 insertions(+) create mode 100644 Documentation/devicetree/bindings/net/ingenic,mac.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/ingenic,mac.yaml b/Documentation/devicetree/bindings/net/ingenic,mac.yaml new file mode 100644 index 000000000000..5e93d4f9a080 --- /dev/null +++ b/Documentation/devicetree/bindings/net/ingenic,mac.yaml @@ -0,0 +1,76 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/ingenic,mac.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Bindings for MAC in Ingenic SoCs + +maintainers: + - 周琰杰 (Zhou Yanjie) + +description: + The Ethernet Media Access Controller in Ingenic SoCs. + +properties: + compatible: + enum: + - ingenic,jz4775-mac + - ingenic,x1000-mac + - ingenic,x1600-mac + - ingenic,x1830-mac + - ingenic,x2000-mac + + reg: + maxItems: 1 + + interrupts: + maxItems: 1 + + interrupt-names: + const: macirq + + clocks: + maxItems: 1 + + clock-names: + const: stmmaceth + + mode-reg: + description: An extra syscon register that control ethernet interface and timing delay + + rx-clk-delay-ps: + description: RGMII receive clock delay defined in pico seconds + + tx-clk-delay-ps: + description: RGMII transmit clock delay defined in pico seconds + +required: + - compatible + - reg + - interrupts + - interrupt-names + - clocks + - clock-names + - mode-reg + +additionalProperties: false + +examples: + - | + #include + + mac: ethernet@134b0000 { + compatible = "ingenic,x1000-mac", "snps,dwmac"; + reg = <0x134b0000 0x2000>; + + interrupt-parent = <&intc>; + interrupts = <55>; + interrupt-names = "macirq"; + + clocks = <&cgu X1000_CLK_MAC>; + clock-names = "stmmaceth"; + + mode-reg = <&mac_phy_ctrl>; + }; +... diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml index 2edd8bea993e..9c0ce92e9212 100644 --- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml +++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml @@ -56,6 +56,11 @@ properties: - amlogic,meson8m2-dwmac - amlogic,meson-gxbb-dwmac - amlogic,meson-axg-dwmac + - ingenic,jz4775-mac + - ingenic,x1000-mac + - ingenic,x1600-mac + - ingenic,x1830-mac + - ingenic,x2000-mac - rockchip,px30-gmac - rockchip,rk3128-gmac - rockchip,rk3228-gmac @@ -310,6 +315,11 @@ allOf: - allwinner,sun8i-r40-emac - allwinner,sun8i-v3s-emac - allwinner,sun50i-a64-emac + - ingenic,jz4775-mac + - ingenic,x1000-mac + - ingenic,x1600-mac + - ingenic,x1830-mac + - ingenic,x2000-mac - snps,dwxgmac - snps,dwxgmac-2.10 - st,spear600-gmac @@ -353,6 +363,11 @@ allOf: - allwinner,sun8i-r40-emac - allwinner,sun8i-v3s-emac - allwinner,sun50i-a64-emac + - ingenic,jz4775-mac + - ingenic,x1000-mac + - ingenic,x1600-mac + - ingenic,x1830-mac + - ingenic,x2000-mac - snps,dwmac-4.00 - snps,dwmac-4.10a - snps,dwmac-4.20a -- cgit v1.2.3 From f9ac779f881c2ec3d1cdcd7fa9d4f9442bf60e80 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Sat, 12 Jun 2021 21:32:14 +0900 Subject: net: Introduce net.ipv4.tcp_migrate_req. This commit adds a new sysctl option: net.ipv4.tcp_migrate_req. If this option is enabled or eBPF program is attached, we will be able to migrate child sockets from a listener to another in the same reuseport group after close() or shutdown() syscalls. Signed-off-by: Kuniyuki Iwashima Signed-off-by: Daniel Borkmann Reviewed-by: Benjamin Herrenschmidt Reviewed-by: Eric Dumazet Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20210612123224.12525-2-kuniyu@amazon.co.jp --- Documentation/networking/ip-sysctl.rst | 25 +++++++++++++++++++++++++ include/net/netns/ipv4.h | 1 + net/ipv4/sysctl_net_ipv4.c | 9 +++++++++ 3 files changed, 35 insertions(+) (limited to 'Documentation') diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index a5c250044500..b0436d3a4f11 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -761,6 +761,31 @@ tcp_syncookies - INTEGER network connections you can set this knob to 2 to enable unconditionally generation of syncookies. +tcp_migrate_req - BOOLEAN + The incoming connection is tied to a specific listening socket when + the initial SYN packet is received during the three-way handshake. + When a listener is closed, in-flight request sockets during the + handshake and established sockets in the accept queue are aborted. + + If the listener has SO_REUSEPORT enabled, other listeners on the + same port should have been able to accept such connections. This + option makes it possible to migrate such child sockets to another + listener after close() or shutdown(). + + The BPF_SK_REUSEPORT_SELECT_OR_MIGRATE type of eBPF program should + usually be used to define the policy to pick an alive listener. + Otherwise, the kernel will randomly pick an alive listener only if + this option is enabled. + + Note that migration between listeners with different settings may + crash applications. Let's say migration happens from listener A to + B, and only B has TCP_SAVE_SYN enabled. B cannot read SYN data from + the requests migrated from A. To avoid such a situation, cancel + migration by returning SK_DROP in the type of eBPF program, or + disable this option. + + Default: 0 + tcp_fastopen - INTEGER Enable TCP Fast Open (RFC7413) to send and accept data in the opening SYN packet. diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 746c80cd4257..b8620519eace 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -126,6 +126,7 @@ struct netns_ipv4 { u8 sysctl_tcp_syn_retries; u8 sysctl_tcp_synack_retries; u8 sysctl_tcp_syncookies; + u8 sysctl_tcp_migrate_req; int sysctl_tcp_reordering; u8 sysctl_tcp_retries1; u8 sysctl_tcp_retries2; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 4fa77f182dcb..6f1e64d49232 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -960,6 +960,15 @@ static struct ctl_table ipv4_net_table[] = { .proc_handler = proc_dou8vec_minmax, }, #endif + { + .procname = "tcp_migrate_req", + .data = &init_net.ipv4.sysctl_tcp_migrate_req, + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE + }, { .procname = "tcp_reordering", .data = &init_net.ipv4.sysctl_tcp_reordering, -- cgit v1.2.3 From 925a56b2c085a7c6f5c741c8516e21c3aa6134b4 Mon Sep 17 00:00:00 2001 From: M Chetan Kumar Date: Tue, 15 Jun 2021 18:38:22 +0530 Subject: net: wwan: iosm: Fix htmldocs warnings Fixes .rst file warnings seen on linux-next build. Fixes: f7af616c632e ("net: iosm: infrastructure") Reported-by: Stephen Rothwell Signed-off-by: M Chetan Kumar Signed-off-by: David S. Miller --- Documentation/networking/device_drivers/wwan/iosm.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/device_drivers/wwan/iosm.rst b/Documentation/networking/device_drivers/wwan/iosm.rst index cd12f57d980a..aceb0223eb46 100644 --- a/Documentation/networking/device_drivers/wwan/iosm.rst +++ b/Documentation/networking/device_drivers/wwan/iosm.rst @@ -40,7 +40,7 @@ MBIM control channel userspace ABI ---------------------------------- /dev/wwan0mbim0 character device -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ The driver exposes an MBIM interface to the MBIM function by implementing MBIM WWAN Port. The userspace end of the control channel pipe is a /dev/wwan0mbim0 character device. Application shall use this interface for @@ -52,12 +52,12 @@ The userspace application is responsible for all control message fragmentation and defragmentation as per MBIM specification. /dev/wwan0mbim0 write() -~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~ The MBIM control messages from the management application must not exceed the negotiated control message size. /dev/wwan0mbim0 read() -~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~ The management application must accept control messages of up the negotiated control message size. @@ -65,7 +65,7 @@ MBIM data channel userspace ABI ------------------------------- wwan0-X network device -~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~ The IOSM driver exposes IP link interface "wwan0-X" of type "wwan" for IP traffic. Iproute network utility is used for creating "wwan0-X" network interface and for associating it with MBIM IP session. The Driver supports -- cgit v1.2.3 From 01f1b6ed2b846ae124bb54c636ddadb4dd1813a3 Mon Sep 17 00:00:00 2001 From: Oleksandr Mazur Date: Wed, 16 Jun 2021 20:46:07 +0300 Subject: documentation: networking: devlink: fix prestera.rst formatting that causes build warnings Fixes: 66826c43e63d ("documentation: networking: devlink: add prestera switched driver Documentation") Signed-off-by: Oleksandr Mazur Signed-off-by: David S. Miller --- Documentation/networking/devlink/devlink-trap.rst | 1 + Documentation/networking/devlink/index.rst | 1 + Documentation/networking/devlink/prestera.rst | 4 ++-- 3 files changed, 4 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/devlink/devlink-trap.rst b/Documentation/networking/devlink/devlink-trap.rst index 935b6397e8cf..ef8928c355df 100644 --- a/Documentation/networking/devlink/devlink-trap.rst +++ b/Documentation/networking/devlink/devlink-trap.rst @@ -497,6 +497,7 @@ drivers: * :doc:`netdevsim` * :doc:`mlxsw` + * :doc:`prestera` .. _Generic-Packet-Trap-Groups: diff --git a/Documentation/networking/devlink/index.rst b/Documentation/networking/devlink/index.rst index 8428a1220723..b3b9e0692088 100644 --- a/Documentation/networking/devlink/index.rst +++ b/Documentation/networking/devlink/index.rst @@ -46,3 +46,4 @@ parameters, info versions, and other features it supports. qed ti-cpsw-switch am65-nuss-cpsw-switch + prestera diff --git a/Documentation/networking/devlink/prestera.rst b/Documentation/networking/devlink/prestera.rst index e8b52ffd4707..49409d1d3081 100644 --- a/Documentation/networking/devlink/prestera.rst +++ b/Documentation/networking/devlink/prestera.rst @@ -1,8 +1,8 @@ .. SPDX-License-Identifier: GPL-2.0 -===================== +======================== prestera devlink support -===================== +======================== This document describes the devlink features implemented by the ``prestera`` device driver. -- cgit v1.2.3 From 79ab2b37034b7790bd598597faddf689f5b10676 Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Thu, 17 Jun 2021 18:55:51 +0300 Subject: Documentation: ACPI: DSD: include phy.rst in the toctree Include the new phy.rst into the index of the ACPI support documentation. Fixes: e71305acd81c ("Documentation: ACPI: DSD: Document MDIO PHY") Reported-by: Stephen Rothwell Signed-off-by: Ioana Ciornei Signed-off-by: David S. Miller --- Documentation/firmware-guide/acpi/index.rst | 1 + 1 file changed, 1 insertion(+) (limited to 'Documentation') diff --git a/Documentation/firmware-guide/acpi/index.rst b/Documentation/firmware-guide/acpi/index.rst index f72b5f1769fb..a99ee402b212 100644 --- a/Documentation/firmware-guide/acpi/index.rst +++ b/Documentation/firmware-guide/acpi/index.rst @@ -11,6 +11,7 @@ ACPI Support dsd/graph dsd/data-node-references dsd/leds + dsd/phy enumeration osi method-customizing -- cgit v1.2.3 From 5a336f97f1f5011cdca5467ef96372fd6d2fd128 Mon Sep 17 00:00:00 2001 From: Ioana Ciornei Date: Thu, 17 Jun 2021 18:55:52 +0300 Subject: Documentation: ACPI: DSD: fix block code comments Use the '.. code-block:: none' to properly highlight the documented DSDT entries. This also fixes warnings in the documentation build process. Fixes: e71305acd81c ("Documentation: ACPI: DSD: Document MDIO PHY") Reported-by: Stephen Rothwell Signed-off-by: Ioana Ciornei Signed-off-by: David S. Miller --- Documentation/firmware-guide/acpi/dsd/phy.rst | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'Documentation') diff --git a/Documentation/firmware-guide/acpi/dsd/phy.rst b/Documentation/firmware-guide/acpi/dsd/phy.rst index 7d01ae8b3cc6..0d49bad2ea9c 100644 --- a/Documentation/firmware-guide/acpi/dsd/phy.rst +++ b/Documentation/firmware-guide/acpi/dsd/phy.rst @@ -27,7 +27,8 @@ network interfaces that have PHYs connected to MAC via MDIO bus. During the MDIO bus driver initialization, PHYs on this bus are probed using the _ADR object as shown below and are registered on the MDIO bus. -:: +.. code-block:: none + Scope(\_SB.MDI0) { Device(PHY1) { @@ -60,7 +61,9 @@ component (PHYs on the MDIO bus). a) Silicon Component This node describes the MDIO controller, MDI0 --------------------------------------------- -:: + +.. code-block:: none + Scope(_SB) { Device(MDI0) { @@ -80,7 +83,9 @@ This node describes the MDIO controller, MDI0 b) Platform Component The PHY1 and PHY2 nodes represent the PHYs connected to MDIO bus MDI0 --------------------------------------------------------------------- -:: + +.. code-block:: none + Scope(\_SB.MDI0) { Device(PHY1) { @@ -98,7 +103,9 @@ DSDT entries representing MAC nodes Below are the MAC nodes where PHY nodes are referenced. phy-mode and phy-handle are used as explained earlier. ------------------------------------------------------ -:: + +.. code-block:: none + Scope(\_SB.MCE0.PR17) { Name (_DSD, Package () { -- cgit v1.2.3 From fc3c82eebf8e2e193412612f509530b4ff5611bf Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Thu, 17 Jun 2021 16:46:19 -0700 Subject: mptcp: add a new sysctl checksum_enabled This patch added a new sysctl, named checksum_enabled, to control whether DSS checksum can be enabled. Acked-by: Paolo Abeni Co-developed-by: Matthieu Baerts Signed-off-by: Matthieu Baerts Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- Documentation/networking/mptcp-sysctl.rst | 8 ++++++++ net/mptcp/ctrl.c | 16 ++++++++++++++++ net/mptcp/protocol.h | 2 +- 3 files changed, 25 insertions(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/networking/mptcp-sysctl.rst b/Documentation/networking/mptcp-sysctl.rst index 3b352e5f6300..ee06fd782465 100644 --- a/Documentation/networking/mptcp-sysctl.rst +++ b/Documentation/networking/mptcp-sysctl.rst @@ -24,3 +24,11 @@ add_addr_timeout - INTEGER (seconds) sysctl. Default: 120 + +checksum_enabled - BOOLEAN + Control whether DSS checksum can be enabled. + + DSS checksum can be enabled if the value is nonzero. This is a + per-namespace sysctl. + + Default: 0 diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c index 1ec4d36a39f0..6c2639bb9c19 100644 --- a/net/mptcp/ctrl.c +++ b/net/mptcp/ctrl.c @@ -23,6 +23,7 @@ struct mptcp_pernet { u8 mptcp_enabled; unsigned int add_addr_timeout; + u8 checksum_enabled; }; static struct mptcp_pernet *mptcp_get_pernet(struct net *net) @@ -40,10 +41,16 @@ unsigned int mptcp_get_add_addr_timeout(struct net *net) return mptcp_get_pernet(net)->add_addr_timeout; } +int mptcp_is_checksum_enabled(struct net *net) +{ + return mptcp_get_pernet(net)->checksum_enabled; +} + static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet) { pernet->mptcp_enabled = 1; pernet->add_addr_timeout = TCP_RTO_MAX; + pernet->checksum_enabled = 0; } #ifdef CONFIG_SYSCTL @@ -65,6 +72,14 @@ static struct ctl_table mptcp_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, + { + .procname = "checksum_enabled", + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE + }, {} }; @@ -82,6 +97,7 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet) table[0].data = &pernet->mptcp_enabled; table[1].data = &pernet->add_addr_timeout; + table[2].data = &pernet->checksum_enabled; hdr = register_net_sysctl(net, MPTCP_SYSCTL_PATH, table); if (!hdr) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 12637299b42e..16e50caf200e 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -542,7 +542,7 @@ static inline void mptcp_subflow_delegated_done(struct mptcp_subflow_context *su int mptcp_is_enabled(struct net *net); unsigned int mptcp_get_add_addr_timeout(struct net *net); -static inline int mptcp_is_checksum_enabled(struct net *net) { return false; } +int mptcp_is_checksum_enabled(struct net *net); void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow, struct mptcp_options_received *mp_opt); bool mptcp_subflow_data_available(struct sock *sk); -- cgit v1.2.3 From 68277749a0133fa6f9f5ec8576691e5fc9718610 Mon Sep 17 00:00:00 2001 From: Qing Zhang Date: Fri, 18 Jun 2021 10:53:37 +0800 Subject: dt-bindings: dwmac: Add bindings for new Loongson SoC and bridge chip Add the dwmac bindings for the Loongson-2K SoC and the LS7A bridge chip. Signed-off-by: Qing Zhang Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/snps,dwmac.yaml | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/snps,dwmac.yaml b/Documentation/devicetree/bindings/net/snps,dwmac.yaml index 9c0ce92e9212..56f2235f5fb5 100644 --- a/Documentation/devicetree/bindings/net/snps,dwmac.yaml +++ b/Documentation/devicetree/bindings/net/snps,dwmac.yaml @@ -51,11 +51,15 @@ properties: - allwinner,sun8i-r40-emac - allwinner,sun8i-v3s-emac - allwinner,sun50i-a64-emac + - loongson,ls2k-dwmac + - loongson,ls7a-dwmac - amlogic,meson6-dwmac - amlogic,meson8b-dwmac - amlogic,meson8m2-dwmac - amlogic,meson-gxbb-dwmac - amlogic,meson-axg-dwmac + - loongson,ls2k-dwmac + - loongson,ls7a-dwmac - ingenic,jz4775-mac - ingenic,x1000-mac - ingenic,x1600-mac @@ -363,6 +367,8 @@ allOf: - allwinner,sun8i-r40-emac - allwinner,sun8i-v3s-emac - allwinner,sun50i-a64-emac + - loongson,ls2k-dwmac + - loongson,ls7a-dwmac - ingenic,jz4775-mac - ingenic,x1000-mac - ingenic,x1600-mac -- cgit v1.2.3 From f42cfb469f9b4a1c002a03cce3d9329376800a6f Mon Sep 17 00:00:00 2001 From: Grant Seltzer Date: Fri, 18 Jun 2021 14:04:59 +0000 Subject: bpf: Add documentation for libbpf including API autogen This patch is meant to start the initiative to document libbpf. It includes .rst files which are text documentation describing building, API naming convention, as well as an index to generated API documentation. In this approach the generated API documentation is enabled by the kernels existing kernel documentation system which uses sphinx. The resulting docs would then be synced to kernel.org/doc You can test this by running `make htmldocs` and serving the html in Documentation/output. Since libbpf does not yet have comments in kernel doc format, see kernel.org/doc/html/latest/doc-guide/kernel-doc.html for an example so you can test this. The advantage of this approach is to use the existing sphinx infrastructure that the kernel has, and have libbpf docs in the same place as everything else. The current plan is to have the libbpf mirror sync the generated docs and version them based on the libbpf releases which are cut on github. This patch includes the addition of libbpf_api.rst which pulls comment documentation from header files in libbpf under tools/lib/bpf/. The comment docs would be of the standard kernel doc format. Signed-off-by: Grant Seltzer Signed-off-by: Andrii Nakryiko Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210618140459.9887-2-grantseltzer@gmail.com --- Documentation/bpf/index.rst | 13 ++ Documentation/bpf/libbpf/libbpf.rst | 14 ++ Documentation/bpf/libbpf/libbpf_api.rst | 27 ++++ Documentation/bpf/libbpf/libbpf_build.rst | 37 +++++ .../bpf/libbpf/libbpf_naming_convention.rst | 162 ++++++++++++++++++++ tools/lib/bpf/README.rst | 168 --------------------- 6 files changed, 253 insertions(+), 168 deletions(-) create mode 100644 Documentation/bpf/libbpf/libbpf.rst create mode 100644 Documentation/bpf/libbpf/libbpf_api.rst create mode 100644 Documentation/bpf/libbpf/libbpf_build.rst create mode 100644 Documentation/bpf/libbpf/libbpf_naming_convention.rst delete mode 100644 tools/lib/bpf/README.rst (limited to 'Documentation') diff --git a/Documentation/bpf/index.rst b/Documentation/bpf/index.rst index 93e8cf12a6d4..baea6c2abba5 100644 --- a/Documentation/bpf/index.rst +++ b/Documentation/bpf/index.rst @@ -12,6 +12,19 @@ BPF instruction-set. The Cilium project also maintains a `BPF and XDP Reference Guide`_ that goes into great technical depth about the BPF Architecture. +libbpf +====== + +Libbpf is a userspace library for loading and interacting with bpf programs. + +.. toctree:: + :maxdepth: 1 + + libbpf/libbpf + libbpf/libbpf_api + libbpf/libbpf_build + libbpf/libbpf_naming_convention + BPF Type Format (BTF) ===================== diff --git a/Documentation/bpf/libbpf/libbpf.rst b/Documentation/bpf/libbpf/libbpf.rst new file mode 100644 index 000000000000..1b1e61d5ead1 --- /dev/null +++ b/Documentation/bpf/libbpf/libbpf.rst @@ -0,0 +1,14 @@ +.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) + +libbpf +====== + +This is documentation for libbpf, a userspace library for loading and +interacting with bpf programs. + +All general BPF questions, including kernel functionality, libbpf APIs and +their application, should be sent to bpf@vger.kernel.org mailing list. +You can `subscribe `_ to the +mailing list search its `archive `_. +Please search the archive before asking new questions. It very well might +be that this was already addressed or answered before. diff --git a/Documentation/bpf/libbpf/libbpf_api.rst b/Documentation/bpf/libbpf/libbpf_api.rst new file mode 100644 index 000000000000..f07eecd054da --- /dev/null +++ b/Documentation/bpf/libbpf/libbpf_api.rst @@ -0,0 +1,27 @@ +.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) + +API +=== + +This documentation is autogenerated from header files in libbpf, tools/lib/bpf + +.. kernel-doc:: tools/lib/bpf/libbpf.h + :internal: + +.. kernel-doc:: tools/lib/bpf/bpf.h + :internal: + +.. kernel-doc:: tools/lib/bpf/btf.h + :internal: + +.. kernel-doc:: tools/lib/bpf/xsk.h + :internal: + +.. kernel-doc:: tools/lib/bpf/bpf_tracing.h + :internal: + +.. kernel-doc:: tools/lib/bpf/bpf_core_read.h + :internal: + +.. kernel-doc:: tools/lib/bpf/bpf_endian.h + :internal: \ No newline at end of file diff --git a/Documentation/bpf/libbpf/libbpf_build.rst b/Documentation/bpf/libbpf/libbpf_build.rst new file mode 100644 index 000000000000..8e8c23e8093d --- /dev/null +++ b/Documentation/bpf/libbpf/libbpf_build.rst @@ -0,0 +1,37 @@ +.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) + +Building libbpf +=============== + +libelf and zlib are internal dependencies of libbpf and thus are required to link +against and must be installed on the system for applications to work. +pkg-config is used by default to find libelf, and the program called +can be overridden with PKG_CONFIG. + +If using pkg-config at build time is not desired, it can be disabled by +setting NO_PKG_CONFIG=1 when calling make. + +To build both static libbpf.a and shared libbpf.so: + +.. code-block:: bash + + $ cd src + $ make + +To build only static libbpf.a library in directory build/ and install them +together with libbpf headers in a staging directory root/: + +.. code-block:: bash + + $ cd src + $ mkdir build root + $ BUILD_STATIC_ONLY=y OBJDIR=build DESTDIR=root make install + +To build both static libbpf.a and shared libbpf.so against a custom libelf +dependency installed in /build/root/ and install them together with libbpf +headers in a build directory /build/root/: + +.. code-block:: bash + + $ cd src + $ PKG_CONFIG_PATH=/build/root/lib64/pkgconfig DESTDIR=/build/root make \ No newline at end of file diff --git a/Documentation/bpf/libbpf/libbpf_naming_convention.rst b/Documentation/bpf/libbpf/libbpf_naming_convention.rst new file mode 100644 index 000000000000..3de1d51e41da --- /dev/null +++ b/Documentation/bpf/libbpf/libbpf_naming_convention.rst @@ -0,0 +1,162 @@ +.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) + +API naming convention +===================== + +libbpf API provides access to a few logically separated groups of +functions and types. Every group has its own naming convention +described here. It's recommended to follow these conventions whenever a +new function or type is added to keep libbpf API clean and consistent. + +All types and functions provided by libbpf API should have one of the +following prefixes: ``bpf_``, ``btf_``, ``libbpf_``, ``xsk_``, +``btf_dump_``, ``ring_buffer_``, ``perf_buffer_``. + +System call wrappers +-------------------- + +System call wrappers are simple wrappers for commands supported by +sys_bpf system call. These wrappers should go to ``bpf.h`` header file +and map one to one to corresponding commands. + +For example ``bpf_map_lookup_elem`` wraps ``BPF_MAP_LOOKUP_ELEM`` +command of sys_bpf, ``bpf_prog_attach`` wraps ``BPF_PROG_ATTACH``, etc. + +Objects +------- + +Another class of types and functions provided by libbpf API is "objects" +and functions to work with them. Objects are high-level abstractions +such as BPF program or BPF map. They're represented by corresponding +structures such as ``struct bpf_object``, ``struct bpf_program``, +``struct bpf_map``, etc. + +Structures are forward declared and access to their fields should be +provided via corresponding getters and setters rather than directly. + +These objects are associated with corresponding parts of ELF object that +contains compiled BPF programs. + +For example ``struct bpf_object`` represents ELF object itself created +from an ELF file or from a buffer, ``struct bpf_program`` represents a +program in ELF object and ``struct bpf_map`` is a map. + +Functions that work with an object have names built from object name, +double underscore and part that describes function purpose. + +For example ``bpf_object__open`` consists of the name of corresponding +object, ``bpf_object``, double underscore and ``open`` that defines the +purpose of the function to open ELF file and create ``bpf_object`` from +it. + +All objects and corresponding functions other than BTF related should go +to ``libbpf.h``. BTF types and functions should go to ``btf.h``. + +Auxiliary functions +------------------- + +Auxiliary functions and types that don't fit well in any of categories +described above should have ``libbpf_`` prefix, e.g. +``libbpf_get_error`` or ``libbpf_prog_type_by_name``. + +AF_XDP functions +------------------- + +AF_XDP functions should have an ``xsk_`` prefix, e.g. +``xsk_umem__get_data`` or ``xsk_umem__create``. The interface consists +of both low-level ring access functions and high-level configuration +functions. These can be mixed and matched. Note that these functions +are not reentrant for performance reasons. + +ABI +========== + +libbpf can be both linked statically or used as DSO. To avoid possible +conflicts with other libraries an application is linked with, all +non-static libbpf symbols should have one of the prefixes mentioned in +API documentation above. See API naming convention to choose the right +name for a new symbol. + +Symbol visibility +----------------- + +libbpf follow the model when all global symbols have visibility "hidden" +by default and to make a symbol visible it has to be explicitly +attributed with ``LIBBPF_API`` macro. For example: + +.. code-block:: c + + LIBBPF_API int bpf_prog_get_fd_by_id(__u32 id); + +This prevents from accidentally exporting a symbol, that is not supposed +to be a part of ABI what, in turn, improves both libbpf developer- and +user-experiences. + +ABI versionning +--------------- + +To make future ABI extensions possible libbpf ABI is versioned. +Versioning is implemented by ``libbpf.map`` version script that is +passed to linker. + +Version name is ``LIBBPF_`` prefix + three-component numeric version, +starting from ``0.0.1``. + +Every time ABI is being changed, e.g. because a new symbol is added or +semantic of existing symbol is changed, ABI version should be bumped. +This bump in ABI version is at most once per kernel development cycle. + +For example, if current state of ``libbpf.map`` is: + +.. code-block:: c + + LIBBPF_0.0.1 { + global: + bpf_func_a; + bpf_func_b; + local: + \*; + }; + +, and a new symbol ``bpf_func_c`` is being introduced, then +``libbpf.map`` should be changed like this: + +.. code-block:: c + + LIBBPF_0.0.1 { + global: + bpf_func_a; + bpf_func_b; + local: + \*; + }; + LIBBPF_0.0.2 { + global: + bpf_func_c; + } LIBBPF_0.0.1; + +, where new version ``LIBBPF_0.0.2`` depends on the previous +``LIBBPF_0.0.1``. + +Format of version script and ways to handle ABI changes, including +incompatible ones, described in details in [1]. + +Stand-alone build +------------------- + +Under https://github.com/libbpf/libbpf there is a (semi-)automated +mirror of the mainline's version of libbpf for a stand-alone build. + +However, all changes to libbpf's code base must be upstreamed through +the mainline kernel tree. + +License +------------------- + +libbpf is dual-licensed under LGPL 2.1 and BSD 2-Clause. + +Links +------------------- + +[1] https://www.akkadia.org/drepper/dsohowto.pdf + (Chapter 3. Maintaining APIs and ABIs). diff --git a/tools/lib/bpf/README.rst b/tools/lib/bpf/README.rst deleted file mode 100644 index 8928f7787f2d..000000000000 --- a/tools/lib/bpf/README.rst +++ /dev/null @@ -1,168 +0,0 @@ -.. SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) - -libbpf API naming convention -============================ - -libbpf API provides access to a few logically separated groups of -functions and types. Every group has its own naming convention -described here. It's recommended to follow these conventions whenever a -new function or type is added to keep libbpf API clean and consistent. - -All types and functions provided by libbpf API should have one of the -following prefixes: ``bpf_``, ``btf_``, ``libbpf_``, ``xsk_``, -``perf_buffer_``. - -System call wrappers --------------------- - -System call wrappers are simple wrappers for commands supported by -sys_bpf system call. These wrappers should go to ``bpf.h`` header file -and map one-on-one to corresponding commands. - -For example ``bpf_map_lookup_elem`` wraps ``BPF_MAP_LOOKUP_ELEM`` -command of sys_bpf, ``bpf_prog_attach`` wraps ``BPF_PROG_ATTACH``, etc. - -Objects -------- - -Another class of types and functions provided by libbpf API is "objects" -and functions to work with them. Objects are high-level abstractions -such as BPF program or BPF map. They're represented by corresponding -structures such as ``struct bpf_object``, ``struct bpf_program``, -``struct bpf_map``, etc. - -Structures are forward declared and access to their fields should be -provided via corresponding getters and setters rather than directly. - -These objects are associated with corresponding parts of ELF object that -contains compiled BPF programs. - -For example ``struct bpf_object`` represents ELF object itself created -from an ELF file or from a buffer, ``struct bpf_program`` represents a -program in ELF object and ``struct bpf_map`` is a map. - -Functions that work with an object have names built from object name, -double underscore and part that describes function purpose. - -For example ``bpf_object__open`` consists of the name of corresponding -object, ``bpf_object``, double underscore and ``open`` that defines the -purpose of the function to open ELF file and create ``bpf_object`` from -it. - -Another example: ``bpf_program__load`` is named for corresponding -object, ``bpf_program``, that is separated from other part of the name -by double underscore. - -All objects and corresponding functions other than BTF related should go -to ``libbpf.h``. BTF types and functions should go to ``btf.h``. - -Auxiliary functions -------------------- - -Auxiliary functions and types that don't fit well in any of categories -described above should have ``libbpf_`` prefix, e.g. -``libbpf_get_error`` or ``libbpf_prog_type_by_name``. - -AF_XDP functions -------------------- - -AF_XDP functions should have an ``xsk_`` prefix, e.g. -``xsk_umem__get_data`` or ``xsk_umem__create``. The interface consists -of both low-level ring access functions and high-level configuration -functions. These can be mixed and matched. Note that these functions -are not reentrant for performance reasons. - -Please take a look at Documentation/networking/af_xdp.rst in the Linux -kernel source tree on how to use XDP sockets and for some common -mistakes in case you do not get any traffic up to user space. - -libbpf ABI -========== - -libbpf can be both linked statically or used as DSO. To avoid possible -conflicts with other libraries an application is linked with, all -non-static libbpf symbols should have one of the prefixes mentioned in -API documentation above. See API naming convention to choose the right -name for a new symbol. - -Symbol visibility ------------------ - -libbpf follow the model when all global symbols have visibility "hidden" -by default and to make a symbol visible it has to be explicitly -attributed with ``LIBBPF_API`` macro. For example: - -.. code-block:: c - - LIBBPF_API int bpf_prog_get_fd_by_id(__u32 id); - -This prevents from accidentally exporting a symbol, that is not supposed -to be a part of ABI what, in turn, improves both libbpf developer- and -user-experiences. - -ABI versionning ---------------- - -To make future ABI extensions possible libbpf ABI is versioned. -Versioning is implemented by ``libbpf.map`` version script that is -passed to linker. - -Version name is ``LIBBPF_`` prefix + three-component numeric version, -starting from ``0.0.1``. - -Every time ABI is being changed, e.g. because a new symbol is added or -semantic of existing symbol is changed, ABI version should be bumped. -This bump in ABI version is at most once per kernel development cycle. - -For example, if current state of ``libbpf.map`` is: - -.. code-block:: - LIBBPF_0.0.1 { - global: - bpf_func_a; - bpf_func_b; - local: - \*; - }; - -, and a new symbol ``bpf_func_c`` is being introduced, then -``libbpf.map`` should be changed like this: - -.. code-block:: - LIBBPF_0.0.1 { - global: - bpf_func_a; - bpf_func_b; - local: - \*; - }; - LIBBPF_0.0.2 { - global: - bpf_func_c; - } LIBBPF_0.0.1; - -, where new version ``LIBBPF_0.0.2`` depends on the previous -``LIBBPF_0.0.1``. - -Format of version script and ways to handle ABI changes, including -incompatible ones, described in details in [1]. - -Stand-alone build -================= - -Under https://github.com/libbpf/libbpf there is a (semi-)automated -mirror of the mainline's version of libbpf for a stand-alone build. - -However, all changes to libbpf's code base must be upstreamed through -the mainline kernel tree. - -License -======= - -libbpf is dual-licensed under LGPL 2.1 and BSD 2-Clause. - -Links -===== - -[1] https://www.akkadia.org/drepper/dsohowto.pdf - (Chapter 3. Maintaining APIs and ABIs). -- cgit v1.2.3 From 3078d964c0fe6cf8eba197c862d1011cb7c0e7b4 Mon Sep 17 00:00:00 2001 From: Oz Shlomo Date: Thu, 17 Jun 2021 09:50:06 +0300 Subject: docs: networking: Update connection tracking offload sysctl parameters Document the following connection offload configuration parameters: - nf_flowtable_tcp_timeout - nf_flowtable_tcp_pickup - nf_flowtable_udp_timeout - nf_flowtable_udp_pickup Signed-off-by: Oz Shlomo Signed-off-by: Pablo Neira Ayuso --- Documentation/networking/nf_conntrack-sysctl.rst | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'Documentation') diff --git a/Documentation/networking/nf_conntrack-sysctl.rst b/Documentation/networking/nf_conntrack-sysctl.rst index 11a9b76786cb..0467b30e4abe 100644 --- a/Documentation/networking/nf_conntrack-sysctl.rst +++ b/Documentation/networking/nf_conntrack-sysctl.rst @@ -177,3 +177,27 @@ nf_conntrack_gre_timeout_stream - INTEGER (seconds) This extended timeout will be used in case there is an GRE stream detected. + +nf_flowtable_tcp_timeout - INTEGER (seconds) + default 30 + + Control offload timeout for tcp connections. + TCP connections may be offloaded from nf conntrack to nf flow table. + Once aged, the connection is returned to nf conntrack with tcp pickup timeout. + +nf_flowtable_tcp_pickup - INTEGER (seconds) + default 120 + + TCP connection timeout after being aged from nf flow table offload. + +nf_flowtable_udp_timeout - INTEGER (seconds) + default 30 + + Control offload timeout for udp connections. + UDP connections may be offloaded from nf conntrack to nf flow table. + Once aged, the connection is returned to nf conntrack with udp pickup timeout. + +nf_flowtable_udp_pickup - INTEGER (seconds) + default 30 + + UDP connection timeout after being aged from nf flow table offload. -- cgit v1.2.3 From 2afd6c8b43c1ee50444d410e953d7d2adf86b5ea Mon Sep 17 00:00:00 2001 From: Alex Elder Date: Mon, 21 Jun 2021 12:56:22 -0500 Subject: dt-bindings: net: qcom,ipa: add support for MSM8998 Add support for "qcom,msm8998-ipa", which uses IPA v3.1. Originally proposed by AngeloGioacchino Del Regno. Link: https://lore.kernel.org/linux-arm-msm/20210211175015.200772-8-angelogioacchino.delregno@somainline.org Signed-off-by: Alex Elder Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/qcom,ipa.yaml | 1 + 1 file changed, 1 insertion(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/qcom,ipa.yaml b/Documentation/devicetree/bindings/net/qcom,ipa.yaml index 5fe6d3dceb08..ed88ba4b94df 100644 --- a/Documentation/devicetree/bindings/net/qcom,ipa.yaml +++ b/Documentation/devicetree/bindings/net/qcom,ipa.yaml @@ -44,6 +44,7 @@ description: properties: compatible: enum: + - qcom,msm8998-ipa - qcom,sc7180-ipa - qcom,sc7280-ipa - qcom,sdm845-ipa -- cgit v1.2.3 From 19e068b18e729aecca4fbe5b261b05b59230c80f Mon Sep 17 00:00:00 2001 From: "周琰杰 (Zhou Yanjie)" Date: Sun, 20 Jun 2021 20:38:49 +0800 Subject: dt-bindings: dwmac: Remove unexpected item. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the unexpected "snps,dwmac" item in the example. Fixes: 3b8401066e5a ("dt-bindings: dwmac: Add bindings for new Ingenic SoCs.") Signed-off-by: 周琰杰 (Zhou Yanjie) Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/ingenic,mac.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/ingenic,mac.yaml b/Documentation/devicetree/bindings/net/ingenic,mac.yaml index 5e93d4f9a080..d08a88125a5c 100644 --- a/Documentation/devicetree/bindings/net/ingenic,mac.yaml +++ b/Documentation/devicetree/bindings/net/ingenic,mac.yaml @@ -61,7 +61,7 @@ examples: #include mac: ethernet@134b0000 { - compatible = "ingenic,x1000-mac", "snps,dwmac"; + compatible = "ingenic,x1000-mac"; reg = <0x134b0000 0x2000>; interrupt-parent = <&intc>; -- cgit v1.2.3 From 7c4d7ca8cce3c8167e10f52a5afb553851f2086b Mon Sep 17 00:00:00 2001 From: Marcin Wojtas Date: Mon, 21 Jun 2021 19:30:23 +0200 Subject: Documentation: ACPI: DSD: describe additional MAC configuration Document additional MAC configuration modes which can be processed by the existing fwnode_ phylink helpers: * "managed" standard ACPI _DSD property [1] * "fixed-link" data-only subnode linked in the _DSD package via generic mechanism of the hierarchical data extension [2] [1] https://www.uefi.org/sites/default/files/resources/_DSD-device-properties-UUID.pdf [2] https://github.com/UEFI/DSD-Guide/blob/main/dsd-guide.pdf Signed-off-by: Marcin Wojtas Signed-off-by: David S. Miller --- Documentation/firmware-guide/acpi/dsd/phy.rst | 59 +++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) (limited to 'Documentation') diff --git a/Documentation/firmware-guide/acpi/dsd/phy.rst b/Documentation/firmware-guide/acpi/dsd/phy.rst index 0d49bad2ea9c..680ad179e5f9 100644 --- a/Documentation/firmware-guide/acpi/dsd/phy.rst +++ b/Documentation/firmware-guide/acpi/dsd/phy.rst @@ -50,6 +50,21 @@ phy-mode The "phy-mode" _DSD property is used to describe the connection to the PHY. The valid values for "phy-mode" are defined in [4]. +managed +------- +Optional property, which specifies the PHY management type. +The valid values for "managed" are defined in [4]. + +fixed-link +---------- +The "fixed-link" is described by a data-only subnode of the +MAC port, which is linked in the _DSD package via +hierarchical data extension (UUID dbb8e3e6-5886-4ba6-8795-1319f52a966b +in accordance with [5] "_DSD Implementation Guide" document). +The subnode should comprise a required property ("speed") and +possibly the optional ones - complete list of parameters and +their values are specified in [4]. + The following ASL example illustrates the usage of these properties. DSDT entry for MDIO node @@ -128,6 +143,48 @@ phy-mode and phy-handle are used as explained earlier. }) } +MAC node example where "managed" property is specified. +------------------------------------------------------- + +.. code-block:: none + + Scope(\_SB.PP21.ETH0) + { + Name (_DSD, Package () { + ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"), + Package () { + Package () {"phy-mode", "sgmii"}, + Package () {"managed", "in-band-status"} + } + }) + } + +MAC node example with a "fixed-link" subnode. +--------------------------------------------- + +.. code-block:: none + + Scope(\_SB.PP21.ETH1) + { + Name (_DSD, Package () { + ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"), + Package () { + Package () {"phy-mode", "sgmii"}, + }, + ToUUID("dbb8e3e6-5886-4ba6-8795-1319f52a966b"), + Package () { + Package () {"fixed-link", "LNK0"} + } + }) + Name (LNK0, Package(){ // Data-only subnode of port + ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"), + Package () { + Package () {"speed", 1000}, + Package () {"full-duplex", 1} + } + }) + } + References ========== @@ -138,3 +195,5 @@ References [3] Documentation/firmware-guide/acpi/DSD-properties-rules.rst [4] Documentation/devicetree/bindings/net/ethernet-controller.yaml + +[5] https://github.com/UEFI/DSD-Guide/blob/main/dsd-guide.pdf -- cgit v1.2.3 From 78c57f22e3c87ab0a2844d7c9a120eba51ae34f4 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 22 Jun 2021 09:50:46 +0300 Subject: ethtool: Use correct command name in title The command is called 'ETHTOOL_MSG_MODULE_EEPROM_GET', not 'ETHTOOL_MSG_MODULE_EEPROM'. Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- Documentation/networking/ethtool-netlink.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index 25131df3c2bd..c3600f9c8988 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -1363,8 +1363,8 @@ in an implementation specific way. ``ETHTOOL_A_FEC_AUTO`` requests the driver to choose FEC mode based on SFP module parameters. This does not mean autonegotiation. -MODULE_EEPROM -============= +MODULE_EEPROM_GET +================= Fetch module EEPROM data dump. This interface is designed to allow dumps of at most 1/2 page at once. This -- cgit v1.2.3 From 913d026fbfaf114ff87afcc77fa4e9309f87f114 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 22 Jun 2021 09:50:47 +0300 Subject: ethtool: Document correct attribute type 'ETHTOOL_A_MODULE_EEPROM_DATA' is a binary attribute, not a nested one. Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- Documentation/networking/ethtool-netlink.rst | 2 +- include/uapi/linux/ethtool_netlink.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index c3600f9c8988..8ae644f800f0 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -1388,7 +1388,7 @@ Kernel response contents: +---------------------------------------------+--------+---------------------+ | ``ETHTOOL_A_MODULE_EEPROM_HEADER`` | nested | reply header | +---------------------------------------------+--------+---------------------+ - | ``ETHTOOL_A_MODULE_EEPROM_DATA`` | nested | array of bytes from | + | ``ETHTOOL_A_MODULE_EEPROM_DATA`` | binary | array of bytes from | | | | module EEPROM | +---------------------------------------------+--------+---------------------+ diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index 825cfda1c5d5..c7135c9c37a5 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -675,7 +675,7 @@ enum { ETHTOOL_A_MODULE_EEPROM_PAGE, /* u8 */ ETHTOOL_A_MODULE_EEPROM_BANK, /* u8 */ ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS, /* u8 */ - ETHTOOL_A_MODULE_EEPROM_DATA, /* nested */ + ETHTOOL_A_MODULE_EEPROM_DATA, /* binary */ __ETHTOOL_A_MODULE_EEPROM_CNT, ETHTOOL_A_MODULE_EEPROM_MAX = (__ETHTOOL_A_MODULE_EEPROM_CNT - 1) -- cgit v1.2.3 From 37a025e83902903df658489665499a548a53423b Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 22 Jun 2021 09:50:49 +0300 Subject: ethtool: Document behavior when module EEPROM bank attribute is omitted The kernel assumes bank 0 when 'ETHTOOL_MSG_MODULE_EEPROM_GET' is sent without 'ETHTOOL_A_MODULE_EEPROM_BANK'. Document it as part of the interface documentation. Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- Documentation/networking/ethtool-netlink.rst | 2 ++ 1 file changed, 2 insertions(+) (limited to 'Documentation') diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index 8ae644f800f0..6ea91e41593f 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -1383,6 +1383,8 @@ Request contents: ``ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS`` u8 page I2C address ======================================= ====== ========================== +If ``ETHTOOL_A_MODULE_EEPROM_BANK`` is not specified, bank 0 is assumed. + Kernel response contents: +---------------------------------------------+--------+---------------------+ -- cgit v1.2.3 From d1e462a7a5f359cbb9a0e8fbfafcfb6657034105 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Tue, 22 Jun 2021 14:04:48 -0400 Subject: sctp: add probe_interval in sysctl and sock/asoc/transport PLPMTUD can be enabled by doing 'sysctl -w net.sctp.probe_interval=n'. 'n' is the interval for PLPMTUD probe timer in milliseconds, and it can't be less than 5000 if it's not 0. All asoc/transport's PLPMTUD in a new socket will be enabled by default. Signed-off-by: Xin Long Acked-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.rst | 8 ++++++++ include/net/netns/sctp.h | 3 +++ include/net/sctp/constants.h | 2 ++ include/net/sctp/structs.h | 3 +++ net/sctp/associola.c | 2 ++ net/sctp/socket.c | 1 + net/sctp/sysctl.c | 35 ++++++++++++++++++++++++++++++++++ 7 files changed, 54 insertions(+) (limited to 'Documentation') diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index b0436d3a4f11..8bff728b3a1e 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -2834,6 +2834,14 @@ encap_port - INTEGER Default: 0 +plpmtud_probe_interval - INTEGER + The time interval (in milliseconds) for sending PLPMTUD probe chunks. + These chunks are sent at the specified interval with a variable size + to probe the mtu of a given path between 2 endpoints. PLPMTUD will + be disabled when 0 is set, and other values for it must be >= 5000. + + Default: 0 + ``/proc/sys/net/core/*`` ======================== diff --git a/include/net/netns/sctp.h b/include/net/netns/sctp.h index a0f315effa94..40240722cdca 100644 --- a/include/net/netns/sctp.h +++ b/include/net/netns/sctp.h @@ -84,6 +84,9 @@ struct netns_sctp { /* HB.interval - 30 seconds */ unsigned int hb_interval; + /* The interval for PLPMTUD probe timer */ + unsigned int probe_interval; + /* Association.Max.Retrans - 10 attempts * Path.Max.Retrans - 5 attempts (per destination address) * Max.Init.Retransmits - 8 attempts diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 14a0d22c9113..449cf9cb428b 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -424,4 +424,6 @@ enum { */ #define SCTP_AUTH_RANDOM_LENGTH 32 +#define SCTP_PROBE_TIMER_MIN 5000 + #endif /* __sctp_constants_h__ */ diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 1aa585216f34..bf5d22deaefb 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -177,6 +177,7 @@ struct sctp_sock { * will be inherited by all new associations. */ __u32 hbinterval; + __u32 probe_interval; __be16 udp_port; __be16 encap_port; @@ -858,6 +859,7 @@ struct sctp_transport { * the destination address every heartbeat interval. */ unsigned long hbinterval; + unsigned long probe_interval; /* SACK delay timeout */ unsigned long sackdelay; @@ -1795,6 +1797,7 @@ struct sctp_association { * will be inherited by all new transports. */ unsigned long hbinterval; + unsigned long probe_interval; __be16 encap_port; diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 336df4b36655..e01895edd3a4 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -98,6 +98,7 @@ static struct sctp_association *sctp_association_init( * sock configured value. */ asoc->hbinterval = msecs_to_jiffies(sp->hbinterval); + asoc->probe_interval = msecs_to_jiffies(sp->probe_interval); asoc->encap_port = sp->encap_port; @@ -625,6 +626,7 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc, * association configured value. */ peer->hbinterval = asoc->hbinterval; + peer->probe_interval = asoc->probe_interval; peer->encap_port = asoc->encap_port; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index a79d193ff872..d2960ab665a5 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4989,6 +4989,7 @@ static int sctp_init_sock(struct sock *sk) atomic_set(&sp->pd_mode, 0); skb_queue_head_init(&sp->pd_lobby); sp->frag_interleave = 0; + sp->probe_interval = net->sctp.probe_interval; /* Create a per socket endpoint structure. Even if we * change the data structure relationships, this may still diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index 55871b277f47..b46a416787ec 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -55,6 +55,8 @@ static int proc_sctp_do_alpha_beta(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos); static int proc_sctp_do_auth(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos); +static int proc_sctp_do_probe_interval(struct ctl_table *ctl, int write, + void *buffer, size_t *lenp, loff_t *ppos); static struct ctl_table sctp_table[] = { { @@ -293,6 +295,13 @@ static struct ctl_table sctp_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "plpmtud_probe_interval", + .data = &init_net.sctp.probe_interval, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_sctp_do_probe_interval, + }, { .procname = "udp_port", .data = &init_net.sctp.udp_port, @@ -539,6 +548,32 @@ static int proc_sctp_do_udp_port(struct ctl_table *ctl, int write, return ret; } +static int proc_sctp_do_probe_interval(struct ctl_table *ctl, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + struct net *net = current->nsproxy->net_ns; + struct ctl_table tbl; + int ret, new_value; + + memset(&tbl, 0, sizeof(struct ctl_table)); + tbl.maxlen = sizeof(unsigned int); + + if (write) + tbl.data = &new_value; + else + tbl.data = &net->sctp.probe_interval; + + ret = proc_dointvec(&tbl, write, buffer, lenp, ppos); + if (write && ret == 0) { + if (new_value && new_value < SCTP_PROBE_TIMER_MIN) + return -EINVAL; + + net->sctp.probe_interval = new_value; + } + + return ret; +} + int sctp_sysctl_net_register(struct net *net) { struct ctl_table *table; -- cgit v1.2.3 From d2f77960e5b03b2d373252b2ee150a4a14010f99 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 22 Jun 2021 12:25:18 -0700 Subject: mptcp: add sysctl allow_join_initial_addr_port This patch added a new sysctl, named allow_join_initial_addr_port, to control whether allow peers to send join requests to the IP address and port number used by the initial subflow. Suggested-by: Florian Westphal Acked-by: Paolo Abeni Signed-off-by: Geliang Tang Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- Documentation/networking/mptcp-sysctl.rst | 13 +++++++++++++ net/mptcp/ctrl.c | 16 ++++++++++++++++ net/mptcp/protocol.h | 1 + 3 files changed, 30 insertions(+) (limited to 'Documentation') diff --git a/Documentation/networking/mptcp-sysctl.rst b/Documentation/networking/mptcp-sysctl.rst index ee06fd782465..76d939e688b8 100644 --- a/Documentation/networking/mptcp-sysctl.rst +++ b/Documentation/networking/mptcp-sysctl.rst @@ -32,3 +32,16 @@ checksum_enabled - BOOLEAN per-namespace sysctl. Default: 0 + +allow_join_initial_addr_port - BOOLEAN + Allow peers to send join requests to the IP address and port number used + by the initial subflow if the value is 1. This controls a flag that is + sent to the peer at connection time, and whether such join requests are + accepted or denied. + + Joins to addresses advertised with ADD_ADDR are not affected by this + value. + + This is a per-namespace sysctl. + + Default: 1 diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c index 6c2639bb9c19..7d738bd06f2c 100644 --- a/net/mptcp/ctrl.c +++ b/net/mptcp/ctrl.c @@ -24,6 +24,7 @@ struct mptcp_pernet { u8 mptcp_enabled; unsigned int add_addr_timeout; u8 checksum_enabled; + u8 allow_join_initial_addr_port; }; static struct mptcp_pernet *mptcp_get_pernet(struct net *net) @@ -46,11 +47,17 @@ int mptcp_is_checksum_enabled(struct net *net) return mptcp_get_pernet(net)->checksum_enabled; } +int mptcp_allow_join_id0(struct net *net) +{ + return mptcp_get_pernet(net)->allow_join_initial_addr_port; +} + static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet) { pernet->mptcp_enabled = 1; pernet->add_addr_timeout = TCP_RTO_MAX; pernet->checksum_enabled = 0; + pernet->allow_join_initial_addr_port = 1; } #ifdef CONFIG_SYSCTL @@ -80,6 +87,14 @@ static struct ctl_table mptcp_sysctl_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE }, + { + .procname = "allow_join_initial_addr_port", + .maxlen = sizeof(u8), + .mode = 0644, + .proc_handler = proc_dou8vec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE + }, {} }; @@ -98,6 +113,7 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet) table[0].data = &pernet->mptcp_enabled; table[1].data = &pernet->add_addr_timeout; table[2].data = &pernet->checksum_enabled; + table[3].data = &pernet->allow_join_initial_addr_port; hdr = register_net_sysctl(net, MPTCP_SYSCTL_PATH, table); if (!hdr) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 160c2ab09f19..9aab5fb54716 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -540,6 +540,7 @@ static inline void mptcp_subflow_delegated_done(struct mptcp_subflow_context *su int mptcp_is_enabled(struct net *net); unsigned int mptcp_get_add_addr_timeout(struct net *net); int mptcp_is_checksum_enabled(struct net *net); +int mptcp_allow_join_id0(struct net *net); void mptcp_subflow_fully_established(struct mptcp_subflow_context *subflow, struct mptcp_options_received *mp_opt); bool mptcp_subflow_data_available(struct sock *sk); -- cgit v1.2.3 From 4b9718b5a201eddcd00d9db6c36b18840125c7ee Mon Sep 17 00:00:00 2001 From: Ilya Maximets Date: Tue, 22 Jun 2021 20:56:47 +0200 Subject: docs, af_xdp: Consistent indentation in examples Examples in this document use all kinds of indentation from 3 to 5 spaces and even mixed with tabs. Making them all even and equal to 4 spaces. Signed-off-by: Ilya Maximets Signed-off-by: Daniel Borkmann Acked-by: Magnus Karlsson Link: https://lore.kernel.org/bpf/20210622185647.3705104-1-i.maximets@ovn.org --- Documentation/networking/af_xdp.rst | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/af_xdp.rst b/Documentation/networking/af_xdp.rst index 2ccc5644cc98..42576880aa4a 100644 --- a/Documentation/networking/af_xdp.rst +++ b/Documentation/networking/af_xdp.rst @@ -290,19 +290,19 @@ round-robin example of distributing packets is shown below: #define MAX_SOCKS 16 struct { - __uint(type, BPF_MAP_TYPE_XSKMAP); - __uint(max_entries, MAX_SOCKS); - __uint(key_size, sizeof(int)); - __uint(value_size, sizeof(int)); + __uint(type, BPF_MAP_TYPE_XSKMAP); + __uint(max_entries, MAX_SOCKS); + __uint(key_size, sizeof(int)); + __uint(value_size, sizeof(int)); } xsks_map SEC(".maps"); static unsigned int rr; SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx) { - rr = (rr + 1) & (MAX_SOCKS - 1); + rr = (rr + 1) & (MAX_SOCKS - 1); - return bpf_redirect_map(&xsks_map, rr, XDP_DROP); + return bpf_redirect_map(&xsks_map, rr, XDP_DROP); } Note, that since there is only a single set of FILL and COMPLETION @@ -379,7 +379,7 @@ would look like this for the TX path: .. code-block:: c if (xsk_ring_prod__needs_wakeup(&my_tx_ring)) - sendto(xsk_socket__fd(xsk_handle), NULL, 0, MSG_DONTWAIT, NULL, 0); + sendto(xsk_socket__fd(xsk_handle), NULL, 0, MSG_DONTWAIT, NULL, 0); I.e., only use the syscall if the flag is set. @@ -442,9 +442,9 @@ purposes. The supported statistics are shown below: .. code-block:: c struct xdp_statistics { - __u64 rx_dropped; /* Dropped for reasons other than invalid desc */ - __u64 rx_invalid_descs; /* Dropped due to invalid descriptor */ - __u64 tx_invalid_descs; /* Dropped due to invalid descriptor */ + __u64 rx_dropped; /* Dropped for reasons other than invalid desc */ + __u64 rx_invalid_descs; /* Dropped due to invalid descriptor */ + __u64 tx_invalid_descs; /* Dropped due to invalid descriptor */ }; XDP_OPTIONS getsockopt @@ -483,15 +483,15 @@ like this: .. code-block:: c // struct xdp_rxtx_ring { - // __u32 *producer; - // __u32 *consumer; - // struct xdp_desc *desc; + // __u32 *producer; + // __u32 *consumer; + // struct xdp_desc *desc; // }; // struct xdp_umem_ring { - // __u32 *producer; - // __u32 *consumer; - // __u64 *desc; + // __u32 *producer; + // __u32 *consumer; + // __u64 *desc; // }; // typedef struct xdp_rxtx_ring RING; -- cgit v1.2.3 From 9a145c04a293933002ec288a4d6b4f370b59e4d1 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 24 Jun 2021 18:05:52 +0200 Subject: doc: Clarify and expand RCU updaters and corresponding readers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit clarifies which primitives readers can use given that the corresponding updaters have made a specific choice. This commit also adds this information for the various RCU Tasks flavors. While in the area, it removes a paragraph that no longer applies in any straightforward manner. Signed-off-by: Paul E. McKenney Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210624160609.292325-3-toke@redhat.com --- Documentation/RCU/checklist.rst | 48 +++++++++++++++++++++++------------------ 1 file changed, 27 insertions(+), 21 deletions(-) (limited to 'Documentation') diff --git a/Documentation/RCU/checklist.rst b/Documentation/RCU/checklist.rst index 1030119294d0..07f6cb8f674d 100644 --- a/Documentation/RCU/checklist.rst +++ b/Documentation/RCU/checklist.rst @@ -211,27 +211,33 @@ over a rather long period of time, but improvements are always welcome! of the system, especially to real-time workloads running on the rest of the system. -7. As of v4.20, a given kernel implements only one RCU flavor, - which is RCU-sched for PREEMPTION=n and RCU-preempt for PREEMPTION=y. - If the updater uses call_rcu() or synchronize_rcu(), - then the corresponding readers may use rcu_read_lock() and - rcu_read_unlock(), rcu_read_lock_bh() and rcu_read_unlock_bh(), - or any pair of primitives that disables and re-enables preemption, - for example, rcu_read_lock_sched() and rcu_read_unlock_sched(). - If the updater uses synchronize_srcu() or call_srcu(), - then the corresponding readers must use srcu_read_lock() and - srcu_read_unlock(), and with the same srcu_struct. The rules for - the expedited primitives are the same as for their non-expedited - counterparts. Mixing things up will result in confusion and - broken kernels, and has even resulted in an exploitable security - issue. - - One exception to this rule: rcu_read_lock() and rcu_read_unlock() - may be substituted for rcu_read_lock_bh() and rcu_read_unlock_bh() - in cases where local bottom halves are already known to be - disabled, for example, in irq or softirq context. Commenting - such cases is a must, of course! And the jury is still out on - whether the increased speed is worth it. +7. As of v4.20, a given kernel implements only one RCU flavor, which + is RCU-sched for PREEMPTION=n and RCU-preempt for PREEMPTION=y. + If the updater uses call_rcu() or synchronize_rcu(), then + the corresponding readers may use: (1) rcu_read_lock() and + rcu_read_unlock(), (2) any pair of primitives that disables + and re-enables softirq, for example, rcu_read_lock_bh() and + rcu_read_unlock_bh(), or (3) any pair of primitives that disables + and re-enables preemption, for example, rcu_read_lock_sched() and + rcu_read_unlock_sched(). If the updater uses synchronize_srcu() + or call_srcu(), then the corresponding readers must use + srcu_read_lock() and srcu_read_unlock(), and with the same + srcu_struct. The rules for the expedited RCU grace-period-wait + primitives are the same as for their non-expedited counterparts. + + If the updater uses call_rcu_tasks() or synchronize_rcu_tasks(), + then the readers must refrain from executing voluntary + context switches, that is, from blocking. If the updater uses + call_rcu_tasks_trace() or synchronize_rcu_tasks_trace(), then + the corresponding readers must use rcu_read_lock_trace() and + rcu_read_unlock_trace(). If an updater uses call_rcu_tasks_rude() + or synchronize_rcu_tasks_rude(), then the corresponding readers + must use anything that disables interrupts. + + Mixing things up will result in confusion and broken kernels, and + has even resulted in an exploitable security issue. Therefore, + when using non-obvious pairs of primitives, commenting is of + course a must. 8. Although synchronize_rcu() is slower than is call_rcu(), it usually results in simpler code. So, unless update performance is -- cgit v1.2.3 From e74c74f9e51deb725e72d129084ba8252d47222d Mon Sep 17 00:00:00 2001 From: Toke Høiland-Jørgensen Date: Thu, 24 Jun 2021 18:05:53 +0200 Subject: doc: Give XDP as example of non-obvious RCU reader/updater pairing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit gives an example of non-obvious RCU reader/updater pairing in the guise of the XDP feature in networking, which calls BPF programs from network-driver NAPI (softirq) context. Signed-off-by: Paul E. McKenney Signed-off-by: Toke Høiland-Jørgensen Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20210624160609.292325-4-toke@redhat.com --- Documentation/RCU/checklist.rst | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'Documentation') diff --git a/Documentation/RCU/checklist.rst b/Documentation/RCU/checklist.rst index 07f6cb8f674d..01cc21f17f7b 100644 --- a/Documentation/RCU/checklist.rst +++ b/Documentation/RCU/checklist.rst @@ -236,8 +236,15 @@ over a rather long period of time, but improvements are always welcome! Mixing things up will result in confusion and broken kernels, and has even resulted in an exploitable security issue. Therefore, - when using non-obvious pairs of primitives, commenting is of - course a must. + when using non-obvious pairs of primitives, commenting is + of course a must. One example of non-obvious pairing is + the XDP feature in networking, which calls BPF programs from + network-driver NAPI (softirq) context. BPF relies heavily on RCU + protection for its data structures, but because the BPF program + invocation happens entirely within a single local_bh_disable() + section in a NAPI poll cycle, this usage is safe. The reason + that this usage is safe is that readers can use anything that + disables BH when updaters use call_rcu() or synchronize_rcu(). 8. Although synchronize_rcu() is slower than is call_rcu(), it usually results in simpler code. So, unless update performance is -- cgit v1.2.3 From f8c63088a98bac8926cb40ecf46ebd71dc1232c4 Mon Sep 17 00:00:00 2001 From: Steen Hegelund Date: Thu, 24 Jun 2021 09:07:49 +0200 Subject: dt-bindings: net: sparx5: Add sparx5-switch bindings Document the Sparx5 switch device driver bindings Signed-off-by: Steen Hegelund Signed-off-by: Lars Povlsen Signed-off-by: Bjarni Jonasson Reviewed-by: Rob Herring Signed-off-by: David S. Miller --- .../bindings/net/microchip,sparx5-switch.yaml | 226 +++++++++++++++++++++ 1 file changed, 226 insertions(+) create mode 100644 Documentation/devicetree/bindings/net/microchip,sparx5-switch.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/microchip,sparx5-switch.yaml b/Documentation/devicetree/bindings/net/microchip,sparx5-switch.yaml new file mode 100644 index 000000000000..347b912a46bb --- /dev/null +++ b/Documentation/devicetree/bindings/net/microchip,sparx5-switch.yaml @@ -0,0 +1,226 @@ +# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/microchip,sparx5-switch.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Microchip Sparx5 Ethernet switch controller + +maintainers: + - Steen Hegelund + - Lars Povlsen + +description: | + The SparX-5 Enterprise Ethernet switch family provides a rich set of + Enterprise switching features such as advanced TCAM-based VLAN and + QoS processing enabling delivery of differentiated services, and + security through TCAM-based frame processing using versatile content + aware processor (VCAP). + + IPv4/IPv6 Layer 3 (L3) unicast and multicast routing is supported + with up to 18K IPv4/9K IPv6 unicast LPM entries and up to 9K IPv4/3K + IPv6 (S,G) multicast groups. + + L3 security features include source guard and reverse path + forwarding (uRPF) tasks. Additional L3 features include VRF-Lite and + IP tunnels (IP over GRE/IP). + + The SparX-5 switch family targets managed Layer 2 and Layer 3 + equipment in SMB, SME, and Enterprise where high port count + 1G/2.5G/5G/10G switching with 10G/25G aggregation links is required. + +properties: + $nodename: + pattern: "^switch@[0-9a-f]+$" + + compatible: + const: microchip,sparx5-switch + + reg: + items: + - description: cpu target + - description: devices target + - description: general control block target + + reg-names: + items: + - const: cpu + - const: devices + - const: gcb + + interrupts: + minItems: 1 + items: + - description: register based extraction + - description: frame dma based extraction + + interrupt-names: + minItems: 1 + items: + - const: xtr + - const: fdma + + resets: + items: + - description: Reset controller used for switch core reset (soft reset) + + reset-names: + items: + - const: switch + + mac-address: true + + ethernet-ports: + type: object + patternProperties: + "^port@[0-9a-f]+$": + type: object + + properties: + '#address-cells': + const: 1 + '#size-cells': + const: 0 + + reg: + description: Switch port number + + phys: + maxItems: 1 + description: + phandle of a Ethernet SerDes PHY. This defines which SerDes + instance will handle the Ethernet traffic. + + phy-mode: + description: + This specifies the interface used by the Ethernet SerDes towards + the PHY or SFP. + + microchip,bandwidth: + description: Specifies bandwidth in Mbit/s allocated to the port. + $ref: "/schemas/types.yaml#/definitions/uint32" + maximum: 25000 + + phy-handle: + description: + phandle of a Ethernet PHY. This is optional and if provided it + points to the cuPHY used by the Ethernet SerDes. + + sfp: + description: + phandle of an SFP. This is optional and used when not specifying + a cuPHY. It points to the SFP node that describes the SFP used by + the Ethernet SerDes. + + managed: true + + microchip,sd-sgpio: + description: + Index of the ports Signal Detect SGPIO in the set of 384 SGPIOs + This is optional, and only needed if the default used index is + is not correct. + $ref: "/schemas/types.yaml#/definitions/uint32" + minimum: 0 + maximum: 383 + + required: + - reg + - phys + - phy-mode + - microchip,bandwidth + + oneOf: + - required: + - phy-handle + - required: + - sfp + - managed + +required: + - compatible + - reg + - reg-names + - interrupts + - interrupt-names + - resets + - reset-names + - ethernet-ports + +additionalProperties: false + +examples: + - | + #include + switch: switch@600000000 { + compatible = "microchip,sparx5-switch"; + reg = <0 0x401000>, + <0x10004000 0x7fc000>, + <0x11010000 0xaf0000>; + reg-names = "cpu", "devices", "gcb"; + interrupts = ; + interrupt-names = "xtr"; + resets = <&reset 0>; + reset-names = "switch"; + ethernet-ports { + #address-cells = <1>; + #size-cells = <0>; + + port0: port@0 { + reg = <0>; + microchip,bandwidth = <1000>; + phys = <&serdes 13>; + phy-handle = <&phy0>; + phy-mode = "qsgmii"; + }; + /* ... */ + /* Then the 25G interfaces */ + port60: port@60 { + reg = <60>; + microchip,bandwidth = <25000>; + phys = <&serdes 29>; + phy-mode = "10gbase-r"; + sfp = <&sfp_eth60>; + managed = "in-band-status"; + microchip,sd-sgpio = <365>; + }; + port61: port@61 { + reg = <61>; + microchip,bandwidth = <25000>; + phys = <&serdes 30>; + phy-mode = "10gbase-r"; + sfp = <&sfp_eth61>; + managed = "in-band-status"; + microchip,sd-sgpio = <369>; + }; + port62: port@62 { + reg = <62>; + microchip,bandwidth = <25000>; + phys = <&serdes 31>; + phy-mode = "10gbase-r"; + sfp = <&sfp_eth62>; + managed = "in-band-status"; + microchip,sd-sgpio = <373>; + }; + port63: port@63 { + reg = <63>; + microchip,bandwidth = <25000>; + phys = <&serdes 32>; + phy-mode = "10gbase-r"; + sfp = <&sfp_eth63>; + managed = "in-band-status"; + microchip,sd-sgpio = <377>; + }; + /* Finally the Management interface */ + port64: port@64 { + reg = <64>; + microchip,bandwidth = <1000>; + phys = <&serdes 0>; + phy-handle = <&phy64>; + phy-mode = "sgmii"; + mac-address = [ 00 00 00 01 02 03 ]; + }; + }; + }; + +... +# vim: set ts=2 sw=2 sts=2 tw=80 et cc=80 ft=yaml : -- cgit v1.2.3 From c6a7ed77ee6334f3a85a0f3db74ca80101e25304 Mon Sep 17 00:00:00 2001 From: Bailey Forrest Date: Thu, 24 Jun 2021 11:06:17 -0700 Subject: gve: Update GVE documentation to describe DQO DQO is a new descriptor format for our next generation virtual NIC. Signed-off-by: Bailey Forrest Reviewed-by: Willem de Bruijn Reviewed-by: Catherine Sullivan Signed-off-by: David S. Miller --- .../device_drivers/ethernet/google/gve.rst | 53 ++++++++++++++++++++-- 1 file changed, 48 insertions(+), 5 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/device_drivers/ethernet/google/gve.rst b/Documentation/networking/device_drivers/ethernet/google/gve.rst index 793693cef6e3..6d73ee78f3d7 100644 --- a/Documentation/networking/device_drivers/ethernet/google/gve.rst +++ b/Documentation/networking/device_drivers/ethernet/google/gve.rst @@ -47,13 +47,24 @@ The driver interacts with the device in the following ways: - Transmit and Receive Queues - See description below +Descriptor Formats +------------------ +GVE supports two descriptor formats: GQI and DQO. These two formats have +entirely different descriptors, which will be described below. + Registers --------- -All registers are MMIO and big endian. +All registers are MMIO. The registers are used for initializing and configuring the device as well as querying device status in response to management interrupts. +Endianness +---------- +- Admin Queue messages and registers are all Big Endian. +- GQI descriptors and datapath registers are Big Endian. +- DQO descriptors and datapath registers are Little Endian. + Admin Queue (AQ) ---------------- The Admin Queue is a PAGE_SIZE memory block, treated as an array of AQ @@ -97,10 +108,10 @@ the queues associated with that interrupt. The handler for these irqs schedule the napi for that block to run and poll the queues. -Traffic Queues --------------- -gVNIC's queues are composed of a descriptor ring and a buffer and are -assigned to a notification block. +GQI Traffic Queues +------------------ +GQI queues are composed of a descriptor ring and a buffer and are assigned to a +notification block. The descriptor rings are power-of-two-sized ring buffers consisting of fixed-size descriptors. They advance their head pointer using a __be32 @@ -121,3 +132,35 @@ Receive The buffers for receive rings are put into a data ring that is the same length as the descriptor ring and the head and tail pointers advance over the rings together. + +DQO Traffic Queues +------------------ +- Every TX and RX queue is assigned a notification block. + +- TX and RX buffers queues, which send descriptors to the device, use MMIO + doorbells to notify the device of new descriptors. + +- RX and TX completion queues, which receive descriptors from the device, use a + "generation bit" to know when a descriptor was populated by the device. The + driver initializes all bits with the "current generation". The device will + populate received descriptors with the "next generation" which is inverted + from the current generation. When the ring wraps, the current/next generation + are swapped. + +- It's the driver's responsibility to ensure that the RX and TX completion + queues are not overrun. This can be accomplished by limiting the number of + descriptors posted to HW. + +- TX packets have a 16 bit completion_tag and RX buffers have a 16 bit + buffer_id. These will be returned on the TX completion and RX queues + respectively to let the driver know which packet/buffer was completed. + +Transmit +~~~~~~~~ +A packet's buffers are DMA mapped for the device to access before transmission. +After the packet was successfully transmitted, the buffers are unmapped. + +Receive +~~~~~~~ +The driver posts fixed sized buffers to HW on the RX buffer queue. The packet +received on the associated RX queue may span multiple descriptors. -- cgit v1.2.3 From 44531076338fc9d9556685d3e7efc2526185760d Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Thu, 24 Jun 2021 17:55:23 +0300 Subject: Documentation: net: dsa: add details about SJA1110 Denote that the new switch generation is supported, detail its pin strapping options (with differences compared to SJA1105) and explain how MDIO access to the internal 100base-T1 and 100base-TX PHYs is performed. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- Documentation/networking/dsa/sja1105.rst | 61 +++++++++++++++++++++++++++++--- 1 file changed, 57 insertions(+), 4 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/dsa/sja1105.rst b/Documentation/networking/dsa/sja1105.rst index 7395a33baaf9..da4057ba37f1 100644 --- a/Documentation/networking/dsa/sja1105.rst +++ b/Documentation/networking/dsa/sja1105.rst @@ -5,7 +5,7 @@ NXP SJA1105 switch driver Overview ======== -The NXP SJA1105 is a family of 6 devices: +The NXP SJA1105 is a family of 10 SPI-managed automotive switches: - SJA1105E: First generation, no TTEthernet - SJA1105T: First generation, TTEthernet @@ -13,9 +13,11 @@ The NXP SJA1105 is a family of 6 devices: - SJA1105Q: Second generation, TTEthernet, no SGMII - SJA1105R: Second generation, no TTEthernet, SGMII - SJA1105S: Second generation, TTEthernet, SGMII - -These are SPI-managed automotive switches, with all ports being gigabit -capable, and supporting MII/RMII/RGMII and optionally SGMII on one port. +- SJA1110A: Third generation, TTEthernet, SGMII, integrated 100base-T1 and + 100base-TX PHYs +- SJA1110B: Third generation, TTEthernet, SGMII, 100base-T1, 100base-TX +- SJA1110C: Third generation, TTEthernet, SGMII, 100base-T1, 100base-TX +- SJA1110D: Third generation, TTEthernet, SGMII, 100base-T1 Being automotive parts, their configuration interface is geared towards set-and-forget use, with minimal dynamic interaction at runtime. They @@ -579,3 +581,54 @@ A board would need to hook up the PHYs connected to the switch to any other MDIO bus available to Linux within the system (e.g. to the DSA master's MDIO bus). Link state management then works by the driver manually keeping in sync (over SPI commands) the MAC link speed with the settings negotiated by the PHY. + +By comparison, the SJA1110 supports an MDIO slave access point over which its +internal 100base-T1 PHYs can be accessed from the host. This is, however, not +used by the driver, instead the internal 100base-T1 and 100base-TX PHYs are +accessed through SPI commands, modeled in Linux as virtual MDIO buses. + +The microcontroller attached to the SJA1110 port 0 also has an MDIO controller +operating in master mode, however the driver does not support this either, +since the microcontroller gets disabled when the Linux driver operates. +Discrete PHYs connected to the switch ports should have their MDIO interface +attached to an MDIO controller from the host system and not to the switch, +similar to SJA1105. + +Port compatibility matrix +------------------------- + +The SJA1105 port compatibility matrix is: + +===== ============== ============== ============== +Port SJA1105E/T SJA1105P/Q SJA1105R/S +===== ============== ============== ============== +0 xMII xMII xMII +1 xMII xMII xMII +2 xMII xMII xMII +3 xMII xMII xMII +4 xMII xMII SGMII +===== ============== ============== ============== + + +The SJA1110 port compatibility matrix is: + +===== ============== ============== ============== ============== +Port SJA1110A SJA1110B SJA1110C SJA1110D +===== ============== ============== ============== ============== +0 RevMII (uC) RevMII (uC) RevMII (uC) RevMII (uC) +1 100base-TX 100base-TX 100base-TX + or SGMII SGMII +2 xMII xMII xMII xMII + or SGMII or SGMII +3 xMII xMII xMII + or SGMII or SGMII SGMII + or 2500base-X or 2500base-X or 2500base-X +4 SGMII SGMII SGMII SGMII + or 2500base-X or 2500base-X or 2500base-X or 2500base-X +5 100base-T1 100base-T1 100base-T1 100base-T1 +6 100base-T1 100base-T1 100base-T1 100base-T1 +7 100base-T1 100base-T1 100base-T1 100base-T1 +8 100base-T1 100base-T1 n/a n/a +9 100base-T1 100base-T1 n/a n/a +10 100base-T1 n/a n/a n/a +===== ============== ============== ============== ============== -- cgit v1.2.3 From fea1d5b17f821b78abbdadb9cb6f28fe433b635e Mon Sep 17 00:00:00 2001 From: Xin Long Date: Thu, 24 Jun 2021 11:48:09 -0400 Subject: sctp: send the next probe immediately once the last one is acked These is no need to wait for 'interval' period for the next probe if the last probe is already acked in search state. The 'interval' period waiting should be only for probe failure timeout and the current pmtu check when it's in search complete state. This change will shorten the probe time a lot in search state, and also fix the document accordingly. Signed-off-by: Xin Long Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.rst | 12 ++++++++---- net/sctp/sm_statefuns.c | 5 ++++- 2 files changed, 12 insertions(+), 5 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst index 8bff728b3a1e..b3fa522e4cd9 100644 --- a/Documentation/networking/ip-sysctl.rst +++ b/Documentation/networking/ip-sysctl.rst @@ -2835,10 +2835,14 @@ encap_port - INTEGER Default: 0 plpmtud_probe_interval - INTEGER - The time interval (in milliseconds) for sending PLPMTUD probe chunks. - These chunks are sent at the specified interval with a variable size - to probe the mtu of a given path between 2 endpoints. PLPMTUD will - be disabled when 0 is set, and other values for it must be >= 5000. + The time interval (in milliseconds) for the PLPMTUD probe timer, + which is configured to expire after this period to receive an + acknowledgment to a probe packet. This is also the time interval + between the probes for the current pmtu when the probe search + is done. + + PLPMTUD will be disabled when 0 is set, and other values for it + must be >= 5000. Default: 0 diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index d29b579da904..09a8f23ec709 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -1275,7 +1275,10 @@ enum sctp_disposition sctp_sf_backbeat_8_3(struct net *net, return SCTP_DISPOSITION_DISCARD; sctp_transport_pl_recv(link); - return SCTP_DISPOSITION_CONSUME; + if (link->pl.state == SCTP_PL_COMPLETE) + return SCTP_DISPOSITION_CONSUME; + + return sctp_sf_send_probe(net, ep, asoc, type, link, commands); } max_interval = link->hbinterval + link->rto; -- cgit v1.2.3 From d88c6de4f8b6e6f1b6c3e3a85d39106c83553bc9 Mon Sep 17 00:00:00 2001 From: Venkata Lakshmi Narayana Gubba Date: Tue, 18 May 2021 22:04:45 +0530 Subject: dt-bindings: net: bluetooth: Convert Qualcomm BT binding to DT schema Converted Qualcomm Bluetooth binidings to DT schema. Signed-off-by: Venkata Lakshmi Narayana Gubba Reviewed-by: Rob Herring Signed-off-by: Marcel Holtmann --- .../devicetree/bindings/net/qualcomm-bluetooth.txt | 69 ------------- .../bindings/net/qualcomm-bluetooth.yaml | 112 +++++++++++++++++++++ 2 files changed, 112 insertions(+), 69 deletions(-) delete mode 100644 Documentation/devicetree/bindings/net/qualcomm-bluetooth.txt create mode 100644 Documentation/devicetree/bindings/net/qualcomm-bluetooth.yaml (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/qualcomm-bluetooth.txt b/Documentation/devicetree/bindings/net/qualcomm-bluetooth.txt deleted file mode 100644 index 709ca6d51650..000000000000 --- a/Documentation/devicetree/bindings/net/qualcomm-bluetooth.txt +++ /dev/null @@ -1,69 +0,0 @@ -Qualcomm Bluetooth Chips ---------------------- - -This documents the binding structure and common properties for serial -attached Qualcomm devices. - -Serial attached Qualcomm devices shall be a child node of the host UART -device the slave device is attached to. - -Required properties: - - compatible: should contain one of the following: - * "qcom,qca6174-bt" - * "qcom,qca9377-bt" - * "qcom,wcn3990-bt" - * "qcom,wcn3991-bt" - * "qcom,wcn3998-bt" - * "qcom,qca6390-bt" - -Optional properties for compatible string qcom,qca6174-bt: - - - enable-gpios: gpio specifier used to enable chip - - clocks: clock provided to the controller (SUSCLK_32KHZ) - - firmware-name: specify the name of nvm firmware to load - -Optional properties for compatible string qcom,qca9377-bt: - - - max-speed: see Documentation/devicetree/bindings/serial/serial.yaml - -Required properties for compatible string qcom,wcn399x-bt: - - - vddio-supply: VDD_IO supply regulator handle. - - vddxo-supply: VDD_XO supply regulator handle. - - vddrf-supply: VDD_RF supply regulator handle. - - vddch0-supply: VDD_CH0 supply regulator handle. - -Optional properties for compatible string qcom,wcn399x-bt: - - - max-speed: see Documentation/devicetree/bindings/serial/serial.yaml - - firmware-name: specify the name of nvm firmware to load - - clocks: clock provided to the controller - -Examples: - -serial@7570000 { - label = "BT-UART"; - status = "okay"; - - bluetooth { - compatible = "qcom,qca6174-bt"; - - enable-gpios = <&pm8994_gpios 19 GPIO_ACTIVE_HIGH>; - clocks = <&divclk4>; - firmware-name = "nvm_00440302.bin"; - }; -}; - -serial@898000 { - bluetooth { - compatible = "qcom,wcn3990-bt"; - - vddio-supply = <&vreg_s4a_1p8>; - vddxo-supply = <&vreg_l7a_1p8>; - vddrf-supply = <&vreg_l17a_1p3>; - vddch0-supply = <&vreg_l25a_3p3>; - max-speed = <3200000>; - firmware-name = "crnv21.bin"; - clocks = <&rpmhcc RPMH_RF_CLK2>; - }; -}; diff --git a/Documentation/devicetree/bindings/net/qualcomm-bluetooth.yaml b/Documentation/devicetree/bindings/net/qualcomm-bluetooth.yaml new file mode 100644 index 000000000000..772689bf50c1 --- /dev/null +++ b/Documentation/devicetree/bindings/net/qualcomm-bluetooth.yaml @@ -0,0 +1,112 @@ +# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/net/qualcomm-bluetooth.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: Qualcomm Bluetooth Chips + +maintainers: + - Balakrishna Godavarthi + - Rocky Liao + +description: + This binding describes Qualcomm UART-attached bluetooth chips. + +properties: + compatible: + enum: + - qcom,qca6174-bt + - qcom,qca9377-bt + - qcom,wcn3990-bt + - qcom,wcn3991-bt + - qcom,wcn3998-bt + - qcom,qca6390-bt + + enable-gpios: + maxItems: 1 + description: gpio specifier used to enable chip + + clocks: + maxItems: 1 + description: clock provided to the controller (SUSCLK_32KHZ) + + vddio-supply: + description: VDD_IO supply regulator handle + + vddxo-supply: + description: VDD_XO supply regulator handle + + vddrf-supply: + description: VDD_RF supply regulator handle + + vddch0-supply: + description: VDD_CH0 supply regulator handle + + max-speed: + description: see Documentation/devicetree/bindings/serial/serial.yaml + + firmware-name: + description: specify the name of nvm firmware to load + + local-bd-address: + description: see Documentation/devicetree/bindings/net/bluetooth.txt + + +required: + - compatible + +additionalProperties: false + +allOf: + - if: + properties: + compatible: + contains: + enum: + - qcom,qca6174-bt + then: + required: + - enable-gpios + - clocks + + - if: + properties: + compatible: + contains: + enum: + - qcom,wcn3990-bt + - qcom,wcn3991-bt + - qcom,wcn3998-bt + then: + required: + - vddio-supply + - vddxo-supply + - vddrf-supply + - vddch0-supply + +examples: + - | + #include + serial { + + bluetooth { + compatible = "qcom,qca6174-bt"; + enable-gpios = <&pm8994_gpios 19 GPIO_ACTIVE_HIGH>; + clocks = <&divclk4>; + firmware-name = "nvm_00440302.bin"; + }; + }; + - | + serial { + + bluetooth { + compatible = "qcom,wcn3990-bt"; + vddio-supply = <&vreg_s4a_1p8>; + vddxo-supply = <&vreg_l7a_1p8>; + vddrf-supply = <&vreg_l17a_1p3>; + vddch0-supply = <&vreg_l25a_3p3>; + max-speed = <3200000>; + firmware-name = "crnv21.bin"; + }; + }; -- cgit v1.2.3 From 7a4cb1635a4b879f8d118ec7c6586aef913819f3 Mon Sep 17 00:00:00 2001 From: Venkata Lakshmi Narayana Gubba Date: Tue, 18 May 2021 22:04:46 +0530 Subject: dt-bindings: net: bluetooth: Add device tree bindings for QTI chip wcn6750 This patch enables regulators and gpios for the Qualcomm Bluetooth wcn6750 controller. Signed-off-by: Venkata Lakshmi Narayana Gubba Reviewed-by: Rob Herring Signed-off-by: Marcel Holtmann --- .../bindings/net/qualcomm-bluetooth.yaml | 71 ++++++++++++++++++++++ 1 file changed, 71 insertions(+) (limited to 'Documentation') diff --git a/Documentation/devicetree/bindings/net/qualcomm-bluetooth.yaml b/Documentation/devicetree/bindings/net/qualcomm-bluetooth.yaml index 772689bf50c1..f93c6e7a1b59 100644 --- a/Documentation/devicetree/bindings/net/qualcomm-bluetooth.yaml +++ b/Documentation/devicetree/bindings/net/qualcomm-bluetooth.yaml @@ -22,11 +22,17 @@ properties: - qcom,wcn3991-bt - qcom,wcn3998-bt - qcom,qca6390-bt + - qcom,wcn6750-bt enable-gpios: maxItems: 1 description: gpio specifier used to enable chip + swctrl-gpios: + maxItems: 1 + description: gpio specifier is used to find status + of clock supply to SoC + clocks: maxItems: 1 description: clock provided to the controller (SUSCLK_32KHZ) @@ -43,6 +49,30 @@ properties: vddch0-supply: description: VDD_CH0 supply regulator handle + vddaon-supply: + description: VDD_AON supply regulator handle + + vddbtcxmx-supply: + description: VDD_BT_CXMX supply regulator handle + + vddrfacmn-supply: + description: VDD_RFA_CMN supply regulator handle + + vddrfa0p8-supply: + description: VDD_RFA_0P8 suppply regulator handle + + vddrfa1p7-supply: + description: VDD_RFA_1P7 supply regulator handle + + vddrfa1p2-supply: + description: VDD_RFA_1P2 supply regulator handle + + vddrfa2p2-supply: + description: VDD_RFA_2P2 supply regulator handle + + vddasd-supply: + description: VDD_ASD supply regulator handle + max-speed: description: see Documentation/devicetree/bindings/serial/serial.yaml @@ -85,6 +115,25 @@ allOf: - vddrf-supply - vddch0-supply + - if: + properties: + compatible: + contains: + enum: + - qcom,wcn6750-bt + then: + required: + - enable-gpios + - swctrl-gpios + - vddio-supply + - vddaon-supply + - vddbtcxmx-supply + - vddrfacmn-supply + - vddrfa0p8-supply + - vddrfa1p7-supply + - vddrfa1p2-supply + - vddasd-supply + examples: - | #include @@ -110,3 +159,25 @@ examples: firmware-name = "crnv21.bin"; }; }; + - | + serial { + + bluetooth { + compatible = "qcom,wcn6750-bt"; + pinctrl-names = "default"; + pinctrl-0 = <&bt_en_default>; + enable-gpios = <&tlmm 85 GPIO_ACTIVE_HIGH>; + swctrl-gpios = <&tlmm 86 GPIO_ACTIVE_HIGH>; + vddio-supply = <&vreg_l19b_1p8>; + vddaon-supply = <&vreg_s7b_0p9>; + vddbtcxmx-supply = <&vreg_s7b_0p9>; + vddrfacmn-supply = <&vreg_s7b_0p9>; + vddrfa0p8-supply = <&vreg_s7b_0p9>; + vddrfa1p7-supply = <&vreg_s1b_1p8>; + vddrfa1p2-supply = <&vreg_s8b_1p2>; + vddrfa2p2-supply = <&vreg_s1c_2p2>; + vddasd-supply = <&vreg_l11c_2p8>; + max-speed = <3200000>; + firmware-name = "msnv11.bin"; + }; + }; -- cgit v1.2.3 From b117e1e8a86d363fc1ad53df8d2c47884d2c0048 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 29 Jun 2021 17:06:47 +0300 Subject: net: dsa: delete dsa_legacy_fdb_add and dsa_legacy_fdb_del We want to add reference counting for FDB entries in cross-chip topologies, and in order for that to have any chance of working and not be unbalanced (leading to entries which are never deleted), we need to ensure that higher layers are sane, because if they aren't, it's garbage in, garbage out. For example, if we add a bridge FDB entry twice, the bridge properly errors out: $ bridge fdb add dev swp0 00:01:02:03:04:07 master static $ bridge fdb add dev swp0 00:01:02:03:04:07 master static RTNETLINK answers: File exists However, the same thing cannot be said about the bridge bypass operations: $ bridge fdb add dev swp0 00:01:02:03:04:07 $ bridge fdb add dev swp0 00:01:02:03:04:07 $ bridge fdb add dev swp0 00:01:02:03:04:07 $ bridge fdb add dev swp0 00:01:02:03:04:07 $ echo $? 0 But one 'bridge fdb del' is enough to remove the entry, no matter how many times it was added. The bridge bypass operations are impossible to maintain in these circumstances and lack of support for reference counting the cross-chip notifiers is holding us back from making further progress, so just drop support for them. The only way left for users to install static bridge FDB entries is the proper one, using the "master static" flags. With this change, rtnl_fdb_add() falls back to calling ndo_dflt_fdb_add() which uses the duplicate-exclusive variant of dev_uc_add(): dev_uc_add_excl(). Because DSA does not (yet) declare IFF_UNICAST_FLT, this results in us going to promiscuous mode: $ bridge fdb add dev swp0 00:01:02:03:04:05 [ 28.206743] device swp0 entered promiscuous mode $ bridge fdb add dev swp0 00:01:02:03:04:05 RTNETLINK answers: File exists So even if it does not completely fail, there is at least some indication that it is behaving differently from before, and closer to user space expectations, I would argue (the lack of a "local|static" specifier defaults to "local", or "host-only", so dev_uc_add() is a reasonable default implementation). If the generic implementation of .ndo_fdb_add provided by Vlad Yasevich is a proof of anything, it only proves that the implementation provided by DSA was always wrong, by not looking at "ndm->ndm_state & NUD_NOARP" (the "static" flag which means that the FDB entry points outwards) and "ndm->ndm_state & NUD_PERMANENT" (the "local" flag which means that the FDB entry points towards the host). It all used to mean the same thing to DSA. Update the documentation so that the users are not confused about what's going on. Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- Documentation/networking/dsa/configuration.rst | 68 ++++++++++++++++++++++++++ net/dsa/slave.c | 23 --------- 2 files changed, 68 insertions(+), 23 deletions(-) (limited to 'Documentation') diff --git a/Documentation/networking/dsa/configuration.rst b/Documentation/networking/dsa/configuration.rst index 774f0e76c746..2b08f1a772d3 100644 --- a/Documentation/networking/dsa/configuration.rst +++ b/Documentation/networking/dsa/configuration.rst @@ -292,3 +292,71 @@ configuration. # bring up the bridge devices ip link set br0 up + +Forwarding database (FDB) management +------------------------------------ + +The existing DSA switches do not have the necessary hardware support to keep +the software FDB of the bridge in sync with the hardware tables, so the two +tables are managed separately (``bridge fdb show`` queries both, and depending +on whether the ``self`` or ``master`` flags are being used, a ``bridge fdb +add`` or ``bridge fdb del`` command acts upon entries from one or both tables). + +Up until kernel v4.14, DSA only supported user space management of bridge FDB +entries using the bridge bypass operations (which do not update the software +FDB, just the hardware one) using the ``self`` flag (which is optional and can +be omitted). + + .. code-block:: sh + + bridge fdb add dev swp0 00:01:02:03:04:05 self static + # or shorthand + bridge fdb add dev swp0 00:01:02:03:04:05 static + +Due to a bug, the bridge bypass FDB implementation provided by DSA did not +distinguish between ``static`` and ``local`` FDB entries (``static`` are meant +to be forwarded, while ``local`` are meant to be locally terminated, i.e. sent +to the host port). Instead, all FDB entries with the ``self`` flag (implicit or +explicit) are treated by DSA as ``static`` even if they are ``local``. + + .. code-block:: sh + + # This command: + bridge fdb add dev swp0 00:01:02:03:04:05 static + # behaves the same for DSA as this command: + bridge fdb add dev swp0 00:01:02:03:04:05 local + # or shorthand, because the 'local' flag is implicit if 'static' is not + # specified, it also behaves the same as: + bridge fdb add dev swp0 00:01:02:03:04:05 + +The last command is an incorrect way of adding a static bridge FDB entry to a +DSA switch using the bridge bypass operations, and works by mistake. Other +drivers will treat an FDB entry added by the same command as ``local`` and as +such, will not forward it, as opposed to DSA. + +Between kernel v4.14 and v5.14, DSA has supported in parallel two modes of +adding a bridge FDB entry to the switch: the bridge bypass discussed above, as +well as a new mode using the ``master`` flag which installs FDB entries in the +software bridge too. + + .. code-block:: sh + + bridge fdb add dev swp0 00:01:02:03:04:05 master static + +Since kernel v5.14, DSA has gained stronger integration with the bridge's +software FDB, and the support for its bridge bypass FDB implementation (using +the ``self`` flag) has been removed. This results in the following changes: + + .. code-block:: sh + + # This is the only valid way of adding an FDB entry that is supported, + # compatible with v4.14 kernels and later: + bridge fdb add dev swp0 00:01:02:03:04:05 master static + # This command is no longer buggy and the entry is properly treated as + # 'local' instead of being forwarded: + bridge fdb add dev swp0 00:01:02:03:04:05 + # This command no longer installs a static FDB entry to hardware: + bridge fdb add dev swp0 00:01:02:03:04:05 static + +Script writers are therefore encouraged to use the ``master static`` set of +flags when working with bridge FDB entries on DSA switch interfaces. diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 898ed9cf756f..64acb1e11cd7 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -1651,27 +1651,6 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = { .self_test = dsa_slave_net_selftest, }; -/* legacy way, bypassing the bridge *****************************************/ -static int dsa_legacy_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], - struct net_device *dev, - const unsigned char *addr, u16 vid, - u16 flags, - struct netlink_ext_ack *extack) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - - return dsa_port_fdb_add(dp, addr, vid); -} - -static int dsa_legacy_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], - struct net_device *dev, - const unsigned char *addr, u16 vid) -{ - struct dsa_port *dp = dsa_slave_to_port(dev); - - return dsa_port_fdb_del(dp, addr, vid); -} - static struct devlink_port *dsa_slave_get_devlink_port(struct net_device *dev) { struct dsa_port *dp = dsa_slave_to_port(dev); @@ -1713,8 +1692,6 @@ static const struct net_device_ops dsa_slave_netdev_ops = { .ndo_change_rx_flags = dsa_slave_change_rx_flags, .ndo_set_rx_mode = dsa_slave_set_rx_mode, .ndo_set_mac_address = dsa_slave_set_mac_address, - .ndo_fdb_add = dsa_legacy_fdb_add, - .ndo_fdb_del = dsa_legacy_fdb_del, .ndo_fdb_dump = dsa_slave_fdb_dump, .ndo_do_ioctl = dsa_slave_ioctl, .ndo_get_iflink = dsa_slave_get_iflink, -- cgit v1.2.3